linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] mm/z3fold.c: Fix race between migration and destruction
@ 2019-08-09 16:46 Henry Burns
  2019-08-09 19:37 ` Henry Burns
  2019-08-10  9:05 ` Vitaly Wool
  0 siblings, 2 replies; 3+ messages in thread
From: Henry Burns @ 2019-08-09 16:46 UTC (permalink / raw)
  To: Vitaly Wool, Andrew Morton
  Cc: Vitaly Vul, Shakeel Butt, Jonathan Adams, linux-mm, linux-kernel,
	Henry Burns

In z3fold_destroy_pool() we call destroy_workqueue(&pool->compact_wq).
However, we have no guarantee that migration isn't happening in the
background at that time.

Migration directly calls queue_work_on(pool->compact_wq); if destruction
wins that race, we are using a destroyed workqueue.

Signed-off-by: Henry Burns <henryburns@google.com>
---
 mm/z3fold.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 78447cecfffa..e136d97ce56e 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -40,6 +40,7 @@
 #include <linux/workqueue.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/wait.h>
 #include <linux/zpool.h>
 
 /*
@@ -161,8 +162,10 @@ struct z3fold_pool {
 	const struct zpool_ops *zpool_ops;
 	struct workqueue_struct *compact_wq;
 	struct workqueue_struct *release_wq;
+	struct wait_queue_head isolate_wait;
 	struct work_struct work;
 	struct inode *inode;
+	int isolated_pages;
 };
 
 /*
@@ -772,6 +775,7 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
 		goto out_c;
 	spin_lock_init(&pool->lock);
 	spin_lock_init(&pool->stale_lock);
+	init_waitqueue_head(&pool->isolate_wait);
 	pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
 	if (!pool->unbuddied)
 		goto out_pool;
@@ -811,6 +815,15 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
 	return NULL;
 }
 
+static bool pool_isolated_are_drained(struct z3fold_pool *pool)
+{
+	bool ret;
+
+	spin_lock(&pool->lock);
+	ret = pool->isolated_pages == 0;
+	spin_unlock(&pool->lock);
+	return ret;
+}
 /**
  * z3fold_destroy_pool() - destroys an existing z3fold pool
  * @pool:	the z3fold pool to be destroyed
@@ -821,6 +834,13 @@ static void z3fold_destroy_pool(struct z3fold_pool *pool)
 {
 	kmem_cache_destroy(pool->c_handle);
 
+	/*
+	 * We need to ensure that no pages are being migrated while we destroy
+	 * these workqueues, as migration can queue work on either of the
+	 * workqueues.
+	 */
+	wait_event(pool->isolate_wait, !pool_isolated_are_drained(pool));
+
 	/*
 	 * We need to destroy pool->compact_wq before pool->release_wq,
 	 * as any pending work on pool->compact_wq will call
@@ -1317,6 +1337,28 @@ static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
 	return atomic64_read(&pool->pages_nr);
 }
 
+/*
+ * z3fold_dec_isolated() expects to be called while pool->lock is held.
+ */
+static void z3fold_dec_isolated(struct z3fold_pool *pool)
+{
+	assert_spin_locked(&pool->lock);
+	VM_BUG_ON(pool->isolated_pages <= 0);
+	pool->isolated_pages--;
+
+	/*
+	 * If we have no more isolated pages, we have to see if
+	 * z3fold_destroy_pool() is waiting for a signal.
+	 */
+	if (pool->isolated_pages == 0 && waitqueue_active(&pool->isolate_wait))
+		wake_up_all(&pool->isolate_wait);
+}
+
+static void z3fold_inc_isolated(struct z3fold_pool *pool)
+{
+	pool->isolated_pages++;
+}
+
 static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
 {
 	struct z3fold_header *zhdr;
@@ -1343,6 +1385,7 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
 		spin_lock(&pool->lock);
 		if (!list_empty(&page->lru))
 			list_del(&page->lru);
+		z3fold_inc_isolated(pool);
 		spin_unlock(&pool->lock);
 		z3fold_page_unlock(zhdr);
 		return true;
@@ -1417,6 +1460,10 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa
 
 	queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);
 
+	spin_lock(&pool->lock);
+	z3fold_dec_isolated(pool);
+	spin_unlock(&pool->lock);
+
 	page_mapcount_reset(page);
 	put_page(page);
 	return 0;
@@ -1436,10 +1483,14 @@ static void z3fold_page_putback(struct page *page)
 	INIT_LIST_HEAD(&page->lru);
 	if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
 		atomic64_dec(&pool->pages_nr);
+		spin_lock(&pool->lock);
+		z3fold_dec_isolated(pool);
+		spin_unlock(&pool->lock);
 		return;
 	}
 	spin_lock(&pool->lock);
 	list_add(&page->lru, &pool->lru);
+	z3fold_dec_isolated(pool);
 	spin_unlock(&pool->lock);
 	z3fold_page_unlock(zhdr);
 }
-- 
2.22.0.770.g0f2c4a37fd-goog


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] mm/z3fold.c: Fix race between migration and destruction
  2019-08-09 16:46 [PATCH] mm/z3fold.c: Fix race between migration and destruction Henry Burns
@ 2019-08-09 19:37 ` Henry Burns
  2019-08-10  9:05 ` Vitaly Wool
  1 sibling, 0 replies; 3+ messages in thread
From: Henry Burns @ 2019-08-09 19:37 UTC (permalink / raw)
  To: Vitaly Wool, Andrew Morton
  Cc: Vitaly Vul, Shakeel Butt, Jonathan Adams, Linux MM, LKML,
	henrywolfeburns

I've just CC'd a personal email here so that I can respond to any
replies after today.

On Fri, Aug 9, 2019 at 9:46 AM Henry Burns <henryburns@google.com> wrote:
>
> In z3fold_destroy_pool() we call destroy_workqueue(&pool->compact_wq).
> However, we have no guarantee that migration isn't happening in the
> background at that time.
>
> Migration directly calls queue_work_on(pool->compact_wq), if destruction
> wins that race we are using a destroyed workqueue.
>
> Signed-off-by: Henry Burns <henryburns@google.com>
> ---
>  mm/z3fold.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 51 insertions(+)
>
> diff --git a/mm/z3fold.c b/mm/z3fold.c
> index 78447cecfffa..e136d97ce56e 100644
> --- a/mm/z3fold.c
> +++ b/mm/z3fold.c
> @@ -40,6 +40,7 @@
>  #include <linux/workqueue.h>
>  #include <linux/slab.h>
>  #include <linux/spinlock.h>
> +#include <linux/wait.h>
>  #include <linux/zpool.h>
>
>  /*
> @@ -161,8 +162,10 @@ struct z3fold_pool {
>         const struct zpool_ops *zpool_ops;
>         struct workqueue_struct *compact_wq;
>         struct workqueue_struct *release_wq;
> +       struct wait_queue_head isolate_wait;
>         struct work_struct work;
>         struct inode *inode;
> +       int isolated_pages;
>  };
>
>  /*
> @@ -772,6 +775,7 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
>                 goto out_c;
>         spin_lock_init(&pool->lock);
>         spin_lock_init(&pool->stale_lock);
> +       init_waitqueue_head(&pool->isolate_wait);
>         pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
>         if (!pool->unbuddied)
>                 goto out_pool;
> @@ -811,6 +815,15 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
>         return NULL;
>  }
>
> +static bool pool_isolated_are_drained(struct z3fold_pool *pool)
> +{
> +       bool ret;
> +
> +       spin_lock(&pool->lock);
> +       ret = pool->isolated_pages == 0;
> +       spin_unlock(&pool->lock);
> +       return ret;
> +}
>  /**
>   * z3fold_destroy_pool() - destroys an existing z3fold pool
>   * @pool:      the z3fold pool to be destroyed
> @@ -821,6 +834,13 @@ static void z3fold_destroy_pool(struct z3fold_pool *pool)
>  {
>         kmem_cache_destroy(pool->c_handle);
>
> +       /*
> +        * We need to ensure that no pages are being migrated while we destroy
> +        * these workqueues, as migration can queue work on either of the
> +        * workqueues.
> +        */
> +       wait_event(pool->isolate_wait, !pool_isolated_are_drained(pool));
> +
>         /*
>          * We need to destroy pool->compact_wq before pool->release_wq,
>          * as any pending work on pool->compact_wq will call
> @@ -1317,6 +1337,28 @@ static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
>         return atomic64_read(&pool->pages_nr);
>  }
>
> +/*
> + * z3fold_dec_isolated() expects to be called while pool->lock is held.
> + */
> +static void z3fold_dec_isolated(struct z3fold_pool *pool)
> +{
> +       assert_spin_locked(&pool->lock);
> +       VM_BUG_ON(pool->isolated_pages <= 0);
> +       pool->isolated_pages--;
> +
> +       /*
> +        * If we have no more isolated pages, we have to see if
> +        * z3fold_destroy_pool() is waiting for a signal.
> +        */
> +       if (pool->isolated_pages == 0 && waitqueue_active(&pool->isolate_wait))
> +               wake_up_all(&pool->isolate_wait);
> +}
> +
> +static void z3fold_inc_isolated(struct z3fold_pool *pool)
> +{
> +       pool->isolated_pages++;
> +}
> +
>  static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
>  {
>         struct z3fold_header *zhdr;
> @@ -1343,6 +1385,7 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
>                 spin_lock(&pool->lock);
>                 if (!list_empty(&page->lru))
>                         list_del(&page->lru);
> +               z3fold_inc_isolated(pool);
>                 spin_unlock(&pool->lock);
>                 z3fold_page_unlock(zhdr);
>                 return true;
> @@ -1417,6 +1460,10 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa
>
>         queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);
>
> +       spin_lock(&pool->lock);
> +       z3fold_dec_isolated(pool);
> +       spin_unlock(&pool->lock);
> +
>         page_mapcount_reset(page);
>         put_page(page);
>         return 0;
> @@ -1436,10 +1483,14 @@ static void z3fold_page_putback(struct page *page)
>         INIT_LIST_HEAD(&page->lru);
>         if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
>                 atomic64_dec(&pool->pages_nr);
> +               spin_lock(&pool->lock);
> +               z3fold_dec_isolated(pool);
> +               spin_unlock(&pool->lock);
>                 return;
>         }
>         spin_lock(&pool->lock);
>         list_add(&page->lru, &pool->lru);
> +       z3fold_dec_isolated(pool);
>         spin_unlock(&pool->lock);
>         z3fold_page_unlock(zhdr);
>  }
> --
> 2.22.0.770.g0f2c4a37fd-goog
>


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] mm/z3fold.c: Fix race between migration and destruction
  2019-08-09 16:46 [PATCH] mm/z3fold.c: Fix race between migration and destruction Henry Burns
  2019-08-09 19:37 ` Henry Burns
@ 2019-08-10  9:05 ` Vitaly Wool
  1 sibling, 0 replies; 3+ messages in thread
From: Vitaly Wool @ 2019-08-10  9:05 UTC (permalink / raw)
  To: Henry Burns
  Cc: Andrew Morton, Vitaly Vul, Shakeel Butt, Jonathan Adams, Linux-MM, LKML

Hi Henry,

Den fre 9 aug. 2019 6:46 em Henry Burns <henryburns@google.com> skrev:
>
> In z3fold_destroy_pool() we call destroy_workqueue(&pool->compact_wq).
> However, we have no guarantee that migration isn't happening in the
> background at that time.
>
> Migration directly calls queue_work_on(pool->compact_wq), if destruction
> wins that race we are using a destroyed workqueue.


Thanks for the fix. Would you please comment on why adding
flush_workqueue() isn't enough?

~Vitaly
>
>
> Signed-off-by: Henry Burns <henryburns@google.com>
> ---
>  mm/z3fold.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 51 insertions(+)
>
> diff --git a/mm/z3fold.c b/mm/z3fold.c
> index 78447cecfffa..e136d97ce56e 100644
> --- a/mm/z3fold.c
> +++ b/mm/z3fold.c
> @@ -40,6 +40,7 @@
>  #include <linux/workqueue.h>
>  #include <linux/slab.h>
>  #include <linux/spinlock.h>
> +#include <linux/wait.h>
>  #include <linux/zpool.h>
>
>  /*
> @@ -161,8 +162,10 @@ struct z3fold_pool {
>         const struct zpool_ops *zpool_ops;
>         struct workqueue_struct *compact_wq;
>         struct workqueue_struct *release_wq;
> +       struct wait_queue_head isolate_wait;
>         struct work_struct work;
>         struct inode *inode;
> +       int isolated_pages;
>  };
>
>  /*
> @@ -772,6 +775,7 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
>                 goto out_c;
>         spin_lock_init(&pool->lock);
>         spin_lock_init(&pool->stale_lock);
> +       init_waitqueue_head(&pool->isolate_wait);
>         pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
>         if (!pool->unbuddied)
>                 goto out_pool;
> @@ -811,6 +815,15 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
>         return NULL;
>  }
>
> +static bool pool_isolated_are_drained(struct z3fold_pool *pool)
> +{
> +       bool ret;
> +
> +       spin_lock(&pool->lock);
> +       ret = pool->isolated_pages == 0;
> +       spin_unlock(&pool->lock);
> +       return ret;
> +}
>  /**
>   * z3fold_destroy_pool() - destroys an existing z3fold pool
>   * @pool:      the z3fold pool to be destroyed
> @@ -821,6 +834,13 @@ static void z3fold_destroy_pool(struct z3fold_pool *pool)
>  {
>         kmem_cache_destroy(pool->c_handle);
>
> +       /*
> +        * We need to ensure that no pages are being migrated while we destroy
> +        * these workqueues, as migration can queue work on either of the
> +        * workqueues.
> +        */
> +       wait_event(pool->isolate_wait, !pool_isolated_are_drained(pool));
> +
>         /*
>          * We need to destroy pool->compact_wq before pool->release_wq,
>          * as any pending work on pool->compact_wq will call
> @@ -1317,6 +1337,28 @@ static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
>         return atomic64_read(&pool->pages_nr);
>  }
>
> +/*
> + * z3fold_dec_isolated() expects to be called while pool->lock is held.
> + */
> +static void z3fold_dec_isolated(struct z3fold_pool *pool)
> +{
> +       assert_spin_locked(&pool->lock);
> +       VM_BUG_ON(pool->isolated_pages <= 0);
> +       pool->isolated_pages--;
> +
> +       /*
> +        * If we have no more isolated pages, we have to see if
> +        * z3fold_destroy_pool() is waiting for a signal.
> +        */
> +       if (pool->isolated_pages == 0 && waitqueue_active(&pool->isolate_wait))
> +               wake_up_all(&pool->isolate_wait);
> +}
> +
> +static void z3fold_inc_isolated(struct z3fold_pool *pool)
> +{
> +       pool->isolated_pages++;
> +}
> +
>  static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
>  {
>         struct z3fold_header *zhdr;
> @@ -1343,6 +1385,7 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
>                 spin_lock(&pool->lock);
>                 if (!list_empty(&page->lru))
>                         list_del(&page->lru);
> +               z3fold_inc_isolated(pool);
>                 spin_unlock(&pool->lock);
>                 z3fold_page_unlock(zhdr);
>                 return true;
> @@ -1417,6 +1460,10 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa
>
>         queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);
>
> +       spin_lock(&pool->lock);
> +       z3fold_dec_isolated(pool);
> +       spin_unlock(&pool->lock);
> +
>         page_mapcount_reset(page);
>         put_page(page);
>         return 0;
> @@ -1436,10 +1483,14 @@ static void z3fold_page_putback(struct page *page)
>         INIT_LIST_HEAD(&page->lru);
>         if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
>                 atomic64_dec(&pool->pages_nr);
> +               spin_lock(&pool->lock);
> +               z3fold_dec_isolated(pool);
> +               spin_unlock(&pool->lock);
>                 return;
>         }
>         spin_lock(&pool->lock);
>         list_add(&page->lru, &pool->lru);
> +       z3fold_dec_isolated(pool);
>         spin_unlock(&pool->lock);
>         z3fold_page_unlock(zhdr);
>  }
> --
> 2.22.0.770.g0f2c4a37fd-goog
>


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-08-10  9:05 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-09 16:46 [PATCH] mm/z3fold.c: Fix race between migration and destruction Henry Burns
2019-08-09 19:37 ` Henry Burns
2019-08-10  9:05 ` Vitaly Wool

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).