linux-kernel.vger.kernel.org archive mirror
From: jglisse@redhat.com
To: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org,
	"Jérôme Glisse" <jglisse@redhat.com>,
	"Ralph Campbell" <rcampbell@nvidia.com>,
	"John Hubbard" <jhubbard@nvidia.com>,
	"Andrew Morton" <akpm@linux-foundation.org>
Subject: [PATCH 01/10] mm/hmm: use reference counting for HMM struct
Date: Tue, 29 Jan 2019 11:54:19 -0500
Message-ID: <20190129165428.3931-2-jglisse@redhat.com>
In-Reply-To: <20190129165428.3931-1-jglisse@redhat.com>

From: Jérôme Glisse <jglisse@redhat.com>

Every time I read the code to check that the HMM structure does not
vanish before it should, thanks to the many locks protecting its
removal, I get a headache. Switch to reference counting instead; it is
much easier to follow and harder to break. This also removes some code
that is no longer needed with refcounting.
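
For context, here is a minimal, self-contained sketch of the kref
pattern this patch moves HMM to. It is illustrative only and not part
of the patch; "struct foo" and the foo_*() helpers are made-up names
standing in for struct hmm and the hmm_get()/hmm_put() helpers below:

  #include <linux/kref.h>
  #include <linux/slab.h>

  /* Hypothetical refcounted object, standing in for struct hmm. */
  struct foo {
  	struct kref kref;
  	/* ... object state ... */
  };

  static struct foo *foo_alloc(void)
  {
  	struct foo *foo = kzalloc(sizeof(*foo), GFP_KERNEL);

  	if (foo)
  		kref_init(&foo->kref);	/* refcount starts at 1 */
  	return foo;
  }

  /* Lookup path: only hand out the object while it is still alive. */
  static struct foo *foo_get(struct foo *foo)
  {
  	if (foo && kref_get_unless_zero(&foo->kref))
  		return foo;
  	return NULL;
  }

  static void foo_release(struct kref *kref)
  {
  	struct foo *foo = container_of(kref, struct foo, kref);

  	/* Last reference dropped: tear down and free. */
  	kfree(foo);
  }

  static void foo_put(struct foo *foo)
  {
  	kref_put(&foo->kref, foo_release);
  }

The useful property is that once the last foo_put() has run, no later
foo_get() can succeed, so there is no window where the object is handed
out while it is being freed; that is what replaces the lock-based
lifetime dance described above.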

Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hmm.h |   2 +
 mm/hmm.c            | 178 +++++++++++++++++++++++++++++---------------
 2 files changed, 120 insertions(+), 60 deletions(-)

diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 66f9ebbb1df3..bd6e058597a6 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -131,6 +131,7 @@ enum hmm_pfn_value_e {
 /*
  * struct hmm_range - track invalidation lock on virtual address range
  *
+ * @hmm: the core HMM structure this range is active against
  * @vma: the vm area struct for the range
  * @list: all range lock are on a list
  * @start: range virtual start address (inclusive)
@@ -142,6 +143,7 @@ enum hmm_pfn_value_e {
  * @valid: pfns array did not change since it has been fill by an HMM function
  */
 struct hmm_range {
+	struct hmm		*hmm;
 	struct vm_area_struct	*vma;
 	struct list_head	list;
 	unsigned long		start;
diff --git a/mm/hmm.c b/mm/hmm.c
index a04e4b810610..b9f384ea15e9 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -50,6 +50,7 @@ static const struct mmu_notifier_ops hmm_mmu_notifier_ops;
  */
 struct hmm {
 	struct mm_struct	*mm;
+	struct kref		kref;
 	spinlock_t		lock;
 	struct list_head	ranges;
 	struct list_head	mirrors;
@@ -57,6 +58,16 @@ struct hmm {
 	struct rw_semaphore	mirrors_sem;
 };
 
+static inline struct hmm *hmm_get(struct mm_struct *mm)
+{
+	struct hmm *hmm = READ_ONCE(mm->hmm);
+
+	if (hmm && kref_get_unless_zero(&hmm->kref))
+		return hmm;
+
+	return NULL;
+}
+
 /*
  * hmm_register - register HMM against an mm (HMM internal)
  *
@@ -67,14 +78,9 @@ struct hmm {
  */
 static struct hmm *hmm_register(struct mm_struct *mm)
 {
-	struct hmm *hmm = READ_ONCE(mm->hmm);
+	struct hmm *hmm = hmm_get(mm);
 	bool cleanup = false;
 
-	/*
-	 * The hmm struct can only be freed once the mm_struct goes away,
-	 * hence we should always have pre-allocated an new hmm struct
-	 * above.
-	 */
 	if (hmm)
 		return hmm;
 
@@ -86,6 +92,7 @@ static struct hmm *hmm_register(struct mm_struct *mm)
 	hmm->mmu_notifier.ops = NULL;
 	INIT_LIST_HEAD(&hmm->ranges);
 	spin_lock_init(&hmm->lock);
+	kref_init(&hmm->kref);
 	hmm->mm = mm;
 
 	spin_lock(&mm->page_table_lock);
@@ -106,7 +113,7 @@ static struct hmm *hmm_register(struct mm_struct *mm)
 	if (__mmu_notifier_register(&hmm->mmu_notifier, mm))
 		goto error_mm;
 
-	return mm->hmm;
+	return hmm;
 
 error_mm:
 	spin_lock(&mm->page_table_lock);
@@ -118,9 +125,41 @@ static struct hmm *hmm_register(struct mm_struct *mm)
 	return NULL;
 }
 
+static void hmm_free(struct kref *kref)
+{
+	struct hmm *hmm = container_of(kref, struct hmm, kref);
+	struct mm_struct *mm = hmm->mm;
+
+	mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
+
+	spin_lock(&mm->page_table_lock);
+	if (mm->hmm == hmm)
+		mm->hmm = NULL;
+	spin_unlock(&mm->page_table_lock);
+
+	kfree(hmm);
+}
+
+static inline void hmm_put(struct hmm *hmm)
+{
+	kref_put(&hmm->kref, hmm_free);
+}
+
 void hmm_mm_destroy(struct mm_struct *mm)
 {
-	kfree(mm->hmm);
+	struct hmm *hmm;
+
+	spin_lock(&mm->page_table_lock);
+	hmm = hmm_get(mm);
+	mm->hmm = NULL;
+	if (hmm) {
+		hmm->mm = NULL;
+		spin_unlock(&mm->page_table_lock);
+		hmm_put(hmm);
+		return;
+	}
+
+	spin_unlock(&mm->page_table_lock);
 }
 
 static int hmm_invalidate_range(struct hmm *hmm, bool device,
@@ -165,7 +204,7 @@ static int hmm_invalidate_range(struct hmm *hmm, bool device,
 static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm)
 {
 	struct hmm_mirror *mirror;
-	struct hmm *hmm = mm->hmm;
+	struct hmm *hmm = hmm_get(mm);
 
 	down_write(&hmm->mirrors_sem);
 	mirror = list_first_entry_or_null(&hmm->mirrors, struct hmm_mirror,
@@ -186,36 +225,50 @@ static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm)
 						  struct hmm_mirror, list);
 	}
 	up_write(&hmm->mirrors_sem);
+
+	hmm_put(hmm);
 }
 
 static int hmm_invalidate_range_start(struct mmu_notifier *mn,
 			const struct mmu_notifier_range *range)
 {
 	struct hmm_update update;
-	struct hmm *hmm = range->mm->hmm;
+	struct hmm *hmm = hmm_get(range->mm);
+	int ret;
 
 	VM_BUG_ON(!hmm);
 
+	/* Check if hmm_mm_destroy() was called. */
+	if (hmm->mm == NULL)
+		return 0;
+
 	update.start = range->start;
 	update.end = range->end;
 	update.event = HMM_UPDATE_INVALIDATE;
 	update.blockable = range->blockable;
-	return hmm_invalidate_range(hmm, true, &update);
+	ret = hmm_invalidate_range(hmm, true, &update);
+	hmm_put(hmm);
+	return ret;
 }
 
 static void hmm_invalidate_range_end(struct mmu_notifier *mn,
 			const struct mmu_notifier_range *range)
 {
 	struct hmm_update update;
-	struct hmm *hmm = range->mm->hmm;
+	struct hmm *hmm = hmm_get(range->mm);
 
 	VM_BUG_ON(!hmm);
 
+	/* Check if hmm_mm_destroy() was called. */
+	if (hmm->mm == NULL)
+		return;
+
 	update.start = range->start;
 	update.end = range->end;
 	update.event = HMM_UPDATE_INVALIDATE;
 	update.blockable = true;
 	hmm_invalidate_range(hmm, false, &update);
+	hmm_put(hmm);
 }
 
 static const struct mmu_notifier_ops hmm_mmu_notifier_ops = {
@@ -241,24 +294,13 @@ int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm)
 	if (!mm || !mirror || !mirror->ops)
 		return -EINVAL;
 
-again:
 	mirror->hmm = hmm_register(mm);
 	if (!mirror->hmm)
 		return -ENOMEM;
 
 	down_write(&mirror->hmm->mirrors_sem);
-	if (mirror->hmm->mm == NULL) {
-		/*
-		 * A racing hmm_mirror_unregister() is about to destroy the hmm
-		 * struct. Try again to allocate a new one.
-		 */
-		up_write(&mirror->hmm->mirrors_sem);
-		mirror->hmm = NULL;
-		goto again;
-	} else {
-		list_add(&mirror->list, &mirror->hmm->mirrors);
-		up_write(&mirror->hmm->mirrors_sem);
-	}
+	list_add(&mirror->list, &mirror->hmm->mirrors);
+	up_write(&mirror->hmm->mirrors_sem);
 
 	return 0;
 }
@@ -273,33 +315,18 @@ EXPORT_SYMBOL(hmm_mirror_register);
  */
 void hmm_mirror_unregister(struct hmm_mirror *mirror)
 {
-	bool should_unregister = false;
-	struct mm_struct *mm;
-	struct hmm *hmm;
+	struct hmm *hmm = READ_ONCE(mirror->hmm);
 
-	if (mirror->hmm == NULL)
+	if (hmm == NULL)
 		return;
 
-	hmm = mirror->hmm;
 	down_write(&hmm->mirrors_sem);
 	list_del_init(&mirror->list);
-	should_unregister = list_empty(&hmm->mirrors);
+	/* To protect us against double unregister ... */
 	mirror->hmm = NULL;
-	mm = hmm->mm;
-	hmm->mm = NULL;
 	up_write(&hmm->mirrors_sem);
 
-	if (!should_unregister || mm == NULL)
-		return;
-
-	mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
-
-	spin_lock(&mm->page_table_lock);
-	if (mm->hmm == hmm)
-		mm->hmm = NULL;
-	spin_unlock(&mm->page_table_lock);
-
-	kfree(hmm);
+	hmm_put(hmm);
 }
 EXPORT_SYMBOL(hmm_mirror_unregister);
 
@@ -708,6 +735,8 @@ int hmm_vma_get_pfns(struct hmm_range *range)
 	struct mm_walk mm_walk;
 	struct hmm *hmm;
 
+	range->hmm = NULL;
+
 	/* Sanity check, this really should not happen ! */
 	if (range->start < vma->vm_start || range->start >= vma->vm_end)
 		return -EINVAL;
@@ -717,14 +746,18 @@ int hmm_vma_get_pfns(struct hmm_range *range)
 	hmm = hmm_register(vma->vm_mm);
 	if (!hmm)
 		return -ENOMEM;
-	/* Caller must have registered a mirror, via hmm_mirror_register() ! */
-	if (!hmm->mmu_notifier.ops)
+
+	/* Check if hmm_mm_destroy() was called. */
+	if (hmm->mm == NULL) {
+		hmm_put(hmm);
 		return -EINVAL;
+	}
 
 	/* FIXME support hugetlb fs */
 	if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL) ||
 			vma_is_dax(vma)) {
 		hmm_pfns_special(range);
+		hmm_put(hmm);
 		return -EINVAL;
 	}
 
@@ -736,6 +769,7 @@ int hmm_vma_get_pfns(struct hmm_range *range)
 		 * operations such has atomic access would not work.
 		 */
 		hmm_pfns_clear(range, range->pfns, range->start, range->end);
+		hmm_put(hmm);
 		return -EPERM;
 	}
 
@@ -758,6 +792,12 @@ int hmm_vma_get_pfns(struct hmm_range *range)
 	mm_walk.pte_hole = hmm_vma_walk_hole;
 
 	walk_page_range(range->start, range->end, &mm_walk);
+	/*
+	 * Transfer the hmm reference to the range struct; it will be dropped
+	 * inside the hmm_vma_range_done() function (which _must_ be called
+	 * if this function returns 0).
+	 */
+	range->hmm = hmm;
 	return 0;
 }
 EXPORT_SYMBOL(hmm_vma_get_pfns);
@@ -802,25 +842,27 @@ EXPORT_SYMBOL(hmm_vma_get_pfns);
  */
 bool hmm_vma_range_done(struct hmm_range *range)
 {
-	unsigned long npages = (range->end - range->start) >> PAGE_SHIFT;
-	struct hmm *hmm;
+	bool ret = false;
 
-	if (range->end <= range->start) {
+	/* Sanity check, this really should not happen. */
+	if (range->hmm == NULL || range->end <= range->start) {
 		BUG();
 		return false;
 	}
 
-	hmm = hmm_register(range->vma->vm_mm);
-	if (!hmm) {
-		memset(range->pfns, 0, sizeof(*range->pfns) * npages);
-		return false;
-	}
-
-	spin_lock(&hmm->lock);
+	spin_lock(&range->hmm->lock);
 	list_del_rcu(&range->list);
-	spin_unlock(&hmm->lock);
+	ret = range->valid;
+	spin_unlock(&range->hmm->lock);
 
-	return range->valid;
+	/* Is the mm still alive? */
+	if (range->hmm->mm == NULL)
+		ret = false;
+
+	/* Drop reference taken by hmm_vma_fault() or hmm_vma_get_pfns() */
+	hmm_put(range->hmm);
+	range->hmm = NULL;
+	return ret;
 }
 EXPORT_SYMBOL(hmm_vma_range_done);
 
@@ -880,6 +922,8 @@ int hmm_vma_fault(struct hmm_range *range, bool block)
 	struct hmm *hmm;
 	int ret;
 
+	range->hmm = NULL;
+
 	/* Sanity check, this really should not happen ! */
 	if (range->start < vma->vm_start || range->start >= vma->vm_end)
 		return -EINVAL;
@@ -891,14 +935,18 @@ int hmm_vma_fault(struct hmm_range *range, bool block)
 		hmm_pfns_clear(range, range->pfns, range->start, range->end);
 		return -ENOMEM;
 	}
-	/* Caller must have registered a mirror using hmm_mirror_register() */
-	if (!hmm->mmu_notifier.ops)
+
+	/* Check if hmm_mm_destroy() was called. */
+	if (hmm->mm == NULL) {
+		hmm_put(hmm);
 		return -EINVAL;
+	}
 
 	/* FIXME support hugetlb fs */
 	if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL) ||
 			vma_is_dax(vma)) {
 		hmm_pfns_special(range);
+		hmm_put(hmm);
 		return -EINVAL;
 	}
 
@@ -910,6 +958,7 @@ int hmm_vma_fault(struct hmm_range *range, bool block)
 		 * operations such has atomic access would not work.
 		 */
 		hmm_pfns_clear(range, range->pfns, range->start, range->end);
+		hmm_put(hmm);
 		return -EPERM;
 	}
 
@@ -945,7 +994,16 @@ int hmm_vma_fault(struct hmm_range *range, bool block)
 		hmm_pfns_clear(range, &range->pfns[i], hmm_vma_walk.last,
 			       range->end);
 		hmm_vma_range_done(range);
+		hmm_put(hmm);
+	} else {
+		/*
+		 * Transfer the hmm reference to the range struct; it will be
+		 * dropped inside the hmm_vma_range_done() function (which
+		 * _must_ be called if this function returns 0).
+		 */
+		range->hmm = hmm;
 	}
+
 	return ret;
 }
 EXPORT_SYMBOL(hmm_vma_fault);
-- 
2.17.2


Thread overview: 98+ messages
2019-01-29 16:54 [PATCH 00/10] HMM updates for 5.1 jglisse
2019-01-29 16:54 ` jglisse [this message]
2019-02-20 23:47   ` [PATCH 01/10] mm/hmm: use reference counting for HMM struct John Hubbard
2019-02-20 23:59     ` Jerome Glisse
2019-02-21  0:06       ` John Hubbard
2019-02-21  0:15         ` Jerome Glisse
2019-02-21  0:32           ` John Hubbard
2019-02-21  0:37             ` Jerome Glisse
2019-02-21  0:42               ` John Hubbard
2019-01-29 16:54 ` [PATCH 02/10] mm/hmm: do not erase snapshot when a range is invalidated jglisse
2019-02-20 23:58   ` John Hubbard
2019-01-29 16:54 ` [PATCH 03/10] mm/hmm: improve and rename hmm_vma_get_pfns() to hmm_range_snapshot() jglisse
2019-02-21  0:25   ` John Hubbard
2019-02-21  0:28     ` Jerome Glisse
2019-01-29 16:54 ` [PATCH 04/10] mm/hmm: improve and rename hmm_vma_fault() to hmm_range_fault() jglisse
2019-01-29 16:54 ` [PATCH 05/10] mm/hmm: improve driver API to work and wait over a range jglisse
2019-01-29 16:54 ` [PATCH 06/10] mm/hmm: add default fault flags to avoid the need to pre-fill pfns arrays jglisse
2019-01-29 16:54 ` [PATCH 07/10] mm/hmm: add an helper function that fault pages and map them to a device jglisse
2019-03-18 20:21   ` Dan Williams
2019-03-18 20:41     ` Jerome Glisse
2019-03-18 21:30       ` Dan Williams
2019-03-18 22:15         ` Jerome Glisse
2019-03-19  3:29           ` Dan Williams
2019-03-19 13:30             ` Jerome Glisse
2019-03-19  8:44               ` Ira Weiny
2019-03-19 17:10                 ` Jerome Glisse
2019-03-19 14:10                   ` Ira Weiny
2019-01-29 16:54 ` [PATCH 08/10] mm/hmm: support hugetlbfs (snap shoting, faulting and DMA mapping) jglisse
2019-01-29 16:54 ` [PATCH 09/10] mm/hmm: allow to mirror vma of a file on a DAX backed filesystem jglisse
2019-01-29 18:41   ` Dan Williams
2019-01-29 19:31     ` Jerome Glisse
2019-01-29 20:51       ` Dan Williams
2019-01-29 21:21         ` Jerome Glisse
2019-01-30  2:32           ` Dan Williams
2019-01-30  3:03             ` Jerome Glisse
2019-01-30 17:25               ` Dan Williams
2019-01-30 18:36                 ` Jerome Glisse
2019-01-31  3:28                   ` Dan Williams
2019-01-31  4:16                     ` Jerome Glisse
2019-01-31  5:44                       ` Dan Williams
2019-03-05 22:16                         ` Andrew Morton
2019-03-06  4:20                           ` Dan Williams
2019-03-06 15:51                             ` Jerome Glisse
2019-03-06 15:57                               ` Dan Williams
2019-03-06 16:03                                 ` Jerome Glisse
2019-03-06 16:06                                   ` Dan Williams
2019-03-07 17:46                             ` Andrew Morton
2019-03-07 18:56                               ` Jerome Glisse
2019-03-12  3:13                                 ` Dan Williams
2019-03-12 15:25                                   ` Jerome Glisse
2019-03-12 16:06                                     ` Dan Williams
2019-03-12 19:06                                       ` Jerome Glisse
2019-03-12 19:30                                         ` Dan Williams
2019-03-12 20:34                                           ` Dave Chinner
2019-03-13  1:06                                             ` Dan Williams
2019-03-12 21:52                                           ` Andrew Morton
2019-03-13  0:10                                             ` Jerome Glisse
2019-03-13  0:46                                               ` Dan Williams
2019-03-13  1:00                                                 ` Jerome Glisse
2019-03-13 16:06                                               ` Andrew Morton
2019-03-13 18:39                                                 ` Jerome Glisse
2019-03-06 15:49                           ` Jerome Glisse
2019-03-06 22:18                             ` Andrew Morton
2019-03-07  0:36                               ` Jerome Glisse
2019-01-29 16:54 ` [PATCH 10/10] mm/hmm: add helpers for driver to safely take the mmap_sem jglisse
2019-02-20 21:59   ` John Hubbard
2019-02-20 22:19     ` Jerome Glisse
2019-02-20 22:40       ` John Hubbard
2019-02-20 23:09         ` Jerome Glisse
2019-02-20 23:17 ` [PATCH 00/10] HMM updates for 5.1 John Hubbard
2019-02-20 23:36   ` Jerome Glisse
2019-02-22 23:31 ` Ralph Campbell
2019-03-13  1:27 ` Jerome Glisse
2019-03-13 16:10   ` Andrew Morton
2019-03-13 18:01     ` Jason Gunthorpe
2019-03-13 18:33     ` Jerome Glisse
2019-03-18 17:00     ` Kuehling, Felix
2019-03-18 17:04     ` Jerome Glisse
2019-03-18 18:30       ` Dan Williams
2019-03-18 18:54         ` Jerome Glisse
2019-03-18 19:18           ` Dan Williams
2019-03-18 19:28             ` Jerome Glisse
2019-03-18 19:36               ` Dan Williams
2019-03-19 16:40       ` Andrew Morton
2019-03-19 16:58         ` Jerome Glisse
2019-03-19 17:12           ` Andrew Morton
2019-03-19 17:18             ` Jerome Glisse
2019-03-19 17:33               ` Dan Williams
2019-03-19 17:45                 ` Jerome Glisse
2019-03-19 18:42                   ` Dan Williams
2019-03-19 19:05                     ` Jerome Glisse
2019-03-19 19:13                       ` Dan Williams
2019-03-19 14:18                         ` Ira Weiny
2019-03-19 22:24                           ` Jerome Glisse
2019-03-19 19:18                         ` Jerome Glisse
2019-03-19 20:25                           ` Jerome Glisse
2019-03-19 21:51             ` Stephen Rothwell
2019-03-19 18:51           ` Deucher, Alexander
