From: Christoph Hellwig <hch@lst.de>
To: "Dan Williams" <dan.j.williams@intel.com>,
"Jérôme Glisse" <jglisse@redhat.com>,
"Jason Gunthorpe" <jgg@mellanox.com>,
"Ben Skeggs" <bskeggs@redhat.com>
Cc: Ira Weiny <ira.weiny@intel.com>,
linux-mm@kvack.org, nouveau@lists.freedesktop.org,
dri-devel@lists.freedesktop.org, linux-nvdimm@lists.01.org,
linux-pci@vger.kernel.org, linux-kernel@vger.kernel.org,
John Hubbard <jhubbard@nvidia.com>,
Ralph Campbell <rcampbell@nvidia.com>,
Philip Yang <Philip.Yang@amd.com>
Subject: [PATCH 09/22] mm/hmm: Simplify hmm_get_or_create and make it reliable
Date: Mon, 1 Jul 2019 08:20:07 +0200 [thread overview]
Message-ID: <20190701062020.19239-10-hch@lst.de> (raw)
In-Reply-To: <20190701062020.19239-1-hch@lst.de>
From: Jason Gunthorpe <jgg@mellanox.com>
As coded, this function can false-fail in various racy situations. Make it
reliable and simpler by running under the write side of the mmap_sem and
avoiding the false-failing compare/exchange pattern. Because the mmap_sem
is held for write, this no longer has to avoid racing with a second
parallel hmm_get_or_create().
Unfortunately, this still has to use the page_table_lock as the
non-sleeping lock protecting mm->hmm, since the contexts where we free the
hmm are incompatible with the mmap_sem.
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: Philip Yang <Philip.Yang@amd.com>
---
mm/hmm.c | 77 ++++++++++++++++++++++----------------------------------
1 file changed, 30 insertions(+), 47 deletions(-)
diff --git a/mm/hmm.c b/mm/hmm.c
index 080b17a2e87e..0423f4ca3a7e 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -31,16 +31,6 @@
#if IS_ENABLED(CONFIG_HMM_MIRROR)
static const struct mmu_notifier_ops hmm_mmu_notifier_ops;
-static inline struct hmm *mm_get_hmm(struct mm_struct *mm)
-{
- struct hmm *hmm = READ_ONCE(mm->hmm);
-
- if (hmm && kref_get_unless_zero(&hmm->kref))
- return hmm;
-
- return NULL;
-}
-
/**
* hmm_get_or_create - register HMM against an mm (HMM internal)
*
@@ -55,11 +45,16 @@ static inline struct hmm *mm_get_hmm(struct mm_struct *mm)
*/
static struct hmm *hmm_get_or_create(struct mm_struct *mm)
{
- struct hmm *hmm = mm_get_hmm(mm);
- bool cleanup = false;
+ struct hmm *hmm;
+
+ lockdep_assert_held_exclusive(&mm->mmap_sem);
- if (hmm)
- return hmm;
+ /* Abuse the page_table_lock to also protect mm->hmm. */
+ spin_lock(&mm->page_table_lock);
+ hmm = mm->hmm;
+ if (mm->hmm && kref_get_unless_zero(&mm->hmm->kref))
+ goto out_unlock;
+ spin_unlock(&mm->page_table_lock);
hmm = kmalloc(sizeof(*hmm), GFP_KERNEL);
if (!hmm)
@@ -74,57 +69,45 @@ static struct hmm *hmm_get_or_create(struct mm_struct *mm)
hmm->notifiers = 0;
hmm->dead = false;
hmm->mm = mm;
- mmgrab(hmm->mm);
- spin_lock(&mm->page_table_lock);
- if (!mm->hmm)
- mm->hmm = hmm;
- else
- cleanup = true;
- spin_unlock(&mm->page_table_lock);
+ hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops;
+ if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) {
+ kfree(hmm);
+ return NULL;
+ }
- if (cleanup)
- goto error;
+ mmgrab(hmm->mm);
/*
- * We should only get here if hold the mmap_sem in write mode ie on
- * registration of first mirror through hmm_mirror_register()
+ * We hold the exclusive mmap_sem here so we know that mm->hmm is
+ * still NULL or 0 kref, and is safe to update.
*/
- hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops;
- if (__mmu_notifier_register(&hmm->mmu_notifier, mm))
- goto error_mm;
-
- return hmm;
-
-error_mm:
spin_lock(&mm->page_table_lock);
- if (mm->hmm == hmm)
- mm->hmm = NULL;
+ mm->hmm = hmm;
+
+out_unlock:
spin_unlock(&mm->page_table_lock);
-error:
- mmdrop(hmm->mm);
- kfree(hmm);
- return NULL;
+ return hmm;
}
static void hmm_free_rcu(struct rcu_head *rcu)
{
- kfree(container_of(rcu, struct hmm, rcu));
+ struct hmm *hmm = container_of(rcu, struct hmm, rcu);
+
+ mmdrop(hmm->mm);
+ kfree(hmm);
}
static void hmm_free(struct kref *kref)
{
struct hmm *hmm = container_of(kref, struct hmm, kref);
- struct mm_struct *mm = hmm->mm;
- mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
+ spin_lock(&hmm->mm->page_table_lock);
+ if (hmm->mm->hmm == hmm)
+ hmm->mm->hmm = NULL;
+ spin_unlock(&hmm->mm->page_table_lock);
- spin_lock(&mm->page_table_lock);
- if (mm->hmm == hmm)
- mm->hmm = NULL;
- spin_unlock(&mm->page_table_lock);
-
- mmdrop(hmm->mm);
+ mmu_notifier_unregister_no_release(&hmm->mmu_notifier, hmm->mm);
mmu_notifier_call_srcu(&hmm->rcu, hmm_free_rcu);
}
--
2.20.1
next prev parent reply other threads:[~2019-07-01 6:20 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-07-01 6:19 dev_pagemap related cleanups v4 Christoph Hellwig
2019-07-01 6:19 ` [PATCH 01/22] mm/hmm.c: suppress compilation warnings when CONFIG_HUGETLB_PAGE is not set Christoph Hellwig
2019-07-01 6:20 ` [PATCH 02/22] mm/hmm: update HMM documentation Christoph Hellwig
2019-07-01 6:20 ` [PATCH 03/22] mm/hmm: clean up some coding style and comments Christoph Hellwig
2019-07-01 6:20 ` [PATCH 04/22] mm/hmm: support automatic NUMA balancing Christoph Hellwig
2019-07-01 6:20 ` [PATCH 05/22] mm/hmm: Only set FAULT_FLAG_ALLOW_RETRY for non-blocking Christoph Hellwig
2019-07-01 6:20 ` [PATCH 06/22] mm/hmm: fix use after free with struct hmm in the mmu notifiers Christoph Hellwig
2019-07-01 6:20 ` [PATCH 07/22] mm/hmm: Use hmm_mirror not mm as an argument for hmm_range_register Christoph Hellwig
2019-07-01 6:20 ` [PATCH 08/22] mm/hmm: Hold a mmgrab from hmm to mm Christoph Hellwig
2019-07-01 6:20 ` Christoph Hellwig [this message]
2019-07-01 6:20 ` [PATCH 10/22] mm/hmm: Remove duplicate condition test before wait_event_timeout Christoph Hellwig
2019-07-01 6:20 ` [PATCH 11/22] mm/hmm: Do not use list*_rcu() for hmm->ranges Christoph Hellwig
2019-07-01 6:20 ` [PATCH 12/22] mm/hmm: Hold on to the mmget for the lifetime of the range Christoph Hellwig
2019-07-01 6:20 ` [PATCH 13/22] mm/hmm: Use lockdep instead of comments Christoph Hellwig
2019-07-01 6:20 ` [PATCH 14/22] mm/hmm: Remove racy protection against double-unregistration Christoph Hellwig
2019-07-01 6:20 ` [PATCH 15/22] mm/hmm: Poison hmm_range during unregister Christoph Hellwig
2019-07-01 6:20 ` [PATCH 16/22] mm/hmm: Remove confusing comment and logic from hmm_release Christoph Hellwig
2019-07-01 6:20 ` [PATCH 17/22] mm/hmm: Fix error flows in hmm_invalidate_range_start Christoph Hellwig
2019-07-01 6:20 ` [PATCH 18/22] mm: return valid info from hmm_range_unregister Christoph Hellwig
2019-07-03 17:22 ` Ralph Campbell
2019-07-01 6:20 ` [PATCH 19/22] mm: always return EBUSY for invalid ranges in hmm_range_{fault,snapshot} Christoph Hellwig
2019-07-02 21:43 ` Kuehling, Felix
2019-07-03 17:32 ` Ralph Campbell
2019-07-01 6:20 ` [PATCH 20/22] mm: move hmm_vma_fault to nouveau Christoph Hellwig
2019-07-03 17:48 ` Ralph Campbell
2019-07-03 17:50 ` [Nouveau] " Ilia Mirkin
2019-07-03 18:03 ` Jason Gunthorpe
2019-07-03 18:05 ` Christoph Hellwig
2019-07-03 18:13 ` Jason Gunthorpe
2019-07-01 6:20 ` [PATCH 21/22] nouveau: unlock mmap_sem on all errors from nouveau_range_fault Christoph Hellwig
2019-07-01 6:20 ` [PATCH 22/22] mm: remove the legacy hmm_pfn_* APIs Christoph Hellwig
2019-07-03 18:01 ` Jason Gunthorpe
2019-07-03 18:03 ` Christoph Hellwig
2019-07-03 18:15 ` Jason Gunthorpe
2019-07-01 8:25 ` dev_pagemap related cleanups v4 Christoph Hellwig
2019-07-02 18:42 ` Jason Gunthorpe
2019-07-02 22:45 ` Weiny, Ira
2019-07-02 22:47 ` Christoph Hellwig
2019-07-02 23:17 ` Dan Williams
2019-07-03 1:08 ` Jason Gunthorpe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190701062020.19239-10-hch@lst.de \
--to=hch@lst.de \
--cc=Philip.Yang@amd.com \
--cc=bskeggs@redhat.com \
--cc=dan.j.williams@intel.com \
--cc=dri-devel@lists.freedesktop.org \
--cc=ira.weiny@intel.com \
--cc=jgg@mellanox.com \
--cc=jglisse@redhat.com \
--cc=jhubbard@nvidia.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-nvdimm@lists.01.org \
--cc=linux-pci@vger.kernel.org \
--cc=nouveau@lists.freedesktop.org \
--cc=rcampbell@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).