From: Anshuman Khandual <khandual@linux.vnet.ibm.com>
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	linuxppc-dev@lists.ozlabs.org
Cc: hughd@google.com, kirill@shutemov.name,
	n-horiguchi@ah.jp.nec.com, akpm@linux-foundation.org,
	mgorman@techsingularity.net, dave.hansen@intel.com,
	aneesh.kumar@linux.vnet.ibm.com, mpe@ellerman.id.au
Subject: [PATCH 03/10] mm/hugetlb: Protect follow_huge_(pud|pgd) functions from race
Date: Thu,  7 Apr 2016 11:07:37 +0530
Message-ID: <1460007464-26726-4-git-send-email-khandual@linux.vnet.ibm.com>
In-Reply-To: <1460007464-26726-1-git-send-email-khandual@linux.vnet.ibm.com>

The follow_huge_(pmd|pud|pgd) functions walk the page table and fetch
the page struct during a follow_page_mask() call. These functions can
race with concurrent calls to move_pages() and with the freeing of huge
pages. Commit e66f17ff7177 ("mm/hugetlb: take page table lock in
follow_huge_pmd()") fixed this, but only for PMD based huge pages.

This patch applies the same locking scheme to follow_huge_pud() and
follow_huge_pgd(), making them safe from the race described above and
allowing them to support FOLL_GET. The generic version of
follow_huge_addr() is left unchanged; it is up to each architecture to
decide how to handle it.
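
For context, the sketch below is a minimal, hypothetical user space
reproducer pattern for the race described above: one thread keeps
calling move_pages() on a hugetlb mapping while the main thread unmaps
it, so the follow_page_mask() walk can observe the huge page entry
disappearing underneath it. It is only an illustration (the 16MB huge
page size, target node, timing and helper names are assumptions), it is
not part of this patch, and it assumes the libnuma move_pages() wrapper.

	/* Hypothetical reproducer sketch, not part of this patch. Assumes
	 * libnuma (link with -lnuma) and reserved 16MB huge pages.
	 */
	#include <numaif.h>		/* move_pages(), MPOL_MF_MOVE */
	#include <pthread.h>
	#include <stdlib.h>
	#include <sys/mman.h>
	#include <unistd.h>

	#define HPAGE_SIZE	(16UL << 20)	/* assumed huge page size */

	static void *hpage;
	static volatile int stop;

	static void *migrate_loop(void *arg)
	{
		int node = 0;			/* assumed target NUMA node */
		int status;

		while (!stop) {
			/*
			 * Races with the munmap() below: the kernel-side walk
			 * must not touch a huge page that has been freed.
			 */
			move_pages(0, 1, &hpage, &node, &status, MPOL_MF_MOVE);
		}
		return NULL;
	}

	int main(void)
	{
		pthread_t tid;

		while (1) {
			hpage = mmap(NULL, HPAGE_SIZE, PROT_READ | PROT_WRITE,
				     MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
			if (hpage == MAP_FAILED)
				exit(1);
			*(volatile char *)hpage = 1;	/* fault the huge page in */

			pthread_create(&tid, NULL, migrate_loop, NULL);
			usleep(1000);
			munmap(hpage, HPAGE_SIZE);	/* frees the huge page */
			stop = 1;
			pthread_join(tid, NULL);
			stop = 0;
		}
		return 0;
	}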

Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
---
 include/linux/mm.h | 33 +++++++++++++++++++++++++++
 mm/hugetlb.c       | 67 ++++++++++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 91 insertions(+), 9 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index ffcff53..734182a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1751,6 +1751,19 @@ static inline void pgtable_page_dtor(struct page *page)
 		NULL: pte_offset_kernel(pmd, address))
 
 #if USE_SPLIT_PMD_PTLOCKS
+static struct page *pgd_to_page(pgd_t *pgd)
+{
+	unsigned long mask = ~(PTRS_PER_PGD * sizeof(pgd_t) - 1);
+
+	return virt_to_page((void *)((unsigned long) pgd & mask));
+}
+
+static struct page *pud_to_page(pud_t *pud)
+{
+	unsigned long mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1);
+
+	return virt_to_page((void *)((unsigned long) pud & mask));
+}
 
 static struct page *pmd_to_page(pmd_t *pmd)
 {
@@ -1758,6 +1771,16 @@ static struct page *pmd_to_page(pmd_t *pmd)
 	return virt_to_page((void *)((unsigned long) pmd & mask));
 }
 
+static inline spinlock_t *pgd_lockptr(struct mm_struct *mm, pgd_t *pgd)
+{
+	return ptlock_ptr(pgd_to_page(pgd));
+}
+
+static inline spinlock_t *pud_lockptr(struct mm_struct *mm, pud_t *pud)
+{
+	return ptlock_ptr(pud_to_page(pud));
+}
+
 static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
 {
 	return ptlock_ptr(pmd_to_page(pmd));
@@ -1783,6 +1806,16 @@ static inline void pgtable_pmd_page_dtor(struct page *page)
 
 #else
 
+static inline spinlock_t *pgd_lockptr(struct mm_struct *mm, pgd_t *pgd)
+{
+	return &mm->page_table_lock;
+}
+
+static inline spinlock_t *pud_lockptr(struct mm_struct *mm, pud_t *pud)
+{
+	return &mm->page_table_lock;
+}
+
 static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
 {
 	return &mm->page_table_lock;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5ea3158..e84e479 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4346,21 +4346,70 @@ struct page * __weak
 follow_huge_pud(struct mm_struct *mm, unsigned long address,
 		pud_t *pud, int flags)
 {
-	if (flags & FOLL_GET)
-		return NULL;
-
-	return pte_page(*(pte_t *)pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
+	struct page *page = NULL;
+	spinlock_t *ptl;
+retry:
+	ptl = pud_lockptr(mm, pud);
+	spin_lock(ptl);
+	/*
+	 * Make sure that the address range covered by this pud is not
+	 * unmapped by other threads.
+	 */
+	if (!pud_huge(*pud))
+		goto out;
+	if (pud_present(*pud)) {
+		page = pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
+		if (flags & FOLL_GET)
+			get_page(page);
+	} else {
+		if (is_hugetlb_entry_migration(huge_ptep_get((pte_t *)pud))) {
+			spin_unlock(ptl);
+			__migration_entry_wait(mm, (pte_t *)pud, ptl);
+			goto retry;
+		}
+		/*
+		 * hwpoisoned entry is treated as no_page_table in
+		 * follow_page_mask().
+		 */
+	}
+out:
+	spin_unlock(ptl);
+	return page;
 }
 
 struct page * __weak
 follow_huge_pgd(struct mm_struct *mm, unsigned long address,
 		pgd_t *pgd, int flags)
 {
-	if (flags & FOLL_GET)
-		return NULL;
-
-	return pte_page(*(pte_t *)pgd) +
-				((address & ~PGDIR_MASK) >> PAGE_SHIFT);
+	struct page *page = NULL;
+	spinlock_t *ptl;
+retry:
+	ptl = pgd_lockptr(mm, pgd);
+	spin_lock(ptl);
+	/*
+	 * Make sure that the address range covered by this pgd is not
+	 * unmapped by other threads.
+	 */
+	if (!pgd_huge(*pgd))
+		goto out;
+	if (pgd_present(*pgd)) {
+		page = pgd_page(*pgd) + ((address & ~PGDIR_MASK) >> PAGE_SHIFT);
+		if (flags & FOLL_GET)
+			get_page(page);
+	} else {
+		if (is_hugetlb_entry_migration(huge_ptep_get((pte_t *)pgd))) {
+			spin_unlock(ptl);
+			__migration_entry_wait(mm, (pte_t *)pgd, ptl);
+			goto retry;
+		}
+		/*
+		 * hwpoisoned entry is treated as no_page_table in
+		 * follow_page_mask().
+		 */
+	}
+out:
+	spin_unlock(ptl);
+	return page;
 }
 
 #ifdef CONFIG_MEMORY_FAILURE
-- 
2.1.0
