linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] powerpc: thp: Fix crash on mremap
@ 2014-01-01  9:53 Aneesh Kumar K.V
  2014-01-01 10:29 ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 10+ messages in thread
From: Aneesh Kumar K.V @ 2014-01-01  9:53 UTC (permalink / raw)
  To: benh, paulus, aarcange, kirill.shutemov
  Cc: linux-mm, linuxppc-dev, Aneesh Kumar K.V

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

This patch fix the below crash

NIP [c00000000004cee4] .__hash_page_thp+0x2a4/0x440
LR [c0000000000439ac] .hash_page+0x18c/0x5e0
...
Call Trace:
[c000000736103c40] [00001ffffb000000] 0x1ffffb000000(unreliable)
[437908.479693] [c000000736103d50] [c0000000000439ac] .hash_page+0x18c/0x5e0
[437908.479699] [c000000736103e30] [c00000000000924c] .do_hash_page+0x4c/0x58

On ppc64 we use the pgtable for storing the hpte slot information and
store address to the pgtable at a constant offset (PTRS_PER_PMD) from
pmd. On mremap, when we switch the pmd, we need to withdraw and deposit
the pgtable again, so that we find the pgtable at PTRS_PER_PMD offset
from new pmd.

We also want to move the withdraw and deposit before the set_pmd so
that, when page fault find the pmd as trans huge we can be sure that
pgtable can be located at the offset.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
NOTE:
For other archs we would just be removing the pgtable from the list and adding it back.
I didn't find an easy way to make it not do that without lots of #ifdef around. Any
suggestion around that is welcome.

 mm/huge_memory.c | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 7de1bf85f683..eb2e60d9ba45 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1500,24 +1500,23 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
 	 */
 	ret = __pmd_trans_huge_lock(old_pmd, vma, &old_ptl);
 	if (ret == 1) {
+		pgtable_t pgtable;
+
 		new_ptl = pmd_lockptr(mm, new_pmd);
 		if (new_ptl != old_ptl)
 			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
 		pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
 		VM_BUG_ON(!pmd_none(*new_pmd));
+		/*
+		 * Archs like ppc64 use pgtable to store per pmd
+		 * specific information. So when we switch the pmd,
+		 * we should also withdraw and deposit the pgtable
+		 */
+		pgtable = pgtable_trans_huge_withdraw(mm, old_pmd);
+		pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
 		set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd));
-		if (new_ptl != old_ptl) {
-			pgtable_t pgtable;
-
-			/*
-			 * Move preallocated PTE page table if new_pmd is on
-			 * different PMD page table.
-			 */
-			pgtable = pgtable_trans_huge_withdraw(mm, old_pmd);
-			pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
-
+		if (new_ptl != old_ptl)
 			spin_unlock(new_ptl);
-		}
 		spin_unlock(old_ptl);
 	}
 out:
-- 
1.8.3.2

^ permalink raw reply related	[flat|nested] 10+ messages in thread
* [PATCH] powerpc: thp: Fix crash on mremap
@ 2014-01-28 12:17 Aneesh Kumar K.V
  2014-02-03 14:47 ` Luis Henriques
  0 siblings, 1 reply; 10+ messages in thread
From: Aneesh Kumar K.V @ 2014-01-28 12:17 UTC (permalink / raw)
  To: benh, paulus, stable; +Cc: linuxppc-dev, Aneesh Kumar K.V

This patch fix the below crash

NIP [c00000000004cee4] .__hash_page_thp+0x2a4/0x440
LR [c0000000000439ac] .hash_page+0x18c/0x5e0
...
Call Trace:
[c000000736103c40] [00001ffffb000000] 0x1ffffb000000(unreliable)
[437908.479693] [c000000736103d50] [c0000000000439ac] .hash_page+0x18c/0x5e0
[437908.479699] [c000000736103e30] [c00000000000924c] .do_hash_page+0x4c/0x58

On ppc64 we use the pgtable for storing the hpte slot information and
store address to the pgtable at a constant offset (PTRS_PER_PMD) from
pmd. On mremap, when we switch the pmd, we need to withdraw and deposit
the pgtable again, so that we find the pgtable at PTRS_PER_PMD offset
from new pmd.

We also want to move the withdraw and deposit before the set_pmd so
that, when page fault find the pmd as trans huge we can be sure that
pgtable can be located at the offset.

variant of upstream SHA1: b3084f4db3aeb991c507ca774337c7e7893ed04f
for 3.11 stable series

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/Kconfig                           |  3 +++
 arch/powerpc/platforms/Kconfig.cputype |  1 +
 mm/huge_memory.c                       | 12 ++++++++++++
 3 files changed, 16 insertions(+)

diff --git a/arch/Kconfig b/arch/Kconfig
index 1feb169274fe..c5863b35d054 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -368,6 +368,9 @@ config HAVE_ARCH_TRANSPARENT_HUGEPAGE
 config HAVE_ARCH_SOFT_DIRTY
 	bool
 
+config ARCH_THP_MOVE_PMD_ALWAYS_WITHDRAW
+	bool
+
 config HAVE_MOD_ARCH_SPECIFIC
 	bool
 	help
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 47d9a03dd415..d11a34be018d 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -71,6 +71,7 @@ config PPC_BOOK3S_64
 	select PPC_FPU
 	select PPC_HAVE_PMU_SUPPORT
 	select SYS_SUPPORTS_HUGETLBFS
+	select ARCH_THP_MOVE_PMD_ALWAYS_WITHDRAW
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE if PPC_64K_PAGES
 
 config PPC_BOOK3E_64
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 12acb0ba7991..beaa7cc9de75 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1461,8 +1461,20 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
 
 	ret = __pmd_trans_huge_lock(old_pmd, vma);
 	if (ret == 1) {
+#ifdef CONFIG_ARCH_THP_MOVE_PMD_ALWAYS_WITHDRAW
+		pgtable_t pgtable;
+#endif
 		pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
 		VM_BUG_ON(!pmd_none(*new_pmd));
+#ifdef CONFIG_ARCH_THP_MOVE_PMD_ALWAYS_WITHDRAW
+		/*
+		 * Archs like ppc64 use pgtable to store per pmd
+		 * specific information. So when we switch the pmd,
+		 * we should also withdraw and deposit the pgtable
+		 */
+		pgtable = pgtable_trans_huge_withdraw(mm, old_pmd);
+		pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
+#endif
 		set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd));
 		spin_unlock(&mm->page_table_lock);
 	}
-- 
1.8.5.3

^ permalink raw reply related	[flat|nested] 10+ messages in thread
* [PATCH] powerpc: thp: Fix crash on mremap
@ 2014-01-28 12:18 Aneesh Kumar K.V
  0 siblings, 0 replies; 10+ messages in thread
From: Aneesh Kumar K.V @ 2014-01-28 12:18 UTC (permalink / raw)
  To: benh, paulus, stable; +Cc: linuxppc-dev, Aneesh Kumar K.V

This patch fix the below crash

NIP [c00000000004cee4] .__hash_page_thp+0x2a4/0x440
LR [c0000000000439ac] .hash_page+0x18c/0x5e0
...
Call Trace:
[c000000736103c40] [00001ffffb000000] 0x1ffffb000000(unreliable)
[437908.479693] [c000000736103d50] [c0000000000439ac] .hash_page+0x18c/0x5e0
[437908.479699] [c000000736103e30] [c00000000000924c] .do_hash_page+0x4c/0x58

On ppc64 we use the pgtable for storing the hpte slot information and
store address to the pgtable at a constant offset (PTRS_PER_PMD) from
pmd. On mremap, when we switch the pmd, we need to withdraw and deposit
the pgtable again, so that we find the pgtable at PTRS_PER_PMD offset
from new pmd.

We also want to move the withdraw and deposit before the set_pmd so
that, when page fault find the pmd as trans huge we can be sure that
pgtable can be located at the offset.

variant of upstream SHA1: b3084f4db3aeb991c507ca774337c7e7893ed04f
for 3.12 stable series

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/Kconfig                           |  3 +++
 arch/powerpc/platforms/Kconfig.cputype |  1 +
 mm/huge_memory.c                       | 12 ++++++++++++
 3 files changed, 16 insertions(+)

diff --git a/arch/Kconfig b/arch/Kconfig
index af2cc6eabcc7..bca9e7a18bd2 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -365,6 +365,9 @@ config HAVE_ARCH_TRANSPARENT_HUGEPAGE
 config HAVE_ARCH_SOFT_DIRTY
 	bool
 
+config ARCH_THP_MOVE_PMD_ALWAYS_WITHDRAW
+	bool
+
 config HAVE_MOD_ARCH_SPECIFIC
 	bool
 	help
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 6704e2e20e6b..0225011231ea 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -71,6 +71,7 @@ config PPC_BOOK3S_64
 	select PPC_FPU
 	select PPC_HAVE_PMU_SUPPORT
 	select SYS_SUPPORTS_HUGETLBFS
+	select ARCH_THP_MOVE_PMD_ALWAYS_WITHDRAW
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE if PPC_64K_PAGES
 
 config PPC_BOOK3E_64
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 292a266e0d42..89b7a647f1cb 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1474,8 +1474,20 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
 
 	ret = __pmd_trans_huge_lock(old_pmd, vma);
 	if (ret == 1) {
+#ifdef CONFIG_ARCH_THP_MOVE_PMD_ALWAYS_WITHDRAW
+		pgtable_t pgtable;
+#endif
 		pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
 		VM_BUG_ON(!pmd_none(*new_pmd));
+#ifdef CONFIG_ARCH_THP_MOVE_PMD_ALWAYS_WITHDRAW
+		/*
+		 * Archs like ppc64 use pgtable to store per pmd
+		 * specific information. So when we switch the pmd,
+		 * we should also withdraw and deposit the pgtable
+		 */
+		pgtable = pgtable_trans_huge_withdraw(mm, old_pmd);
+		pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
+#endif
 		set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd));
 		spin_unlock(&mm->page_table_lock);
 	}
-- 
1.8.5.3

^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2014-02-03 14:48 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-01-01  9:53 [PATCH] powerpc: thp: Fix crash on mremap Aneesh Kumar K.V
2014-01-01 10:29 ` Benjamin Herrenschmidt
2014-01-02  2:19   ` Kirill A. Shutemov
2014-01-13 22:17     ` Andrew Morton
2014-01-13 22:30       ` Kirill A. Shutemov
2014-01-14  4:13       ` Benjamin Herrenschmidt
2014-01-14  4:32         ` Andrew Morton
2014-01-28 12:17 Aneesh Kumar K.V
2014-02-03 14:47 ` Luis Henriques
2014-01-28 12:18 Aneesh Kumar K.V

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).