* [PATCH V3 1/6] powerpc/mm/book3s: Update pmd_present to look at _PAGE_PRESENT bit
@ 2018-09-20 18:09 Aneesh Kumar K.V
  2018-09-20 18:09 ` [PATCH V3 2/6] powerpc/mm/hugetlb/book3s: add _PAGE_PRESENT to hugepd pointer Aneesh Kumar K.V
                   ` (6 more replies)
  0 siblings, 7 replies; 14+ messages in thread
From: Aneesh Kumar K.V @ 2018-09-20 18:09 UTC (permalink / raw)
  To: npiggin, benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

With this patch we use 0x8000000000000000UL (_PAGE_PRESENT) to indicate a valid
pgd/pud/pmd entry. We also switch the p**_present() to look at this bit.

With pmd_present, we have a special case. We need to make sure we consider a
pmd marked invalid during THP split as present. Right now we clear the
_PAGE_PRESENT bit during a pmdp_invalidate. In order to consider this special
case we add a new pte bit _PAGE_INVALID (mapped to _RPAGE_SW0). This bit is
only used with _PAGE_PRESENT cleared. Hence we are not really losing a pte bit
for this special case. pmd_present is also updated to look at _PAGE_INVALID.
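
A minimal sketch of the resulting bit protocol, as plain userspace C (the
SK_* bit positions are invented for illustration; in the kernel,
_PAGE_PRESENT is the top bit and _PAGE_INVALID maps to _RPAGE_SW0):

#include <stdbool.h>
#include <stdint.h>

#define SK_PAGE_PRESENT (1ULL << 63)	/* hardware valid bit */
#define SK_PAGE_INVALID (1ULL << 62)	/* software bit, stand-in for _RPAGE_SW0 */

/* Present, or only transiently invalidated during a THP split. */
static bool sketch_pmd_present(uint64_t pmd)
{
	return pmd & (SK_PAGE_PRESENT | SK_PAGE_INVALID);
}

/* The bit transition pmdp_invalidate() performs in this series. */
static uint64_t sketch_pmdp_invalidate(uint64_t pmd)
{
	return (pmd & ~SK_PAGE_PRESENT) | SK_PAGE_INVALID;
}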

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash.h    |  5 +++++
 arch/powerpc/include/asm/book3s/64/pgtable.h | 14 +++++++++++---
 arch/powerpc/mm/hash_utils_64.c              |  6 +++---
 arch/powerpc/mm/pgtable-book3s64.c           |  8 ++++++--
 arch/powerpc/mm/pgtable.c                    |  7 +++----
 5 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index d52a51b2ce7b..fcf8b10a209f 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -18,6 +18,11 @@
 #include <asm/book3s/64/hash-4k.h>
 #endif
 
+/* Bits to set for a valid PMD/PUD/PGD entry */
+#define HASH_PMD_VAL_BITS		(0x8000000000000000UL)
+#define HASH_PUD_VAL_BITS		(0x8000000000000000UL)
+#define HASH_PGD_VAL_BITS		(0x8000000000000000UL)
+
 /*
  * Size of EA range mapped by our pagetables.
  */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 13a688fc8cd0..8feb4a3240d5 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -875,8 +875,16 @@ static inline int pmd_none(pmd_t pmd)
 
 static inline int pmd_present(pmd_t pmd)
 {
+	/*
+	 * A pmd is considered present if _PAGE_PRESENT is set.
+	 * We also need to treat as present a pmd which is marked
+	 * invalid during a split. Hence we look for _PAGE_INVALID
+	 * if we find _PAGE_PRESENT cleared.
+	 */
+	if (pmd_raw(pmd) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID))
+		return true;
 
-	return !pmd_none(pmd);
+	return false;
 }
 
 static inline int pmd_bad(pmd_t pmd)
@@ -903,7 +911,7 @@ static inline int pud_none(pud_t pud)
 
 static inline int pud_present(pud_t pud)
 {
-	return !pud_none(pud);
+	return (pud_raw(pud) & cpu_to_be64(_PAGE_PRESENT));
 }
 
 extern struct page *pud_page(pud_t pud);
@@ -950,7 +958,7 @@ static inline int pgd_none(pgd_t pgd)
 
 static inline int pgd_present(pgd_t pgd)
 {
-	return !pgd_none(pgd);
+	return (pgd_raw(pgd) & cpu_to_be64(_PAGE_PRESENT));
 }
 
 static inline pte_t pgd_pte(pgd_t pgd)
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 88c95dc8b141..13ba718c9680 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1001,9 +1001,9 @@ void __init hash__early_init_mmu(void)
 	 * 4k use hugepd format, so for hash set then to
 	 * zero
 	 */
-	__pmd_val_bits = 0;
-	__pud_val_bits = 0;
-	__pgd_val_bits = 0;
+	__pmd_val_bits = HASH_PMD_VAL_BITS;
+	__pud_val_bits = HASH_PUD_VAL_BITS;
+	__pgd_val_bits = HASH_PGD_VAL_BITS;
 
 	__kernel_virt_start = H_KERN_VIRT_START;
 	__kernel_virt_size = H_KERN_VIRT_SIZE;
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 01d7c0f7c4f0..654000da8b15 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -69,7 +69,11 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 		pmd_t *pmdp, pmd_t pmd)
 {
 #ifdef CONFIG_DEBUG_VM
-	WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
+	/*
+	 * Make sure hardware valid bit is not set. We don't do
+	 * tlb flush for this update.
+	 */
+	WARN_ON(pte_val(pmd_pte(*pmdp)) & _PAGE_PRESENT);
 	assert_spin_locked(pmd_lockptr(mm, pmdp));
 	WARN_ON(!(pmd_trans_huge(pmd) || pmd_devmap(pmd)));
 #endif
@@ -106,7 +110,7 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 {
 	unsigned long old_pmd;
 
-	old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
+	old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, _PAGE_INVALID);
 	flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
 	/*
 	 * This ensures that generic code that rely on IRQ disabling
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index d71c7777669c..aee04b209b51 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -188,11 +188,10 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
 		pte_t pte)
 {
 	/*
-	 * When handling numa faults, we already have the pte marked
-	 * _PAGE_PRESENT, but we can be sure that it is not in hpte.
-	 * Hence we can use set_pte_at for them.
+	 * Make sure hardware valid bit is not set. We don't do
+	 * tlb flush for this update.
 	 */
-	VM_WARN_ON(pte_present(*ptep) && !pte_protnone(*ptep));
+	VM_WARN_ON(pte_val(*ptep) & _PAGE_PRESENT);
 
 	/* Add the pte bit when trying to set a pte */
 	pte = __pte(pte_val(pte) | _PAGE_PTE);
-- 
2.17.1


* [PATCH V3 2/6] powerpc/mm/hugetlb/book3s: add _PAGE_PRESENT to hugepd pointer.
  2018-09-20 18:09 [PATCH V3 1/6] powerpc/mm/book3s: Update pmd_present to look at _PAGE_PRESENT bit Aneesh Kumar K.V
@ 2018-09-20 18:09 ` Aneesh Kumar K.V
  2018-09-20 18:09 ` [PATCH V3 3/6] powerpc/mm/book3s: Check for pmd_large instead of pmd_trans_huge Aneesh Kumar K.V
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: Aneesh Kumar K.V @ 2018-09-20 18:09 UTC (permalink / raw)
  To: npiggin, benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

This makes the hugetlb directory pointer similar to other page table entries.
A hugepd entry is identified by the lack of the _PAGE_PTE bit and by a
directory size stored in HUGEPD_SHIFT_MASK. We update that check to also
require _PAGE_PRESENT.
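
A sketch of the resulting three-part test, with flag values invented for
illustration:

#include <stdbool.h>
#include <stdint.h>

#define SK_PAGE_PTE		(1ULL << 62)	/* leaf-entry marker */
#define SK_PAGE_PRESENT		(1ULL << 63)	/* valid bit added by this patch */
#define SK_HUGEPD_SHIFT_MASK	0x3fULL		/* directory-size field */

/* Not a leaf pte, marked valid, and carrying a non-zero directory size. */
static bool sketch_hugepd_ok(uint64_t hpdval)
{
	return !(hpdval & SK_PAGE_PTE) &&
	       (hpdval & SK_PAGE_PRESENT) &&
	       (hpdval & SK_HUGEPD_SHIFT_MASK) != 0;
}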

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash-4k.h | 2 +-
 arch/powerpc/include/asm/book3s/64/hugetlb.h | 3 +++
 arch/powerpc/mm/hugetlbpage.c                | 2 +-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 9a3798660cef..15bc16b1dc9c 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -66,7 +66,7 @@ static inline int hash__hugepd_ok(hugepd_t hpd)
 	 * if it is not a pte and have hugepd shift mask
 	 * set, then it is a hugepd directory pointer
 	 */
-	if (!(hpdval & _PAGE_PTE) &&
+	if (!(hpdval & _PAGE_PTE) && (hpdval & _PAGE_PRESENT) &&
 	    ((hpdval & HUGEPD_SHIFT_MASK) != 0))
 		return true;
 	return false;
diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h
index 50888388a359..5b0177733994 100644
--- a/arch/powerpc/include/asm/book3s/64/hugetlb.h
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
@@ -39,4 +39,7 @@ static inline bool gigantic_page_supported(void)
 }
 #endif
 
+/* hugepd entry valid bit */
+#define HUGEPD_VAL_BITS		(0x8000000000000000UL)
+
 #endif
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index e87f9ef9115b..c6df73c66c40 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -95,7 +95,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 			break;
 		else {
 #ifdef CONFIG_PPC_BOOK3S_64
-			*hpdp = __hugepd(__pa(new) |
+			*hpdp = __hugepd(__pa(new) | HUGEPD_VAL_BITS |
 					 (shift_to_mmu_psize(pshift) << 2));
 #elif defined(CONFIG_PPC_8xx)
 			*hpdp = __hugepd(__pa(new) | _PMD_USER |
-- 
2.17.1


* [PATCH V3 3/6] powerpc/mm/book3s: Check for pmd_large instead of pmd_trans_huge
  2018-09-20 18:09 [PATCH V3 1/6] powerpc/mm/book3s: Update pmd_present to look at _PAGE_PRESENT bit Aneesh Kumar K.V
  2018-09-20 18:09 ` [PATCH V3 2/6] powerpc/mm/hugetlb/book3s: add _PAGE_PRESENT to hugepd pointer Aneesh Kumar K.V
@ 2018-09-20 18:09 ` Aneesh Kumar K.V
  2018-09-20 18:09 ` [PATCH V3 4/6] arch/powerpc/mm/hash: validate the pte entries before handling the hash fault Aneesh Kumar K.V
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: Aneesh Kumar K.V @ 2018-09-20 18:09 UTC (permalink / raw)
  To: npiggin, benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Update a few code paths to check for pmd_large() instead.

set_pmd_at:
We want to use set_pmd_at() to store a swap pte at the pmd level. For swap
ptes we don't want to set H_PAGE_THP_HUGE, hence check for pmd_large() in
set_pmd_at(). This removes the false WARN_ON when it is used with a swap pmd
entry.

pmd_page:
We don't really use it on pmd migration entries, but it can also work with
them, and we don't differentiate at the pte level. Hence update pmd_page() to
work with pmd migration entries too.

__find_linux_pte:
The lockless page table walk needs to handle pmd migration entries; the
pmd_trans_huge() check returns false on them. We don't set thp = 1 for such
entries, but we do update hpage_shift correctly. Without this we would walk a
pmd migration entry as a pte page pointer, which is wrong.
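
A sketch of the distinction, with invented bit values (the real
pmd_trans_huge() checks a separate THP marker, and patch 5 below adds a
present check on top):

#include <stdbool.h>
#include <stdint.h>

#define SK_PAGE_PTE	(1ULL << 62)	/* leaf marker, kept by migration entries */
#define SK_PAGE_PRESENT	(1ULL << 63)	/* cleared for migration entries */

/* Any leaf entry at pmd level: THP, devmap, or a pmd migration entry. */
static bool sketch_pmd_large(uint64_t pmd)
{
	return !!(pmd & SK_PAGE_PTE);
}

/* False for a migration entry, which keeps the leaf marker but is not present. */
static bool sketch_pmd_trans_huge(uint64_t pmd)
{
	return (pmd & SK_PAGE_PTE) && (pmd & SK_PAGE_PRESENT);
}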

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/mm/hugetlbpage.c      | 8 ++++++--
 arch/powerpc/mm/pgtable-book3s64.c | 2 +-
 arch/powerpc/mm/pgtable_64.c       | 2 +-
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index c6df73c66c40..9504641bd4d9 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -837,8 +837,12 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
 				ret_pte = (pte_t *) pmdp;
 				goto out;
 			}
-
-			if (pmd_huge(pmd)) {
+			/*
+			 * pmd_large check below will handle the swap pmd pte.
+			 * We need to do both checks because they are config
+			 * dependent.
+			 */
+			if (pmd_huge(pmd) || pmd_large(pmd)) {
 				ret_pte = (pte_t *) pmdp;
 				goto out;
 			} else if (is_hugepd(__hugepd(pmd_val(pmd))))
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 654000da8b15..43e99e1d947b 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -75,7 +75,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 	 */
 	WARN_ON(pte_val(pmd_pte(*pmdp)) & _PAGE_PRESENT);
 	assert_spin_locked(pmd_lockptr(mm, pmdp));
-	WARN_ON(!(pmd_trans_huge(pmd) || pmd_devmap(pmd)));
+	WARN_ON(!(pmd_large(pmd) || pmd_devmap(pmd)));
 #endif
 	trace_hugepage_set_pmd(addr, pmd_val(pmd));
 	return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 53e9eeecd5d4..e15e63079ba8 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -306,7 +306,7 @@ struct page *pud_page(pud_t pud)
  */
 struct page *pmd_page(pmd_t pmd)
 {
-	if (pmd_trans_huge(pmd) || pmd_huge(pmd) || pmd_devmap(pmd))
+	if (pmd_large(pmd) || pmd_huge(pmd) || pmd_devmap(pmd))
 		return pte_page(pmd_pte(pmd));
 	return virt_to_page(pmd_page_vaddr(pmd));
 }
-- 
2.17.1


* [PATCH V3 4/6] arch/powerpc/mm/hash: validate the pte entries before handling the hash fault
  2018-09-20 18:09 [PATCH V3 1/6] powerpc/mm/book3s: Update pmd_present to look at _PAGE_PRESENT bit Aneesh Kumar K.V
  2018-09-20 18:09 ` [PATCH V3 2/6] powerpc/mm/hugetlb/book3s: add _PAGE_PRESENT to hugepd pointer Aneesh Kumar K.V
  2018-09-20 18:09 ` [PATCH V3 3/6] powerpc/mm/book3s: Check for pmd_large instead of pmd_trans_huge Aneesh Kumar K.V
@ 2018-09-20 18:09 ` Aneesh Kumar K.V
  2018-09-20 18:09 ` [PATCH V3 5/6] powerpc/mm/thp: update pmd_trans_huge to check for pmd_present Aneesh Kumar K.V
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: Aneesh Kumar K.V @ 2018-09-20 18:09 UTC (permalink / raw)
  To: npiggin, benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Make sure we are operating on THP and hugetlb entries in the respective hash
fault handling routines.

No functional change in this patch. If we walked the table wrongly before, we
will retry the access.
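
Seen side by side, the two guards added below are mirror images; returning 0
means no HPTE was inserted, so the faulting access is simply retried:

	/* __hash_page_thp(): the entry we raced with must be THP or devmap */
	if (!(old_pmd & (H_PAGE_THP_HUGE | _PAGE_DEVMAP)))
		return 0;

	/* __hash_page_huge(): a hugetlb pte must not carry those bits */
	if (old_pte & (H_PAGE_THP_HUGE | _PAGE_DEVMAP))
		return 0;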

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/mm/hugepage-hash64.c    | 6 ++++++
 arch/powerpc/mm/hugetlbpage-hash64.c | 4 ++++
 2 files changed, 10 insertions(+)

diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index 01f213d2bcb9..dfbc3b32f09b 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -51,6 +51,12 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
 			new_pmd |= _PAGE_DIRTY;
 	} while (!pmd_xchg(pmdp, __pmd(old_pmd), __pmd(new_pmd)));
 
+	/*
+	 * Make sure this is a thp or devmap entry.
+	 */
+	if (!(old_pmd & (H_PAGE_THP_HUGE | _PAGE_DEVMAP)))
+		return 0;
+
 	rflags = htab_convert_pte_flags(new_pmd);
 
 #if 0
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index b320f5097a06..2e6a8f9345d3 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -62,6 +62,10 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 			new_pte |= _PAGE_DIRTY;
 	} while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
 
+	/* Make sure this is a hugetlb entry */
+	if (old_pte & (H_PAGE_THP_HUGE | _PAGE_DEVMAP))
+		return 0;
+
 	rflags = htab_convert_pte_flags(new_pte);
 	if (unlikely(mmu_psize == MMU_PAGE_16G))
 		offset = PTRS_PER_PUD;
-- 
2.17.1


* [PATCH V3 5/6] powerpc/mm/thp: update pmd_trans_huge to check for pmd_present
  2018-09-20 18:09 [PATCH V3 1/6] powerpc/mm/book3s: Update pmd_present to look at _PAGE_PRESENT bit Aneesh Kumar K.V
                   ` (2 preceding siblings ...)
  2018-09-20 18:09 ` [PATCH V3 4/6] arch/powerpc/mm/hash: validate the pte entries before handling the hash fault Aneesh Kumar K.V
@ 2018-09-20 18:09 ` Aneesh Kumar K.V
  2018-09-20 18:09 ` [PATCH V3 6/6] powerpc/mm:book3s: Enable THP migration support Aneesh Kumar K.V
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: Aneesh Kumar K.V @ 2018-09-20 18:09 UTC (permalink / raw)
  To: npiggin, benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

We need to make sure pmd_trans_huge returns false for a pmd migration entry.
We mark the migration entry by clearing the _PAGE_PRESENT bit. We keep the
_PAGE_PTE bit set to indicate a leaf page table entry. Hence we need to make
sure we check for pmd_present() so that pmd_trans_huge won't return true on
a pmd migration entry.
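
A small self-checking sketch of the resulting helper behaviour (invented bit
values, swap-entry contents not modelled):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define SK_PAGE_PTE	(1ULL << 62)
#define SK_PAGE_PRESENT	(1ULL << 63)
#define SK_PAGE_INVALID	(1ULL << 61)

static bool sketch_pmd_present(uint64_t pmd)
{
	return pmd & (SK_PAGE_PRESENT | SK_PAGE_INVALID);
}

/* After this patch: a leaf entry that is also present (or mid-split). */
static bool sketch_pmd_trans_huge(uint64_t pmd)
{
	return (pmd & SK_PAGE_PTE) && sketch_pmd_present(pmd);
}

int main(void)
{
	uint64_t thp       = SK_PAGE_PTE | SK_PAGE_PRESENT;
	uint64_t splitting = SK_PAGE_PTE | SK_PAGE_INVALID;	/* pmdp_invalidate() window */
	uint64_t migration = SK_PAGE_PTE;

	assert(sketch_pmd_trans_huge(thp));
	assert(sketch_pmd_trans_huge(splitting));
	assert(!sketch_pmd_trans_huge(migration));
	return 0;
}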

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 .../include/asm/book3s/64/pgtable-64k.h        |  3 +++
 arch/powerpc/include/asm/book3s/64/pgtable.h   | 18 ++++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
index d7ee249d6890..e3d4dd4ae2fa 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
@@ -10,6 +10,9 @@
  *
  * Defined in such a way that we can optimize away code block at build time
  * if CONFIG_HUGETLB_PAGE=n.
+ *
+ * Returns true for pmd migration entries, THP, devmap and hugetlb,
+ * but is compile-time dependent on CONFIG_HUGETLB_PAGE.
  */
 static inline int pmd_huge(pmd_t pmd)
 {
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 8feb4a3240d5..e24db2aa260f 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1136,6 +1136,10 @@ pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp,
 	return hash__pmd_hugepage_update(mm, addr, pmdp, clr, set);
 }
 
+/*
+ * Returns true for pmd migration entries, THP, devmap and hugetlb,
+ * but is compile-time dependent on the THP config.
+ */
 static inline int pmd_large(pmd_t pmd)
 {
 	return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
@@ -1170,8 +1174,22 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 		pmd_hugepage_update(mm, addr, pmdp, 0, _PAGE_PRIVILEGED);
 }
 
+/*
+ * Only returns true for a THP. False for pmd migration entry.
+ * We also need to return true when we come across a pmd that is
+ * in the middle of a THP split. While splitting THP, we mark the pmd
+ * invalid (pmdp_invalidate()) before we set it with pte page
+ * address. A pmd_trans_huge() check against a pmd entry during that time
+ * should return true.
+ * We should not call this on a hugetlb entry; check for a HugeTLB
+ * entry using vma->vm_flags.
+ * The page table walk rule is explained in Documentation/vm/transhuge.rst
+ */
 static inline int pmd_trans_huge(pmd_t pmd)
 {
+	if (!pmd_present(pmd))
+		return false;
+
 	if (radix_enabled())
 		return radix__pmd_trans_huge(pmd);
 	return hash__pmd_trans_huge(pmd);
-- 
2.17.1


* [PATCH V3 6/6] powerpc/mm:book3s: Enable THP migration support
  2018-09-20 18:09 [PATCH V3 1/6] powerpc/mm/book3s: Update pmd_present to look at _PAGE_PRESENT bit Aneesh Kumar K.V
                   ` (3 preceding siblings ...)
  2018-09-20 18:09 ` [PATCH V3 5/6] powerpc/mm/thp: update pmd_trans_huge to check for pmd_present Aneesh Kumar K.V
@ 2018-09-20 18:09 ` Aneesh Kumar K.V
  2018-09-21  5:55 ` [PATCH V3 1/6] powerpc/mm/book3s: Update pmd_present to look at _PAGE_PRESENT bit Christophe LEROY
  2018-10-04  6:14 ` [V3, " Michael Ellerman
  6 siblings, 0 replies; 14+ messages in thread
From: Aneesh Kumar K.V @ 2018-09-20 18:09 UTC (permalink / raw)
  To: npiggin, benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 8 ++++++++
 arch/powerpc/platforms/Kconfig.cputype       | 1 +
 2 files changed, 9 insertions(+)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index e24db2aa260f..c68cbbff3429 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -741,6 +741,8 @@ static inline bool pte_user(pte_t pte)
  */
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE })
 #define __swp_entry_to_pte(x)	__pte((x).val | _PAGE_PTE)
+#define __pmd_to_swp_entry(pmd)	(__pte_to_swp_entry(pmd_pte(pmd)))
+#define __swp_entry_to_pmd(x)	(pte_pmd(__swp_entry_to_pte(x)))
 
 #ifdef CONFIG_MEM_SOFT_DIRTY
 #define _PAGE_SWP_SOFT_DIRTY   (1UL << (SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE))
@@ -1091,6 +1093,12 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
 #define pmd_soft_dirty(pmd)    pte_soft_dirty(pmd_pte(pmd))
 #define pmd_mksoft_dirty(pmd)  pte_pmd(pte_mksoft_dirty(pmd_pte(pmd)))
 #define pmd_clear_soft_dirty(pmd) pte_pmd(pte_clear_soft_dirty(pmd_pte(pmd)))
+
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+#define pmd_swp_mksoft_dirty(pmd)	pte_pmd(pte_swp_mksoft_dirty(pmd_pte(pmd)))
+#define pmd_swp_soft_dirty(pmd)		pte_swp_soft_dirty(pmd_pte(pmd))
+#define pmd_swp_clear_soft_dirty(pmd)	pte_pmd(pte_swp_clear_soft_dirty(pmd_pte(pmd)))
+#endif
 #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
 
 #ifdef CONFIG_NUMA_BALANCING
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 6c6a7c72cae4..495db17dcbca 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -72,6 +72,7 @@ config PPC_BOOK3S_64
 	select PPC_HAVE_PMU_SUPPORT
 	select SYS_SUPPORTS_HUGETLBFS
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+	select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
 	select ARCH_SUPPORTS_NUMA_BALANCING
 	select IRQ_WORK
 
-- 
2.17.1
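
These macros are what the generic helpers in include/linux/swapops.h expand
to. Roughly, the generic code that freezes a THP into a migration entry does
something like the following (a sketch of the mm/huge_memory.c flow, not an
exact excerpt):

	swp_entry_t entry = make_migration_entry(page, pmd_write(pmdval));
	pmd_t pmdswp = swp_entry_to_pmd(entry);	/* uses __swp_entry_to_pmd() */

	if (pmd_soft_dirty(pmdval))
		pmdswp = pmd_swp_mksoft_dirty(pmdswp);
	set_pmd_at(mm, address, pmdp, pmdswp);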


* Re: [PATCH V3 1/6] powerpc/mm/book3s: Update pmd_present to look at _PAGE_PRESENT bit
  2018-09-20 18:09 [PATCH V3 1/6] powerpc/mm/book3s: Update pmd_present to look at _PAGE_PRESENT bit Aneesh Kumar K.V
                   ` (4 preceding siblings ...)
  2018-09-20 18:09 ` [PATCH V3 6/6] powerpc/mm:book3s: Enable THP migration support Aneesh Kumar K.V
@ 2018-09-21  5:55 ` Christophe LEROY
  2018-09-21 10:26   ` Aneesh Kumar K.V
  2018-10-04  6:14 ` [V3, " Michael Ellerman
  6 siblings, 1 reply; 14+ messages in thread
From: Christophe LEROY @ 2018-09-21  5:55 UTC (permalink / raw)
  To: Aneesh Kumar K.V, npiggin, benh, paulus, mpe; +Cc: linuxppc-dev



On 20/09/2018 at 20:09, Aneesh Kumar K.V wrote:
> With this patch we use 0x8000000000000000UL (_PAGE_PRESENT) to indicate a valid
> pgd/pud/pmd entry. We also switch the p**_present() to look at this bit.
> 
> With pmd_present, we have a special case. We need to make sure we consider a
> pmd marked invalid during THP split as present. Right now we clear the
> _PAGE_PRESENT bit during a pmdp_invalidate. Inorder to consider this special
> case we add a new pte bit _PAGE_INVALID (mapped to _RPAGE_SW0). This bit is
> only used with _PAGE_PRESENT cleared. Hence we are not really losing a pte bit
> for this special case. pmd_present is also updated to look at _PAGE_INVALID.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
>   arch/powerpc/include/asm/book3s/64/hash.h    |  5 +++++
>   arch/powerpc/include/asm/book3s/64/pgtable.h | 14 +++++++++++---
>   arch/powerpc/mm/hash_utils_64.c              |  6 +++---
>   arch/powerpc/mm/pgtable-book3s64.c           |  8 ++++++--
>   arch/powerpc/mm/pgtable.c                    |  7 +++----
>   5 files changed, 28 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
> index d52a51b2ce7b..fcf8b10a209f 100644
> --- a/arch/powerpc/include/asm/book3s/64/hash.h
> +++ b/arch/powerpc/include/asm/book3s/64/hash.h
> @@ -18,6 +18,11 @@
>   #include <asm/book3s/64/hash-4k.h>
>   #endif
>   
> +/* Bits to set for a valid PMD/PUD/PGD entry */
> +#define HASH_PMD_VAL_BITS		(0x8000000000000000UL)
> +#define HASH_PUD_VAL_BITS		(0x8000000000000000UL)
> +#define HASH_PGD_VAL_BITS		(0x8000000000000000UL)
> +
>   /*
>    * Size of EA range mapped by our pagetables.
>    */
> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
> index 13a688fc8cd0..8feb4a3240d5 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
> @@ -875,8 +875,16 @@ static inline int pmd_none(pmd_t pmd)
>   
>   static inline int pmd_present(pmd_t pmd)
>   {
> +	/*
> +	 * A pmd is considered present if _PAGE_PRESENT is set.
> +	 * We also need to treat as present a pmd which is marked
> +	 * invalid during a split. Hence we look for _PAGE_INVALID
> +	 * if we find _PAGE_PRESENT cleared.
> +	 */
> +	if (pmd_raw(pmd) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID))
> +		return true;
>   
> -	return !pmd_none(pmd);
> +	return false;
>   }
>   
>   static inline int pmd_bad(pmd_t pmd)
> @@ -903,7 +911,7 @@ static inline int pud_none(pud_t pud)
>   
>   static inline int pud_present(pud_t pud)
>   {
> -	return !pud_none(pud);
> +	return (pud_raw(pud) & cpu_to_be64(_PAGE_PRESENT));
>   }
>   
>   extern struct page *pud_page(pud_t pud);
> @@ -950,7 +958,7 @@ static inline int pgd_none(pgd_t pgd)
>   
>   static inline int pgd_present(pgd_t pgd)
>   {
> -	return !pgd_none(pgd);
> +	return (pgd_raw(pgd) & cpu_to_be64(_PAGE_PRESENT));
>   }
>   
>   static inline pte_t pgd_pte(pgd_t pgd)
> diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
> index 88c95dc8b141..13ba718c9680 100644
> --- a/arch/powerpc/mm/hash_utils_64.c
> +++ b/arch/powerpc/mm/hash_utils_64.c
> @@ -1001,9 +1001,9 @@ void __init hash__early_init_mmu(void)
>   	 * 4k use hugepd format, so for hash set then to
>   	 * zero
>   	 */
> -	__pmd_val_bits = 0;
> -	__pud_val_bits = 0;
> -	__pgd_val_bits = 0;
> +	__pmd_val_bits = HASH_PMD_VAL_BITS;
> +	__pud_val_bits = HASH_PUD_VAL_BITS;
> +	__pgd_val_bits = HASH_PGD_VAL_BITS;
>   
>   	__kernel_virt_start = H_KERN_VIRT_START;
>   	__kernel_virt_size = H_KERN_VIRT_SIZE;
> diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
> index 01d7c0f7c4f0..654000da8b15 100644
> --- a/arch/powerpc/mm/pgtable-book3s64.c
> +++ b/arch/powerpc/mm/pgtable-book3s64.c
> @@ -69,7 +69,11 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
>   		pmd_t *pmdp, pmd_t pmd)
>   {
>   #ifdef CONFIG_DEBUG_VM
> -	WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
> +	/*
> +	 * Make sure hardware valid bit is not set. We don't do
> +	 * tlb flush for this update.
> +	 */
> +	WARN_ON(pte_val(pmd_pte(*pmdp)) & _PAGE_PRESENT);
>   	assert_spin_locked(pmd_lockptr(mm, pmdp));
>   	WARN_ON(!(pmd_trans_huge(pmd) || pmd_devmap(pmd)));
>   #endif
> @@ -106,7 +110,7 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
>   {
>   	unsigned long old_pmd;
>   
> -	old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
> +	old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, _PAGE_INVALID);
>   	flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
>   	/*
>   	 * This ensures that generic code that rely on IRQ disabling
> diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
> index d71c7777669c..aee04b209b51 100644
> --- a/arch/powerpc/mm/pgtable.c
> +++ b/arch/powerpc/mm/pgtable.c
> @@ -188,11 +188,10 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
>   		pte_t pte)
>   {
>   	/*
> -	 * When handling numa faults, we already have the pte marked
> -	 * _PAGE_PRESENT, but we can be sure that it is not in hpte.
> -	 * Hence we can use set_pte_at for them.
> +	 * Make sure hardware valid bit is not set. We don't do
> +	 * tlb flush for this update.
>   	 */
> -	VM_WARN_ON(pte_present(*ptep) && !pte_protnone(*ptep));
> +	VM_WARN_ON(pte_val(*ptep) & _PAGE_PRESENT);

Why not use pte_present() anymore?

Also, you are removing the pte_protnone() check; won't it change the
behaviour?

If we can't use pte_present(), can we create a new helper for that
(although _PAGE_PRESENT exists on all platforms)?

Christophe


>   
>   	/* Add the pte bit when trying to set a pte */
>   	pte = __pte(pte_val(pte) | _PAGE_PTE);
> 


* Re: [PATCH V3 1/6] powerpc/mm/book3s: Update pmd_present to look at _PAGE_PRESENT bit
  2018-09-21  5:55 ` [PATCH V3 1/6] powerpc/mm/book3s: Update pmd_present to look at _PAGE_PRESENT bit Christophe LEROY
@ 2018-09-21 10:26   ` Aneesh Kumar K.V
  2018-09-21 10:52     ` Christophe LEROY
  0 siblings, 1 reply; 14+ messages in thread
From: Aneesh Kumar K.V @ 2018-09-21 10:26 UTC (permalink / raw)
  To: Christophe LEROY, npiggin, benh, paulus, mpe; +Cc: linuxppc-dev

On 9/21/18 11:25 AM, Christophe LEROY wrote:
> 
> 
> On 20/09/2018 at 20:09, Aneesh Kumar K.V wrote:

>> diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
>> index d71c7777669c..aee04b209b51 100644
>> --- a/arch/powerpc/mm/pgtable.c
>> +++ b/arch/powerpc/mm/pgtable.c
>> @@ -188,11 +188,10 @@ void set_pte_at(struct mm_struct *mm, unsigned 
>> long addr, pte_t *ptep,
>>           pte_t pte)
>>   {
>>       /*
>> -     * When handling numa faults, we already have the pte marked
>> -     * _PAGE_PRESENT, but we can be sure that it is not in hpte.
>> -     * Hence we can use set_pte_at for them.
>> +     * Make sure hardware valid bit is not set. We don't do
>> +     * tlb flush for this update.
>>        */
>> -    VM_WARN_ON(pte_present(*ptep) && !pte_protnone(*ptep));
>> +    VM_WARN_ON(pte_val(*ptep) & _PAGE_PRESENT);
> 
> Why not use pte_present() anymore?
> 
> Also, you are removing the pte_protnone() check; won't it change the
> behaviour?
> 
> If we can't use pte_present(), can we create a new helper for that
> (although _PAGE_PRESENT exists on all platforms)?
> 
> Christophe
> 

This patch updates the page table clear path to clear _PAGE_PRESENT and mark
the entry invalid via _PAGE_INVALID. pte_present() now looks at both flags.
That is, we want these transient clears of a pte to still be considered
present even though _PAGE_PRESENT is cleared. What we are catching with the
debug WARN_ON in these functions is that we never use them to set a pte whose
old entry has the hardware valid bit set, because we don't do any tlb flush
with set_pte_at().

So the reason for pte_present() -> pte_val() & _PAGE_PRESENT is that we
switched the clear to drop _PAGE_PRESENT and set _PAGE_INVALID, and
pte_present() now checks both.

The reason for the removal of pte_protnone() is that we dropped that
set_pte_at() usage from the core autonuma code a long time back.

Now, considering we are calling this from mm/pgtable.c: with your approach of
not using pte flags directly in core code, we could switch this to
pte_hw_valid(). Maybe we can do that as an add-on patch?
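
One possible shape for such a helper, following the pte_raw() convention used
elsewhere in this series (a sketch, not a committed interface):

static inline bool pte_hw_valid(pte_t pte)
{
	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT));
}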

-aneesh


* Re: [PATCH V3 1/6] powerpc/mm/book3s: Update pmd_present to look at _PAGE_PRESENT bit
  2018-09-21 10:26   ` Aneesh Kumar K.V
@ 2018-09-21 10:52     ` Christophe LEROY
  0 siblings, 0 replies; 14+ messages in thread
From: Christophe LEROY @ 2018-09-21 10:52 UTC (permalink / raw)
  To: Aneesh Kumar K.V, npiggin, benh, paulus, mpe; +Cc: linuxppc-dev



On 21/09/2018 at 12:26, Aneesh Kumar K.V wrote:
> On 9/21/18 11:25 AM, Christophe LEROY wrote:
>>
>>
>> On 20/09/2018 at 20:09, Aneesh Kumar K.V wrote:
> 
>>> diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
>>> index d71c7777669c..aee04b209b51 100644
>>> --- a/arch/powerpc/mm/pgtable.c
>>> +++ b/arch/powerpc/mm/pgtable.c
>>> @@ -188,11 +188,10 @@ void set_pte_at(struct mm_struct *mm, unsigned 
>>> long addr, pte_t *ptep,
>>>           pte_t pte)
>>>   {
>>>       /*
>>> -     * When handling numa faults, we already have the pte marked
>>> -     * _PAGE_PRESENT, but we can be sure that it is not in hpte.
>>> -     * Hence we can use set_pte_at for them.
>>> +     * Make sure hardware valid bit is not set. We don't do
>>> +     * tlb flush for this update.
>>>        */
>>> -    VM_WARN_ON(pte_present(*ptep) && !pte_protnone(*ptep));
>>> +    VM_WARN_ON(pte_val(*ptep) & _PAGE_PRESENT);
>>
>> Why not use pte_present() anymore?
>>
>> Also, you are removing the pte_protnone() check; won't it change the
>> behaviour?
>>
>> If we can't use pte_present(), can we create a new helper for that
>> (although _PAGE_PRESENT exists on all platforms)?
>>
>> Christophe
>>
> 
> This patch updates the page table clear path to clear _PAGE_PRESENT and mark
> the entry invalid via _PAGE_INVALID. pte_present() now looks at both flags.
> That is, we want these transient clears of a pte to still be considered
> present even though _PAGE_PRESENT is cleared. What we are catching with the
> debug WARN_ON in these functions is that we never use them to set a pte whose
> old entry has the hardware valid bit set, because we don't do any tlb flush
> with set_pte_at().
> 
> So the reason for pte_present() -> pte_val() & _PAGE_PRESENT is that we
> switched the clear to drop _PAGE_PRESENT and set _PAGE_INVALID, and
> pte_present() now checks both.
> 
> The reason for the removal of pte_protnone() is that we dropped that
> set_pte_at() usage from the core autonuma code a long time back.
> 
> Now, considering we are calling this from mm/pgtable.c: with your approach of
> not using pte flags directly in core code, we could switch this to
> pte_hw_valid(). Maybe we can do that as an add-on patch?
> 

Ok, depending on which series goes first, I'll add it in mine if I have
to rebase.

Christophe


* Re: [V3, 1/6] powerpc/mm/book3s: Update pmd_present to look at _PAGE_PRESENT bit
  2018-09-20 18:09 [PATCH V3 1/6] powerpc/mm/book3s: Update pmd_present to look at _PAGE_PRESENT bit Aneesh Kumar K.V
                   ` (5 preceding siblings ...)
  2018-09-21  5:55 ` [PATCH V3 1/6] powerpc/mm/book3s: Update pmd_present to look at _PAGE_PRESENT bit Christophe LEROY
@ 2018-10-04  6:14 ` Michael Ellerman
  6 siblings, 0 replies; 14+ messages in thread
From: Michael Ellerman @ 2018-10-04  6:14 UTC (permalink / raw)
  To: Aneesh Kumar K.V, npiggin, benh, paulus; +Cc: Aneesh Kumar K.V, linuxppc-dev

On Thu, 2018-09-20 at 18:09:42 UTC, "Aneesh Kumar K.V" wrote:
> With this patch we use 0x8000000000000000UL (_PAGE_PRESENT) to indicate a valid
> pgd/pud/pmd entry. We also switch the p**_present() to look at this bit.
> 
> With pmd_present, we have a special case. We need to make sure we consider a
> pmd marked invalid during THP split as present. Right now we clear the
> _PAGE_PRESENT bit during a pmdp_invalidate. Inorder to consider this special
> case we add a new pte bit _PAGE_INVALID (mapped to _RPAGE_SW0). This bit is
> only used with _PAGE_PRESENT cleared. Hence we are not really losing a pte bit
> for this special case. pmd_present is also updated to look at _PAGE_INVALID.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>

Series applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/da7ad366b497f5fc1d4a416f168057

cheers


* Re: [PATCH V3 1/6] powerpc/mm/book3s: Update pmd_present to look at _PAGE_PRESENT bit
  2018-08-24  6:00 [PATCH V3 " Aneesh Kumar K.V
  2018-08-24  6:02 ` Aneesh Kumar K.V
  2018-08-24 10:29 ` kbuild test robot
@ 2018-08-24 10:34 ` kbuild test robot
  2 siblings, 0 replies; 14+ messages in thread
From: kbuild test robot @ 2018-08-24 10:34 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: kbuild-all, npiggin, benh, paulus, mpe, Aneesh Kumar K.V, linuxppc-dev


Hi Aneesh,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on powerpc/next]
[also build test WARNING on v4.18 next-20180822]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Aneesh-Kumar-K-V/powerpc-mm-book3s-Update-pmd_present-to-look-at-_PAGE_PRESENT-bit/20180824-141837
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-obs600_defconfig (attached as .config)
compiler: powerpc-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        GCC_VERSION=7.2.0 make.cross ARCH=powerpc 

All warnings (new ones prefixed by >>):

   In file included from include/linux/kernel.h:15:0,
                    from arch/powerpc/mm/pgtable.c:24:
   arch/powerpc/mm/pgtable.c: In function 'set_pte_at':
   arch/powerpc/mm/pgtable.c:195:14: error: implicit declaration of function 'pte_raw'; did you mean 'pte_read'? [-Werror=implicit-function-declaration]
     VM_WARN_ON((pte_raw(*ptep) & cpu_to_be64(_PAGE_PRESENT)) &&
                 ^
   include/linux/build_bug.h:36:63: note: in definition of macro 'BUILD_BUG_ON_INVALID'
    #define BUILD_BUG_ON_INVALID(e) ((void)(sizeof((__force long)(e))))
                                                                  ^
>> arch/powerpc/mm/pgtable.c:195:2: note: in expansion of macro 'VM_WARN_ON'
     VM_WARN_ON((pte_raw(*ptep) & cpu_to_be64(_PAGE_PRESENT)) &&
     ^~~~~~~~~~
   cc1: all warnings being treated as errors

vim +/VM_WARN_ON +195 arch/powerpc/mm/pgtable.c

  > 24	#include <linux/kernel.h>
    25	#include <linux/gfp.h>
    26	#include <linux/mm.h>
    27	#include <linux/percpu.h>
    28	#include <linux/hardirq.h>
    29	#include <linux/hugetlb.h>
    30	#include <asm/pgalloc.h>
    31	#include <asm/tlbflush.h>
    32	#include <asm/tlb.h>
    33	
    34	static inline int is_exec_fault(void)
    35	{
    36		return current->thread.regs && TRAP(current->thread.regs) == 0x400;
    37	}
    38	
    39	/* We only try to do i/d cache coherency on stuff that looks like
    40	 * reasonably "normal" PTEs. We currently require a PTE to be present
    41	 * and we avoid _PAGE_SPECIAL and cache inhibited pte. We also only do that
    42	 * on userspace PTEs
    43	 */
    44	static inline int pte_looks_normal(pte_t pte)
    45	{
    46	
    47	#if defined(CONFIG_PPC_BOOK3S_64)
    48		if ((pte_val(pte) & (_PAGE_PRESENT | _PAGE_SPECIAL)) == _PAGE_PRESENT) {
    49			if (pte_ci(pte))
    50				return 0;
    51			if (pte_user(pte))
    52				return 1;
    53		}
    54		return 0;
    55	#else
    56		return (pte_val(pte) &
    57			(_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER |
    58			 _PAGE_PRIVILEGED)) ==
    59			(_PAGE_PRESENT | _PAGE_USER);
    60	#endif
    61	}
    62	
    63	static struct page *maybe_pte_to_page(pte_t pte)
    64	{
    65		unsigned long pfn = pte_pfn(pte);
    66		struct page *page;
    67	
    68		if (unlikely(!pfn_valid(pfn)))
    69			return NULL;
    70		page = pfn_to_page(pfn);
    71		if (PageReserved(page))
    72			return NULL;
    73		return page;
    74	}
    75	
    76	#if defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0
    77	
    78	/* Server-style MMU handles coherency when hashing if HW exec permission
    79	 * is supposed per page (currently 64-bit only). If not, then, we always
    80	 * flush the cache for valid PTEs in set_pte. Embedded CPU without HW exec
    81	 * support falls into the same category.
    82	 */
    83	
    84	static pte_t set_pte_filter(pte_t pte)
    85	{
    86		if (radix_enabled())
    87			return pte;
    88	
    89		pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
    90		if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) ||
    91					       cpu_has_feature(CPU_FTR_NOEXECUTE))) {
    92			struct page *pg = maybe_pte_to_page(pte);
    93			if (!pg)
    94				return pte;
    95			if (!test_bit(PG_arch_1, &pg->flags)) {
    96				flush_dcache_icache_page(pg);
    97				set_bit(PG_arch_1, &pg->flags);
    98			}
    99		}
   100		return pte;
   101	}
   102	
   103	static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
   104					     int dirty)
   105	{
   106		return pte;
   107	}
   108	
   109	#else /* defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0 */
   110	
   111	/* Embedded type MMU with HW exec support. This is a bit more complicated
   112	 * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so
   113	 * instead we "filter out" the exec permission for non clean pages.
   114	 */
   115	static pte_t set_pte_filter(pte_t pte)
   116	{
   117		struct page *pg;
   118	
   119		/* No exec permission in the first place, move on */
   120		if (!(pte_val(pte) & _PAGE_EXEC) || !pte_looks_normal(pte))
   121			return pte;
   122	
   123		/* If you set _PAGE_EXEC on weird pages you're on your own */
   124		pg = maybe_pte_to_page(pte);
   125		if (unlikely(!pg))
   126			return pte;
   127	
   128		/* If the page clean, we move on */
   129		if (test_bit(PG_arch_1, &pg->flags))
   130			return pte;
   131	
   132		/* If it's an exec fault, we flush the cache and make it clean */
   133		if (is_exec_fault()) {
   134			flush_dcache_icache_page(pg);
   135			set_bit(PG_arch_1, &pg->flags);
   136			return pte;
   137		}
   138	
   139		/* Else, we filter out _PAGE_EXEC */
   140		return __pte(pte_val(pte) & ~_PAGE_EXEC);
   141	}
   142	
   143	static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
   144					     int dirty)
   145	{
   146		struct page *pg;
   147	
   148		/* So here, we only care about exec faults, as we use them
   149		 * to recover lost _PAGE_EXEC and perform I$/D$ coherency
   150		 * if necessary. Also if _PAGE_EXEC is already set, same deal,
   151		 * we just bail out
   152		 */
   153		if (dirty || (pte_val(pte) & _PAGE_EXEC) || !is_exec_fault())
   154			return pte;
   155	
   156	#ifdef CONFIG_DEBUG_VM
   157		/* So this is an exec fault, _PAGE_EXEC is not set. If it was
   158		 * an error we would have bailed out earlier in do_page_fault()
   159		 * but let's make sure of it
   160		 */
   161		if (WARN_ON(!(vma->vm_flags & VM_EXEC)))
   162			return pte;
   163	#endif /* CONFIG_DEBUG_VM */
   164	
   165		/* If you set _PAGE_EXEC on weird pages you're on your own */
   166		pg = maybe_pte_to_page(pte);
   167		if (unlikely(!pg))
   168			goto bail;
   169	
   170		/* If the page is already clean, we move on */
   171		if (test_bit(PG_arch_1, &pg->flags))
   172			goto bail;
   173	
   174		/* Clean the page and set PG_arch_1 */
   175		flush_dcache_icache_page(pg);
   176		set_bit(PG_arch_1, &pg->flags);
   177	
   178	 bail:
   179		return __pte(pte_val(pte) | _PAGE_EXEC);
   180	}
   181	
   182	#endif /* !(defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0) */
   183	
   184	/*
   185	 * set_pte stores a linux PTE into the linux page table.
   186	 */
   187	void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
   188			pte_t pte)
   189	{
   190		/*
   191		 * When handling numa faults, we already have the pte marked
   192		 * _PAGE_PRESENT, but we can be sure that it is not in hpte.
   193		 * Hence we can use set_pte_at for them.
   194		 */
 > 195		VM_WARN_ON((pte_raw(*ptep) & cpu_to_be64(_PAGE_PRESENT)) &&
   196			!pte_protnone(*ptep));
   197	
   198		/* Add the pte bit when trying to set a pte */
   199		pte = __pte(pte_val(pte) | _PAGE_PTE);
   200	
   201		/* Note: mm->context.id might not yet have been assigned as
   202		 * this context might not have been activated yet when this
   203		 * is called.
   204		 */
   205		pte = set_pte_filter(pte);
   206	
   207		/* Perform the setting of the PTE */
   208		__set_pte_at(mm, addr, ptep, pte, 0);
   209	}
   210	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation


* Re: [PATCH V3 1/6] powerpc/mm/book3s: Update pmd_present to look at _PAGE_PRESENT bit
  2018-08-24  6:00 [PATCH V3 " Aneesh Kumar K.V
  2018-08-24  6:02 ` Aneesh Kumar K.V
@ 2018-08-24 10:29 ` kbuild test robot
  2018-08-24 10:34 ` kbuild test robot
  2 siblings, 0 replies; 14+ messages in thread
From: kbuild test robot @ 2018-08-24 10:29 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: kbuild-all, npiggin, benh, paulus, mpe, Aneesh Kumar K.V, linuxppc-dev


Hi Aneesh,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on v4.18 next-20180822]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Aneesh-Kumar-K-V/powerpc-mm-book3s-Update-pmd_present-to-look-at-_PAGE_PRESENT-bit/20180824-141837
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-allnoconfig (attached as .config)
compiler: powerpc-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        GCC_VERSION=7.2.0 make.cross ARCH=powerpc 

All errors (new ones prefixed by >>):

   In file included from include/linux/kernel.h:15:0,
                    from arch/powerpc/mm/pgtable.c:24:
   arch/powerpc/mm/pgtable.c: In function 'set_pte_at':
>> arch/powerpc/mm/pgtable.c:195:14: error: implicit declaration of function 'pte_raw'; did you mean 'pte_read'? [-Werror=implicit-function-declaration]
     VM_WARN_ON((pte_raw(*ptep) & cpu_to_be64(_PAGE_PRESENT)) &&
                 ^
   include/linux/build_bug.h:36:63: note: in definition of macro 'BUILD_BUG_ON_INVALID'
    #define BUILD_BUG_ON_INVALID(e) ((void)(sizeof((__force long)(e))))
                                                                  ^
   arch/powerpc/mm/pgtable.c:195:2: note: in expansion of macro 'VM_WARN_ON'
     VM_WARN_ON((pte_raw(*ptep) & cpu_to_be64(_PAGE_PRESENT)) &&
     ^~~~~~~~~~
   cc1: all warnings being treated as errors

vim +195 arch/powerpc/mm/pgtable.c

  > 24	#include <linux/kernel.h>
    25	#include <linux/gfp.h>
    26	#include <linux/mm.h>
    27	#include <linux/percpu.h>
    28	#include <linux/hardirq.h>
    29	#include <linux/hugetlb.h>
    30	#include <asm/pgalloc.h>
    31	#include <asm/tlbflush.h>
    32	#include <asm/tlb.h>
    33	
    34	static inline int is_exec_fault(void)
    35	{
    36		return current->thread.regs && TRAP(current->thread.regs) == 0x400;
    37	}
    38	
    39	/* We only try to do i/d cache coherency on stuff that looks like
    40	 * reasonably "normal" PTEs. We currently require a PTE to be present
    41	 * and we avoid _PAGE_SPECIAL and cache inhibited pte. We also only do that
    42	 * on userspace PTEs
    43	 */
    44	static inline int pte_looks_normal(pte_t pte)
    45	{
    46	
    47	#if defined(CONFIG_PPC_BOOK3S_64)
    48		if ((pte_val(pte) & (_PAGE_PRESENT | _PAGE_SPECIAL)) == _PAGE_PRESENT) {
    49			if (pte_ci(pte))
    50				return 0;
    51			if (pte_user(pte))
    52				return 1;
    53		}
    54		return 0;
    55	#else
    56		return (pte_val(pte) &
    57			(_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER |
    58			 _PAGE_PRIVILEGED)) ==
    59			(_PAGE_PRESENT | _PAGE_USER);
    60	#endif
    61	}
    62	
    63	static struct page *maybe_pte_to_page(pte_t pte)
    64	{
    65		unsigned long pfn = pte_pfn(pte);
    66		struct page *page;
    67	
    68		if (unlikely(!pfn_valid(pfn)))
    69			return NULL;
    70		page = pfn_to_page(pfn);
    71		if (PageReserved(page))
    72			return NULL;
    73		return page;
    74	}
    75	
    76	#if defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0
    77	
    78	/* Server-style MMU handles coherency when hashing if HW exec permission
    79	 * is supposed per page (currently 64-bit only). If not, then, we always
    80	 * flush the cache for valid PTEs in set_pte. Embedded CPU without HW exec
    81	 * support falls into the same category.
    82	 */
    83	
    84	static pte_t set_pte_filter(pte_t pte)
    85	{
    86		if (radix_enabled())
    87			return pte;
    88	
    89		pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
    90		if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) ||
    91					       cpu_has_feature(CPU_FTR_NOEXECUTE))) {
    92			struct page *pg = maybe_pte_to_page(pte);
    93			if (!pg)
    94				return pte;
    95			if (!test_bit(PG_arch_1, &pg->flags)) {
    96				flush_dcache_icache_page(pg);
    97				set_bit(PG_arch_1, &pg->flags);
    98			}
    99		}
   100		return pte;
   101	}
   102	
   103	static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
   104					     int dirty)
   105	{
   106		return pte;
   107	}
   108	
   109	#else /* defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0 */
   110	
   111	/* Embedded type MMU with HW exec support. This is a bit more complicated
   112	 * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so
   113	 * instead we "filter out" the exec permission for non clean pages.
   114	 */
   115	static pte_t set_pte_filter(pte_t pte)
   116	{
   117		struct page *pg;
   118	
   119		/* No exec permission in the first place, move on */
   120		if (!(pte_val(pte) & _PAGE_EXEC) || !pte_looks_normal(pte))
   121			return pte;
   122	
   123		/* If you set _PAGE_EXEC on weird pages you're on your own */
   124		pg = maybe_pte_to_page(pte);
   125		if (unlikely(!pg))
   126			return pte;
   127	
   128		/* If the page clean, we move on */
   129		if (test_bit(PG_arch_1, &pg->flags))
   130			return pte;
   131	
   132		/* If it's an exec fault, we flush the cache and make it clean */
   133		if (is_exec_fault()) {
   134			flush_dcache_icache_page(pg);
   135			set_bit(PG_arch_1, &pg->flags);
   136			return pte;
   137		}
   138	
   139		/* Else, we filter out _PAGE_EXEC */
   140		return __pte(pte_val(pte) & ~_PAGE_EXEC);
   141	}
   142	
   143	static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
   144					     int dirty)
   145	{
   146		struct page *pg;
   147	
   148		/* So here, we only care about exec faults, as we use them
   149		 * to recover lost _PAGE_EXEC and perform I$/D$ coherency
   150		 * if necessary. Also if _PAGE_EXEC is already set, same deal,
   151		 * we just bail out
   152		 */
   153		if (dirty || (pte_val(pte) & _PAGE_EXEC) || !is_exec_fault())
   154			return pte;
   155	
   156	#ifdef CONFIG_DEBUG_VM
   157		/* So this is an exec fault, _PAGE_EXEC is not set. If it was
   158		 * an error we would have bailed out earlier in do_page_fault()
   159		 * but let's make sure of it
   160		 */
   161		if (WARN_ON(!(vma->vm_flags & VM_EXEC)))
   162			return pte;
   163	#endif /* CONFIG_DEBUG_VM */
   164	
   165		/* If you set _PAGE_EXEC on weird pages you're on your own */
   166		pg = maybe_pte_to_page(pte);
   167		if (unlikely(!pg))
   168			goto bail;
   169	
   170		/* If the page is already clean, we move on */
   171		if (test_bit(PG_arch_1, &pg->flags))
   172			goto bail;
   173	
   174		/* Clean the page and set PG_arch_1 */
   175		flush_dcache_icache_page(pg);
   176		set_bit(PG_arch_1, &pg->flags);
   177	
   178	 bail:
   179		return __pte(pte_val(pte) | _PAGE_EXEC);
   180	}
   181	
   182	#endif /* !(defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0) */
   183	
   184	/*
   185	 * set_pte stores a linux PTE into the linux page table.
   186	 */
   187	void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
   188			pte_t pte)
   189	{
   190		/*
   191		 * When handling numa faults, we already have the pte marked
   192		 * _PAGE_PRESENT, but we can be sure that it is not in hpte.
   193		 * Hence we can use set_pte_at for them.
   194		 */
 > 195		VM_WARN_ON((pte_raw(*ptep) & cpu_to_be64(_PAGE_PRESENT)) &&
   196			!pte_protnone(*ptep));
   197	
   198		/* Add the pte bit when trying to set a pte */
   199		pte = __pte(pte_val(pte) | _PAGE_PTE);
   200	
   201		/* Note: mm->context.id might not yet have been assigned as
   202		 * this context might not have been activated yet when this
   203		 * is called.
   204		 */
   205		pte = set_pte_filter(pte);
   206	
   207		/* Perform the setting of the PTE */
   208		__set_pte_at(mm, addr, ptep, pte, 0);
   209	}
   210	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation


* Re: [PATCH V3 1/6] powerpc/mm/book3s: Update pmd_present to look at _PAGE_PRESENT bit
  2018-08-24  6:00 [PATCH V3 " Aneesh Kumar K.V
@ 2018-08-24  6:02 ` Aneesh Kumar K.V
  2018-08-24 10:29 ` kbuild test robot
  2018-08-24 10:34 ` kbuild test robot
  2 siblings, 0 replies; 14+ messages in thread
From: Aneesh Kumar K.V @ 2018-08-24  6:02 UTC (permalink / raw)
  To: npiggin, benh, paulus, mpe; +Cc: linuxppc-dev

On 08/24/2018 11:30 AM, Aneesh Kumar K.V wrote:
> With this patch we use 0x8000000000000000UL (_PAGE_PRESENT) to indicate a valid
> pgd/pud/pmd entry. We also switch the p**_present() to look at this bit.
> 
> With pmd_present, we have a special case. We need to make sure we consider a
> pmd marked invalid during THP split as present. Right now we clear the
> _PAGE_PRESENT bit during a pmdp_invalidate. In order to consider this special
> case we add a new pte bit _PAGE_INVALID (mapped to _RPAGE_SW0). This bit is
> only used with _PAGE_PRESENT cleared. Hence we are not really losing a pte bit
> for this special case. pmd_present is also updated to look at _PAGE_INVALID.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>

Changes from V2:
* Rebased to mpe/merge
* Also fix up the new WARN_ON, since pte_present() now looks at _PAGE_INVALID.

-aneesh


* [PATCH V3 1/6] powerpc/mm/book3s: Update pmd_present to look at _PAGE_PRESENT bit
@ 2018-08-24  6:00 Aneesh Kumar K.V
  2018-08-24  6:02 ` Aneesh Kumar K.V
                   ` (2 more replies)
  0 siblings, 3 replies; 14+ messages in thread
From: Aneesh Kumar K.V @ 2018-08-24  6:00 UTC (permalink / raw)
  To: npiggin, benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

With this patch we use 0x8000000000000000UL (_PAGE_PRESENT) to indicate a valid
pgd/pud/pmd entry. We also switch the p**_present() to look at this bit.

With pmd_present, we have a special case. We need to make sure we consider a
pmd marked invalid during THP split as present. Right now we clear the
_PAGE_PRESENT bit during a pmdp_invalidate. Inorder to consider this special
case we add a new pte bit _PAGE_INVALID (mapped to _RPAGE_SW0). This bit is
only used with _PAGE_PRESENT cleared. Hence we are not really losing a pte bit
for this special case. pmd_present is also updated to look at _PAGE_INVALID.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash.h    |  5 +++++
 arch/powerpc/include/asm/book3s/64/pgtable.h | 14 +++++++++++---
 arch/powerpc/mm/hash_utils_64.c              |  6 +++---
 arch/powerpc/mm/pgtable-book3s64.c           |  9 +++++++--
 arch/powerpc/mm/pgtable.c                    |  3 ++-
 5 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index d52a51b2ce7b..fcf8b10a209f 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -18,6 +18,11 @@
 #include <asm/book3s/64/hash-4k.h>
 #endif
 
+/* Bits to set for a valid PMD/PUD/PGD entry */
+#define HASH_PMD_VAL_BITS		(0x8000000000000000UL)
+#define HASH_PUD_VAL_BITS		(0x8000000000000000UL)
+#define HASH_PGD_VAL_BITS		(0x8000000000000000UL)
+
 /*
  * Size of EA range mapped by our pagetables.
  */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 13a688fc8cd0..8feb4a3240d5 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -875,8 +875,16 @@ static inline int pmd_none(pmd_t pmd)
 
 static inline int pmd_present(pmd_t pmd)
 {
+	/*
+	 * A pmd is considered present if _PAGE_PRESENT is set.
+	 * We also need to treat as present a pmd which is marked
+	 * invalid during a split. Hence we look for _PAGE_INVALID
+	 * if we find _PAGE_PRESENT cleared.
+	 */
+	if (pmd_raw(pmd) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID))
+		return true;
 
-	return !pmd_none(pmd);
+	return false;
 }
 
 static inline int pmd_bad(pmd_t pmd)
@@ -903,7 +911,7 @@ static inline int pud_none(pud_t pud)
 
 static inline int pud_present(pud_t pud)
 {
-	return !pud_none(pud);
+	return (pud_raw(pud) & cpu_to_be64(_PAGE_PRESENT));
 }
 
 extern struct page *pud_page(pud_t pud);
@@ -950,7 +958,7 @@ static inline int pgd_none(pgd_t pgd)
 
 static inline int pgd_present(pgd_t pgd)
 {
-	return !pgd_none(pgd);
+	return (pgd_raw(pgd) & cpu_to_be64(_PAGE_PRESENT));
 }
 
 static inline pte_t pgd_pte(pgd_t pgd)
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index f23a89d8e4ce..8ff03c7205a0 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1001,9 +1001,9 @@ void __init hash__early_init_mmu(void)
 	 * 4k use hugepd format, so for hash set then to
 	 * zero
 	 */
-	__pmd_val_bits = 0;
-	__pud_val_bits = 0;
-	__pgd_val_bits = 0;
+	__pmd_val_bits = HASH_PMD_VAL_BITS;
+	__pud_val_bits = HASH_PUD_VAL_BITS;
+	__pgd_val_bits = HASH_PGD_VAL_BITS;
 
 	__kernel_virt_start = H_KERN_VIRT_START;
 	__kernel_virt_size = H_KERN_VIRT_SIZE;
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 01d7c0f7c4f0..cb73b3f9912c 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -69,7 +69,12 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 		pmd_t *pmdp, pmd_t pmd)
 {
 #ifdef CONFIG_DEBUG_VM
-	WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
+	/*
+	 * A pmd during a temporary invalidate will be considered present.
+	 * So check for _PAGE_PRESENT explicitly.
+	 */
+	WARN_ON((pte_raw(pmd_pte(*pmdp)) & cpu_to_be64(_PAGE_PRESENT)) &&
+		!pte_protnone(pmd_pte(*pmdp)));
 	assert_spin_locked(pmd_lockptr(mm, pmdp));
 	WARN_ON(!(pmd_trans_huge(pmd) || pmd_devmap(pmd)));
 #endif
@@ -106,7 +111,7 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 {
 	unsigned long old_pmd;
 
-	old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
+	old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, _PAGE_INVALID);
 	flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
 	/*
 	 * This ensures that generic code that rely on IRQ disabling
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index d71c7777669c..4e065383fbe3 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -192,7 +192,8 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
 	 * _PAGE_PRESENT, but we can be sure that it is not in hpte.
 	 * Hence we can use set_pte_at for them.
 	 */
-	VM_WARN_ON(pte_present(*ptep) && !pte_protnone(*ptep));
+	VM_WARN_ON((pte_raw(*ptep) & cpu_to_be64(_PAGE_PRESENT)) &&
+		!pte_protnone(*ptep));
 
 	/* Add the pte bit when trying to set a pte */
 	pte = __pte(pte_val(pte) | _PAGE_PTE);
-- 
2.17.1
