* [PATCH 01/14] powerpc/mm: Use big endian page table for book3s 64
@ 2016-03-07 13:39 Aneesh Kumar K.V
  2016-03-07 13:39 ` [PATCH 02/14] powerpc/mm: use _PAGE_READ to indicate Read access Aneesh Kumar K.V
                   ` (12 more replies)
  0 siblings, 13 replies; 19+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-07 13:39 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

This enables us to share the same page table code for
both radix and hash. Radix uses a hardware-defined big endian
page table format.
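
To illustrate the idea, here is a standalone user-space sketch (not part
of the patch) of the accessor pattern: the stored value is big endian,
while pte_val()/__pte() hand callers a CPU-endian value, so generic page
table code never sees the endianness. mk_pte() below stands in for the
kernel's __pte() macro and the example value is arbitrary.

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

/* stored big endian, like the new pte_t in pgtable-be-types.h */
typedef struct { uint64_t pte; } pte_t;

static inline pte_t mk_pte(uint64_t x)   { return (pte_t){ htobe64(x) }; }
static inline uint64_t pte_val(pte_t p)  { return be64toh(p.pte); }

int main(void)
{
	pte_t pte = mk_pte(0x8000000000000105ULL);

	/* callers keep testing software bits on the CPU-endian value */
	printf("raw storage: %016llx\n", (unsigned long long)pte.pte);
	printf("pte_val():   %016llx\n", (unsigned long long)pte_val(pte));
	return 0;
}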

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash.h   |  16 +++--
 arch/powerpc/include/asm/kvm_book3s_64.h    |  13 ++--
 arch/powerpc/include/asm/page.h             |   4 ++
 arch/powerpc/include/asm/pgtable-be-types.h | 104 ++++++++++++++++++++++++++++
 arch/powerpc/mm/hash64_4k.c                 |   7 +-
 arch/powerpc/mm/hash64_64k.c                |  14 ++--
 arch/powerpc/mm/hugepage-hash64.c           |   7 +-
 arch/powerpc/mm/hugetlbpage-hash64.c        |   7 +-
 arch/powerpc/mm/pgtable_64.c                |   9 ++-
 9 files changed, 159 insertions(+), 22 deletions(-)
 create mode 100644 arch/powerpc/include/asm/pgtable-be-types.h

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index d0ee6fcef823..2113de051824 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -250,22 +250,27 @@ static inline unsigned long pte_update(struct mm_struct *mm,
 				       int huge)
 {
 	unsigned long old, tmp;
+	unsigned long busy = cpu_to_be64(_PAGE_BUSY);
+
+	clr = cpu_to_be64(clr);
+	set = cpu_to_be64(set);
 
 	__asm__ __volatile__(
 	"1:	ldarx	%0,0,%3		# pte_update\n\
-	andi.	%1,%0,%6\n\
+	and.	%1,%0,%6\n\
 	bne-	1b \n\
 	andc	%1,%0,%4 \n\
 	or	%1,%1,%7\n\
 	stdcx.	%1,0,%3 \n\
 	bne-	1b"
 	: "=&r" (old), "=&r" (tmp), "=m" (*ptep)
-	: "r" (ptep), "r" (clr), "m" (*ptep), "i" (_PAGE_BUSY), "r" (set)
+	: "r" (ptep), "r" (clr), "m" (*ptep), "r" (busy), "r" (set)
 	: "cc" );
 	/* huge pages use the old page table lock */
 	if (!huge)
 		assert_pte_locked(mm, addr);
 
+	old = be64_to_cpu(old);
 	if (old & _PAGE_HASHPTE)
 		hpte_need_flush(mm, addr, ptep, old, huge);
 
@@ -351,16 +356,19 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
 		 _PAGE_SOFT_DIRTY);
 
 	unsigned long old, tmp;
+	unsigned long busy = cpu_to_be64(_PAGE_BUSY);
+
+	bits = cpu_to_be64(bits);
 
 	__asm__ __volatile__(
 	"1:	ldarx	%0,0,%4\n\
-		andi.	%1,%0,%6\n\
+		and.	%1,%0,%6\n\
 		bne-	1b \n\
 		or	%0,%3,%0\n\
 		stdcx.	%0,0,%4\n\
 		bne-	1b"
 	:"=&r" (old), "=&r" (tmp), "=m" (*ptep)
-	:"r" (bits), "r" (ptep), "m" (*ptep), "i" (_PAGE_BUSY)
+	:"r" (bits), "r" (ptep), "m" (*ptep), "r" (busy)
 	:"cc");
 }
 
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 2aa79c864e91..f9a7a89a3e4f 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -299,6 +299,8 @@ static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
  */
 static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing)
 {
+	__be64 opte, npte;
+	unsigned long old_ptev;
 	pte_t old_pte, new_pte = __pte(0);
 
 	while (1) {
@@ -306,24 +308,25 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing)
 		 * Make sure we don't reload from ptep
 		 */
 		old_pte = READ_ONCE(*ptep);
+		old_ptev = pte_val(old_pte);
 		/*
 		 * wait until _PAGE_BUSY is clear then set it atomically
 		 */
-		if (unlikely(pte_val(old_pte) & _PAGE_BUSY)) {
+		if (unlikely(old_ptev & _PAGE_BUSY)) {
 			cpu_relax();
 			continue;
 		}
 		/* If pte is not present return None */
-		if (unlikely(!(pte_val(old_pte) & _PAGE_PRESENT)))
+		if (unlikely(!(old_ptev & _PAGE_PRESENT)))
 			return __pte(0);
 
 		new_pte = pte_mkyoung(old_pte);
 		if (writing && pte_write(old_pte))
 			new_pte = pte_mkdirty(new_pte);
 
-		if (pte_val(old_pte) == __cmpxchg_u64((unsigned long *)ptep,
-						      pte_val(old_pte),
-						      pte_val(new_pte))) {
+		npte = cpu_to_be64(pte_val(new_pte));
+		opte = cpu_to_be64(old_ptev);
+		if (opte == __cmpxchg_u64((unsigned long *)ptep, opte, npte)) {
 			break;
 		}
 	}
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index ab3d8977bacd..158574d2acf4 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -288,7 +288,11 @@ extern long long virt_phys_offset;
 
 #ifndef __ASSEMBLY__
 
+#ifdef CONFIG_PPC_BOOK3S_64
+#include <asm/pgtable-be-types.h>
+#else
 #include <asm/pgtable-types.h>
+#endif
 
 typedef struct { signed long pd; } hugepd_t;
 
diff --git a/arch/powerpc/include/asm/pgtable-be-types.h b/arch/powerpc/include/asm/pgtable-be-types.h
new file mode 100644
index 000000000000..20527200d6ae
--- /dev/null
+++ b/arch/powerpc/include/asm/pgtable-be-types.h
@@ -0,0 +1,104 @@
+#ifndef _ASM_POWERPC_PGTABLE_BE_TYPES_H
+#define _ASM_POWERPC_PGTABLE_BE_TYPES_H
+
+#ifdef CONFIG_STRICT_MM_TYPECHECKS
+/* These are used to make use of C type-checking. */
+
+/* PTE level */
+typedef struct { __be64 pte; } pte_t;
+#define __pte(x)	((pte_t) { cpu_to_be64(x) })
+static inline unsigned long pte_val(pte_t x)
+{
+	return be64_to_cpu(x.pte);
+}
+
+/* PMD level */
+#ifdef CONFIG_PPC64
+typedef struct { __be64 pmd; } pmd_t;
+#define __pmd(x)	((pmd_t) { cpu_to_be64(x) })
+static inline unsigned long pmd_val(pmd_t x)
+{
+	return be64_to_cpu(x.pmd);
+}
+
+/*
+ * 64-bit hash always uses a 4-level table. Everybody else uses 4 levels
+ * only for 4K page size.
+ */
+#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES)
+typedef struct { __be64 pud; } pud_t;
+#define __pud(x)	((pud_t) { cpu_to_be64(x) })
+static inline unsigned long pud_val(pud_t x)
+{
+	return be64_to_cpu(x.pud);
+}
+#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */
+#endif /* CONFIG_PPC64 */
+
+/* PGD level */
+typedef struct { __be64 pgd; } pgd_t;
+#define __pgd(x)	((pgd_t) { cpu_to_be64(x) })
+static inline unsigned long pgd_val(pgd_t x)
+{
+	return be64_to_cpu(x.pgd);
+}
+
+/* Page protection bits */
+typedef struct { unsigned long pgprot; } pgprot_t;
+#define pgprot_val(x)	((x).pgprot)
+#define __pgprot(x)	((pgprot_t) { (x) })
+
+#else
+
+/*
+ * .. while these make it easier on the compiler
+ */
+
+typedef __be64 pte_t;
+#define __pte(x)	cpu_to_be64(x)
+static inline unsigned long pte_val(pte_t pte)
+{
+	return be64_to_cpu(pte);
+}
+
+#ifdef CONFIG_PPC64
+typedef __be64 pmd_t;
+#define __pmd(x)	cpu_to_be64(x)
+static inline unsigned long pmd_val(pmd_t pmd)
+{
+	return be64_to_cpu(pmd);
+}
+
+#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES)
+typedef __be64 pud_t;
+#define __pud(x)	cpu_to_be64(x)
+static inline unsigned long pud_val(pud_t pud)
+{
+	return be64_to_cpu(pud);
+}
+#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */
+#endif /* CONFIG_PPC64 */
+
+typedef __be64 pgd_t;
+#define __pgd(x)	cpu_to_be64(x)
+static inline unsigned long pgd_val(pgd_t pgd)
+{
+	return be64_to_cpu(pgd);
+}
+
+typedef unsigned long pgprot_t;
+#define pgprot_val(x)	(x)
+#define __pgprot(x)	(x)
+
+#endif /* CONFIG_STRICT_MM_TYPECHECKS */
+/*
+ * With hash config 64k pages additionally define a bigger "real PTE" type that
+ * gathers the "second half" part of the PTE for pseudo 64k pages
+ */
+#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
+typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
+#else
+typedef struct { pte_t pte; } real_pte_t;
+#endif
+
+#endif /* _ASM_POWERPC_PGTABLE_BE_TYPES_H */
diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
index 47d1b26effc6..71abd4c44c27 100644
--- a/arch/powerpc/mm/hash64_4k.c
+++ b/arch/powerpc/mm/hash64_4k.c
@@ -20,6 +20,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 		   pte_t *ptep, unsigned long trap, unsigned long flags,
 		   int ssize, int subpg_prot)
 {
+	__be64 opte, npte;
 	unsigned long hpte_group;
 	unsigned long rflags, pa;
 	unsigned long old_pte, new_pte;
@@ -47,8 +48,10 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
 		if (access & _PAGE_RW)
 			new_pte |= _PAGE_DIRTY;
-	} while (old_pte != __cmpxchg_u64((unsigned long *)ptep,
-					  old_pte, new_pte));
+
+		opte = cpu_to_be64(old_pte);
+		npte = cpu_to_be64(new_pte);
+	} while (opte != __cmpxchg_u64((unsigned long *)ptep, opte, npte));
 	/*
 	 * PP bits. _PAGE_USER is already PP bit 0x2, so we only
 	 * need to add in 0x1 if it's a read-only user page
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index b2d659cf51c6..6f9b3c34a5c0 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -49,6 +49,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 		   pte_t *ptep, unsigned long trap, unsigned long flags,
 		   int ssize, int subpg_prot)
 {
+	__be64 opte, npte;
 	real_pte_t rpte;
 	unsigned long *hidxp;
 	unsigned long hpte_group;
@@ -79,8 +80,10 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED | _PAGE_COMBO;
 		if (access & _PAGE_RW)
 			new_pte |= _PAGE_DIRTY;
-	} while (old_pte != __cmpxchg_u64((unsigned long *)ptep,
-					  old_pte, new_pte));
+
+		opte = cpu_to_be64(old_pte);
+		npte = cpu_to_be64(new_pte);
+	} while (opte != __cmpxchg_u64((unsigned long *)ptep, opte, npte));
 	/*
 	 * Handle the subpage protection bits
 	 */
@@ -220,7 +223,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
 		    unsigned long vsid, pte_t *ptep, unsigned long trap,
 		    unsigned long flags, int ssize)
 {
-
+	__be64 opte, npte;
 	unsigned long hpte_group;
 	unsigned long rflags, pa;
 	unsigned long old_pte, new_pte;
@@ -254,8 +257,9 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
 		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
 		if (access & _PAGE_RW)
 			new_pte |= _PAGE_DIRTY;
-	} while (old_pte != __cmpxchg_u64((unsigned long *)ptep,
-					  old_pte, new_pte));
+		opte = cpu_to_be64(old_pte);
+		npte = cpu_to_be64(new_pte);
+	} while (opte != __cmpxchg_u64((unsigned long *)ptep, opte, npte));
 
 	rflags = htab_convert_pte_flags(new_pte);
 
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index eb2accdd76fd..98891139c044 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -22,6 +22,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
 		    pmd_t *pmdp, unsigned long trap, unsigned long flags,
 		    int ssize, unsigned int psize)
 {
+	__be64 opmd, npmd;
 	unsigned int index, valid;
 	unsigned char *hpte_slot_array;
 	unsigned long rflags, pa, hidx;
@@ -49,8 +50,10 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
 		new_pmd = old_pmd | _PAGE_BUSY | _PAGE_ACCESSED;
 		if (access & _PAGE_RW)
 			new_pmd |= _PAGE_DIRTY;
-	} while (old_pmd != __cmpxchg_u64((unsigned long *)pmdp,
-					  old_pmd, new_pmd));
+		opmd = cpu_to_be64(old_pmd);
+		npmd = cpu_to_be64(new_pmd);
+	} while (opmd != __cmpxchg_u64((unsigned long *)pmdp, opmd, npmd));
+
 	rflags = htab_convert_pte_flags(new_pmd);
 
 #if 0
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 8555fce902fe..5bcb28606158 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -22,6 +22,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 		     pte_t *ptep, unsigned long trap, unsigned long flags,
 		     int ssize, unsigned int shift, unsigned int mmu_psize)
 {
+	__be64 opte, npte;
 	unsigned long vpn;
 	unsigned long old_pte, new_pte;
 	unsigned long rflags, pa, sz;
@@ -57,8 +58,10 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
 		if (access & _PAGE_RW)
 			new_pte |= _PAGE_DIRTY;
-	} while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
-					 old_pte, new_pte));
+		opte = cpu_to_be64(old_pte);
+		npte = cpu_to_be64(new_pte);
+	} while (opte != __cmpxchg_u64((unsigned long *)ptep, opte, npte));
+
 	rflags = htab_convert_pte_flags(new_pte);
 
 	sz = ((1UL) << shift);
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 0eb53128ca2a..aa742aa35b64 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -516,6 +516,7 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
 {
 
 	unsigned long old, tmp;
+	unsigned long busy = cpu_to_be64(_PAGE_BUSY);
 
 #ifdef CONFIG_DEBUG_VM
 	WARN_ON(!pmd_trans_huge(*pmdp));
@@ -523,17 +524,21 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
 #endif
 
 #ifdef PTE_ATOMIC_UPDATES
+	clr = cpu_to_be64(clr);
+	set = cpu_to_be64(set);
 	__asm__ __volatile__(
 	"1:	ldarx	%0,0,%3\n\
-		andi.	%1,%0,%6\n\
+		and.	%1,%0,%6\n\
 		bne-	1b \n\
 		andc	%1,%0,%4 \n\
 		or	%1,%1,%7\n\
 		stdcx.	%1,0,%3 \n\
 		bne-	1b"
 	: "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
-	: "r" (pmdp), "r" (clr), "m" (*pmdp), "i" (_PAGE_BUSY), "r" (set)
+	: "r" (pmdp), "r" (clr), "m" (*pmdp), "r" (busy), "r" (set)
 	: "cc" );
+
+	old = be64_to_cpu(old);
 #else
 	old = pmd_val(*pmdp);
 	*pmdp = __pmd((old & ~clr) | set);
-- 
2.5.0


* [PATCH 02/14] powerpc/mm: use _PAGE_READ to indicate Read access
  2016-03-07 13:39 [PATCH 01/14] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
@ 2016-03-07 13:39 ` Aneesh Kumar K.V
  2016-03-07 13:39 ` [PATCH 03/14] powerpc/mm/subpage: Clear RWX bit to indicate no access Aneesh Kumar K.V
                   ` (11 subsequent siblings)
  12 siblings, 0 replies; 19+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-07 13:39 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

This splits the _PAGE_RW bit into _PAGE_READ and _PAGE_WRITE. It also
removes the dependency on _PAGE_USER for implying read-only. One thing
to note here is that read is implied by write and execute permission,
hence we should always find _PAGE_READ set on a hash pte fault.

We still can't represent PROT_NONE as !(_PAGE_RWX), because automatic
NUMA balancing depends on keeping _PAGE_WRITE on a prot-none pte (for
details see commit b191f9b106ea "mm: numa: preserve PTE write
permissions across a NUMA hinting fault").
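
As a standalone sketch (not part of the patch) of what the split buys
us: the fast-path permission check in the hash fault handlers stays a
simple "required bits must be present in the pte" test. The helper name
below is made up for the example; the bit values are the book3s 64 ones
introduced by this patch.

#include <stdbool.h>
#include <stdio.h>

#define _PAGE_EXEC	0x00001
#define _PAGE_WRITE	0x00002
#define _PAGE_READ	0x00004

static bool access_permitted(unsigned long access, unsigned long pte)
{
	/* any bit required by 'access' but missing from the pte => fault */
	return (access & ~pte) == 0;
}

int main(void)
{
	unsigned long ro_pte = _PAGE_READ;
	unsigned long rw_pte = _PAGE_READ | _PAGE_WRITE;

	/* a store needs both READ and WRITE, a load only READ */
	printf("store to RO pte:  %d\n", access_permitted(_PAGE_READ | _PAGE_WRITE, ro_pte)); /* 0 */
	printf("store to RW pte:  %d\n", access_permitted(_PAGE_READ | _PAGE_WRITE, rw_pte)); /* 1 */
	printf("load from RO pte: %d\n", access_permitted(_PAGE_READ, ro_pte));               /* 1 */
	return 0;
}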

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash-64k.h |  4 +--
 arch/powerpc/include/asm/book3s/64/hash.h     | 35 ++++++++++++++++-----------
 arch/powerpc/include/asm/pte-common.h         |  5 ++++
 arch/powerpc/mm/hash64_4k.c                   |  2 +-
 arch/powerpc/mm/hash64_64k.c                  |  4 +--
 arch/powerpc/mm/hash_utils_64.c               |  9 ++++---
 arch/powerpc/mm/hugepage-hash64.c             |  2 +-
 arch/powerpc/mm/hugetlbpage-hash64.c          |  2 +-
 arch/powerpc/mm/hugetlbpage.c                 |  4 +--
 arch/powerpc/mm/pgtable.c                     |  4 +--
 arch/powerpc/mm/pgtable_64.c                  |  5 ++--
 arch/powerpc/platforms/cell/spu_base.c        |  2 +-
 arch/powerpc/platforms/cell/spufs/fault.c     |  4 +--
 drivers/misc/cxl/fault.c                      |  4 +--
 14 files changed, 49 insertions(+), 37 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 0a7956a80a08..279ded72f1db 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -291,10 +291,10 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 				      pmd_t *pmdp)
 {
 
-	if ((pmd_val(*pmdp) & _PAGE_RW) == 0)
+	if ((pmd_val(*pmdp) & _PAGE_WRITE) == 0)
 		return;
 
-	pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0);
+	pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0);
 }
 
 #endif /*  CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 2113de051824..f092d83fa623 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -16,8 +16,10 @@
 #define _PAGE_BIT_SWAP_TYPE	0
 
 #define _PAGE_EXEC		0x00001 /* execute permission */
-#define _PAGE_RW		0x00002 /* read & write access allowed */
+#define _PAGE_WRITE		0x00002 /* write access allowed */
 #define _PAGE_READ		0x00004	/* read access allowed */
+#define _PAGE_RW		(_PAGE_READ | _PAGE_WRITE)
+#define _PAGE_RWX		(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
 #define _PAGE_USER		0x00008 /* page may be accessed by userspace */
 #define _PAGE_GUARDED		0x00010 /* G: guarded (side-effect) page */
 /* M (memory coherence) is always set in the HPTE, so we don't need it here */
@@ -147,8 +149,8 @@
  */
 #define PAGE_PROT_BITS	(_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
 			 _PAGE_WRITETHRU | _PAGE_4K_PFN | \
-			 _PAGE_USER | _PAGE_ACCESSED |  \
-			 _PAGE_RW |  _PAGE_DIRTY | _PAGE_EXEC | \
+			 _PAGE_USER | _PAGE_ACCESSED |  _PAGE_READ |\
+			 _PAGE_WRITE |  _PAGE_DIRTY | _PAGE_EXEC | \
 			 _PAGE_SOFT_DIRTY)
 /*
  * We define 2 sets of base prot bits, one for basic pages (ie,
@@ -173,10 +175,12 @@
 #define PAGE_SHARED	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
 #define PAGE_SHARED_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \
 				 _PAGE_EXEC)
-#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_USER )
-#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
-#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_USER )
-#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ)
+#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ| \
+				 _PAGE_EXEC)
+#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ)
+#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ| \
+				 _PAGE_EXEC)
 
 #define __P000	PAGE_NONE
 #define __P001	PAGE_READONLY
@@ -300,19 +304,19 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 				      pte_t *ptep)
 {
 
-	if ((pte_val(*ptep) & _PAGE_RW) == 0)
+	if ((pte_val(*ptep) & _PAGE_WRITE) == 0)
 		return;
 
-	pte_update(mm, addr, ptep, _PAGE_RW, 0, 0);
+	pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0);
 }
 
 static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 					   unsigned long addr, pte_t *ptep)
 {
-	if ((pte_val(*ptep) & _PAGE_RW) == 0)
+	if ((pte_val(*ptep) & _PAGE_WRITE) == 0)
 		return;
 
-	pte_update(mm, addr, ptep, _PAGE_RW, 0, 1);
+	pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1);
 }
 
 /*
@@ -352,7 +356,7 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
 static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
 {
 	unsigned long bits = pte_val(entry) &
-		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC |
+		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_READ | _PAGE_WRITE | _PAGE_EXEC |
 		 _PAGE_SOFT_DIRTY);
 
 	unsigned long old, tmp;
@@ -386,7 +390,7 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd)
 
 
 /* Generic accessors to PTE bits */
-static inline int pte_write(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_RW);}
+static inline int pte_write(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_WRITE);}
 static inline int pte_dirty(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_DIRTY); }
 static inline int pte_young(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_ACCESSED); }
 static inline int pte_special(pte_t pte)	{ return !!(pte_val(pte) & _PAGE_SPECIAL); }
@@ -447,7 +451,7 @@ static inline unsigned long pte_pfn(pte_t pte)
 /* Generic modifiers for PTE bits */
 static inline pte_t pte_wrprotect(pte_t pte)
 {
-	return __pte(pte_val(pte) & ~_PAGE_RW);
+	return __pte(pte_val(pte) & ~_PAGE_WRITE);
 }
 
 static inline pte_t pte_mkclean(pte_t pte)
@@ -462,6 +466,9 @@ static inline pte_t pte_mkold(pte_t pte)
 
 static inline pte_t pte_mkwrite(pte_t pte)
 {
+	/*
+	 * write implies read, hence set both
+	 */
 	return __pte(pte_val(pte) | _PAGE_RW);
 }
 
diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h
index 1ec67b043065..9f5dea58b0db 100644
--- a/arch/powerpc/include/asm/pte-common.h
+++ b/arch/powerpc/include/asm/pte-common.h
@@ -198,3 +198,8 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void);
 /* Advertise support for _PAGE_SPECIAL */
 #define __HAVE_ARCH_PTE_SPECIAL
 
+#ifndef _PAGE_READ
+/* if _PAGE_READ is not defined, _PAGE_WRITE won't be defined either */
+#define _PAGE_READ 0
+#define _PAGE_WRITE _PAGE_RW
+#endif
diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
index 71abd4c44c27..7ebac279d38e 100644
--- a/arch/powerpc/mm/hash64_4k.c
+++ b/arch/powerpc/mm/hash64_4k.c
@@ -46,7 +46,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 		 * also add _PAGE_COMBO
 		 */
 		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
-		if (access & _PAGE_RW)
+		if (access & _PAGE_WRITE)
 			new_pte |= _PAGE_DIRTY;
 
 		opte = cpu_to_be64(old_pte);
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index 6f9b3c34a5c0..83ac9f658733 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -78,7 +78,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 		 * also add _PAGE_COMBO
 		 */
 		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED | _PAGE_COMBO;
-		if (access & _PAGE_RW)
+		if (access & _PAGE_WRITE)
 			new_pte |= _PAGE_DIRTY;
 
 		opte = cpu_to_be64(old_pte);
@@ -255,7 +255,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
 		 * a write access.
 		 */
 		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
-		if (access & _PAGE_RW)
+		if (access & _PAGE_WRITE)
 			new_pte |= _PAGE_DIRTY;
 		opte = cpu_to_be64(old_pte);
 		npte = cpu_to_be64(new_pte);
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 90dd9280894f..ea23403b3fc0 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -175,8 +175,9 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
 	 * or PP=0x3 for read-only (including writeable but clean pages).
 	 */
 	if (pteflags & _PAGE_USER) {
-		rflags |= 0x2;
-		if (!((pteflags & _PAGE_RW) && (pteflags & _PAGE_DIRTY)))
+		if (pteflags & _PAGE_RWX)
+			rflags |= 0x2;
+		if (!((pteflags & _PAGE_WRITE) && (pteflags & _PAGE_DIRTY)))
 			rflags |= 0x1;
 	}
 	/*
@@ -1205,7 +1206,7 @@ EXPORT_SYMBOL_GPL(hash_page);
 int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
 		unsigned long dsisr)
 {
-	unsigned long access = _PAGE_PRESENT;
+	unsigned long access = _PAGE_PRESENT | _PAGE_READ;
 	unsigned long flags = 0;
 	struct mm_struct *mm = current->mm;
 
@@ -1216,7 +1217,7 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
 		flags |= HPTE_NOHPTE_UPDATE;
 
 	if (dsisr & DSISR_ISSTORE)
-		access |= _PAGE_RW;
+		access |= _PAGE_WRITE;
 	/*
 	 * We need to set the _PAGE_USER bit if MSR_PR is set or if we are
 	 * accessing a userspace segment (even from the kernel). We assume
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index 98891139c044..39342638a498 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -48,7 +48,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
 		 * a write access
 		 */
 		new_pmd = old_pmd | _PAGE_BUSY | _PAGE_ACCESSED;
-		if (access & _PAGE_RW)
+		if (access & _PAGE_WRITE)
 			new_pmd |= _PAGE_DIRTY;
 		opmd = cpu_to_be64(old_pmd);
 		npmd = cpu_to_be64(new_pmd);
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 5bcb28606158..e6e54a04bd32 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -56,7 +56,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 		/* Try to lock the PTE, add ACCESSED and DIRTY if it was
 		 * a write access */
 		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
-		if (access & _PAGE_RW)
+		if (access & _PAGE_WRITE)
 			new_pte |= _PAGE_DIRTY;
 		opte = cpu_to_be64(old_pte);
 		npte = cpu_to_be64(new_pte);
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 6dd272b6196f..6e52e722d3f2 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -1003,9 +1003,9 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 		end = pte_end;
 
 	pte = READ_ONCE(*ptep);
-	mask = _PAGE_PRESENT | _PAGE_USER;
+	mask = _PAGE_PRESENT | _PAGE_USER | _PAGE_READ;
 	if (write)
-		mask |= _PAGE_RW;
+		mask |= _PAGE_WRITE;
 
 	if ((pte_val(pte) & mask) != mask)
 		return 0;
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 83dfd7925c72..98b5c03e344d 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -177,8 +177,8 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
 	 * _PAGE_PRESENT, but we can be sure that it is not in hpte.
 	 * Hence we can use set_pte_at for them.
 	 */
-	VM_WARN_ON((pte_val(*ptep) & (_PAGE_PRESENT | _PAGE_USER)) ==
-		(_PAGE_PRESENT | _PAGE_USER));
+	VM_WARN_ON(pte_present(*ptep) && !pte_protnone(*ptep));
+
 	/*
 	 * Add the pte bit when tryint set a pte
 	 */
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index aa742aa35b64..00d8d985bba3 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -277,7 +277,7 @@ void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
 	void *caller = __builtin_return_address(0);
 
 	/* writeable implies dirty for kernel addresses */
-	if (flags & _PAGE_RW)
+	if (flags & _PAGE_WRITE)
 		flags |= _PAGE_DIRTY;
 
 	/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
@@ -681,8 +681,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 		pmd_t *pmdp, pmd_t pmd)
 {
 #ifdef CONFIG_DEBUG_VM
-	WARN_ON((pmd_val(*pmdp) & (_PAGE_PRESENT | _PAGE_USER)) ==
-		(_PAGE_PRESENT | _PAGE_USER));
+	WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
 	assert_spin_locked(&mm->page_table_lock);
 	WARN_ON(!pmd_trans_huge(pmd));
 #endif
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index f7af74f83693..7bc00b508128 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -197,7 +197,7 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr)
 	    (REGION_ID(ea) != USER_REGION_ID)) {
 
 		spin_unlock(&spu->register_lock);
-		ret = hash_page(ea, _PAGE_PRESENT, 0x300, dsisr);
+		ret = hash_page(ea, _PAGE_PRESENT | _PAGE_READ, 0x300, dsisr);
 		spin_lock(&spu->register_lock);
 
 		if (!ret) {
diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c
index d98f845ac777..c3a3bf1745b7 100644
--- a/arch/powerpc/platforms/cell/spufs/fault.c
+++ b/arch/powerpc/platforms/cell/spufs/fault.c
@@ -141,8 +141,8 @@ int spufs_handle_class1(struct spu_context *ctx)
 	/* we must not hold the lock when entering copro_handle_mm_fault */
 	spu_release(ctx);
 
-	access = (_PAGE_PRESENT | _PAGE_USER);
-	access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL;
+	access = (_PAGE_PRESENT | _PAGE_READ | _PAGE_USER);
+	access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_WRITE : 0UL;
 	local_irq_save(flags);
 	ret = hash_page(ea, access, 0x300, dsisr);
 	local_irq_restore(flags);
diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c
index 81c3f75b7330..a3d5e1e16c21 100644
--- a/drivers/misc/cxl/fault.c
+++ b/drivers/misc/cxl/fault.c
@@ -149,9 +149,9 @@ static void cxl_handle_page_fault(struct cxl_context *ctx,
 	 * update_mmu_cache() will not have loaded the hash since current->trap
 	 * is not a 0x400 or 0x300, so just call hash_page_mm() here.
 	 */
-	access = _PAGE_PRESENT;
+	access = _PAGE_PRESENT | _PAGE_READ;
 	if (dsisr & CXL_PSL_DSISR_An_S)
-		access |= _PAGE_RW;
+		access |= _PAGE_WRITE;
 	if ((!ctx->kernel) || ~(dar & (1ULL << 63)))
 		access |= _PAGE_USER;
 
-- 
2.5.0


* [PATCH 03/14] powerpc/mm/subpage: Clear RWX bit to indicate no access
  2016-03-07 13:39 [PATCH 01/14] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
  2016-03-07 13:39 ` [PATCH 02/14] powerpc/mm: use _PAGE_READ to indicate Read access Aneesh Kumar K.V
@ 2016-03-07 13:39 ` Aneesh Kumar K.V
  2016-03-07 13:39 ` [PATCH 04/14] powerpc/mm: Use pte_user instead of opencoding Aneesh Kumar K.V
                   ` (10 subsequent siblings)
  12 siblings, 0 replies; 19+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-07 13:39 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Subpage protection used to depend on the _PAGE_USER bit to implement a
no-access mode. This patch switches that to use _PAGE_RWX: we now clear
read, write and execute access from the pte instead of clearing
_PAGE_USER. This is done to enable the later switch to _PAGE_PRIVILEGED.
subpage_protection() returns the pte bits that need to be cleared, and
instead of updating the interface to handle no-access separately, it is
simpler to clear RWX access to indicate no access.

We still don't insert a hash pte for these ptes, hence we should not
get a PROT_FAULT with this change.
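
For reference, a standalone sketch (not part of the patch) of how the
2-bit subpage permission field now maps to pte bits to clear. The helper
name is made up for the example; the logic mirrors the hunk below.

#include <stdio.h>

#define _PAGE_EXEC	0x00001
#define _PAGE_WRITE	0x00002
#define _PAGE_READ	0x00004
#define _PAGE_RWX	(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)

/* 0 -> full permission, 1 -> read only, 2/3 -> no access */
static unsigned long spp_to_clear(unsigned int spp)
{
	return ((spp & 2) ? _PAGE_RWX : 0) | ((spp & 1) ? _PAGE_WRITE : 0);
}

int main(void)
{
	for (unsigned int spp = 0; spp < 4; spp++)
		printf("spp=%u clears 0x%lx\n", spp, spp_to_clear(spp));
	return 0;
}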

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/mm/hash_utils_64.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index ea23403b3fc0..ec37f4b0a8ff 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -917,7 +917,7 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
  * Userspace sets the subpage permissions using the subpage_prot system call.
  *
  * Result is 0: full permissions, _PAGE_RW: read-only,
- * _PAGE_USER or _PAGE_USER|_PAGE_RW: no access.
+ * _PAGE_RWX: no access.
  */
 static int subpage_protection(struct mm_struct *mm, unsigned long ea)
 {
@@ -943,8 +943,13 @@ static int subpage_protection(struct mm_struct *mm, unsigned long ea)
 	/* extract 2-bit bitfield for this 4k subpage */
 	spp >>= 30 - 2 * ((ea >> 12) & 0xf);
 
-	/* turn 0,1,2,3 into combination of _PAGE_USER and _PAGE_RW */
-	spp = ((spp & 2) ? _PAGE_USER : 0) | ((spp & 1) ? _PAGE_RW : 0);
+	/*
+	 * 0 -> full permission
+	 * 1 -> read only
+	 * 2 -> no access.
+	 * We return the flags that need to be cleared.
+	 */
+	spp = ((spp & 2) ? _PAGE_RWX : 0) | ((spp & 1) ? _PAGE_WRITE : 0);
 	return spp;
 }
 
-- 
2.5.0


* [PATCH 04/14] powerpc/mm: Use pte_user instead of opencoding
  2016-03-07 13:39 [PATCH 01/14] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
  2016-03-07 13:39 ` [PATCH 02/14] powerpc/mm: use _PAGE_READ to indicate Read access Aneesh Kumar K.V
  2016-03-07 13:39 ` [PATCH 03/14] powerpc/mm/subpage: Clear RWX bit to indicate no access Aneesh Kumar K.V
@ 2016-03-07 13:39 ` Aneesh Kumar K.V
  2016-03-07 13:39 ` [PATCH 05/14] powerpc/mm: Replace _PAGE_USER with _PAGE_PRIVILEGED Aneesh Kumar K.V
                   ` (9 subsequent siblings)
  12 siblings, 0 replies; 19+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-07 13:39 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

We have a common declaration in pte-common.h. Add a book3s-specific one
and switch to pte_user(). In a later patch we will switch _PAGE_USER to
_PAGE_PRIVILEGED.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 5 +++++
 arch/powerpc/perf/callchain.c                | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 77d3ce05798e..4ac6221802ad 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -185,6 +185,11 @@ extern struct page *pgd_page(pgd_t pgd);
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE })
 #define __swp_entry_to_pte(x)	__pte((x).val | _PAGE_PTE)
 
+static inline bool pte_user(pte_t pte)
+{
+	return (pte_val(pte) & _PAGE_USER);
+}
+
 #ifdef CONFIG_MEM_SOFT_DIRTY
 #define _PAGE_SWP_SOFT_DIRTY   (1UL << (SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE))
 #else
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index e04a6752b399..0071de76d776 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -137,7 +137,7 @@ static int read_user_stack_slow(void __user *ptr, void *buf, int nb)
 	offset = addr & ((1UL << shift) - 1);
 
 	pte = READ_ONCE(*ptep);
-	if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER))
+	if (!pte_present(pte) || !pte_user(pte))
 		goto err_out;
 	pfn = pte_pfn(pte);
 	if (!page_is_ram(pfn))
-- 
2.5.0


* [PATCH 05/14] powerpc/mm: Replace _PAGE_USER with _PAGE_PRIVILEGED
  2016-03-07 13:39 [PATCH 01/14] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (2 preceding siblings ...)
  2016-03-07 13:39 ` [PATCH 04/14] powerpc/mm: Use pte_user instead of opencoding Aneesh Kumar K.V
@ 2016-03-07 13:39 ` Aneesh Kumar K.V
  2016-03-22  6:05   ` Michael Neuling
  2016-03-07 13:39 ` [PATCH 06/14] powerpc/mm: Remove RPN_SHIFT and RPN_SIZE Aneesh Kumar K.V
                   ` (8 subsequent siblings)
  12 siblings, 1 reply; 19+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-07 13:39 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

_PAGE_PRIVILEGED means the page can be accessed only by the kernel. This
is done to keep the pte bits similar to the PowerISA 3.0 radix PTE
format. User pages are now marked by clearing the _PAGE_PRIVILEGED bit.

Previously we allowed the kernel to have a privileged page
in the lower address range (USER_REGION). With this patch such access
is denied.

We also prevent kernel access to a non-privileged page in the
higher address range (ie, REGION_ID != 0). Neither of the above access
scenarios should ever happen.
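
A standalone sketch (not part of the patch) of the new access check: a
fault is refused either when a required permission bit is missing or
when the privilege level of the access and of the pte disagree. The bit
values are the ones from this series; the mock mirrors the
check_pte_access() helper added below.

#include <stdbool.h>
#include <stdio.h>

#define _PAGE_WRITE		0x00002
#define _PAGE_READ		0x00004
#define _PAGE_PRIVILEGED	0x00008

static bool check_pte_access(unsigned long access, unsigned long ptev)
{
	if (access & ~ptev)		/* required permission missing */
		return false;
	/* privileged access must hit a privileged pte, and vice versa */
	if ((access & _PAGE_PRIVILEGED) != (ptev & _PAGE_PRIVILEGED))
		return false;
	return true;
}

int main(void)
{
	unsigned long kernel_pte = _PAGE_READ | _PAGE_WRITE | _PAGE_PRIVILEGED;
	unsigned long user_pte = _PAGE_READ | _PAGE_WRITE;

	printf("user read of kernel pte:   %d\n",
	       check_pte_access(_PAGE_READ, kernel_pte));			/* 0 */
	printf("kernel read of user pte:   %d\n",
	       check_pte_access(_PAGE_READ | _PAGE_PRIVILEGED, user_pte));	/* 0 */
	printf("kernel read of kernel pte: %d\n",
	       check_pte_access(_PAGE_READ | _PAGE_PRIVILEGED, kernel_pte));	/* 1 */
	return 0;
}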

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash.h    | 34 ++++++++++++++--------------
 arch/powerpc/include/asm/book3s/64/pgtable.h | 18 ++++++++++++++-
 arch/powerpc/mm/hash64_4k.c                  |  2 +-
 arch/powerpc/mm/hash64_64k.c                 |  4 ++--
 arch/powerpc/mm/hash_utils_64.c              | 17 ++++++++------
 arch/powerpc/mm/hugepage-hash64.c            |  2 +-
 arch/powerpc/mm/hugetlbpage-hash64.c         |  3 ++-
 arch/powerpc/mm/hugetlbpage.c                |  2 +-
 arch/powerpc/mm/pgtable.c                    | 15 ++++++++++--
 arch/powerpc/mm/pgtable_64.c                 | 15 +++++++++---
 arch/powerpc/platforms/cell/spufs/fault.c    |  2 +-
 drivers/misc/cxl/fault.c                     |  5 ++--
 12 files changed, 80 insertions(+), 39 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index f092d83fa623..fbefbaa92736 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -20,7 +20,7 @@
 #define _PAGE_READ		0x00004	/* read access allowed */
 #define _PAGE_RW		(_PAGE_READ | _PAGE_WRITE)
 #define _PAGE_RWX		(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
-#define _PAGE_USER		0x00008 /* page may be accessed by userspace */
+#define _PAGE_PRIVILEGED	0x00008 /* page can only be access by kernel */
 #define _PAGE_GUARDED		0x00010 /* G: guarded (side-effect) page */
 /* M (memory coherence) is always set in the HPTE, so we don't need it here */
 #define _PAGE_COHERENT		0x0
@@ -114,10 +114,13 @@
 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
 #endif /* CONFIG_PPC_MM_SLICES */
 
-/* No separate kernel read-only */
-#define _PAGE_KERNEL_RW		(_PAGE_RW | _PAGE_DIRTY) /* user access blocked by key */
+/*
+ * No separate kernel read-only, user access blocked by key
+ */
+#define _PAGE_KERNEL_RW		(_PAGE_PRIVILEGED | _PAGE_RW | _PAGE_DIRTY)
 #define _PAGE_KERNEL_RO		 _PAGE_KERNEL_RW
-#define _PAGE_KERNEL_RWX	(_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC)
+#define _PAGE_KERNEL_RWX	(_PAGE_PRIVILEGED | _PAGE_DIRTY | \
+				 _PAGE_RW | _PAGE_EXEC)
 
 /* Strong Access Ordering */
 #define _PAGE_SAO		(_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT)
@@ -149,7 +152,7 @@
  */
 #define PAGE_PROT_BITS	(_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
 			 _PAGE_WRITETHRU | _PAGE_4K_PFN | \
-			 _PAGE_USER | _PAGE_ACCESSED |  _PAGE_READ |\
+			 _PAGE_PRIVILEGED | _PAGE_ACCESSED |  _PAGE_READ |\
 			 _PAGE_WRITE |  _PAGE_DIRTY | _PAGE_EXEC | \
 			 _PAGE_SOFT_DIRTY)
 /*
@@ -171,16 +174,13 @@
  *
  * Note due to the way vm flags are laid out, the bits are XWR
  */
-#define PAGE_NONE	__pgprot(_PAGE_BASE)
-#define PAGE_SHARED	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
-#define PAGE_SHARED_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \
-				 _PAGE_EXEC)
-#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ)
-#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ| \
-				 _PAGE_EXEC)
-#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ)
-#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ| \
-				 _PAGE_EXEC)
+#define PAGE_NONE	__pgprot(_PAGE_BASE | _PAGE_PRIVILEGED)
+#define PAGE_SHARED	__pgprot(_PAGE_BASE | _PAGE_RW)
+#define PAGE_SHARED_X	__pgprot(_PAGE_BASE | _PAGE_RW | _PAGE_EXEC)
+#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_READ)
+#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
+#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_READ)
+#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
 
 #define __P000	PAGE_NONE
 #define __P001	PAGE_READONLY
@@ -421,8 +421,8 @@ static inline pte_t pte_clear_soft_dirty(pte_t pte)
  */
 static inline int pte_protnone(pte_t pte)
 {
-	return (pte_val(pte) &
-		(_PAGE_PRESENT | _PAGE_USER)) == _PAGE_PRESENT;
+	return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PRIVILEGED)) ==
+		(_PAGE_PRESENT | _PAGE_PRIVILEGED);
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 4ac6221802ad..97d06de8dbf6 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -187,7 +187,7 @@ extern struct page *pgd_page(pgd_t pgd);
 
 static inline bool pte_user(pte_t pte)
 {
-	return (pte_val(pte) & _PAGE_USER);
+	return !(pte_val(pte) & _PAGE_PRIVILEGED);
 }
 
 #ifdef CONFIG_MEM_SOFT_DIRTY
@@ -211,6 +211,22 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
 }
 #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
 
+static inline bool check_pte_access(unsigned long access, unsigned long ptev)
+{
+	/*
+	 * This checks the _PAGE_RWX and _PAGE_PRESENT bits
+	 */
+	if (access & ~ptev)
+		return false;
+	/*
+	 * This checks for access to privileged space
+	 */
+	if ((access & _PAGE_PRIVILEGED) != (ptev & _PAGE_PRIVILEGED))
+		return false;
+
+	return true;
+}
+
 void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
 void pgtable_cache_init(void);
 
diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
index 7ebac279d38e..42ba12c184e1 100644
--- a/arch/powerpc/mm/hash64_4k.c
+++ b/arch/powerpc/mm/hash64_4k.c
@@ -38,7 +38,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 		if (unlikely(old_pte & _PAGE_BUSY))
 			return 0;
 		/* If PTE permissions don't match, take page fault */
-		if (unlikely(access & ~old_pte))
+		if (unlikely(!check_pte_access(access, old_pte)))
 			return 1;
 		/*
 		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index 83ac9f658733..f33b410d6c8a 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -70,7 +70,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 		if (unlikely(old_pte & _PAGE_BUSY))
 			return 0;
 		/* If PTE permissions don't match, take page fault */
-		if (unlikely(access & ~old_pte))
+		if (unlikely(!check_pte_access(access, old_pte)))
 			return 1;
 		/*
 		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
@@ -241,7 +241,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
 		if (unlikely(old_pte & _PAGE_BUSY))
 			return 0;
 		/* If PTE permissions don't match, take page fault */
-		if (unlikely(access & ~old_pte))
+		if (unlikely(!check_pte_access(access, old_pte)))
 			return 1;
 		/*
 		 * Check if PTE has the cache-inhibit bit set
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index ec37f4b0a8ff..630603f74056 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -174,7 +174,7 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
 	 * User area is mapped with PP=0x2 for read/write
 	 * or PP=0x3 for read-only (including writeable but clean pages).
 	 */
-	if (pteflags & _PAGE_USER) {
+	if (!(pteflags & _PAGE_PRIVILEGED)) {
 		if (pteflags & _PAGE_RWX)
 			rflags |= 0x2;
 		if (!((pteflags & _PAGE_WRITE) && (pteflags & _PAGE_DIRTY)))
@@ -1086,7 +1086,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
 	/* Pre-check access permissions (will be re-checked atomically
 	 * in __hash_page_XX but this pre-check is a fast path
 	 */
-	if (access & ~pte_val(*ptep)) {
+	if (!check_pte_access(access, pte_val(*ptep))) {
 		DBG_LOW(" no access !\n");
 		rc = 1;
 		goto bail;
@@ -1224,12 +1224,15 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
 	if (dsisr & DSISR_ISSTORE)
 		access |= _PAGE_WRITE;
 	/*
-	 * We need to set the _PAGE_USER bit if MSR_PR is set or if we are
-	 * accessing a userspace segment (even from the kernel). We assume
-	 * kernel addresses always have the high bit set.
+	 * We set _PAGE_PRIVILEGED only when
+	 * kernel mode accesses kernel space.
+	 *
+	 * _PAGE_PRIVILEGED is NOT set
+	 * 1) when kernel mode accesses user space
+	 * 2) when user space accesses kernel space.
 	 */
-	if ((msr & MSR_PR) || (REGION_ID(ea) == USER_REGION_ID))
-		access |= _PAGE_USER;
+	if (!(msr & MSR_PR) && !(REGION_ID(ea) == USER_REGION_ID))
+		access |= _PAGE_PRIVILEGED;
 
 	if (trap == 0x400)
 		access |= _PAGE_EXEC;
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index 39342638a498..182f1d3fe73c 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -41,7 +41,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
 		if (unlikely(old_pmd & _PAGE_BUSY))
 			return 0;
 		/* If PMD permissions don't match, take page fault */
-		if (unlikely(access & ~old_pmd))
+		if (unlikely(!check_pte_access(access, old_pmd)))
 			return 1;
 		/*
 		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index e6e54a04bd32..96765510a49c 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -51,8 +51,9 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 		if (unlikely(old_pte & _PAGE_BUSY))
 			return 0;
 		/* If PTE permissions don't match, take page fault */
-		if (unlikely(access & ~old_pte))
+		if (unlikely(!check_pte_access(access, old_pte)))
 			return 1;
+
 		/* Try to lock the PTE, add ACCESSED and DIRTY if it was
 		 * a write access */
 		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 6e52e722d3f2..7201e9c624d5 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -1003,7 +1003,7 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 		end = pte_end;
 
 	pte = READ_ONCE(*ptep);
-	mask = _PAGE_PRESENT | _PAGE_USER | _PAGE_READ;
+	mask = _PAGE_PRESENT | _PAGE_READ;
 	if (write)
 		mask |= _PAGE_WRITE;
 
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 98b5c03e344d..7b492283d502 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -43,9 +43,20 @@ static inline int is_exec_fault(void)
  */
 static inline int pte_looks_normal(pte_t pte)
 {
+
+#if defined(CONFIG_PPC_BOOK3S_64)
+	if ((pte_val(pte) &
+	     (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE)) ==
+	    _PAGE_PRESENT) {
+		if (!(pte_val(pte) & _PAGE_PRIVILEGED))
+			return 1;
+	}
+	return 0;
+#else
 	return (pte_val(pte) &
-	    (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) ==
-	    (_PAGE_PRESENT | _PAGE_USER);
+		(_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) ==
+		(_PAGE_PRESENT | _PAGE_USER);
+#endif
 }
 
 static struct page *maybe_pte_to_page(pte_t pte)
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 00d8d985bba3..441905f7bba4 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -280,8 +280,17 @@ void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
 	if (flags & _PAGE_WRITE)
 		flags |= _PAGE_DIRTY;
 
-	/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
-	flags &= ~(_PAGE_USER | _PAGE_EXEC);
+	/* we don't want to let _PAGE_EXEC leak out */
+	flags &= ~_PAGE_EXEC;
+	/*
+	 * Force kernel mapping.
+	 */
+#if defined(CONFIG_PPC_BOOK3S_64)
+	flags |= _PAGE_PRIVILEGED;
+#else
+	flags &= ~_PAGE_USER;
+#endif
+
 
 #ifdef _PAGE_BAP_SR
 	/* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format
@@ -669,7 +678,7 @@ void pmdp_huge_split_prepare(struct vm_area_struct *vma,
 	 * the translation is still valid, because we will withdraw
 	 * pgtable_t after this.
 	 */
-	pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_USER, 0);
+	pmd_hugepage_update(vma->vm_mm, address, pmdp, 0, _PAGE_PRIVILEGED);
 }
 
 
diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c
index c3a3bf1745b7..e29e4d5afa2d 100644
--- a/arch/powerpc/platforms/cell/spufs/fault.c
+++ b/arch/powerpc/platforms/cell/spufs/fault.c
@@ -141,7 +141,7 @@ int spufs_handle_class1(struct spu_context *ctx)
 	/* we must not hold the lock when entering copro_handle_mm_fault */
 	spu_release(ctx);
 
-	access = (_PAGE_PRESENT | _PAGE_READ | _PAGE_USER);
+	access = (_PAGE_PRESENT | _PAGE_READ);
 	access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_WRITE : 0UL;
 	local_irq_save(flags);
 	ret = hash_page(ea, access, 0x300, dsisr);
diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c
index a3d5e1e16c21..23e37cd41e64 100644
--- a/drivers/misc/cxl/fault.c
+++ b/drivers/misc/cxl/fault.c
@@ -152,8 +152,9 @@ static void cxl_handle_page_fault(struct cxl_context *ctx,
 	access = _PAGE_PRESENT | _PAGE_READ;
 	if (dsisr & CXL_PSL_DSISR_An_S)
 		access |= _PAGE_WRITE;
-	if ((!ctx->kernel) || ~(dar & (1ULL << 63)))
-		access |= _PAGE_USER;
+
+	if (ctx->kernel && (dar & (1ULL << 63)))
+		access |= _PAGE_PRIVILEGED;
 
 	if (dsisr & DSISR_NOHPTE)
 		inv_flags |= HPTE_NOHPTE_UPDATE;
-- 
2.5.0


* [PATCH 06/14] powerpc/mm: Remove RPN_SHIFT and RPN_SIZE
  2016-03-07 13:39 [PATCH 01/14] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (3 preceding siblings ...)
  2016-03-07 13:39 ` [PATCH 05/14] powerpc/mm: Replace _PAGE_USER with _PAGE_PRIVILEGED Aneesh Kumar K.V
@ 2016-03-07 13:39 ` Aneesh Kumar K.V
  2016-03-07 13:39 ` [PATCH 07/14] powerpc/mm: Update _PAGE_KERNEL_RO Aneesh Kumar K.V
                   ` (7 subsequent siblings)
  12 siblings, 0 replies; 19+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-07 13:39 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

These were defined in a page-size dependent way. Use PAGE_SHIFT directly
instead. While there, remove them and define PTE_RPN_MASK more clearly.
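
A quick standalone sketch (not part of the patch, 64K pages and a 64-bit
build assumed) of what the new mask means: PTE_RPN_MASK keeps bits
[PAGE_SHIFT, 57) of the pte, so pfn <-> pte conversions only need
PAGE_SHIFT. The prot value below is arbitrary.

#include <stdio.h>

#define PAGE_SHIFT	16
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define PTE_RPN_MASK	(((1UL << 57) - 1) & PAGE_MASK)

int main(void)
{
	unsigned long pfn = 0x12345;
	unsigned long prot = 0x105;
	unsigned long pte = ((pfn << PAGE_SHIFT) & PTE_RPN_MASK) | prot;

	printf("PTE_RPN_MASK = %016lx\n", PTE_RPN_MASK);
	printf("pte_pfn      = %lx\n", (pte & PTE_RPN_MASK) >> PAGE_SHIFT);	/* 0x12345 */
	return 0;
}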

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash-4k.h  |  4 ----
 arch/powerpc/include/asm/book3s/64/hash-64k.h | 11 +----------
 arch/powerpc/include/asm/book3s/64/hash.h     | 10 +++++-----
 arch/powerpc/include/asm/book3s/64/pgtable.h  |  4 ++--
 arch/powerpc/mm/pgtable_64.c                  |  2 +-
 5 files changed, 9 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 5f08a0832238..772850e517f3 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -51,10 +51,6 @@
 #define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | \
 			 _PAGE_F_SECOND | _PAGE_F_GIX)
 
-/* shift to put page number into pte */
-#define PTE_RPN_SHIFT	(12)
-#define PTE_RPN_SIZE	(45)	/* gives 57-bit real addresses */
-
 #define _PAGE_4K_PFN		0
 #ifndef __ASSEMBLY__
 /*
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 279ded72f1db..a053e8a1d0d1 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -40,15 +40,6 @@
 /* PTE flags to conserve for HPTE identification */
 #define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_F_SECOND | \
 			 _PAGE_F_GIX | _PAGE_HASHPTE | _PAGE_COMBO)
-
-/* Shift to put page number into pte.
- *
- * That gives us a max RPN of 41 bits, which means a max of 57 bits
- * of addressable physical space, or 53 bits for the special 4k PFNs.
- */
-#define PTE_RPN_SHIFT	(16)
-#define PTE_RPN_SIZE	(41)
-
 /*
  * we support 16 fragments per PTE page of 64K size.
  */
@@ -125,7 +116,7 @@ extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index);
 	(((pte) & _PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K)
 
 #define remap_4k_pfn(vma, addr, pfn, prot)				\
-	(WARN_ON(((pfn) >= (1UL << PTE_RPN_SIZE))) ? -EINVAL :	\
+	(WARN_ON(((pfn) > (PTE_RPN_MASK >> PAGE_SHIFT))) ? -EINVAL :	\
 		remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE,	\
 			__pgprot(pgprot_val((prot)) | _PAGE_4K_PFN)))
 
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index fbefbaa92736..8ccb2970f30f 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -136,10 +136,10 @@
 #define PTE_ATOMIC_UPDATES	1
 #define _PTE_NONE_MASK	_PAGE_HPTEFLAGS
 /*
- * The mask convered by the RPN must be a ULL on 32-bit platforms with
- * 64-bit PTEs
+ * We support a 57 bit real address in the pte. Clear everything above 57,
+ * and everything below PAGE_SHIFT.
  */
-#define PTE_RPN_MASK	(((1UL << PTE_RPN_SIZE) - 1) << PTE_RPN_SHIFT)
+#define PTE_RPN_MASK	(((1UL << 57) - 1) & (PAGE_MASK))
 /*
  * _PAGE_CHG_MASK masks of bits that are to be preserved across
  * pgprot changes
@@ -439,13 +439,13 @@ static inline int pte_present(pte_t pte)
  */
 static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
 {
-	return __pte((((pte_basic_t)(pfn) << PTE_RPN_SHIFT) & PTE_RPN_MASK) |
+	return __pte((((pte_basic_t)(pfn) << PAGE_SHIFT) & PTE_RPN_MASK) |
 		     pgprot_val(pgprot));
 }
 
 static inline unsigned long pte_pfn(pte_t pte)
 {
-	return (pte_val(pte) & PTE_RPN_MASK) >> PTE_RPN_SHIFT;
+	return (pte_val(pte) & PTE_RPN_MASK) >> PAGE_SHIFT;
 }
 
 /* Generic modifiers for PTE bits */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 97d06de8dbf6..144680382306 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -172,10 +172,10 @@ extern struct page *pgd_page(pgd_t pgd);
 #define SWP_TYPE_BITS 5
 #define __swp_type(x)		(((x).val >> _PAGE_BIT_SWAP_TYPE) \
 				& ((1UL << SWP_TYPE_BITS) - 1))
-#define __swp_offset(x)		(((x).val & PTE_RPN_MASK) >> PTE_RPN_SHIFT)
+#define __swp_offset(x)		(((x).val & PTE_RPN_MASK) >> PAGE_SHIFT)
 #define __swp_entry(type, offset)	((swp_entry_t) { \
 				((type) << _PAGE_BIT_SWAP_TYPE) \
-				| (((offset) << PTE_RPN_SHIFT) & PTE_RPN_MASK)})
+				| (((offset) << PAGE_SHIFT) & PTE_RPN_MASK)})
 /*
  * swp_entry_t must be independent of pte bits. We build a swp_entry_t from
  * swap type and offset we get from swap and convert that to pte to find a
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 441905f7bba4..1254cf107871 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -762,7 +762,7 @@ pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
 {
 	unsigned long pmdv;
 
-	pmdv = (pfn << PTE_RPN_SHIFT) & PTE_RPN_MASK;
+	pmdv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK;
 	return pmd_set_protbits(__pmd(pmdv), pgprot);
 }
 
-- 
2.5.0


* [PATCH 07/14] powerpc/mm: Update _PAGE_KERNEL_RO
  2016-03-07 13:39 [PATCH 01/14] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (4 preceding siblings ...)
  2016-03-07 13:39 ` [PATCH 06/14] powerpc/mm: Remove RPN_SHIFT and RPN_SIZE Aneesh Kumar K.V
@ 2016-03-07 13:39 ` Aneesh Kumar K.V
  2016-03-07 13:39 ` [PATCH 08/14] powerpc/mm: Use helper for finding pte bits mapping I/O area Aneesh Kumar K.V
                   ` (6 subsequent siblings)
  12 siblings, 0 replies; 19+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-07 13:39 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

PS3 used a PPP-bit hack to implement a read-only mapping in the
kernel area. Since we are bolt-mapping the ioremap area, it used
the pte flags _PAGE_PRESENT | _PAGE_USER to get a PPP value of 0x3,
thereby resulting in a read-only mapping. This means the area
can be accessed by user space, but the kernel will never return such an
address to user space.

Fix this by doing a read-only kernel mapping using PPP bits 0b110.

This also allows us to do read-only kernel mappings for radix in later
patches.
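
For reference, a standalone sketch (not part of the patch) of the PPP
encodings picked by htab_convert_pte_flags() after this change. The
helper name is made up, and the HPTE_R_PP0 and _PAGE_DIRTY values below
are placeholders for the sketch; the control flow mirrors the hunk below.

#include <stdio.h>

#define _PAGE_EXEC		0x00001UL
#define _PAGE_WRITE		0x00002UL
#define _PAGE_READ		0x00004UL
#define _PAGE_PRIVILEGED	0x00008UL
#define _PAGE_DIRTY		0x00080UL	/* placeholder */
#define _PAGE_RWX		(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
#define HPTE_R_PP0		(1UL << 63)	/* placeholder position */

static unsigned long pp_bits(unsigned long pteflags)
{
	unsigned long rflags = 0;

	if (pteflags & _PAGE_PRIVILEGED) {
		if (!(pteflags & _PAGE_WRITE))	/* kernel RO -> PPP 0b110 */
			rflags |= (HPTE_R_PP0 | 0x2);
	} else {
		if (pteflags & _PAGE_RWX)
			rflags |= 0x2;
		if (!((pteflags & _PAGE_WRITE) && (pteflags & _PAGE_DIRTY)))
			rflags |= 0x1;
	}
	return rflags;
}

int main(void)
{
	printf("kernel RW: %lx\n",
	       pp_bits(_PAGE_PRIVILEGED | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY));	/* 0 */
	printf("kernel RO: %lx\n", pp_bits(_PAGE_PRIVILEGED | _PAGE_READ));		/* PP0 | 2 */
	printf("user RW:   %lx\n", pp_bits(_PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY));	/* 2 */
	printf("user RO:   %lx\n", pp_bits(_PAGE_READ));				/* 3 */
	return 0;
}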

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash.h |  4 ++--
 arch/powerpc/mm/hash_utils_64.c           | 17 +++++++++++------
 arch/powerpc/platforms/ps3/spu.c          |  2 +-
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 8ccb2970f30f..c2b567456796 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -115,10 +115,10 @@
 #endif /* CONFIG_PPC_MM_SLICES */
 
 /*
- * No separate kernel read-only, user access blocked by key
+ * user access blocked by key
  */
 #define _PAGE_KERNEL_RW		(_PAGE_PRIVILEGED | _PAGE_RW | _PAGE_DIRTY)
-#define _PAGE_KERNEL_RO		 _PAGE_KERNEL_RW
+#define _PAGE_KERNEL_RO		 (_PAGE_PRIVILEGED | _PAGE_READ)
 #define _PAGE_KERNEL_RWX	(_PAGE_PRIVILEGED | _PAGE_DIRTY | \
 				 _PAGE_RW | _PAGE_EXEC)
 
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 630603f74056..c81c08aaff0e 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -167,14 +167,19 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
 	if ((pteflags & _PAGE_EXEC) == 0)
 		rflags |= HPTE_R_N;
 	/*
-	 * PP bits:
+	 * PPP bits:
 	 * Linux uses slb key 0 for kernel and 1 for user.
-	 * kernel areas are mapped with PP=00
-	 * and there is no kernel RO (_PAGE_KERNEL_RO).
-	 * User area is mapped with PP=0x2 for read/write
-	 * or PP=0x3 for read-only (including writeable but clean pages).
+	 * kernel RW areas are mapped with PPP=0b000
+	 * User area is mapped with PPP=0b010 for read/write
+	 * or PPP=0b011 for read-only (including writeable but clean pages).
 	 */
-	if (!(pteflags & _PAGE_PRIVILEGED)) {
+	if (pteflags & _PAGE_PRIVILEGED) {
+		/*
+		 * Kernel read only mapped with ppp bits 0b110
+		 */
+		if (!(pteflags & _PAGE_WRITE))
+			rflags |= (HPTE_R_PP0 | 0x2);
+	} else {
 		if (pteflags & _PAGE_RWX)
 			rflags |= 0x2;
 		if (!((pteflags & _PAGE_WRITE) && (pteflags & _PAGE_DIRTY)))
diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c
index a0bca05e26b0..5e8a40f9739f 100644
--- a/arch/powerpc/platforms/ps3/spu.c
+++ b/arch/powerpc/platforms/ps3/spu.c
@@ -205,7 +205,7 @@ static void spu_unmap(struct spu *spu)
 static int __init setup_areas(struct spu *spu)
 {
 	struct table {char* name; unsigned long addr; unsigned long size;};
-	static const unsigned long shadow_flags = _PAGE_NO_CACHE | 3;
+	unsigned long shadow_flags = pgprot_val(pgprot_noncached_wc(PAGE_KERNEL_RO));
 
 	spu_pdata(spu)->shadow = __ioremap(spu_pdata(spu)->shadow_addr,
 					   sizeof(struct spe_shadow),
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 08/14] powerpc/mm: Use helper for finding pte bits mapping I/O area
  2016-03-07 13:39 [PATCH 01/14] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (5 preceding siblings ...)
  2016-03-07 13:39 ` [PATCH 07/14] powerpc/mm: Update _PAGE_KERNEL_RO Aneesh Kumar K.V
@ 2016-03-07 13:39 ` Aneesh Kumar K.V
  2016-03-07 13:39 ` [PATCH 09/14] powerpc/mm: Drop WIMG in favour of new constants Aneesh Kumar K.V
                   ` (5 subsequent siblings)
  12 siblings, 0 replies; 19+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-07 13:39 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Use the helper instead of open-coding the constants. A later patch
will drop the WIMG bits and use the PowerISA 3.0 defines.
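
As a sanity check, with the pre-PowerISA-3.0 definitions still in
book3s/64/hash.h the helpers expand to exactly the constants they
replace:

	pgprot_val(pgprot_noncached(__pgprot(0)))
		== _PAGE_NO_CACHE | _PAGE_GUARDED
	pgprot_val(pgprot_noncached_wc(__pgprot(0)))
		== _PAGE_NO_CACHE

so this patch does not change the flags passed to __ioremap() and
__ioremap_at(); only the later constant rework does.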

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/btext.c      | 2 +-
 arch/powerpc/kernel/isa-bridge.c | 4 ++--
 arch/powerpc/kernel/pci_64.c     | 2 +-
 arch/powerpc/mm/pgtable_64.c     | 4 ++--
 arch/powerpc/platforms/ps3/spu.c | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index 41c011cb6070..8275858a434d 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -162,7 +162,7 @@ void btext_map(void)
 	offset = ((unsigned long) dispDeviceBase) - base;
 	size = dispDeviceRowBytes * dispDeviceRect[3] + offset
 		+ dispDeviceRect[0];
-	vbase = __ioremap(base, size, _PAGE_NO_CACHE);
+	vbase = __ioremap(base, size, pgprot_val(pgprot_noncached_wc(__pgprot(0))));
 	if (vbase == 0)
 		return;
 	logicalDisplayBase = vbase + offset;
diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c
index 0f1997097960..ae1316106e2b 100644
--- a/arch/powerpc/kernel/isa-bridge.c
+++ b/arch/powerpc/kernel/isa-bridge.c
@@ -109,14 +109,14 @@ static void pci_process_ISA_OF_ranges(struct device_node *isa_node,
 		size = 0x10000;
 
 	__ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
-		     size, _PAGE_NO_CACHE|_PAGE_GUARDED);
+		     size, pgprot_val(pgprot_noncached(__pgprot(0))));
 	return;
 
 inval_range:
 	printk(KERN_ERR "no ISA IO ranges or unexpected isa range, "
 	       "mapping 64k\n");
 	__ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
-		     0x10000, _PAGE_NO_CACHE|_PAGE_GUARDED);
+		     0x10000, pgprot_val(pgprot_noncached(__pgprot(0))));
 }
 
 
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 60bb187cb46a..41503d7d53a1 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -159,7 +159,7 @@ static int pcibios_map_phb_io_space(struct pci_controller *hose)
 
 	/* Establish the mapping */
 	if (__ioremap_at(phys_page, area->addr, size_page,
-			 _PAGE_NO_CACHE | _PAGE_GUARDED) == NULL)
+			 pgprot_val(pgprot_noncached(__pgprot(0)))) == NULL)
 		return -ENOMEM;
 
 	/* Fixup hose IO resource */
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 1254cf107871..6f1b7064f822 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -253,7 +253,7 @@ void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
 
 void __iomem * ioremap(phys_addr_t addr, unsigned long size)
 {
-	unsigned long flags = _PAGE_NO_CACHE | _PAGE_GUARDED;
+	unsigned long flags = pgprot_val(pgprot_noncached(__pgprot(0)));
 	void *caller = __builtin_return_address(0);
 
 	if (ppc_md.ioremap)
@@ -263,7 +263,7 @@ void __iomem * ioremap(phys_addr_t addr, unsigned long size)
 
 void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size)
 {
-	unsigned long flags = _PAGE_NO_CACHE;
+	unsigned long flags = pgprot_val(pgprot_noncached_wc(__pgprot(0)));
 	void *caller = __builtin_return_address(0);
 
 	if (ppc_md.ioremap)
diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c
index 5e8a40f9739f..492b2575e0d2 100644
--- a/arch/powerpc/platforms/ps3/spu.c
+++ b/arch/powerpc/platforms/ps3/spu.c
@@ -216,7 +216,7 @@ static int __init setup_areas(struct spu *spu)
 	}
 
 	spu->local_store = (__force void *)ioremap_prot(spu->local_store_phys,
-		LS_SIZE, _PAGE_NO_CACHE);
+		LS_SIZE, pgprot_val(pgprot_noncached_wc(__pgprot(0))));
 
 	if (!spu->local_store) {
 		pr_debug("%s:%d: ioremap local_store failed\n",
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 09/14] powerpc/mm: Drop WIMG in favour of new constants
  2016-03-07 13:39 [PATCH 01/14] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (6 preceding siblings ...)
  2016-03-07 13:39 ` [PATCH 08/14] powerpc/mm: Use helper for finding pte bits mapping I/O area Aneesh Kumar K.V
@ 2016-03-07 13:39 ` Aneesh Kumar K.V
  2016-03-07 17:29   ` kbuild test robot
  2016-03-22  4:59   ` Michael Neuling
  2016-03-07 13:39 ` [PATCH 10/14] powerpc/mm: Use generic version of pmdp_clear_flush_young Aneesh Kumar K.V
                   ` (4 subsequent siblings)
  12 siblings, 2 replies; 19+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-07 13:39 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

PowerISA 3.0 introduces three pte bits with the below meaning:
000 -> Normal Memory
001 -> Strong Access Order
010 -> Non idempotent I/O (also cache inhibited and guarded)
100 -> Tolerant I/O (cache inhibited)

We drop the existing WIMG bits in the linux page table in favour of
the above constants. We lose _PAGE_WRITETHRU with this conversion; we
only used write-through via pgprot_cached_wthru(), which is used by
fbdev/controlfb.c (the Apple control display driver) and by PPC32.

With respect to _PAGE_COHERENCE, we have been marking hpte
always coherent for some time now. htab_convert_pte_flags always added
HPTE_R_M.

NOTE: KVM changes need closer review.
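
As a quick reference for the resulting HPTE cache attributes, the new
Linux-PTE to WIMG mapping can be modelled with the standalone sketch
below. This is not kernel code: the _PAGE_* values are the ones
defined in this patch, the HPTE_R_* WIMG bits are stand-ins used only
so the sketch compiles (see mmu-hash.h for the real definitions), and
HPTE_R_M is set unconditionally as described above.

#include <stdio.h>

#define _PAGE_SAO		0x00010UL
#define _PAGE_NON_IDEMPOTENT	0x00020UL
#define _PAGE_TOLERANT		0x00040UL

/* stand-in WIMG bits, for illustration only */
#define HPTE_R_W		0x8UL
#define HPTE_R_I		0x4UL
#define HPTE_R_M		0x2UL
#define HPTE_R_G		0x1UL

static unsigned long wimg_bits(unsigned long pteflags)
{
	unsigned long rflags = HPTE_R_M;	/* always coherent */

	if (pteflags & _PAGE_TOLERANT)
		rflags |= HPTE_R_I;
	if (pteflags & _PAGE_NON_IDEMPOTENT)
		rflags |= (HPTE_R_I | HPTE_R_G);
	if (pteflags & _PAGE_SAO)
		rflags |= (HPTE_R_I | HPTE_R_W);
	return rflags;
}

int main(void)
{
	printf("normal memory  -> M     (%#lx)\n", wimg_bits(0));
	printf("tolerant I/O   -> I|M   (%#lx)\n", wimg_bits(_PAGE_TOLERANT));
	printf("non idempotent -> I|M|G (%#lx)\n",
	       wimg_bits(_PAGE_NON_IDEMPOTENT));
	printf("SAO            -> W|I|M (%#lx)\n", wimg_bits(_PAGE_SAO));
	return 0;
}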

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash.h | 47 +++++++++----------------------
 arch/powerpc/include/asm/kvm_book3s_64.h  | 29 ++++++++++---------
 arch/powerpc/kvm/book3s_64_mmu_hv.c       | 11 ++++----
 arch/powerpc/kvm/book3s_hv_rm_mmu.c       | 12 ++++----
 arch/powerpc/mm/hash64_64k.c              |  2 +-
 arch/powerpc/mm/hash_utils_64.c           | 14 ++++-----
 arch/powerpc/mm/pgtable.c                 |  2 +-
 arch/powerpc/mm/pgtable_64.c              |  4 ---
 arch/powerpc/platforms/pseries/lpar.c     |  4 ---
 9 files changed, 48 insertions(+), 77 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index c2b567456796..edd3d47ef9a4 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -21,11 +21,9 @@
 #define _PAGE_RW		(_PAGE_READ | _PAGE_WRITE)
 #define _PAGE_RWX		(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
 #define _PAGE_PRIVILEGED	0x00008 /* page can only be access by kernel */
-#define _PAGE_GUARDED		0x00010 /* G: guarded (side-effect) page */
-/* M (memory coherence) is always set in the HPTE, so we don't need it here */
-#define _PAGE_COHERENT		0x0
-#define _PAGE_NO_CACHE		0x00020 /* I: cache inhibit */
-#define _PAGE_WRITETHRU		0x00040 /* W: cache write-through */
+#define _PAGE_SAO		0x00010 /* Strong access order */
+#define _PAGE_NON_IDEMPOTENT	0x00020 /* non idempotent memory */
+#define _PAGE_TOLERANT		0x00040 /* tolerant memory, cache inhibited */
 #define _PAGE_DIRTY		0x00080 /* C: page changed */
 #define _PAGE_ACCESSED		0x00100 /* R: page referenced */
 #define _PAGE_SPECIAL		0x00400 /* software: special page */
@@ -122,9 +120,6 @@
 #define _PAGE_KERNEL_RWX	(_PAGE_PRIVILEGED | _PAGE_DIRTY | \
 				 _PAGE_RW | _PAGE_EXEC)
 
-/* Strong Access Ordering */
-#define _PAGE_SAO		(_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT)
-
 /* No page size encoding in the linux PTE */
 #define _PAGE_PSIZE		0
 
@@ -150,10 +145,9 @@
 /*
  * Mask of bits returned by pte_pgprot()
  */
-#define PAGE_PROT_BITS	(_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
-			 _PAGE_WRITETHRU | _PAGE_4K_PFN | \
-			 _PAGE_PRIVILEGED | _PAGE_ACCESSED |  _PAGE_READ |\
-			 _PAGE_WRITE |  _PAGE_DIRTY | _PAGE_EXEC | \
+#define PAGE_PROT_BITS  (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT | \
+			 _PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \
+			 _PAGE_READ | _PAGE_WRITE |  _PAGE_DIRTY | _PAGE_EXEC | \
 			 _PAGE_SOFT_DIRTY)
 /*
  * We define 2 sets of base prot bits, one for basic pages (ie,
@@ -162,7 +156,7 @@
  * the processor might need it for DMA coherency.
  */
 #define _PAGE_BASE_NC	(_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
-#define _PAGE_BASE	(_PAGE_BASE_NC | _PAGE_COHERENT)
+#define _PAGE_BASE	(_PAGE_BASE_NC)
 
 /* Permission masks used to generate the __P and __S table,
  *
@@ -203,9 +197,9 @@
 /* Permission masks used for kernel mappings */
 #define PAGE_KERNEL	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
 #define PAGE_KERNEL_NC	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
-				 _PAGE_NO_CACHE)
+				 _PAGE_TOLERANT)
 #define PAGE_KERNEL_NCG	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
-				 _PAGE_NO_CACHE | _PAGE_GUARDED)
+				 _PAGE_NON_IDEMPOTENT)
 #define PAGE_KERNEL_X	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
 #define PAGE_KERNEL_RO	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
 #define PAGE_KERNEL_ROX	__pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
@@ -516,41 +510,26 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
  * Macro to mark a page protection value as "uncacheable".
  */
 
-#define _PAGE_CACHE_CTL	(_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \
-			 _PAGE_WRITETHRU)
+#define _PAGE_CACHE_CTL	(_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT)
 
 #define pgprot_noncached pgprot_noncached
 static inline pgprot_t pgprot_noncached(pgprot_t prot)
 {
 	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
-			_PAGE_NO_CACHE | _PAGE_GUARDED);
+			_PAGE_NON_IDEMPOTENT);
 }
 
 #define pgprot_noncached_wc pgprot_noncached_wc
 static inline pgprot_t pgprot_noncached_wc(pgprot_t prot)
 {
 	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
-			_PAGE_NO_CACHE);
+			_PAGE_TOLERANT);
 }
 
 #define pgprot_cached pgprot_cached
 static inline pgprot_t pgprot_cached(pgprot_t prot)
 {
-	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
-			_PAGE_COHERENT);
-}
-
-#define pgprot_cached_wthru pgprot_cached_wthru
-static inline pgprot_t pgprot_cached_wthru(pgprot_t prot)
-{
-	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
-			_PAGE_COHERENT | _PAGE_WRITETHRU);
-}
-
-#define pgprot_cached_noncoherent pgprot_cached_noncoherent
-static inline pgprot_t pgprot_cached_noncoherent(pgprot_t prot)
-{
-	return __pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL);
+	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL));
 }
 
 #define pgprot_writecombine pgprot_writecombine
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index f9a7a89a3e4f..f23b1698ad3c 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -278,19 +278,24 @@ static inline unsigned long hpte_make_readonly(unsigned long ptel)
 	return ptel;
 }
 
-static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
+static inline bool hpte_cache_flags_ok(unsigned long hptel, bool is_ci)
 {
-	unsigned int wimg = ptel & HPTE_R_WIMG;
+	unsigned int wimg = hptel & HPTE_R_WIMG;
 
 	/* Handle SAO */
 	if (wimg == (HPTE_R_W | HPTE_R_I | HPTE_R_M) &&
 	    cpu_has_feature(CPU_FTR_ARCH_206))
 		wimg = HPTE_R_M;
 
-	if (!io_type)
+	if (!is_ci)
 		return wimg == HPTE_R_M;
-
-	return (wimg & (HPTE_R_W | HPTE_R_I)) == io_type;
+	/*
+	 * if host is mapped cache inhibited, make sure hptel also have
+	 * cache inhibited.
+	 */
+	if (wimg & HPTE_R_W) /* FIXME!! is this ok for all guest. ? */
+		return false;
+	return !!(wimg & HPTE_R_I);
 }
 
 /*
@@ -333,16 +338,12 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing)
 	return new_pte;
 }
 
-
-/* Return HPTE cache control bits corresponding to Linux pte bits */
-static inline unsigned long hpte_cache_bits(unsigned long pte_val)
+/*
+ * check whether the mapping is cache inhibited
+ */
+static inline bool hpte_is_cache_inhibited(unsigned long pte_val)
 {
-#if _PAGE_NO_CACHE == HPTE_R_I && _PAGE_WRITETHRU == HPTE_R_W
-	return pte_val & (HPTE_R_W | HPTE_R_I);
-#else
-	return ((pte_val & _PAGE_NO_CACHE) ? HPTE_R_I : 0) +
-		((pte_val & _PAGE_WRITETHRU) ? HPTE_R_W : 0);
-#endif
+	return !!(pte_val & (_PAGE_TOLERANT | _PAGE_NON_IDEMPOTENT));
 }
 
 static inline bool hpte_read_permission(unsigned long pp, unsigned long key)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index c7b78d8336b2..40ad06c41ca1 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -447,7 +447,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	struct revmap_entry *rev;
 	struct page *page, *pages[1];
 	long index, ret, npages;
-	unsigned long is_io;
+	bool is_ci;
 	unsigned int writing, write_ok;
 	struct vm_area_struct *vma;
 	unsigned long rcbits;
@@ -503,7 +503,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	smp_rmb();
 
 	ret = -EFAULT;
-	is_io = 0;
+	is_ci = false;
 	pfn = 0;
 	page = NULL;
 	pte_size = PAGE_SIZE;
@@ -521,7 +521,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			pfn = vma->vm_pgoff +
 				((hva - vma->vm_start) >> PAGE_SHIFT);
 			pte_size = psize;
-			is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
+			is_ci = hpte_is_cache_inhibited(pgprot_val(vma->vm_page_prot));
 			write_ok = vma->vm_flags & VM_WRITE;
 		}
 		up_read(&current->mm->mmap_sem);
@@ -558,10 +558,9 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		goto out_put;
 
 	/* Check WIMG vs. the actual page we're accessing */
-	if (!hpte_cache_flags_ok(r, is_io)) {
-		if (is_io)
+	if (!hpte_cache_flags_ok(r, is_ci)) {
+		if (is_ci)
 			goto out_put;
-
 		/*
 		 * Allow guest to map emulated device memory as
 		 * uncacheable, but actually make it cacheable.
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 4cb8db05f3e5..3ebd620589a9 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -175,7 +175,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 	unsigned long g_ptel;
 	struct kvm_memory_slot *memslot;
 	unsigned hpage_shift;
-	unsigned long is_io;
+	bool is_ci;
 	unsigned long *rmap;
 	pte_t *ptep;
 	unsigned int writing;
@@ -199,7 +199,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 	gfn = gpa >> PAGE_SHIFT;
 	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
 	pa = 0;
-	is_io = ~0ul;
+	is_ci = false;
 	rmap = NULL;
 	if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
 		/* Emulated MMIO - mark this with key=31 */
@@ -250,7 +250,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 			if (writing && !pte_write(pte))
 				/* make the actual HPTE be read-only */
 				ptel = hpte_make_readonly(ptel);
-			is_io = hpte_cache_bits(pte_val(pte));
+			is_ci = hpte_is_cache_inhibited(pte_val(pte));
 			pa = pte_pfn(pte) << PAGE_SHIFT;
 			pa |= hva & (host_pte_size - 1);
 			pa |= gpa & ~PAGE_MASK;
@@ -267,9 +267,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 	else
 		pteh |= HPTE_V_ABSENT;
 
-	/* Check WIMG */
-	if (is_io != ~0ul && !hpte_cache_flags_ok(ptel, is_io)) {
-		if (is_io)
+	/*If we had host pte mapping then  Check WIMG */
+	if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) {
+		if (is_ci)
 			return H_PARAMETER;
 		/*
 		 * Allow guest to map emulated device memory as
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index f33b410d6c8a..243c822913e4 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -248,7 +248,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
 		 * If so, bail out and refault as a 4k page
 		 */
 		if (!mmu_has_feature(MMU_FTR_CI_LARGE_PAGE) &&
-		    unlikely(old_pte & _PAGE_NO_CACHE))
+		    unlikely(old_pte & _PAGE_TOLERANT))
 			return 0;
 		/*
 		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index c81c08aaff0e..728acd17f2a6 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -192,12 +192,12 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
 	/*
 	 * Add in WIG bits
 	 */
-	if (pteflags & _PAGE_WRITETHRU)
-		rflags |= HPTE_R_W;
-	if (pteflags & _PAGE_NO_CACHE)
+	if (pteflags & _PAGE_TOLERANT)
 		rflags |= HPTE_R_I;
-	if (pteflags & _PAGE_GUARDED)
-		rflags |= HPTE_R_G;
+	if (pteflags & _PAGE_NON_IDEMPOTENT)
+		rflags |= (HPTE_R_I | HPTE_R_G);
+	if (pteflags & _PAGE_SAO)
+		rflags |= (HPTE_R_I | HPTE_R_W);
 
 	return rflags;
 }
@@ -1139,7 +1139,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
 	 * using non cacheable large pages, then we switch to 4k
 	 */
 	if (mmu_ci_restrictions && psize == MMU_PAGE_64K &&
-	    (pte_val(*ptep) & _PAGE_NO_CACHE)) {
+	    (pte_val(*ptep) & _PAGE_TOLERANT)) {
 		if (user_region) {
 			demote_segment_4k(mm, ea);
 			psize = MMU_PAGE_4K;
@@ -1298,7 +1298,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
 	 * That way we don't have to duplicate all of the logic for segment
 	 * page size demotion here
 	 */
-	if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE))
+	if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_TOLERANT))
 		goto out_exit;
 #endif /* CONFIG_PPC_64K_PAGES */
 
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 7b492283d502..ad8b6432f7e3 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -46,7 +46,7 @@ static inline int pte_looks_normal(pte_t pte)
 
 #if defined(CONFIG_PPC_BOOK3S_64)
 	if ((pte_val(pte) &
-	     (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE)) ==
+	     (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_TOLERANT)) ==
 	    _PAGE_PRESENT) {
 		if (!(pte_val(pte) & _PAGE_PRIVILEGED))
 			return 1;
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 6f1b7064f822..db924c54f370 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -167,10 +167,6 @@ void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
 	if ((flags & _PAGE_PRESENT) == 0)
 		flags |= pgprot_val(PAGE_KERNEL);
 
-	/* Non-cacheable page cannot be coherent */
-	if (flags & _PAGE_NO_CACHE)
-		flags &= ~_PAGE_COHERENT;
-
 	/* We don't support the 4K PFN hack with ioremap */
 	if (flags & _PAGE_4K_PFN)
 		return NULL;
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 2415a0d31f8f..0d4608990702 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -152,10 +152,6 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 	/* Exact = 0                   */
 	flags = 0;
 
-	/* Make pHyp happy */
-	if ((rflags & _PAGE_NO_CACHE) && !(rflags & _PAGE_WRITETHRU))
-		hpte_r &= ~HPTE_R_M;
-
 	if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N))
 		flags |= H_COALESCE_CAND;
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 10/14] powerpc/mm: Use generic version of pmdp_clear_flush_young
  2016-03-07 13:39 [PATCH 01/14] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (7 preceding siblings ...)
  2016-03-07 13:39 ` [PATCH 09/14] powerpc/mm: Drop WIMG in favour of new constants Aneesh Kumar K.V
@ 2016-03-07 13:39 ` Aneesh Kumar K.V
  2016-03-07 13:39 ` [PATCH 11/14] powerpc/mm: Use generic version of ptep_clear_flush_young Aneesh Kumar K.V
                   ` (3 subsequent siblings)
  12 siblings, 0 replies; 19+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-07 13:39 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

The radix variant is going to require a flush_pmd_tlb_range(). With
flush_pmd_tlb_range() added, pmdp_clear_flush_young() is the same as
the generic version, so drop the powerpc-specific variant.
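
For reference, the generic pmdp_clear_flush_young() in
mm/pgtable-generic.c does roughly the following (paraphrased here,
not part of this patch):

	int pmdp_clear_flush_young(struct vm_area_struct *vma,
				   unsigned long address, pmd_t *pmdp)
	{
		int young;

		VM_BUG_ON(address & ~HPAGE_PMD_MASK);
		young = pmdp_test_and_clear_young(vma, address, pmdp);
		if (young)
			flush_pmd_tlb_range(vma, address,
					    address + HPAGE_PMD_SIZE);
		return young;
	}

so once the radix code provides flush_pmd_tlb_range(), a
powerpc-specific pmdp_clear_flush_young() adds nothing over the
generic one.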

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h |  3 ---
 arch/powerpc/mm/pgtable_64.c                 | 13 +++----------
 2 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 144680382306..e7171323884a 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -298,9 +298,6 @@ extern int pmdp_set_access_flags(struct vm_area_struct *vma,
 #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
 extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 				     unsigned long address, pmd_t *pmdp);
-#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
-extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
-				  unsigned long address, pmd_t *pmdp);
 
 #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
 extern pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index db924c54f370..98c91ad18ba7 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -593,22 +593,15 @@ pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
 	return pmd;
 }
 
-int pmdp_test_and_clear_young(struct vm_area_struct *vma,
-			      unsigned long address, pmd_t *pmdp)
-{
-	return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
-}
-
 /*
  * We currently remove entries from the hashtable regardless of whether
- * the entry was young or dirty. The generic routines only flush if the
- * entry was young or dirty which is not good enough.
+ * the entry was young or dirty.
  *
  * We should be more intelligent about this but for the moment we override
  * these functions and force a tlb flush unconditionally
  */
-int pmdp_clear_flush_young(struct vm_area_struct *vma,
-				  unsigned long address, pmd_t *pmdp)
+int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+			      unsigned long address, pmd_t *pmdp)
 {
 	return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
 }
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 11/14] powerpc/mm: Use generic version of ptep_clear_flush_young
  2016-03-07 13:39 [PATCH 01/14] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (8 preceding siblings ...)
  2016-03-07 13:39 ` [PATCH 10/14] powerpc/mm: Use generic version of pmdp_clear_flush_young Aneesh Kumar K.V
@ 2016-03-07 13:39 ` Aneesh Kumar K.V
  2016-03-07 13:39 ` [PATCH 12/14] powerpc/mm: Move common data structure between radix and hash to book3s 64 generic headers Aneesh Kumar K.V
                   ` (2 subsequent siblings)
  12 siblings, 0 replies; 19+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-07 13:39 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

The radix variant is going to require a flush_tlb_range(). With
flush_tlb_range() added, ptep_clear_flush_young() is the same as the
generic version, so drop the powerpc-specific variant.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash.h | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index edd3d47ef9a4..f04c7ae810b2 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -275,6 +275,13 @@ static inline unsigned long pte_update(struct mm_struct *mm,
 	return old;
 }
 
+/*
+ * We currently remove entries from the hashtable regardless of whether
+ * the entry was young or dirty.
+ *
+ * We should be more intelligent about this but for the moment we override
+ * these functions and force a tlb flush unconditionally
+ */
 static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
 					      unsigned long addr, pte_t *ptep)
 {
@@ -313,22 +320,6 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 	pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1);
 }
 
-/*
- * We currently remove entries from the hashtable regardless of whether
- * the entry was young or dirty. The generic routines only flush if the
- * entry was young or dirty which is not good enough.
- *
- * We should be more intelligent about this but for the moment we override
- * these functions and force a tlb flush unconditionally
- */
-#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
-#define ptep_clear_flush_young(__vma, __address, __ptep)		\
-({									\
-	int __young = __ptep_test_and_clear_young((__vma)->vm_mm, __address, \
-						  __ptep);		\
-	__young;							\
-})
-
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 				       unsigned long addr, pte_t *ptep)
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 12/14] powerpc/mm: Move common data structure between radix and hash to book3s 64 generic headers
  2016-03-07 13:39 [PATCH 01/14] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (9 preceding siblings ...)
  2016-03-07 13:39 ` [PATCH 11/14] powerpc/mm: Use generic version of ptep_clear_flush_young Aneesh Kumar K.V
@ 2016-03-07 13:39 ` Aneesh Kumar K.V
  2016-03-07 13:39 ` [PATCH 13/14] powerpc/mm/power9: Add partition table format Aneesh Kumar K.V
  2016-03-07 13:39 ` [PATCH 14/14] powerpc/mm/hash: Add support for POWER9 hash Aneesh Kumar K.V
  12 siblings, 0 replies; 19+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-07 13:39 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

We want to share mm_context_t between radix and hash. Move it out of
the hash-specific mmu-hash.h into a generic book3s/64 mmu.h header.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/32/mmu-hash.h |  6 +--
 arch/powerpc/include/asm/book3s/64/mmu-hash.h | 61 ++---------------------
 arch/powerpc/include/asm/book3s/64/mmu.h      | 72 +++++++++++++++++++++++++++
 arch/powerpc/include/asm/mmu.h                | 11 ++--
 4 files changed, 85 insertions(+), 65 deletions(-)
 create mode 100644 arch/powerpc/include/asm/book3s/64/mmu.h

diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
index 16f513e5cbd7..b82e063494dd 100644
--- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_POWERPC_MMU_HASH32_H_
-#define _ASM_POWERPC_MMU_HASH32_H_
+#ifndef _ASM_POWERPC_BOOK3S_32_MMU_HASH_H_
+#define _ASM_POWERPC_BOOK3S_32_MMU_HASH_H_
 /*
  * 32-bit hash table MMU support
  */
@@ -90,4 +90,4 @@ typedef struct {
 #define mmu_virtual_psize	MMU_PAGE_4K
 #define mmu_linear_psize	MMU_PAGE_256M
 
-#endif /* _ASM_POWERPC_MMU_HASH32_H_ */
+#endif /* _ASM_POWERPC_BOOK3S_32_MMU_HASH_H_ */
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 0cea4807e26f..ce73736b42db 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_POWERPC_MMU_HASH64_H_
-#define _ASM_POWERPC_MMU_HASH64_H_
+#ifndef _ASM_POWERPC_BOOK3S_64_MMU_HASH_H_
+#define _ASM_POWERPC_BOOK3S_64_MMU_HASH_H_
 /*
  * PowerPC64 memory management structures
  *
@@ -127,24 +127,6 @@ extern struct hash_pte *htab_address;
 extern unsigned long htab_size_bytes;
 extern unsigned long htab_hash_mask;
 
-/*
- * Page size definition
- *
- *    shift : is the "PAGE_SHIFT" value for that page size
- *    sllp  : is a bit mask with the value of SLB L || LP to be or'ed
- *            directly to a slbmte "vsid" value
- *    penc  : is the HPTE encoding mask for the "LP" field:
- *
- */
-struct mmu_psize_def
-{
-	unsigned int	shift;	/* number of bits */
-	int		penc[MMU_PAGE_COUNT];	/* HPTE encoding */
-	unsigned int	tlbiel;	/* tlbiel supported for that page size */
-	unsigned long	avpnm;	/* bits to mask out in AVPN in the HPTE */
-	unsigned long	sllp;	/* SLB L||LP (exact mask to use in slbmte) */
-};
-extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
 
 static inline int shift_to_mmu_psize(unsigned int shift)
 {
@@ -210,11 +192,6 @@ static inline int segment_shift(int ssize)
 /*
  * The current system page and segment sizes
  */
-extern int mmu_linear_psize;
-extern int mmu_virtual_psize;
-extern int mmu_vmalloc_psize;
-extern int mmu_vmemmap_psize;
-extern int mmu_io_psize;
 extern int mmu_kernel_ssize;
 extern int mmu_highuser_ssize;
 extern u16 mmu_slb_size;
@@ -512,38 +489,6 @@ static inline void subpage_prot_free(struct mm_struct *mm) {}
 static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
 #endif /* CONFIG_PPC_SUBPAGE_PROT */
 
-typedef unsigned long mm_context_id_t;
-struct spinlock;
-
-typedef struct {
-	mm_context_id_t id;
-	u16 user_psize;		/* page size index */
-
-#ifdef CONFIG_PPC_MM_SLICES
-	u64 low_slices_psize;	/* SLB page size encodings */
-	unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
-#else
-	u16 sllp;		/* SLB page size encoding */
-#endif
-	unsigned long vdso_base;
-#ifdef CONFIG_PPC_SUBPAGE_PROT
-	struct subpage_prot_table spt;
-#endif /* CONFIG_PPC_SUBPAGE_PROT */
-#ifdef CONFIG_PPC_ICSWX
-	struct spinlock *cop_lockp; /* guard acop and cop_pid */
-	unsigned long acop;	/* mask of enabled coprocessor types */
-	unsigned int cop_pid;	/* pid value used with coprocessors */
-#endif /* CONFIG_PPC_ICSWX */
-#ifdef CONFIG_PPC_64K_PAGES
-	/* for 4K PTE fragment support */
-	void *pte_frag;
-#endif
-#ifdef CONFIG_SPAPR_TCE_IOMMU
-	struct list_head iommu_group_mem_list;
-#endif
-} mm_context_t;
-
-
 #if 0
 /*
  * The code below is equivalent to this function for arguments
@@ -613,4 +558,4 @@ unsigned htab_shift_for_mem_size(unsigned long mem_size);
 
 #endif /* __ASSEMBLY__ */
 
-#endif /* _ASM_POWERPC_MMU_HASH64_H_ */
+#endif /* _ASM_POWERPC_BOOK3S_64_MMU_HASH_H_ */
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
new file mode 100644
index 000000000000..aadb0bbc5c71
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -0,0 +1,72 @@
+#ifndef _ASM_POWERPC_BOOK3S_64_MMU_H_
+#define _ASM_POWERPC_BOOK3S_64_MMU_H_
+
+#ifndef __ASSEMBLY__
+/*
+ * Page size definition
+ *
+ *    shift : is the "PAGE_SHIFT" value for that page size
+ *    sllp  : is a bit mask with the value of SLB L || LP to be or'ed
+ *            directly to a slbmte "vsid" value
+ *    penc  : is the HPTE encoding mask for the "LP" field:
+ *
+ */
+struct mmu_psize_def {
+	unsigned int	shift;	/* number of bits */
+	int		penc[MMU_PAGE_COUNT];	/* HPTE encoding */
+	unsigned int	tlbiel;	/* tlbiel supported for that page size */
+	unsigned long	avpnm;	/* bits to mask out in AVPN in the HPTE */
+	unsigned long	sllp;	/* SLB L||LP (exact mask to use in slbmte) */
+};
+extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+#endif /* __ASSEMBLY__ */
+
+#ifdef CONFIG_PPC_STD_MMU_64
+/* 64-bit classic hash table MMU */
+#include <asm/book3s/64/mmu-hash.h>
+#endif
+
+#ifndef __ASSEMBLY__
+
+typedef unsigned long mm_context_id_t;
+struct spinlock;
+
+typedef struct {
+	mm_context_id_t id;
+	u16 user_psize;		/* page size index */
+
+#ifdef CONFIG_PPC_MM_SLICES
+	u64 low_slices_psize;	/* SLB page size encodings */
+	unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
+#else
+	u16 sllp;		/* SLB page size encoding */
+#endif
+	unsigned long vdso_base;
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+	struct subpage_prot_table spt;
+#endif /* CONFIG_PPC_SUBPAGE_PROT */
+#ifdef CONFIG_PPC_ICSWX
+	struct spinlock *cop_lockp; /* guard acop and cop_pid */
+	unsigned long acop;	/* mask of enabled coprocessor types */
+	unsigned int cop_pid;	/* pid value used with coprocessors */
+#endif /* CONFIG_PPC_ICSWX */
+#ifdef CONFIG_PPC_64K_PAGES
+	/* for 4K PTE fragment support */
+	void *pte_frag;
+#endif
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+	struct list_head iommu_group_mem_list;
+#endif
+} mm_context_t;
+
+/*
+ * The current system page and segment sizes
+ */
+extern int mmu_linear_psize;
+extern int mmu_virtual_psize;
+extern int mmu_vmalloc_psize;
+extern int mmu_vmemmap_psize;
+extern int mmu_io_psize;
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 8ca1c983bf6c..5f55024f9522 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -181,10 +181,13 @@ static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
 
 #define MMU_PAGE_COUNT	15
 
-#if defined(CONFIG_PPC_STD_MMU_64)
-/* 64-bit classic hash table MMU */
-#include <asm/book3s/64/mmu-hash.h>
-#elif defined(CONFIG_PPC_STD_MMU_32)
+#ifdef CONFIG_PPC_BOOK3S_64
+#include <asm/book3s/64/mmu.h>
+#else /* CONFIG_PPC_BOOK3S_64 */
+
+#endif
+
+#if defined(CONFIG_PPC_STD_MMU_32)
 /* 32-bit classic hash table MMU */
 #include <asm/book3s/32/mmu-hash.h>
 #elif defined(CONFIG_40x)
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 13/14] powerpc/mm/power9: Add partition table format
  2016-03-07 13:39 [PATCH 01/14] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (10 preceding siblings ...)
  2016-03-07 13:39 ` [PATCH 12/14] powerpc/mm: Move common data structure between radix and hash to book3s 64 generic headers Aneesh Kumar K.V
@ 2016-03-07 13:39 ` Aneesh Kumar K.V
  2016-03-07 13:39 ` [PATCH 14/14] powerpc/mm/hash: Add support for POWER9 hash Aneesh Kumar K.V
  12 siblings, 0 replies; 19+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-07 13:39 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

We also add a machine-dependent callback for updating the partition
table entry.
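
(The PRTB_SIZE_SHIFT value in the header follows from the entry
layout: each prtb_entry is two __be64s, i.e. 16 bytes, so a table
with 2^CONTEXT_BITS process-table entries needs 2^(CONTEXT_BITS + 4)
bytes, which is what the "MAX_USER_CONTEXT * 16 bytes" comment
refers to.)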

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/mmu.h | 31 +++++++++++++++++++++++++++++--
 arch/powerpc/include/asm/machdep.h       |  1 +
 arch/powerpc/include/asm/reg.h           |  1 +
 3 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index aadb0bbc5c71..b86786f2521c 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -21,12 +21,39 @@ struct mmu_psize_def {
 extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
 #endif /* __ASSEMBLY__ */
 
-#ifdef CONFIG_PPC_STD_MMU_64
 /* 64-bit classic hash table MMU */
 #include <asm/book3s/64/mmu-hash.h>
-#endif
 
 #ifndef __ASSEMBLY__
+/*
+ * ISA 3.0 partiton and process table entry format
+ */
+struct prtb_entry {
+	__be64 prtb0;
+	__be64 prtb1;
+};
+extern struct prtb_entry *process_tb;
+
+struct patb_entry {
+	__be64 patb0;
+	__be64 patb1;
+};
+extern struct patb_entry *partition_tb;
+
+#define PATB_HR		(1UL << 63)
+#define PATB_GR		(1UL << 63)
+#define RPDB_MASK	0x0ffffffffffff00fUL
+#define RPDB_SHIFT	(1UL << 8)
+/*
+ * Limit process table to PAGE_SIZE table. This
+ * also limit the max pid we can support.
+ * MAX_USER_CONTEXT * 16 bytes of space.
+ */
+#define PRTB_SIZE_SHIFT	(CONTEXT_BITS + 4)
+/*
+ * Power9 currently only support 64K partition table size.
+ */
+#define PATB_SIZE_SHIFT	16
 
 typedef unsigned long mm_context_id_t;
 struct spinlock;
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index fd22442d30a9..6bdcd0da9e21 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -256,6 +256,7 @@ struct machdep_calls {
 #ifdef CONFIG_ARCH_RANDOM
 	int (*get_random_seed)(unsigned long *v);
 #endif
+	int (*update_partition_table)(u64);
 };
 
 extern void e500_idle(void);
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 52ed654d01ba..257251ada3a3 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -587,6 +587,7 @@
 #define SPRN_PIR	0x3FF	/* Processor Identification Register */
 #endif
 #define SPRN_TIR	0x1BE	/* Thread Identification Register */
+#define SPRN_PTCR	0x1D0	/* Partition table control Register */
 #define SPRN_PSPB	0x09F	/* Problem State Priority Boost reg */
 #define SPRN_PTEHI	0x3D5	/* 981 7450 PTE HI word (S/W TLB load) */
 #define SPRN_PTELO	0x3D6	/* 982 7450 PTE LO word (S/W TLB load) */
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 14/14] powerpc/mm/hash: Add support for POWER9 hash
  2016-03-07 13:39 [PATCH 01/14] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (11 preceding siblings ...)
  2016-03-07 13:39 ` [PATCH 13/14] powerpc/mm/power9: Add partition table format Aneesh Kumar K.V
@ 2016-03-07 13:39 ` Aneesh Kumar K.V
  12 siblings, 0 replies; 19+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-07 13:39 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

This adds support for POWER9 hash with UPRT=0, i.e. we don't have
segment table support yet.
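
To see what the new partition-table setup computes, here is a
standalone model (not kernel code) of the PATB entry and PTCR value
that hash_init_partition_table() below would produce for an example
256MB hash table. The physical addresses are made up and the
__pa()/memblock handling is omitted.

#include <stdio.h>

#define PATB_SIZE_SHIFT	16	/* 64K partition table, from patch 13 */

static unsigned long ilog2(unsigned long v)
{
	unsigned long r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	unsigned long hash_table = 0x1000000UL;		/* example HPT base */
	unsigned long htab_size_bytes = 256UL << 20;	/* 256MB HPT */
	unsigned long pteg_count = htab_size_bytes >> 7;/* 128 bytes per PTEG */
	unsigned long htab_size = ilog2(pteg_count) - 11;
	unsigned long partition_tb = 0x2000000UL;	/* example PATB base */

	/* ps_field is 0 for lpid 0, as in the patch */
	printf("patb0 = %#lx\n", hash_table | htab_size);
	printf("PTCR  = %#lx\n", partition_tb | (PATB_SIZE_SHIFT - 12));
	return 0;
}

The encoded HTABSIZE field works out to log2(HPT size / 256K), i.e.
10 for a 256MB table, and the low PTCR bits encode the 64K partition
table size as PATB_SIZE_SHIFT - 12 = 4.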

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h | 13 +++++++--
 arch/powerpc/mm/hash_native_64.c              | 11 ++++++-
 arch/powerpc/mm/hash_utils_64.c               | 42 +++++++++++++++++++++++++--
 arch/powerpc/mm/pgtable_64.c                  |  7 +++++
 arch/powerpc/platforms/ps3/htab.c             |  2 +-
 arch/powerpc/platforms/pseries/lpar.c         |  2 +-
 6 files changed, 70 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index ce73736b42db..843b5d839904 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -78,6 +78,10 @@
 #define HPTE_V_SECONDARY	ASM_CONST(0x0000000000000002)
 #define HPTE_V_VALID		ASM_CONST(0x0000000000000001)
 
+/*
+ * ISA 3.0 have a different HPTE format.
+ */
+#define HPTE_R_3_0_SSIZE_SHIFT	58
 #define HPTE_R_PP0		ASM_CONST(0x8000000000000000)
 #define HPTE_R_TS		ASM_CONST(0x4000000000000000)
 #define HPTE_R_KEY_HI		ASM_CONST(0x3000000000000000)
@@ -224,7 +228,8 @@ static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize,
 	 */
 	v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm);
 	v <<= HPTE_V_AVPN_SHIFT;
-	v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
 	return v;
 }
 
@@ -248,8 +253,12 @@ static inline unsigned long hpte_encode_v(unsigned long vpn, int base_psize,
  * aligned for the requested page size
  */
 static inline unsigned long hpte_encode_r(unsigned long pa, int base_psize,
-					  int actual_psize)
+					  int actual_psize, int ssize)
 {
+
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		pa |= ((unsigned long) ssize) << HPTE_R_3_0_SSIZE_SHIFT;
+
 	/* A 4K page needs no special encoding */
 	if (actual_psize == MMU_PAGE_4K)
 		return pa & HPTE_R_RPN;
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 8eaac81347fd..d873f6507f72 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -221,7 +221,7 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
 		return -1;
 
 	hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
-	hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
+	hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags;
 
 	if (!(vflags & HPTE_V_BOLTED)) {
 		DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
@@ -719,6 +719,12 @@ static void native_flush_hash_range(unsigned long number, int local)
 	local_irq_restore(flags);
 }
 
+static int native_update_partition_table(u64 patb1)
+{
+	partition_tb->patb1 = cpu_to_be64(patb1);
+	return 0;
+}
+
 void __init hpte_init_native(void)
 {
 	ppc_md.hpte_invalidate	= native_hpte_invalidate;
@@ -729,4 +735,7 @@ void __init hpte_init_native(void)
 	ppc_md.hpte_clear_all	= native_hpte_clear;
 	ppc_md.flush_hash_range = native_flush_hash_range;
 	ppc_md.hugepage_invalidate   = native_hugepage_invalidate;
+
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		ppc_md.update_partition_table = native_update_partition_table;
 }
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 728acd17f2a6..0e25d2981e5e 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -673,6 +673,41 @@ int remove_section_mapping(unsigned long start, unsigned long end)
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
+static void __init hash_init_partition_table(phys_addr_t hash_table,
+					     unsigned long pteg_count)
+{
+	unsigned long ps_field;
+	unsigned long htab_size;
+	unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
+
+	/*
+	 * slb llp encoding for the page size used in VPM real mode.
+	 * We can ignore that for lpid 0
+	 */
+	ps_field = 0;
+	htab_size =  __ilog2(pteg_count) - 11;
+
+	BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large.");
+	partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
+						MEMBLOCK_ALLOC_ANYWHERE));
+
+	/* Initialize the Partition Table with no entries */
+	memset((void *)partition_tb, 0, patb_size);
+	partition_tb->patb0 = cpu_to_be64(ps_field | hash_table | htab_size);
+	/*
+	 * FIXME!! This should be done via update_partition table
+	 * For now UPRT is 0 for us.
+	 */
+	partition_tb->patb1 = 0;
+	DBG("Partition table %p\n", partition_tb);
+	/*
+	 * update partition table control register,
+	 * 64 K size.
+	 */
+	mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
+
+}
+
 static void __init htab_initialize(void)
 {
 	unsigned long table;
@@ -741,8 +776,11 @@ static void __init htab_initialize(void)
 		/* Initialize the HPT with no entries */
 		memset((void *)table, 0, htab_size_bytes);
 
-		/* Set SDR1 */
-		mtspr(SPRN_SDR1, _SDR1);
+		if (!cpu_has_feature(CPU_FTR_ARCH_300))
+			/* Set SDR1 */
+			mtspr(SPRN_SDR1, _SDR1);
+		else
+			hash_init_partition_table(table, pteg_count);
 	}
 
 	prot = pgprot_val(PAGE_KERNEL);
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 98c91ad18ba7..5fff787da17a 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -69,6 +69,13 @@
 #endif
 #endif
 
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * partition table and process table for ISA 3.0
+ */
+struct prtb_entry *process_tb;
+struct patb_entry *partition_tb;
+#endif
 unsigned long ioremap_bot = IOREMAP_BASE;
 
 #ifdef CONFIG_PPC_MMU_NOHASH
diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c
index 2f95d33cf34a..c9a3e677192a 100644
--- a/arch/powerpc/platforms/ps3/htab.c
+++ b/arch/powerpc/platforms/ps3/htab.c
@@ -63,7 +63,7 @@ static long ps3_hpte_insert(unsigned long hpte_group, unsigned long vpn,
 	vflags &= ~HPTE_V_SECONDARY;
 
 	hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
-	hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize) | rflags;
+	hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize, ssize) | rflags;
 
 	spin_lock_irqsave(&ps3_htab_lock, flags);
 
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 0d4608990702..e4ceba2b1551 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -139,7 +139,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 			 hpte_group, vpn,  pa, rflags, vflags, psize);
 
 	hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
-	hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
+	hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags;
 
 	if (!(vflags & HPTE_V_BOLTED))
 		pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH 09/14] powerpc/mm: Drop WIMG in favour of new constants
  2016-03-07 13:39 ` [PATCH 09/14] powerpc/mm: Drop WIMG in favour of new constants Aneesh Kumar K.V
@ 2016-03-07 17:29   ` kbuild test robot
  2016-03-22  4:59   ` Michael Neuling
  1 sibling, 0 replies; 19+ messages in thread
From: kbuild test robot @ 2016-03-07 17:29 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: kbuild-all, benh, paulus, mpe, linuxppc-dev, Aneesh Kumar K.V

[-- Attachment #1: Type: text/plain, Size: 2932 bytes --]

Hi Aneesh,

[auto build test ERROR on powerpc/next]
[also build test ERROR on next-20160307]
[cannot apply to v4.5-rc7]
[if your patch is applied to the wrong git tree, please drop us a note to help improving the system]

url:    https://github.com/0day-ci/linux/commits/Aneesh-Kumar-K-V/powerpc-mm-Use-big-endian-page-table-for-book3s-64/20160307-232212
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-defconfig (attached as .config)
reproduce:
        wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=powerpc 

All errors (new ones prefixed by >>):

   arch/powerpc/sysdev/axonram.c: In function 'axon_ram_probe':
>> arch/powerpc/sysdev/axonram.c:208:31: error: '_PAGE_NO_CACHE' undeclared (first use in this function)
       bank->ph_addr, bank->size, _PAGE_NO_CACHE);
                                  ^
   arch/powerpc/sysdev/axonram.c:208:31: note: each undeclared identifier is reported only once for each function it appears in
--
   drivers/pcmcia/electra_cf.c: In function 'electra_cf_probe':
>> drivers/pcmcia/electra_cf.c:231:3: error: '_PAGE_NO_CACHE' undeclared (first use in this function)
      _PAGE_NO_CACHE | _PAGE_GUARDED) == NULL)) {
      ^
   drivers/pcmcia/electra_cf.c:231:3: note: each undeclared identifier is reported only once for each function it appears in
>> drivers/pcmcia/electra_cf.c:231:20: error: '_PAGE_GUARDED' undeclared (first use in this function)
      _PAGE_NO_CACHE | _PAGE_GUARDED) == NULL)) {
                       ^

vim +/_PAGE_NO_CACHE +208 arch/powerpc/sysdev/axonram.c

dbdf04c4 Maxim Shchetynin 2007-07-20  202  
dbdf04c4 Maxim Shchetynin 2007-07-20  203  	dev_info(&device->dev, "Register DDR2 memory device %s%d with %luMB\n",
dbdf04c4 Maxim Shchetynin 2007-07-20  204  			AXON_RAM_DEVICE_NAME, axon_ram_bank_id, bank->size >> 20);
dbdf04c4 Maxim Shchetynin 2007-07-20  205  
dbdf04c4 Maxim Shchetynin 2007-07-20  206  	bank->ph_addr = resource.start;
40f1ce7f Anton Blanchard  2011-05-08  207  	bank->io_addr = (unsigned long) ioremap_prot(
dbdf04c4 Maxim Shchetynin 2007-07-20 @208  			bank->ph_addr, bank->size, _PAGE_NO_CACHE);
dbdf04c4 Maxim Shchetynin 2007-07-20  209  	if (bank->io_addr == 0) {
dbdf04c4 Maxim Shchetynin 2007-07-20  210  		dev_err(&device->dev, "ioremap() failed\n");
dbdf04c4 Maxim Shchetynin 2007-07-20  211  		rc = -EFAULT;

:::::: The code at line 208 was first introduced by commit
:::::: dbdf04c40161f81d74e27f04e201acb3a5dfad69 [CELL] driver for DDR2 memory on AXON

:::::: TO: Maxim Shchetynin <maxim@de.ibm.com>
:::::: CC: Arnd Bergmann <arnd@klappe.arndb.de>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/octet-stream, Size: 21917 bytes --]

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 09/14] powerpc/mm: Drop WIMG in favour of new constants
  2016-03-07 13:39 ` [PATCH 09/14] powerpc/mm: Drop WIMG in favour of new constants Aneesh Kumar K.V
  2016-03-07 17:29   ` kbuild test robot
@ 2016-03-22  4:59   ` Michael Neuling
  2016-03-26  6:12     ` Aneesh Kumar K.V
  1 sibling, 1 reply; 19+ messages in thread
From: Michael Neuling @ 2016-03-22  4:59 UTC (permalink / raw)
  To: Aneesh Kumar K.V, benh, paulus, mpe; +Cc: linuxppc-dev

On Mon, 2016-03-07 at 19:09 +0530, Aneesh Kumar K.V wrote:

> PowerISA 3.0 introduce three pte bits with the below meaning
> 000 ->  Normal Memory
> 001 ->  Strong Access Order
> 010 -> Non idempotent I/O ( Also cache inhibited and guarded)
> 100 -> Tolerant I/O (Cache inhibited)

Which PTE are you talking about here?  Radix, new Hash (ISA 3.0) or
old Hash (ISA 2.07)?

A couple more comments below

> We drop the existing WIMG bits in linux page table in favour of above
> contants. We loose _PAGE_WRITETHRU with this conversion. We only use
> writethru via pgprot_cached_wthru() which is used by fbdev/controlfb.c
> which is Apple control display and also PPC32.
>
> With respect to _PAGE_COHERENCE, we have been marking hpte
> always coherent for some time now. htab_convert_pte_flags always added
> HPTE_R_M.
>
> NOTE: KVM changes need closer review.
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/book3s/64/hash.h | 47 +++++++++----------------------
>  arch/powerpc/include/asm/kvm_book3s_64.h  | 29 ++++++++++---------
>  arch/powerpc/kvm/book3s_64_mmu_hv.c       | 11 ++++----
>  arch/powerpc/kvm/book3s_hv_rm_mmu.c       | 12 ++++----
>  arch/powerpc/mm/hash64_64k.c              |  2 +-
>  arch/powerpc/mm/hash_utils_64.c           | 14 ++++-----
>  arch/powerpc/mm/pgtable.c                 |  2 +-
>  arch/powerpc/mm/pgtable_64.c              |  4 ---
>  arch/powerpc/platforms/pseries/lpar.c     |  4 ---
>  9 files changed, 48 insertions(+), 77 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
> index c2b567456796..edd3d47ef9a4 100644
> --- a/arch/powerpc/include/asm/book3s/64/hash.h
> +++ b/arch/powerpc/include/asm/book3s/64/hash.h
> @@ -21,11 +21,9 @@
>  #define _PAGE_RW		(_PAGE_READ | _PAGE_WRITE)
>  #define _PAGE_RWX		(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
>  #define _PAGE_PRIVILEGED	0x00008 /* page can only be access by kernel */
> -#define _PAGE_GUARDED		0x00010 /* G: guarded (side-effect) page */
> -/* M (memory coherence) is always set in the HPTE, so we don't need it here */
> -#define _PAGE_COHERENT		0x0
> -#define _PAGE_NO_CACHE		0x00020 /* I: cache inhibit */
> -#define _PAGE_WRITETHRU		0x00040 /* W: cache write-through */
> +#define _PAGE_SAO		0x00010 /* Strong access order */
> +#define _PAGE_NON_IDEMPOTENT	0x00020 /* non idempotent memory */
> +#define _PAGE_TOLERANT		0x00040 /* tolerant memory, cache inhibited */
>  #define _PAGE_DIRTY		0x00080 /* C: page changed */
>  #define _PAGE_ACCESSED		0x00100 /* R: page referenced */
>  #define _PAGE_SPECIAL		0x00400 /* software: special page */
> @@ -122,9 +120,6 @@
>  #define _PAGE_KERNEL_RWX	(_PAGE_PRIVILEGED | _PAGE_DIRTY | \
>  				 _PAGE_RW | _PAGE_EXEC)
>
> -/* Strong Access Ordering */
> -#define _PAGE_SAO		(_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT)
> -
>  /* No page size encoding in the linux PTE */
>  #define _PAGE_PSIZE		0
>
> @@ -150,10 +145,9 @@
>  /*
>   * Mask of bits returned by pte_pgprot()
>   */
> -#define PAGE_PROT_BITS	(_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
> -			 _PAGE_WRITETHRU | _PAGE_4K_PFN | \
> -			 _PAGE_PRIVILEGED | _PAGE_ACCESSED |  _PAGE_READ |\
> -			 _PAGE_WRITE |  _PAGE_DIRTY | _PAGE_EXEC | \
> +#define PAGE_PROT_BITS  (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT | \
> +			 _PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \
> +			 _PAGE_READ | _PAGE_WRITE |  _PAGE_DIRTY | _PAGE_EXEC | \
>  			 _PAGE_SOFT_DIRTY)
>  /*is this
>   * We define 2 sets of base prot bits, one for basic pages (ie,
> @@ -162,7 +156,7 @@
>   * the processor might need it for DMA coherency.
>   */
>  #define _PAGE_BASE_NC	(_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
> -#define _PAGE_BASE	(_PAGE_BASE_NC | _PAGE_COHERENT)
> +#define _PAGE_BASE	(_PAGE_BASE_NC)
>
>  /* Permission masks used to generate the __P and __S table,
>   *
> @@ -203,9 +197,9 @@
>  /* Permission masks used for kernel mappings */
>  #define PAGE_KERNEL	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
>  #define PAGE_KERNEL_NC	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
> -				 _PAGE_NO_CACHE)
> +				 _PAGE_TOLERANT)
>  #define PAGE_KERNEL_NCG	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
> -				 _PAGE_NO_CACHE | _PAGE_GUARDED)
> +				 _PAGE_NON_IDEMPOTENT)
>  #define PAGE_KERNEL_X	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
>  #define PAGE_KERNEL_RO	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
>  #define PAGE_KERNEL_ROX	__pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
> @@ -516,41 +510,26 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
>   * Macro to mark a page protection value as "uncacheable".
>   */
>
> -#define _PAGE_CACHE_CTL	(_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \
> -			 _PAGE_WRITETHRU)
> +#define _PAGE_CACHE_CTL	(_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT)

The comment here says 'Macro to mark a page protection value as
"uncacheable"' but why do we put _PAGE_SAO in that?

>
>  #define pgprot_noncached pgprot_noncached
>  static inline pgprot_t pgprot_noncached(pgprot_t prot)
>  {
>  	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
> -			_PAGE_NO_CACHE | _PAGE_GUARDED);
> +			_PAGE_NON_IDEMPOTENT);
>  }
>
>  #define pgprot_noncached_wc pgprot_noncached_wc
>  static inline pgprot_t pgprot_noncached_wc(pgprot_t prot)
>  {
>  	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
> -			_PAGE_NO_CACHE);
> +			_PAGE_TOLERANT);
>  }
>
>  #define pgprot_cached pgprot_cached
>  static inline pgprot_t pgprot_cached(pgprot_t prot)
>  {
> -	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
> -			_PAGE_COHERENT);
> -}
> -
> -#define pgprot_cached_wthru pgprot_cached_wthru
> -static inline pgprot_t pgprot_cached_wthru(pgprot_t prot)
> -{
> -	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
> -			_PAGE_COHERENT | _PAGE_WRITETHRU);
> -}
> -
> -#define pgprot_cached_noncoherent pgprot_cached_noncoherent
> -static inline pgprot_t pgprot_cached_noncoherent(pgprot_t prot)
> -{
> -	return __pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL);
> +	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL));
>  }
>
>  #define pgprot_writecombine pgprot_writecombine
> diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
> index f9a7a89a3e4f..f23b1698ad3c 100644
> --- a/arch/powerpc/include/asm/kvm_book3s_64.h
> +++ b/arch/powerpc/include/asm/kvm_book3s_64.h
> @@ -278,19 +278,24 @@ static inline unsigned long hpte_make_readonly(unsigned long ptel)
>  	return ptel;
>  }
>
> -static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
> +static inline bool hpte_cache_flags_ok(unsigned long hptel, bool is_ci)
>  {
> -	unsigned int wimg = ptel & HPTE_R_WIMG;
> +	unsigned int wimg = hptel & HPTE_R_WIMG;
>
>  	/* Handle SAO */
>  	if (wimg == (HPTE_R_W | HPTE_R_I | HPTE_R_M) &&
>  	    cpu_has_feature(CPU_FTR_ARCH_206))
>  		wimg = HPTE_R_M;
>
> -	if (!io_type)
> +	if (!is_ci)
>  		return wimg == HPTE_R_M;
> -
> -	return (wimg & (HPTE_R_W | HPTE_R_I)) == io_type;
> +	/*
> +	 * if host is mapped cache inhibited, make sure hptel also have
> +	 * cache inhibited.
> +	 */
> +	if (wimg & HPTE_R_W) /* FIXME!! is this ok for all guest. ? */
> +		return false;
> +	return !!(wimg & HPTE_R_I);
>  }
>
>  /*
> @@ -333,16 +338,12 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing)
>  	return new_pte;
>  }
>
> -
> -/* Return HPTE cache control bits corresponding to Linux pte bits */
> -static inline unsigned long hpte_cache_bits(unsigned long pte_val)
> +/*
> + * check whether the mapping is cache inhibited
> + */
> +static inline bool hpte_is_cache_inhibited(unsigned long pte_val)
>  {
> -#if _PAGE_NO_CACHE == HPTE_R_I && _PAGE_WRITETHRU == HPTE_R_W
> -	return pte_val & (HPTE_R_W | HPTE_R_I);
> -#else
> -	return ((pte_val & _PAGE_NO_CACHE) ? HPTE_R_I : 0) +
> -		((pte_val & _PAGE_WRITETHRU) ? HPTE_R_W : 0);
> -#endif
> +	return !!(pte_val & (_PAGE_TOLERANT | _PAGE_NON_IDEMPOTENT));

Can we use _PAGE_CACHE_CTL here?

>  }
>
>  static inline bool hpte_read_permission(unsigned long pp, unsigned long key)
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> index c7b78d8336b2..40ad06c41ca1 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> @@ -447,7 +447,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
>  	struct revmap_entry *rev;
>  	struct page *page, *pages[1];
>  	long index, ret, npages;
> -	unsigned long is_io;
> +	bool is_ci;
>  	unsigned int writing, write_ok;
>  	struct vm_area_struct *vma;
>  	unsigned long rcbits;
> @@ -503,7 +503,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, =
struct kvm_vcpu *vcpu,
>  	smp_rmb();
> =20
>  	ret =3D -EFAULT;
> -	is_io =3D 0;
> +	is_ci =3D false;
>  	pfn =3D 0;
>  	page =3D NULL;
>  	pte_size =3D PAGE_SIZE;
> @@ -521,7 +521,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, =
struct kvm_vcpu *vcpu,
>  			pfn =3D vma->vm_pgoff +
>  				((hva - vma->vm_start) >> PAGE_SHIFT);
>  			pte_size =3D psize;
> -			is_io =3D hpte_cache_bits(pgprot_val(vma->vm_page_prot));
> +			is_ci =3D hpte_is_cache_inhibited(pgprot_val(vma->vm_page_prot));
>  			write_ok =3D vma->vm_flags & VM_WRITE;
>  		}
>  		up_read(&current->mm->mmap_sem);
> @@ -558,10 +558,9 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run,=
 struct kvm_vcpu *vcpu,
>  		goto out_put;
> =20
>  	/* Check WIMG vs. the actual page we're accessing */
> -	if (!hpte_cache_flags_ok(r, is_io)) {
> -		if (is_io)
> +	if (!hpte_cache_flags_ok(r, is_ci)) {
> +		if (is_ci)
>  			goto out_put;
> -
>  		/*
>  		 * Allow guest to map emulated device memory as
>  		 * uncacheable, but actually make it cacheable.
> diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3=
s_hv_rm_mmu.c
> index 4cb8db05f3e5..3ebd620589a9 100644
> --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> @@ -175,7 +175,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long=
 flags,
>  	unsigned long g_ptel;
>  	struct kvm_memory_slot *memslot;
>  	unsigned hpage_shift;
> -	unsigned long is_io;
> +	bool is_ci;
>  	unsigned long *rmap;
>  	pte_t *ptep;
>  	unsigned int writing;
> @@ -199,7 +199,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long=
 flags,
>  	gfn =3D gpa >> PAGE_SHIFT;
>  	memslot =3D __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
>  	pa =3D 0;
> -	is_io =3D ~0ul;
> +	is_ci =3D false;
>  	rmap =3D NULL;
>  	if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
>  		/* Emulated MMIO - mark this with key=3D31 */
> @@ -250,7 +250,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long=
 flags,
>  			if (writing && !pte_write(pte))
>  				/* make the actual HPTE be read-only */
>  				ptel =3D hpte_make_readonly(ptel);
> -			is_io =3D hpte_cache_bits(pte_val(pte));
> +			is_ci =3D hpte_is_cache_inhibited(pte_val(pte));
>  			pa =3D pte_pfn(pte) << PAGE_SHIFT;
>  			pa |=3D hva & (host_pte_size - 1);
>  			pa |=3D gpa & ~PAGE_MASK;
> @@ -267,9 +267,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long=
 flags,
>  	else
>  		pteh |=3D HPTE_V_ABSENT;
> =20
> -	/* Check WIMG */
> -	if (is_io !=3D ~0ul && !hpte_cache_flags_ok(ptel, is_io)) {
> -		if (is_io)
> +	/*If we had host pte mapping then  Check WIMG */
> +	if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) {
> +		if (is_ci)
>  			return H_PARAMETER;
>  		/*
>  		 * Allow guest to map emulated device memory as
> diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
> index f33b410d6c8a..243c822913e4 100644
> --- a/arch/powerpc/mm/hash64_64k.c
> +++ b/arch/powerpc/mm/hash64_64k.c
> @@ -248,7 +248,7 @@ int __hash_page_64K(unsigned long ea, unsigned long a=
ccess,
>  		 * If so, bail out and refault as a 4k page
>  		 */
>  		if (!mmu_has_feature(MMU_FTR_CI_LARGE_PAGE) &&
> -		    unlikely(old_pte & _PAGE_NO_CACHE))
> +		    unlikely(old_pte & _PAGE_TOLERANT))
>  			return 0;
>  		/*
>  		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
> diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils=
_64.c
> index c81c08aaff0e..728acd17f2a6 100644
> --- a/arch/powerpc/mm/hash_utils_64.c
> +++ b/arch/powerpc/mm/hash_utils_64.c
> @@ -192,12 +192,12 @@ unsigned long htab_convert_pte_flags(unsigned long =
pteflags)
>  	/*
>  	 * Add in WIG bits
>  	 */
> -	if (pteflags & _PAGE_WRITETHRU)
> -		rflags |=3D HPTE_R_W;
> -	if (pteflags & _PAGE_NO_CACHE)
> +	if (pteflags & _PAGE_TOLERANT)
>  		rflags |=3D HPTE_R_I;
> -	if (pteflags & _PAGE_GUARDED)
> -		rflags |=3D HPTE_R_G;
> +	if (pteflags & _PAGE_NON_IDEMPOTENT)
> +		rflags |=3D (HPTE_R_I | HPTE_R_G);
> +	if (pteflags & _PAGE_SAO)
> +		rflags |=3D (HPTE_R_I | HPTE_R_W);
> =20
>  	return rflags;
>  }
> @@ -1139,7 +1139,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned lon=
g ea,
>  	 * using non cacheable large pages, then we switch to 4k
>  	 */
>  	if (mmu_ci_restrictions && psize =3D=3D MMU_PAGE_64K &&
> -	    (pte_val(*ptep) & _PAGE_NO_CACHE)) {
> +	    (pte_val(*ptep) & _PAGE_TOLERANT)) {
>  		if (user_region) {
>  			demote_segment_4k(mm, ea);
>  			psize =3D MMU_PAGE_4K;
> @@ -1298,7 +1298,7 @@ void hash_preload(struct mm_struct *mm, unsigned lo=
ng ea,
>  	 * That way we don't have to duplicate all of the logic for segment
>  	 * page size demotion here
>  	 */
> -	if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE))
> +	if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_TOLERANT))
>  		goto out_exit;
>  #endif /* CONFIG_PPC_64K_PAGES */
> =20
> diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
> index 7b492283d502..ad8b6432f7e3 100644
> --- a/arch/powerpc/mm/pgtable.c
> +++ b/arch/powerpc/mm/pgtable.c
> @@ -46,7 +46,7 @@ static inline int pte_looks_normal(pte_t pte)
> =20
>  #if defined(CONFIG_PPC_BOOK3S_64)
>  	if ((pte_val(pte) &
> -	     (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE)) =3D=3D
> +	     (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_TOLERANT)) =3D=3D
>  	    _PAGE_PRESENT) {
>  		if (!(pte_val(pte) & _PAGE_PRIVILEGED))
>  			return 1;
> diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
> index 6f1b7064f822..db924c54f370 100644
> --- a/arch/powerpc/mm/pgtable_64.c
> +++ b/arch/powerpc/mm/pgtable_64.c
> @@ -167,10 +167,6 @@ void __iomem * __ioremap_at(phys_addr_t pa, void *ea=
, unsigned long size,
>  	if ((flags & _PAGE_PRESENT) =3D=3D 0)
>  		flags |=3D pgprot_val(PAGE_KERNEL);
> =20
> -	/* Non-cacheable page cannot be coherent */
> -	if (flags & _PAGE_NO_CACHE)
> -		flags &=3D ~_PAGE_COHERENT;
> -
>  	/* We don't support the 4K PFN hack with ioremap */
>  	if (flags & _PAGE_4K_PFN)
>  		return NULL;
> diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platfor=
ms/pseries/lpar.c
> index 2415a0d31f8f..0d4608990702 100644
> --- a/arch/powerpc/platforms/pseries/lpar.c
> +++ b/arch/powerpc/platforms/pseries/lpar.c
> @@ -152,10 +152,6 @@ static long pSeries_lpar_hpte_insert(unsigned long h=
pte_group,
>  	/* Exact =3D 0                   */
>  	flags =3D 0;
> =20
> -	/* Make pHyp happy */
> -	if ((rflags & _PAGE_NO_CACHE) && !(rflags & _PAGE_WRITETHRU))
> -		hpte_r &=3D ~HPTE_R_M;
> -
>  	if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N))
>  		flags |=3D H_COALESCE_CAND;
> =20

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 05/14] powerpc/mm: Replace _PAGE_USER with _PAGE_PRIVILEGED
  2016-03-07 13:39 ` [PATCH 05/14] powerpc/mm: Replace _PAGE_USER with _PAGE_PRIVILEGED Aneesh Kumar K.V
@ 2016-03-22  6:05   ` Michael Neuling
  2016-03-26  5:32     ` Aneesh Kumar K.V
  0 siblings, 1 reply; 19+ messages in thread
From: Michael Neuling @ 2016-03-22  6:05 UTC (permalink / raw)
  To: Aneesh Kumar K.V, benh, paulus, mpe; +Cc: linuxppc-dev

On Mon, 2016-03-07 at 19:09 +0530, Aneesh Kumar K.V wrote:
> _PAGE_PRIVILEGED means the page can be accessed only by kernel. This is done
> to keep pte bits similar to PowerISA 3.0 radix PTE format. User
> pages are now makred by clearing _PAGE_PRIVILEGED bit.
>
> Previously we allowed kernel to have a privileged page
> in the lower address range(USER_REGION). With this patch such access
> is denied.
>
> We also prevent a kernel access to a non-privileged page in
> higher address range (ie, REGION_ID != 0). Both the above access
> scenario should never happen.

A few comments below.  I didn't find any issues, just some potential
cleanups.

Mikey

> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/book3s/64/hash.h    | 34 ++++++++++++++--------------
>  arch/powerpc/include/asm/book3s/64/pgtable.h | 18 ++++++++++++++-
>  arch/powerpc/mm/hash64_4k.c                  |  2 +-
>  arch/powerpc/mm/hash64_64k.c                 |  4 ++--
>  arch/powerpc/mm/hash_utils_64.c              | 17 ++++++++------
>  arch/powerpc/mm/hugepage-hash64.c            |  2 +-
>  arch/powerpc/mm/hugetlbpage-hash64.c         |  3 ++-
>  arch/powerpc/mm/hugetlbpage.c                |  2 +-
>  arch/powerpc/mm/pgtable.c                    | 15 ++++++++++--
>  arch/powerpc/mm/pgtable_64.c                 | 15 +++++++++---
>  arch/powerpc/platforms/cell/spufs/fault.c    |  2 +-
>  drivers/misc/cxl/fault.c                     |  5 ++--
>  12 files changed, 80 insertions(+), 39 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
> index f092d83fa623..fbefbaa92736 100644
> --- a/arch/powerpc/include/asm/book3s/64/hash.h
> +++ b/arch/powerpc/include/asm/book3s/64/hash.h
> @@ -20,7 +20,7 @@
>  #define _PAGE_READ		0x00004	/* read access allowed */
>  #define _PAGE_RW		(_PAGE_READ | _PAGE_WRITE)
>  #define _PAGE_RWX		(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
> -#define _PAGE_USER		0x00008 /* page may be accessed by userspace */
> +#define _PAGE_PRIVILEGED	0x00008 /* page can only be access by kernel */

/* page can only be accessed by kernel */

Or just

/* kernel access only */

>  #define _PAGE_GUARDED		0x00010 /* G: guarded (side-effect) page */
>  /* M (memory coherence) is always set in the HPTE, so we don't need it here */
>  #define _PAGE_COHERENT		0x0
> @@ -114,10 +114,13 @@
>  #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
>  #endif /* CONFIG_PPC_MM_SLICES */
>
> -/* No separate kernel read-only */
> -#define _PAGE_KERNEL_RW		(_PAGE_RW | _PAGE_DIRTY) /* user access blocked by key */
> +/*
> + * No separate kernel read-only, user access blocked by key
> + */
> +#define _PAGE_KERNEL_RW		(_PAGE_PRIVILEGED | _PAGE_RW | _PAGE_DIRTY)
>  #define _PAGE_KERNEL_RO		 _PAGE_KERNEL_RW
> -#define _PAGE_KERNEL_RWX	(_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC)
> +#define _PAGE_KERNEL_RWX	(_PAGE_PRIVILEGED | _PAGE_DIRTY | \
> +				 _PAGE_RW | _PAGE_EXEC)
>
>  /* Strong Access Ordering */
>  #define _PAGE_SAO		(_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT)
> @@ -149,7 +152,7 @@
>   */
>  #define PAGE_PROT_BITS	(_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
>  			 _PAGE_WRITETHRU | _PAGE_4K_PFN | \
> -			 _PAGE_USER | _PAGE_ACCESSED |  _PAGE_READ |\
> +			 _PAGE_PRIVILEGED | _PAGE_ACCESSED |  _PAGE_READ |\
>  			 _PAGE_WRITE |  _PAGE_DIRTY | _PAGE_EXEC | \
>  			 _PAGE_SOFT_DIRTY)
>  /*
> @@ -171,16 +174,13 @@
>   *
>   * Note due to the way vm flags are laid out, the bits are XWR
>   */
> -#define PAGE_NONE	__pgprot(_PAGE_BASE)
> -#define PAGE_SHARED	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
> -#define PAGE_SHARED_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \
> -				 _PAGE_EXEC)
> -#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ)
> -#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ| \
> -				 _PAGE_EXEC)
> -#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ)
> -#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ| \
> -				 _PAGE_EXEC)
> +#define PAGE_NONE	__pgprot(_PAGE_BASE | _PAGE_PRIVILEGED)
> +#define PAGE_SHARED	__pgprot(_PAGE_BASE | _PAGE_RW)
> +#define PAGE_SHARED_X	__pgprot(_PAGE_BASE | _PAGE_RW | _PAGE_EXEC)
> +#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_READ)
> +#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
> +#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_READ)
> +#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)

Eyeballing these, they seemed to have been converted ok

>
>  #define __P000	PAGE_NONE
>  #define __P001	PAGE_READONLY
> @@ -421,8 +421,8 @@ static inline pte_t pte_clear_soft_dirty(pte_t pte)
>   */
>  static inline int pte_protnone(pte_t pte)
>  {
> -	return (pte_val(pte) &
> -		(_PAGE_PRESENT | _PAGE_USER)) == _PAGE_PRESENT;
> +	return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PRIVILEGED)) ==
> +		(_PAGE_PRESENT | _PAGE_PRIVILEGED);
>  }
>  #endif /* CONFIG_NUMA_BALANCING */
>
> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
> index 4ac6221802ad..97d06de8dbf6 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
> @@ -187,7 +187,7 @@ extern struct page *pgd_page(pgd_t pgd);
>
>  static inline bool pte_user(pte_t pte)
>  {
> -	return (pte_val(pte) & _PAGE_USER);
> +	return !(pte_val(pte) & _PAGE_PRIVILEGED);

This function might be usable in some places you have in the patch.

>  }
>
>  #ifdef CONFIG_MEM_SOFT_DIRTY
> @@ -211,6 +211,22 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
>  }
>  #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
>
> +static inline bool check_pte_access(unsigned long access, unsigned long ptev)
> +{
> +	/*
> +	 * This check for _PAGE_RWX and _PAG_PRESENT bits
> +	 */
> +	if (access & ~ptev)

Is this really doing what the comment says?

Also small typo _PAG_ => _PAGE_

> +		return false;
> +	/*
> +	 * This check for access to privilege space
> +	 */
> +	if ((access & _PAGE_PRIVILEGED) != (ptev & _PAGE_PRIVILEGED))
> +		return false;
> +
> +	return true;
> +}
> +
>  void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
>  void pgtable_cache_init(void);
>
> diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
> index 7ebac279d38e..42ba12c184e1 100644
> --- a/arch/powerpc/mm/hash64_4k.c
> +++ b/arch/powerpc/mm/hash64_4k.c
> @@ -38,7 +38,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
>  		if (unlikely(old_pte & _PAGE_BUSY))
>  			return 0;
>  		/* If PTE permissions don't match, take page fault */

Is this comment still relevant?  Same below.

> -		if (unlikely(access & ~old_pte))
> +		if (unlikely(!check_pte_access(access, old_pte)))
>  			return 1;
>  		/*
>  		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
> diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
> index 83ac9f658733..f33b410d6c8a 100644
> --- a/arch/powerpc/mm/hash64_64k.c
> +++ b/arch/powerpc/mm/hash64_64k.c
> @@ -70,7 +70,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
>  		if (unlikely(old_pte & _PAGE_BUSY))
>  			return 0;
>  		/* If PTE permissions don't match, take page fault */
> -		if (unlikely(access & ~old_pte))
> +		if (unlikely(!check_pte_access(access, old_pte)))
>  			return 1;
>  		/*
>  		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
> @@ -241,7 +241,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
>  		if (unlikely(old_pte & _PAGE_BUSY))
>  			return 0;
>  		/* If PTE permissions don't match, take page fault */
> -		if (unlikely(access & ~old_pte))
> +		if (unlikely(!check_pte_access(access, old_pte)))
>  			return 1;
>  		/*
>  		 * Check if PTE has the cache-inhibit bit set
> diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
> index ec37f4b0a8ff..630603f74056 100644
> --- a/arch/powerpc/mm/hash_utils_64.c
> +++ b/arch/powerpc/mm/hash_utils_64.c
> @@ -174,7 +174,7 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
>  	 * User area is mapped with PP=0x2 for read/write
>  	 * or PP=0x3 for read-only (including writeable but clean pages).
>  	 */
> -	if (pteflags & _PAGE_USER) {
> +	if (!(pteflags & _PAGE_PRIVILEGED)) {

Could use pte_user() here?  Maybe other places too.

>  		if (pteflags & _PAGE_RWX)
>  			rflags |= 0x2;
>  		if (!((pteflags & _PAGE_WRITE) && (pteflags & _PAGE_DIRTY)))
> @@ -1086,7 +1086,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
>  	/* Pre-check access permissions (will be re-checked atomically
>  	 * in __hash_page_XX but this pre-check is a fast path
>  	 */
> -	if (access & ~pte_val(*ptep)) {
> +	if (!check_pte_access(access, pte_val(*ptep))) {
>  		DBG_LOW(" no access !\n");
>  		rc = 1;
>  		goto bail;
> @@ -1224,12 +1224,15 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
>  	if (dsisr & DSISR_ISSTORE)
>  		access |= _PAGE_WRITE;
>  	/*
> -	 * We need to set the _PAGE_USER bit if MSR_PR is set or if we are
> -	 * accessing a userspace segment (even from the kernel). We assume
> -	 * kernel addresses always have the high bit set.
> +	 * We set _PAGE_PRIVILEGED only when
> +	 * kernel mode access kernel space.
> +	 *
> +	 * _PAGE_PRIVILEGED is NOT set
> +	 * 1) when kernel mode access user space
> +	 * 2) user space access kernel space.
>  	 */
> -	if ((msr & MSR_PR) || (REGION_ID(ea) == USER_REGION_ID))
> -		access |= _PAGE_USER;
> +	if (!(msr & MSR_PR) && !(REGION_ID(ea) == USER_REGION_ID))
> +		access |= _PAGE_PRIVILEGED;


This is a bit ugly:
 (!(msr & MSR_PR) && !(REGION_ID(ea) == USER_REGION_ID))

You could set  _PAGE_PRIVILEGED and then clear it using the same if
statement as before. Might be easier to read. ie

 	access |= _PAGE_PRIVILEGED;
	if ((msr & MSR_PR) || (REGION_ID(ea) == USER_REGION_ID))
		access &= ~(_PAGE_PRIVILEGED);



>
>  	if (trap == 0x400)
>  		access |= _PAGE_EXEC;
> diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
> index 39342638a498..182f1d3fe73c 100644
> --- a/arch/powerpc/mm/hugepage-hash64.c
> +++ b/arch/powerpc/mm/hugepage-hash64.c
> @@ -41,7 +41,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
>  		if (unlikely(old_pmd & _PAGE_BUSY))
>  			return 0;
>  		/* If PMD permissions don't match, take page fault */

As above, is this comment still correct?  The comment says
"permissions don't match" but the function call is
"check_pte_access()".  Seems like a different concept.

> -		if (unlikely(access & ~old_pmd))
> +		if (unlikely(!check_pte_access(access, old_pmd)))
>  			return 1;
>  		/*
>  		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
> diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
> index e6e54a04bd32..96765510a49c 100644
> --- a/arch/powerpc/mm/hugetlbpage-hash64.c
> +++ b/arch/powerpc/mm/hugetlbpage-hash64.c
> @@ -51,8 +51,9 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
>  		if (unlikely(old_pte & _PAGE_BUSY))
>  			return 0;
>  		/* If PTE permissions don't match, take page fault */
> -		if (unlikely(access & ~old_pte))
> +		if (unlikely(!check_pte_access(access, old_pte)))

Same comment again....

>  			return 1;
> +
>  		/* Try to lock the PTE, add ACCESSED and DIRTY if it was
>  		 * a write access */
>  		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
> diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
> index 6e52e722d3f2..7201e9c624d5 100644
> --- a/arch/powerpc/mm/hugetlbpage.c
> +++ b/arch/powerpc/mm/hugetlbpage.c
> @@ -1003,7 +1003,7 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
>  		end = pte_end;
>
>  	pte = READ_ONCE(*ptep);
> -	mask = _PAGE_PRESENT | _PAGE_USER | _PAGE_READ;
> +	mask = _PAGE_PRESENT | _PAGE_READ;
>  	if (write)
>  		mask |= _PAGE_WRITE;
>
> diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
> index 98b5c03e344d..7b492283d502 100644
> --- a/arch/powerpc/mm/pgtable.c
> +++ b/arch/powerpc/mm/pgtable.c
> @@ -43,9 +43,20 @@ static inline int is_exec_fault(void)
>   */
>  static inline int pte_looks_normal(pte_t pte)
>  {
> +
> +#if defined(CONFIG_PPC_BOOK3S_64)
> +	if ((pte_val(pte) &
> +	     (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE)) ==
> +	    _PAGE_PRESENT) {
> +		if (!(pte_val(pte) & _PAGE_PRIVILEGED))
> +			return 1;
> +	}
> +	return 0;
> +#else
>  	return (pte_val(pte) &
> -	    (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) ==
> -	    (_PAGE_PRESENT | _PAGE_USER);
> +		(_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) ==
> +		(_PAGE_PRESENT | _PAGE_USER);
> +#endif
>  }
>
>  static struct page *maybe_pte_to_page(pte_t pte)
> diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
> index 00d8d985bba3..441905f7bba4 100644
> --- a/arch/powerpc/mm/pgtable_64.c
> +++ b/arch/powerpc/mm/pgtable_64.c
> @@ -280,8 +280,17 @@ void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
>  	if (flags & _PAGE_WRITE)
>  		flags |= _PAGE_DIRTY;
>
> -	/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
> -	flags &= ~(_PAGE_USER | _PAGE_EXEC);
> +	/* we don't want to let _PAGE_EXEC leak out */
> +	flags &= ~_PAGE_EXEC;
> +	/*
> +	 * Force kernel mapping.
> +	 */
> +#if defined(CONFIG_PPC_BOOK3S_64)
> +	flags |= _PAGE_PRIVILEGED;
> +#else
> +	flags &= ~_PAGE_USER;
> +#endif
> +
>
>  #ifdef _PAGE_BAP_SR
>  	/* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format
> @@ -669,7 +678,7 @@ void pmdp_huge_split_prepare(struct vm_area_struct *vma,
>  	 * the translation is still valid, because we will withdraw
>  	 * pgtable_t after this.
>  	 */
> -	pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_USER, 0);
> +	pmd_hugepage_update(vma->vm_mm, address, pmdp, 0, _PAGE_PRIVILEGED);
>  }
>
>
> diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c
> index c3a3bf1745b7..e29e4d5afa2d 100644
> --- a/arch/powerpc/platforms/cell/spufs/fault.c
> +++ b/arch/powerpc/platforms/cell/spufs/fault.c

You need to CC the spufs maintainer for this one.


> @@ -141,7 +141,7 @@ int spufs_handle_class1(struct spu_context *ctx)
>  	/* we must not hold the lock when entering copro_handle_mm_fault */
>  	spu_release(ctx);
>
> -	access = (_PAGE_PRESENT | _PAGE_READ | _PAGE_USER);
> +	access = (_PAGE_PRESENT | _PAGE_READ);
>  	access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_WRITE : 0UL;
>  	local_irq_save(flags);
>  	ret = hash_page(ea, access, 0x300, dsisr);
> diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c
> index a3d5e1e16c21..23e37cd41e64 100644
> --- a/drivers/misc/cxl/fault.c
> +++ b/drivers/misc/cxl/fault.c

You need to CC both cxl driver maintainers for this one.

> @@ -152,8 +152,9 @@ static void cxl_handle_page_fault(struct cxl_context *ctx,
>  	access = _PAGE_PRESENT | _PAGE_READ;
>  	if (dsisr & CXL_PSL_DSISR_An_S)
>  		access |= _PAGE_WRITE;
> -	if ((!ctx->kernel) || ~(dar & (1ULL << 63)))
> -		access |= _PAGE_USER;
> +
> +	if (ctx->kernel && (dar & (1ULL << 63)))
> +		access |= _PAGE_PRIVILEGED;
>
>  	if (dsisr & DSISR_NOHPTE)
>  		inv_flags |= HPTE_NOHPTE_UPDATE;

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 05/14] powerpc/mm: Replace _PAGE_USER with _PAGE_PRIVILEGED
  2016-03-22  6:05   ` Michael Neuling
@ 2016-03-26  5:32     ` Aneesh Kumar K.V
  0 siblings, 0 replies; 19+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-26  5:32 UTC (permalink / raw)
  To: Michael Neuling, benh, paulus, mpe; +Cc: linuxppc-dev

Michael Neuling <mikey@neuling.org> writes:

> On Mon, 2016-03-07 at 19:09 +0530, Aneesh Kumar K.V wrote:
>> _PAGE_PRIVILEGED means the page can be accessed only by kernel. This is done
>> to keep pte bits similar to PowerISA 3.0 radix PTE format. User
>> pages are now makred by clearing _PAGE_PRIVILEGED bit.
>> 
>> Previously we allowed kernel to have a privileged page
>> in the lower address range(USER_REGION). With this patch such access
>> is denied.
>> 
>> We also prevent a kernel access to a non-privileged page in
>> higher address range (ie, REGION_ID != 0). Both the above access
>> scenario should never happen.
>
> A few comments below.  I didn't find any issues, just some potential
> cleanups.
>
> Mikey
>
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
>> ---
>>  arch/powerpc/include/asm/book3s/64/hash.h    | 34 ++++++++++++++--------------
>>  arch/powerpc/include/asm/book3s/64/pgtable.h | 18 ++++++++++++++-
>>  arch/powerpc/mm/hash64_4k.c                  |  2 +-
>>  arch/powerpc/mm/hash64_64k.c                 |  4 ++--
>>  arch/powerpc/mm/hash_utils_64.c              | 17 ++++++++------
>>  arch/powerpc/mm/hugepage-hash64.c            |  2 +-
>>  arch/powerpc/mm/hugetlbpage-hash64.c         |  3 ++-
>>  arch/powerpc/mm/hugetlbpage.c                |  2 +-
>>  arch/powerpc/mm/pgtable.c                    | 15 ++++++++++--
>>  arch/powerpc/mm/pgtable_64.c                 | 15 +++++++++---
>>  arch/powerpc/platforms/cell/spufs/fault.c    |  2 +-
>>  drivers/misc/cxl/fault.c                     |  5 ++--
>>  12 files changed, 80 insertions(+), 39 deletions(-)
>> 
>> diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
>> index f092d83fa623..fbefbaa92736 100644
>> --- a/arch/powerpc/include/asm/book3s/64/hash.h
>> +++ b/arch/powerpc/include/asm/book3s/64/hash.h
>> @@ -20,7 +20,7 @@
>>  #define _PAGE_READ		0x00004	/* read access allowed */
>>  #define _PAGE_RW		(_PAGE_READ | _PAGE_WRITE)
>>  #define _PAGE_RWX		(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
>> -#define _PAGE_USER		0x00008 /* page may be accessed by userspace */
>> +#define _PAGE_PRIVILEGED	0x00008 /* page can only be access by kernel */
>
> /* page can only be accessed by kernel */
>
> Or just
>
> /* kernel access only */
>

fixed

>>  #define _PAGE_GUARDED		0x00010 /* G: guarded (side-effect) page */
>>  /* M (memory coherence) is always set in the HPTE, so we don't need it here */
>>  #define _PAGE_COHERENT		0x0
>> @@ -114,10 +114,13 @@
>>  #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
>>  #endif /* CONFIG_PPC_MM_SLICES */
>>  
>> -/* No separate kernel read-only */
>> -#define _PAGE_KERNEL_RW		(_PAGE_RW | _PAGE_DIRTY) /* user access blocked by key */
>> +/*
>> + * No separate kernel read-only, user access blocked by key
>> + */
>> +#define _PAGE_KERNEL_RW		(_PAGE_PRIVILEGED | _PAGE_RW | _PAGE_DIRTY)
>>  #define _PAGE_KERNEL_RO		 _PAGE_KERNEL_RW
>> -#define _PAGE_KERNEL_RWX	(_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC)
>> +#define _PAGE_KERNEL_RWX	(_PAGE_PRIVILEGED | _PAGE_DIRTY | \
>> +				 _PAGE_RW | _PAGE_EXEC)
>>  
>>  /* Strong Access Ordering */
>>  #define _PAGE_SAO		(_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT)
>> @@ -149,7 +152,7 @@
>>   */
>>  #define PAGE_PROT_BITS	(_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
>>  			 _PAGE_WRITETHRU | _PAGE_4K_PFN | \
>> -			 _PAGE_USER | _PAGE_ACCESSED |  _PAGE_READ |\
>> +			 _PAGE_PRIVILEGED | _PAGE_ACCESSED |  _PAGE_READ |\
>>  			 _PAGE_WRITE |  _PAGE_DIRTY | _PAGE_EXEC | \
>>  			 _PAGE_SOFT_DIRTY)
>>  /*
>> @@ -171,16 +174,13 @@
>>   *
>>   * Note due to the way vm flags are laid out, the bits are XWR
>>   */
>> -#define PAGE_NONE	__pgprot(_PAGE_BASE)
>> -#define PAGE_SHARED	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
>> -#define PAGE_SHARED_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \
>> -				 _PAGE_EXEC)
>> -#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ)
>> -#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ| \
>> -				 _PAGE_EXEC)
>> -#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ)
>> -#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ| \
>> -				 _PAGE_EXEC)
>> +#define PAGE_NONE	__pgprot(_PAGE_BASE | _PAGE_PRIVILEGED)
>> +#define PAGE_SHARED	__pgprot(_PAGE_BASE | _PAGE_RW)
>> +#define PAGE_SHARED_X	__pgprot(_PAGE_BASE | _PAGE_RW | _PAGE_EXEC)
>> +#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_READ)
>> +#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
>> +#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_READ)
>> +#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
>
> Eyeballing these, they seemed to have been converted ok
>
>>  
>>  #define __P000	PAGE_NONE
>>  #define __P001	PAGE_READONLY
>> @@ -421,8 +421,8 @@ static inline pte_t pte_clear_soft_dirty(pte_t pte)
>>   */
>>  static inline int pte_protnone(pte_t pte)
>>  {
>> -	return (pte_val(pte) &
>> -		(_PAGE_PRESENT | _PAGE_USER)) == _PAGE_PRESENT;
>> +	return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PRIVILEGED)) ==
>> +		(_PAGE_PRESENT | _PAGE_PRIVILEGED);
>>  }
>>  #endif /* CONFIG_NUMA_BALANCING */
>>  
>> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> index 4ac6221802ad..97d06de8dbf6 100644
>> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
>> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> @@ -187,7 +187,7 @@ extern struct page *pgd_page(pgd_t pgd);
>>  
>>  static inline bool pte_user(pte_t pte)
>>  {
>> -	return (pte_val(pte) & _PAGE_USER);
>> +	return !(pte_val(pte) & _PAGE_PRIVILEGED);
>
> This function might be usable in some places you have in the patch.
>
>>  }
>>  
>>  #ifdef CONFIG_MEM_SOFT_DIRTY
>> @@ -211,6 +211,22 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
>>  }
>>  #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
>>  
>> +static inline bool check_pte_access(unsigned long access, unsigned long ptev)
>> +{
>> +	/*
>> +	 * This check for _PAGE_RWX and _PAG_PRESENT bits
>> +	 */
>> +	if (access & ~ptev)
>
> Is this really doing what the comment says?

Yes. We make sure we have the right privilege bit in access when
we call this function. Since privileged access is now checked by the
presence of _PAGE_PRIVILEGED, the subtractive check above won't catch that.
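
To make that concrete, here is a small illustrative sketch (not from the
patch itself; it only plugs in the definitions quoted above) of a user-mode
access to a kernel page under the new scheme:

	/* Illustrative only: user-mode fault against a kernel mapping. */
	unsigned long ptev   = _PAGE_PRESENT | _PAGE_READ | _PAGE_PRIVILEGED;
	unsigned long access = _PAGE_PRESENT | _PAGE_READ;	/* no _PAGE_PRIVILEGED */

	/* Old-style subtractive test: access & ~ptev == 0, so on its own it   */
	/* would allow the access, because privilege is now signalled by the   */
	/* *presence* of a bit in the pte rather than in the access mask.      */
	/* check_pte_access(access, ptev) returns false, since the             */
	/* _PAGE_PRIVILEGED bits differ, so we correctly take the fault.       */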

>
> Also small typo _PAG_ => _PAGE_

fixed

>
>> +		return false;
>> +	/*
>> +	 * This check for access to privilege space
>> +	 */
>> +	if ((access & _PAGE_PRIVILEGED) != (ptev & _PAGE_PRIVILEGED))
>> +		return false;
>> +
>> +	return true;
>> +}
>> +
>>  void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
>>  void pgtable_cache_init(void);
>>  
>> diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
>> index 7ebac279d38e..42ba12c184e1 100644
>> --- a/arch/powerpc/mm/hash64_4k.c
>> +++ b/arch/powerpc/mm/hash64_4k.c
>> @@ -38,7 +38,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
>>  		if (unlikely(old_pte & _PAGE_BUSY))
>>  			return 0;
>>  		/* If PTE permissions don't match, take page fault */
>
> Is this comment still relevant?  Same below.

I would guess yes: if the requested permissions, which now include R, W, X
and the privilege level, don't match what is in the pte, we take a fault.

>
>> -		if (unlikely(access & ~old_pte))
>> +		if (unlikely(!check_pte_access(access, old_pte)))
>>  			return 1;
>>  		/*
>>  		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
>> diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
>> index 83ac9f658733..f33b410d6c8a 100644
>> --- a/arch/powerpc/mm/hash64_64k.c
>> +++ b/arch/powerpc/mm/hash64_64k.c
>> @@ -70,7 +70,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
>>  		if (unlikely(old_pte & _PAGE_BUSY))
>>  			return 0;
>>  		/* If PTE permissions don't match, take page fault */
>> -		if (unlikely(access & ~old_pte))
>> +		if (unlikely(!check_pte_access(access, old_pte)))
>>  			return 1;
>>  		/*
>>  		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
>> @@ -241,7 +241,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
>>  		if (unlikely(old_pte & _PAGE_BUSY))
>>  			return 0;
>>  		/* If PTE permissions don't match, take page fault */
>> -		if (unlikely(access & ~old_pte))
>> +		if (unlikely(!check_pte_access(access, old_pte)))
>>  			return 1;
>>  		/*
>>  		 * Check if PTE has the cache-inhibit bit set
>> diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
>> index ec37f4b0a8ff..630603f74056 100644
>> --- a/arch/powerpc/mm/hash_utils_64.c
>> +++ b/arch/powerpc/mm/hash_utils_64.c
>> @@ -174,7 +174,7 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
>>  	 * User area is mapped with PP=0x2 for read/write
>>  	 * or PP=0x3 for read-only (including writeable but clean pages).
>>  	 */
>> -	if (pteflags & _PAGE_USER) {
>> +	if (!(pteflags & _PAGE_PRIVILEGED)) {
>
> Could use pte_user() here?  Maybe other places too.


pte_user() takes a pte_t as its argument, hence it is open-coded here.
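
(If the open coding ever gets repetitive, a raw-flags variant would be
trivial; this is only a sketch, not something in the posted series:)

	/* Hypothetical helper taking raw pte flags, not part of this series. */
	static inline bool pteflags_user(unsigned long pteflags)
	{
		return !(pteflags & _PAGE_PRIVILEGED);
	}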

>
>>  		if (pteflags & _PAGE_RWX)
>>  			rflags |= 0x2;
>>  		if (!((pteflags & _PAGE_WRITE) && (pteflags & _PAGE_DIRTY)))
>> @@ -1086,7 +1086,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
>>  	/* Pre-check access permissions (will be re-checked atomically
>>  	 * in __hash_page_XX but this pre-check is a fast path
>>  	 */
>> -	if (access & ~pte_val(*ptep)) {
>> +	if (!check_pte_access(access, pte_val(*ptep))) {
>>  		DBG_LOW(" no access !\n");
>>  		rc = 1;
>>  		goto bail;
>> @@ -1224,12 +1224,15 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
>>  	if (dsisr & DSISR_ISSTORE)
>>  		access |= _PAGE_WRITE;
>>  	/*
>> -	 * We need to set the _PAGE_USER bit if MSR_PR is set or if we are
>> -	 * accessing a userspace segment (even from the kernel). We assume
>> -	 * kernel addresses always have the high bit set.
>> +	 * We set _PAGE_PRIVILEGED only when
>> +	 * kernel mode access kernel space.
>> +	 *
>> +	 * _PAGE_PRIVILEGED is NOT set
>> +	 * 1) when kernel mode access user space
>> +	 * 2) user space access kernel space.
>>  	 */
>> -	if ((msr & MSR_PR) || (REGION_ID(ea) == USER_REGION_ID))
>> -		access |= _PAGE_USER;
>> +	if (!(msr & MSR_PR) && !(REGION_ID(ea) == USER_REGION_ID))
>> +		access |= _PAGE_PRIVILEGED;
>
>
> This is a bit ugly:
>  (!(msr & MSR_PR) && !(REGION_ID(ea) == USER_REGION_ID))
>
> You could set  _PAGE_PRIVILEGED and then clear it using the same if
> statement as before. Might be easier to read. ie
>
>  	access |= _PAGE_PRIVILEGED;
> 	if ((msr & MSR_PR) || (REGION_ID(ea) == USER_REGION_ID))
> 		access &= ~(_PAGE_PRIVILEGED);
>

fixed

>
>
>>  
>>  	if (trap == 0x400)
>>  		access |= _PAGE_EXEC;
>> diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
>> index 39342638a498..182f1d3fe73c 100644
>> --- a/arch/powerpc/mm/hugepage-hash64.c
>> +++ b/arch/powerpc/mm/hugepage-hash64.c
>> @@ -41,7 +41,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
>>  		if (unlikely(old_pmd & _PAGE_BUSY))
>>  			return 0;
>>  		/* If PMD permissions don't match, take page fault */
>
> As above, is this comment still correct?  The comment says
> "permissions don't match" but the function call is
> "check_pte_access()".  Seems like a different concept.
>
>> -		if (unlikely(access & ~old_pmd))
>> +		if (unlikely(!check_pte_access(access, old_pmd)))
>>  			return 1;
>>  		/*
>>  		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
>> diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
>> index e6e54a04bd32..96765510a49c 100644
>> --- a/arch/powerpc/mm/hugetlbpage-hash64.c
>> +++ b/arch/powerpc/mm/hugetlbpage-hash64.c
>> @@ -51,8 +51,9 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
>>  		if (unlikely(old_pte & _PAGE_BUSY))
>>  			return 0;
>>  		/* If PTE permissions don't match, take page fault */
>> -		if (unlikely(access & ~old_pte))
>> +		if (unlikely(!check_pte_access(access, old_pte)))
>
> Same comment again.... 
>
>>  			return 1;
>> +
>>  		/* Try to lock the PTE, add ACCESSED and DIRTY if it was
>>  		 * a write access */
>>  		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
>> diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
>> index 6e52e722d3f2..7201e9c624d5 100644
>> --- a/arch/powerpc/mm/hugetlbpage.c
>> +++ b/arch/powerpc/mm/hugetlbpage.c
>> @@ -1003,7 +1003,7 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
>>  		end = pte_end;
>>  
>>  	pte = READ_ONCE(*ptep);
>> -	mask = _PAGE_PRESENT | _PAGE_USER | _PAGE_READ;
>> +	mask = _PAGE_PRESENT | _PAGE_READ;
>>  	if (write)
>>  		mask |= _PAGE_WRITE;
>>  
>> diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
>> index 98b5c03e344d..7b492283d502 100644
>> --- a/arch/powerpc/mm/pgtable.c
>> +++ b/arch/powerpc/mm/pgtable.c
>> @@ -43,9 +43,20 @@ static inline int is_exec_fault(void)
>>   */
>>  static inline int pte_looks_normal(pte_t pte)
>>  {
>> +
>> +#if defined(CONFIG_PPC_BOOK3S_64)
>> +	if ((pte_val(pte) &
>> +	     (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE)) ==
>> +	    _PAGE_PRESENT) {
>> +		if (!(pte_val(pte) & _PAGE_PRIVILEGED))
>> +			return 1;
>> +	}
>> +	return 0;
>> +#else
>>  	return (pte_val(pte) &
>> -	    (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) ==
>> -	    (_PAGE_PRESENT | _PAGE_USER);
>> +		(_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) ==
>> +		(_PAGE_PRESENT | _PAGE_USER);
>> +#endif
>>  }
>>  
>>  static struct page *maybe_pte_to_page(pte_t pte)
>> diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
>> index 00d8d985bba3..441905f7bba4 100644
>> --- a/arch/powerpc/mm/pgtable_64.c
>> +++ b/arch/powerpc/mm/pgtable_64.c
>> @@ -280,8 +280,17 @@ void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
>>  	if (flags & _PAGE_WRITE)
>>  		flags |= _PAGE_DIRTY;
>>  
>> -	/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
>> -	flags &= ~(_PAGE_USER | _PAGE_EXEC);
>> +	/* we don't want to let _PAGE_EXEC leak out */
>> +	flags &= ~_PAGE_EXEC;
>> +	/*
>> +	 * Force kernel mapping.
>> +	 */
>> +#if defined(CONFIG_PPC_BOOK3S_64)
>> +	flags |= _PAGE_PRIVILEGED;
>> +#else
>> +	flags &= ~_PAGE_USER;
>> +#endif
>> +
>>  
>>  #ifdef _PAGE_BAP_SR
>>  	/* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format
>> @@ -669,7 +678,7 @@ void pmdp_huge_split_prepare(struct vm_area_struct *vma,
>>  	 * the translation is still valid, because we will withdraw
>>  	 * pgtable_t after this.
>>  	 */
>> -	pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_USER, 0);
>> +	pmd_hugepage_update(vma->vm_mm, address, pmdp, 0, _PAGE_PRIVILEGED);
>>  }
>>  
>>  
>> diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c
>> index c3a3bf1745b7..e29e4d5afa2d 100644
>> --- a/arch/powerpc/platforms/cell/spufs/fault.c
>> +++ b/arch/powerpc/platforms/cell/spufs/fault.c
>
> You need to CC the spufs maintainer for this one.
>
>
>> @@ -141,7 +141,7 @@ int spufs_handle_class1(struct spu_context *ctx)
>>  	/* we must not hold the lock when entering copro_handle_mm_fault */
>>  	spu_release(ctx);
>>  
>> -	access = (_PAGE_PRESENT | _PAGE_READ | _PAGE_USER);
>> +	access = (_PAGE_PRESENT | _PAGE_READ);
>>  	access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_WRITE : 0UL;
>>  	local_irq_save(flags);
>>  	ret = hash_page(ea, access, 0x300, dsisr);
>> diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c
>> index a3d5e1e16c21..23e37cd41e64 100644
>> --- a/drivers/misc/cxl/fault.c
>> +++ b/drivers/misc/cxl/fault.c
>
> You need to CC both cxl driver maintainers for this one.
>
>> @@ -152,8 +152,9 @@ static void cxl_handle_page_fault(struct cxl_context *ctx,
>>  	access = _PAGE_PRESENT | _PAGE_READ;
>>  	if (dsisr & CXL_PSL_DSISR_An_S)
>>  		access |= _PAGE_WRITE;
>> -	if ((!ctx->kernel) || ~(dar & (1ULL << 63)))
>> -		access |= _PAGE_USER;
>> +
>> +	if (ctx->kernel && (dar & (1ULL << 63)))
>> +		access |= _PAGE_PRIVILEGED;
>>  
>>  	if (dsisr & DSISR_NOHPTE)
>>  		inv_flags |= HPTE_NOHPTE_UPDATE;

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 09/14] powerpc/mm: Drop WIMG in favour of new constants
  2016-03-22  4:59   ` Michael Neuling
@ 2016-03-26  6:12     ` Aneesh Kumar K.V
  0 siblings, 0 replies; 19+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-26  6:12 UTC (permalink / raw)
  To: Michael Neuling, benh, paulus, mpe; +Cc: linuxppc-dev

Michael Neuling <mikey@neuling.org> writes:

> On Mon, 2016-03-07 at 19:09 +0530, Aneesh Kumar K.V wrote:
>
>> PowerISA 3.0 introduce three pte bits with the below meaning
>> 000 ->  Normal Memory
>> 001 ->  Strong Access Order
>> 010 -> Non idempotent I/O ( Also cache inhibited and guarded)
>> 100 -> Tolerant I/O (Cache inhibited)
>
> Which PTE are you talking about here?  Radix, new Hash (ISA 3.0) or
> old Hash (ISA 2.07)?

Radix. Paul also pointed out that with the latest ISA spec this is now a
two-bit value. I have updated the patchset accordingly.

>
> A couple more comments below
>
>> We drop the existing WIMG bits in linux page table in favour of above
>> contants. We loose _PAGE_WRITETHRU with this conversion. We only use
>> writethru via pgprot_cached_wthru() which is used by fbdev/controlfb.c
>> which is Apple control display and also PPC32.
>> 
>> With respect to _PAGE_COHERENCE, we have been marking hpte
>> always coherent for some time now. htab_convert_pte_flags always added
>> HPTE_R_M.
>> 
>> NOTE: KVM changes need closer review.
>> 
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
>> ---
>>  arch/powerpc/include/asm/book3s/64/hash.h | 47 +++++++++----------------------
>>  arch/powerpc/include/asm/kvm_book3s_64.h  | 29 ++++++++++---------
>>  arch/powerpc/kvm/book3s_64_mmu_hv.c       | 11 ++++----
>>  arch/powerpc/kvm/book3s_hv_rm_mmu.c       | 12 ++++----
>>  arch/powerpc/mm/hash64_64k.c              |  2 +-
>>  arch/powerpc/mm/hash_utils_64.c           | 14 ++++-----
>>  arch/powerpc/mm/pgtable.c                 |  2 +-
>>  arch/powerpc/mm/pgtable_64.c              |  4 ---
>>  arch/powerpc/platforms/pseries/lpar.c     |  4 ---
>>  9 files changed, 48 insertions(+), 77 deletions(-)
>> 
>> diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
>> index c2b567456796..edd3d47ef9a4 100644
>> --- a/arch/powerpc/include/asm/book3s/64/hash.h
>> +++ b/arch/powerpc/include/asm/book3s/64/hash.h
>> @@ -21,11 +21,9 @@
>>  #define _PAGE_RW		(_PAGE_READ | _PAGE_WRITE)
>>  #define _PAGE_RWX		(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
>>  #define _PAGE_PRIVILEGED	0x00008 /* page can only be access by kernel */
>> -#define _PAGE_GUARDED		0x00010 /* G: guarded (side-effect) page */
>> -/* M (memory coherence) is always set in the HPTE, so we don't need it here */
>> -#define _PAGE_COHERENT		0x0
>> -#define _PAGE_NO_CACHE		0x00020 /* I: cache inhibit */
>> -#define _PAGE_WRITETHRU		0x00040 /* W: cache write-through */
>> +#define _PAGE_SAO		0x00010 /* Strong access order */
>> +#define _PAGE_NON_IDEMPOTENT	0x00020 /* non idempotent memory */
>> +#define _PAGE_TOLERANT		0x00040 /* tolerant memory, cache inhibited */
>>  #define _PAGE_DIRTY		0x00080 /* C: page changed */
>>  #define _PAGE_ACCESSED		0x00100 /* R: page referenced */
>>  #define _PAGE_SPECIAL		0x00400 /* software: special page */
>> @@ -122,9 +120,6 @@
>>  #define _PAGE_KERNEL_RWX	(_PAGE_PRIVILEGED | _PAGE_DIRTY | \
>>  				 _PAGE_RW | _PAGE_EXEC)
>>  
>> -/* Strong Access Ordering */
>> -#define _PAGE_SAO		(_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT)
>> -
>>  /* No page size encoding in the linux PTE */
>>  #define _PAGE_PSIZE		0
>>  
>> @@ -150,10 +145,9 @@
>>  /*
>>   * Mask of bits returned by pte_pgprot()
>>   */
>> -#define PAGE_PROT_BITS	(_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
>> -			 _PAGE_WRITETHRU | _PAGE_4K_PFN | \
>> -			 _PAGE_PRIVILEGED | _PAGE_ACCESSED |  _PAGE_READ |\
>> -			 _PAGE_WRITE |  _PAGE_DIRTY | _PAGE_EXEC | \
>> +#define PAGE_PROT_BITS  (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT | \
>> +			 _PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \
>> +			 _PAGE_READ | _PAGE_WRITE |  _PAGE_DIRTY | _PAGE_EXEC | \
>>  			 _PAGE_SOFT_DIRTY)
>>  /*is this
>>   * We define 2 sets of base prot bits, one for basic pages (ie,
>> @@ -162,7 +156,7 @@
>>   * the processor might need it for DMA coherency.
>>   */
>>  #define _PAGE_BASE_NC	(_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
>> -#define _PAGE_BASE	(_PAGE_BASE_NC | _PAGE_COHERENT)
>> +#define _PAGE_BASE	(_PAGE_BASE_NC)
>>  
>>  /* Permission masks used to generate the __P and __S table,
>>   *
>> @@ -203,9 +197,9 @@
>>  /* Permission masks used for kernel mappings */
>>  #define PAGE_KERNEL	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
>>  #define PAGE_KERNEL_NC	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
>> -				 _PAGE_NO_CACHE)
>> +				 _PAGE_TOLERANT)
>>  #define PAGE_KERNEL_NCG	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
>> -				 _PAGE_NO_CACHE | _PAGE_GUARDED)
>> +				 _PAGE_NON_IDEMPOTENT)
>>  #define PAGE_KERNEL_X	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
>>  #define PAGE_KERNEL_RO	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
>>  #define PAGE_KERNEL_ROX	__pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
>> @@ -516,41 +510,26 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
>>   * Macro to mark a page protection value as "uncacheable".
>>   */
>>  
>> -#define _PAGE_CACHE_CTL	(_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \
>> -			 _PAGE_WRITETHRU)
>> +#define _PAGE_CACHE_CTL	(_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT)
>
> The comment here says 'Macro to mark a page protection value as
> "uncacheable"' but why do we put _PAGE_SAO in that?

Yes, that comment is confusing and I removed it. We no longer
support a non-cached SAO pte. Hence the idea of clearing all of the
pte's caching-attribute bits and enabling only the requested caching
attribute. Also, if we set non-idempotent without clearing SAO first, the
result becomes a tolerant mapping. Below is the bit mapping now:

#define _PAGE_SAO		0x00010 /* Strong access order */
#define _PAGE_NON_IDEMPOTENT	0x00020 /* non idempotent memory */
#define _PAGE_TOLERANT		0x00030 /* tolerant memory, cache inhibited */
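
To spell out the clash that forces the clearing (sketch only, using the
updated values above and assuming _PAGE_CACHE_CTL still covers the whole
field):

	unsigned long pte_v = _PAGE_SAO;	/* 0x10, an SAO mapping       */

	pte_v |= _PAGE_NON_IDEMPOTENT;		/* 0x10 | 0x20 == 0x30 ...    */
	/* ... which now reads back as _PAGE_TOLERANT, not non-idempotent.   */

	pte_v = (pte_v & ~_PAGE_CACHE_CTL) | _PAGE_NON_IDEMPOTENT;
	/* Clearing the whole field first, as pgprot_noncached() does,       */
	/* leaves exactly 0x20 == _PAGE_NON_IDEMPOTENT.                       */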

>
>>  
>>  #define pgprot_noncached pgprot_noncached
>>  static inline pgprot_t pgprot_noncached(pgprot_t prot)
>>  {
>>  	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
>> -			_PAGE_NO_CACHE | _PAGE_GUARDED);
>> +			_PAGE_NON_IDEMPOTENT);
>>  }
>>  
>>  #define pgprot_noncached_wc pgprot_noncached_wc
>>  static inline pgprot_t pgprot_noncached_wc(pgprot_t prot)
>>  {
>>  	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
>> -			_PAGE_NO_CACHE);
>> +			_PAGE_TOLERANT);
>>  }
>>  
>>  #define pgprot_cached pgprot_cached
>>  static inline pgprot_t pgprot_cached(pgprot_t prot)
>>  {
>> -	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
>> -			_PAGE_COHERENT);
>> -}
>> -
>> -#define pgprot_cached_wthru pgprot_cached_wthru
>> -static inline pgprot_t pgprot_cached_wthru(pgprot_t prot)
>> -{
>> -	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
>> -			_PAGE_COHERENT | _PAGE_WRITETHRU);
>> -}
>> -
>> -#define pgprot_cached_noncoherent pgprot_cached_noncoherent
>> -static inline pgprot_t pgprot_cached_noncoherent(pgprot_t prot)
>> -{
>> -	return __pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL);
>> +	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL));
>>  }
>>  
>>  #define pgprot_writecombine pgprot_writecombine
>> diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
>> index f9a7a89a3e4f..f23b1698ad3c 100644
>> --- a/arch/powerpc/include/asm/kvm_book3s_64.h
>> +++ b/arch/powerpc/include/asm/kvm_book3s_64.h
>> @@ -278,19 +278,24 @@ static inline unsigned long hpte_make_readonly(unsigned long ptel)
>>  	return ptel;
>>  }
>>  
>> -static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
>> +static inline bool hpte_cache_flags_ok(unsigned long hptel, bool is_ci)
>>  {
>> -	unsigned int wimg = ptel & HPTE_R_WIMG;
>> +	unsigned int wimg = hptel & HPTE_R_WIMG;
>>  
>>  	/* Handle SAO */
>>  	if (wimg == (HPTE_R_W | HPTE_R_I | HPTE_R_M) &&
>>  	    cpu_has_feature(CPU_FTR_ARCH_206))
>>  		wimg = HPTE_R_M;
>>  
>> -	if (!io_type)
>> +	if (!is_ci)
>>  		return wimg == HPTE_R_M;
>> -
>> -	return (wimg & (HPTE_R_W | HPTE_R_I)) == io_type;
>> +	/*
>> +	 * if host is mapped cache inhibited, make sure hptel also have
>> +	 * cache inhibited.
>> +	 */
>> +	if (wimg & HPTE_R_W) /* FIXME!! is this ok for all guest. ? */
>> +		return false;
>> +	return !!(wimg & HPTE_R_I);
>>  }
>>  
>>  /*
>> @@ -333,16 +338,12 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing)
>>  	return new_pte;
>>  }
>>  
>> -
>> -/* Return HPTE cache control bits corresponding to Linux pte bits */
>> -static inline unsigned long hpte_cache_bits(unsigned long pte_val)
>> +/*
>> + * check whether the mapping is cache inhibited
>> + */
>> +static inline bool hpte_is_cache_inhibited(unsigned long pte_val)
>>  {
>> -#if _PAGE_NO_CACHE == HPTE_R_I && _PAGE_WRITETHRU == HPTE_R_W
>> -	return pte_val & (HPTE_R_W | HPTE_R_I);
>> -#else
>> -	return ((pte_val & _PAGE_NO_CACHE) ? HPTE_R_I : 0) +
>> -		((pte_val & _PAGE_WRITETHRU) ? HPTE_R_W : 0);
>> -#endif
>> +	return !!(pte_val & (_PAGE_TOLERANT | _PAGE_NON_IDEMPOTENT));
>
> Can we use _PAGE_CACHE_CTL here?
>
>>  }

This is different now

/*
 * check whether a pte mapping has the cache inhibited property
 */
static inline bool pte_ci(pte_t pte)
{
	unsigned long pte_v = pte_val(pte);

	if (((pte_v & _PAGE_CACHE_CTL) == _PAGE_TOLERANT) ||
	    ((pte_v & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT))
		return true;
	return false;
}
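
A caller would then test the helper instead of poking at the bits directly;
for example (sketch of a call site, not taken verbatim from the posted
series):

	/* Demote to 4K if a 64K mapping turns out to be cache inhibited. */
	if (mmu_ci_restrictions && psize == MMU_PAGE_64K && pte_ci(*ptep))
		demote_segment_4k(mm, ea);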


>>  
>>  static inline bool hpte_read_permission(unsigned long pp, unsigned long key)
>> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
>> index c7b78d8336b2..40ad06c41ca1 100644
>> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
>> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
>> @@ -447,7 +447,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,

...

-aneesh

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2016-03-26  6:12 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-03-07 13:39 [PATCH 01/14] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
2016-03-07 13:39 ` [PATCH 02/14] powerpc/mm: use _PAGE_READ to indicate Read access Aneesh Kumar K.V
2016-03-07 13:39 ` [PATCH 03/14] powerpc/mm/subpage: Clear RWX bit to indicate no access Aneesh Kumar K.V
2016-03-07 13:39 ` [PATCH 04/14] powerpc/mm: Use pte_user instead of opencoding Aneesh Kumar K.V
2016-03-07 13:39 ` [PATCH 05/14] powerpc/mm: Replace _PAGE_USER with _PAGE_PRIVILEGED Aneesh Kumar K.V
2016-03-22  6:05   ` Michael Neuling
2016-03-26  5:32     ` Aneesh Kumar K.V
2016-03-07 13:39 ` [PATCH 06/14] powerpc/mm: Remove RPN_SHIFT and RPN_SIZE Aneesh Kumar K.V
2016-03-07 13:39 ` [PATCH 07/14] powerpc/mm: Update _PAGE_KERNEL_RO Aneesh Kumar K.V
2016-03-07 13:39 ` [PATCH 08/14] powerpc/mm: Use helper for finding pte bits mapping I/O area Aneesh Kumar K.V
2016-03-07 13:39 ` [PATCH 09/14] powerpc/mm: Drop WIMG in favour of new constants Aneesh Kumar K.V
2016-03-07 17:29   ` kbuild test robot
2016-03-22  4:59   ` Michael Neuling
2016-03-26  6:12     ` Aneesh Kumar K.V
2016-03-07 13:39 ` [PATCH 10/14] powerpc/mm: Use generic version of pmdp_clear_flush_young Aneesh Kumar K.V
2016-03-07 13:39 ` [PATCH 11/14] powerpc/mm: Use generic version of ptep_clear_flush_young Aneesh Kumar K.V
2016-03-07 13:39 ` [PATCH 12/14] powerpc/mm: Move common data structure between radix and hash to book3s 64 generic headers Aneesh Kumar K.V
2016-03-07 13:39 ` [PATCH 13/14] powerpc/mm/power9: Add partition table format Aneesh Kumar K.V
2016-03-07 13:39 ` [PATCH 14/14] powerpc/mm/hash: Add support for POWER9 hash Aneesh Kumar K.V

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.