linuxppc-dev.lists.ozlabs.org archive mirror
* [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64
@ 2016-03-27  8:23 Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 02/65] powerpc/mm: use _PAGE_READ to indicate Read access Aneesh Kumar K.V
                   ` (64 more replies)
  0 siblings, 65 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

This enables us to share the same page table code for
both radix and hash. Radix uses a hardware-defined big endian
page table.
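
A minimal sketch of the idea (illustrative only, not part of the patch; it
reuses the byteswap helpers and the _PAGE_BUSY flag from the hunks below):

static inline int pte_raw_is_busy(__be64 *ptep)
{
	/* the in-memory word is big endian, so convert the mask, not the word */
	return (*ptep & cpu_to_be64(_PAGE_BUSY)) != 0;
}

This is also why the inline assembly below switches from "andi." (which takes
an immediate) to "and." with a register holding cpu_to_be64(_PAGE_BUSY).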

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash.h   |  16 +++--
 arch/powerpc/include/asm/kvm_book3s_64.h    |  13 ++--
 arch/powerpc/include/asm/page.h             |   4 ++
 arch/powerpc/include/asm/pgtable-be-types.h | 104 ++++++++++++++++++++++++++++
 arch/powerpc/mm/hash64_4k.c                 |   7 +-
 arch/powerpc/mm/hash64_64k.c                |  14 ++--
 arch/powerpc/mm/hugepage-hash64.c           |   7 +-
 arch/powerpc/mm/hugetlbpage-hash64.c        |   7 +-
 arch/powerpc/mm/pgtable_64.c                |   9 ++-
 9 files changed, 159 insertions(+), 22 deletions(-)
 create mode 100644 arch/powerpc/include/asm/pgtable-be-types.h

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index d0ee6fcef823..2113de051824 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -250,22 +250,27 @@ static inline unsigned long pte_update(struct mm_struct *mm,
 				       int huge)
 {
 	unsigned long old, tmp;
+	unsigned long busy = cpu_to_be64(_PAGE_BUSY);
+
+	clr = cpu_to_be64(clr);
+	set = cpu_to_be64(set);
 
 	__asm__ __volatile__(
 	"1:	ldarx	%0,0,%3		# pte_update\n\
-	andi.	%1,%0,%6\n\
+	and.	%1,%0,%6\n\
 	bne-	1b \n\
 	andc	%1,%0,%4 \n\
 	or	%1,%1,%7\n\
 	stdcx.	%1,0,%3 \n\
 	bne-	1b"
 	: "=&r" (old), "=&r" (tmp), "=m" (*ptep)
-	: "r" (ptep), "r" (clr), "m" (*ptep), "i" (_PAGE_BUSY), "r" (set)
+	: "r" (ptep), "r" (clr), "m" (*ptep), "r" (busy), "r" (set)
 	: "cc" );
 	/* huge pages use the old page table lock */
 	if (!huge)
 		assert_pte_locked(mm, addr);
 
+	old = be64_to_cpu(old);
 	if (old & _PAGE_HASHPTE)
 		hpte_need_flush(mm, addr, ptep, old, huge);
 
@@ -351,16 +356,19 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
 		 _PAGE_SOFT_DIRTY);
 
 	unsigned long old, tmp;
+	unsigned long busy = cpu_to_be64(_PAGE_BUSY);
+
+	bits = cpu_to_be64(bits);
 
 	__asm__ __volatile__(
 	"1:	ldarx	%0,0,%4\n\
-		andi.	%1,%0,%6\n\
+		and.	%1,%0,%6\n\
 		bne-	1b \n\
 		or	%0,%3,%0\n\
 		stdcx.	%0,0,%4\n\
 		bne-	1b"
 	:"=&r" (old), "=&r" (tmp), "=m" (*ptep)
-	:"r" (bits), "r" (ptep), "m" (*ptep), "i" (_PAGE_BUSY)
+	:"r" (bits), "r" (ptep), "m" (*ptep), "r" (busy)
 	:"cc");
 }
 
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 2aa79c864e91..f9a7a89a3e4f 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -299,6 +299,8 @@ static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
  */
 static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing)
 {
+	__be64 opte, npte;
+	unsigned long old_ptev;
 	pte_t old_pte, new_pte = __pte(0);
 
 	while (1) {
@@ -306,24 +308,25 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing)
 		 * Make sure we don't reload from ptep
 		 */
 		old_pte = READ_ONCE(*ptep);
+		old_ptev = pte_val(old_pte);
 		/*
 		 * wait until _PAGE_BUSY is clear then set it atomically
 		 */
-		if (unlikely(pte_val(old_pte) & _PAGE_BUSY)) {
+		if (unlikely(old_ptev & _PAGE_BUSY)) {
 			cpu_relax();
 			continue;
 		}
 		/* If pte is not present return None */
-		if (unlikely(!(pte_val(old_pte) & _PAGE_PRESENT)))
+		if (unlikely(!(old_ptev & _PAGE_PRESENT)))
 			return __pte(0);
 
 		new_pte = pte_mkyoung(old_pte);
 		if (writing && pte_write(old_pte))
 			new_pte = pte_mkdirty(new_pte);
 
-		if (pte_val(old_pte) == __cmpxchg_u64((unsigned long *)ptep,
-						      pte_val(old_pte),
-						      pte_val(new_pte))) {
+		npte = cpu_to_be64(pte_val(new_pte));
+		opte = cpu_to_be64(old_ptev);
+		if (opte == __cmpxchg_u64((unsigned long *)ptep, opte, npte)) {
 			break;
 		}
 	}
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index ab3d8977bacd..158574d2acf4 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -288,7 +288,11 @@ extern long long virt_phys_offset;
 
 #ifndef __ASSEMBLY__
 
+#ifdef CONFIG_PPC_BOOK3S_64
+#include <asm/pgtable-be-types.h>
+#else
 #include <asm/pgtable-types.h>
+#endif
 
 typedef struct { signed long pd; } hugepd_t;
 
diff --git a/arch/powerpc/include/asm/pgtable-be-types.h b/arch/powerpc/include/asm/pgtable-be-types.h
new file mode 100644
index 000000000000..20527200d6ae
--- /dev/null
+++ b/arch/powerpc/include/asm/pgtable-be-types.h
@@ -0,0 +1,104 @@
+#ifndef _ASM_POWERPC_PGTABLE_BE_TYPES_H
+#define _ASM_POWERPC_PGTABLE_BE_TYPES_H
+
+#ifdef CONFIG_STRICT_MM_TYPECHECKS
+/* These are used to make use of C type-checking. */
+
+/* PTE level */
+typedef struct { __be64 pte; } pte_t;
+#define __pte(x)	((pte_t) { cpu_to_be64(x) })
+static inline unsigned long pte_val(pte_t x)
+{
+	return be64_to_cpu(x.pte);
+}
+
+/* PMD level */
+#ifdef CONFIG_PPC64
+typedef struct { __be64 pmd; } pmd_t;
+#define __pmd(x)	((pmd_t) { cpu_to_be64(x) })
+static inline unsigned long pmd_val(pmd_t x)
+{
+	return be64_to_cpu(x.pmd);
+}
+
+/*
+ * 64-bit hash always uses a 4-level table. Everybody else uses 4 levels
+ * only for the 4K page size.
+ */
+#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES)
+typedef struct { __be64 pud; } pud_t;
+#define __pud(x)	((pud_t) { cpu_to_be64(x) })
+static inline unsigned long pud_val(pud_t x)
+{
+	return be64_to_cpu(x.pud);
+}
+#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */
+#endif /* CONFIG_PPC64 */
+
+/* PGD level */
+typedef struct { __be64 pgd; } pgd_t;
+#define __pgd(x)	((pgd_t) { cpu_to_be64(x) })
+static inline unsigned long pgd_val(pgd_t x)
+{
+	return be64_to_cpu(x.pgd);
+}
+
+/* Page protection bits */
+typedef struct { unsigned long pgprot; } pgprot_t;
+#define pgprot_val(x)	((x).pgprot)
+#define __pgprot(x)	((pgprot_t) { (x) })
+
+#else
+
+/*
+ * .. while these make it easier on the compiler
+ */
+
+typedef __be64 pte_t;
+#define __pte(x)	cpu_to_be64(x)
+static inline unsigned long pte_val(pte_t pte)
+{
+	return be64_to_cpu(pte);
+}
+
+#ifdef CONFIG_PPC64
+typedef __be64 pmd_t;
+#define __pmd(x)	cpu_to_be64(x)
+static inline unsigned long pmd_val(pmd_t pmd)
+{
+	return be64_to_cpu(pmd);
+}
+
+#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES)
+typedef __be64 pud_t;
+#define __pud(x)	cpu_to_be64(x)
+static inline unsigned long pud_val(pud_t pud)
+{
+	return be64_to_cpu(pud);
+}
+#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */
+#endif /* CONFIG_PPC64 */
+
+typedef __be64 pgd_t;
+#define __pgd(x)	cpu_to_be64(x)
+static inline unsigned long pgd_val(pgd_t pgd)
+{
+	return be64_to_cpu(pgd);
+}
+
+typedef unsigned long pgprot_t;
+#define pgprot_val(x)	(x)
+#define __pgprot(x)	(x)
+
+#endif /* CONFIG_STRICT_MM_TYPECHECKS */
+/*
+ * With hash config 64k pages additionally define a bigger "real PTE" type that
+ * gathers the "second half" part of the PTE for pseudo 64k pages
+ */
+#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
+typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
+#else
+typedef struct { pte_t pte; } real_pte_t;
+#endif
+
+#endif /* _ASM_POWERPC_PGTABLE_BE_TYPES_H */
diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
index 47d1b26effc6..71abd4c44c27 100644
--- a/arch/powerpc/mm/hash64_4k.c
+++ b/arch/powerpc/mm/hash64_4k.c
@@ -20,6 +20,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 		   pte_t *ptep, unsigned long trap, unsigned long flags,
 		   int ssize, int subpg_prot)
 {
+	__be64 opte, npte;
 	unsigned long hpte_group;
 	unsigned long rflags, pa;
 	unsigned long old_pte, new_pte;
@@ -47,8 +48,10 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
 		if (access & _PAGE_RW)
 			new_pte |= _PAGE_DIRTY;
-	} while (old_pte != __cmpxchg_u64((unsigned long *)ptep,
-					  old_pte, new_pte));
+
+		opte = cpu_to_be64(old_pte);
+		npte = cpu_to_be64(new_pte);
+	} while (opte != __cmpxchg_u64((unsigned long *)ptep, opte, npte));
 	/*
 	 * PP bits. _PAGE_USER is already PP bit 0x2, so we only
 	 * need to add in 0x1 if it's a read-only user page
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index b2d659cf51c6..6f9b3c34a5c0 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -49,6 +49,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 		   pte_t *ptep, unsigned long trap, unsigned long flags,
 		   int ssize, int subpg_prot)
 {
+	__be64 opte, npte;
 	real_pte_t rpte;
 	unsigned long *hidxp;
 	unsigned long hpte_group;
@@ -79,8 +80,10 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED | _PAGE_COMBO;
 		if (access & _PAGE_RW)
 			new_pte |= _PAGE_DIRTY;
-	} while (old_pte != __cmpxchg_u64((unsigned long *)ptep,
-					  old_pte, new_pte));
+
+		opte = cpu_to_be64(old_pte);
+		npte = cpu_to_be64(new_pte);
+	} while (opte != __cmpxchg_u64((unsigned long *)ptep, opte, npte));
 	/*
 	 * Handle the subpage protection bits
 	 */
@@ -220,7 +223,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
 		    unsigned long vsid, pte_t *ptep, unsigned long trap,
 		    unsigned long flags, int ssize)
 {
-
+	__be64 opte, npte;
 	unsigned long hpte_group;
 	unsigned long rflags, pa;
 	unsigned long old_pte, new_pte;
@@ -254,8 +257,9 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
 		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
 		if (access & _PAGE_RW)
 			new_pte |= _PAGE_DIRTY;
-	} while (old_pte != __cmpxchg_u64((unsigned long *)ptep,
-					  old_pte, new_pte));
+		opte = cpu_to_be64(old_pte);
+		npte = cpu_to_be64(new_pte);
+	} while (opte != __cmpxchg_u64((unsigned long *)ptep, opte, npte));
 
 	rflags = htab_convert_pte_flags(new_pte);
 
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index eb2accdd76fd..98891139c044 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -22,6 +22,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
 		    pmd_t *pmdp, unsigned long trap, unsigned long flags,
 		    int ssize, unsigned int psize)
 {
+	__be64 opmd, npmd;
 	unsigned int index, valid;
 	unsigned char *hpte_slot_array;
 	unsigned long rflags, pa, hidx;
@@ -49,8 +50,10 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
 		new_pmd = old_pmd | _PAGE_BUSY | _PAGE_ACCESSED;
 		if (access & _PAGE_RW)
 			new_pmd |= _PAGE_DIRTY;
-	} while (old_pmd != __cmpxchg_u64((unsigned long *)pmdp,
-					  old_pmd, new_pmd));
+		opmd = cpu_to_be64(old_pmd);
+		npmd = cpu_to_be64(new_pmd);
+	} while (opmd != __cmpxchg_u64((unsigned long *)pmdp, opmd, npmd));
+
 	rflags = htab_convert_pte_flags(new_pmd);
 
 #if 0
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 8555fce902fe..5bcb28606158 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -22,6 +22,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 		     pte_t *ptep, unsigned long trap, unsigned long flags,
 		     int ssize, unsigned int shift, unsigned int mmu_psize)
 {
+	__be64 opte, npte;
 	unsigned long vpn;
 	unsigned long old_pte, new_pte;
 	unsigned long rflags, pa, sz;
@@ -57,8 +58,10 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
 		if (access & _PAGE_RW)
 			new_pte |= _PAGE_DIRTY;
-	} while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
-					 old_pte, new_pte));
+		opte = cpu_to_be64(old_pte);
+		npte = cpu_to_be64(new_pte);
+	} while (opte != __cmpxchg_u64((unsigned long *)ptep, opte, npte));
+
 	rflags = htab_convert_pte_flags(new_pte);
 
 	sz = ((1UL) << shift);
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 0eb53128ca2a..aa742aa35b64 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -516,6 +516,7 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
 {
 
 	unsigned long old, tmp;
+	unsigned long busy = cpu_to_be64(_PAGE_BUSY);
 
 #ifdef CONFIG_DEBUG_VM
 	WARN_ON(!pmd_trans_huge(*pmdp));
@@ -523,17 +524,21 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
 #endif
 
 #ifdef PTE_ATOMIC_UPDATES
+	clr = cpu_to_be64(clr);
+	set = cpu_to_be64(set);
 	__asm__ __volatile__(
 	"1:	ldarx	%0,0,%3\n\
-		andi.	%1,%0,%6\n\
+		and.	%1,%0,%6\n\
 		bne-	1b \n\
 		andc	%1,%0,%4 \n\
 		or	%1,%1,%7\n\
 		stdcx.	%1,0,%3 \n\
 		bne-	1b"
 	: "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
-	: "r" (pmdp), "r" (clr), "m" (*pmdp), "i" (_PAGE_BUSY), "r" (set)
+	: "r" (pmdp), "r" (clr), "m" (*pmdp), "r" (busy), "r" (set)
 	: "cc" );
+
+	old = be64_to_cpu(old);
 #else
 	old = pmd_val(*pmdp);
 	*pmdp = __pmd((old & ~clr) | set);
-- 
2.5.0


* [PATCH 02/65] powerpc/mm: use _PAGE_READ to indicate Read access
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27 15:35   ` Ian Munsie
  2016-03-31  0:57   ` Balbir Singh
  2016-03-27  8:23 ` [PATCH 03/65] powerpc/mm/subpage: Clear RWX bit to indicate no access Aneesh Kumar K.V
                   ` (63 subsequent siblings)
  64 siblings, 2 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

This splits the _PAGE_RW bit into _PAGE_READ and _PAGE_WRITE. It also removes
the dependency on _PAGE_USER for implying read only. One thing to note
here is that read is implied by write and execute permission, hence we
should always find _PAGE_READ set on a hash pte fault.

We still can't switch PROT_NONE to !(_PAGE_RWX). Automatic NUMA balancing
depends on marking a prot-none pte _PAGE_WRITE. (For more details see
commit b191f9b106ea "mm: numa: preserve PTE write permissions across a NUMA hinting fault".)
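
As a rough sketch (names taken from the hunks below, illustrative only), the
fault path now builds its access mask from the split bits, with read always
requested and write added only for stores:

	unsigned long access = _PAGE_PRESENT | _PAGE_READ;

	if (dsisr & DSISR_ISSTORE)
		access |= _PAGE_WRITE;

Since write and execute imply read, _PAGE_READ is expected to be set on any
pte that reaches the hash fault handlers.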

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash-64k.h |  4 +--
 arch/powerpc/include/asm/book3s/64/hash.h     | 35 ++++++++++++++++-----------
 arch/powerpc/include/asm/pte-common.h         |  5 ++++
 arch/powerpc/mm/hash64_4k.c                   |  2 +-
 arch/powerpc/mm/hash64_64k.c                  |  4 +--
 arch/powerpc/mm/hash_utils_64.c               |  9 ++++---
 arch/powerpc/mm/hugepage-hash64.c             |  2 +-
 arch/powerpc/mm/hugetlbpage-hash64.c          |  2 +-
 arch/powerpc/mm/hugetlbpage.c                 |  4 +--
 arch/powerpc/mm/pgtable.c                     |  4 +--
 arch/powerpc/mm/pgtable_64.c                  |  5 ++--
 arch/powerpc/platforms/cell/spu_base.c        |  2 +-
 arch/powerpc/platforms/cell/spufs/fault.c     |  4 +--
 drivers/misc/cxl/fault.c                      |  4 +--
 14 files changed, 49 insertions(+), 37 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 0a7956a80a08..279ded72f1db 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -291,10 +291,10 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 				      pmd_t *pmdp)
 {
 
-	if ((pmd_val(*pmdp) & _PAGE_RW) == 0)
+	if ((pmd_val(*pmdp) & _PAGE_WRITE) == 0)
 		return;
 
-	pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0);
+	pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0);
 }
 
 #endif /*  CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 2113de051824..f092d83fa623 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -16,8 +16,10 @@
 #define _PAGE_BIT_SWAP_TYPE	0
 
 #define _PAGE_EXEC		0x00001 /* execute permission */
-#define _PAGE_RW		0x00002 /* read & write access allowed */
+#define _PAGE_WRITE		0x00002 /* write access allowed */
 #define _PAGE_READ		0x00004	/* read access allowed */
+#define _PAGE_RW		(_PAGE_READ | _PAGE_WRITE)
+#define _PAGE_RWX		(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
 #define _PAGE_USER		0x00008 /* page may be accessed by userspace */
 #define _PAGE_GUARDED		0x00010 /* G: guarded (side-effect) page */
 /* M (memory coherence) is always set in the HPTE, so we don't need it here */
@@ -147,8 +149,8 @@
  */
 #define PAGE_PROT_BITS	(_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
 			 _PAGE_WRITETHRU | _PAGE_4K_PFN | \
-			 _PAGE_USER | _PAGE_ACCESSED |  \
-			 _PAGE_RW |  _PAGE_DIRTY | _PAGE_EXEC | \
+			 _PAGE_USER | _PAGE_ACCESSED |  _PAGE_READ |\
+			 _PAGE_WRITE |  _PAGE_DIRTY | _PAGE_EXEC | \
 			 _PAGE_SOFT_DIRTY)
 /*
  * We define 2 sets of base prot bits, one for basic pages (ie,
@@ -173,10 +175,12 @@
 #define PAGE_SHARED	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
 #define PAGE_SHARED_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \
 				 _PAGE_EXEC)
-#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_USER )
-#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
-#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_USER )
-#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ)
+#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ| \
+				 _PAGE_EXEC)
+#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ)
+#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ| \
+				 _PAGE_EXEC)
 
 #define __P000	PAGE_NONE
 #define __P001	PAGE_READONLY
@@ -300,19 +304,19 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 				      pte_t *ptep)
 {
 
-	if ((pte_val(*ptep) & _PAGE_RW) == 0)
+	if ((pte_val(*ptep) & _PAGE_WRITE) == 0)
 		return;
 
-	pte_update(mm, addr, ptep, _PAGE_RW, 0, 0);
+	pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0);
 }
 
 static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 					   unsigned long addr, pte_t *ptep)
 {
-	if ((pte_val(*ptep) & _PAGE_RW) == 0)
+	if ((pte_val(*ptep) & _PAGE_WRITE) == 0)
 		return;
 
-	pte_update(mm, addr, ptep, _PAGE_RW, 0, 1);
+	pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1);
 }
 
 /*
@@ -352,7 +356,7 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
 static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
 {
 	unsigned long bits = pte_val(entry) &
-		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC |
+		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_READ | _PAGE_WRITE | _PAGE_EXEC |
 		 _PAGE_SOFT_DIRTY);
 
 	unsigned long old, tmp;
@@ -386,7 +390,7 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd)
 
 
 /* Generic accessors to PTE bits */
-static inline int pte_write(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_RW);}
+static inline int pte_write(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_WRITE);}
 static inline int pte_dirty(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_DIRTY); }
 static inline int pte_young(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_ACCESSED); }
 static inline int pte_special(pte_t pte)	{ return !!(pte_val(pte) & _PAGE_SPECIAL); }
@@ -447,7 +451,7 @@ static inline unsigned long pte_pfn(pte_t pte)
 /* Generic modifiers for PTE bits */
 static inline pte_t pte_wrprotect(pte_t pte)
 {
-	return __pte(pte_val(pte) & ~_PAGE_RW);
+	return __pte(pte_val(pte) & ~_PAGE_WRITE);
 }
 
 static inline pte_t pte_mkclean(pte_t pte)
@@ -462,6 +466,9 @@ static inline pte_t pte_mkold(pte_t pte)
 
 static inline pte_t pte_mkwrite(pte_t pte)
 {
+	/*
+	 * write implies read, hence set both
+	 */
 	return __pte(pte_val(pte) | _PAGE_RW);
 }
 
diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h
index 1ec67b043065..9f5dea58b0db 100644
--- a/arch/powerpc/include/asm/pte-common.h
+++ b/arch/powerpc/include/asm/pte-common.h
@@ -198,3 +198,8 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void);
 /* Advertise support for _PAGE_SPECIAL */
 #define __HAVE_ARCH_PTE_SPECIAL
 
+#ifndef _PAGE_READ
+/* if not defined, we should not find _PAGE_WRITE either */
+#define _PAGE_READ 0
+#define _PAGE_WRITE _PAGE_RW
+#endif
diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
index 71abd4c44c27..7ebac279d38e 100644
--- a/arch/powerpc/mm/hash64_4k.c
+++ b/arch/powerpc/mm/hash64_4k.c
@@ -46,7 +46,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 		 * also add _PAGE_COMBO
 		 */
 		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
-		if (access & _PAGE_RW)
+		if (access & _PAGE_WRITE)
 			new_pte |= _PAGE_DIRTY;
 
 		opte = cpu_to_be64(old_pte);
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index 6f9b3c34a5c0..83ac9f658733 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -78,7 +78,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 		 * also add _PAGE_COMBO
 		 */
 		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED | _PAGE_COMBO;
-		if (access & _PAGE_RW)
+		if (access & _PAGE_WRITE)
 			new_pte |= _PAGE_DIRTY;
 
 		opte = cpu_to_be64(old_pte);
@@ -255,7 +255,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
 		 * a write access.
 		 */
 		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
-		if (access & _PAGE_RW)
+		if (access & _PAGE_WRITE)
 			new_pte |= _PAGE_DIRTY;
 		opte = cpu_to_be64(old_pte);
 		npte = cpu_to_be64(new_pte);
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 90dd9280894f..ea23403b3fc0 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -175,8 +175,9 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
 	 * or PP=0x3 for read-only (including writeable but clean pages).
 	 */
 	if (pteflags & _PAGE_USER) {
-		rflags |= 0x2;
-		if (!((pteflags & _PAGE_RW) && (pteflags & _PAGE_DIRTY)))
+		if (pteflags & _PAGE_RWX)
+			rflags |= 0x2;
+		if (!((pteflags & _PAGE_WRITE) && (pteflags & _PAGE_DIRTY)))
 			rflags |= 0x1;
 	}
 	/*
@@ -1205,7 +1206,7 @@ EXPORT_SYMBOL_GPL(hash_page);
 int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
 		unsigned long dsisr)
 {
-	unsigned long access = _PAGE_PRESENT;
+	unsigned long access = _PAGE_PRESENT | _PAGE_READ;
 	unsigned long flags = 0;
 	struct mm_struct *mm = current->mm;
 
@@ -1216,7 +1217,7 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
 		flags |= HPTE_NOHPTE_UPDATE;
 
 	if (dsisr & DSISR_ISSTORE)
-		access |= _PAGE_RW;
+		access |= _PAGE_WRITE;
 	/*
 	 * We need to set the _PAGE_USER bit if MSR_PR is set or if we are
 	 * accessing a userspace segment (even from the kernel). We assume
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index 98891139c044..39342638a498 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -48,7 +48,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
 		 * a write access
 		 */
 		new_pmd = old_pmd | _PAGE_BUSY | _PAGE_ACCESSED;
-		if (access & _PAGE_RW)
+		if (access & _PAGE_WRITE)
 			new_pmd |= _PAGE_DIRTY;
 		opmd = cpu_to_be64(old_pmd);
 		npmd = cpu_to_be64(new_pmd);
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 5bcb28606158..e6e54a04bd32 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -56,7 +56,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 		/* Try to lock the PTE, add ACCESSED and DIRTY if it was
 		 * a write access */
 		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
-		if (access & _PAGE_RW)
+		if (access & _PAGE_WRITE)
 			new_pte |= _PAGE_DIRTY;
 		opte = cpu_to_be64(old_pte);
 		npte = cpu_to_be64(new_pte);
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 6dd272b6196f..6e52e722d3f2 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -1003,9 +1003,9 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 		end = pte_end;
 
 	pte = READ_ONCE(*ptep);
-	mask = _PAGE_PRESENT | _PAGE_USER;
+	mask = _PAGE_PRESENT | _PAGE_USER | _PAGE_READ;
 	if (write)
-		mask |= _PAGE_RW;
+		mask |= _PAGE_WRITE;
 
 	if ((pte_val(pte) & mask) != mask)
 		return 0;
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 83dfd7925c72..98b5c03e344d 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -177,8 +177,8 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
 	 * _PAGE_PRESENT, but we can be sure that it is not in hpte.
 	 * Hence we can use set_pte_at for them.
 	 */
-	VM_WARN_ON((pte_val(*ptep) & (_PAGE_PRESENT | _PAGE_USER)) ==
-		(_PAGE_PRESENT | _PAGE_USER));
+	VM_WARN_ON(pte_present(*ptep) && !pte_protnone(*ptep));
+
 	/*
 	 * Add the pte bit when tryint set a pte
 	 */
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index aa742aa35b64..00d8d985bba3 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -277,7 +277,7 @@ void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
 	void *caller = __builtin_return_address(0);
 
 	/* writeable implies dirty for kernel addresses */
-	if (flags & _PAGE_RW)
+	if (flags & _PAGE_WRITE)
 		flags |= _PAGE_DIRTY;
 
 	/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
@@ -681,8 +681,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 		pmd_t *pmdp, pmd_t pmd)
 {
 #ifdef CONFIG_DEBUG_VM
-	WARN_ON((pmd_val(*pmdp) & (_PAGE_PRESENT | _PAGE_USER)) ==
-		(_PAGE_PRESENT | _PAGE_USER));
+	WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
 	assert_spin_locked(&mm->page_table_lock);
 	WARN_ON(!pmd_trans_huge(pmd));
 #endif
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index f7af74f83693..7bc00b508128 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -197,7 +197,7 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr)
 	    (REGION_ID(ea) != USER_REGION_ID)) {
 
 		spin_unlock(&spu->register_lock);
-		ret = hash_page(ea, _PAGE_PRESENT, 0x300, dsisr);
+		ret = hash_page(ea, _PAGE_PRESENT | _PAGE_READ, 0x300, dsisr);
 		spin_lock(&spu->register_lock);
 
 		if (!ret) {
diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c
index d98f845ac777..c3a3bf1745b7 100644
--- a/arch/powerpc/platforms/cell/spufs/fault.c
+++ b/arch/powerpc/platforms/cell/spufs/fault.c
@@ -141,8 +141,8 @@ int spufs_handle_class1(struct spu_context *ctx)
 	/* we must not hold the lock when entering copro_handle_mm_fault */
 	spu_release(ctx);
 
-	access = (_PAGE_PRESENT | _PAGE_USER);
-	access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL;
+	access = (_PAGE_PRESENT | _PAGE_READ | _PAGE_USER);
+	access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_WRITE : 0UL;
 	local_irq_save(flags);
 	ret = hash_page(ea, access, 0x300, dsisr);
 	local_irq_restore(flags);
diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c
index 81c3f75b7330..a3d5e1e16c21 100644
--- a/drivers/misc/cxl/fault.c
+++ b/drivers/misc/cxl/fault.c
@@ -149,9 +149,9 @@ static void cxl_handle_page_fault(struct cxl_context *ctx,
 	 * update_mmu_cache() will not have loaded the hash since current->trap
 	 * is not a 0x400 or 0x300, so just call hash_page_mm() here.
 	 */
-	access = _PAGE_PRESENT;
+	access = _PAGE_PRESENT | _PAGE_READ;
 	if (dsisr & CXL_PSL_DSISR_An_S)
-		access |= _PAGE_RW;
+		access |= _PAGE_WRITE;
 	if ((!ctx->kernel) || ~(dar & (1ULL << 63)))
 		access |= _PAGE_USER;
 
-- 
2.5.0


* [PATCH 03/65] powerpc/mm/subpage: Clear RWX bit to indicate no access
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 02/65] powerpc/mm: use _PAGE_READ to indicate Read access Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-31  1:42   ` Balbir Singh
  2016-03-27  8:23 ` [PATCH 04/65] powerpc/mm: Use pte_user instead of opencoding Aneesh Kumar K.V
                   ` (62 subsequent siblings)
  64 siblings, 1 reply; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Subpage protection used to depend on the _PAGE_USER bit to implement no-access
mode. This patch switches that to use _PAGE_RWX: we now clear Read,
Write and Execute access from the pte instead of clearing _PAGE_USER.
This was done so that we can switch to _PAGE_PRIVILEGED in a later patch.
subpage_protection() returns the pte bits that need to be cleared.
Instead of updating the interface to handle no-access in a separate way,
it is simpler to clear RWX access to indicate no access.

We still don't insert a hash pte for the no access implied by !_PAGE_RWX,
hence we should not get a PROT_FAULT with this change.
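
For reference, a sketch of the new decode (values as in the hunk below; spp is
the 2-bit subpage field and the result is the set of pte bits to clear):

	/* 0 -> clear nothing      (full permission)
	 * 1 -> clear _PAGE_WRITE  (read only)
	 * 2 -> clear _PAGE_RWX    (no access)
	 */
	spp = ((spp & 2) ? _PAGE_RWX : 0) | ((spp & 1) ? _PAGE_WRITE : 0);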

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/mm/hash_utils_64.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index ea23403b3fc0..ec37f4b0a8ff 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -917,7 +917,7 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
  * Userspace sets the subpage permissions using the subpage_prot system call.
  *
  * Result is 0: full permissions, _PAGE_RW: read-only,
- * _PAGE_USER or _PAGE_USER|_PAGE_RW: no access.
+ * _PAGE_RWX: no access.
  */
 static int subpage_protection(struct mm_struct *mm, unsigned long ea)
 {
@@ -943,8 +943,13 @@ static int subpage_protection(struct mm_struct *mm, unsigned long ea)
 	/* extract 2-bit bitfield for this 4k subpage */
 	spp >>= 30 - 2 * ((ea >> 12) & 0xf);
 
-	/* turn 0,1,2,3 into combination of _PAGE_USER and _PAGE_RW */
-	spp = ((spp & 2) ? _PAGE_USER : 0) | ((spp & 1) ? _PAGE_RW : 0);
+	/*
+	 * 0 -> full permission
+	 * 1 -> Read only
+	 * 2 -> no access.
+	 * We return the flags that need to be cleared.
+	 */
+	spp = ((spp & 2) ? _PAGE_RWX : 0) | ((spp & 1) ? _PAGE_WRITE : 0);
 	return spp;
 }
 
-- 
2.5.0


* [PATCH 04/65] powerpc/mm: Use pte_user instead of opencoding
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 02/65] powerpc/mm: use _PAGE_READ to indicate Read access Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 03/65] powerpc/mm/subpage: Clear RWX bit to indicate no access Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-31  1:45   ` Balbir Singh
  2016-03-27  8:23 ` [PATCH 05/65] powerpc/mm: Replace _PAGE_USER with _PAGE_PRIVILEGED Aneesh Kumar K.V
                   ` (61 subsequent siblings)
  64 siblings, 1 reply; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

We have a common declaration in pte-common.h. Add a book3s-specific one
and switch to pte_user(). In a later patch we will be switching
_PAGE_USER to _PAGE_PRIVILEGED.
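
Illustrative only: because callers now go through the helper, the later switch
to _PAGE_PRIVILEGED only has to change the helper body, roughly:

static inline bool pte_user(pte_t pte)
{
	/* after the switch, user pages are the ones without _PAGE_PRIVILEGED */
	return !(pte_val(pte) & _PAGE_PRIVILEGED);
}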

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 5 +++++
 arch/powerpc/perf/callchain.c                | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 77d3ce05798e..4ac6221802ad 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -185,6 +185,11 @@ extern struct page *pgd_page(pgd_t pgd);
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE })
 #define __swp_entry_to_pte(x)	__pte((x).val | _PAGE_PTE)
 
+static inline bool pte_user(pte_t pte)
+{
+	return (pte_val(pte) & _PAGE_USER);
+}
+
 #ifdef CONFIG_MEM_SOFT_DIRTY
 #define _PAGE_SWP_SOFT_DIRTY   (1UL << (SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE))
 #else
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index e04a6752b399..0071de76d776 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -137,7 +137,7 @@ static int read_user_stack_slow(void __user *ptr, void *buf, int nb)
 	offset = addr & ((1UL << shift) - 1);
 
 	pte = READ_ONCE(*ptep);
-	if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER))
+	if (!pte_present(pte) || !pte_user(pte))
 		goto err_out;
 	pfn = pte_pfn(pte);
 	if (!page_is_ram(pfn))
-- 
2.5.0


* [PATCH 05/65] powerpc/mm: Replace _PAGE_USER with _PAGE_PRIVILEGED
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (2 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 04/65] powerpc/mm: Use pte_user instead of opencoding Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27 15:41   ` Ian Munsie
  2016-03-27  8:23 ` [PATCH 06/65] powerpc/cxl: Use REGION_ID instead of opencoding Aneesh Kumar K.V
                   ` (60 subsequent siblings)
  64 siblings, 1 reply; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

_PAGE_PRIVILEGED means the page can be accessed only by the kernel. This is
done to keep the pte bits similar to the PowerISA 3.0 radix PTE format. User
pages are now marked by clearing the _PAGE_PRIVILEGED bit.

Previously we allowed the kernel to have a privileged page
in the lower address range (USER_REGION). With this patch such access
is denied.

We also prevent kernel access to a non-privileged page in the
higher address range (ie, REGION_ID != 0). Neither of the above access
scenarios should ever happen.
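
A sketch of how the fault path ends up using this (taken from the hunks below,
illustrative only):

	/* kernel mode touching a kernel address asks for a privileged pte */
	access |= _PAGE_PRIVILEGED;
	if ((msr & MSR_PR) || (REGION_ID(ea) == USER_REGION_ID))
		access &= ~_PAGE_PRIVILEGED;

	/* check_pte_access() then rejects a privilege mismatch in either
	 * direction, as well as missing RWX/present bits */
	if (!check_pte_access(access, pte_val(*ptep)))
		return 1;	/* take the page fault */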

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash.h    | 34 ++++++++++++++--------------
 arch/powerpc/include/asm/book3s/64/pgtable.h | 18 ++++++++++++++-
 arch/powerpc/mm/hash64_4k.c                  |  2 +-
 arch/powerpc/mm/hash64_64k.c                 |  4 ++--
 arch/powerpc/mm/hash_utils_64.c              | 16 ++++++++-----
 arch/powerpc/mm/hugepage-hash64.c            |  2 +-
 arch/powerpc/mm/hugetlbpage-hash64.c         |  3 ++-
 arch/powerpc/mm/hugetlbpage.c                |  2 +-
 arch/powerpc/mm/pgtable.c                    | 15 ++++++++++--
 arch/powerpc/mm/pgtable_64.c                 | 15 +++++++++---
 arch/powerpc/platforms/cell/spufs/fault.c    |  2 +-
 drivers/misc/cxl/fault.c                     |  4 +++-
 12 files changed, 80 insertions(+), 37 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index f092d83fa623..a3e01cf421c1 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -20,7 +20,7 @@
 #define _PAGE_READ		0x00004	/* read access allowed */
 #define _PAGE_RW		(_PAGE_READ | _PAGE_WRITE)
 #define _PAGE_RWX		(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
-#define _PAGE_USER		0x00008 /* page may be accessed by userspace */
+#define _PAGE_PRIVILEGED	0x00008 /* kernel access only */ 
 #define _PAGE_GUARDED		0x00010 /* G: guarded (side-effect) page */
 /* M (memory coherence) is always set in the HPTE, so we don't need it here */
 #define _PAGE_COHERENT		0x0
@@ -114,10 +114,13 @@
 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
 #endif /* CONFIG_PPC_MM_SLICES */
 
-/* No separate kernel read-only */
-#define _PAGE_KERNEL_RW		(_PAGE_RW | _PAGE_DIRTY) /* user access blocked by key */
+/*
+ * No separate kernel read-only, user access blocked by key
+ */
+#define _PAGE_KERNEL_RW		(_PAGE_PRIVILEGED | _PAGE_RW | _PAGE_DIRTY)
 #define _PAGE_KERNEL_RO		 _PAGE_KERNEL_RW
-#define _PAGE_KERNEL_RWX	(_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC)
+#define _PAGE_KERNEL_RWX	(_PAGE_PRIVILEGED | _PAGE_DIRTY | \
+				 _PAGE_RW | _PAGE_EXEC)
 
 /* Strong Access Ordering */
 #define _PAGE_SAO		(_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT)
@@ -149,7 +152,7 @@
  */
 #define PAGE_PROT_BITS	(_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
 			 _PAGE_WRITETHRU | _PAGE_4K_PFN | \
-			 _PAGE_USER | _PAGE_ACCESSED |  _PAGE_READ |\
+			 _PAGE_PRIVILEGED | _PAGE_ACCESSED |  _PAGE_READ |\
 			 _PAGE_WRITE |  _PAGE_DIRTY | _PAGE_EXEC | \
 			 _PAGE_SOFT_DIRTY)
 /*
@@ -171,16 +174,13 @@
  *
  * Note due to the way vm flags are laid out, the bits are XWR
  */
-#define PAGE_NONE	__pgprot(_PAGE_BASE)
-#define PAGE_SHARED	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
-#define PAGE_SHARED_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \
-				 _PAGE_EXEC)
-#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ)
-#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ| \
-				 _PAGE_EXEC)
-#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ)
-#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ| \
-				 _PAGE_EXEC)
+#define PAGE_NONE	__pgprot(_PAGE_BASE | _PAGE_PRIVILEGED)
+#define PAGE_SHARED	__pgprot(_PAGE_BASE | _PAGE_RW)
+#define PAGE_SHARED_X	__pgprot(_PAGE_BASE | _PAGE_RW | _PAGE_EXEC)
+#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_READ)
+#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
+#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_READ)
+#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
 
 #define __P000	PAGE_NONE
 #define __P001	PAGE_READONLY
@@ -421,8 +421,8 @@ static inline pte_t pte_clear_soft_dirty(pte_t pte)
  */
 static inline int pte_protnone(pte_t pte)
 {
-	return (pte_val(pte) &
-		(_PAGE_PRESENT | _PAGE_USER)) == _PAGE_PRESENT;
+	return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PRIVILEGED)) ==
+		(_PAGE_PRESENT | _PAGE_PRIVILEGED);
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 4ac6221802ad..b609729e0d76 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -187,7 +187,7 @@ extern struct page *pgd_page(pgd_t pgd);
 
 static inline bool pte_user(pte_t pte)
 {
-	return (pte_val(pte) & _PAGE_USER);
+	return !(pte_val(pte) & _PAGE_PRIVILEGED);
 }
 
 #ifdef CONFIG_MEM_SOFT_DIRTY
@@ -211,6 +211,22 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
 }
 #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
 
+static inline bool check_pte_access(unsigned long access, unsigned long ptev)
+{
+	/*
+	 * This checks for the _PAGE_RWX and _PAGE_PRESENT bits
+	 */
+	if (access & ~ptev)
+		return false;
+	/*
+	 * This checks for access to privileged space
+	 */
+	if ((access & _PAGE_PRIVILEGED) != (ptev & _PAGE_PRIVILEGED))
+		return false;
+
+	return true;
+}
+
 void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
 void pgtable_cache_init(void);
 
diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
index 7ebac279d38e..42ba12c184e1 100644
--- a/arch/powerpc/mm/hash64_4k.c
+++ b/arch/powerpc/mm/hash64_4k.c
@@ -38,7 +38,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 		if (unlikely(old_pte & _PAGE_BUSY))
 			return 0;
 		/* If PTE permissions don't match, take page fault */
-		if (unlikely(access & ~old_pte))
+		if (unlikely(!check_pte_access(access, old_pte)))
 			return 1;
 		/*
 		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index 83ac9f658733..f33b410d6c8a 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -70,7 +70,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 		if (unlikely(old_pte & _PAGE_BUSY))
 			return 0;
 		/* If PTE permissions don't match, take page fault */
-		if (unlikely(access & ~old_pte))
+		if (unlikely(!check_pte_access(access, old_pte)))
 			return 1;
 		/*
 		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
@@ -241,7 +241,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
 		if (unlikely(old_pte & _PAGE_BUSY))
 			return 0;
 		/* If PTE permissions don't match, take page fault */
-		if (unlikely(access & ~old_pte))
+		if (unlikely(!check_pte_access(access, old_pte)))
 			return 1;
 		/*
 		 * Check if PTE has the cache-inhibit bit set
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index ec37f4b0a8ff..f0dbaa51e8da 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -174,7 +174,7 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
 	 * User area is mapped with PP=0x2 for read/write
 	 * or PP=0x3 for read-only (including writeable but clean pages).
 	 */
-	if (pteflags & _PAGE_USER) {
+	if (!(pteflags & _PAGE_PRIVILEGED)) {
 		if (pteflags & _PAGE_RWX)
 			rflags |= 0x2;
 		if (!((pteflags & _PAGE_WRITE) && (pteflags & _PAGE_DIRTY)))
@@ -1086,7 +1086,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
 	/* Pre-check access permissions (will be re-checked atomically
 	 * in __hash_page_XX but this pre-check is a fast path
 	 */
-	if (access & ~pte_val(*ptep)) {
+	if (!check_pte_access(access, pte_val(*ptep))) {
 		DBG_LOW(" no access !\n");
 		rc = 1;
 		goto bail;
@@ -1224,12 +1224,16 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
 	if (dsisr & DSISR_ISSTORE)
 		access |= _PAGE_WRITE;
 	/*
-	 * We need to set the _PAGE_USER bit if MSR_PR is set or if we are
-	 * accessing a userspace segment (even from the kernel). We assume
-	 * kernel addresses always have the high bit set.
+	 * We set _PAGE_PRIVILEGED only when
+	 * kernel mode accesses kernel space.
+	 *
+	 * _PAGE_PRIVILEGED is NOT set
+	 * 1) when kernel mode accesses user space
+	 * 2) when user space accesses kernel space.
 	 */
+	access |= _PAGE_PRIVILEGED;
 	if ((msr & MSR_PR) || (REGION_ID(ea) == USER_REGION_ID))
-		access |= _PAGE_USER;
+		access &= ~_PAGE_PRIVILEGED;
 
 	if (trap == 0x400)
 		access |= _PAGE_EXEC;
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index 39342638a498..182f1d3fe73c 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -41,7 +41,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
 		if (unlikely(old_pmd & _PAGE_BUSY))
 			return 0;
 		/* If PMD permissions don't match, take page fault */
-		if (unlikely(access & ~old_pmd))
+		if (unlikely(!check_pte_access(access, old_pmd)))
 			return 1;
 		/*
 		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index e6e54a04bd32..96765510a49c 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -51,8 +51,9 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 		if (unlikely(old_pte & _PAGE_BUSY))
 			return 0;
 		/* If PTE permissions don't match, take page fault */
-		if (unlikely(access & ~old_pte))
+		if (unlikely(!check_pte_access(access, old_pte)))
 			return 1;
+
 		/* Try to lock the PTE, add ACCESSED and DIRTY if it was
 		 * a write access */
 		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 6e52e722d3f2..7201e9c624d5 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -1003,7 +1003,7 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 		end = pte_end;
 
 	pte = READ_ONCE(*ptep);
-	mask = _PAGE_PRESENT | _PAGE_USER | _PAGE_READ;
+	mask = _PAGE_PRESENT | _PAGE_READ;
 	if (write)
 		mask |= _PAGE_WRITE;
 
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 98b5c03e344d..a34884beaa47 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -43,9 +43,20 @@ static inline int is_exec_fault(void)
  */
 static inline int pte_looks_normal(pte_t pte)
 {
+
+#if defined(CONFIG_PPC_BOOK3S_64)
+	if ((pte_val(pte) &
+	     (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE)) ==
+	    _PAGE_PRESENT) {
+		if (pte_user(pte))
+			return 1;
+	}
+	return 0;
+#else
 	return (pte_val(pte) &
-	    (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) ==
-	    (_PAGE_PRESENT | _PAGE_USER);
+		(_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) ==
+		(_PAGE_PRESENT | _PAGE_USER);
+#endif
 }
 
 static struct page *maybe_pte_to_page(pte_t pte)
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 00d8d985bba3..441905f7bba4 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -280,8 +280,17 @@ void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
 	if (flags & _PAGE_WRITE)
 		flags |= _PAGE_DIRTY;
 
-	/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
-	flags &= ~(_PAGE_USER | _PAGE_EXEC);
+	/* we don't want to let _PAGE_EXEC leak out */
+	flags &= ~_PAGE_EXEC;
+	/*
+	 * Force kernel mapping.
+	 */
+#if defined(CONFIG_PPC_BOOK3S_64)
+	flags |= _PAGE_PRIVILEGED;
+#else
+	flags &= ~_PAGE_USER;
+#endif
+
 
 #ifdef _PAGE_BAP_SR
 	/* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format
@@ -669,7 +678,7 @@ void pmdp_huge_split_prepare(struct vm_area_struct *vma,
 	 * the translation is still valid, because we will withdraw
 	 * pgtable_t after this.
 	 */
-	pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_USER, 0);
+	pmd_hugepage_update(vma->vm_mm, address, pmdp, 0, _PAGE_PRIVILEGED);
 }
 
 
diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c
index c3a3bf1745b7..e29e4d5afa2d 100644
--- a/arch/powerpc/platforms/cell/spufs/fault.c
+++ b/arch/powerpc/platforms/cell/spufs/fault.c
@@ -141,7 +141,7 @@ int spufs_handle_class1(struct spu_context *ctx)
 	/* we must not hold the lock when entering copro_handle_mm_fault */
 	spu_release(ctx);
 
-	access = (_PAGE_PRESENT | _PAGE_READ | _PAGE_USER);
+	access = (_PAGE_PRESENT | _PAGE_READ);
 	access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_WRITE : 0UL;
 	local_irq_save(flags);
 	ret = hash_page(ea, access, 0x300, dsisr);
diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c
index a3d5e1e16c21..33bd0ee30edd 100644
--- a/drivers/misc/cxl/fault.c
+++ b/drivers/misc/cxl/fault.c
@@ -152,8 +152,10 @@ static void cxl_handle_page_fault(struct cxl_context *ctx,
 	access = _PAGE_PRESENT | _PAGE_READ;
 	if (dsisr & CXL_PSL_DSISR_An_S)
 		access |= _PAGE_WRITE;
+
+	access |= _PAGE_PRIVILEGED;
 	if ((!ctx->kernel) || ~(dar & (1ULL << 63)))
-		access |= _PAGE_USER;
+		access &= ~_PAGE_PRIVILEGED;
 
 	if (dsisr & DSISR_NOHPTE)
 		inv_flags |= HPTE_NOHPTE_UPDATE;
-- 
2.5.0


* [PATCH 06/65] powerpc/cxl: Use REGION_ID instead of opencoding
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (3 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 05/65] powerpc/mm: Replace _PAGE_USER with _PAGE_PRIVILEGED Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27 15:48   ` Ian Munsie
  2016-03-27  8:23 ` [PATCH 07/65] powerpc/mm: Remove RPN_SHIFT and RPN_SIZE Aneesh Kumar K.V
                   ` (59 subsequent siblings)
  64 siblings, 1 reply; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 drivers/misc/cxl/fault.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c
index 33bd0ee30edd..67cc657e96cc 100644
--- a/drivers/misc/cxl/fault.c
+++ b/drivers/misc/cxl/fault.c
@@ -154,7 +154,7 @@ static void cxl_handle_page_fault(struct cxl_context *ctx,
 		access |= _PAGE_WRITE;
 
 	access |= _PAGE_PRIVILEGED;
-	if ((!ctx->kernel) || ~(dar & (1ULL << 63)))
+	if ((!ctx->kernel) || (REGION_ID(dar) == USER_REGION_ID))
 		access &= ~_PAGE_PRIVILEGED;
 
 	if (dsisr & DSISR_NOHPTE)
-- 
2.5.0


* [PATCH 07/65] powerpc/mm: Remove RPN_SHIFT and RPN_SIZE
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (4 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 06/65] powerpc/cxl: Use REGION_ID instead of opencoding Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 08/65] powerpc/mm: Update _PAGE_KERNEL_RO Aneesh Kumar K.V
                   ` (58 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

PTE_RPN_SHIFT is actually page size dependent. Even though PowerISA 3.0
expects only the lower 12 bits to be zero, we will always find the pages to
be PAGE_SHIFT aligned. In the hash config this also allows us to
use the additional 3 bits to track pte-specific information. We need
to make sure we use these bits only for hash-specific pte flags.

For both the 4k and 64k configs, the pte can now hold a 57-bit address.

In order to keep things simpler, drop PTE_RPN_SHIFT and PTE_RPN_SIZE and
specify the 57-bit detail explicitly.
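
As a quick sketch (consistent with the hunks below, illustrative only), the
pfn/pte conversion now uses PAGE_SHIFT directly, with PTE_RPN_MASK keeping the
bits between PAGE_SHIFT and 57:

	#define PTE_RPN_MASK	(((1UL << 57) - 1) & (PAGE_MASK))

	pte = __pte((((pte_basic_t)pfn << PAGE_SHIFT) & PTE_RPN_MASK) |
		    pgprot_val(pgprot));
	pfn = (pte_val(pte) & PTE_RPN_MASK) >> PAGE_SHIFT;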

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash-4k.h  |  4 ----
 arch/powerpc/include/asm/book3s/64/hash-64k.h | 26 +++++++++++++-------------
 arch/powerpc/include/asm/book3s/64/hash.h     | 10 +++++-----
 arch/powerpc/include/asm/book3s/64/pgtable.h  |  4 ++--
 arch/powerpc/mm/pgtable_64.c                  |  2 +-
 5 files changed, 21 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 5f08a0832238..772850e517f3 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -51,10 +51,6 @@
 #define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | \
 			 _PAGE_F_SECOND | _PAGE_F_GIX)
 
-/* shift to put page number into pte */
-#define PTE_RPN_SHIFT	(12)
-#define PTE_RPN_SIZE	(45)	/* gives 57-bit real addresses */
-
 #define _PAGE_4K_PFN		0
 #ifndef __ASSEMBLY__
 /*
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 279ded72f1db..8bb038076892 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -40,15 +40,6 @@
 /* PTE flags to conserve for HPTE identification */
 #define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_F_SECOND | \
 			 _PAGE_F_GIX | _PAGE_HASHPTE | _PAGE_COMBO)
-
-/* Shift to put page number into pte.
- *
- * That gives us a max RPN of 41 bits, which means a max of 57 bits
- * of addressable physical space, or 53 bits for the special 4k PFNs.
- */
-#define PTE_RPN_SHIFT	(16)
-#define PTE_RPN_SIZE	(41)
-
 /*
  * we support 16 fragments per PTE page of 64K size.
  */
@@ -68,6 +59,7 @@
 #define PGD_MASKED_BITS		0xc0000000000000ffUL
 
 #ifndef __ASSEMBLY__
+#include <asm/errno.h>
 
 /*
  * With 64K pages on hash table, we have a special PTE format that
@@ -124,10 +116,18 @@ extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index);
 #define pte_pagesize_index(mm, addr, pte)	\
 	(((pte) & _PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K)
 
-#define remap_4k_pfn(vma, addr, pfn, prot)				\
-	(WARN_ON(((pfn) >= (1UL << PTE_RPN_SIZE))) ? -EINVAL :	\
-		remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE,	\
-			__pgprot(pgprot_val((prot)) | _PAGE_4K_PFN)))
+extern int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
+			   unsigned long pfn, unsigned long size, pgprot_t);
+static inline int remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr,
+			       unsigned long pfn, pgprot_t prot)
+{
+	if (pfn > (PTE_RPN_MASK >> PAGE_SHIFT)) {
+		WARN(1, "remap_4k_pfn called with wrong pfn value\n");
+		return -EINVAL;
+	}
+	return remap_pfn_range(vma, addr, pfn, PAGE_SIZE,
+			       __pgprot(pgprot_val(prot) | _PAGE_4K_PFN));
+}
 
 #define PTE_TABLE_SIZE	PTE_FRAG_SIZE
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index a3e01cf421c1..e69f84927c2d 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -136,10 +136,10 @@
 #define PTE_ATOMIC_UPDATES	1
 #define _PTE_NONE_MASK	_PAGE_HPTEFLAGS
 /*
- * The mask convered by the RPN must be a ULL on 32-bit platforms with
- * 64-bit PTEs
+ * We support a 57-bit real address in the pte. Clear everything above 57, and
+ * everything below PAGE_SHIFT.
  */
-#define PTE_RPN_MASK	(((1UL << PTE_RPN_SIZE) - 1) << PTE_RPN_SHIFT)
+#define PTE_RPN_MASK	(((1UL << 57) - 1) & (PAGE_MASK))
 /*
  * _PAGE_CHG_MASK masks of bits that are to be preserved across
  * pgprot changes
@@ -439,13 +439,13 @@ static inline int pte_present(pte_t pte)
  */
 static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
 {
-	return __pte((((pte_basic_t)(pfn) << PTE_RPN_SHIFT) & PTE_RPN_MASK) |
+	return __pte((((pte_basic_t)(pfn) << PAGE_SHIFT) & PTE_RPN_MASK) |
 		     pgprot_val(pgprot));
 }
 
 static inline unsigned long pte_pfn(pte_t pte)
 {
-	return (pte_val(pte) & PTE_RPN_MASK) >> PTE_RPN_SHIFT;
+	return (pte_val(pte) & PTE_RPN_MASK) >> PAGE_SHIFT;
 }
 
 /* Generic modifiers for PTE bits */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index b609729e0d76..0fac73721e1e 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -172,10 +172,10 @@ extern struct page *pgd_page(pgd_t pgd);
 #define SWP_TYPE_BITS 5
 #define __swp_type(x)		(((x).val >> _PAGE_BIT_SWAP_TYPE) \
 				& ((1UL << SWP_TYPE_BITS) - 1))
-#define __swp_offset(x)		(((x).val & PTE_RPN_MASK) >> PTE_RPN_SHIFT)
+#define __swp_offset(x)		(((x).val & PTE_RPN_MASK) >> PAGE_SHIFT)
 #define __swp_entry(type, offset)	((swp_entry_t) { \
 				((type) << _PAGE_BIT_SWAP_TYPE) \
-				| (((offset) << PTE_RPN_SHIFT) & PTE_RPN_MASK)})
+				| (((offset) << PAGE_SHIFT) & PTE_RPN_MASK)})
 /*
  * swp_entry_t must be independent of pte bits. We build a swp_entry_t from
  * swap type and offset we get from swap and convert that to pte to find a
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 441905f7bba4..1254cf107871 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -762,7 +762,7 @@ pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
 {
 	unsigned long pmdv;
 
-	pmdv = (pfn << PTE_RPN_SHIFT) & PTE_RPN_MASK;
+	pmdv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK;
 	return pmd_set_protbits(__pmd(pmdv), pgprot);
 }
 
-- 
2.5.0


* [PATCH 08/65] powerpc/mm: Update _PAGE_KERNEL_RO
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (5 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 07/65] powerpc/mm: Remove RPN_SHIFT and RPN_SIZE Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-04-05  7:45   ` Balbir Singh
  2016-03-27  8:23 ` [PATCH 09/65] powerpc/mm: Use helper for finding pte bits mapping I/O area Aneesh Kumar K.V
                   ` (57 subsequent siblings)
  64 siblings, 1 reply; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

PS3 used a PPP bit hack to implement a read-only mapping in the
kernel area. Since we bolt-map the ioremap area, it used the pte
flags _PAGE_PRESENT | _PAGE_USER to get a PPP value of 0x3 (0b011),
thereby resulting in a read-only mapping. This means the area can be
accessed by user space, but the kernel will never return such an
address to user space.

We can do better by implementing a read-only kernel mapping using
PPP bits 0b110.

This also allows us to do a read-only kernel mapping for radix in
later patches.
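
For illustration only (not part of the patch), a minimal stand-alone C
sketch of how PPP = 0b110 is formed by combining HPTE_R_PP0 with PP =
0b10, mirroring the htab_convert_pte_flags() hunk below. HPTE_R_PP0
matches the kernel value; HPTE_R_PP and main() are toy stand-ins:

#include <stdio.h>

#define HPTE_R_PP0	0x8000000000000000UL	/* top PP bit of the second doubleword */
#define HPTE_R_PP	0x0000000000000003UL	/* low two PP bits */

int main(void)
{
	unsigned long rflags = 0;

	/* kernel read-only: PP0 set plus PP = 0b10, i.e. PPP = 0b110 */
	rflags |= (HPTE_R_PP0 | 0x2);

	printf("PP0=%d PP=%lu -> PPP=0b110 (kernel read-only with key 0)\n",
	       !!(rflags & HPTE_R_PP0), rflags & HPTE_R_PP);
	return 0;
}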

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash.h |  4 ++--
 arch/powerpc/mm/hash_utils_64.c           | 17 +++++++++++------
 arch/powerpc/platforms/ps3/spu.c          |  2 +-
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index e69f84927c2d..2a80981f1b0b 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -115,10 +115,10 @@
 #endif /* CONFIG_PPC_MM_SLICES */
 
 /*
- * No separate kernel read-only, user access blocked by key
+ * user access blocked by key
  */
 #define _PAGE_KERNEL_RW		(_PAGE_PRIVILEGED | _PAGE_RW | _PAGE_DIRTY)
-#define _PAGE_KERNEL_RO		 _PAGE_KERNEL_RW
+#define _PAGE_KERNEL_RO		 (_PAGE_PRIVILEGED | _PAGE_READ)
 #define _PAGE_KERNEL_RWX	(_PAGE_PRIVILEGED | _PAGE_DIRTY | \
 				 _PAGE_RW | _PAGE_EXEC)
 
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index f0dbaa51e8da..59d4600bacd5 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -167,14 +167,19 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
 	if ((pteflags & _PAGE_EXEC) == 0)
 		rflags |= HPTE_R_N;
 	/*
-	 * PP bits:
+	 * PPP bits:
 	 * Linux uses slb key 0 for kernel and 1 for user.
-	 * kernel areas are mapped with PP=00
-	 * and there is no kernel RO (_PAGE_KERNEL_RO).
-	 * User area is mapped with PP=0x2 for read/write
-	 * or PP=0x3 for read-only (including writeable but clean pages).
+	 * kernel RW areas are mapped with PPP=0b000
+	 * User area is mapped with PPP=0b010 for read/write
+	 * or PPP=0b011 for read-only (including writeable but clean pages).
 	 */
-	if (!(pteflags & _PAGE_PRIVILEGED)) {
+	if (pteflags & _PAGE_PRIVILEGED) {
+		/*
+		 * Kernel read only mapped with ppp bits 0b110
+		 */
+		if (!(pteflags & _PAGE_WRITE))
+			rflags |= (HPTE_R_PP0 | 0x2);
+	} else {
 		if (pteflags & _PAGE_RWX)
 			rflags |= 0x2;
 		if (!((pteflags & _PAGE_WRITE) && (pteflags & _PAGE_DIRTY)))
diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c
index a0bca05e26b0..5e8a40f9739f 100644
--- a/arch/powerpc/platforms/ps3/spu.c
+++ b/arch/powerpc/platforms/ps3/spu.c
@@ -205,7 +205,7 @@ static void spu_unmap(struct spu *spu)
 static int __init setup_areas(struct spu *spu)
 {
 	struct table {char* name; unsigned long addr; unsigned long size;};
-	static const unsigned long shadow_flags = _PAGE_NO_CACHE | 3;
+	unsigned long shadow_flags = pgprot_val(pgprot_noncached_wc(PAGE_KERNEL_RO));
 
 	spu_pdata(spu)->shadow = __ioremap(spu_pdata(spu)->shadow_addr,
 					   sizeof(struct spe_shadow),
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 09/65] powerpc/mm: Use helper for finding pte bits mapping I/O area
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (6 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 08/65] powerpc/mm: Update _PAGE_KERNEL_RO Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-04-05 11:47   ` Balbir Singh
  2016-03-27  8:23 ` [PATCH 10/65] powerpc/mm: Drop WIMG in favour of new constants Aneesh Kumar K.V
                   ` (56 subsequent siblings)
  64 siblings, 1 reply; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Use a helper instead of open-coding the constants. A later patch will
drop the WIMG bits and use the PowerISA 3.0 defines.
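
A rough stand-alone model (not the kernel API) of why the helper is
preferred: the helper owns the flag encoding, so the encoding can
change (as the next patch does) without touching every call site. The
pgprot_t layout and flag values below are toy assumptions:

#include <stdio.h>

typedef struct { unsigned long val; } pgprot_t;	/* toy stand-in */

#define _PAGE_NO_CACHE	0x20UL		/* illustrative values only */
#define _PAGE_GUARDED	0x10UL

#define __pgprot(x)	((pgprot_t){ (x) })
#define pgprot_val(x)	((x).val)

static pgprot_t pgprot_noncached(pgprot_t prot)
{
	/* the helper, not the caller, knows which bits mean "non-cached" */
	return __pgprot(pgprot_val(prot) | _PAGE_NO_CACHE | _PAGE_GUARDED);
}

int main(void)
{
	/* old style: open-coded constants at every ioremap call site */
	unsigned long old_flags = _PAGE_NO_CACHE | _PAGE_GUARDED;
	/* new style: derive the flags from the helper */
	unsigned long new_flags = pgprot_val(pgprot_noncached(__pgprot(0)));

	printf("old=0x%lx new=0x%lx\n", old_flags, new_flags);
	return 0;
}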

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/btext.c      | 2 +-
 arch/powerpc/kernel/isa-bridge.c | 4 ++--
 arch/powerpc/kernel/pci_64.c     | 2 +-
 arch/powerpc/mm/pgtable_64.c     | 4 ++--
 arch/powerpc/platforms/ps3/spu.c | 2 +-
 drivers/pcmcia/electra_cf.c      | 2 +-
 6 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index 41c011cb6070..8275858a434d 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -162,7 +162,7 @@ void btext_map(void)
 	offset = ((unsigned long) dispDeviceBase) - base;
 	size = dispDeviceRowBytes * dispDeviceRect[3] + offset
 		+ dispDeviceRect[0];
-	vbase = __ioremap(base, size, _PAGE_NO_CACHE);
+	vbase = __ioremap(base, size, pgprot_val(pgprot_noncached_wc(__pgprot(0))));
 	if (vbase == 0)
 		return;
 	logicalDisplayBase = vbase + offset;
diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c
index 0f1997097960..ae1316106e2b 100644
--- a/arch/powerpc/kernel/isa-bridge.c
+++ b/arch/powerpc/kernel/isa-bridge.c
@@ -109,14 +109,14 @@ static void pci_process_ISA_OF_ranges(struct device_node *isa_node,
 		size = 0x10000;
 
 	__ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
-		     size, _PAGE_NO_CACHE|_PAGE_GUARDED);
+		     size, pgprot_val(pgprot_noncached(__pgprot(0))));
 	return;
 
 inval_range:
 	printk(KERN_ERR "no ISA IO ranges or unexpected isa range, "
 	       "mapping 64k\n");
 	__ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
-		     0x10000, _PAGE_NO_CACHE|_PAGE_GUARDED);
+		     0x10000, pgprot_val(pgprot_noncached(__pgprot(0))));
 }
 
 
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 60bb187cb46a..41503d7d53a1 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -159,7 +159,7 @@ static int pcibios_map_phb_io_space(struct pci_controller *hose)
 
 	/* Establish the mapping */
 	if (__ioremap_at(phys_page, area->addr, size_page,
-			 _PAGE_NO_CACHE | _PAGE_GUARDED) == NULL)
+			 pgprot_val(pgprot_noncached(__pgprot(0)))) == NULL)
 		return -ENOMEM;
 
 	/* Fixup hose IO resource */
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 1254cf107871..6f1b7064f822 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -253,7 +253,7 @@ void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
 
 void __iomem * ioremap(phys_addr_t addr, unsigned long size)
 {
-	unsigned long flags = _PAGE_NO_CACHE | _PAGE_GUARDED;
+	unsigned long flags = pgprot_val(pgprot_noncached(__pgprot(0)));
 	void *caller = __builtin_return_address(0);
 
 	if (ppc_md.ioremap)
@@ -263,7 +263,7 @@ void __iomem * ioremap(phys_addr_t addr, unsigned long size)
 
 void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size)
 {
-	unsigned long flags = _PAGE_NO_CACHE;
+	unsigned long flags = pgprot_val(pgprot_noncached_wc(__pgprot(0)));
 	void *caller = __builtin_return_address(0);
 
 	if (ppc_md.ioremap)
diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c
index 5e8a40f9739f..492b2575e0d2 100644
--- a/arch/powerpc/platforms/ps3/spu.c
+++ b/arch/powerpc/platforms/ps3/spu.c
@@ -216,7 +216,7 @@ static int __init setup_areas(struct spu *spu)
 	}
 
 	spu->local_store = (__force void *)ioremap_prot(spu->local_store_phys,
-		LS_SIZE, _PAGE_NO_CACHE);
+		LS_SIZE, pgprot_val(pgprot_noncached_wc(__pgprot(0))));
 
 	if (!spu->local_store) {
 		pr_debug("%s:%d: ioremap local_store failed\n",
diff --git a/drivers/pcmcia/electra_cf.c b/drivers/pcmcia/electra_cf.c
index 61cf61ac621e..4d7bc3f4124a 100644
--- a/drivers/pcmcia/electra_cf.c
+++ b/drivers/pcmcia/electra_cf.c
@@ -228,7 +228,7 @@ static int electra_cf_probe(struct platform_device *ofdev)
 
 	if (!cf->mem_base || !cf->io_virt || !cf->gpio_base ||
 	    (__ioremap_at(io.start, cf->io_virt, cf->io_size,
-		_PAGE_NO_CACHE | _PAGE_GUARDED) == NULL)) {
+		  pgprot_val(pgprot_noncached(__pgprot(0)))) == NULL)) {
 		dev_err(device, "can't ioremap ranges\n");
 		status = -ENOMEM;
 		goto fail1;
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 10/65] powerpc/mm: Drop WIMG in favour of new constants
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (7 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 09/65] powerpc/mm: Use helper for finding pte bits mapping I/O area Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-04-05 13:25   ` Balbir Singh
  2016-03-27  8:23 ` [PATCH 11/65] powerpc/mm: Use generic version of pmdp_clear_flush_young Aneesh Kumar K.V
                   ` (55 subsequent siblings)
  64 siblings, 1 reply; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

PowerISA 3.0 introduces two pte bits with the below meaning w.r.t. radix:
00 -> Normal Memory
01 -> Strong Access Order (SAO)
10 -> Non-idempotent I/O (cache inhibited and guarded)
11 -> Tolerant I/O (cache inhibited)

We drop the existing WIMG bits in the Linux page table in favour of the
above constants. We lose _PAGE_WRITETHRU with this conversion. We only
use write-through via pgprot_cached_wthru(), which is used by
fbdev/controlfb.c (the Apple control display) and also on PPC32.

With respect to _PAGE_COHERENT, we have been marking the hpte always
coherent for some time now; htab_convert_pte_flags() always added
HPTE_R_M.
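
A stand-alone C sketch of the new two-bit cache-attribute field; the
constant values are copied from the hunks below, while classify() and
main() are purely illustrative. The new pte_ci() helper added below
checks for the two cache-inhibited cases of this field:

#include <stdio.h>

#define _PAGE_SAO		0x10UL	/* 01: strong access order */
#define _PAGE_NON_IDEMPOTENT	0x20UL	/* 10: non-idempotent I/O (CI + guarded) */
#define _PAGE_TOLERANT		0x30UL	/* 11: tolerant I/O (CI) */
#define _PAGE_CACHE_CTL		(_PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT)

static const char *classify(unsigned long pteflags)
{
	switch (pteflags & _PAGE_CACHE_CTL) {
	case 0:			   return "normal memory";
	case _PAGE_SAO:		   return "strong access order";
	case _PAGE_NON_IDEMPOTENT: return "non-idempotent I/O (cache inhibited)";
	default:		   return "tolerant I/O (cache inhibited)";
	}
}

int main(void)
{
	printf("0x00 -> %s\n", classify(0x00));
	printf("0x10 -> %s\n", classify(_PAGE_SAO));
	printf("0x30 -> %s\n", classify(_PAGE_TOLERANT));
	return 0;
}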

NOTE: KVM changes need closer review.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash.h | 72 ++++++++++++++-----------------
 arch/powerpc/include/asm/kvm_book3s_64.h  | 27 +++++-------
 arch/powerpc/kvm/book3s_64_mmu_hv.c       | 11 +++--
 arch/powerpc/kvm/book3s_hv_rm_mmu.c       | 12 +++---
 arch/powerpc/mm/hash64_64k.c              |  2 +-
 arch/powerpc/mm/hash_utils_64.c           | 18 ++++----
 arch/powerpc/mm/pgtable.c                 |  8 ++--
 arch/powerpc/mm/pgtable_64.c              |  4 --
 arch/powerpc/platforms/pseries/lpar.c     |  4 --
 9 files changed, 67 insertions(+), 91 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 2a80981f1b0b..fd2d0ebfc49c 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -20,12 +20,10 @@
 #define _PAGE_READ		0x00004	/* read access allowed */
 #define _PAGE_RW		(_PAGE_READ | _PAGE_WRITE)
 #define _PAGE_RWX		(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
-#define _PAGE_PRIVILEGED	0x00008 /* kernel access only */ 
-#define _PAGE_GUARDED		0x00010 /* G: guarded (side-effect) page */
-/* M (memory coherence) is always set in the HPTE, so we don't need it here */
-#define _PAGE_COHERENT		0x0
-#define _PAGE_NO_CACHE		0x00020 /* I: cache inhibit */
-#define _PAGE_WRITETHRU		0x00040 /* W: cache write-through */
+#define _PAGE_PRIVILEGED	0x00008 /* kernel access only */
+#define _PAGE_SAO		0x00010 /* Strong access order */
+#define _PAGE_NON_IDEMPOTENT	0x00020 /* non idempotent memory */
+#define _PAGE_TOLERANT		0x00030 /* tolerant memory, cache inhibited */
 #define _PAGE_DIRTY		0x00080 /* C: page changed */
 #define _PAGE_ACCESSED		0x00100 /* R: page referenced */
 #define _PAGE_SPECIAL		0x00400 /* software: special page */
@@ -43,7 +41,12 @@
 #define _PAGE_HASHPTE		(1ul << 61)	/* PTE has associated HPTE */
 #define _PAGE_PTE		(1ul << 62)	/* distinguishes PTEs from pointers */
 #define _PAGE_PRESENT		(1ul << 63)	/* pte contains a translation */
-
+/*
+ * Drivers request cache-inhibited pte mappings using _PAGE_NO_CACHE.
+ * Instead of fixing all of them, add an alternate define which
+ * maps to a cache-inhibited (CI) pte mapping.
+ */
+#define _PAGE_NO_CACHE		_PAGE_TOLERANT
 /*
  * We need to differentiate between explicit huge page and THP huge
  * page, since THP huge page also need to track real subpage details
@@ -122,9 +125,6 @@
 #define _PAGE_KERNEL_RWX	(_PAGE_PRIVILEGED | _PAGE_DIRTY | \
 				 _PAGE_RW | _PAGE_EXEC)
 
-/* Strong Access Ordering */
-#define _PAGE_SAO		(_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT)
-
 /* No page size encoding in the linux PTE */
 #define _PAGE_PSIZE		0
 
@@ -150,10 +150,9 @@
 /*
  * Mask of bits returned by pte_pgprot()
  */
-#define PAGE_PROT_BITS	(_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
-			 _PAGE_WRITETHRU | _PAGE_4K_PFN | \
-			 _PAGE_PRIVILEGED | _PAGE_ACCESSED |  _PAGE_READ |\
-			 _PAGE_WRITE |  _PAGE_DIRTY | _PAGE_EXEC | \
+#define PAGE_PROT_BITS  (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT | \
+			 _PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \
+			 _PAGE_READ | _PAGE_WRITE |  _PAGE_DIRTY | _PAGE_EXEC | \
 			 _PAGE_SOFT_DIRTY)
 /*
  * We define 2 sets of base prot bits, one for basic pages (ie,
@@ -162,7 +161,7 @@
  * the processor might need it for DMA coherency.
  */
 #define _PAGE_BASE_NC	(_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
-#define _PAGE_BASE	(_PAGE_BASE_NC | _PAGE_COHERENT)
+#define _PAGE_BASE	(_PAGE_BASE_NC)
 
 /* Permission masks used to generate the __P and __S table,
  *
@@ -203,9 +202,9 @@
 /* Permission masks used for kernel mappings */
 #define PAGE_KERNEL	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
 #define PAGE_KERNEL_NC	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
-				 _PAGE_NO_CACHE)
+				 _PAGE_TOLERANT)
 #define PAGE_KERNEL_NCG	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
-				 _PAGE_NO_CACHE | _PAGE_GUARDED)
+				 _PAGE_NON_IDEMPOTENT)
 #define PAGE_KERNEL_X	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
 #define PAGE_KERNEL_RO	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
 #define PAGE_KERNEL_ROX	__pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
@@ -512,45 +511,26 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
 	*ptep = pte;
 }
 
-/*
- * Macro to mark a page protection value as "uncacheable".
- */
-
-#define _PAGE_CACHE_CTL	(_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \
-			 _PAGE_WRITETHRU)
+#define _PAGE_CACHE_CTL	(_PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT)
 
 #define pgprot_noncached pgprot_noncached
 static inline pgprot_t pgprot_noncached(pgprot_t prot)
 {
 	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
-			_PAGE_NO_CACHE | _PAGE_GUARDED);
+			_PAGE_NON_IDEMPOTENT);
 }
 
 #define pgprot_noncached_wc pgprot_noncached_wc
 static inline pgprot_t pgprot_noncached_wc(pgprot_t prot)
 {
 	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
-			_PAGE_NO_CACHE);
+			_PAGE_TOLERANT);
 }
 
 #define pgprot_cached pgprot_cached
 static inline pgprot_t pgprot_cached(pgprot_t prot)
 {
-	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
-			_PAGE_COHERENT);
-}
-
-#define pgprot_cached_wthru pgprot_cached_wthru
-static inline pgprot_t pgprot_cached_wthru(pgprot_t prot)
-{
-	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
-			_PAGE_COHERENT | _PAGE_WRITETHRU);
-}
-
-#define pgprot_cached_noncoherent pgprot_cached_noncoherent
-static inline pgprot_t pgprot_cached_noncoherent(pgprot_t prot)
-{
-	return __pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL);
+	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL));
 }
 
 #define pgprot_writecombine pgprot_writecombine
@@ -558,6 +538,18 @@ static inline pgprot_t pgprot_writecombine(pgprot_t prot)
 {
 	return pgprot_noncached_wc(prot);
 }
+/*
+ * check if a pte mapping has the cache-inhibited property
+ */
+static inline bool pte_ci(pte_t pte)
+{
+	unsigned long pte_v = pte_val(pte);
+
+	if (((pte_v & _PAGE_CACHE_CTL) == _PAGE_TOLERANT) ||
+	    ((pte_v & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT))
+		return true;
+	return false;
+}
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index f9a7a89a3e4f..ebdaf576cf26 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -278,19 +278,24 @@ static inline unsigned long hpte_make_readonly(unsigned long ptel)
 	return ptel;
 }
 
-static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
+static inline bool hpte_cache_flags_ok(unsigned long hptel, bool is_ci)
 {
-	unsigned int wimg = ptel & HPTE_R_WIMG;
+	unsigned int wimg = hptel & HPTE_R_WIMG;
 
 	/* Handle SAO */
 	if (wimg == (HPTE_R_W | HPTE_R_I | HPTE_R_M) &&
 	    cpu_has_feature(CPU_FTR_ARCH_206))
 		wimg = HPTE_R_M;
 
-	if (!io_type)
+	if (!is_ci)
 		return wimg == HPTE_R_M;
-
-	return (wimg & (HPTE_R_W | HPTE_R_I)) == io_type;
+	/*
+	 * If the host pte is mapped cache inhibited, make sure the hptel
+	 * also has cache inhibited set.
+	 */
+	if (wimg & HPTE_R_W) /* FIXME!! is this ok for all guests? */
+		return false;
+	return !!(wimg & HPTE_R_I);
 }
 
 /*
@@ -333,18 +338,6 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing)
 	return new_pte;
 }
 
-
-/* Return HPTE cache control bits corresponding to Linux pte bits */
-static inline unsigned long hpte_cache_bits(unsigned long pte_val)
-{
-#if _PAGE_NO_CACHE == HPTE_R_I && _PAGE_WRITETHRU == HPTE_R_W
-	return pte_val & (HPTE_R_W | HPTE_R_I);
-#else
-	return ((pte_val & _PAGE_NO_CACHE) ? HPTE_R_I : 0) +
-		((pte_val & _PAGE_WRITETHRU) ? HPTE_R_W : 0);
-#endif
-}
-
 static inline bool hpte_read_permission(unsigned long pp, unsigned long key)
 {
 	if (key)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index c7b78d8336b2..05f09ae82587 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -447,7 +447,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	struct revmap_entry *rev;
 	struct page *page, *pages[1];
 	long index, ret, npages;
-	unsigned long is_io;
+	bool is_ci;
 	unsigned int writing, write_ok;
 	struct vm_area_struct *vma;
 	unsigned long rcbits;
@@ -503,7 +503,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	smp_rmb();
 
 	ret = -EFAULT;
-	is_io = 0;
+	is_ci = false;
 	pfn = 0;
 	page = NULL;
 	pte_size = PAGE_SIZE;
@@ -521,7 +521,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			pfn = vma->vm_pgoff +
 				((hva - vma->vm_start) >> PAGE_SHIFT);
 			pte_size = psize;
-			is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
+			is_ci = pte_ci(__pte((pgprot_val(vma->vm_page_prot))));
 			write_ok = vma->vm_flags & VM_WRITE;
 		}
 		up_read(&current->mm->mmap_sem);
@@ -558,10 +558,9 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		goto out_put;
 
 	/* Check WIMG vs. the actual page we're accessing */
-	if (!hpte_cache_flags_ok(r, is_io)) {
-		if (is_io)
+	if (!hpte_cache_flags_ok(r, is_ci)) {
+		if (is_ci)
 			goto out_put;
-
 		/*
 		 * Allow guest to map emulated device memory as
 		 * uncacheable, but actually make it cacheable.
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 4cb8db05f3e5..99b4e9d5dd23 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -175,7 +175,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 	unsigned long g_ptel;
 	struct kvm_memory_slot *memslot;
 	unsigned hpage_shift;
-	unsigned long is_io;
+	bool is_ci;
 	unsigned long *rmap;
 	pte_t *ptep;
 	unsigned int writing;
@@ -199,7 +199,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 	gfn = gpa >> PAGE_SHIFT;
 	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
 	pa = 0;
-	is_io = ~0ul;
+	is_ci = false;
 	rmap = NULL;
 	if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
 		/* Emulated MMIO - mark this with key=31 */
@@ -250,7 +250,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 			if (writing && !pte_write(pte))
 				/* make the actual HPTE be read-only */
 				ptel = hpte_make_readonly(ptel);
-			is_io = hpte_cache_bits(pte_val(pte));
+			is_ci = pte_ci(pte);
 			pa = pte_pfn(pte) << PAGE_SHIFT;
 			pa |= hva & (host_pte_size - 1);
 			pa |= gpa & ~PAGE_MASK;
@@ -267,9 +267,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 	else
 		pteh |= HPTE_V_ABSENT;
 
-	/* Check WIMG */
-	if (is_io != ~0ul && !hpte_cache_flags_ok(ptel, is_io)) {
-		if (is_io)
+	/* If we had a host pte mapping then check WIMG */
+	if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) {
+		if (is_ci)
 			return H_PARAMETER;
 		/*
 		 * Allow guest to map emulated device memory as
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index f33b410d6c8a..419562b0e9c8 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -248,7 +248,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
 		 * If so, bail out and refault as a 4k page
 		 */
 		if (!mmu_has_feature(MMU_FTR_CI_LARGE_PAGE) &&
-		    unlikely(old_pte & _PAGE_NO_CACHE))
+		    unlikely(pte_ci(pte)))
 			return 0;
 		/*
 		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 59d4600bacd5..e924690a5a0e 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -192,12 +192,13 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
 	/*
 	 * Add in WIG bits
 	 */
-	if (pteflags & _PAGE_WRITETHRU)
-		rflags |= HPTE_R_W;
-	if (pteflags & _PAGE_NO_CACHE)
+
+	if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_TOLERANT)
 		rflags |= HPTE_R_I;
-	if (pteflags & _PAGE_GUARDED)
-		rflags |= HPTE_R_G;
+	if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT)
+		rflags |= (HPTE_R_I | HPTE_R_G);
+	if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO)
+		rflags |= (HPTE_R_I | HPTE_R_W);
 
 	return rflags;
 }
@@ -1138,8 +1139,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
 	/* If this PTE is non-cacheable and we have restrictions on
 	 * using non cacheable large pages, then we switch to 4k
 	 */
-	if (mmu_ci_restrictions && psize == MMU_PAGE_64K &&
-	    (pte_val(*ptep) & _PAGE_NO_CACHE)) {
+	if (mmu_ci_restrictions && psize == MMU_PAGE_64K && pte_ci(*ptep)) {
 		if (user_region) {
 			demote_segment_4k(mm, ea);
 			psize = MMU_PAGE_4K;
@@ -1293,13 +1293,13 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
 
 	WARN_ON(hugepage_shift);
 #ifdef CONFIG_PPC_64K_PAGES
-	/* If either _PAGE_4K_PFN or _PAGE_NO_CACHE is set (and we are on
+	/* If either _PAGE_4K_PFN or cache inhibited is set (and we are on
 	 * a 64K kernel), then we don't preload, hash_page() will take
 	 * care of it once we actually try to access the page.
 	 * That way we don't have to duplicate all of the logic for segment
 	 * page size demotion here
 	 */
-	if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE))
+	if ((pte_val(*ptep) & _PAGE_4K_PFN) || pte_ci(*ptep))
 		goto out_exit;
 #endif /* CONFIG_PPC_64K_PAGES */
 
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index a34884beaa47..115a0a19d5a2 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -38,16 +38,16 @@ static inline int is_exec_fault(void)
 
 /* We only try to do i/d cache coherency on stuff that looks like
  * reasonably "normal" PTEs. We currently require a PTE to be present
- * and we avoid _PAGE_SPECIAL and _PAGE_NO_CACHE. We also only do that
+ * and we avoid _PAGE_SPECIAL and cache inhibited pte. We also only do that
  * on userspace PTEs
  */
 static inline int pte_looks_normal(pte_t pte)
 {
 
 #if defined(CONFIG_PPC_BOOK3S_64)
-	if ((pte_val(pte) &
-	     (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE)) ==
-	    _PAGE_PRESENT) {
+	if ((pte_val(pte) & (_PAGE_PRESENT | _PAGE_SPECIAL)) == _PAGE_PRESENT) {
+		if (pte_ci(pte))
+			return 0;
 		if (pte_user(pte))
 			return 1;
 	}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 6f1b7064f822..db924c54f370 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -167,10 +167,6 @@ void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
 	if ((flags & _PAGE_PRESENT) == 0)
 		flags |= pgprot_val(PAGE_KERNEL);
 
-	/* Non-cacheable page cannot be coherent */
-	if (flags & _PAGE_NO_CACHE)
-		flags &= ~_PAGE_COHERENT;
-
 	/* We don't support the 4K PFN hack with ioremap */
 	if (flags & _PAGE_4K_PFN)
 		return NULL;
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 2415a0d31f8f..0d4608990702 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -152,10 +152,6 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 	/* Exact = 0                   */
 	flags = 0;
 
-	/* Make pHyp happy */
-	if ((rflags & _PAGE_NO_CACHE) && !(rflags & _PAGE_WRITETHRU))
-		hpte_r &= ~HPTE_R_M;
-
 	if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N))
 		flags |= H_COALESCE_CAND;
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 11/65] powerpc/mm: Use generic version of pmdp_clear_flush_young
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (8 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 10/65] powerpc/mm: Drop WIMG in favour of new constants Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-04-05 23:51   ` Balbir Singh
  2016-03-27  8:23 ` [PATCH 12/65] powerpc/mm: Use generic version of ptep_clear_flush_young Aneesh Kumar K.V
                   ` (54 subsequent siblings)
  64 siblings, 1 reply; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

The radix variant is going to require a flush_pmd_tlb_range(). With
flush_pmd_tlb_range() added, pmdp_clear_flush_young() is the same as
the generic version, so drop the powerpc-specific variant.
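
As a rough stand-alone model (toy types and a _demo suffix, not kernel
code) of what the generic version does once flush_pmd_tlb_range()
exists: test-and-clear the referenced bit and flush only if the bit
was actually set:

#include <stdio.h>
#include <stdbool.h>

#define PMD_YOUNG 0x1UL			/* toy "referenced" bit */

static void flush_pmd_tlb_range_demo(void)
{
	printf("flushing pmd range\n");
}

/* shape of the generic helper: clear the young bit and flush only
 * when it was set */
static bool pmdp_clear_flush_young_demo(unsigned long *pmd)
{
	bool young = *pmd & PMD_YOUNG;

	*pmd &= ~PMD_YOUNG;
	if (young)
		flush_pmd_tlb_range_demo();
	return young;
}

int main(void)
{
	unsigned long pmd = PMD_YOUNG;

	printf("young=%d\n", pmdp_clear_flush_young_demo(&pmd));
	printf("young=%d\n", pmdp_clear_flush_young_demo(&pmd));
	return 0;
}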

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h |  3 ---
 arch/powerpc/mm/pgtable_64.c                 | 13 +++----------
 2 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 0fac73721e1e..4745d314acfb 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -298,9 +298,6 @@ extern int pmdp_set_access_flags(struct vm_area_struct *vma,
 #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
 extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 				     unsigned long address, pmd_t *pmdp);
-#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
-extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
-				  unsigned long address, pmd_t *pmdp);
 
 #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
 extern pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index db924c54f370..98c91ad18ba7 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -593,22 +593,15 @@ pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
 	return pmd;
 }
 
-int pmdp_test_and_clear_young(struct vm_area_struct *vma,
-			      unsigned long address, pmd_t *pmdp)
-{
-	return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
-}
-
 /*
  * We currently remove entries from the hashtable regardless of whether
- * the entry was young or dirty. The generic routines only flush if the
- * entry was young or dirty which is not good enough.
+ * the entry was young or dirty.
  *
  * We should be more intelligent about this but for the moment we override
  * these functions and force a tlb flush unconditionally
  */
-int pmdp_clear_flush_young(struct vm_area_struct *vma,
-				  unsigned long address, pmd_t *pmdp)
+int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+			      unsigned long address, pmd_t *pmdp)
 {
 	return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
 }
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 12/65] powerpc/mm: Use generic version of ptep_clear_flush_young
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (9 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 11/65] powerpc/mm: Use generic version of pmdp_clear_flush_young Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-04-05 23:53   ` Balbir Singh
  2016-03-27  8:23 ` [PATCH 13/65] powerpc/mm: Move common data structure between radix and hash to book3s 64 generic headers Aneesh Kumar K.V
                   ` (53 subsequent siblings)
  64 siblings, 1 reply; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

The radix variant is going to require a flush_tlb_range(). With
flush_tlb_range() added, ptep_clear_flush_young() is the same as the
generic version, so drop the powerpc-specific variant.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash.h | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index fd2d0ebfc49c..d54ebb16692c 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -280,6 +280,13 @@ static inline unsigned long pte_update(struct mm_struct *mm,
 	return old;
 }
 
+/*
+ * We currently remove entries from the hashtable regardless of whether
+ * the entry was young or dirty.
+ *
+ * We should be more intelligent about this but for the moment we override
+ * these functions and force a tlb flush unconditionally
+ */
 static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
 					      unsigned long addr, pte_t *ptep)
 {
@@ -318,22 +325,6 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 	pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1);
 }
 
-/*
- * We currently remove entries from the hashtable regardless of whether
- * the entry was young or dirty. The generic routines only flush if the
- * entry was young or dirty which is not good enough.
- *
- * We should be more intelligent about this but for the moment we override
- * these functions and force a tlb flush unconditionally
- */
-#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
-#define ptep_clear_flush_young(__vma, __address, __ptep)		\
-({									\
-	int __young = __ptep_test_and_clear_young((__vma)->vm_mm, __address, \
-						  __ptep);		\
-	__young;							\
-})
-
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 				       unsigned long addr, pte_t *ptep)
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 13/65] powerpc/mm: Move common data structure between radix and hash to book3s 64 generic headers
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (10 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 12/65] powerpc/mm: Use generic version of ptep_clear_flush_young Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 14/65] powerpc/mm/power9: Add partition table format Aneesh Kumar K.V
                   ` (52 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

We want to use mmu_context_t with both radix and hash, so move it out
of the hash-specific mmu-hash.h header into the new book3s/64/mmu.h.

With this patch, we start moving code that is generic between radix
and hash from the book3s hash 64-specific header to the generic
book3s 64 headers.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/32/mmu-hash.h |  6 +--
 arch/powerpc/include/asm/book3s/64/mmu-hash.h | 61 ++---------------------
 arch/powerpc/include/asm/book3s/64/mmu.h      | 72 +++++++++++++++++++++++++++
 arch/powerpc/include/asm/mmu.h                | 11 ++--
 4 files changed, 85 insertions(+), 65 deletions(-)
 create mode 100644 arch/powerpc/include/asm/book3s/64/mmu.h

diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
index 16f513e5cbd7..b82e063494dd 100644
--- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_POWERPC_MMU_HASH32_H_
-#define _ASM_POWERPC_MMU_HASH32_H_
+#ifndef _ASM_POWERPC_BOOK3S_32_MMU_HASH_H_
+#define _ASM_POWERPC_BOOK3S_32_MMU_HASH_H_
 /*
  * 32-bit hash table MMU support
  */
@@ -90,4 +90,4 @@ typedef struct {
 #define mmu_virtual_psize	MMU_PAGE_4K
 #define mmu_linear_psize	MMU_PAGE_256M
 
-#endif /* _ASM_POWERPC_MMU_HASH32_H_ */
+#endif /* _ASM_POWERPC_BOOK3S_32_MMU_HASH_H_ */
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 0cea4807e26f..ce73736b42db 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_POWERPC_MMU_HASH64_H_
-#define _ASM_POWERPC_MMU_HASH64_H_
+#ifndef _ASM_POWERPC_BOOK3S_64_MMU_HASH_H_
+#define _ASM_POWERPC_BOOK3S_64_MMU_HASH_H_
 /*
  * PowerPC64 memory management structures
  *
@@ -127,24 +127,6 @@ extern struct hash_pte *htab_address;
 extern unsigned long htab_size_bytes;
 extern unsigned long htab_hash_mask;
 
-/*
- * Page size definition
- *
- *    shift : is the "PAGE_SHIFT" value for that page size
- *    sllp  : is a bit mask with the value of SLB L || LP to be or'ed
- *            directly to a slbmte "vsid" value
- *    penc  : is the HPTE encoding mask for the "LP" field:
- *
- */
-struct mmu_psize_def
-{
-	unsigned int	shift;	/* number of bits */
-	int		penc[MMU_PAGE_COUNT];	/* HPTE encoding */
-	unsigned int	tlbiel;	/* tlbiel supported for that page size */
-	unsigned long	avpnm;	/* bits to mask out in AVPN in the HPTE */
-	unsigned long	sllp;	/* SLB L||LP (exact mask to use in slbmte) */
-};
-extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
 
 static inline int shift_to_mmu_psize(unsigned int shift)
 {
@@ -210,11 +192,6 @@ static inline int segment_shift(int ssize)
 /*
  * The current system page and segment sizes
  */
-extern int mmu_linear_psize;
-extern int mmu_virtual_psize;
-extern int mmu_vmalloc_psize;
-extern int mmu_vmemmap_psize;
-extern int mmu_io_psize;
 extern int mmu_kernel_ssize;
 extern int mmu_highuser_ssize;
 extern u16 mmu_slb_size;
@@ -512,38 +489,6 @@ static inline void subpage_prot_free(struct mm_struct *mm) {}
 static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
 #endif /* CONFIG_PPC_SUBPAGE_PROT */
 
-typedef unsigned long mm_context_id_t;
-struct spinlock;
-
-typedef struct {
-	mm_context_id_t id;
-	u16 user_psize;		/* page size index */
-
-#ifdef CONFIG_PPC_MM_SLICES
-	u64 low_slices_psize;	/* SLB page size encodings */
-	unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
-#else
-	u16 sllp;		/* SLB page size encoding */
-#endif
-	unsigned long vdso_base;
-#ifdef CONFIG_PPC_SUBPAGE_PROT
-	struct subpage_prot_table spt;
-#endif /* CONFIG_PPC_SUBPAGE_PROT */
-#ifdef CONFIG_PPC_ICSWX
-	struct spinlock *cop_lockp; /* guard acop and cop_pid */
-	unsigned long acop;	/* mask of enabled coprocessor types */
-	unsigned int cop_pid;	/* pid value used with coprocessors */
-#endif /* CONFIG_PPC_ICSWX */
-#ifdef CONFIG_PPC_64K_PAGES
-	/* for 4K PTE fragment support */
-	void *pte_frag;
-#endif
-#ifdef CONFIG_SPAPR_TCE_IOMMU
-	struct list_head iommu_group_mem_list;
-#endif
-} mm_context_t;
-
-
 #if 0
 /*
  * The code below is equivalent to this function for arguments
@@ -613,4 +558,4 @@ unsigned htab_shift_for_mem_size(unsigned long mem_size);
 
 #endif /* __ASSEMBLY__ */
 
-#endif /* _ASM_POWERPC_MMU_HASH64_H_ */
+#endif /* _ASM_POWERPC_BOOK3S_64_MMU_HASH_H_ */
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
new file mode 100644
index 000000000000..aadb0bbc5c71
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -0,0 +1,72 @@
+#ifndef _ASM_POWERPC_BOOK3S_64_MMU_H_
+#define _ASM_POWERPC_BOOK3S_64_MMU_H_
+
+#ifndef __ASSEMBLY__
+/*
+ * Page size definition
+ *
+ *    shift : is the "PAGE_SHIFT" value for that page size
+ *    sllp  : is a bit mask with the value of SLB L || LP to be or'ed
+ *            directly to a slbmte "vsid" value
+ *    penc  : is the HPTE encoding mask for the "LP" field:
+ *
+ */
+struct mmu_psize_def {
+	unsigned int	shift;	/* number of bits */
+	int		penc[MMU_PAGE_COUNT];	/* HPTE encoding */
+	unsigned int	tlbiel;	/* tlbiel supported for that page size */
+	unsigned long	avpnm;	/* bits to mask out in AVPN in the HPTE */
+	unsigned long	sllp;	/* SLB L||LP (exact mask to use in slbmte) */
+};
+extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+#endif /* __ASSEMBLY__ */
+
+#ifdef CONFIG_PPC_STD_MMU_64
+/* 64-bit classic hash table MMU */
+#include <asm/book3s/64/mmu-hash.h>
+#endif
+
+#ifndef __ASSEMBLY__
+
+typedef unsigned long mm_context_id_t;
+struct spinlock;
+
+typedef struct {
+	mm_context_id_t id;
+	u16 user_psize;		/* page size index */
+
+#ifdef CONFIG_PPC_MM_SLICES
+	u64 low_slices_psize;	/* SLB page size encodings */
+	unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
+#else
+	u16 sllp;		/* SLB page size encoding */
+#endif
+	unsigned long vdso_base;
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+	struct subpage_prot_table spt;
+#endif /* CONFIG_PPC_SUBPAGE_PROT */
+#ifdef CONFIG_PPC_ICSWX
+	struct spinlock *cop_lockp; /* guard acop and cop_pid */
+	unsigned long acop;	/* mask of enabled coprocessor types */
+	unsigned int cop_pid;	/* pid value used with coprocessors */
+#endif /* CONFIG_PPC_ICSWX */
+#ifdef CONFIG_PPC_64K_PAGES
+	/* for 4K PTE fragment support */
+	void *pte_frag;
+#endif
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+	struct list_head iommu_group_mem_list;
+#endif
+} mm_context_t;
+
+/*
+ * The current system page and segment sizes
+ */
+extern int mmu_linear_psize;
+extern int mmu_virtual_psize;
+extern int mmu_vmalloc_psize;
+extern int mmu_vmemmap_psize;
+extern int mmu_io_psize;
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 8ca1c983bf6c..5f55024f9522 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -181,10 +181,13 @@ static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
 
 #define MMU_PAGE_COUNT	15
 
-#if defined(CONFIG_PPC_STD_MMU_64)
-/* 64-bit classic hash table MMU */
-#include <asm/book3s/64/mmu-hash.h>
-#elif defined(CONFIG_PPC_STD_MMU_32)
+#ifdef CONFIG_PPC_BOOK3S_64
+#include <asm/book3s/64/mmu.h>
+#else /* CONFIG_PPC_BOOK3S_64 */
+
+#endif
+
+#if defined(CONFIG_PPC_STD_MMU_32)
 /* 32-bit classic hash table MMU */
 #include <asm/book3s/32/mmu-hash.h>
 #elif defined(CONFIG_40x)
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 14/65] powerpc/mm/power9: Add partition table format
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (11 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 13/65] powerpc/mm: Move common data structure between radix and hash to book3s 64 generic headers Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-04-06  6:11   ` Balbir Singh
  2016-03-27  8:23 ` [PATCH 15/65] powerpc/mm/hash: Add support for POWER9 hash Aneesh Kumar K.V
                   ` (51 subsequent siblings)
  64 siblings, 1 reply; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

We also add a machdep callback for updating a partition table entry.
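
A minimal stand-alone sketch of the callback shape being added; only
the int (*update_partition_table)(u64) signature is taken from the
hunk below, while the _demo struct, functions and main() are made-up
stand-ins, not the kernel's machdep_calls:

#include <stdio.h>
#include <stdint.h>

struct machdep_calls_demo {
	int (*update_partition_table)(uint64_t patb1);
};

static int native_update_partition_table_demo(uint64_t patb1)
{
	/* the real hook (added in a later patch) byte-swaps patb1 and
	 * stores it into the in-memory partition table */
	printf("patb1 <- 0x%016llx\n", (unsigned long long)patb1);
	return 0;
}

static struct machdep_calls_demo ppc_md_demo = {
	.update_partition_table = native_update_partition_table_demo,
};

int main(void)
{
	if (ppc_md_demo.update_partition_table)
		ppc_md_demo.update_partition_table(0x1000);
	return 0;
}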

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/mmu.h | 31 +++++++++++++++++++++++++++++--
 arch/powerpc/include/asm/machdep.h       |  1 +
 arch/powerpc/include/asm/reg.h           |  1 +
 3 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index aadb0bbc5c71..b86786f2521c 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -21,12 +21,39 @@ struct mmu_psize_def {
 extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
 #endif /* __ASSEMBLY__ */
 
-#ifdef CONFIG_PPC_STD_MMU_64
 /* 64-bit classic hash table MMU */
 #include <asm/book3s/64/mmu-hash.h>
-#endif
 
 #ifndef __ASSEMBLY__
+/*
+ * ISA 3.0 partition and process table entry formats
+ */
+struct prtb_entry {
+	__be64 prtb0;
+	__be64 prtb1;
+};
+extern struct prtb_entry *process_tb;
+
+struct patb_entry {
+	__be64 patb0;
+	__be64 patb1;
+};
+extern struct patb_entry *partition_tb;
+
+#define PATB_HR		(1UL << 63)
+#define PATB_GR		(1UL << 63)
+#define RPDB_MASK	0x0ffffffffffff00fUL
+#define RPDB_SHIFT	(1UL << 8)
+/*
+ * Limit the process table to a PAGE_SIZE table. This
+ * also limits the max pid we can support:
+ * MAX_USER_CONTEXT * 16 bytes of space.
+ */
+#define PRTB_SIZE_SHIFT	(CONTEXT_BITS + 4)
+/*
+ * Power9 currently only supports a 64K partition table size.
+ */
+#define PATB_SIZE_SHIFT	16
 
 typedef unsigned long mm_context_id_t;
 struct spinlock;
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index fd22442d30a9..6bdcd0da9e21 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -256,6 +256,7 @@ struct machdep_calls {
 #ifdef CONFIG_ARCH_RANDOM
 	int (*get_random_seed)(unsigned long *v);
 #endif
+	int (*update_partition_table)(u64);
 };
 
 extern void e500_idle(void);
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 52ed654d01ba..257251ada3a3 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -587,6 +587,7 @@
 #define SPRN_PIR	0x3FF	/* Processor Identification Register */
 #endif
 #define SPRN_TIR	0x1BE	/* Thread Identification Register */
+#define SPRN_PTCR	0x1D0	/* Partition table control Register */
 #define SPRN_PSPB	0x09F	/* Problem State Priority Boost reg */
 #define SPRN_PTEHI	0x3D5	/* 981 7450 PTE HI word (S/W TLB load) */
 #define SPRN_PTELO	0x3D6	/* 982 7450 PTE LO word (S/W TLB load) */
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 15/65] powerpc/mm/hash: Add support for POWER9 hash
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (12 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 14/65] powerpc/mm/power9: Add partition table format Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 16/65] powerpc/mm: Move hash and no hash code to separate files Aneesh Kumar K.V
                   ` (50 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

PowerISA 3.0 adds a partition table indexed by LPID. The partition
table allows us to specify the MMU model that will be used for guest
and host translation.

This patch adds support for the SLB-based hash model (UPRT = 0). What
is required with this model is to support the new hash page table
entry format and also to set up the partition table such that we use
the hash table for address translation.

We don't have segment table support yet.

In order to make sure we don't load the KVM module on Power9 (since we
don't have KVM support yet), this patch also disables KVM on Power9.
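
For illustration (not part of the patch), a stand-alone sketch of the
second-doubleword change visible in the hpte_encode_r() hunk below: on
ISA 3.0 the segment size is folded into the R word at bit 58 instead
of being placed in the V word. The shift is copied from the hunk; the
_demo function, the example ssize value and main() are toys:

#include <stdio.h>

#define HPTE_R_3_0_SSIZE_SHIFT	58	/* copied from the hunk below */

/* toy version of the hpte_encode_r() change; the real code also
 * encodes the page-size bits and masks pa with HPTE_R_RPN */
static unsigned long hpte_encode_r_demo(unsigned long pa, int ssize, int isa300)
{
	unsigned long r = pa;

	if (isa300)
		r |= ((unsigned long)ssize) << HPTE_R_3_0_SSIZE_SHIFT;
	return r;
}

int main(void)
{
	/* ssize = 1 is used purely as an example segment-size index */
	printf("pre-ISA 3.0: 0x%016lx\n", hpte_encode_r_demo(0x1000, 1, 0));
	printf("ISA 3.0    : 0x%016lx\n", hpte_encode_r_demo(0x1000, 1, 1));
	return 0;
}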

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h | 13 +++++++--
 arch/powerpc/kvm/book3s_hv.c                  |  6 ++++
 arch/powerpc/kvm/book3s_pr.c                  |  6 +++-
 arch/powerpc/mm/hash_native_64.c              | 11 ++++++-
 arch/powerpc/mm/hash_utils_64.c               | 42 +++++++++++++++++++++++++--
 arch/powerpc/mm/pgtable_64.c                  |  7 +++++
 arch/powerpc/platforms/ps3/htab.c             |  2 +-
 arch/powerpc/platforms/pseries/lpar.c         |  2 +-
 8 files changed, 81 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index ce73736b42db..843b5d839904 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -78,6 +78,10 @@
 #define HPTE_V_SECONDARY	ASM_CONST(0x0000000000000002)
 #define HPTE_V_VALID		ASM_CONST(0x0000000000000001)
 
+/*
+ * ISA 3.0 has a different HPTE format.
+ */
+#define HPTE_R_3_0_SSIZE_SHIFT	58
 #define HPTE_R_PP0		ASM_CONST(0x8000000000000000)
 #define HPTE_R_TS		ASM_CONST(0x4000000000000000)
 #define HPTE_R_KEY_HI		ASM_CONST(0x3000000000000000)
@@ -224,7 +228,8 @@ static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize,
 	 */
 	v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm);
 	v <<= HPTE_V_AVPN_SHIFT;
-	v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
 	return v;
 }
 
@@ -248,8 +253,12 @@ static inline unsigned long hpte_encode_v(unsigned long vpn, int base_psize,
  * aligned for the requested page size
  */
 static inline unsigned long hpte_encode_r(unsigned long pa, int base_psize,
-					  int actual_psize)
+					  int actual_psize, int ssize)
 {
+
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		pa |= ((unsigned long) ssize) << HPTE_R_3_0_SSIZE_SHIFT;
+
 	/* A 4K page needs no special encoding */
 	if (actual_psize == MMU_PAGE_4K)
 		return pa & HPTE_R_RPN;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index baeddb06811d..c07600efcef6 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3083,6 +3083,12 @@ static int kvmppc_core_check_processor_compat_hv(void)
 	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
 	    !cpu_has_feature(CPU_FTR_ARCH_206))
 		return -EIO;
+	/*
+	 * Disable KVM for Power9 until the required bits are merged.
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		return -EIO;
+
 	return 0;
 }
 
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 95bceca8f40e..ffbaf40b7f31 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1683,7 +1683,11 @@ static void kvmppc_core_destroy_vm_pr(struct kvm *kvm)
 
 static int kvmppc_core_check_processor_compat_pr(void)
 {
-	/* we are always compatible */
+	/*
+	 * Disable KVM for Power9 until the required bits are merged.
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		return -EIO;
 	return 0;
 }
 
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 8eaac81347fd..d873f6507f72 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -221,7 +221,7 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
 		return -1;
 
 	hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
-	hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
+	hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags;
 
 	if (!(vflags & HPTE_V_BOLTED)) {
 		DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
@@ -719,6 +719,12 @@ static void native_flush_hash_range(unsigned long number, int local)
 	local_irq_restore(flags);
 }
 
+static int native_update_partition_table(u64 patb1)
+{
+	partition_tb->patb1 = cpu_to_be64(patb1);
+	return 0;
+}
+
 void __init hpte_init_native(void)
 {
 	ppc_md.hpte_invalidate	= native_hpte_invalidate;
@@ -729,4 +735,7 @@ void __init hpte_init_native(void)
 	ppc_md.hpte_clear_all	= native_hpte_clear;
 	ppc_md.flush_hash_range = native_flush_hash_range;
 	ppc_md.hugepage_invalidate   = native_hugepage_invalidate;
+
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		ppc_md.update_partition_table = native_update_partition_table;
 }
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index e924690a5a0e..43e0d86b7ca1 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -674,6 +674,41 @@ int remove_section_mapping(unsigned long start, unsigned long end)
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
+static void __init hash_init_partition_table(phys_addr_t hash_table,
+					     unsigned long pteg_count)
+{
+	unsigned long ps_field;
+	unsigned long htab_size;
+	unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
+
+	/*
+	 * slb llp encoding for the page size used in VPM real mode.
+	 * We can ignore that for lpid 0
+	 */
+	ps_field = 0;
+	htab_size =  __ilog2(pteg_count) - 11;
+
+	BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large.");
+	partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
+						MEMBLOCK_ALLOC_ANYWHERE));
+
+	/* Initialize the Partition Table with no entries */
+	memset((void *)partition_tb, 0, patb_size);
+	partition_tb->patb0 = cpu_to_be64(ps_field | hash_table | htab_size);
+	/*
+	 * FIXME!! This should be done via update_partition_table().
+	 * For now UPRT is 0 for us.
+	 */
+	partition_tb->patb1 = 0;
+	DBG("Partition table %p\n", partition_tb);
+	/*
+	 * update partition table control register,
+	 * 64 K size.
+	 */
+	mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
+
+}
+
 static void __init htab_initialize(void)
 {
 	unsigned long table;
@@ -742,8 +777,11 @@ static void __init htab_initialize(void)
 		/* Initialize the HPT with no entries */
 		memset((void *)table, 0, htab_size_bytes);
 
-		/* Set SDR1 */
-		mtspr(SPRN_SDR1, _SDR1);
+		if (!cpu_has_feature(CPU_FTR_ARCH_300))
+			/* Set SDR1 */
+			mtspr(SPRN_SDR1, _SDR1);
+		else
+			hash_init_partition_table(table, pteg_count);
 	}
 
 	prot = pgprot_val(PAGE_KERNEL);
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 98c91ad18ba7..5fff787da17a 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -69,6 +69,13 @@
 #endif
 #endif
 
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * partition table and process table for ISA 3.0
+ */
+struct prtb_entry *process_tb;
+struct patb_entry *partition_tb;
+#endif
 unsigned long ioremap_bot = IOREMAP_BASE;
 
 #ifdef CONFIG_PPC_MMU_NOHASH
diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c
index 2f95d33cf34a..c9a3e677192a 100644
--- a/arch/powerpc/platforms/ps3/htab.c
+++ b/arch/powerpc/platforms/ps3/htab.c
@@ -63,7 +63,7 @@ static long ps3_hpte_insert(unsigned long hpte_group, unsigned long vpn,
 	vflags &= ~HPTE_V_SECONDARY;
 
 	hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
-	hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize) | rflags;
+	hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize, ssize) | rflags;
 
 	spin_lock_irqsave(&ps3_htab_lock, flags);
 
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 0d4608990702..e4ceba2b1551 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -139,7 +139,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 			 hpte_group, vpn,  pa, rflags, vflags, psize);
 
 	hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
-	hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
+	hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags;
 
 	if (!(vflags & HPTE_V_BOLTED))
 		pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 16/65] powerpc/mm: Move hash and no hash code to separate files
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (13 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 15/65] powerpc/mm/hash: Add support for POWER9 hash Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 17/65] powerpc/mm/book3s: Rename hash specific PTE bits to carry H_ prefix Aneesh Kumar K.V
                   ` (49 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

This patch reduces the #ifdefs in C code and also helps in adding the
radix changes later. Only code movement in this patch.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/mm/Makefile         |   3 +-
 arch/powerpc/mm/init_64.c        |  74 ++--------------------
 arch/powerpc/mm/pgtable-book3e.c | 128 +++++++++++++++++++++++++++++++++++++++
 arch/powerpc/mm/pgtable-hash64.c | 100 ++++++++++++++++++++++++++++++
 arch/powerpc/mm/pgtable_64.c     |  83 -------------------------
 5 files changed, 235 insertions(+), 153 deletions(-)
 create mode 100644 arch/powerpc/mm/pgtable-book3e.c
 create mode 100644 arch/powerpc/mm/pgtable-hash64.c

diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 1ffeda85c086..6b5cc805c7ba 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -13,7 +13,8 @@ obj-$(CONFIG_PPC_MMU_NOHASH)	+= mmu_context_nohash.o tlb_nohash.o \
 				   tlb_nohash_low.o
 obj-$(CONFIG_PPC_BOOK3E)	+= tlb_low_$(CONFIG_WORD_SIZE)e.o
 hash64-$(CONFIG_PPC_NATIVE)	:= hash_native_64.o
-obj-$(CONFIG_PPC_STD_MMU_64)	+= hash_utils_64.o slb_low.o slb.o $(hash64-y)
+obj-$(CONFIG_PPC_BOOK3E_64)   += pgtable-book3e.o
+obj-$(CONFIG_PPC_STD_MMU_64)	+= pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y)
 obj-$(CONFIG_PPC_STD_MMU_32)	+= ppc_mmu_32.o hash_low_32.o
 obj-$(CONFIG_PPC_STD_MMU)	+= tlb_hash$(CONFIG_WORD_SIZE).o \
 				   mmu_context_hash$(CONFIG_WORD_SIZE).o
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index ba655666186d..8d1daf7d9785 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -189,75 +189,6 @@ static int __meminit vmemmap_populated(unsigned long start, int page_size)
 	return 0;
 }
 
-/* On hash-based CPUs, the vmemmap is bolted in the hash table.
- *
- * On Book3E CPUs, the vmemmap is currently mapped in the top half of
- * the vmalloc space using normal page tables, though the size of
- * pages encoded in the PTEs can be different
- */
-
-#ifdef CONFIG_PPC_BOOK3E
-static int __meminit vmemmap_create_mapping(unsigned long start,
-					    unsigned long page_size,
-					    unsigned long phys)
-{
-	/* Create a PTE encoding without page size */
-	unsigned long i, flags = _PAGE_PRESENT | _PAGE_ACCESSED |
-		_PAGE_KERNEL_RW;
-
-	/* PTEs only contain page size encodings up to 32M */
-	BUG_ON(mmu_psize_defs[mmu_vmemmap_psize].enc > 0xf);
-
-	/* Encode the size in the PTE */
-	flags |= mmu_psize_defs[mmu_vmemmap_psize].enc << 8;
-
-	/* For each PTE for that area, map things. Note that we don't
-	 * increment phys because all PTEs are of the large size and
-	 * thus must have the low bits clear
-	 */
-	for (i = 0; i < page_size; i += PAGE_SIZE)
-		BUG_ON(map_kernel_page(start + i, phys, flags));
-
-	return 0;
-}
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-static void vmemmap_remove_mapping(unsigned long start,
-				   unsigned long page_size)
-{
-}
-#endif
-#else /* CONFIG_PPC_BOOK3E */
-static int __meminit vmemmap_create_mapping(unsigned long start,
-					    unsigned long page_size,
-					    unsigned long phys)
-{
-	int rc = htab_bolt_mapping(start, start + page_size, phys,
-				   pgprot_val(PAGE_KERNEL),
-				   mmu_vmemmap_psize, mmu_kernel_ssize);
-	if (rc < 0) {
-		int rc2 = htab_remove_mapping(start, start + page_size,
-					      mmu_vmemmap_psize,
-					      mmu_kernel_ssize);
-		BUG_ON(rc2 && (rc2 != -ENOENT));
-	}
-	return rc;
-}
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-static void vmemmap_remove_mapping(unsigned long start,
-				   unsigned long page_size)
-{
-	int rc = htab_remove_mapping(start, start + page_size,
-				     mmu_vmemmap_psize,
-				     mmu_kernel_ssize);
-	BUG_ON((rc < 0) && (rc != -ENOENT));
-	WARN_ON(rc == -ENOENT);
-}
-#endif
-
-#endif /* CONFIG_PPC_BOOK3E */
-
 struct vmemmap_backing *vmemmap_list;
 static struct vmemmap_backing *next;
 static int num_left;
@@ -309,6 +240,9 @@ static __meminit void vmemmap_list_populate(unsigned long phys,
 	vmemmap_list = vmem_back;
 }
 
+extern int __meminit vmemmap_create_mapping(unsigned long start,
+					    unsigned long page_size,
+					    unsigned long phys);
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 {
 	unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
@@ -347,6 +281,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
+extern void vmemmap_remove_mapping(unsigned long start,
+				   unsigned long page_size);
 static unsigned long vmemmap_list_free(unsigned long start)
 {
 	struct vmemmap_backing *vmem_back, *vmem_back_prev;
diff --git a/arch/powerpc/mm/pgtable-book3e.c b/arch/powerpc/mm/pgtable-book3e.c
new file mode 100644
index 000000000000..f75ba4142875
--- /dev/null
+++ b/arch/powerpc/mm/pgtable-book3e.c
@@ -0,0 +1,128 @@
+
+/*
+ * Copyright IBM Corporation, 2015
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+/*
+ * PPC64 Book3E (nohash) page table management
+ */
+#include <linux/sched.h>
+#include <linux/memblock.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/dma.h>
+
+#include "mmu_decl.h"
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+/*
+ * On Book3E CPUs, the vmemmap is currently mapped in the top half of
+ * the vmalloc space using normal page tables, though the size of
+ * pages encoded in the PTEs can be different
+ */
+int __meminit vmemmap_create_mapping(unsigned long start,
+				     unsigned long page_size,
+				     unsigned long phys)
+{
+	/* Create a PTE encoding without page size */
+	unsigned long i, flags = _PAGE_PRESENT | _PAGE_ACCESSED |
+		_PAGE_KERNEL_RW;
+
+	/* PTEs only contain page size encodings up to 32M */
+	BUG_ON(mmu_psize_defs[mmu_vmemmap_psize].enc > 0xf);
+
+	/* Encode the size in the PTE */
+	flags |= mmu_psize_defs[mmu_vmemmap_psize].enc << 8;
+
+	/* For each PTE for that area, map things. Note that we don't
+	 * increment phys because all PTEs are of the large size and
+	 * thus must have the low bits clear
+	 */
+	for (i = 0; i < page_size; i += PAGE_SIZE)
+		BUG_ON(map_kernel_page(start + i, phys, flags));
+
+	return 0;
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+void vmemmap_remove_mapping(unsigned long start,
+			    unsigned long page_size)
+{
+}
+#endif
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
+static __ref void *early_alloc_pgtable(unsigned long size)
+{
+	void *pt;
+
+	pt = __va(memblock_alloc_base(size, size, __pa(MAX_DMA_ADDRESS)));
+	memset(pt, 0, size);
+
+	return pt;
+}
+
+/*
+ * map_kernel_page currently only called by __ioremap
+ * map_kernel_page adds an entry to the ioremap page table
+ * and adds an entry to the HPT, possibly bolting it
+ */
+int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags)
+{
+	pgd_t *pgdp;
+	pud_t *pudp;
+	pmd_t *pmdp;
+	pte_t *ptep;
+
+	if (slab_is_available()) {
+		pgdp = pgd_offset_k(ea);
+		pudp = pud_alloc(&init_mm, pgdp, ea);
+		if (!pudp)
+			return -ENOMEM;
+		pmdp = pmd_alloc(&init_mm, pudp, ea);
+		if (!pmdp)
+			return -ENOMEM;
+		ptep = pte_alloc_kernel(pmdp, ea);
+		if (!ptep)
+			return -ENOMEM;
+		set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
+							  __pgprot(flags)));
+	} else {
+		pgdp = pgd_offset_k(ea);
+#ifndef __PAGETABLE_PUD_FOLDED
+		if (pgd_none(*pgdp)) {
+			pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
+			BUG_ON(pudp == NULL);
+			pgd_populate(&init_mm, pgdp, pudp);
+		}
+#endif /* !__PAGETABLE_PUD_FOLDED */
+		pudp = pud_offset(pgdp, ea);
+		if (pud_none(*pudp)) {
+			pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
+			BUG_ON(pmdp == NULL);
+			pud_populate(&init_mm, pudp, pmdp);
+		}
+		pmdp = pmd_offset(pudp, ea);
+		if (!pmd_present(*pmdp)) {
+			ptep = early_alloc_pgtable(PAGE_SIZE);
+			BUG_ON(ptep == NULL);
+			pmd_populate_kernel(&init_mm, pmdp, ptep);
+		}
+		ptep = pte_offset_kernel(pmdp, ea);
+		set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
+							  __pgprot(flags)));
+	}
+
+	smp_wmb();
+	return 0;
+}
diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c
new file mode 100644
index 000000000000..04d6fa12789e
--- /dev/null
+++ b/arch/powerpc/mm/pgtable-hash64.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright IBM Corporation, 2015
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+/*
+ * PPC64 page table management for hash based MMUs
+ */
+#include <linux/sched.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+
+#include "mmu_decl.h"
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+/*
+ * On hash-based CPUs, the vmemmap is bolted in the hash table.
+ *
+ */
+int __meminit vmemmap_create_mapping(unsigned long start,
+				     unsigned long page_size,
+				     unsigned long phys)
+{
+	int rc = htab_bolt_mapping(start, start + page_size, phys,
+				   pgprot_val(PAGE_KERNEL),
+				   mmu_vmemmap_psize, mmu_kernel_ssize);
+	if (rc < 0) {
+		int rc2 = htab_remove_mapping(start, start + page_size,
+					      mmu_vmemmap_psize,
+					      mmu_kernel_ssize);
+		BUG_ON(rc2 && (rc2 != -ENOENT));
+	}
+	return rc;
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+void vmemmap_remove_mapping(unsigned long start,
+			    unsigned long page_size)
+{
+	int rc = htab_remove_mapping(start, start + page_size,
+				     mmu_vmemmap_psize,
+				     mmu_kernel_ssize);
+	BUG_ON((rc < 0) && (rc != -ENOENT));
+	WARN_ON(rc == -ENOENT);
+}
+#endif
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
+/*
+ * map_kernel_page currently only called by __ioremap
+ * map_kernel_page adds an entry to the ioremap page table
+ * and adds an entry to the HPT, possibly bolting it
+ */
+int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags)
+{
+	pgd_t *pgdp;
+	pud_t *pudp;
+	pmd_t *pmdp;
+	pte_t *ptep;
+
+	if (slab_is_available()) {
+		pgdp = pgd_offset_k(ea);
+		pudp = pud_alloc(&init_mm, pgdp, ea);
+		if (!pudp)
+			return -ENOMEM;
+		pmdp = pmd_alloc(&init_mm, pudp, ea);
+		if (!pmdp)
+			return -ENOMEM;
+		ptep = pte_alloc_kernel(pmdp, ea);
+		if (!ptep)
+			return -ENOMEM;
+		set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
+							  __pgprot(flags)));
+	} else {
+		/*
+		 * If the mm subsystem is not fully up, we cannot create a
+		 * linux page table entry for this mapping.  Simply bolt an
+		 * entry in the hardware page table.
+		 *
+		 */
+		if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
+				      mmu_io_psize, mmu_kernel_ssize)) {
+			printk(KERN_ERR "Failed to do bolted mapping IO "
+			       "memory at %016lx !\n", pa);
+			return -ENOMEM;
+		}
+	}
+
+	smp_wmb();
+	return 0;
+}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 5fff787da17a..d493f62d12eb 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -78,89 +78,6 @@ struct patb_entry *partition_tb;
 #endif
 unsigned long ioremap_bot = IOREMAP_BASE;
 
-#ifdef CONFIG_PPC_MMU_NOHASH
-static __ref void *early_alloc_pgtable(unsigned long size)
-{
-	void *pt;
-
-	pt = __va(memblock_alloc_base(size, size, __pa(MAX_DMA_ADDRESS)));
-	memset(pt, 0, size);
-
-	return pt;
-}
-#endif /* CONFIG_PPC_MMU_NOHASH */
-
-/*
- * map_kernel_page currently only called by __ioremap
- * map_kernel_page adds an entry to the ioremap page table
- * and adds an entry to the HPT, possibly bolting it
- */
-int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags)
-{
-	pgd_t *pgdp;
-	pud_t *pudp;
-	pmd_t *pmdp;
-	pte_t *ptep;
-
-	if (slab_is_available()) {
-		pgdp = pgd_offset_k(ea);
-		pudp = pud_alloc(&init_mm, pgdp, ea);
-		if (!pudp)
-			return -ENOMEM;
-		pmdp = pmd_alloc(&init_mm, pudp, ea);
-		if (!pmdp)
-			return -ENOMEM;
-		ptep = pte_alloc_kernel(pmdp, ea);
-		if (!ptep)
-			return -ENOMEM;
-		set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
-							  __pgprot(flags)));
-	} else {
-#ifdef CONFIG_PPC_MMU_NOHASH
-		pgdp = pgd_offset_k(ea);
-#ifdef PUD_TABLE_SIZE
-		if (pgd_none(*pgdp)) {
-			pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
-			BUG_ON(pudp == NULL);
-			pgd_populate(&init_mm, pgdp, pudp);
-		}
-#endif /* PUD_TABLE_SIZE */
-		pudp = pud_offset(pgdp, ea);
-		if (pud_none(*pudp)) {
-			pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
-			BUG_ON(pmdp == NULL);
-			pud_populate(&init_mm, pudp, pmdp);
-		}
-		pmdp = pmd_offset(pudp, ea);
-		if (!pmd_present(*pmdp)) {
-			ptep = early_alloc_pgtable(PAGE_SIZE);
-			BUG_ON(ptep == NULL);
-			pmd_populate_kernel(&init_mm, pmdp, ptep);
-		}
-		ptep = pte_offset_kernel(pmdp, ea);
-		set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
-							  __pgprot(flags)));
-#else /* CONFIG_PPC_MMU_NOHASH */
-		/*
-		 * If the mm subsystem is not fully up, we cannot create a
-		 * linux page table entry for this mapping.  Simply bolt an
-		 * entry in the hardware page table.
-		 *
-		 */
-		if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
-				      mmu_io_psize, mmu_kernel_ssize)) {
-			printk(KERN_ERR "Failed to do bolted mapping IO "
-			       "memory at %016lx !\n", pa);
-			return -ENOMEM;
-		}
-#endif /* !CONFIG_PPC_MMU_NOHASH */
-	}
-
-	smp_wmb();
-	return 0;
-}
-
-
 /**
  * __ioremap_at - Low level function to establish the page tables
  *                for an IO mapping
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 17/65] powerpc/mm/book3s: Rename hash specific PTE bits to carry H_ prefix
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (14 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 16/65] powerpc/mm: Move hash and no hash code to separate files Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 18/65] powerpc/mm: Handle _PTE_NONE_MASK Aneesh Kumar K.V
                   ` (48 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

This helps in following the hash-only pte bits easily.

We have kept _PAGE_CHG_MASK, _HPAGE_CHG_MASK and _PAGE_PROT_BITS as they
are in this patch even though they use hash-specific bits. Using them
as-is with radix should be ok, because with radix we expect those bit
positions to be zero.

Only rename in this patch, no change in functionality.
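
(Not part of the original patch: a minimal userspace sketch, assuming the bit
values from the book3s/64/hash.h hunk in this patch, of the naming convention
the rename establishes. Generic _PAGE_* bits stay MMU-neutral while hash-only
bits carry the H_ prefix; the fake pte value and main() are purely
illustrative.)

/* illustration only; bit values taken from the hash.h hunk in this patch */
#include <stdio.h>

#define _PAGE_ACCESSED	0x00100UL	/* generic: R, page referenced */
#define H_PAGE_BUSY	0x00800UL	/* hash only: PTE & hash are busy */
#define H_PAGE_HASHPTE	(1UL << 61)	/* hash only: PTE has an HPTE */

int main(void)
{
	unsigned long pte = _PAGE_ACCESSED | H_PAGE_HASHPTE;

	/* generic code keeps using _PAGE_* bits on both hash and radix */
	printf("referenced: %d\n", !!(pte & _PAGE_ACCESSED));
	/* only hash-specific code should look at H_PAGE_* bits */
	printf("has hpte:   %d\n", !!(pte & H_PAGE_HASHPTE));
	printf("busy:       %d\n", !!(pte & H_PAGE_BUSY));
	return 0;
}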

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash-4k.h  | 11 ++++--
 arch/powerpc/include/asm/book3s/64/hash-64k.h | 38 +++++++++++--------
 arch/powerpc/include/asm/book3s/64/hash.h     | 39 +++++++++-----------
 arch/powerpc/include/asm/book3s/64/pgtable.h  |  4 +-
 arch/powerpc/include/asm/kvm_book3s_64.h      |  4 +-
 arch/powerpc/include/asm/pte-common.h         |  4 ++
 arch/powerpc/mm/hash64_4k.c                   | 21 ++++++-----
 arch/powerpc/mm/hash64_64k.c                  | 53 ++++++++++++++-------------
 arch/powerpc/mm/hash_utils_64.c               |  8 ++--
 arch/powerpc/mm/hugepage-hash64.c             | 14 +++----
 arch/powerpc/mm/hugetlbpage-hash64.c          | 20 +++++-----
 arch/powerpc/mm/pgtable_64.c                  |  8 ++--
 arch/powerpc/mm/tlb_hash64.c                  |  4 +-
 13 files changed, 120 insertions(+), 108 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 772850e517f3..115427f61ef0 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -48,10 +48,13 @@
 #define PGD_MASKED_BITS		0
 
 /* PTE flags to conserve for HPTE identification */
-#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | \
-			 _PAGE_F_SECOND | _PAGE_F_GIX)
-
-#define _PAGE_4K_PFN		0
+#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | \
+			 H_PAGE_F_SECOND | H_PAGE_F_GIX)
+/*
+ * Not supported by 4k linux page size
+ */
+#define H_PAGE_4K_PFN	0x0
+#define H_PAGE_THP_HUGE 0x0
 #ifndef __ASSEMBLY__
 /*
  * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range()
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 8bb038076892..93bef8162d66 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -29,17 +29,23 @@
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 
-#define _PAGE_COMBO	0x00001000 /* this is a combo 4k page */
-#define _PAGE_4K_PFN	0x00002000 /* PFN is for a single 4k page */
+#define H_PAGE_COMBO	0x00001000 /* this is a combo 4k page */
+#define H_PAGE_4K_PFN	0x00002000 /* PFN is for a single 4k page */
 /*
- * Used to track subpage group valid if _PAGE_COMBO is set
- * This overloads _PAGE_F_GIX and _PAGE_F_SECOND
+ * We need to differentiate between explicit huge page and THP huge
+ * page, since THP huge page also need to track real subpage details
  */
-#define _PAGE_COMBO_VALID	(_PAGE_F_GIX | _PAGE_F_SECOND)
+#define H_PAGE_THP_HUGE  H_PAGE_4K_PFN
+
+/*
+ * Used to track subpage group valid if H_PAGE_COMBO is set
+ * This overloads H_PAGE_F_GIX and H_PAGE_F_SECOND
+ */
+#define H_PAGE_COMBO_VALID	(H_PAGE_F_GIX | H_PAGE_F_SECOND)
 
 /* PTE flags to conserve for HPTE identification */
-#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_F_SECOND | \
-			 _PAGE_F_GIX | _PAGE_HASHPTE | _PAGE_COMBO)
+#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_F_SECOND | \
+			 H_PAGE_F_GIX | H_PAGE_HASHPTE | H_PAGE_COMBO)
 /*
  * we support 16 fragments per PTE page of 64K size.
  */
@@ -75,9 +81,9 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
 
 	rpte.pte = pte;
 	rpte.hidx = 0;
-	if (pte_val(pte) & _PAGE_COMBO) {
+	if (pte_val(pte) & H_PAGE_COMBO) {
 		/*
-		 * Make sure we order the hidx load against the _PAGE_COMBO
+		 * Make sure we order the hidx load against the H_PAGE_COMBO
 		 * check. The store side ordering is done in __hash_page_4K
 		 */
 		smp_rmb();
@@ -89,9 +95,9 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
 
 static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
 {
-	if ((pte_val(rpte.pte) & _PAGE_COMBO))
+	if ((pte_val(rpte.pte) & H_PAGE_COMBO))
 		return (rpte.hidx >> (index<<2)) & 0xf;
-	return (pte_val(rpte.pte) >> _PAGE_F_GIX_SHIFT) & 0xf;
+	return (pte_val(rpte.pte) >> H_PAGE_F_GIX_SHIFT) & 0xf;
 }
 
 #define __rpte_to_pte(r)	((r).pte)
@@ -114,7 +120,7 @@ extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index);
 #define pte_iterate_hashed_end() } while(0); } } while(0)
 
 #define pte_pagesize_index(mm, addr, pte)	\
-	(((pte) & _PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K)
+	(((pte) & H_PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K)
 
 extern int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
 			   unsigned long pfn, unsigned long size, pgprot_t);
@@ -126,7 +132,7 @@ static inline int remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr,
 		return -EINVAL;
 	}
 	return remap_pfn_range(vma, addr, pfn, PAGE_SIZE,
-			       __pgprot(pgprot_val(prot) | _PAGE_4K_PFN));
+			       __pgprot(pgprot_val(prot) | H_PAGE_4K_PFN));
 }
 
 #define PTE_TABLE_SIZE	PTE_FRAG_SIZE
@@ -255,8 +261,8 @@ static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
  */
 static inline int pmd_trans_huge(pmd_t pmd)
 {
-	return !!((pmd_val(pmd) & (_PAGE_PTE | _PAGE_THP_HUGE)) ==
-		  (_PAGE_PTE | _PAGE_THP_HUGE));
+	return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE)) ==
+		  (_PAGE_PTE | H_PAGE_THP_HUGE));
 }
 
 static inline int pmd_large(pmd_t pmd)
@@ -280,7 +286,7 @@ static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
 {
 	unsigned long old;
 
-	if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
+	if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
 		return 0;
 	old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
 	return ((old & _PAGE_ACCESSED) != 0);
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index d54ebb16692c..2b0824defa6e 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -26,19 +26,22 @@
 #define _PAGE_TOLERANT		0x00030 /* tolerant memory, cache inhibited */
 #define _PAGE_DIRTY		0x00080 /* C: page changed */
 #define _PAGE_ACCESSED		0x00100 /* R: page referenced */
-#define _PAGE_SPECIAL		0x00400 /* software: special page */
-#define _PAGE_BUSY		0x00800 /* software: PTE & hash are busy */
-
+/*
+ * Software bits
+ */
 #ifdef CONFIG_MEM_SOFT_DIRTY
-#define _PAGE_SOFT_DIRTY	0x200 /* software: software dirty tracking */
+#define _PAGE_SOFT_DIRTY	0x00200 /* software: software dirty tracking */
 #else
-#define _PAGE_SOFT_DIRTY	0x000
+#define _PAGE_SOFT_DIRTY	0x00000
 #endif
+#define _PAGE_SPECIAL		0x00400 /* software: special page */
+#define H_PAGE_BUSY		0x00800 /* software: PTE & hash are busy */
 
-#define _PAGE_F_GIX_SHIFT	57
-#define _PAGE_F_GIX		(7ul << 57)	/* HPTE index within HPTEG */
-#define _PAGE_F_SECOND		(1ul << 60)	/* HPTE is in 2ndary HPTEG */
-#define _PAGE_HASHPTE		(1ul << 61)	/* PTE has associated HPTE */
+
+#define H_PAGE_F_GIX_SHIFT	57
+#define H_PAGE_F_GIX		(7ul << 57)	/* HPTE index within HPTEG */
+#define H_PAGE_F_SECOND		(1ul << 60)	/* HPTE is in 2ndary HPTEG */
+#define H_PAGE_HASHPTE		(1ul << 61)	/* PTE has associated HPTE */
 #define _PAGE_PTE		(1ul << 62)	/* distinguishes PTEs from pointers */
 #define _PAGE_PRESENT		(1ul << 63)	/* pte contains a translation */
 /*
@@ -48,16 +51,10 @@
  */
 #define _PAGE_NO_CACHE		_PAGE_TOLERANT
 /*
- * We need to differentiate between explicit huge page and THP huge
- * page, since THP huge page also need to track real subpage details
- */
-#define _PAGE_THP_HUGE  _PAGE_4K_PFN
-
-/*
  * set of bits not changed in pmd_modify.
  */
 #define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
-			 _PAGE_ACCESSED | _PAGE_THP_HUGE | _PAGE_PTE | \
+			 _PAGE_ACCESSED | H_PAGE_THP_HUGE | _PAGE_PTE | \
 			 _PAGE_SOFT_DIRTY)
 
 
@@ -151,7 +148,7 @@
  * Mask of bits returned by pte_pgprot()
  */
 #define PAGE_PROT_BITS  (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT | \
-			 _PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \
+			 H_PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \
 			 _PAGE_READ | _PAGE_WRITE |  _PAGE_DIRTY | _PAGE_EXEC | \
 			 _PAGE_SOFT_DIRTY)
 /*
@@ -253,7 +250,7 @@ static inline unsigned long pte_update(struct mm_struct *mm,
 				       int huge)
 {
 	unsigned long old, tmp;
-	unsigned long busy = cpu_to_be64(_PAGE_BUSY);
+	unsigned long busy = cpu_to_be64(H_PAGE_BUSY);
 
 	clr = cpu_to_be64(clr);
 	set = cpu_to_be64(set);
@@ -274,7 +271,7 @@ static inline unsigned long pte_update(struct mm_struct *mm,
 		assert_pte_locked(mm, addr);
 
 	old = be64_to_cpu(old);
-	if (old & _PAGE_HASHPTE)
+	if (old & H_PAGE_HASHPTE)
 		hpte_need_flush(mm, addr, ptep, old, huge);
 
 	return old;
@@ -292,7 +289,7 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
 {
 	unsigned long old;
 
-	if ((pte_val(*ptep) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
+	if ((pte_val(*ptep) & (_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
 		return 0;
 	old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
 	return (old & _PAGE_ACCESSED) != 0;
@@ -350,7 +347,7 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
 		 _PAGE_SOFT_DIRTY);
 
 	unsigned long old, tmp;
-	unsigned long busy = cpu_to_be64(_PAGE_BUSY);
+	unsigned long busy = cpu_to_be64(H_PAGE_BUSY);
 
 	bits = cpu_to_be64(bits);
 
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 4745d314acfb..65eb819609b1 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -45,7 +45,7 @@
 
 #define __real_pte(e,p)		((real_pte_t){(e)})
 #define __rpte_to_pte(r)	((r).pte)
-#define __rpte_to_hidx(r,index)	(pte_val(__rpte_to_pte(r)) >>_PAGE_F_GIX_SHIFT)
+#define __rpte_to_hidx(r,index)	(pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT)
 
 #define pte_iterate_hashed_subpages(rpte, psize, va, index, shift)       \
 	do {							         \
@@ -287,7 +287,7 @@ static inline int pmd_protnone(pmd_t pmd)
 
 static inline pmd_t pmd_mkhuge(pmd_t pmd)
 {
-	return __pmd(pmd_val(pmd) | (_PAGE_PTE | _PAGE_THP_HUGE));
+	return __pmd(pmd_val(pmd) | (_PAGE_PTE | H_PAGE_THP_HUGE));
 }
 
 #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index ebdaf576cf26..2be3c6497609 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -315,9 +315,9 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing)
 		old_pte = READ_ONCE(*ptep);
 		old_ptev = pte_val(old_pte);
 		/*
-		 * wait until _PAGE_BUSY is clear then set it atomically
+		 * wait until H_PAGE_BUSY is clear then set it atomically
 		 */
-		if (unlikely(old_ptev & _PAGE_BUSY)) {
+		if (unlikely(old_ptev & H_PAGE_BUSY)) {
 			cpu_relax();
 			continue;
 		}
diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h
index 9f5dea58b0db..2ad5d94d45cd 100644
--- a/arch/powerpc/include/asm/pte-common.h
+++ b/arch/powerpc/include/asm/pte-common.h
@@ -203,3 +203,7 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void);
 #define _PAGE_READ 0
 #define _PAGE_WRITE _PAGE_RW
 #endif
+
+#ifndef H_PAGE_4K_PFN
+#define H_PAGE_4K_PFN 0
+#endif
diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
index 42ba12c184e1..a1e5ed3ced81 100644
--- a/arch/powerpc/mm/hash64_4k.c
+++ b/arch/powerpc/mm/hash64_4k.c
@@ -35,7 +35,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 
 		old_pte = pte_val(pte);
 		/* If PTE busy, retry the access */
-		if (unlikely(old_pte & _PAGE_BUSY))
+		if (unlikely(old_pte & H_PAGE_BUSY))
 			return 0;
 		/* If PTE permissions don't match, take page fault */
 		if (unlikely(!check_pte_access(access, old_pte)))
@@ -43,9 +43,9 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 		/*
 		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
 		 * a write access. Since this is 4K insert of 64K page size
-		 * also add _PAGE_COMBO
+		 * also add H_PAGE_COMBO
 		 */
-		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
+		new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED;
 		if (access & _PAGE_WRITE)
 			new_pte |= _PAGE_DIRTY;
 
@@ -63,22 +63,22 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 		rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
 
 	vpn  = hpt_vpn(ea, vsid, ssize);
-	if (unlikely(old_pte & _PAGE_HASHPTE)) {
+	if (unlikely(old_pte & H_PAGE_HASHPTE)) {
 		/*
 		 * There MIGHT be an HPTE for this pte
 		 */
 		hash = hpt_hash(vpn, shift, ssize);
-		if (old_pte & _PAGE_F_SECOND)
+		if (old_pte & H_PAGE_F_SECOND)
 			hash = ~hash;
 		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
-		slot += (old_pte & _PAGE_F_GIX) >> _PAGE_F_GIX_SHIFT;
+		slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
 
 		if (ppc_md.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_4K,
 					 MMU_PAGE_4K, ssize, flags) == -1)
 			old_pte &= ~_PAGE_HPTEFLAGS;
 	}
 
-	if (likely(!(old_pte & _PAGE_HASHPTE))) {
+	if (likely(!(old_pte & H_PAGE_HASHPTE))) {
 
 		pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
 		hash = hpt_hash(vpn, shift, ssize);
@@ -118,9 +118,10 @@ repeat:
 					   MMU_PAGE_4K, MMU_PAGE_4K, old_pte);
 			return -1;
 		}
-		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
-		new_pte |= (slot << _PAGE_F_GIX_SHIFT) & (_PAGE_F_SECOND | _PAGE_F_GIX);
+		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
+		new_pte |= (slot << H_PAGE_F_GIX_SHIFT) &
+			(H_PAGE_F_SECOND | H_PAGE_F_GIX);
 	}
-	*ptep = __pte(new_pte & ~_PAGE_BUSY);
+	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
 	return 0;
 }
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index 419562b0e9c8..f0d209376603 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -23,7 +23,7 @@ bool __rpte_sub_valid(real_pte_t rpte, unsigned long index)
 	unsigned long g_idx;
 	unsigned long ptev = pte_val(rpte.pte);
 
-	g_idx = (ptev & _PAGE_COMBO_VALID) >> _PAGE_F_GIX_SHIFT;
+	g_idx = (ptev & H_PAGE_COMBO_VALID) >> H_PAGE_F_GIX_SHIFT;
 	index = index >> 2;
 	if (g_idx & (0x1 << index))
 		return true;
@@ -37,12 +37,12 @@ static unsigned long mark_subptegroup_valid(unsigned long ptev, unsigned long in
 {
 	unsigned long g_idx;
 
-	if (!(ptev & _PAGE_COMBO))
+	if (!(ptev & H_PAGE_COMBO))
 		return ptev;
 	index = index >> 2;
 	g_idx = 0x1 << index;
 
-	return ptev | (g_idx << _PAGE_F_GIX_SHIFT);
+	return ptev | (g_idx << H_PAGE_F_GIX_SHIFT);
 }
 
 int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
@@ -67,7 +67,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 
 		old_pte = pte_val(pte);
 		/* If PTE busy, retry the access */
-		if (unlikely(old_pte & _PAGE_BUSY))
+		if (unlikely(old_pte & H_PAGE_BUSY))
 			return 0;
 		/* If PTE permissions don't match, take page fault */
 		if (unlikely(!check_pte_access(access, old_pte)))
@@ -75,9 +75,9 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 		/*
 		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
 		 * a write access. Since this is 4K insert of 64K page size
-		 * also add _PAGE_COMBO
+		 * also add H_PAGE_COMBO
 		 */
-		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED | _PAGE_COMBO;
+		new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED | H_PAGE_COMBO;
 		if (access & _PAGE_WRITE)
 			new_pte |= _PAGE_DIRTY;
 
@@ -106,21 +106,21 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 	/*
 	 *None of the sub 4k page is hashed
 	 */
-	if (!(old_pte & _PAGE_HASHPTE))
+	if (!(old_pte & H_PAGE_HASHPTE))
 		goto htab_insert_hpte;
 	/*
 	 * Check if the pte was already inserted into the hash table
 	 * as a 64k HW page, and invalidate the 64k HPTE if so.
 	 */
-	if (!(old_pte & _PAGE_COMBO)) {
+	if (!(old_pte & H_PAGE_COMBO)) {
 		flush_hash_page(vpn, rpte, MMU_PAGE_64K, ssize, flags);
 		/*
 		 * clear the old slot details from the old and new pte.
 		 * On hash insert failure we use old pte value and we don't
 		 * want slot information there if we have a insert failure.
 		 */
-		old_pte &= ~(_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND);
-		new_pte &= ~(_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND);
+		old_pte &= ~(H_PAGE_HASHPTE | H_PAGE_F_GIX | H_PAGE_F_SECOND);
+		new_pte &= ~(H_PAGE_HASHPTE | H_PAGE_F_GIX | H_PAGE_F_SECOND);
 		goto htab_insert_hpte;
 	}
 	/*
@@ -146,15 +146,15 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 		if (ret == -1)
 			goto htab_insert_hpte;
 
-		*ptep = __pte(new_pte & ~_PAGE_BUSY);
+		*ptep = __pte(new_pte & ~H_PAGE_BUSY);
 		return 0;
 	}
 
 htab_insert_hpte:
 	/*
-	 * handle _PAGE_4K_PFN case
+	 * handle H_PAGE_4K_PFN case
 	 */
-	if (old_pte & _PAGE_4K_PFN) {
+	if (old_pte & H_PAGE_4K_PFN) {
 		/*
 		 * All the sub 4k page have the same
 		 * physical address.
@@ -202,20 +202,20 @@ repeat:
 	}
 	/*
 	 * Insert slot number & secondary bit in PTE second half,
-	 * clear _PAGE_BUSY and set appropriate HPTE slot bit
-	 * Since we have _PAGE_BUSY set on ptep, we can be sure
+	 * clear H_PAGE_BUSY and set appropriate HPTE slot bit
+	 * Since we have H_PAGE_BUSY set on ptep, we can be sure
 	 * nobody is undating hidx.
 	 */
 	hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
 	rpte.hidx &= ~(0xfUL << (subpg_index << 2));
 	*hidxp = rpte.hidx  | (slot << (subpg_index << 2));
 	new_pte = mark_subptegroup_valid(new_pte, subpg_index);
-	new_pte |=  _PAGE_HASHPTE;
+	new_pte |=  H_PAGE_HASHPTE;
 	/*
 	 * check __real_pte for details on matching smp_rmb()
 	 */
 	smp_wmb();
-	*ptep = __pte(new_pte & ~_PAGE_BUSY);
+	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
 	return 0;
 }
 
@@ -238,7 +238,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
 
 		old_pte = pte_val(pte);
 		/* If PTE busy, retry the access */
-		if (unlikely(old_pte & _PAGE_BUSY))
+		if (unlikely(old_pte & H_PAGE_BUSY))
 			return 0;
 		/* If PTE permissions don't match, take page fault */
 		if (unlikely(!check_pte_access(access, old_pte)))
@@ -254,7 +254,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
 		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
 		 * a write access.
 		 */
-		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
+		new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED;
 		if (access & _PAGE_WRITE)
 			new_pte |= _PAGE_DIRTY;
 		opte = cpu_to_be64(old_pte);
@@ -268,22 +268,22 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
 		rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
 
 	vpn  = hpt_vpn(ea, vsid, ssize);
-	if (unlikely(old_pte & _PAGE_HASHPTE)) {
+	if (unlikely(old_pte & H_PAGE_HASHPTE)) {
 		/*
 		 * There MIGHT be an HPTE for this pte
 		 */
 		hash = hpt_hash(vpn, shift, ssize);
-		if (old_pte & _PAGE_F_SECOND)
+		if (old_pte & H_PAGE_F_SECOND)
 			hash = ~hash;
 		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
-		slot += (old_pte & _PAGE_F_GIX) >> _PAGE_F_GIX_SHIFT;
+		slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
 
 		if (ppc_md.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K,
 					 MMU_PAGE_64K, ssize, flags) == -1)
 			old_pte &= ~_PAGE_HPTEFLAGS;
 	}
 
-	if (likely(!(old_pte & _PAGE_HASHPTE))) {
+	if (likely(!(old_pte & H_PAGE_HASHPTE))) {
 
 		pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
 		hash = hpt_hash(vpn, shift, ssize);
@@ -323,9 +323,10 @@ repeat:
 					   MMU_PAGE_64K, MMU_PAGE_64K, old_pte);
 			return -1;
 		}
-		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
-		new_pte |= (slot << _PAGE_F_GIX_SHIFT) & (_PAGE_F_SECOND | _PAGE_F_GIX);
+		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
+		new_pte |= (slot << H_PAGE_F_GIX_SHIFT) &
+			(H_PAGE_F_SECOND | H_PAGE_F_GIX);
 	}
-	*ptep = __pte(new_pte & ~_PAGE_BUSY);
+	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
 	return 0;
 }
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 43e0d86b7ca1..c2ffc97cf665 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1168,8 +1168,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
 #endif
 	/* Do actual hashing */
 #ifdef CONFIG_PPC_64K_PAGES
-	/* If _PAGE_4K_PFN is set, make sure this is a 4k segment */
-	if ((pte_val(*ptep) & _PAGE_4K_PFN) && psize == MMU_PAGE_64K) {
+	/* If H_PAGE_4K_PFN is set, make sure this is a 4k segment */
+	if ((pte_val(*ptep) & H_PAGE_4K_PFN) && psize == MMU_PAGE_64K) {
 		demote_segment_4k(mm, ea);
 		psize = MMU_PAGE_4K;
 	}
@@ -1331,13 +1331,13 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
 
 	WARN_ON(hugepage_shift);
 #ifdef CONFIG_PPC_64K_PAGES
-	/* If either _PAGE_4K_PFN or cache inhibited is set (and we are on
+	/* If either H_PAGE_4K_PFN or cache inhibited is set (and we are on
 	 * a 64K kernel), then we don't preload, hash_page() will take
 	 * care of it once we actually try to access the page.
 	 * That way we don't have to duplicate all of the logic for segment
 	 * page size demotion here
 	 */
-	if ((pte_val(*ptep) & _PAGE_4K_PFN) || pte_ci(*ptep))
+	if ((pte_val(*ptep) & H_PAGE_4K_PFN) || pte_ci(*ptep))
 		goto out_exit;
 #endif /* CONFIG_PPC_64K_PAGES */
 
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index 182f1d3fe73c..abf6c468bbc8 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -38,7 +38,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
 
 		old_pmd = pmd_val(pmd);
 		/* If PMD busy, retry the access */
-		if (unlikely(old_pmd & _PAGE_BUSY))
+		if (unlikely(old_pmd & H_PAGE_BUSY))
 			return 0;
 		/* If PMD permissions don't match, take page fault */
 		if (unlikely(!check_pte_access(access, old_pmd)))
@@ -47,7 +47,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
 		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
 		 * a write access
 		 */
-		new_pmd = old_pmd | _PAGE_BUSY | _PAGE_ACCESSED;
+		new_pmd = old_pmd | H_PAGE_BUSY | _PAGE_ACCESSED;
 		if (access & _PAGE_WRITE)
 			new_pmd |= _PAGE_DIRTY;
 		opmd = cpu_to_be64(old_pmd);
@@ -81,7 +81,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
 		 * base page size. This is because demote_segment won't flush
 		 * hash page table entries.
 		 */
-		if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) {
+		if ((old_pmd & H_PAGE_HASHPTE) && !(old_pmd & H_PAGE_COMBO)) {
 			flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K,
 					    ssize, flags);
 			/*
@@ -128,7 +128,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
 		hash = hpt_hash(vpn, shift, ssize);
 		/* insert new entry */
 		pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT;
-		new_pmd |= _PAGE_HASHPTE;
+		new_pmd |= H_PAGE_HASHPTE;
 
 repeat:
 		hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
@@ -172,17 +172,17 @@ repeat:
 		mark_hpte_slot_valid(hpte_slot_array, index, slot);
 	}
 	/*
-	 * Mark the pte with _PAGE_COMBO, if we are trying to hash it with
+	 * Mark the pte with H_PAGE_COMBO, if we are trying to hash it with
 	 * base page size 4k.
 	 */
 	if (psize == MMU_PAGE_4K)
-		new_pmd |= _PAGE_COMBO;
+		new_pmd |= H_PAGE_COMBO;
 	/*
 	 * The hpte valid is stored in the pgtable whose address is in the
 	 * second half of the PMD. Order this against clearing of the busy bit in
 	 * huge pmd.
 	 */
 	smp_wmb();
-	*pmdp = __pmd(new_pmd & ~_PAGE_BUSY);
+	*pmdp = __pmd(new_pmd & ~H_PAGE_BUSY);
 	return 0;
 }
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 96765510a49c..86a1f36714a3 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -48,7 +48,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 	do {
 		old_pte = pte_val(*ptep);
 		/* If PTE busy, retry the access */
-		if (unlikely(old_pte & _PAGE_BUSY))
+		if (unlikely(old_pte & H_PAGE_BUSY))
 			return 0;
 		/* If PTE permissions don't match, take page fault */
 		if (unlikely(!check_pte_access(access, old_pte)))
@@ -56,7 +56,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 
 		/* Try to lock the PTE, add ACCESSED and DIRTY if it was
 		 * a write access */
-		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
+		new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED;
 		if (access & _PAGE_WRITE)
 			new_pte |= _PAGE_DIRTY;
 		opte = cpu_to_be64(old_pte);
@@ -72,28 +72,28 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 		rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
 
 	/* Check if pte already has an hpte (case 2) */
-	if (unlikely(old_pte & _PAGE_HASHPTE)) {
+	if (unlikely(old_pte & H_PAGE_HASHPTE)) {
 		/* There MIGHT be an HPTE for this pte */
 		unsigned long hash, slot;
 
 		hash = hpt_hash(vpn, shift, ssize);
-		if (old_pte & _PAGE_F_SECOND)
+		if (old_pte & H_PAGE_F_SECOND)
 			hash = ~hash;
 		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
-		slot += (old_pte & _PAGE_F_GIX) >> _PAGE_F_GIX_SHIFT;
+		slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
 
 		if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize,
 					 mmu_psize, ssize, flags) == -1)
 			old_pte &= ~_PAGE_HPTEFLAGS;
 	}
 
-	if (likely(!(old_pte & _PAGE_HASHPTE))) {
+	if (likely(!(old_pte & H_PAGE_HASHPTE))) {
 		unsigned long hash = hpt_hash(vpn, shift, ssize);
 
 		pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
 
 		/* clear HPTE slot informations in new PTE */
-		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
+		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
 
 		slot = hpte_insert_repeating(hash, vpn, pa, rflags, 0,
 					     mmu_psize, ssize);
@@ -109,14 +109,14 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 			return -1;
 		}
 
-		new_pte |= (slot << _PAGE_F_GIX_SHIFT) &
-			(_PAGE_F_SECOND | _PAGE_F_GIX);
+		new_pte |= (slot << H_PAGE_F_GIX_SHIFT) &
+			(H_PAGE_F_SECOND | H_PAGE_F_GIX);
 	}
 
 	/*
 	 * No need to use ldarx/stdcx here
 	 */
-	*ptep = __pte(new_pte & ~_PAGE_BUSY);
+	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
 	return 0;
 }
 
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index d493f62d12eb..e43e0ea820e0 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -92,7 +92,7 @@ void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
 		flags |= pgprot_val(PAGE_KERNEL);
 
 	/* We don't support the 4K PFN hack with ioremap */
-	if (flags & _PAGE_4K_PFN)
+	if (flags & H_PAGE_4K_PFN)
 		return NULL;
 
 	WARN_ON(pa & ~PAGE_MASK);
@@ -445,7 +445,7 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
 {
 
 	unsigned long old, tmp;
-	unsigned long busy = cpu_to_be64(_PAGE_BUSY);
+	unsigned long busy = cpu_to_be64(H_PAGE_BUSY);
 
 #ifdef CONFIG_DEBUG_VM
 	WARN_ON(!pmd_trans_huge(*pmdp));
@@ -473,7 +473,7 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
 	*pmdp = __pmd((old & ~clr) | set);
 #endif
 	trace_hugepage_update(addr, old, clr, set);
-	if (old & _PAGE_HASHPTE)
+	if (old & H_PAGE_HASHPTE)
 		hpte_do_hugepage_flush(mm, addr, pmdp, old);
 	return old;
 }
@@ -645,7 +645,7 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
 	psize = get_slice_psize(mm, addr);
 	BUG_ON(psize == MMU_PAGE_16M);
 #endif
-	if (old_pmd & _PAGE_COMBO)
+	if (old_pmd & H_PAGE_COMBO)
 		psize = MMU_PAGE_4K;
 	else
 		psize = MMU_PAGE_64K;
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index f7b80391bee7..38497cf5e31b 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -218,7 +218,7 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
 		pte = pte_val(*ptep);
 		if (is_thp)
 			trace_hugepage_invalidate(start, pte);
-		if (!(pte & _PAGE_HASHPTE))
+		if (!(pte & H_PAGE_HASHPTE))
 			continue;
 		if (unlikely(is_thp))
 			hpte_do_hugepage_flush(mm, start, (pmd_t *)ptep, pte);
@@ -248,7 +248,7 @@ void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr)
 	start_pte = pte_offset_map(pmd, addr);
 	for (pte = start_pte; pte < start_pte + PTRS_PER_PTE; pte++) {
 		unsigned long pteval = pte_val(*pte);
-		if (pteval & _PAGE_HASHPTE)
+		if (pteval & H_PAGE_HASHPTE)
 			hpte_need_flush(mm, addr, pte, pteval, 0);
 		addr += PAGE_SIZE;
 	}
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 18/65] powerpc/mm: Handle _PTE_NONE_MASK
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (15 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 17/65] powerpc/mm/book3s: Rename hash specific PTE bits to carry H_ prefix Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 19/65] powerpc/mm: Move common pte bits and accessors to book3s/64/pgtable.h Aneesh Kumar K.V
                   ` (47 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

I am splitting this out as a separate patch to make review easier. If it
looks ok, we should merge this with the previous patch.
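
(Reviewer note, not part of the patch: a hedged userspace sketch of the
effect. pte_none() keeps ignoring the HPTE bookkeeping bits, only the mask now
carries the hash-specific name. Bit values follow the hash-64k.h definitions
from this series; pte_none_demo() is a made-up stand-in for the kernel's
pte_none().)

/* illustration only; masks mirror hash-64k.h from this series */
#include <stdio.h>

#define H_PAGE_BUSY	0x00800UL
#define H_PAGE_COMBO	0x01000UL
#define H_PAGE_F_GIX	(7UL << 57)
#define H_PAGE_F_SECOND	(1UL << 60)
#define H_PAGE_HASHPTE	(1UL << 61)

/* 64K-page definition of the HPTE flags, as in hash-64k.h */
#define _PAGE_HPTEFLAGS	(H_PAGE_BUSY | H_PAGE_F_SECOND | \
			 H_PAGE_F_GIX | H_PAGE_HASHPTE | H_PAGE_COMBO)
#define H_PTE_NONE_MASK	_PAGE_HPTEFLAGS

static int pte_none_demo(unsigned long pte)
{
	/* a pte is "none" if nothing but HPTE bookkeeping bits is set */
	return (pte & ~H_PTE_NONE_MASK) == 0;
}

int main(void)
{
	printf("%d\n", pte_none_demo(0));			/* 1 */
	printf("%d\n", pte_none_demo(H_PAGE_HASHPTE));		/* 1 */
	printf("%d\n", pte_none_demo(1UL << 63));		/* 0: present */
	return 0;
}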

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 2b0824defa6e..13489dccc532 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -36,8 +36,7 @@
 #endif
 #define _PAGE_SPECIAL		0x00400 /* software: special page */
 #define H_PAGE_BUSY		0x00800 /* software: PTE & hash are busy */
-
-
+#define H_PTE_NONE_MASK		_PAGE_HPTEFLAGS
 #define H_PAGE_F_GIX_SHIFT	57
 #define H_PAGE_F_GIX		(7ul << 57)	/* HPTE index within HPTEG */
 #define H_PAGE_F_SECOND		(1ul << 60)	/* HPTE is in 2ndary HPTEG */
@@ -131,7 +130,6 @@
 
 /* Hash table based platforms need atomic updates of the linux PTE */
 #define PTE_ATOMIC_UPDATES	1
-#define _PTE_NONE_MASK	_PAGE_HPTEFLAGS
 /*
  * We support 57 bit real address in pte. Clear everything above 57, and
  * every thing below PAGE_SHIFT;
@@ -381,7 +379,7 @@ static inline int pte_write(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_WRITE);
 static inline int pte_dirty(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_DIRTY); }
 static inline int pte_young(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_ACCESSED); }
 static inline int pte_special(pte_t pte)	{ return !!(pte_val(pte) & _PAGE_SPECIAL); }
-static inline int pte_none(pte_t pte)		{ return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
+static inline int pte_none(pte_t pte)		{ return (pte_val(pte) & ~H_PTE_NONE_MASK) == 0; }
 static inline pgprot_t pte_pgprot(pte_t pte)	{ return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
 
 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 19/65] powerpc/mm: Move common pte bits and accessors to book3s/64/pgtable.h
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (16 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 18/65] powerpc/mm: Handle _PTE_NONE_MASK Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 20/65] powerpc/mm: Move pte accessors that operate on common pte bits to pgtable.h Aneesh Kumar K.V
                   ` (46 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Now that we have moved the book3s hash64 linux pte bits to match the
PowerISA 3.0 radix pte bit positions, we can move the matching pte bits
to a common header.

Only code movement in this patch. No functionality change.
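
(Illustration only, not part of the patch: a single-file sketch of the
layering this movement sets up. The "common" block stands in for
book3s/64/pgtable.h and the "hash only" block for book3s/64/hash.h; the bit
values are the ones used in this series, while the demo accessor and main()
are assumptions made for the example.)

/* illustration of the header split, not the kernel headers themselves */
#include <stdio.h>

/* common bits: usable on both hash and radix (as in pgtable.h) */
#define _PAGE_EXEC	0x00001UL
#define _PAGE_WRITE	0x00002UL
#define _PAGE_READ	0x00004UL
#define _PAGE_PRESENT	(1UL << 63)

/* hash-only bits: would stay in hash.h */
#define H_PAGE_HASHPTE	(1UL << 61)

/* a generic accessor behaves the same for either MMU because it only
 * touches common bits */
static int pte_present_demo(unsigned long pte)
{
	return !!(pte & _PAGE_PRESENT);
}

int main(void)
{
	unsigned long pte = _PAGE_PRESENT | _PAGE_READ | H_PAGE_HASHPTE;

	printf("present: %d\n", pte_present_demo(pte));
	printf("permission bits shared with radix: r=%d w=%d x=%d\n",
	       !!(pte & _PAGE_READ), !!(pte & _PAGE_WRITE),
	       !!(pte & _PAGE_EXEC));
	return 0;
}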

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash.h    | 135 -------------------------
 arch/powerpc/include/asm/book3s/64/pgtable.h | 143 ++++++++++++++++++++++++++-
 2 files changed, 141 insertions(+), 137 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 13489dccc532..4aaee95a30fe 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -13,49 +13,12 @@
  * We could create separate kernel read-only if we used the 3 PP bits
  * combinations that newer processors provide but we currently don't.
  */
-#define _PAGE_BIT_SWAP_TYPE	0
-
-#define _PAGE_EXEC		0x00001 /* execute permission */
-#define _PAGE_WRITE		0x00002 /* write access allowed */
-#define _PAGE_READ		0x00004	/* read access allowed */
-#define _PAGE_RW		(_PAGE_READ | _PAGE_WRITE)
-#define _PAGE_RWX		(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
-#define _PAGE_PRIVILEGED	0x00008 /* kernel access only */
-#define _PAGE_SAO		0x00010 /* Strong access order */
-#define _PAGE_NON_IDEMPOTENT	0x00020 /* non idempotent memory */
-#define _PAGE_TOLERANT		0x00030 /* tolerant memory, cache inhibited */
-#define _PAGE_DIRTY		0x00080 /* C: page changed */
-#define _PAGE_ACCESSED		0x00100 /* R: page referenced */
-/*
- * Software bits
- */
-#ifdef CONFIG_MEM_SOFT_DIRTY
-#define _PAGE_SOFT_DIRTY	0x00200 /* software: software dirty tracking */
-#else
-#define _PAGE_SOFT_DIRTY	0x00000
-#endif
-#define _PAGE_SPECIAL		0x00400 /* software: special page */
 #define H_PAGE_BUSY		0x00800 /* software: PTE & hash are busy */
 #define H_PTE_NONE_MASK		_PAGE_HPTEFLAGS
 #define H_PAGE_F_GIX_SHIFT	57
 #define H_PAGE_F_GIX		(7ul << 57)	/* HPTE index within HPTEG */
 #define H_PAGE_F_SECOND		(1ul << 60)	/* HPTE is in 2ndary HPTEG */
 #define H_PAGE_HASHPTE		(1ul << 61)	/* PTE has associated HPTE */
-#define _PAGE_PTE		(1ul << 62)	/* distinguishes PTEs from pointers */
-#define _PAGE_PRESENT		(1ul << 63)	/* pte contains a translation */
-/*
- * Drivers request for cache inhibited pte mapping using _PAGE_NO_CACHE
- * Instead of fixing all of them, add an alternate define which
- * maps CI pte mapping.
- */
-#define _PAGE_NO_CACHE		_PAGE_TOLERANT
-/*
- * set of bits not changed in pmd_modify.
- */
-#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
-			 _PAGE_ACCESSED | H_PAGE_THP_HUGE | _PAGE_PTE | \
-			 _PAGE_SOFT_DIRTY)
-
 
 #ifdef CONFIG_PPC_64K_PAGES
 #include <asm/book3s/64/hash-64k.h>
@@ -113,16 +76,6 @@
 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
 #endif /* CONFIG_PPC_MM_SLICES */
 
-/*
- * user access blocked by key
- */
-#define _PAGE_KERNEL_RW		(_PAGE_PRIVILEGED | _PAGE_RW | _PAGE_DIRTY)
-#define _PAGE_KERNEL_RO		 (_PAGE_PRIVILEGED | _PAGE_READ)
-#define _PAGE_KERNEL_RWX	(_PAGE_PRIVILEGED | _PAGE_DIRTY | \
-				 _PAGE_RW | _PAGE_EXEC)
-
-/* No page size encoding in the linux PTE */
-#define _PAGE_PSIZE		0
 
 /* PTEIDX nibble */
 #define _PTEIDX_SECONDARY	0x8
@@ -130,94 +83,6 @@
 
 /* Hash table based platforms need atomic updates of the linux PTE */
 #define PTE_ATOMIC_UPDATES	1
-/*
- * We support 57 bit real address in pte. Clear everything above 57, and
- * every thing below PAGE_SHIFT;
- */
-#define PTE_RPN_MASK	(((1UL << 57) - 1) & (PAGE_MASK))
-/*
- * _PAGE_CHG_MASK masks of bits that are to be preserved across
- * pgprot changes
- */
-#define _PAGE_CHG_MASK	(PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
-			 _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \
-			 _PAGE_SOFT_DIRTY)
-/*
- * Mask of bits returned by pte_pgprot()
- */
-#define PAGE_PROT_BITS  (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT | \
-			 H_PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \
-			 _PAGE_READ | _PAGE_WRITE |  _PAGE_DIRTY | _PAGE_EXEC | \
-			 _PAGE_SOFT_DIRTY)
-/*
- * We define 2 sets of base prot bits, one for basic pages (ie,
- * cacheable kernel and user pages) and one for non cacheable
- * pages. We always set _PAGE_COHERENT when SMP is enabled or
- * the processor might need it for DMA coherency.
- */
-#define _PAGE_BASE_NC	(_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
-#define _PAGE_BASE	(_PAGE_BASE_NC)
-
-/* Permission masks used to generate the __P and __S table,
- *
- * Note:__pgprot is defined in arch/powerpc/include/asm/page.h
- *
- * Write permissions imply read permissions for now (we could make write-only
- * pages on BookE but we don't bother for now). Execute permission control is
- * possible on platforms that define _PAGE_EXEC
- *
- * Note due to the way vm flags are laid out, the bits are XWR
- */
-#define PAGE_NONE	__pgprot(_PAGE_BASE | _PAGE_PRIVILEGED)
-#define PAGE_SHARED	__pgprot(_PAGE_BASE | _PAGE_RW)
-#define PAGE_SHARED_X	__pgprot(_PAGE_BASE | _PAGE_RW | _PAGE_EXEC)
-#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_READ)
-#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
-#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_READ)
-#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
-
-#define __P000	PAGE_NONE
-#define __P001	PAGE_READONLY
-#define __P010	PAGE_COPY
-#define __P011	PAGE_COPY
-#define __P100	PAGE_READONLY_X
-#define __P101	PAGE_READONLY_X
-#define __P110	PAGE_COPY_X
-#define __P111	PAGE_COPY_X
-
-#define __S000	PAGE_NONE
-#define __S001	PAGE_READONLY
-#define __S010	PAGE_SHARED
-#define __S011	PAGE_SHARED
-#define __S100	PAGE_READONLY_X
-#define __S101	PAGE_READONLY_X
-#define __S110	PAGE_SHARED_X
-#define __S111	PAGE_SHARED_X
-
-/* Permission masks used for kernel mappings */
-#define PAGE_KERNEL	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
-#define PAGE_KERNEL_NC	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
-				 _PAGE_TOLERANT)
-#define PAGE_KERNEL_NCG	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
-				 _PAGE_NON_IDEMPOTENT)
-#define PAGE_KERNEL_X	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
-#define PAGE_KERNEL_RO	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
-#define PAGE_KERNEL_ROX	__pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
-
-/* Protection used for kernel text. We want the debuggers to be able to
- * set breakpoints anywhere, so don't write protect the kernel text
- * on platforms where such control is possible.
- */
-#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) ||\
-	defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
-#define PAGE_KERNEL_TEXT	PAGE_KERNEL_X
-#else
-#define PAGE_KERNEL_TEXT	PAGE_KERNEL_ROX
-#endif
-
-/* Make modules code happy. We don't set RO yet */
-#define PAGE_KERNEL_EXEC	PAGE_KERNEL_X
-#define PAGE_AGP		(PAGE_KERNEL_NC)
 
 #define PMD_BAD_BITS		(PTE_TABLE_SIZE-1)
 #define PUD_BAD_BITS		(PMD_TABLE_SIZE-1)
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 65eb819609b1..b5d9019f68bb 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1,9 +1,148 @@
 #ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
 #define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
+
+/*
+ * Common bits between hash and Radix page table
+ */
+#define _PAGE_BIT_SWAP_TYPE	0
+
+#define _PAGE_EXEC		0x00001 /* execute permission */
+#define _PAGE_WRITE		0x00002 /* write access allowed */
+#define _PAGE_READ		0x00004	/* read access allowed */
+#define _PAGE_RW		(_PAGE_READ | _PAGE_WRITE)
+#define _PAGE_RWX		(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
+#define _PAGE_PRIVILEGED	0x00008 /* kernel access only */
+#define _PAGE_SAO		0x00010 /* Strong access order */
+#define _PAGE_NON_IDEMPOTENT	0x00020 /* non idempotent memory */
+#define _PAGE_TOLERANT		0x00030 /* tolerant memory, cache inhibited */
+#define _PAGE_DIRTY		0x00080 /* C: page changed */
+#define _PAGE_ACCESSED		0x00100 /* R: page referenced */
+/*
+ * Software bits
+ */
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define _PAGE_SOFT_DIRTY	0x00200 /* software: software dirty tracking */
+#else
+#define _PAGE_SOFT_DIRTY	0x00000
+#endif
+#define _PAGE_SPECIAL		0x00400 /* software: special page */
+
+
+#define _PAGE_PTE		(1ul << 62)	/* distinguishes PTEs from pointers */
+#define _PAGE_PRESENT		(1ul << 63)	/* pte contains a translation */
+/*
+ * Drivers request for cache inhibited pte mapping using _PAGE_NO_CACHE
+ * Instead of fixing all of them, add an alternate define which
+ * maps CI pte mapping.
+ */
+#define _PAGE_NO_CACHE		_PAGE_TOLERANT
+/*
+ * set of bits not changed in pmd_modify. Even though we have hash specific bits
+ * in here, on radix we expect them to be zero.
+ */
+#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
+			 _PAGE_ACCESSED | H_PAGE_THP_HUGE | _PAGE_PTE | \
+			 _PAGE_SOFT_DIRTY)
+/*
+ * user access blocked by key
+ */
+#define _PAGE_KERNEL_RW		(_PAGE_PRIVILEGED | _PAGE_RW | _PAGE_DIRTY)
+#define _PAGE_KERNEL_RO		 (_PAGE_PRIVILEGED | _PAGE_READ)
+#define _PAGE_KERNEL_RWX	(_PAGE_PRIVILEGED | _PAGE_DIRTY |	\
+				 _PAGE_RW | _PAGE_EXEC)
+/*
+ * No page size encoding in the linux PTE
+ */
+#define _PAGE_PSIZE		0
 /*
- * This file contains the functions and defines necessary to modify and use
- * the ppc64 hashed page table.
+ * We support 57 bit real address in pte. Clear everything above 57, and
+ * every thing below PAGE_SHIFT;
  */
+#define PTE_RPN_MASK	(((1UL << 57) - 1) & (PAGE_MASK))
+/*
+ * _PAGE_CHG_MASK masks of bits that are to be preserved across
+ * pgprot changes
+ */
+#define _PAGE_CHG_MASK	(PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
+			 _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE |	\
+			 _PAGE_SOFT_DIRTY)
+/*
+ * Mask of bits returned by pte_pgprot()
+ */
+#define PAGE_PROT_BITS  (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT | \
+			 H_PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \
+			 _PAGE_READ | _PAGE_WRITE |  _PAGE_DIRTY | _PAGE_EXEC | \
+			 _PAGE_SOFT_DIRTY)
+/*
+ * We define 2 sets of base prot bits, one for basic pages (ie,
+ * cacheable kernel and user pages) and one for non cacheable
+ * pages. We always set _PAGE_COHERENT when SMP is enabled or
+ * the processor might need it for DMA coherency.
+ */
+#define _PAGE_BASE_NC	(_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
+#define _PAGE_BASE	(_PAGE_BASE_NC)
+
+/* Permission masks used to generate the __P and __S table,
+ *
+ * Note:__pgprot is defined in arch/powerpc/include/asm/page.h
+ *
+ * Write permissions imply read permissions for now (we could make write-only
+ * pages on BookE but we don't bother for now). Execute permission control is
+ * possible on platforms that define _PAGE_EXEC
+ *
+ * Note due to the way vm flags are laid out, the bits are XWR
+ */
+#define PAGE_NONE	__pgprot(_PAGE_BASE | _PAGE_PRIVILEGED)
+#define PAGE_SHARED	__pgprot(_PAGE_BASE | _PAGE_RW)
+#define PAGE_SHARED_X	__pgprot(_PAGE_BASE | _PAGE_RW | _PAGE_EXEC)
+#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_READ)
+#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
+#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_READ)
+#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
+
+#define __P000	PAGE_NONE
+#define __P001	PAGE_READONLY
+#define __P010	PAGE_COPY
+#define __P011	PAGE_COPY
+#define __P100	PAGE_READONLY_X
+#define __P101	PAGE_READONLY_X
+#define __P110	PAGE_COPY_X
+#define __P111	PAGE_COPY_X
+
+#define __S000	PAGE_NONE
+#define __S001	PAGE_READONLY
+#define __S010	PAGE_SHARED
+#define __S011	PAGE_SHARED
+#define __S100	PAGE_READONLY_X
+#define __S101	PAGE_READONLY_X
+#define __S110	PAGE_SHARED_X
+#define __S111	PAGE_SHARED_X
+
+/* Permission masks used for kernel mappings */
+#define PAGE_KERNEL	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
+#define PAGE_KERNEL_NC	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
+				 _PAGE_TOLERANT)
+#define PAGE_KERNEL_NCG	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
+				 _PAGE_NON_IDEMPOTENT)
+#define PAGE_KERNEL_X	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
+#define PAGE_KERNEL_RO	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
+#define PAGE_KERNEL_ROX	__pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
+
+/*
+ * Protection used for kernel text. We want the debuggers to be able to
+ * set breakpoints anywhere, so don't write protect the kernel text
+ * on platforms where such control is possible.
+ */
+#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) || \
+	defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
+#define PAGE_KERNEL_TEXT	PAGE_KERNEL_X
+#else
+#define PAGE_KERNEL_TEXT	PAGE_KERNEL_ROX
+#endif
+
+/* Make modules code happy. We don't set RO yet */
+#define PAGE_KERNEL_EXEC	PAGE_KERNEL_X
+#define PAGE_AGP		(PAGE_KERNEL_NC)
 
 #include <asm/book3s/64/hash.h>
 #include <asm/barrier.h>
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 20/65] powerpc/mm: Move pte accessors that operate on common pte bits to pgtable.h
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (17 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 19/65] powerpc/mm: Move common pte bits and accessors to book3s/64/pgtable.h Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 21/65] powerpc/mm: Make page table size a variable Aneesh Kumar K.V
                   ` (45 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

These pte functions will remain the same between radix and hash. Move
them to pgtable.h
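
(Not part of the patch: a minimal userspace sketch, assuming 64K pages and
the PTE_RPN_MASK from this series, of why these accessors can live in the
common header. pfn_pte()/pte_pfn() only touch the common RPN field and
protection bits, so one definition serves both hash and radix; PAGE_SHIFT
and main() here are assumptions made for the demo.)

/* illustration only; mirrors the moved pfn_pte()/pte_pfn() helpers */
#include <stdio.h>

#define PAGE_SHIFT	16			/* assumed: 64K pages */
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))

/* 57-bit real address support, as in this series */
#define PTE_RPN_MASK	(((1UL << 57) - 1) & PAGE_MASK)
#define _PAGE_READ	0x00004UL
#define _PAGE_PRESENT	(1UL << 63)

static unsigned long pfn_pte_demo(unsigned long pfn, unsigned long prot)
{
	return ((pfn << PAGE_SHIFT) & PTE_RPN_MASK) | prot;
}

static unsigned long pte_pfn_demo(unsigned long pte)
{
	return (pte & PTE_RPN_MASK) >> PAGE_SHIFT;
}

int main(void)
{
	unsigned long pte = pfn_pte_demo(0x12345, _PAGE_PRESENT | _PAGE_READ);

	/* the pfn round-trips through the common RPN field */
	printf("pfn back: 0x%lx\n", pte_pfn_demo(pte));	/* 0x12345 */
	return 0;
}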

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash.h    | 205 --------------------------
 arch/powerpc/include/asm/book3s/64/pgtable.h | 209 +++++++++++++++++++++++++++
 2 files changed, 209 insertions(+), 205 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 4aaee95a30fe..a9bc8ca0df7b 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -140,66 +140,6 @@ static inline unsigned long pte_update(struct mm_struct *mm,
 	return old;
 }
 
-/*
- * We currently remove entries from the hashtable regardless of whether
- * the entry was young or dirty.
- *
- * We should be more intelligent about this but for the moment we override
- * these functions and force a tlb flush unconditionally
- */
-static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
-					      unsigned long addr, pte_t *ptep)
-{
-	unsigned long old;
-
-	if ((pte_val(*ptep) & (_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
-		return 0;
-	old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
-	return (old & _PAGE_ACCESSED) != 0;
-}
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-#define ptep_test_and_clear_young(__vma, __addr, __ptep)		   \
-({									   \
-	int __r;							   \
-	__r = __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \
-	__r;								   \
-})
-
-#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
-				      pte_t *ptep)
-{
-
-	if ((pte_val(*ptep) & _PAGE_WRITE) == 0)
-		return;
-
-	pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0);
-}
-
-static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
-					   unsigned long addr, pte_t *ptep)
-{
-	if ((pte_val(*ptep) & _PAGE_WRITE) == 0)
-		return;
-
-	pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1);
-}
-
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
-				       unsigned long addr, pte_t *ptep)
-{
-	unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0);
-	return __pte(old);
-}
-
-static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
-			     pte_t * ptep)
-{
-	pte_update(mm, addr, ptep, ~0UL, 0, 0);
-}
-
-
 /* Set the dirty and/or accessed bits atomically in a linux PTE, this
  * function doesn't need to flush the hash entry
  */
@@ -240,112 +180,7 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd)
 
 
 /* Generic accessors to PTE bits */
-static inline int pte_write(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_WRITE);}
-static inline int pte_dirty(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_DIRTY); }
-static inline int pte_young(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_ACCESSED); }
-static inline int pte_special(pte_t pte)	{ return !!(pte_val(pte) & _PAGE_SPECIAL); }
 static inline int pte_none(pte_t pte)		{ return (pte_val(pte) & ~H_PTE_NONE_MASK) == 0; }
-static inline pgprot_t pte_pgprot(pte_t pte)	{ return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
-
-#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
-static inline bool pte_soft_dirty(pte_t pte)
-{
-	return !!(pte_val(pte) & _PAGE_SOFT_DIRTY);
-}
-static inline pte_t pte_mksoft_dirty(pte_t pte)
-{
-	return __pte(pte_val(pte) | _PAGE_SOFT_DIRTY);
-}
-
-static inline pte_t pte_clear_soft_dirty(pte_t pte)
-{
-	return __pte(pte_val(pte) & ~_PAGE_SOFT_DIRTY);
-}
-#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
-
-#ifdef CONFIG_NUMA_BALANCING
-/*
- * These work without NUMA balancing but the kernel does not care. See the
- * comment in include/asm-generic/pgtable.h . On powerpc, this will only
- * work for user pages and always return true for kernel pages.
- */
-static inline int pte_protnone(pte_t pte)
-{
-	return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PRIVILEGED)) ==
-		(_PAGE_PRESENT | _PAGE_PRIVILEGED);
-}
-#endif /* CONFIG_NUMA_BALANCING */
-
-static inline int pte_present(pte_t pte)
-{
-	return !!(pte_val(pte) & _PAGE_PRESENT);
-}
-
-/* Conversion functions: convert a page and protection to a page entry,
- * and a page entry and page directory to the page they refer to.
- *
- * Even if PTEs can be unsigned long long, a PFN is always an unsigned
- * long for now.
- */
-static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
-{
-	return __pte((((pte_basic_t)(pfn) << PAGE_SHIFT) & PTE_RPN_MASK) |
-		     pgprot_val(pgprot));
-}
-
-static inline unsigned long pte_pfn(pte_t pte)
-{
-	return (pte_val(pte) & PTE_RPN_MASK) >> PAGE_SHIFT;
-}
-
-/* Generic modifiers for PTE bits */
-static inline pte_t pte_wrprotect(pte_t pte)
-{
-	return __pte(pte_val(pte) & ~_PAGE_WRITE);
-}
-
-static inline pte_t pte_mkclean(pte_t pte)
-{
-	return __pte(pte_val(pte) & ~_PAGE_DIRTY);
-}
-
-static inline pte_t pte_mkold(pte_t pte)
-{
-	return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
-}
-
-static inline pte_t pte_mkwrite(pte_t pte)
-{
-	/*
-	 * write implies read, hence set both
-	 */
-	return __pte(pte_val(pte) | _PAGE_RW);
-}
-
-static inline pte_t pte_mkdirty(pte_t pte)
-{
-	return __pte(pte_val(pte) | _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
-}
-
-static inline pte_t pte_mkyoung(pte_t pte)
-{
-	return __pte(pte_val(pte) | _PAGE_ACCESSED);
-}
-
-static inline pte_t pte_mkspecial(pte_t pte)
-{
-	return __pte(pte_val(pte) | _PAGE_SPECIAL);
-}
-
-static inline pte_t pte_mkhuge(pte_t pte)
-{
-	return pte;
-}
-
-static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
-{
-	return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
-}
 
 /* This low level function performs the actual PTE insertion
  * Setting the PTE depends on the MMU type and other factors. It's
@@ -362,46 +197,6 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
 	*ptep = pte;
 }
 
-#define _PAGE_CACHE_CTL	(_PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT)
-
-#define pgprot_noncached pgprot_noncached
-static inline pgprot_t pgprot_noncached(pgprot_t prot)
-{
-	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
-			_PAGE_NON_IDEMPOTENT);
-}
-
-#define pgprot_noncached_wc pgprot_noncached_wc
-static inline pgprot_t pgprot_noncached_wc(pgprot_t prot)
-{
-	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
-			_PAGE_TOLERANT);
-}
-
-#define pgprot_cached pgprot_cached
-static inline pgprot_t pgprot_cached(pgprot_t prot)
-{
-	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL));
-}
-
-#define pgprot_writecombine pgprot_writecombine
-static inline pgprot_t pgprot_writecombine(pgprot_t prot)
-{
-	return pgprot_noncached_wc(prot);
-}
-/*
- * check a pte mapping have cache inhibited property
- */
-static inline bool pte_ci(pte_t pte)
-{
-	unsigned long pte_v = pte_val(pte);
-
-	if (((pte_v & _PAGE_CACHE_CTL) == _PAGE_TOLERANT) ||
-	    ((pte_v & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT))
-		return true;
-	return false;
-}
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
 				   pmd_t *pmdp, unsigned long old_pmd);
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index b5d9019f68bb..e2c2af34d71b 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -201,6 +201,215 @@
 
 #endif /* __real_pte */
 
+/*
+ * For hash even if we have _PAGE_ACCESSED = 0, we do a pte_update.
+ * We currently remove entries from the hashtable regardless of whether
+ * the entry was young or dirty.
+ *
+ * We should be more intelligent about this but for the moment we override
+ * these functions and force a tlb flush unconditionally
+ * For radix: H_PAGE_HASHPTE should be zero. Hence we can use the same
+ * function for both hash and radix.
+ */
+static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
+					      unsigned long addr, pte_t *ptep)
+{
+	unsigned long old;
+
+	if ((pte_val(*ptep) & (_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
+		return 0;
+	old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
+	return (old & _PAGE_ACCESSED) != 0;
+}
+
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define ptep_test_and_clear_young(__vma, __addr, __ptep)	\
+({								\
+	int __r;						\
+	__r = __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \
+	__r;							\
+})
+
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+				      pte_t *ptep)
+{
+
+	if ((pte_val(*ptep) & _PAGE_WRITE) == 0)
+		return;
+
+	pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0);
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
+{
+	if ((pte_val(*ptep) & _PAGE_WRITE) == 0)
+		return;
+
+	pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1);
+}
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+				       unsigned long addr, pte_t *ptep)
+{
+	unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0);
+	return __pte(old);
+}
+
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
+			     pte_t * ptep)
+{
+	pte_update(mm, addr, ptep, ~0UL, 0, 0);
+}
+static inline int pte_write(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_WRITE);}
+static inline int pte_dirty(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_DIRTY); }
+static inline int pte_young(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_ACCESSED); }
+static inline int pte_special(pte_t pte)	{ return !!(pte_val(pte) & _PAGE_SPECIAL); }
+static inline pgprot_t pte_pgprot(pte_t pte)	{ return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
+
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+static inline bool pte_soft_dirty(pte_t pte)
+{
+	return !!(pte_val(pte) & _PAGE_SOFT_DIRTY);
+}
+static inline pte_t pte_mksoft_dirty(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_SOFT_DIRTY);
+}
+
+static inline pte_t pte_clear_soft_dirty(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_SOFT_DIRTY);
+}
+#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
+
+#ifdef CONFIG_NUMA_BALANCING
+/*
+ * These work without NUMA balancing but the kernel does not care. See the
+ * comment in include/asm-generic/pgtable.h . On powerpc, this will only
+ * work for user pages and always return true for kernel pages.
+ */
+static inline int pte_protnone(pte_t pte)
+{
+	return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PRIVILEGED)) ==
+		(_PAGE_PRESENT | _PAGE_PRIVILEGED);
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
+static inline int pte_present(pte_t pte)
+{
+	return !!(pte_val(pte) & _PAGE_PRESENT);
+}
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ *
+ * Even if PTEs can be unsigned long long, a PFN is always an unsigned
+ * long for now.
+ */
+static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
+{
+	return __pte((((pte_basic_t)(pfn) << PAGE_SHIFT) & PTE_RPN_MASK) |
+		     pgprot_val(pgprot));
+}
+
+static inline unsigned long pte_pfn(pte_t pte)
+{
+	return (pte_val(pte) & PTE_RPN_MASK) >> PAGE_SHIFT;
+}
+
+/* Generic modifiers for PTE bits */
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_WRITE);
+}
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_DIRTY);
+}
+
+static inline pte_t pte_mkold(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
+}
+
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+	/*
+	 * write implies read, hence set both
+	 */
+	return __pte(pte_val(pte) | _PAGE_RW);
+}
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_ACCESSED);
+}
+
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_SPECIAL);
+}
+
+static inline pte_t pte_mkhuge(pte_t pte)
+{
+	return pte;
+}
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	/* FIXME!! check whether this need to be a conditional */
+	return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
+}
+
+#define _PAGE_CACHE_CTL	(_PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT)
+
+#define pgprot_noncached pgprot_noncached
+static inline pgprot_t pgprot_noncached(pgprot_t prot)
+{
+	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
+			_PAGE_NON_IDEMPOTENT);
+}
+
+#define pgprot_noncached_wc pgprot_noncached_wc
+static inline pgprot_t pgprot_noncached_wc(pgprot_t prot)
+{
+	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
+			_PAGE_TOLERANT);
+}
+
+#define pgprot_cached pgprot_cached
+static inline pgprot_t pgprot_cached(pgprot_t prot)
+{
+	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL));
+}
+
+#define pgprot_writecombine pgprot_writecombine
+static inline pgprot_t pgprot_writecombine(pgprot_t prot)
+{
+	return pgprot_noncached_wc(prot);
+}
+/*
+ * check a pte mapping have cache inhibited property
+ */
+static inline bool pte_ci(pte_t pte)
+{
+	unsigned long pte_v = pte_val(pte);
+
+	if (((pte_v & _PAGE_CACHE_CTL) == _PAGE_TOLERANT) ||
+	    ((pte_v & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT))
+		return true;
+	return false;
+}
+
 static inline void pmd_set(pmd_t *pmdp, unsigned long val)
 {
 	*pmdp = __pmd(val);
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 21/65] powerpc/mm: Make page table size a variable
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (18 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 20/65] powerpc/mm: Move pte accessors that operate on common pte bits to pgtable.h Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 22/65] powerpc/mm: Move page table index and and vaddr to pgtable.h Aneesh Kumar K.V
                   ` (44 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Radix and hash MMU models support different page table sizes. Make
the #defines variables so that existing code can work with variable
sizes.

Slice related code is only used by hash, so use the hash constants
there. We will replicate some of the boundary conditions with respect
to TASK_SIZE using radix values too. Right now we do the boundary
condition checks using the hash constants.

The swapper pgdir size is initialized in asm code. We select the max
pgd size to keep it simpler. For now we select the hash pgdir; when
adding radix we will switch that to the radix pgdir, which is 64K.

The BUILD_BUG_ON check that is removed here is already done in
hugepage_init() using MAYBE_BUILD_BUG_ON().
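
A minimal sketch of the scheme (names as introduced by this patch; the
radix assignment mentioned in the comment is only illustrative, since
radix support arrives later in the series):

/* the former compile-time constant becomes a variable ... */
unsigned long __pte_index_size;
#define PTE_INDEX_SIZE	__pte_index_size

/* ... that the MMU setup code assigns once at boot */
void __init early_init_mmu(void)
{
	/* hash geometry for now; a radix kernel would assign the
	 * radix index size here instead
	 */
	__pte_index_size = H_PTE_INDEX_SIZE;
}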

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash-4k.h  | 45 ++++----------------
 arch/powerpc/include/asm/book3s/64/hash-64k.h | 46 +++++---------------
 arch/powerpc/include/asm/book3s/64/hash.h     | 14 ++++---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h |  4 +-
 arch/powerpc/include/asm/book3s/64/pgtable.h  | 60 +++++++++++++++++++++++++++
 arch/powerpc/include/asm/page_64.h            |  2 +-
 arch/powerpc/kernel/asm-offsets.c             |  4 ++
 arch/powerpc/mm/hash_utils_64.c               | 12 ++++++
 arch/powerpc/mm/init_64.c                     |  4 +-
 arch/powerpc/mm/pgtable-book3e.c              |  1 +
 arch/powerpc/mm/pgtable-hash64.c              |  1 +
 arch/powerpc/mm/pgtable_64.c                  | 33 ++++++++++-----
 arch/powerpc/mm/slb_low.S                     |  2 +-
 arch/powerpc/mm/slice.c                       |  4 +-
 14 files changed, 135 insertions(+), 97 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 115427f61ef0..f121f8016d8b 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -5,48 +5,20 @@
  * for each page table entry.  The PMD and PGD level use a 32b record for
  * each entry by assuming that each entry is page aligned.
  */
-#define PTE_INDEX_SIZE  9
-#define PMD_INDEX_SIZE  7
-#define PUD_INDEX_SIZE  9
-#define PGD_INDEX_SIZE  9
+#define H_PTE_INDEX_SIZE  9
+#define H_PMD_INDEX_SIZE  7
+#define H_PUD_INDEX_SIZE  9
+#define H_PGD_INDEX_SIZE  9
 
 #ifndef __ASSEMBLY__
-#define PTE_TABLE_SIZE	(sizeof(pte_t) << PTE_INDEX_SIZE)
-#define PMD_TABLE_SIZE	(sizeof(pmd_t) << PMD_INDEX_SIZE)
-#define PUD_TABLE_SIZE	(sizeof(pud_t) << PUD_INDEX_SIZE)
-#define PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)
-#endif	/* __ASSEMBLY__ */
-
-#define PTRS_PER_PTE	(1 << PTE_INDEX_SIZE)
-#define PTRS_PER_PMD	(1 << PMD_INDEX_SIZE)
-#define PTRS_PER_PUD	(1 << PUD_INDEX_SIZE)
-#define PTRS_PER_PGD	(1 << PGD_INDEX_SIZE)
-
-/* PMD_SHIFT determines what a second-level page table entry can map */
-#define PMD_SHIFT	(PAGE_SHIFT + PTE_INDEX_SIZE)
-#define PMD_SIZE	(1UL << PMD_SHIFT)
-#define PMD_MASK	(~(PMD_SIZE-1))
+#define H_PTE_TABLE_SIZE	(sizeof(pte_t) << H_PTE_INDEX_SIZE)
+#define H_PMD_TABLE_SIZE	(sizeof(pmd_t) << H_PMD_INDEX_SIZE)
+#define H_PUD_TABLE_SIZE	(sizeof(pud_t) << H_PUD_INDEX_SIZE)
+#define H_PGD_TABLE_SIZE	(sizeof(pgd_t) << H_PGD_INDEX_SIZE)
 
 /* With 4k base page size, hugepage PTEs go at the PMD level */
 #define MIN_HUGEPTE_SHIFT	PMD_SHIFT
 
-/* PUD_SHIFT determines what a third-level page table entry can map */
-#define PUD_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
-#define PUD_SIZE	(1UL << PUD_SHIFT)
-#define PUD_MASK	(~(PUD_SIZE-1))
-
-/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
-#define PGDIR_SHIFT	(PUD_SHIFT + PUD_INDEX_SIZE)
-#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
-#define PGDIR_MASK	(~(PGDIR_SIZE-1))
-
-/* Bits to mask out from a PMD to get to the PTE page */
-#define PMD_MASKED_BITS		0
-/* Bits to mask out from a PUD to get to the PMD page */
-#define PUD_MASKED_BITS		0
-/* Bits to mask out from a PGD to get to the PUD page */
-#define PGD_MASKED_BITS		0
-
 /* PTE flags to conserve for HPTE identification */
 #define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | \
 			 H_PAGE_F_SECOND | H_PAGE_F_GIX)
@@ -55,7 +27,6 @@
  */
 #define H_PAGE_4K_PFN	0x0
 #define H_PAGE_THP_HUGE 0x0
-#ifndef __ASSEMBLY__
 /*
  * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range()
  */
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 93bef8162d66..d60c431c96cb 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -1,34 +1,14 @@
 #ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H
 #define _ASM_POWERPC_BOOK3S_64_HASH_64K_H
 
-#define PTE_INDEX_SIZE  8
-#define PMD_INDEX_SIZE  5
-#define PUD_INDEX_SIZE	5
-#define PGD_INDEX_SIZE  12
-
-#define PTRS_PER_PTE	(1 << PTE_INDEX_SIZE)
-#define PTRS_PER_PMD	(1 << PMD_INDEX_SIZE)
-#define PTRS_PER_PUD	(1 << PUD_INDEX_SIZE)
-#define PTRS_PER_PGD	(1 << PGD_INDEX_SIZE)
+#define H_PTE_INDEX_SIZE  8
+#define H_PMD_INDEX_SIZE  5
+#define H_PUD_INDEX_SIZE  5
+#define H_PGD_INDEX_SIZE  12
 
 /* With 4k base page size, hugepage PTEs go at the PMD level */
 #define MIN_HUGEPTE_SHIFT	PAGE_SHIFT
 
-/* PMD_SHIFT determines what a second-level page table entry can map */
-#define PMD_SHIFT	(PAGE_SHIFT + PTE_INDEX_SIZE)
-#define PMD_SIZE	(1UL << PMD_SHIFT)
-#define PMD_MASK	(~(PMD_SIZE-1))
-
-/* PUD_SHIFT determines what a third-level page table entry can map */
-#define PUD_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
-#define PUD_SIZE	(1UL << PUD_SHIFT)
-#define PUD_MASK	(~(PUD_SIZE-1))
-
-/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
-#define PGDIR_SHIFT	(PUD_SHIFT + PUD_INDEX_SIZE)
-#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
-#define PGDIR_MASK	(~(PGDIR_SIZE-1))
-
 #define H_PAGE_COMBO	0x00001000 /* this is a combo 4k page */
 #define H_PAGE_4K_PFN	0x00002000 /* PFN is for a single 4k page */
 /*
@@ -57,13 +37,6 @@
 #define PTE_FRAG_SIZE_SHIFT  12
 #define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT)
 
-/* Bits to mask out from a PMD to get to the PTE page */
-#define PMD_MASKED_BITS		0xc0000000000000ffUL
-/* Bits to mask out from a PUD to get to the PMD page */
-#define PUD_MASKED_BITS		0xc0000000000000ffUL
-/* Bits to mask out from a PGD to get to the PUD page */
-#define PGD_MASKED_BITS		0xc0000000000000ffUL
-
 #ifndef __ASSEMBLY__
 #include <asm/errno.h>
 
@@ -135,14 +108,15 @@ static inline int remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr,
 			       __pgprot(pgprot_val(prot) | H_PAGE_4K_PFN));
 }
 
-#define PTE_TABLE_SIZE	PTE_FRAG_SIZE
+#define H_PTE_TABLE_SIZE	PTE_FRAG_SIZE
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define PMD_TABLE_SIZE	((sizeof(pmd_t) << PMD_INDEX_SIZE) + (sizeof(unsigned long) << PMD_INDEX_SIZE))
+#define H_PMD_TABLE_SIZE	((sizeof(pmd_t) << PMD_INDEX_SIZE) + \
+				 (sizeof(unsigned long) << PMD_INDEX_SIZE))
 #else
-#define PMD_TABLE_SIZE	(sizeof(pmd_t) << PMD_INDEX_SIZE)
+#define H_PMD_TABLE_SIZE	(sizeof(pmd_t) << PMD_INDEX_SIZE)
 #endif
-#define PUD_TABLE_SIZE	(sizeof(pud_t) << PUD_INDEX_SIZE)
-#define PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)
+#define H_PUD_TABLE_SIZE	(sizeof(pud_t) << PUD_INDEX_SIZE)
+#define H_PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)
 
 #ifdef CONFIG_HUGETLB_PAGE
 /*
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index a9bc8ca0df7b..9485797f4468 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -29,14 +29,18 @@
 /*
  * Size of EA range mapped by our pagetables.
  */
-#define PGTABLE_EADDR_SIZE	(PTE_INDEX_SIZE + PMD_INDEX_SIZE + \
-				 PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT)
-#define PGTABLE_RANGE		(ASM_CONST(1) << PGTABLE_EADDR_SIZE)
+#define H_PGTABLE_EADDR_SIZE	(H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE + \
+				 H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT)
+#define H_PGTABLE_RANGE		(ASM_CONST(1) << H_PGTABLE_EADDR_SIZE)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define PMD_CACHE_INDEX	(PMD_INDEX_SIZE + 1)
+/*
+ * only with hash we need to use the second half of pmd page table
+ * to store pointer to deposited pgtable_t
+ */
+#define H_PMD_CACHE_INDEX	(H_PMD_INDEX_SIZE + 1)
 #else
-#define PMD_CACHE_INDEX	PMD_INDEX_SIZE
+#define H_PMD_CACHE_INDEX	H_PMD_INDEX_SIZE
 #endif
 /*
  * Define the address range of the kernel non-linear virtual area
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 843b5d839904..7da61b85406b 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -462,7 +462,7 @@ extern void slb_set_size(u16 size);
 	add	rt,rt,rx
 
 /* 4 bits per slice and we have one slice per 1TB */
-#define SLICE_ARRAY_SIZE  (PGTABLE_RANGE >> 41)
+#define SLICE_ARRAY_SIZE  (H_PGTABLE_RANGE >> 41)
 
 #ifndef __ASSEMBLY__
 
@@ -533,7 +533,7 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
 	/*
 	 * Bad address. We return VSID 0 for that
 	 */
-	if ((ea & ~REGION_MASK) >= PGTABLE_RANGE)
+	if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE)
 		return 0;
 
 	if (ssize == MMU_SEGSIZE_256M)
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index e2c2af34d71b..ef080bd5fce7 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -144,6 +144,66 @@
 #define PAGE_KERNEL_EXEC	PAGE_KERNEL_X
 #define PAGE_AGP		(PAGE_KERNEL_NC)
 
+#ifndef __ASSEMBLY__
+/*
+ * page table defines
+ */
+extern unsigned long __pte_index_size;
+extern unsigned long __pmd_index_size;
+extern unsigned long __pud_index_size;
+extern unsigned long __pgd_index_size;
+extern unsigned long __pmd_cache_index;
+#define PTE_INDEX_SIZE  __pte_index_size
+#define PMD_INDEX_SIZE  __pmd_index_size
+#define PUD_INDEX_SIZE  __pud_index_size
+#define PGD_INDEX_SIZE  __pgd_index_size
+#define PMD_CACHE_INDEX __pmd_cache_index
+/*
+ * Because of use of pte fragments and THP, size of page table
+ * are not always derived out of index size above.
+ */
+extern unsigned long __pte_table_size;
+extern unsigned long __pmd_table_size;
+extern unsigned long __pud_table_size;
+extern unsigned long __pgd_table_size;
+#define PTE_TABLE_SIZE	__pte_table_size
+#define PMD_TABLE_SIZE	__pmd_table_size
+#define PUD_TABLE_SIZE	__pud_table_size
+#define PGD_TABLE_SIZE	__pgd_table_size
+/*
+ * Pgtable size used by swapper, init in asm code
+ * We will switch this later to radix PGD
+ */
+#define MAX_PGD_TABLE_SIZE (sizeof(pgd_t) << H_PGD_INDEX_SIZE)
+
+#define PTRS_PER_PTE	(1 << PTE_INDEX_SIZE)
+#define PTRS_PER_PMD	(1 << PMD_INDEX_SIZE)
+#define PTRS_PER_PUD	(1 << PUD_INDEX_SIZE)
+#define PTRS_PER_PGD	(1 << PGD_INDEX_SIZE)
+
+/* PMD_SHIFT determines what a second-level page table entry can map */
+#define PMD_SHIFT	(PAGE_SHIFT + PTE_INDEX_SIZE)
+#define PMD_SIZE	(1UL << PMD_SHIFT)
+#define PMD_MASK	(~(PMD_SIZE-1))
+
+/* PUD_SHIFT determines what a third-level page table entry can map */
+#define PUD_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
+#define PUD_SIZE	(1UL << PUD_SHIFT)
+#define PUD_MASK	(~(PUD_SIZE-1))
+
+/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
+#define PGDIR_SHIFT	(PUD_SHIFT + PUD_INDEX_SIZE)
+#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
+#define PGDIR_MASK	(~(PGDIR_SIZE-1))
+
+/* Bits to mask out from a PMD to get to the PTE page */
+#define PMD_MASKED_BITS		0xc0000000000000ffUL
+/* Bits to mask out from a PUD to get to the PMD page */
+#define PUD_MASKED_BITS		0xc0000000000000ffUL
+/* Bits to mask out from a PGD to get to the PUD page */
+#define PGD_MASKED_BITS		0xc0000000000000ffUL
+#endif /* __ASSEMBLY__ */
+
 #include <asm/book3s/64/hash.h>
 #include <asm/barrier.h>
 
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index d908a46d05c0..77488857c26d 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -93,7 +93,7 @@ extern u64 ppc64_pft_size;
 
 #define SLICE_LOW_TOP		(0x100000000ul)
 #define SLICE_NUM_LOW		(SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
-#define SLICE_NUM_HIGH		(PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
+#define SLICE_NUM_HIGH		(H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
 
 #define GET_LOW_SLICE_INDEX(addr)	((addr) >> SLICE_LOW_SHIFT)
 #define GET_HIGH_SLICE_INDEX(addr)	((addr) >> SLICE_HIGH_SHIFT)
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 10d5eab19458..236fd1b526ea 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -433,7 +433,11 @@ int main(void)
 	DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
 #endif
 
+#ifdef MAX_PGD_TABLE_SIZE
+	DEFINE(PGD_TABLE_SIZE, MAX_PGD_TABLE_SIZE);
+#else
 	DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE);
+#endif
 	DEFINE(PTE_SIZE, sizeof(pte_t));
 
 #ifdef CONFIG_KVM
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index c2ffc97cf665..c4a5f13997b8 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -866,6 +866,18 @@ static void __init htab_initialize(void)
 
 void __init early_init_mmu(void)
 {
+	/*
+	 * initialize page table size
+	 */
+	__pte_index_size = H_PTE_INDEX_SIZE;
+	__pmd_index_size = H_PMD_INDEX_SIZE;
+	__pud_index_size = H_PUD_INDEX_SIZE;
+	__pgd_index_size = H_PGD_INDEX_SIZE;
+	__pmd_cache_index = H_PMD_CACHE_INDEX;
+	__pte_table_size = H_PTE_TABLE_SIZE;
+	__pmd_table_size = H_PMD_TABLE_SIZE;
+	__pud_table_size = H_PUD_TABLE_SIZE;
+	__pgd_table_size = H_PGD_TABLE_SIZE;
 	/* Initialize the MMU Hash table and create the linear mapping
 	 * of memory. Has to be done before SLB initialization as this is
 	 * currently where the page size encoding is obtained.
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 8d1daf7d9785..09ca65e55b58 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -66,11 +66,11 @@
 #include "mmu_decl.h"
 
 #ifdef CONFIG_PPC_STD_MMU_64
-#if PGTABLE_RANGE > USER_VSID_RANGE
+#if H_PGTABLE_RANGE > USER_VSID_RANGE
 #warning Limited user VSID range means pagetable space is wasted
 #endif
 
-#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE)
+#if (TASK_SIZE_USER64 < H_PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE)
 #warning TASK_SIZE is smaller than it needs to be.
 #endif
 #endif /* CONFIG_PPC_STD_MMU_64 */
diff --git a/arch/powerpc/mm/pgtable-book3e.c b/arch/powerpc/mm/pgtable-book3e.c
index f75ba4142875..8a7f06531282 100644
--- a/arch/powerpc/mm/pgtable-book3e.c
+++ b/arch/powerpc/mm/pgtable-book3e.c
@@ -84,6 +84,7 @@ int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags)
 	pmd_t *pmdp;
 	pte_t *ptep;
 
+	BUILD_BUG_ON(TASK_SIZE_USER64 > PGTABLE_RANGE);
 	if (slab_is_available()) {
 		pgdp = pgd_offset_k(ea);
 		pudp = pud_alloc(&init_mm, pgdp, ea);
diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c
index 04d6fa12789e..183f80a528ba 100644
--- a/arch/powerpc/mm/pgtable-hash64.c
+++ b/arch/powerpc/mm/pgtable-hash64.c
@@ -67,6 +67,7 @@ int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags)
 	pmd_t *pmdp;
 	pte_t *ptep;
 
+	BUILD_BUG_ON(TASK_SIZE_USER64 > H_PGTABLE_RANGE);
 	if (slab_is_available()) {
 		pgdp = pgd_offset_k(ea);
 		pudp = pud_alloc(&init_mm, pgdp, ea);
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index e43e0ea820e0..af6a83a6c90b 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -58,11 +58,6 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/thp.h>
 
-/* Some sanity checking */
-#if TASK_SIZE_USER64 > PGTABLE_RANGE
-#error TASK_SIZE_USER64 exceeds pagetable range
-#endif
-
 #ifdef CONFIG_PPC_STD_MMU_64
 #if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT))
 #error TASK_SIZE_USER64 exceeds user VSID range
@@ -75,6 +70,28 @@
  */
 struct prtb_entry *process_tb;
 struct patb_entry *partition_tb;
+/*
+ * page table size
+ */
+unsigned long __pte_index_size;
+EXPORT_SYMBOL(__pte_index_size);
+unsigned long __pmd_index_size;
+EXPORT_SYMBOL(__pmd_index_size);
+unsigned long __pud_index_size;
+EXPORT_SYMBOL(__pud_index_size);
+unsigned long __pgd_index_size;
+EXPORT_SYMBOL(__pgd_index_size);
+unsigned long __pmd_cache_index;
+EXPORT_SYMBOL(__pmd_cache_index);
+unsigned long __pte_table_size;
+EXPORT_SYMBOL(__pte_table_size);
+unsigned long __pmd_table_size;
+EXPORT_SYMBOL(__pmd_table_size);
+unsigned long __pud_table_size;
+EXPORT_SYMBOL(__pud_table_size);
+unsigned long __pgd_table_size;
+EXPORT_SYMBOL(__pgd_table_size);
+
 #endif
 unsigned long ioremap_bot = IOREMAP_BASE;
 
@@ -744,12 +761,6 @@ pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
 int has_transparent_hugepage(void)
 {
 
-	BUILD_BUG_ON_MSG((PMD_SHIFT - PAGE_SHIFT) >= MAX_ORDER,
-		"hugepages can't be allocated by the buddy allocator");
-
-	BUILD_BUG_ON_MSG((PMD_SHIFT - PAGE_SHIFT) < 2,
-			 "We need more than 2 pages to do deferred thp split");
-
 	if (!mmu_has_feature(MMU_FTR_16M_PAGE))
 		return 0;
 	/*
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 736d18b3cefd..aade18f3f21f 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -35,7 +35,7 @@ _GLOBAL(slb_allocate_realmode)
 	 * check for bad kernel/user address
 	 * (ea & ~REGION_MASK) >= PGTABLE_RANGE
 	 */
-	rldicr. r9,r3,4,(63 - PGTABLE_EADDR_SIZE - 4)
+	rldicr. r9,r3,4,(63 - H_PGTABLE_EADDR_SIZE - 4)
 	bne-	8f
 
 	srdi	r9,r3,60		/* get region */
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 42954f0b47ac..ee21b8699cee 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -37,8 +37,8 @@
 #include <asm/hugetlb.h>
 
 /* some sanity checks */
-#if (PGTABLE_RANGE >> 43) > SLICE_MASK_SIZE
-#error PGTABLE_RANGE exceeds slice_mask high_slices size
+#if (H_PGTABLE_RANGE >> 43) > SLICE_MASK_SIZE
+#error H_PGTABLE_RANGE exceeds slice_mask high_slices size
 #endif
 
 static DEFINE_SPINLOCK(slice_convert_lock);
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 22/65] powerpc/mm: Move page table index and and vaddr to pgtable.h
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (19 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 21/65] powerpc/mm: Make page table size a variable Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 23/65] powerpc/mm: Move pte related function together Aneesh Kumar K.V
                   ` (43 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Now that the page table size is a variable, we can move these to the
generic pgtable.h.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash.h    | 15 ---------------
 arch/powerpc/include/asm/book3s/64/pgtable.h | 12 ++++++++++++
 2 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 9485797f4468..833a897a0794 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -93,18 +93,8 @@
 
 #ifndef __ASSEMBLY__
 #define	pmd_bad(pmd)		(pmd_val(pmd) & PMD_BAD_BITS)
-#define pmd_page_vaddr(pmd)	__va(pmd_val(pmd) & ~PMD_MASKED_BITS)
 
 #define	pud_bad(pud)		(pud_val(pud) & PUD_BAD_BITS)
-#define pud_page_vaddr(pud)	__va(pud_val(pud) & ~PUD_MASKED_BITS)
-
-/* Pointers in the page table tree are physical addresses */
-#define __pgtable_ptr_val(ptr)	__pa(ptr)
-
-#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1))
-#define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1))
-#define pmd_index(address) (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1))
-#define pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1))
 
 extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 			    pte_t *ptep, unsigned long pte, int huge);
@@ -177,11 +167,6 @@ static inline int pgd_bad(pgd_t pgd)
 
 #define __HAVE_ARCH_PTE_SAME
 #define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
-static inline unsigned long pgd_page_vaddr(pgd_t pgd)
-{
-	return (unsigned long)__va(pgd_val(pgd) & ~PGD_MASKED_BITS);
-}
-
 
 /* Generic accessors to PTE bits */
 static inline int pte_none(pte_t pte)		{ return (pte_val(pte) & ~H_PTE_NONE_MASK) == 0; }
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index ef080bd5fce7..b4d72ab609df 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -534,6 +534,18 @@ static inline pgd_t pte_pgd(pte_t pte)
 
 extern struct page *pgd_page(pgd_t pgd);
 
+/* Pointers in the page table tree are physical addresses */
+#define __pgtable_ptr_val(ptr)	__pa(ptr)
+
+#define pmd_page_vaddr(pmd)	__va(pmd_val(pmd) & ~PMD_MASKED_BITS)
+#define pud_page_vaddr(pud)	__va(pud_val(pud) & ~PUD_MASKED_BITS)
+#define pgd_page_vaddr(pgd)	__va(pgd_val(pgd) & ~PGD_MASKED_BITS)
+
+#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1))
+#define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1))
+#define pmd_index(address) (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1))
+#define pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1))
+
 /*
  * Find an entry in a page-table-directory.  We combine the address region
  * (the high order N bits) and the pgd portion of the address.
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 23/65] powerpc/mm: Move pte related function together
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (20 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 22/65] powerpc/mm: Move page table index and and vaddr to pgtable.h Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 24/65] powerpc/mm/radix: Add radix pte defines Aneesh Kumar K.V
                   ` (42 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Only code movement. No functionality change.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 142 +++++++++++++--------------
 1 file changed, 71 insertions(+), 71 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index b4d72ab609df..de3fef1f2dbe 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -430,6 +430,77 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 	return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
 }
 
+static inline bool pte_user(pte_t pte)
+{
+	return !(pte_val(pte) & _PAGE_PRIVILEGED);
+}
+
+/* Encode and de-code a swap entry */
+#define MAX_SWAPFILES_CHECK() do { \
+	BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \
+	/*							\
+	 * Don't have overlapping bits with _PAGE_HPTEFLAGS	\
+	 * We filter HPTEFLAGS on set_pte.			\
+	 */							\
+	BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \
+	BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_SOFT_DIRTY);	\
+	} while (0)
+/*
+ * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT;
+ */
+#define SWP_TYPE_BITS 5
+#define __swp_type(x)		(((x).val >> _PAGE_BIT_SWAP_TYPE) \
+				& ((1UL << SWP_TYPE_BITS) - 1))
+#define __swp_offset(x)		(((x).val & PTE_RPN_MASK) >> PAGE_SHIFT)
+#define __swp_entry(type, offset)	((swp_entry_t) { \
+				((type) << _PAGE_BIT_SWAP_TYPE) \
+				| (((offset) << PAGE_SHIFT) & PTE_RPN_MASK)})
+/*
+ * swp_entry_t must be independent of pte bits. We build a swp_entry_t from
+ * swap type and offset we get from swap and convert that to pte to find a
+ * matching pte in linux page table.
+ * Clear bits not found in swap entries here.
+ */
+#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE })
+#define __swp_entry_to_pte(x)	__pte((x).val | _PAGE_PTE)
+
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define _PAGE_SWP_SOFT_DIRTY   (1UL << (SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE))
+#else
+#define _PAGE_SWP_SOFT_DIRTY	0UL
+#endif /* CONFIG_MEM_SOFT_DIRTY */
+
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY);
+}
+static inline bool pte_swp_soft_dirty(pte_t pte)
+{
+	return !!(pte_val(pte) & _PAGE_SWP_SOFT_DIRTY);
+}
+static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_SWP_SOFT_DIRTY);
+}
+#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
+
+static inline bool check_pte_access(unsigned long access, unsigned long ptev)
+{
+	/*
+	 * This check for _PAGE_RWX and _PAGE_PRESENT bits
+	 */
+	if (access & ~ptev)
+		return false;
+	/*
+	 * This check for access to privilege space
+	 */
+	if ((access & _PAGE_PRIVILEGED) != (ptev & _PAGE_PRIVILEGED))
+		return false;
+
+	return true;
+}
+
 #define _PAGE_CACHE_CTL	(_PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT)
 
 #define pgprot_noncached pgprot_noncached
@@ -576,77 +647,6 @@ extern struct page *pgd_page(pgd_t pgd);
 #define pgd_ERROR(e) \
 	pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
 
-/* Encode and de-code a swap entry */
-#define MAX_SWAPFILES_CHECK() do { \
-	BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \
-	/*							\
-	 * Don't have overlapping bits with _PAGE_HPTEFLAGS	\
-	 * We filter HPTEFLAGS on set_pte.			\
-	 */							\
-	BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \
-	BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_SOFT_DIRTY);	\
-	} while (0)
-/*
- * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT;
- */
-#define SWP_TYPE_BITS 5
-#define __swp_type(x)		(((x).val >> _PAGE_BIT_SWAP_TYPE) \
-				& ((1UL << SWP_TYPE_BITS) - 1))
-#define __swp_offset(x)		(((x).val & PTE_RPN_MASK) >> PAGE_SHIFT)
-#define __swp_entry(type, offset)	((swp_entry_t) { \
-				((type) << _PAGE_BIT_SWAP_TYPE) \
-				| (((offset) << PAGE_SHIFT) & PTE_RPN_MASK)})
-/*
- * swp_entry_t must be independent of pte bits. We build a swp_entry_t from
- * swap type and offset we get from swap and convert that to pte to find a
- * matching pte in linux page table.
- * Clear bits not found in swap entries here.
- */
-#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE })
-#define __swp_entry_to_pte(x)	__pte((x).val | _PAGE_PTE)
-
-static inline bool pte_user(pte_t pte)
-{
-	return !(pte_val(pte) & _PAGE_PRIVILEGED);
-}
-
-#ifdef CONFIG_MEM_SOFT_DIRTY
-#define _PAGE_SWP_SOFT_DIRTY   (1UL << (SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE))
-#else
-#define _PAGE_SWP_SOFT_DIRTY	0UL
-#endif /* CONFIG_MEM_SOFT_DIRTY */
-
-#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
-static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
-{
-	return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY);
-}
-static inline bool pte_swp_soft_dirty(pte_t pte)
-{
-	return !!(pte_val(pte) & _PAGE_SWP_SOFT_DIRTY);
-}
-static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
-{
-	return __pte(pte_val(pte) & ~_PAGE_SWP_SOFT_DIRTY);
-}
-#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
-
-static inline bool check_pte_access(unsigned long access, unsigned long ptev)
-{
-	/*
-	 * This check for _PAGE_RWX and _PAGE_PRESENT bits
-	 */
-	if (access & ~ptev)
-		return false;
-	/*
-	 * This check for access to privilege space
-	 */
-	if ((access & _PAGE_PRIVILEGED) != (ptev & _PAGE_PRIVILEGED))
-		return false;
-
-	return true;
-}
-
 void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
 void pgtable_cache_init(void);
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 24/65] powerpc/mm/radix: Add radix pte defines
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (21 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 23/65] powerpc/mm: Move pte related function together Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 25/65] powerpc/mm/radix: Dummy radix_enabled() Aneesh Kumar K.V
                   ` (41 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

This adds the PowerISA 3.0 specific pte defines. We share most of the
details with the hash Linux page table format. This patch defines only
the things where radix differs.
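
For reference, plugging in the index sizes defined below, both page
size configurations map a 52-bit effective address range:

  4K pages:  R_PGTABLE_EADDR_SIZE = 9 + 9 + 9 + 13 + PAGE_SHIFT(12) = 52
  64K pages: R_PGTABLE_EADDR_SIZE = 5 + 9 + 9 + 13 + PAGE_SHIFT(16) = 52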

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h   |   1 +
 arch/powerpc/include/asm/book3s/64/radix-4k.h  |  12 +++
 arch/powerpc/include/asm/book3s/64/radix-64k.h |  12 +++
 arch/powerpc/include/asm/book3s/64/radix.h     | 128 +++++++++++++++++++++++++
 4 files changed, 153 insertions(+)
 create mode 100644 arch/powerpc/include/asm/book3s/64/radix-4k.h
 create mode 100644 arch/powerpc/include/asm/book3s/64/radix-64k.h
 create mode 100644 arch/powerpc/include/asm/book3s/64/radix.h

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index de3fef1f2dbe..d59292459f5e 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -205,6 +205,7 @@ extern unsigned long __pgd_table_size;
 #endif /* __ASSEMBLY__ */
 
 #include <asm/book3s/64/hash.h>
+#include <asm/book3s/64/radix.h>
 #include <asm/barrier.h>
 
 /*
diff --git a/arch/powerpc/include/asm/book3s/64/radix-4k.h b/arch/powerpc/include/asm/book3s/64/radix-4k.h
new file mode 100644
index 000000000000..1966ee977206
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/radix-4k.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_POWERPC_PGTABLE_RADIX_4K_H
+#define _ASM_POWERPC_PGTABLE_RADIX_4K_H
+
+/*
+ * For 4K page size supported index is 13/9/9/9
+ */
+#define R_PTE_INDEX_SIZE  9  /* 2MB huge page */
+#define R_PMD_INDEX_SIZE  9  /* 1G huge page */
+#define R_PUD_INDEX_SIZE	 9
+#define R_PGD_INDEX_SIZE  13
+
+#endif /* _ASM_POWERPC_PGTABLE_RADIX_4K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/radix-64k.h b/arch/powerpc/include/asm/book3s/64/radix-64k.h
new file mode 100644
index 000000000000..fe1cebe55e01
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/radix-64k.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_POWERPC_PGTABLE_RADIX_64K_H
+#define _ASM_POWERPC_PGTABLE_RADIX_64K_H
+
+/*
+ * For 64K page size supported index is 13/9/9/5
+ */
+#define R_PTE_INDEX_SIZE  5  /* 2MB huge page */
+#define R_PMD_INDEX_SIZE  9  /* 1G huge page */
+#define R_PUD_INDEX_SIZE	 9
+#define R_PGD_INDEX_SIZE  13
+
+#endif /* _ASM_POWERPC_PGTABLE_RADIX_64K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
new file mode 100644
index 000000000000..72e18d5c96b4
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -0,0 +1,128 @@
+#ifndef _ASM_POWERPC_PGTABLE_RADIX_H
+#define _ASM_POWERPC_PGTABLE_RADIX_H
+
+#ifndef __ASSEMBLY__
+#include <asm/cmpxchg.h>
+#endif
+
+#ifdef CONFIG_PPC_64K_PAGES
+#include <asm/book3s/64/radix-64k.h>
+#else
+#include <asm/book3s/64/radix-4k.h>
+#endif
+
+/* An empty PTE can still have a R or C writeback */
+#define R_PTE_NONE_MASK		(_PAGE_DIRTY | _PAGE_ACCESSED)
+
+/* Bits to set in a RPMD/RPUD/RPGD */
+#define R_PMD_VAL_BITS		(0x8000000000000000UL | R_PTE_INDEX_SIZE)
+#define R_PUD_VAL_BITS		(0x8000000000000000UL | R_PMD_INDEX_SIZE)
+#define R_PGD_VAL_BITS		(0x8000000000000000UL | R_PUD_INDEX_SIZE)
+
+/* Don't have anything in the reserved bits and leaf bits */
+#define R_PMD_BAD_BITS		0x60000000000000e0UL
+#define R_PUD_BAD_BITS		0x60000000000000e0UL
+#define R_PGD_BAD_BITS		0x60000000000000e0UL
+
+/*
+ * Size of EA range mapped by our pagetables.
+ */
+#define R_PGTABLE_EADDR_SIZE (R_PTE_INDEX_SIZE + R_PMD_INDEX_SIZE +	\
+			      R_PUD_INDEX_SIZE + R_PGD_INDEX_SIZE + PAGE_SHIFT)
+#define R_PGTABLE_RANGE (ASM_CONST(1) << R_PGTABLE_EADDR_SIZE)
+
+#ifndef __ASSEMBLY__
+#define R_PTE_TABLE_SIZE	(sizeof(pte_t) << R_PTE_INDEX_SIZE)
+#define R_PMD_TABLE_SIZE	(sizeof(pmd_t) << R_PMD_INDEX_SIZE)
+#define R_PUD_TABLE_SIZE	(sizeof(pud_t) << R_PUD_INDEX_SIZE)
+#define R_PGD_TABLE_SIZE	(sizeof(pgd_t) << R_PGD_INDEX_SIZE)
+
+static inline unsigned long rpte_update(struct mm_struct *mm,
+					unsigned long addr,
+					pte_t *ptep, unsigned long clr,
+					unsigned long set,
+					int huge)
+{
+
+	pte_t pte;
+	unsigned long old_pte, new_pte;
+
+	do {
+		pte = READ_ONCE(*ptep);
+		old_pte = pte_val(pte);
+		new_pte = (old_pte | set) & ~clr;
+
+	} while (cpu_to_be64(old_pte) != __cmpxchg_u64((unsigned long *)ptep,
+						       cpu_to_be64(old_pte),
+						       cpu_to_be64(new_pte)));
+	/* We already do a sync in cmpxchg, is ptesync needed ?*/
+	asm volatile("ptesync" : : : "memory");
+	/* huge pages use the old page table lock */
+	if (!huge)
+		assert_pte_locked(mm, addr);
+
+	return old_pte;
+}
+
+/*
+ * Set the dirty and/or accessed bits atomically in a linux PTE, this
+ * function doesn't need to invalidate tlb.
+ */
+static inline void __rptep_set_access_flags(pte_t *ptep, pte_t entry)
+{
+	pte_t pte;
+	unsigned long old_pte, new_pte;
+	unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED |
+					      _PAGE_RW | _PAGE_EXEC);
+	do {
+		pte = READ_ONCE(*ptep);
+		old_pte = pte_val(pte);
+		new_pte = old_pte | set;
+
+	} while (cpu_to_be64(old_pte) != __cmpxchg_u64((unsigned long *)ptep,
+						       cpu_to_be64(old_pte),
+						       cpu_to_be64(new_pte)));
+	/* We already do a sync in cmpxchg, is ptesync needed ?*/
+	asm volatile("ptesync" : : : "memory");
+}
+
+static inline int rpte_same(pte_t pte_a, pte_t pte_b)
+{
+	return ((pte_val(pte_a) == pte_val(pte_b)));
+}
+
+static inline int rpte_none(pte_t pte)
+{
+	return (pte_val(pte) & ~R_PTE_NONE_MASK) == 0;
+}
+
+static inline void __set_rpte_at(struct mm_struct *mm, unsigned long addr,
+				 pte_t *ptep, pte_t pte, int percpu)
+{
+	*ptep = pte;
+	asm volatile("ptesync" : : : "memory");
+}
+
+static inline int rpmd_bad(pmd_t pmd)
+{
+	return !!(pmd_val(pmd) & R_PMD_BAD_BITS);
+}
+
+static inline int rpmd_same(pmd_t pmd_a, pmd_t pmd_b)
+{
+	return ((pmd_val(pmd_a) == pmd_val(pmd_b)));
+}
+
+static inline int rpud_bad(pud_t pud)
+{
+	return !!(pud_val(pud) & R_PUD_BAD_BITS);
+}
+
+
+static inline int rpgd_bad(pgd_t pgd)
+{
+	return !!(pgd_val(pgd) & R_PGD_BAD_BITS);
+}
+
+#endif /* __ASSEMBLY__ */
+#endif
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 25/65] powerpc/mm/radix: Dummy radix_enabled()
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (22 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 24/65] powerpc/mm/radix: Add radix pte defines Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 26/65] powerpc/mm: Add radix callbacks to pte accessors Aneesh Kumar K.V
                   ` (40 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

In this patch we add the radix Kconfig option and a conditional check.
radix_enabled() is written to always return 0 here. Once we have all
the needed radix changes added, we will update this to an mmu_feature
check.

We need to add this early so that we can get it all to build at this
early stage of the series.
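
A minimal sketch of how the stub is meant to be used (the conditional
accessors themselves arrive in the following patches); because
radix_enabled() is a constant 0 for now, the radix branch is simply
compiled out:

static inline int pte_none(pte_t pte)
{
	if (radix_enabled())	/* constant 0 for now, branch dropped */
		return rpte_none(pte);
	return hlpte_none(pte);
}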

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

radix kconfig variable
---
 arch/powerpc/include/asm/book3s/64/mmu.h | 2 ++
 arch/powerpc/include/asm/mmu.h           | 3 +++
 arch/powerpc/platforms/Kconfig.cputype   | 4 ++++
 3 files changed, 9 insertions(+)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index b86786f2521c..593d9e3ce8e7 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -19,6 +19,8 @@ struct mmu_psize_def {
 	unsigned long	sllp;	/* SLB L||LP (exact mask to use in slbmte) */
 };
 extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+
+#define radix_enabled() (0)
 #endif /* __ASSEMBLY__ */
 
 /* 64-bit classic hash table MMU */
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 5f55024f9522..decde4c4870d 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -204,6 +204,9 @@ static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
 #  include <asm/mmu-8xx.h>
 #endif
 
+#ifndef radix_enabled
+#define radix_enabled() (0)
+#endif
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_MMU_H_ */
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 142dff5e96d6..53299182dba9 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -331,6 +331,10 @@ config PPC_STD_MMU_64
 	def_bool y
 	depends on PPC_STD_MMU && PPC64
 
+config PPC_RADIX_MMU
+        def_bool y
+        depends on PPC_BOOK3S_64
+
 config PPC_MMU_NOHASH
 	def_bool y
 	depends on !PPC_STD_MMU
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 26/65] powerpc/mm: Add radix callbacks to pte accessors
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (23 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 25/65] powerpc/mm/radix: Dummy radix_enabled() Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 27/65] powerpc/mm: Move hugetlb and THP related pmd accessors to pgtable.h Aneesh Kumar K.V
                   ` (39 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

For those pte accessors that operate on a different set of pte bits
between hash and radix, we add a generic variant that conditionally
calls either the hash Linux or the radix variant.
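
For example, callers that were moved to pgtable.h earlier in the series
keep working unchanged; only the wrapper picks the MMU-specific
implementation (sketch based on the helpers in this series):

static inline void ptep_set_wrprotect(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep)
{
	if ((pte_val(*ptep) & _PAGE_WRITE) == 0)
		return;
	/* pte_update() now dispatches to hlpte_update() or rpte_update() */
	pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0);
}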

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash.h    | 41 +++++++++---------
 arch/powerpc/include/asm/book3s/64/pgtable.h | 64 ++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 833a897a0794..958491f59c2a 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -88,23 +88,26 @@
 /* Hash table based platforms need atomic updates of the linux PTE */
 #define PTE_ATOMIC_UPDATES	1
 
-#define PMD_BAD_BITS		(PTE_TABLE_SIZE-1)
-#define PUD_BAD_BITS		(PMD_TABLE_SIZE-1)
+#define H_PMD_BAD_BITS		(PTE_TABLE_SIZE-1)
+#define H_PUD_BAD_BITS		(PMD_TABLE_SIZE-1)
 
 #ifndef __ASSEMBLY__
-#define	pmd_bad(pmd)		(pmd_val(pmd) & PMD_BAD_BITS)
-
-#define	pud_bad(pud)		(pud_val(pud) & PUD_BAD_BITS)
+#define	hlpmd_bad(pmd)		(pmd_val(pmd) & H_PMD_BAD_BITS)
+#define	hlpud_bad(pud)		(pud_val(pud) & H_PUD_BAD_BITS)
+static inline int hlpgd_bad(pgd_t pgd)
+{
+	return (pgd_val(pgd) == 0);
+}
 
 extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 			    pte_t *ptep, unsigned long pte, int huge);
 extern unsigned long htab_convert_pte_flags(unsigned long pteflags);
 /* Atomic PTE updates */
-static inline unsigned long pte_update(struct mm_struct *mm,
-				       unsigned long addr,
-				       pte_t *ptep, unsigned long clr,
-				       unsigned long set,
-				       int huge)
+static inline unsigned long hlpte_update(struct mm_struct *mm,
+					 unsigned long addr,
+					 pte_t *ptep, unsigned long clr,
+					 unsigned long set,
+					 int huge)
 {
 	unsigned long old, tmp;
 	unsigned long busy = cpu_to_be64(H_PAGE_BUSY);
@@ -137,7 +140,7 @@ static inline unsigned long pte_update(struct mm_struct *mm,
 /* Set the dirty and/or accessed bits atomically in a linux PTE, this
  * function doesn't need to flush the hash entry
  */
-static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
+static inline void __hlptep_set_access_flags(pte_t *ptep, pte_t entry)
 {
 	unsigned long bits = pte_val(entry) &
 		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_READ | _PAGE_WRITE | _PAGE_EXEC |
@@ -160,24 +163,20 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
 	:"cc");
 }
 
-static inline int pgd_bad(pgd_t pgd)
+#define hlpte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
+
+static inline int hlpte_none(pte_t pte)
 {
-	return (pgd_val(pgd) == 0);
+	return (pte_val(pte) & ~H_PTE_NONE_MASK) == 0;
 }
 
-#define __HAVE_ARCH_PTE_SAME
-#define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
-
-/* Generic accessors to PTE bits */
-static inline int pte_none(pte_t pte)		{ return (pte_val(pte) & ~H_PTE_NONE_MASK) == 0; }
-
 /* This low level function performs the actual PTE insertion
  * Setting the PTE depends on the MMU type and other factors. It's
  * an horrible mess that I'm not going to try to clean up now but
  * I'm keeping it in one place rather than spread around
  */
-static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
-				pte_t *ptep, pte_t pte, int percpu)
+static inline void __set_hlpte_at(struct mm_struct *mm, unsigned long addr,
+				  pte_t *ptep, pte_t pte, int percpu)
 {
 	/*
 	 * Anything else just stores the PTE normally. That covers all 64-bit
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index d59292459f5e..18855437894f 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -262,6 +262,14 @@ extern unsigned long __pgd_table_size;
 
 #endif /* __real_pte */
 
+static inline unsigned long pte_update(struct mm_struct *mm, unsigned long addr,
+				       pte_t *ptep, unsigned long clr,
+				       unsigned long set, int huge)
+{
+	if (radix_enabled())
+		return rpte_update(mm, addr, ptep, clr, set, huge);
+	return hlpte_update(mm, addr, ptep, clr, set, huge);
+}
 /*
  * For hash even if we have _PAGE_ACCESSED = 0, we do a pte_update.
  * We currently remove entries from the hashtable regardless of whether
@@ -501,6 +509,39 @@ static inline bool check_pte_access(unsigned long access, unsigned long ptev)
 
 	return true;
 }
+/*
+ * Generic functions with hash/radix callbacks
+ */
+
+static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
+{
+	if (radix_enabled())
+		return __rptep_set_access_flags(ptep, entry);
+	return __hlptep_set_access_flags(ptep, entry);
+}
+
+#define __HAVE_ARCH_PTE_SAME
+static inline int pte_same(pte_t pte_a, pte_t pte_b)
+{
+	if (radix_enabled())
+		return rpte_same(pte_a, pte_b);
+	return hlpte_same(pte_a, pte_b);
+}
+
+static inline int pte_none(pte_t pte)
+{
+	if (radix_enabled())
+		return rpte_none(pte);
+	return hlpte_none(pte);
+}
+
+static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
+				pte_t *ptep, pte_t pte, int percpu)
+{
+	if (radix_enabled())
+		return __set_rpte_at(mm, addr, ptep, pte, percpu);
+	return __set_hlpte_at(mm, addr, ptep, pte, percpu);
+}
 
 #define _PAGE_CACHE_CTL	(_PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT)
 
@@ -555,6 +596,13 @@ static inline void pmd_clear(pmd_t *pmdp)
 #define pmd_none(pmd)		(!pmd_val(pmd))
 #define	pmd_present(pmd)	(!pmd_none(pmd))
 
+static inline int pmd_bad(pmd_t pmd)
+{
+	if (radix_enabled())
+		return rpmd_bad(pmd);
+	return hlpmd_bad(pmd);
+}
+
 static inline void pud_set(pud_t *pudp, unsigned long val)
 {
 	*pudp = __pud(val);
@@ -580,6 +628,15 @@ static inline pud_t pte_pud(pte_t pte)
 	return __pud(pte_val(pte));
 }
 #define pud_write(pud)		pte_write(pud_pte(pud))
+
+static inline int pud_bad(pud_t pud)
+{
+	if (radix_enabled())
+		return rpud_bad(pud);
+	return hlpud_bad(pud);
+}
+
+
 #define pgd_write(pgd)		pte_write(pgd_pte(pgd))
 static inline void pgd_set(pgd_t *pgdp, unsigned long val)
 {
@@ -604,6 +661,13 @@ static inline pgd_t pte_pgd(pte_t pte)
 	return __pgd(pte_val(pte));
 }
 
+static inline int pgd_bad(pgd_t pgd)
+{
+	if (radix_enabled())
+		return rpgd_bad(pgd);
+	return hlpgd_bad(pgd);
+}
+
 extern struct page *pgd_page(pgd_t pgd);
 
 /* Pointers in the page table tree are physical addresses */
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 27/65] powerpc/mm: Move hugetlb and THP related pmd accessors to pgtable.h
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (24 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 26/65] powerpc/mm: Add radix callbacks to pte accessors Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 28/65] powerpc/mm/radix: Add radix callback for pmd accessors Aneesh Kumar K.V
                   ` (38 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Here we create pgtable-64k.h and pgtable-4k.h and move the pmd accessors
that are common between hash and radix there. We can't share much with
the 4k Linux page size [1], so for now that header is empty. Later
patches will add conditional hash/radix accessors there, of the form
sketched below.

[1] The 4k Linux page size with the hash config doesn't support THP.
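
For illustration, the dispatchers added to pgtable-64k.h by the following
patches take this form (names as used later in this series):

	static inline int pmd_trans_huge(pmd_t pmd)
	{
		if (radix_enabled())
			return rpmd_trans_huge(pmd);
		return hlpmd_trans_huge(pmd);
	}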

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash-64k.h    | 83 ---------------------
 arch/powerpc/include/asm/book3s/64/pgtable-4k.h  |  7 ++
 arch/powerpc/include/asm/book3s/64/pgtable-64k.h | 94 ++++++++++++++++++++++++
 arch/powerpc/include/asm/book3s/64/pgtable.h     |  7 ++
 4 files changed, 108 insertions(+), 83 deletions(-)
 create mode 100644 arch/powerpc/include/asm/book3s/64/pgtable-4k.h
 create mode 100644 arch/powerpc/include/asm/book3s/64/pgtable-64k.h

diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index d60c431c96cb..173f35e9faef 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -118,57 +118,6 @@ static inline int remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr,
 #define H_PUD_TABLE_SIZE	(sizeof(pud_t) << PUD_INDEX_SIZE)
 #define H_PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)
 
-#ifdef CONFIG_HUGETLB_PAGE
-/*
- * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have
- * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD;
- *
- * Defined in such a way that we can optimize away code block at build time
- * if CONFIG_HUGETLB_PAGE=n.
- */
-static inline int pmd_huge(pmd_t pmd)
-{
-	/*
-	 * leaf pte for huge page
-	 */
-	return !!(pmd_val(pmd) & _PAGE_PTE);
-}
-
-static inline int pud_huge(pud_t pud)
-{
-	/*
-	 * leaf pte for huge page
-	 */
-	return !!(pud_val(pud) & _PAGE_PTE);
-}
-
-static inline int pgd_huge(pgd_t pgd)
-{
-	/*
-	 * leaf pte for huge page
-	 */
-	return !!(pgd_val(pgd) & _PAGE_PTE);
-}
-#define pgd_huge pgd_huge
-
-#ifdef CONFIG_DEBUG_VM
-extern int hugepd_ok(hugepd_t hpd);
-#define is_hugepd(hpd)               (hugepd_ok(hpd))
-#else
-/*
- * With 64k page size, we have hugepage ptes in the pgd and pmd entries. We don't
- * need to setup hugepage directory for them. Our pte and page directory format
- * enable us to have this enabled.
- */
-static inline int hugepd_ok(hugepd_t hpd)
-{
-	return 0;
-}
-#define is_hugepd(pdep)			0
-#endif /* CONFIG_DEBUG_VM */
-
-#endif /* CONFIG_HUGETLB_PAGE */
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 extern unsigned long pmd_hugepage_update(struct mm_struct *mm,
 					 unsigned long addr,
@@ -239,44 +188,12 @@ static inline int pmd_trans_huge(pmd_t pmd)
 		  (_PAGE_PTE | H_PAGE_THP_HUGE));
 }
 
-static inline int pmd_large(pmd_t pmd)
-{
-	return !!(pmd_val(pmd) & _PAGE_PTE);
-}
-
-static inline pmd_t pmd_mknotpresent(pmd_t pmd)
-{
-	return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT);
-}
-
 #define __HAVE_ARCH_PMD_SAME
 static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
 {
 	return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0);
 }
 
-static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
-					      unsigned long addr, pmd_t *pmdp)
-{
-	unsigned long old;
-
-	if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
-		return 0;
-	old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
-	return ((old & _PAGE_ACCESSED) != 0);
-}
-
-#define __HAVE_ARCH_PMDP_SET_WRPROTECT
-static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
-				      pmd_t *pmdp)
-{
-
-	if ((pmd_val(*pmdp) & _PAGE_WRITE) == 0)
-		return;
-
-	pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0);
-}
-
 #endif /*  CONFIG_TRANSPARENT_HUGEPAGE */
 #endif	/* __ASSEMBLY__ */
 
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
new file mode 100644
index 000000000000..423735f897f5
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
@@ -0,0 +1,7 @@
+#ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_4K_H
+#define _ASM_POWERPC_BOOK3S_64_PGTABLE_4K_H
+/*
+ * hash 4k can't share hugetlb and also doesn't support THP
+ */
+
+#endif /*_ASM_POWERPC_BOOK3S_64_PGTABLE_4K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
new file mode 100644
index 000000000000..ceadc2fd408f
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
@@ -0,0 +1,94 @@
+#ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_64K_H
+#define _ASM_POWERPC_BOOK3S_64_PGTABLE_64K_H
+
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_HUGETLB_PAGE
+/*
+ * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have
+ * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD;
+ *
+ * Defined in such a way that we can optimize away code block at build time
+ * if CONFIG_HUGETLB_PAGE=n.
+ */
+static inline int pmd_huge(pmd_t pmd)
+{
+	/*
+	 * leaf pte for huge page
+	 */
+	return !!(pmd_val(pmd) & _PAGE_PTE);
+}
+
+static inline int pud_huge(pud_t pud)
+{
+	/*
+	 * leaf pte for huge page
+	 */
+	return !!(pud_val(pud) & _PAGE_PTE);
+}
+
+static inline int pgd_huge(pgd_t pgd)
+{
+	/*
+	 * leaf pte for huge page
+	 */
+	return !!(pgd_val(pgd) & _PAGE_PTE);
+}
+#define pgd_huge pgd_huge
+
+#ifdef CONFIG_DEBUG_VM
+extern int hugepd_ok(hugepd_t hpd);
+#define is_hugepd(hpd)               (hugepd_ok(hpd))
+#else
+/*
+ * With 64k page size, we have hugepage ptes in the pgd and pmd entries. We don't
+ * need to setup hugepage directory for them. Our pte and page directory format
+ * enable us to have this enabled.
+ */
+static inline int hugepd_ok(hugepd_t hpd)
+{
+	return 0;
+}
+#define is_hugepd(pdep)			0
+#endif /* CONFIG_DEBUG_VM */
+
+#endif /* CONFIG_HUGETLB_PAGE */
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline int pmd_large(pmd_t pmd)
+{
+	return !!(pmd_val(pmd) & _PAGE_PTE);
+}
+
+static inline pmd_t pmd_mknotpresent(pmd_t pmd)
+{
+	return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT);
+}
+/*
+ * For radix we should always find H_PAGE_HASHPTE zero. Hence
+ * the below will work for radix too
+ */
+static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
+					      unsigned long addr, pmd_t *pmdp)
+{
+	unsigned long old;
+
+	if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
+		return 0;
+	old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
+	return ((old & _PAGE_ACCESSED) != 0);
+}
+
+#define __HAVE_ARCH_PMDP_SET_WRPROTECT
+static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+				      pmd_t *pmdp)
+{
+
+	if ((pmd_val(*pmdp) & _PAGE_WRITE) == 0)
+		return;
+
+	pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0);
+}
+
+#endif /*  CONFIG_TRANSPARENT_HUGEPAGE */
+#endif	/* __ASSEMBLY__ */
+#endif /*_ASM_POWERPC_BOOK3S_64_PGTABLE_64K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 18855437894f..e3a8fc469460 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -206,6 +206,13 @@ extern unsigned long __pgd_table_size;
 
 #include <asm/book3s/64/hash.h>
 #include <asm/book3s/64/radix.h>
+
+#ifdef CONFIG_PPC_64K_PAGES
+#include <asm/book3s/64/pgtable-64k.h>
+#else
+#include <asm/book3s/64/pgtable-4k.h>
+#endif
+
 #include <asm/barrier.h>
 
 /*
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 28/65] powerpc/mm/radix: Add radix callback for pmd accessors
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (25 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 27/65] powerpc/mm: Move hugetlb and THP related pmd accessors to pgtable.h Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 29/65] powerpc/mm: Abstraction for early init routines Aneesh Kumar K.V
                   ` (37 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

This only handles the 64k Linux page size for now. With the 64k hash
Linux config, a THP pmd needs to be differentiated from a hugetlb huge
page, because with THP we have to track hash PTE slot information for
each subpage. This is not needed for hugetlb huge pages, because we
don't do MPSS with hugetlb.

Radix doesn't have any such restriction.
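
As a concrete contrast (both helpers are from this patch), the hash variant
has to check H_PAGE_THP_HUGE in addition to _PAGE_PTE so a THP pmd can be
told apart from a hugetlb leaf entry, while the radix variant only needs
_PAGE_PTE:

	static inline int hlpmd_trans_huge(pmd_t pmd)
	{
		return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE)) ==
			  (_PAGE_PTE | H_PAGE_THP_HUGE));
	}

	static inline int rpmd_trans_huge(pmd_t pmd)
	{
		return !!(pmd_val(pmd) & _PAGE_PTE);
	}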

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash-64k.h    |  9 ++++-----
 arch/powerpc/include/asm/book3s/64/pgtable-64k.h | 23 +++++++++++++++++++++++
 arch/powerpc/include/asm/book3s/64/radix.h       |  9 +++++++++
 3 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 173f35e9faef..ad797a8cc615 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -97,8 +97,8 @@ extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index);
 
 extern int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
 			   unsigned long pfn, unsigned long size, pgprot_t);
-static inline int remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr,
-			       unsigned long pfn, pgprot_t prot)
+static inline int hlremap_4k_pfn(struct vm_area_struct *vma, unsigned long addr,
+				 unsigned long pfn, pgprot_t prot)
 {
 	if (pfn > (PTE_RPN_MASK >> PAGE_SHIFT)) {
 		WARN(1, "remap_4k_pfn called with wrong pfn value\n");
@@ -182,14 +182,13 @@ static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
  * that for explicit huge pages.
  *
  */
-static inline int pmd_trans_huge(pmd_t pmd)
+static inline int hlpmd_trans_huge(pmd_t pmd)
 {
 	return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE)) ==
 		  (_PAGE_PTE | H_PAGE_THP_HUGE));
 }
 
-#define __HAVE_ARCH_PMD_SAME
-static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
+static inline int hlpmd_same(pmd_t pmd_a, pmd_t pmd_b)
 {
 	return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0);
 }
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
index ceadc2fd408f..028b8f6e002b 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
@@ -89,6 +89,29 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 	pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0);
 }
 
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+	if (radix_enabled())
+		return rpmd_trans_huge(pmd);
+	return hlpmd_trans_huge(pmd);
+}
+
+#define __HAVE_ARCH_PMD_SAME
+static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
+{
+	if (radix_enabled())
+		return rpmd_same(pmd_a, pmd_b);
+	return hlpmd_same(pmd_a, pmd_b);
+}
 #endif /*  CONFIG_TRANSPARENT_HUGEPAGE */
+
+static inline int remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr,
+			       unsigned long pfn, pgprot_t prot)
+{
+	if (radix_enabled())
+		BUG();
+	return hlremap_4k_pfn(vma, addr, pfn, prot);
+
+}
 #endif	/* __ASSEMBLY__ */
 #endif /*_ASM_POWERPC_BOOK3S_64_PGTABLE_64K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 72e18d5c96b4..46a5381324f5 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -124,5 +124,14 @@ static inline int rpgd_bad(pgd_t pgd)
 	return !!(pgd_val(pgd) & R_PGD_BAD_BITS);
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+static inline int rpmd_trans_huge(pmd_t pmd)
+{
+	return !!(pmd_val(pmd) & _PAGE_PTE);
+}
+
+#endif
+
 #endif /* __ASSEMBLY__ */
 #endif
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 29/65] powerpc/mm: Abstraction for early init routines
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (26 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 28/65] powerpc/mm/radix: Add radix callback for pmd accessors Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 30/65] powerpc/mm/radix: Add radix callback " Aneesh Kumar K.V
                   ` (36 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/mmu.h | 20 ++++++++++++++++++++
 arch/powerpc/include/asm/mmu.h           | 14 +++++++-------
 arch/powerpc/mm/hash_utils_64.c          |  6 +++---
 3 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 593d9e3ce8e7..a66cd3e65a33 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -97,5 +97,25 @@ extern int mmu_vmalloc_psize;
 extern int mmu_vmemmap_psize;
 extern int mmu_io_psize;
 
+/* MMU initialization */
+extern void hlearly_init_mmu(void);
+static inline void early_init_mmu(void)
+{
+	return hlearly_init_mmu();
+}
+extern void hlearly_init_mmu_secondary(void);
+static inline void early_init_mmu_secondary(void)
+{
+	return hlearly_init_mmu_secondary();
+}
+
+extern void hlsetup_initial_memory_limit(phys_addr_t first_memblock_base,
+					 phys_addr_t first_memblock_size);
+static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+					      phys_addr_t first_memblock_size)
+{
+	return hlsetup_initial_memory_limit(first_memblock_base,
+					   first_memblock_size);
+}
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index decde4c4870d..34070e982f1d 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -122,13 +122,6 @@ static inline void mmu_clear_feature(unsigned long feature)
 
 extern unsigned int __start___mmu_ftr_fixup, __stop___mmu_ftr_fixup;
 
-/* MMU initialization */
-extern void early_init_mmu(void);
-extern void early_init_mmu_secondary(void);
-
-extern void setup_initial_memory_limit(phys_addr_t first_memblock_base,
-				       phys_addr_t first_memblock_size);
-
 #ifdef CONFIG_PPC64
 /* This is our real memory area size on ppc64 server, on embedded, we
  * make it match the size our of bolted TLB area
@@ -185,6 +178,13 @@ static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
 #include <asm/book3s/64/mmu.h>
 #else /* CONFIG_PPC_BOOK3S_64 */
 
+#ifndef __ASSEMBLY__
+/* MMU initialization */
+extern void early_init_mmu(void);
+extern void early_init_mmu_secondary(void);
+extern void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+				       phys_addr_t first_memblock_size);
+#endif /* __ASSEMBLY__ */
 #endif
 
 #if defined(CONFIG_PPC_STD_MMU_32)
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index c4a5f13997b8..8dfc6ae85d4a 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -864,7 +864,7 @@ static void __init htab_initialize(void)
 #undef KB
 #undef MB
 
-void __init early_init_mmu(void)
+void __init hlearly_init_mmu(void)
 {
 	/*
 	 * initialize page table size
@@ -889,7 +889,7 @@ void __init early_init_mmu(void)
 }
 
 #ifdef CONFIG_SMP
-void early_init_mmu_secondary(void)
+void hlearly_init_mmu_secondary(void)
 {
 	/* Initialize hash table for that CPU */
 	if (!firmware_has_feature(FW_FEATURE_LPAR))
@@ -1631,7 +1631,7 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
 }
 #endif /* CONFIG_DEBUG_PAGEALLOC */
 
-void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+void hlsetup_initial_memory_limit(phys_addr_t first_memblock_base,
 				phys_addr_t first_memblock_size)
 {
 	/* We don't currently support the first MEMBLOCK not mapping 0
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 30/65] powerpc/mm/radix: Add radix callback for early init routines
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (27 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 29/65] powerpc/mm: Abstraction for early init routines Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 31/65] powerpc/mm: Abstraction for vmemmap and map_kernel_page Aneesh Kumar K.V
                   ` (35 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

This adds routines for early radix setup. We use the device tree
property ibm,processor-radix-AP-encodings to find the supported page
sizes. If the property is not found, we assume 64K and 4K as the
supported page sizes.

We map the vmemmap using a 2M page size if we can. The linear mapping
is done such that we use the largest suitable page size for each range.
For example, 3.5G of memory is mapped with 1G mappings up to the 3G
boundary and 2M mappings for the rest, as worked through below.
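
Worked example for the 3.5G case (assuming the memblock starts at 0 and the
firmware reports both 1G and 2M page sizes):

  0x00000000 - 0xC0000000 (0 - 3G)    : 3   x 1G mappings
  0xC0000000 - 0xE0000000 (3G - 3.5G) : 256 x 2M mappings

Each pass over a memblock picks the largest page size still available, maps
the part of the region aligned to that size, and retries the remainder with
the next smaller size.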

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/mmu.h   |  17 +-
 arch/powerpc/include/asm/book3s/64/radix.h |   2 +
 arch/powerpc/mm/Makefile                   |   1 +
 arch/powerpc/mm/pgtable-radix.c            | 344 +++++++++++++++++++++++++++++
 arch/powerpc/platforms/powernv/setup.c     |   5 +-
 5 files changed, 367 insertions(+), 2 deletions(-)
 create mode 100644 arch/powerpc/mm/pgtable-radix.c

diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index a66cd3e65a33..a526642f1c02 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -16,7 +16,10 @@ struct mmu_psize_def {
 	int		penc[MMU_PAGE_COUNT];	/* HPTE encoding */
 	unsigned int	tlbiel;	/* tlbiel supported for that page size */
 	unsigned long	avpnm;	/* bits to mask out in AVPN in the HPTE */
-	unsigned long	sllp;	/* SLB L||LP (exact mask to use in slbmte) */
+	union {
+		unsigned long	sllp;	/* SLB L||LP (exact mask to use in slbmte) */
+		unsigned long ap;	/* Ap encoding used by PowerISA 3.0 */
+	};
 };
 extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
 
@@ -98,22 +101,34 @@ extern int mmu_vmemmap_psize;
 extern int mmu_io_psize;
 
 /* MMU initialization */
+extern void radix_init_native(void);
 extern void hlearly_init_mmu(void);
+extern void rearly_init_mmu(void);
 static inline void early_init_mmu(void)
 {
+	if (radix_enabled())
+		return rearly_init_mmu();
 	return hlearly_init_mmu();
 }
 extern void hlearly_init_mmu_secondary(void);
+extern void rearly_init_mmu_secondary(void);
 static inline void early_init_mmu_secondary(void)
 {
+	if (radix_enabled())
+		return rearly_init_mmu_secondary();
 	return hlearly_init_mmu_secondary();
 }
 
 extern void hlsetup_initial_memory_limit(phys_addr_t first_memblock_base,
 					 phys_addr_t first_memblock_size);
+extern void rsetup_initial_memory_limit(phys_addr_t first_memblock_base,
+					 phys_addr_t first_memblock_size);
 static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base,
 					      phys_addr_t first_memblock_size)
 {
+	if (radix_enabled())
+		return rsetup_initial_memory_limit(first_memblock_base,
+						   first_memblock_size);
 	return hlsetup_initial_memory_limit(first_memblock_base,
 					   first_memblock_size);
 }
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 46a5381324f5..806dc2195f85 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -133,5 +133,7 @@ static inline int rpmd_trans_huge(pmd_t pmd)
 
 #endif
 
+extern int map_radix_kernel_page(unsigned long ea, unsigned long pa,
+				 pgprot_t flags, unsigned int psz);
 #endif /* __ASSEMBLY__ */
 #endif
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 6b5cc805c7ba..d421719fcf21 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_PPC_BOOK3E)	+= tlb_low_$(CONFIG_WORD_SIZE)e.o
 hash64-$(CONFIG_PPC_NATIVE)	:= hash_native_64.o
 obj-$(CONFIG_PPC_BOOK3E_64)   += pgtable-book3e.o
 obj-$(CONFIG_PPC_STD_MMU_64)	+= pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y)
+obj-$(CONFIG_PPC_RADIX_MMU)	+= pgtable-radix.o
 obj-$(CONFIG_PPC_STD_MMU_32)	+= ppc_mmu_32.o hash_low_32.o
 obj-$(CONFIG_PPC_STD_MMU)	+= tlb_hash$(CONFIG_WORD_SIZE).o \
 				   mmu_context_hash$(CONFIG_WORD_SIZE).o
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
new file mode 100644
index 000000000000..900c3386c6e1
--- /dev/null
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -0,0 +1,344 @@
+/*
+ * page table handling routines for radix page table
+ *
+ *  Copyright (C) 2015 Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+#include <linux/sched.h>
+#include <linux/memblock.h>
+#include <linux/of_fdt.h>
+
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/dma.h>
+#include <asm/machdep.h>
+#include <asm/mmu.h>
+#include <asm/firmware.h>
+
+static int native_update_partition_table(u64 patb1)
+{
+	partition_tb->patb1 = cpu_to_be64(patb1);
+	return 0;
+}
+
+static __ref void *early_alloc_pgtable(unsigned long size)
+{
+	void *pt;
+
+	pt = __va(memblock_alloc_base(size, size, __pa(MAX_DMA_ADDRESS)));
+	memset(pt, 0, size);
+
+	return pt;
+}
+
+int map_radix_kernel_page(unsigned long ea, unsigned long pa,
+			  pgprot_t flags,
+			  unsigned int map_page_size)
+{
+	pgd_t *pgdp;
+	pud_t *pudp;
+	pmd_t *pmdp;
+	pte_t *ptep;
+	/*
+	 * Make sure task size is correct as per the max addr
+	 */
+	BUILD_BUG_ON(TASK_SIZE_USER64 > R_PGTABLE_RANGE);
+	if (slab_is_available()) {
+		pgdp = pgd_offset_k(ea);
+		pudp = pud_alloc(&init_mm, pgdp, ea);
+		if (!pudp)
+			return -ENOMEM;
+		if (map_page_size == PUD_SIZE) {
+			ptep = (pte_t *)pudp;
+			goto set_the_pte;
+		}
+		pmdp = pmd_alloc(&init_mm, pudp, ea);
+		if (!pmdp)
+			return -ENOMEM;
+		if (map_page_size == PMD_SIZE) {
+			ptep = (pte_t *)pudp;
+			goto set_the_pte;
+		}
+		ptep = pte_alloc_kernel(pmdp, ea);
+		if (!ptep)
+			return -ENOMEM;
+	} else {
+		pgdp = pgd_offset_k(ea);
+		if (pgd_none(*pgdp)) {
+			pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
+			BUG_ON(pudp == NULL);
+			pgd_populate(&init_mm, pgdp, pudp);
+		}
+		pudp = pud_offset(pgdp, ea);
+		if (map_page_size == PUD_SIZE) {
+			ptep = (pte_t *)pudp;
+			goto set_the_pte;
+		}
+		if (pud_none(*pudp)) {
+			pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
+			BUG_ON(pmdp == NULL);
+			pud_populate(&init_mm, pudp, pmdp);
+		}
+		pmdp = pmd_offset(pudp, ea);
+		if (map_page_size == PMD_SIZE) {
+			ptep = (pte_t *)pudp;
+			goto set_the_pte;
+		}
+		if (!pmd_present(*pmdp)) {
+			ptep = early_alloc_pgtable(PAGE_SIZE);
+			BUG_ON(ptep == NULL);
+			pmd_populate_kernel(&init_mm, pmdp, ptep);
+		}
+		ptep = pte_offset_kernel(pmdp, ea);
+	}
+
+set_the_pte:
+	set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, flags));
+	smp_wmb();
+	return 0;
+}
+
+static void __init radix_init_pgtable(void)
+{
+	int loop_count;
+	u64 base, end, start_addr;
+	unsigned long rts_field;
+	struct memblock_region *reg;
+	unsigned long linear_page_size;
+
+	/* We don't support slb for radix */
+	mmu_slb_size = 0;
+	/*
+	 * Create the linear mapping, using standard page size for now
+	 */
+	loop_count = 0;
+	for_each_memblock(memory, reg) {
+
+		start_addr = reg->base;
+
+redo:
+		if (loop_count < 1 && mmu_psize_defs[MMU_PAGE_1G].shift)
+			linear_page_size = PUD_SIZE;
+		else if (loop_count < 2 && mmu_psize_defs[MMU_PAGE_2M].shift)
+			linear_page_size = PMD_SIZE;
+		else
+			linear_page_size = PAGE_SIZE;
+
+		base = _ALIGN_UP(start_addr, linear_page_size);
+		end = _ALIGN_DOWN(reg->base + reg->size, linear_page_size);
+
+		pr_info("Mapping range 0x%lx - 0x%lx with 0x%lx\n",
+			(unsigned long)base, (unsigned long)end,
+			linear_page_size);
+
+		while (base < end) {
+			map_radix_kernel_page((unsigned long)__va(base),
+					      base, PAGE_KERNEL_X,
+					      linear_page_size);
+			base += linear_page_size;
+		}
+		/*
+		 * map the rest using lower page size
+		 */
+		if (end < reg->base + reg->size) {
+			start_addr = end;
+			loop_count++;
+			goto redo;
+		}
+	}
+	/*
+	 * Allocate Partition table and process table for the
+	 * host.
+	 */
+	BUILD_BUG_ON_MSG((PRTB_SIZE_SHIFT > 23), "Process table size too large.");
+	process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT);
+	/*
+	 * Fill in the process table.
+	 * we support 52 bits, hence 52-28 = 24, 11000
+	 */
+	rts_field = 3ull << PPC_BITLSHIFT(2);
+	process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | R_PGD_INDEX_SIZE);
+	/*
+	 * Fill in the partition table. We are supposed to use the effective
+	 * address of the process table here. But our linear mapping also
+	 * enables us to use the physical address here.
+	 */
+	ppc_md.update_partition_table(__pa(process_tb) | (PRTB_SIZE_SHIFT - 12) | PATB_GR);
+	pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd);
+}
+
+static void __init radix_init_partition_table(void)
+{
+	unsigned long rts_field;
+	/*
+	 * we support 52 bits, hence 52-28 = 24, 11000
+	 */
+	rts_field = 3ull << PPC_BITLSHIFT(2);
+
+	BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large.");
+	partition_tb = early_alloc_pgtable(1UL << PATB_SIZE_SHIFT);
+	partition_tb->patb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) |
+					  R_PGD_INDEX_SIZE | PATB_HR);
+	printk("Partition table %p\n", partition_tb);
+
+	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
+	/*
+	 * update partition table control register,
+	 * 64 K size.
+	 */
+	mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
+}
+
+void __init radix_init_native(void)
+{
+	ppc_md.update_partition_table = native_update_partition_table;
+}
+
+static int __init get_idx_from_shift(unsigned int shift)
+{
+	int idx = -1;
+
+	switch (shift) {
+	case 0xc:
+		idx = MMU_PAGE_4K;
+		break;
+	case 0x10:
+		idx = MMU_PAGE_64K;
+		break;
+	case 0x15:
+		idx = MMU_PAGE_2M;
+		break;
+	case 0x1e:
+		idx = MMU_PAGE_1G;
+		break;
+	}
+	return idx;
+}
+
+static int __init radix_dt_scan_page_sizes(unsigned long node,
+					   const char *uname, int depth,
+					   void *data)
+{
+	int size = 0;
+	int shift, idx;
+	unsigned int ap;
+	const __be32 *prop;
+	const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+
+	/* We are scanning "cpu" nodes only */
+	if (type == NULL || strcmp(type, "cpu") != 0)
+		return 0;
+
+	prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size);
+	if (!prop)
+		return 0;
+
+	pr_info("Page sizes from device-tree:\n");
+	for (; size >= 4; size -= 4, ++prop) {
+
+		struct mmu_psize_def *def;
+
+		/* top 3 bit is AP encoding */
+		shift = be32_to_cpu(prop[0]) & ~(0xe << 28);
+		ap = be32_to_cpu(prop[0]) >> 29;
+		pr_info("Page size shift = %d AP=0x%x\n", shift, ap);
+
+		idx = get_idx_from_shift(shift);
+		if (idx < 0)
+			continue;
+
+		def = &mmu_psize_defs[idx];
+		def->shift = shift;
+		def->ap  = ap;
+	}
+
+	/* needed ? */
+	cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B;
+	return 1;
+}
+
+static void __init radix_init_page_sizes(void)
+{
+	int rc;
+
+	/*
+	 * Try to find the available page sizes in the device-tree
+	 */
+	rc = of_scan_flat_dt(radix_dt_scan_page_sizes, NULL);
+	if (rc != 0)  /* Found */
+		goto found;
+	/*
+	 * let's assume we have page 4k and 64k support
+	 */
+	mmu_psize_defs[MMU_PAGE_4K].shift = 12;
+	mmu_psize_defs[MMU_PAGE_4K].ap = 0x0;
+
+	mmu_psize_defs[MMU_PAGE_64K].shift = 16;
+	mmu_psize_defs[MMU_PAGE_64K].ap = 0x5;
+found:
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	if (mmu_psize_defs[MMU_PAGE_2M].shift) {
+		/*
+		 * map vmemmap using 2M if available
+		 */
+		mmu_vmemmap_psize = MMU_PAGE_2M;
+	}
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+	return;
+}
+
+void __init rearly_init_mmu(void)
+{
+#ifdef CONFIG_PPC_64K_PAGES
+	/* PAGE_SIZE mappings */
+	mmu_virtual_psize = MMU_PAGE_64K;
+#else
+	mmu_virtual_psize = MMU_PAGE_4K;
+#endif
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	/* vmemmap mapping */
+	mmu_vmemmap_psize = mmu_virtual_psize;
+#endif
+	/*
+	 * initialize page table size
+	 */
+	__pte_index_size = R_PTE_INDEX_SIZE;
+	__pmd_index_size = R_PMD_INDEX_SIZE;
+	__pud_index_size = R_PUD_INDEX_SIZE;
+	__pgd_index_size = R_PGD_INDEX_SIZE;
+	__pmd_cache_index = R_PMD_INDEX_SIZE;
+	__pte_table_size = R_PTE_TABLE_SIZE;
+	__pmd_table_size = R_PMD_TABLE_SIZE;
+	__pud_table_size = R_PUD_TABLE_SIZE;
+	__pgd_table_size = R_PGD_TABLE_SIZE;
+
+	radix_init_page_sizes();
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		radix_init_partition_table();
+
+	radix_init_pgtable();
+}
+
+void rearly_init_mmu_secondary(void)
+{
+	/*
+	 * update partition table control register, 64 K size.
+	 */
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		mtspr(SPRN_PTCR,
+		      __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
+}
+
+void rsetup_initial_memory_limit(phys_addr_t first_memblock_base,
+				phys_addr_t first_memblock_size)
+{
+	/* Finally limit subsequent allocations */
+	memblock_set_current_limit(first_memblock_base + first_memblock_size);
+}
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 1acb0c72d923..ee6430bedcc3 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -273,7 +273,10 @@ static int __init pnv_probe(void)
 	if (!of_flat_dt_is_compatible(root, "ibm,powernv"))
 		return 0;
 
-	hpte_init_native();
+	if (IS_ENABLED(CONFIG_PPC_RADIX_MMU) && radix_enabled())
+		radix_init_native();
+	else if (IS_ENABLED(CONFIG_PPC_STD_MMU_64))
+		hpte_init_native();
 
 	if (firmware_has_feature(FW_FEATURE_OPAL))
 		pnv_setup_machdep_opal();
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 31/65] powerpc/mm: Abstraction for vmemmap and map_kernel_page
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (28 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 30/65] powerpc/mm/radix: Add radix callback " Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 32/65] powerpc/mm/radix: Add radix callback for vmemmap and map_kernel page Aneesh Kumar K.V
                   ` (34 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash.h    |  8 ++++++++
 arch/powerpc/include/asm/book3s/64/pgtable.h | 20 ++++++++++++++++++++
 arch/powerpc/include/asm/nohash/64/pgtable.h |  7 +++++++
 arch/powerpc/mm/init_64.c                    |  5 -----
 arch/powerpc/mm/mmu_decl.h                   |  5 -----
 arch/powerpc/mm/pgtable-hash64.c             | 12 ++++++------
 6 files changed, 41 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 958491f59c2a..43bd7d15f41e 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -197,6 +197,14 @@ static inline void hpte_do_hugepage_flush(struct mm_struct *mm,
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+
+extern int hlmap_kernel_page(unsigned long ea, unsigned long pa,
+			     unsigned long flags);
+extern int __meminit hlvmemmap_create_mapping(unsigned long start,
+					      unsigned long page_size,
+					      unsigned long phys);
+extern void hlvmemmap_remove_mapping(unsigned long start,
+				     unsigned long page_size);
 #endif /* !__ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_BOOK3S_64_HASH_H */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index e3a8fc469460..bf6a600cb109 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -722,6 +722,26 @@ extern struct page *pgd_page(pgd_t pgd);
 void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
 void pgtable_cache_init(void);
 
+static inline int map_kernel_page(unsigned long ea, unsigned long pa,
+				  unsigned long flags)
+{
+	return hlmap_kernel_page(ea, pa, flags);
+}
+
+static inline int __meminit vmemmap_create_mapping(unsigned long start,
+						   unsigned long page_size,
+						   unsigned long phys)
+{
+	return hlvmemmap_create_mapping(start, page_size, phys);
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+static inline void vmemmap_remove_mapping(unsigned long start,
+					  unsigned long page_size)
+{
+	return hlvmemmap_remove_mapping(start, page_size);
+}
+#endif
 struct page *realmode_pfn_to_page(unsigned long pfn);
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 10debb93c4a4..f143d6fb3576 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -362,6 +362,13 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
 
 void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
 void pgtable_cache_init(void);
+extern int map_kernel_page(unsigned long ea, unsigned long pa,
+			   unsigned long flags);
+extern int __meminit vmemmap_create_mapping(unsigned long start,
+					    unsigned long page_size,
+					    unsigned long phys);
+extern void vmemmap_remove_mapping(unsigned long start,
+				   unsigned long page_size);
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_NOHASH_64_PGTABLE_H */
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 09ca65e55b58..33709bdb0419 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -240,9 +240,6 @@ static __meminit void vmemmap_list_populate(unsigned long phys,
 	vmemmap_list = vmem_back;
 }
 
-extern int __meminit vmemmap_create_mapping(unsigned long start,
-					    unsigned long page_size,
-					    unsigned long phys);
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 {
 	unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
@@ -281,8 +278,6 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-extern void vmemmap_remove_mapping(unsigned long start,
-				   unsigned long page_size);
 static unsigned long vmemmap_list_free(unsigned long start)
 {
 	struct vmemmap_backing *vmem_back, *vmem_back_prev;
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 898d63365cdd..6360f54ef2d0 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -109,11 +109,6 @@ extern unsigned long Hash_size, Hash_mask;
 
 #endif /* CONFIG_PPC32 */
 
-#ifdef CONFIG_PPC64
-extern int map_kernel_page(unsigned long ea, unsigned long pa,
-			   unsigned long flags);
-#endif /* CONFIG_PPC64 */
-
 extern unsigned long ioremap_bot;
 extern unsigned long __max_low_memory;
 extern phys_addr_t __initial_memory_limit_addr;
diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c
index 183f80a528ba..b926b0a4ae0c 100644
--- a/arch/powerpc/mm/pgtable-hash64.c
+++ b/arch/powerpc/mm/pgtable-hash64.c
@@ -26,9 +26,9 @@
  * On hash-based CPUs, the vmemmap is bolted in the hash table.
  *
  */
-int __meminit vmemmap_create_mapping(unsigned long start,
-				     unsigned long page_size,
-				     unsigned long phys)
+int __meminit hlvmemmap_create_mapping(unsigned long start,
+				       unsigned long page_size,
+				       unsigned long phys)
 {
 	int rc = htab_bolt_mapping(start, start + page_size, phys,
 				   pgprot_val(PAGE_KERNEL),
@@ -43,8 +43,8 @@ int __meminit vmemmap_create_mapping(unsigned long start,
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-void vmemmap_remove_mapping(unsigned long start,
-			    unsigned long page_size)
+void hlvmemmap_remove_mapping(unsigned long start,
+			      unsigned long page_size)
 {
 	int rc = htab_remove_mapping(start, start + page_size,
 				     mmu_vmemmap_psize,
@@ -60,7 +60,7 @@ void vmemmap_remove_mapping(unsigned long start,
  * map_kernel_page adds an entry to the ioremap page table
  * and adds an entry to the HPT, possibly bolting it
  */
-int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags)
+int hlmap_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags)
 {
 	pgd_t *pgdp;
 	pud_t *pudp;
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 32/65] powerpc/mm/radix: Add radix callback for vmemmap and map_kernel page
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (29 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 31/65] powerpc/mm: Abstraction for vmemmap and map_kernel_page Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 33/65] powerpc/mm: Abstraction for switch_mmu_context Aneesh Kumar K.V
                   ` (33 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 12 ++++++++++++
 arch/powerpc/include/asm/book3s/64/radix.h   |  6 ++++++
 arch/powerpc/mm/pgtable-radix.c              | 20 ++++++++++++++++++++
 3 files changed, 38 insertions(+)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index bf6a600cb109..a993b7c820a9 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -725,6 +725,13 @@ void pgtable_cache_init(void);
 static inline int map_kernel_page(unsigned long ea, unsigned long pa,
 				  unsigned long flags)
 {
+	if (radix_enabled()) {
+#if defined(CONFIG_PPC_RADIX_MMU) && defined(DEBUG_VM)
+		unsigned long page_size = 1 << mmu_psize_defs[mmu_io_psize].shift;
+		WARN((page_size != PAGE_SIZE), "I/O page size != PAGE_SIZE");
+#endif
+		return map_radix_kernel_page(ea, pa, __pgprot(flags), PAGE_SIZE);
+	}
 	return hlmap_kernel_page(ea, pa, flags);
 }
 
@@ -732,6 +739,8 @@ static inline int __meminit vmemmap_create_mapping(unsigned long start,
 						   unsigned long page_size,
 						   unsigned long phys)
 {
+	if (radix_enabled())
+		return rvmemmap_create_mapping(start, page_size, phys);
 	return hlvmemmap_create_mapping(start, page_size, phys);
 }
 
@@ -739,6 +748,9 @@ static inline int __meminit vmemmap_create_mapping(unsigned long start,
 static inline void vmemmap_remove_mapping(unsigned long start,
 					  unsigned long page_size)
 {
+
+	if (radix_enabled())
+		return rvmemmap_remove_mapping(start, page_size);
 	return hlvmemmap_remove_mapping(start, page_size);
 }
 #endif
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 806dc2195f85..040c4a56d07b 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -133,6 +133,12 @@ static inline int rpmd_trans_huge(pmd_t pmd)
 
 #endif
 
+extern int __meminit rvmemmap_create_mapping(unsigned long start,
+					     unsigned long page_size,
+					     unsigned long phys);
+extern void rvmemmap_remove_mapping(unsigned long start,
+				    unsigned long page_size);
+
 extern int map_radix_kernel_page(unsigned long ea, unsigned long pa,
 				 pgprot_t flags, unsigned int psz);
 #endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 900c3386c6e1..31c2e717a756 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -342,3 +342,23 @@ void rsetup_initial_memory_limit(phys_addr_t first_memblock_base,
 	/* Finally limit subsequent allocations */
 	memblock_set_current_limit(first_memblock_base + first_memblock_size);
 }
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+int __meminit rvmemmap_create_mapping(unsigned long start,
+				      unsigned long page_size,
+				      unsigned long phys)
+{
+	/* Create a PTE encoding */
+	unsigned long flags = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_KERNEL_RW;
+
+	BUG_ON(map_radix_kernel_page(start, phys, __pgprot(flags), page_size));
+	return 0;
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+void rvmemmap_remove_mapping(unsigned long start, unsigned long page_size)
+{
+	/* FIXME!! intel does more. We should free page tables mapping vmemmap ? */
+}
+#endif
+#endif
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 33/65] powerpc/mm: Abstraction for switch_mmu_context
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (30 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 32/65] powerpc/mm/radix: Add radix callback for vmemmap and map_kernel page Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 34/65] powerpc/mm/radix: Add mmu context handling callback for radix Aneesh Kumar K.V
                   ` (32 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/mmu_context.h | 25 +++++++++++++------------
 arch/powerpc/kernel/swsusp.c           |  2 +-
 arch/powerpc/mm/mmu_context_nohash.c   |  3 ++-
 drivers/cpufreq/pmac32-cpufreq.c       |  2 +-
 drivers/macintosh/via-pmu.c            |  4 ++--
 5 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 878c27771717..0dd522ff4559 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -33,16 +33,23 @@ extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
 extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
 extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
 #endif
-
-extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next);
 extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm);
 extern void set_context(unsigned long id, pgd_t *pgd);
 
 #ifdef CONFIG_PPC_BOOK3S_64
+static inline void switch_mmu_context(struct mm_struct *prev,
+				      struct mm_struct *next,
+				      struct task_struct *tsk)
+{
+	return switch_slb(tsk, next);
+}
+
 extern int __init_new_context(void);
 extern void __destroy_context(int context_id);
 static inline void mmu_context_init(void) { }
 #else
+extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
+			       struct task_struct *tsk);
 extern unsigned long __init_new_context(void);
 extern void __destroy_context(unsigned long context_id);
 extern void mmu_context_init(void);
@@ -88,17 +95,11 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	if (cpu_has_feature(CPU_FTR_ALTIVEC))
 		asm volatile ("dssall");
 #endif /* CONFIG_ALTIVEC */
-
-	/* The actual HW switching method differs between the various
-	 * sub architectures.
+	/*
+	 * The actual HW switching method differs between the various
+	 * sub architectures. Out of line for now
 	 */
-#ifdef CONFIG_PPC_STD_MMU_64
-	switch_slb(tsk, next);
-#else
-	/* Out of line for now */
-	switch_mmu_context(prev, next);
-#endif
-
+	switch_mmu_context(prev, next, tsk);
 }
 
 #define deactivate_mm(tsk,mm)	do { } while (0)
diff --git a/arch/powerpc/kernel/swsusp.c b/arch/powerpc/kernel/swsusp.c
index 6669b1752512..6ae9bd5086a4 100644
--- a/arch/powerpc/kernel/swsusp.c
+++ b/arch/powerpc/kernel/swsusp.c
@@ -31,6 +31,6 @@ void save_processor_state(void)
 void restore_processor_state(void)
 {
 #ifdef CONFIG_PPC32
-	switch_mmu_context(current->active_mm, current->active_mm);
+	switch_mmu_context(current->active_mm, current->active_mm, NULL);
 #endif
 }
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 986afbc22c76..a36c43a27893 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -226,7 +226,8 @@ static void context_check_map(void)
 static void context_check_map(void) { }
 #endif
 
-void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
+void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
+			struct task_struct *tsk)
 {
 	unsigned int i, id, cpu = smp_processor_id();
 	unsigned long *map;
diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c
index 1f49d97a70ea..bde503c66945 100644
--- a/drivers/cpufreq/pmac32-cpufreq.c
+++ b/drivers/cpufreq/pmac32-cpufreq.c
@@ -298,7 +298,7 @@ static int pmu_set_cpu_speed(int low_speed)
  		_set_L3CR(save_l3cr);
 
 	/* Restore userland MMU context */
-	switch_mmu_context(NULL, current->active_mm);
+	switch_mmu_context(NULL, current->active_mm, NULL);
 
 #ifdef DEBUG_FREQ
 	printk(KERN_DEBUG "HID1, after: %x\n", mfspr(SPRN_HID1));
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index 01ee736fe0ef..f8b6d1403c16 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -1851,7 +1851,7 @@ static int powerbook_sleep_grackle(void)
  		_set_L2CR(save_l2cr);
 	
 	/* Restore userland MMU context */
-	switch_mmu_context(NULL, current->active_mm);
+	switch_mmu_context(NULL, current->active_mm, NULL);
 
 	/* Power things up */
 	pmu_unlock();
@@ -1940,7 +1940,7 @@ powerbook_sleep_Core99(void)
  		_set_L3CR(save_l3cr);
 	
 	/* Restore userland MMU context */
-	switch_mmu_context(NULL, current->active_mm);
+	switch_mmu_context(NULL, current->active_mm, NULL);
 
 	/* Tell PMU we are ready */
 	pmu_unlock();
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 34/65] powerpc/mm/radix: Add mmu context handling callback for radix
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (31 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 33/65] powerpc/mm: Abstraction for switch_mmu_context Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 35/65] powerpc/mm: Rename mmu_context_hash64.c to mmu_context_book3s64.c Aneesh Kumar K.V
                   ` (31 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/mmu_context.h |  4 ++++
 arch/powerpc/mm/mmu_context_hash64.c   | 42 +++++++++++++++++++++++++++-------
 2 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 0dd522ff4559..8f16a065d33d 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -37,10 +37,14 @@ extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm);
 extern void set_context(unsigned long id, pgd_t *pgd);
 
 #ifdef CONFIG_PPC_BOOK3S_64
+extern void switch_radix_mmu_context(struct mm_struct *prev,
+				     struct mm_struct *next);
 static inline void switch_mmu_context(struct mm_struct *prev,
 				      struct mm_struct *next,
 				      struct task_struct *tsk)
 {
+	if (radix_enabled())
+		return switch_radix_mmu_context(prev, next);
 	return switch_slb(tsk, next);
 }
 
diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c
index 4e4efbc2658e..08cc3182b744 100644
--- a/arch/powerpc/mm/mmu_context_hash64.c
+++ b/arch/powerpc/mm/mmu_context_hash64.c
@@ -58,6 +58,17 @@ again:
 	return index;
 }
 EXPORT_SYMBOL_GPL(__init_new_context);
+static int rinit_new_context(struct mm_struct *mm, int index)
+{
+	unsigned long rts_field;
+
+	/*
+	 * set the process table entry,
+	 */
+	rts_field = 3ull << PPC_BITLSHIFT(2);
+	process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | R_PGD_INDEX_SIZE);
+	return 0;
+}
 
 int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
@@ -67,13 +78,18 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 	if (index < 0)
 		return index;
 
-	/* The old code would re-promote on fork, we don't do that
-	 * when using slices as it could cause problem promoting slices
-	 * that have been forced down to 4K
-	 */
-	if (slice_mm_new_context(mm))
-		slice_set_user_psize(mm, mmu_virtual_psize);
-	subpage_prot_init_new_context(mm);
+	if (radix_enabled()) {
+		rinit_new_context(mm, index);
+	} else {
+
+		/* The old code would re-promote on fork, we don't do that
+		 * when using slices as it could cause problem promoting slices
+		 * that have been forced down to 4K
+		 */
+		if (slice_mm_new_context(mm))
+			slice_set_user_psize(mm, mmu_virtual_psize);
+		subpage_prot_init_new_context(mm);
+	}
 	mm->context.id = index;
 #ifdef CONFIG_PPC_ICSWX
 	mm->context.cop_lockp = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
@@ -145,8 +161,18 @@ void destroy_context(struct mm_struct *mm)
 	mm->context.cop_lockp = NULL;
 #endif /* CONFIG_PPC_ICSWX */
 
+	if (radix_enabled())
+		process_tb[mm->context.id].prtb1 = 0;
+	else
+		subpage_prot_free(mm);
 	destroy_pagetable_page(mm);
 	__destroy_context(mm->context.id);
-	subpage_prot_free(mm);
 	mm->context.id = MMU_NO_CONTEXT;
 }
+
+void switch_radix_mmu_context(struct mm_struct *prev, struct mm_struct *next)
+{
+	mtspr(SPRN_PID, next->context.id);
+	asm volatile("isync": : :"memory");
+
+}
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 35/65] powerpc/mm: Rename mmu_context_hash64.c to mmu_context_book3s64.c
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (32 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 34/65] powerpc/mm/radix: Add mmu context handling callback for radix Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 36/65] powerpc/mm: Hash linux abstraction for tlbflush routines Aneesh Kumar K.V
                   ` (30 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

This file now contains both hash- and radix-specific code. Rename it to
reflect that.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/mm/Makefile                                         | 7 +++----
 arch/powerpc/mm/{mmu_context_hash64.c => mmu_context_book3s64.c} | 0
 2 files changed, 3 insertions(+), 4 deletions(-)
 rename arch/powerpc/mm/{mmu_context_hash64.c => mmu_context_book3s64.c} (100%)

diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index d421719fcf21..771234cc55b6 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -14,11 +14,10 @@ obj-$(CONFIG_PPC_MMU_NOHASH)	+= mmu_context_nohash.o tlb_nohash.o \
 obj-$(CONFIG_PPC_BOOK3E)	+= tlb_low_$(CONFIG_WORD_SIZE)e.o
 hash64-$(CONFIG_PPC_NATIVE)	:= hash_native_64.o
 obj-$(CONFIG_PPC_BOOK3E_64)   += pgtable-book3e.o
-obj-$(CONFIG_PPC_STD_MMU_64)	+= pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y)
+obj-$(CONFIG_PPC_STD_MMU_64)	+= pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o
 obj-$(CONFIG_PPC_RADIX_MMU)	+= pgtable-radix.o
-obj-$(CONFIG_PPC_STD_MMU_32)	+= ppc_mmu_32.o hash_low_32.o
-obj-$(CONFIG_PPC_STD_MMU)	+= tlb_hash$(CONFIG_WORD_SIZE).o \
-				   mmu_context_hash$(CONFIG_WORD_SIZE).o
+obj-$(CONFIG_PPC_STD_MMU_32)	+= ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o
+obj-$(CONFIG_PPC_STD_MMU)	+= tlb_hash$(CONFIG_WORD_SIZE).o
 ifeq ($(CONFIG_PPC_STD_MMU_64),y)
 obj-$(CONFIG_PPC_4K_PAGES)	+= hash64_4k.o
 obj-$(CONFIG_PPC_64K_PAGES)	+= hash64_64k.o
diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_book3s64.c
similarity index 100%
rename from arch/powerpc/mm/mmu_context_hash64.c
rename to arch/powerpc/mm/mmu_context_book3s64.c
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 36/65] powerpc/mm: Hash linux abstraction for tlbflush routines
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (33 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 35/65] powerpc/mm: Rename mmu_context_hash64.c to mmu_context_book3s64.c Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 37/65] powerpc/mm/radix: Add " Aneesh Kumar K.V
                   ` (29 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/tlbflush-hash.h | 28 ++++++-----
 arch/powerpc/include/asm/book3s/64/tlbflush.h      | 56 ++++++++++++++++++++++
 arch/powerpc/include/asm/tlbflush.h                |  2 +-
 arch/powerpc/mm/tlb_hash64.c                       |  2 +-
 4 files changed, 73 insertions(+), 15 deletions(-)
 create mode 100644 arch/powerpc/include/asm/book3s/64/tlbflush.h

diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
index 1b753f96b374..ddce8477fe0c 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
@@ -52,40 +52,42 @@ extern void flush_hash_range(unsigned long number, int local);
 extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
 				pmd_t *pmdp, unsigned int psize, int ssize,
 				unsigned long flags);
-
-static inline void local_flush_tlb_mm(struct mm_struct *mm)
+static inline void local_flush_hltlb_mm(struct mm_struct *mm)
 {
 }
 
-static inline void flush_tlb_mm(struct mm_struct *mm)
+static inline void flush_hltlb_mm(struct mm_struct *mm)
 {
 }
 
-static inline void local_flush_tlb_page(struct vm_area_struct *vma,
-					unsigned long vmaddr)
+static inline void local_flush_hltlb_page(struct vm_area_struct *vma,
+					  unsigned long vmaddr)
 {
 }
 
-static inline void flush_tlb_page(struct vm_area_struct *vma,
-				  unsigned long vmaddr)
+static inline void flush_hltlb_page(struct vm_area_struct *vma,
+				    unsigned long vmaddr)
 {
 }
 
-static inline void flush_tlb_page_nohash(struct vm_area_struct *vma,
-					 unsigned long vmaddr)
+static inline void flush_hltlb_page_nohash(struct vm_area_struct *vma,
+					   unsigned long vmaddr)
 {
 }
 
-static inline void flush_tlb_range(struct vm_area_struct *vma,
-				   unsigned long start, unsigned long end)
+static inline void flush_hltlb_range(struct vm_area_struct *vma,
+				     unsigned long start, unsigned long end)
 {
 }
 
-static inline void flush_tlb_kernel_range(unsigned long start,
-					  unsigned long end)
+static inline void flush_hltlb_kernel_range(unsigned long start,
+					    unsigned long end)
 {
 }
 
+
+struct mmu_gather;
+extern void hltlb_flush(struct mmu_gather *tlb);
 /* Private function for use by PCI IO mapping code */
 extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
 				     unsigned long end);
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
new file mode 100644
index 000000000000..37d7f289ad42
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -0,0 +1,56 @@
+#ifndef _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H
+#define _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H
+
+#include <asm/book3s/64/tlbflush-hash.h>
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+				   unsigned long start, unsigned long end)
+{
+	return flush_hltlb_range(vma, start, end);
+}
+
+static inline void flush_tlb_kernel_range(unsigned long start,
+					  unsigned long end)
+{
+	return flush_hltlb_kernel_range(start, end);
+}
+
+static inline void local_flush_tlb_mm(struct mm_struct *mm)
+{
+	return local_flush_hltlb_mm(mm);
+}
+
+static inline void local_flush_tlb_page(struct vm_area_struct *vma,
+					unsigned long vmaddr)
+{
+	return local_flush_hltlb_page(vma, vmaddr);
+}
+
+static inline void flush_tlb_page_nohash(struct vm_area_struct *vma,
+					 unsigned long vmaddr)
+{
+	return flush_hltlb_page_nohash(vma, vmaddr);
+}
+
+static inline void tlb_flush(struct mmu_gather *tlb)
+{
+	return hltlb_flush(tlb);
+}
+
+#ifdef CONFIG_SMP
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+	return flush_hltlb_mm(mm);
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+				  unsigned long vmaddr)
+{
+	return flush_hltlb_page(vma, vmaddr);
+}
+#else
+#define flush_tlb_mm(mm)		local_flush_tlb_mm(mm)
+#define flush_tlb_page(vma, addr)	local_flush_tlb_page(vma, addr)
+#endif /* CONFIG_SMP */
+
+#endif /*  _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H */
diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h
index 9f77f85e3e99..2fc4331c5bc5 100644
--- a/arch/powerpc/include/asm/tlbflush.h
+++ b/arch/powerpc/include/asm/tlbflush.h
@@ -78,7 +78,7 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm)
 }
 
 #elif defined(CONFIG_PPC_STD_MMU_64)
-#include <asm/book3s/64/tlbflush-hash.h>
+#include <asm/book3s/64/tlbflush.h>
 #else
 #error Unsupported MMU type
 #endif
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 38497cf5e31b..d3a189d014f2 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -155,7 +155,7 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
 	batch->index = 0;
 }
 
-void tlb_flush(struct mmu_gather *tlb)
+void hltlb_flush(struct mmu_gather *tlb)
 {
 	struct ppc64_tlb_batch *tlbbatch = &get_cpu_var(ppc64_tlb_batch);
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 37/65] powerpc/mm/radix: Add tlbflush routines
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (34 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 36/65] powerpc/mm: Hash linux abstraction for tlbflush routines Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27 10:00   ` kbuild test robot
                     ` (2 more replies)
  2016-03-27  8:23 ` [PATCH 38/65] powerpc/mm/radix: Add MMU_FTR_RADIX Aneesh Kumar K.V
                   ` (28 subsequent siblings)
  64 siblings, 3 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

The core kernel doesn't track the page size of the VA range that we are
invalidating. Hence we end up flushing the TLB for the entire mm here.
Later patches will improve this.

We also don't flush the page walk cache separately; instead we use RIC=2
when flushing the TLB, because we do an mmu_gather flush after freeing the
page tables.

MMU_NO_CONTEXT is updated for hash.
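
For reference, the RIC field that the routines below hard-code follows the
ISA 3.0 radix invalidation encoding. A minimal sketch of that mapping (the
RIC_* names are illustrative only and are not defined by this patch):

	/* Illustrative constants, assuming the ISA 3.0 RIC encoding. */
	#define RIC_FLUSH_TLB	0	/* invalidate TLB entries only */
	#define RIC_FLUSH_PWC	1	/* invalidate the page walk cache only */
	#define RIC_FLUSH_ALL	2	/* invalidate TLB and page walk cache */

The full-mm flushes below pass ric = 2 (RIC_FLUSH_ALL), which is why no
separate page walk cache flush is needed once the mmu_gather has freed the
page tables; the per-VA flushes pass ric = 0.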

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/tlbflush-hash.h |  13 +-
 .../powerpc/include/asm/book3s/64/tlbflush-radix.h |  33 +++
 arch/powerpc/include/asm/book3s/64/tlbflush.h      |  20 ++
 arch/powerpc/include/asm/tlbflush.h                |   1 +
 arch/powerpc/mm/Makefile                           |   2 +-
 arch/powerpc/mm/tlb-radix.c                        | 246 +++++++++++++++++++++
 kernel/fork.c                                      |   4 +
 7 files changed, 314 insertions(+), 5 deletions(-)
 create mode 100644 arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
 create mode 100644 arch/powerpc/mm/tlb-radix.c

diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
index ddce8477fe0c..e90310d1a519 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
@@ -1,8 +1,6 @@
 #ifndef _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H
 #define _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H
 
-#define MMU_NO_CONTEXT		0
-
 /*
  * TLB flushing for 64-bit hash-MMU CPUs
  */
@@ -29,14 +27,21 @@ extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch);
 
 static inline void arch_enter_lazy_mmu_mode(void)
 {
-	struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
+	struct ppc64_tlb_batch *batch;
 
+	if (radix_enabled())
+		return;
+	batch = this_cpu_ptr(&ppc64_tlb_batch);
 	batch->active = 1;
 }
 
 static inline void arch_leave_lazy_mmu_mode(void)
 {
-	struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
+	struct ppc64_tlb_batch *batch;
+
+	if (radix_enabled())
+		return;
+	batch = this_cpu_ptr(&ppc64_tlb_batch);
 
 	if (batch->index)
 		__flush_tlb_pending(batch);
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
new file mode 100644
index 000000000000..584ffa0a331f
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -0,0 +1,33 @@
+#ifndef _ASM_POWERPC_TLBFLUSH_RADIX_H
+#define _ASM_POWERPC_TLBFLUSH_RADIX_H
+
+struct vm_area_struct;
+struct mm_struct;
+struct mmu_gather;
+
+static inline int mmu_get_ap(int psize)
+{
+	return mmu_psize_defs[psize].ap;
+}
+
+extern void flush_rtlb_range(struct vm_area_struct *vma, unsigned long start,
+			    unsigned long end);
+extern void flush_rtlb_kernel_range(unsigned long start, unsigned long end);
+
+extern void local_flush_rtlb_mm(struct mm_struct *mm);
+extern void local_flush_rtlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+extern void __local_flush_rtlb_page(struct mm_struct *mm, unsigned long vmaddr,
+				    unsigned long ap, int nid);
+extern void rtlb_flush(struct mmu_gather *tlb);
+#ifdef CONFIG_SMP
+extern void flush_rtlb_mm(struct mm_struct *mm);
+extern void flush_rtlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+extern void __flush_rtlb_page(struct mm_struct *mm, unsigned long vmaddr,
+			      unsigned long ap, int nid);
+#else
+#define flush_rtlb_mm(mm)		local_flush_rtlb_mm(mm)
+#define flush_rtlb_page(vma,addr)	local_flush_rtlb_page(vma,addr)
+#define __flush_rtlb_page(mm,addr,p,i)	__local_flush_rtlb_page(mm,addr,p,i)
+#endif
+
+#endif
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index 37d7f289ad42..66b7bc371491 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -1,51 +1,71 @@
 #ifndef _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H
 #define _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H
 
+#define MMU_NO_CONTEXT	~0UL
+
+
 #include <asm/book3s/64/tlbflush-hash.h>
+#include <asm/book3s/64/tlbflush-radix.h>
 
 static inline void flush_tlb_range(struct vm_area_struct *vma,
 				   unsigned long start, unsigned long end)
 {
+	if (radix_enabled())
+		return flush_rtlb_range(vma, start, end);
 	return flush_hltlb_range(vma, start, end);
 }
 
 static inline void flush_tlb_kernel_range(unsigned long start,
 					  unsigned long end)
 {
+	if (radix_enabled())
+		return flush_rtlb_kernel_range(start, end);
 	return flush_hltlb_kernel_range(start, end);
 }
 
 static inline void local_flush_tlb_mm(struct mm_struct *mm)
 {
+	if (radix_enabled())
+		return local_flush_rtlb_mm(mm);
 	return local_flush_hltlb_mm(mm);
 }
 
 static inline void local_flush_tlb_page(struct vm_area_struct *vma,
 					unsigned long vmaddr)
 {
+	if (radix_enabled())
+		return local_flush_rtlb_page(vma, vmaddr);
 	return local_flush_hltlb_page(vma, vmaddr);
 }
 
 static inline void flush_tlb_page_nohash(struct vm_area_struct *vma,
 					 unsigned long vmaddr)
 {
+	if (radix_enabled())
+		return flush_rtlb_page(vma, vmaddr);
 	return flush_hltlb_page_nohash(vma, vmaddr);
 }
 
 static inline void tlb_flush(struct mmu_gather *tlb)
 {
+	if (radix_enabled())
+		return rtlb_flush(tlb);
 	return hltlb_flush(tlb);
 }
 
 #ifdef CONFIG_SMP
 static inline void flush_tlb_mm(struct mm_struct *mm)
 {
+	if (radix_enabled())
+		return flush_rtlb_mm(mm);
 	return flush_hltlb_mm(mm);
 }
 
 static inline void flush_tlb_page(struct vm_area_struct *vma,
 				  unsigned long vmaddr)
 {
+	if (radix_enabled())
+		return flush_rtlb_page(vma, vmaddr);
 	return flush_hltlb_page(vma, vmaddr);
 }
 #else
diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h
index 2fc4331c5bc5..1b38eea28e5a 100644
--- a/arch/powerpc/include/asm/tlbflush.h
+++ b/arch/powerpc/include/asm/tlbflush.h
@@ -58,6 +58,7 @@ extern void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
 
 #elif defined(CONFIG_PPC_STD_MMU_32)
 
+#define MMU_NO_CONTEXT      (0)
 /*
  * TLB flushing for "classic" hash-MMU 32-bit CPUs, 6xx, 7xx, 7xxx
  */
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 771234cc55b6..cb21d9862748 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -15,7 +15,7 @@ obj-$(CONFIG_PPC_BOOK3E)	+= tlb_low_$(CONFIG_WORD_SIZE)e.o
 hash64-$(CONFIG_PPC_NATIVE)	:= hash_native_64.o
 obj-$(CONFIG_PPC_BOOK3E_64)   += pgtable-book3e.o
 obj-$(CONFIG_PPC_STD_MMU_64)	+= pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o
-obj-$(CONFIG_PPC_RADIX_MMU)	+= pgtable-radix.o
+obj-$(CONFIG_PPC_RADIX_MMU)	+= pgtable-radix.o tlb-radix.o
 obj-$(CONFIG_PPC_STD_MMU_32)	+= ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o
 obj-$(CONFIG_PPC_STD_MMU)	+= tlb_hash$(CONFIG_WORD_SIZE).o
 ifeq ($(CONFIG_PPC_STD_MMU_64),y)
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
new file mode 100644
index 000000000000..39c28d32bd97
--- /dev/null
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -0,0 +1,246 @@
+/*
+ *  TLB flush routines for radix kernels.
+ *
+ *  Copyright (C) 2015 Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/memblock.h>
+
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+
+static DEFINE_RAW_SPINLOCK(native_tlbie_lock);
+
+static inline void __tlbiel_pid(unsigned long pid, int set)
+{
+	unsigned long rb,rs,ric,prs,r;
+
+	rb = PPC_BIT(53); /* IS = 1 */
+	rb |= set << PPC_BITLSHIFT(51);
+	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
+	prs = 1; /* process scoped */
+	r = 1;   /* radix format */
+	ric = 2;  /* invalidate all the caches */
+
+	asm volatile("ptesync": : :"memory");
+	asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |"
+		     "(%2 << 17) | (%3 << 18) | (%4 << 21)"
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	asm volatile("ptesync": : :"memory");
+}
+
+/*
+ * We use 128 sets in radix mode and 256 sets in hpt mode.
+ * FIXME!! this needs to be derived from the device tree. For now
+ * use a #define.
+ */
+#define TLB_SET 128
+static inline void _tlbiel_pid(unsigned long pid)
+{
+	int set;
+
+	for (set = 0; set < TLB_SET; set++) {
+		__tlbiel_pid(pid, set);
+	}
+	return;
+}
+
+static inline void _tlbie_pid(unsigned long pid)
+{
+	unsigned long rb,rs,ric,prs,r;
+
+	rb = PPC_BIT(53); /* IS = 1 */
+	rs = pid << PPC_BITLSHIFT(31);
+	prs = 1; /* process scoped */
+	r = 1;   /* radix format */
+	ric = 2;  /* invalidate all the caches */
+
+	asm volatile("ptesync": : :"memory");
+	asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |"
+		     "(%2 << 17) | (%3 << 18) | (%4 << 21)"
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+
+static inline void _tlbiel_va(unsigned long va, unsigned long pid,
+			      unsigned long ap)
+{
+	unsigned long rb,rs,ric,prs,r;
+
+	rb = va & ~(PPC_BITMASK(52, 63));
+	rb |= ap << PPC_BITLSHIFT(58);
+	rs = pid << PPC_BITLSHIFT(31);
+	prs = 1; /* process scoped */
+	r = 1;   /* radix format */
+	ric = 0;  /* no cluster flush yet */
+
+	asm volatile("ptesync": : :"memory");
+	asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |"
+		     "(%2 << 17) | (%3 << 18) | (%4 << 21)"
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	asm volatile("ptesync": : :"memory");
+}
+
+static inline void _tlbie_va(unsigned long va, unsigned long pid,
+			     unsigned long ap)
+{
+	unsigned long rb,rs,ric,prs,r;
+
+	rb = va & ~(PPC_BITMASK(52, 63));
+	rb |= ap << PPC_BITLSHIFT(58);
+	rs = pid << PPC_BITLSHIFT(31);
+	prs = 1; /* process scoped */
+	r = 1;   /* radix format */
+	ric = 0;  /* no cluster flush yet */
+
+	asm volatile("ptesync": : :"memory");
+	asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |"
+		     "(%2 << 17) | (%3 << 18) | (%4 << 21)"
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+
+/*
+ * Base TLB flushing operations:
+ *
+ *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ *  - flush_tlb_page(vma, vmaddr) flushes one page
+ *  - flush_tlb_range(vma, start, end) flushes a range of pages
+ *  - flush_tlb_kernel_range(start, end) flushes kernel pages
+ *
+ *  - local_* variants of page and mm only apply to the current
+ *    processor
+ */
+void local_flush_rtlb_mm(struct mm_struct *mm)
+{
+	unsigned int pid;
+
+	preempt_disable();
+	pid = mm->context.id;
+	if (pid != MMU_NO_CONTEXT)
+		_tlbiel_pid(pid);
+	preempt_enable();
+}
+EXPORT_SYMBOL(local_flush_rtlb_mm);
+
+void __local_flush_rtlb_page(struct mm_struct *mm, unsigned long vmaddr,
+			    unsigned long ap, int nid)
+{
+	unsigned int pid;
+
+	preempt_disable();
+	pid = mm ? mm->context.id : 0;
+	if (pid != MMU_NO_CONTEXT)
+		_tlbiel_va(vmaddr, pid, ap);
+	preempt_enable();
+}
+
+void local_flush_rtlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+	__local_flush_rtlb_page(vma ? vma->vm_mm : NULL, vmaddr,
+			       mmu_get_ap(mmu_virtual_psize), 0);
+}
+EXPORT_SYMBOL(local_flush_rtlb_page);
+
+#ifdef CONFIG_SMP
+static int mm_is_core_local(struct mm_struct *mm)
+{
+	return cpumask_subset(mm_cpumask(mm),
+			      topology_sibling_cpumask(smp_processor_id()));
+}
+
+void flush_rtlb_mm(struct mm_struct *mm)
+{
+	unsigned int pid;
+
+	preempt_disable();
+	pid = mm->context.id;
+	if (unlikely(pid == MMU_NO_CONTEXT))
+		goto no_context;
+
+	if (!mm_is_core_local(mm)) {
+		int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+
+		if (lock_tlbie)
+			raw_spin_lock(&native_tlbie_lock);
+		_tlbie_pid(pid);
+		if (lock_tlbie)
+			raw_spin_unlock(&native_tlbie_lock);
+	} else
+		_tlbiel_pid(pid);
+no_context:
+	preempt_enable();
+}
+EXPORT_SYMBOL(flush_rtlb_mm);
+
+void __flush_rtlb_page(struct mm_struct *mm, unsigned long vmaddr,
+		       unsigned long ap, int nid)
+{
+	unsigned int pid;
+
+	preempt_disable();
+	pid = mm ? mm->context.id : 0;
+	if (unlikely(pid == MMU_NO_CONTEXT))
+		goto bail;
+	if (!mm_is_core_local(mm)) {
+		int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+
+		if (lock_tlbie)
+			raw_spin_lock(&native_tlbie_lock);
+		_tlbie_va(vmaddr, pid, ap);
+		if (lock_tlbie)
+			raw_spin_unlock(&native_tlbie_lock);
+	} else
+		_tlbiel_va(vmaddr, pid, ap);
+bail:
+	preempt_enable();
+}
+
+void flush_rtlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+	__flush_rtlb_page(vma ? vma->vm_mm : NULL, vmaddr,
+			 mmu_get_ap(mmu_virtual_psize), 0);
+}
+EXPORT_SYMBOL(flush_rtlb_page);
+
+#endif /* CONFIG_SMP */
+
+void flush_rtlb_kernel_range(unsigned long start, unsigned long end)
+{
+	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+
+	if (lock_tlbie)
+		raw_spin_lock(&native_tlbie_lock);
+	_tlbie_pid(0);
+	if (lock_tlbie)
+		raw_spin_unlock(&native_tlbie_lock);
+}
+EXPORT_SYMBOL(flush_rtlb_kernel_range);
+
+/*
+ * Currently, for range flushing, we just do a full mm flush, because
+ * we use this in code paths where we don't track the page size.
+ */
+void flush_rtlb_range(struct vm_area_struct *vma, unsigned long start,
+		     unsigned long end)
+
+{
+	struct mm_struct *mm = vma->vm_mm;
+	flush_rtlb_mm(mm);
+}
+EXPORT_SYMBOL(flush_rtlb_range);
+
+
+void rtlb_flush(struct mmu_gather *tlb)
+{
+	struct mm_struct *mm = tlb->mm;
+	flush_rtlb_mm(mm);
+}
diff --git a/kernel/fork.c b/kernel/fork.c
index 2e391c754ae7..b1a472c3ce3a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -669,6 +669,10 @@ struct mm_struct *mm_alloc(void)
 		return NULL;
 
 	memset(mm, 0, sizeof(*mm));
+	/*
+	 * FIXME!! we need a better way to handle this
+	 */
+	mm->context.id = MMU_NO_CONTEXT;
 	return mm_init(mm, current);
 }
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 38/65] powerpc/mm/radix: Add MMU_FTR_RADIX
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (35 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 37/65] powerpc/mm/radix: Add " Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 39/65] powerpc/mm/radix: Use STD_MMU_64 to properly isolate hash related code Aneesh Kumar K.V
                   ` (27 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

We are going to add asm changes in the follow-up patches. Add the
feature bit now so that we can get it all to build. We will enable radix
in the last patch using the CPU table.
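
For context, radix_enabled() now reduces to a runtime check of the MMU
feature mask. A simplified sketch of the existing mmu_has_feature() helper
from asm/mmu.h (shown only for reference, not modified by this patch):

	static inline int mmu_has_feature(unsigned long feature)
	{
		return (cur_cpu_spec->mmu_features & feature) != 0;
	}

In assembly, the same bit is consumed via the BEGIN_MMU_FTR_SECTION /
END_MMU_FTR_SECTION_IFSET fixups used by the follow-up patches.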

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/mmu.h | 3 ++-
 arch/powerpc/include/asm/mmu.h           | 4 ++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index a526642f1c02..0835a8f9904b 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -23,7 +23,8 @@ struct mmu_psize_def {
 };
 extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
 
-#define radix_enabled() (0)
+#define radix_enabled() mmu_has_feature(MMU_FTR_RADIX)
+
 #endif /* __ASSEMBLY__ */
 
 /* 64-bit classic hash table MMU */
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 34070e982f1d..9e8c05f9c562 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -87,6 +87,10 @@
 /* 1T segments available
  */
 #define MMU_FTR_1T_SEGMENT		ASM_CONST(0x40000000)
+/*
+ * Radix page table available
+ */
+#define MMU_FTR_RADIX                  ASM_CONST(0x80000000)
 
 /* MMU feature bit sets for various CPUs */
 #define MMU_FTRS_DEFAULT_HPTE_ARCH_V2	\
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 39/65] powerpc/mm/radix: Use STD_MMU_64 to properly isolate hash related code
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (36 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 38/65] powerpc/mm/radix: Add MMU_FTR_RADIX Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 40/65] powerpc/mm/radix: Isolate hash table function from pseries guest code Aneesh Kumar K.V
                   ` (26 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

We also use MMU_FTR_RADIX to branch out of code paths specific to
hash.

No functionality change.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/entry_64.S         |  7 +++++--
 arch/powerpc/kernel/exceptions-64s.S   | 28 +++++++++++++++++++++++-----
 arch/powerpc/kernel/machine_kexec_64.c |  6 ++++--
 arch/powerpc/kernel/mce_power.c        | 10 ++++++++++
 arch/powerpc/kernel/process.c          | 15 +++++++++------
 arch/powerpc/xmon/xmon.c               |  2 +-
 6 files changed, 52 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 038e0a1425e7..5b97a79b7a31 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -510,7 +510,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_STCX_CHECKS_ADDRESS)
 	std	r6,PACACURRENT(r13)	/* Set new 'current' */
 
 	ld	r8,KSP(r4)	/* new stack pointer */
-#ifdef CONFIG_PPC_BOOK3S
+#ifdef CONFIG_PPC_STD_MMU_64
+BEGIN_MMU_FTR_SECTION
+	b	2f
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_RADIX)
 BEGIN_FTR_SECTION
 	clrrdi	r6,r8,28	/* get its ESID */
 	clrrdi	r9,r1,28	/* get current sp ESID */
@@ -556,7 +559,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 	slbmte	r7,r0
 	isync
 2:
-#endif /* !CONFIG_PPC_BOOK3S */
+#endif /* CONFIG_PPC_STD_MMU_64 */
 
 	CURRENT_THREAD_INFO(r7, r8)  /* base of new stack */
 	/* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 7716cebf4b8e..d2afec81d04d 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -983,7 +983,13 @@ data_access_common:
 	ld	r3,PACA_EXGEN+EX_DAR(r13)
 	lwz	r4,PACA_EXGEN+EX_DSISR(r13)
 	li	r5,0x300
+	std	r3,_DAR(r1)
+	std	r4,_DSISR(r1)
+BEGIN_MMU_FTR_SECTION
 	b	do_hash_page		/* Try to handle as hpte fault */
+MMU_FTR_SECTION_ELSE
+	b	handle_page_fault
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_RADIX)
 
 	.align  7
 	.globl  h_data_storage_common
@@ -1008,7 +1014,13 @@ instruction_access_common:
 	ld	r3,_NIP(r1)
 	andis.	r4,r12,0x5820
 	li	r5,0x400
+	std	r3,_DAR(r1)
+	std	r4,_DSISR(r1)
+BEGIN_MMU_FTR_SECTION
 	b	do_hash_page		/* Try to handle as hpte fault */
+MMU_FTR_SECTION_ELSE
+	b	handle_page_fault
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_RADIX)
 
 	STD_EXCEPTION_COMMON(0xe20, h_instr_storage, unknown_exception)
 
@@ -1476,8 +1488,11 @@ slb_miss_realmode:
 	stw	r9,PACA_EXSLB+EX_CCR(r13)	/* save CR in exc. frame */
 	std	r10,PACA_EXSLB+EX_LR(r13)	/* save LR */
 
+#ifdef CONFIG_PPC_STD_MMU_64
+BEGIN_MMU_FTR_SECTION
 	bl	slb_allocate_realmode
-
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_RADIX)
+#endif
 	/* All done -- return from exception. */
 
 	ld	r10,PACA_EXSLB+EX_LR(r13)
@@ -1485,7 +1500,9 @@ slb_miss_realmode:
 	lwz	r9,PACA_EXSLB+EX_CCR(r13)	/* get saved CR */
 
 	mtlr	r10
-
+BEGIN_MMU_FTR_SECTION
+	b	2f
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_RADIX)
 	andi.	r10,r12,MSR_RI	/* check for unrecoverable exception */
 	beq-	2f
 
@@ -1536,9 +1553,7 @@ power4_fixup_nap:
  */
 	.align	7
 do_hash_page:
-	std	r3,_DAR(r1)
-	std	r4,_DSISR(r1)
-
+#ifdef CONFIG_PPC_STD_MMU_64
 	andis.	r0,r4,0xa410		/* weird error? */
 	bne-	handle_page_fault	/* if not, try to insert a HPTE */
 	andis.  r0,r4,DSISR_DABRMATCH@h
@@ -1566,6 +1581,7 @@ do_hash_page:
 
 	/* Error */
 	blt-	13f
+#endif /* CONFIG_PPC_STD_MMU_64 */
 
 /* Here we have a page fault that hash_page can't handle. */
 handle_page_fault:
@@ -1592,6 +1608,7 @@ handle_dabr_fault:
 12:	b       ret_from_except_lite
 
 
+#ifdef CONFIG_PPC_STD_MMU_64
 /* We have a page fault that hash_page could handle but HV refused
  * the PTE insertion
  */
@@ -1601,6 +1618,7 @@ handle_dabr_fault:
 	ld	r4,_DAR(r1)
 	bl	low_hash_fault
 	b	ret_from_except
+#endif
 
 /*
  * We come here as a result of a DSI at a point where we don't want
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index 0fbd75d185d7..1da864c00db0 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -76,6 +76,7 @@ int default_machine_kexec_prepare(struct kimage *image)
 	 * end of the blocked region (begin >= high).  Use the
 	 * boolean identity !(a || b)  === (!a && !b).
 	 */
+#ifdef CONFIG_PPC_STD_MMU_64
 	if (htab_address) {
 		low = __pa(htab_address);
 		high = low + htab_size_bytes;
@@ -88,6 +89,7 @@ int default_machine_kexec_prepare(struct kimage *image)
 				return -ETXTBSY;
 		}
 	}
+#endif /* CONFIG_PPC_STD_MMU_64 */
 
 	/* We also should not overwrite the tce tables */
 	for_each_node_by_type(node, "pci") {
@@ -381,7 +383,7 @@ void default_machine_kexec(struct kimage *image)
 	/* NOTREACHED */
 }
 
-#ifndef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_STD_MMU_64
 /* Values we need to export to the second kernel via the device tree. */
 static unsigned long htab_base;
 static unsigned long htab_size;
@@ -428,4 +430,4 @@ static int __init export_htab_values(void)
 	return 0;
 }
 late_initcall(export_htab_values);
-#endif /* !CONFIG_PPC_BOOK3E */
+#endif /* CONFIG_PPC_STD_MMU_64 */
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index ee62b197502d..92a66a2a9b85 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -77,6 +77,7 @@ void __flush_tlb_power9(unsigned int action)
 
 
 /* flush SLBs and reload */
+#ifdef CONFIG_PPC_STD_MMU_64
 static void flush_and_reload_slb(void)
 {
 	struct slb_shadow *slb;
@@ -110,6 +111,7 @@ static void flush_and_reload_slb(void)
 		asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
 	}
 }
+#endif
 
 static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
 {
@@ -120,6 +122,7 @@ static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
 	 * reset the error bits whenever we handle them so that at the end
 	 * we can check whether we handled all of them or not.
 	 * */
+#ifdef CONFIG_PPC_STD_MMU_64
 	if (dsisr & slb_error_bits) {
 		flush_and_reload_slb();
 		/* reset error bits */
@@ -131,6 +134,7 @@ static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
 		/* reset error bits */
 		dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB;
 	}
+#endif
 	/* Any other errors we don't understand? */
 	if (dsisr & 0xffffffffUL)
 		handled = 0;
@@ -150,6 +154,7 @@ static long mce_handle_common_ierror(uint64_t srr1)
 	switch (P7_SRR1_MC_IFETCH(srr1)) {
 	case 0:
 		break;
+#ifdef CONFIG_PPC_STD_MMU_64
 	case P7_SRR1_MC_IFETCH_SLB_PARITY:
 	case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
 		/* flush and reload SLBs for SLB errors. */
@@ -162,6 +167,7 @@ static long mce_handle_common_ierror(uint64_t srr1)
 			handled = 1;
 		}
 		break;
+#endif
 	default:
 		break;
 	}
@@ -175,10 +181,12 @@ static long mce_handle_ierror_p7(uint64_t srr1)
 
 	handled = mce_handle_common_ierror(srr1);
 
+#ifdef CONFIG_PPC_STD_MMU_64
 	if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
 		flush_and_reload_slb();
 		handled = 1;
 	}
+#endif
 	return handled;
 }
 
@@ -321,10 +329,12 @@ static long mce_handle_ierror_p8(uint64_t srr1)
 
 	handled = mce_handle_common_ierror(srr1);
 
+#ifdef CONFIG_PPC_STD_MMU_64
 	if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
 		flush_and_reload_slb();
 		handled = 1;
 	}
+#endif
 	return handled;
 }
 
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index d7a9df51b974..e35b018be765 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1075,7 +1075,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
 	}
 #endif /* CONFIG_PPC64 */
 
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_STD_MMU_64
 	batch = this_cpu_ptr(&ppc64_tlb_batch);
 	if (batch->active) {
 		current_thread_info()->local_flags |= _TLF_LAZY_MMU;
@@ -1083,7 +1083,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
 			__flush_tlb_pending(batch);
 		batch->active = 0;
 	}
-#endif /* CONFIG_PPC_BOOK3S_64 */
+#endif /* CONFIG_PPC_STD_MMU_64 */
 
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 	switch_booke_debug_regs(&new->thread.debug);
@@ -1129,7 +1129,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
 
 	last = _switch(old_thread, new_thread);
 
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_STD_MMU_64
 	if (current_thread_info()->local_flags & _TLF_LAZY_MMU) {
 		current_thread_info()->local_flags &= ~_TLF_LAZY_MMU;
 		batch = this_cpu_ptr(&ppc64_tlb_batch);
@@ -1138,8 +1138,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
 
 	if (current_thread_info()->task->thread.regs)
 		restore_math(current_thread_info()->task->thread.regs);
-
-#endif /* CONFIG_PPC_BOOK3S_64 */
+#endif /* CONFIG_PPC_STD_MMU_64 */
 
 	return last;
 }
@@ -1374,6 +1373,9 @@ static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
 	unsigned long sp_vsid;
 	unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
 
+	if (radix_enabled())
+		return;
+
 	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
 		sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_1T)
 			<< SLB_VSID_SHIFT_1T;
@@ -1920,7 +1922,8 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
 	 * the heap, we can put it above 1TB so it is backed by a 1TB
 	 * segment. Otherwise the heap will be in the bottom 1TB
 	 * which always uses 256MB segments and this may result in a
-	 * performance penalty.
+	 * performance penalty. We don't need to worry about radix. For
+	 * radix, mmu_highuser_ssize remains unchanged from 256MB.
 	 */
 	if (!is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T))
 		base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T);
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 942796fa4767..308283b7b60c 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2913,7 +2913,7 @@ static void xmon_print_symbol(unsigned long address, const char *mid,
 	printf("%s", after);
 }
 
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_STD_MMU_64
 void dump_segments(void)
 {
 	int i;
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 40/65] powerpc/mm/radix: Isolate hash table function from pseries guest code
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (37 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 39/65] powerpc/mm/radix: Use STD_MMU_64 to properly isolate hash related code Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 41/65] powerpc/mm/radix: Add checks in slice code to catch radix usage Aneesh Kumar K.V
                   ` (25 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/pseries/lpar.c    | 14 +++++++++++---
 arch/powerpc/platforms/pseries/lparcfg.c |  3 ++-
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index e4ceba2b1551..7f6100d91b4b 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -89,18 +89,21 @@ void vpa_init(int cpu)
 		       "%lx failed with %ld\n", cpu, hwcpu, addr, ret);
 		return;
 	}
+
+#ifdef CONFIG_PPC_STD_MMU_64
 	/*
 	 * PAPR says this feature is SLB-Buffer but firmware never
 	 * reports that.  All SPLPAR support SLB shadow buffer.
 	 */
-	addr = __pa(paca[cpu].slb_shadow_ptr);
-	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+	if (!radix_enabled() && firmware_has_feature(FW_FEATURE_SPLPAR)) {
+		addr = __pa(paca[cpu].slb_shadow_ptr);
 		ret = register_slb_shadow(hwcpu, addr);
 		if (ret)
 			pr_err("WARNING: SLB shadow buffer registration for "
 			       "cpu %d (hw %d) of area %lx failed with %ld\n",
 			       cpu, hwcpu, addr, ret);
 	}
+#endif /* CONFIG_PPC_STD_MMU_64 */
 
 	/*
 	 * Register dispatch trace log, if one has been allocated.
@@ -123,6 +126,8 @@ void vpa_init(int cpu)
 	}
 }
 
+#ifdef CONFIG_PPC_STD_MMU_64
+
 static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 				     unsigned long vpn, unsigned long pa,
 				     unsigned long rflags, unsigned long vflags,
@@ -655,6 +660,8 @@ static void pSeries_set_page_state(struct page *page, int order,
 
 void arch_free_page(struct page *page, int order)
 {
+	if (radix_enabled())
+		return;
 	if (!cmo_free_hint_flag || !firmware_has_feature(FW_FEATURE_CMO))
 		return;
 
@@ -662,7 +669,8 @@ void arch_free_page(struct page *page, int order)
 }
 EXPORT_SYMBOL(arch_free_page);
 
-#endif
+#endif /* CONFIG_PPC_SMLPAR */
+#endif /* CONFIG_PPC_STD_MMU_64 */
 
 #ifdef CONFIG_TRACEPOINTS
 #ifdef HAVE_JUMP_LABEL
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index c9fecf09b8fa..afa05a2cb702 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -484,8 +484,9 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
 	seq_printf(m, "shared_processor_mode=%d\n",
 		   lppaca_shared_proc(get_lppaca()));
 
+#ifdef CONFIG_PPC_STD_MMU_64
 	seq_printf(m, "slb_size=%d\n", mmu_slb_size);
-
+#endif
 	parse_em_data(m);
 
 	return 0;
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 41/65] powerpc/mm/radix: Add checks in slice code to catch radix usage
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (38 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 40/65] powerpc/mm/radix: Isolate hash table function from pseries guest code Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 42/65] powerpc/mm/radix: Limit paca allocation in radix Aneesh Kumar K.V
                   ` (24 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Radix doesn't need slice support. Catch wrong usage of the slice feature
with radix enabled.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/mm/slice.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index ee21b8699cee..2b27458902ee 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -395,6 +395,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 
 	/* Sanity checks */
 	BUG_ON(mm->task_size == 0);
+	VM_BUG_ON(radix_enabled());
 
 	slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize);
 	slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d\n",
@@ -568,6 +569,16 @@ unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
 	unsigned char *hpsizes;
 	int index, mask_index;
 
+	/*
+	 * Radix doesn't use slice, but can get enabled along with MMU_SLICE
+	 */
+	if (radix_enabled()) {
+#ifdef CONFIG_PPC_64K_PAGES
+		return MMU_PAGE_64K;
+#else
+		return MMU_PAGE_4K;
+#endif
+	}
 	if (addr < SLICE_LOW_TOP) {
 		u64 lpsizes;
 		lpsizes = mm->context.low_slices_psize;
@@ -605,6 +616,7 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
 
 	slice_dbg("slice_set_user_psize(mm=%p, psize=%d)\n", mm, psize);
 
+	VM_BUG_ON(radix_enabled());
 	spin_lock_irqsave(&slice_convert_lock, flags);
 
 	old_psize = mm->context.user_psize;
@@ -649,6 +661,7 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
 {
 	struct slice_mask mask = slice_range_to_mask(start, len);
 
+	VM_BUG_ON(radix_enabled());
 	slice_convert(mm, mask, psize);
 }
 
@@ -678,6 +691,9 @@ int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
 	struct slice_mask mask, available;
 	unsigned int psize = mm->context.user_psize;
 
+	if (radix_enabled())
+		return 0;
+
 	mask = slice_range_to_mask(addr, len);
 	available = slice_mask_for_size(mm, psize);
 #ifdef CONFIG_PPC_64K_PAGES
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 42/65] powerpc/mm/radix: Limit paca allocation in radix
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (39 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 41/65] powerpc/mm/radix: Add checks in slice code to catch radix usage Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 43/65] powerpc/mm/radix: Pick the address layout for radix config Aneesh Kumar K.V
                   ` (23 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

On return from RTAS we access the paca variables with 64-bit mode
disabled. This requires us to limit the paca to the 32-bit address range.

Note: we don't set ppc64_rma_size for radix and because of that we end up
with a limit value of 0.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/paca.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 01ea0edf0579..9c6e0d1eb61b 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -216,6 +216,11 @@ void __init allocate_pacas(void)
 	 * the first segment.
 	 */
 	limit = min(0x10000000ULL, limit);
+	/*
+	 * Fix it properly. Radix hack
+	 */
+	if (limit == 0)
+		limit = 0x10000000ULL;
 #endif
 
 	paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids);
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 43/65] powerpc/mm/radix: Pick the address layout for radix config
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (40 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 42/65] powerpc/mm/radix: Limit paca allocation in radix Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 44/65] powerpc/mm/radix: Update secondary PTCR Aneesh Kumar K.V
                   ` (22 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

This is the same as the generic code, but we have to duplicate it
because we need to enable it along with hash, which sets
HAVE_ARCH_UNMAPPED_AREA. Hash needs special get_unmapped_area handling
because of limitations around the base page size. With radix we don't
have such restrictions.
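
For contrast, a rough sketch of the existing hash path that makes the
generic code unusable here: with HAVE_ARCH_UNMAPPED_AREA set, every
mapping request is routed through the slice code so the base page size
constraints are honoured (simplified from arch/powerpc/mm/slice.c, shown
for reference only):

	unsigned long arch_get_unmapped_area(struct file *filp,
					     unsigned long addr,
					     unsigned long len,
					     unsigned long pgoff,
					     unsigned long flags)
	{
		/* slices constrain the base page size per address range */
		return slice_get_unmapped_area(addr, len, flags,
					current->mm->context.user_psize, 0);
	}

Radix has no per-slice page size restriction, hence the plain
bottom-up/top-down variants added below.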

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/mm/mmap.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)

diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 0f0502e12f6c..001997c7299b 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -26,6 +26,8 @@
 #include <linux/mm.h>
 #include <linux/random.h>
 #include <linux/sched.h>
+#include <linux/security.h>
+#include <linux/mman.h>
 
 /*
  * Top of mmap area (just below the process stack).
@@ -78,6 +80,111 @@ static inline unsigned long mmap_base(unsigned long rnd)
 	return PAGE_ALIGN(TASK_SIZE - gap - rnd);
 }
 
+#ifdef CONFIG_PPC_RADIX_MMU
+/*
+ * Same as the generic code, used only for radix. We would not need to
+ * override the generic version, but we have to duplicate it because
+ * hash sets HAVE_ARCH_UNMAPPED_AREA.
+ */
+static unsigned long
+arch_radix_get_unmapped_area(struct file *filp, unsigned long addr,
+			     unsigned long len, unsigned long pgoff,
+			     unsigned long flags)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	struct vm_unmapped_area_info info;
+
+	if (len > TASK_SIZE - mmap_min_addr)
+		return -ENOMEM;
+
+	if (flags & MAP_FIXED)
+		return addr;
+
+	if (addr) {
+		addr = PAGE_ALIGN(addr);
+		vma = find_vma(mm, addr);
+		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+		    (!vma || addr + len <= vma->vm_start))
+			return addr;
+	}
+
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = mm->mmap_base;
+	info.high_limit = TASK_SIZE;
+	info.align_mask = 0;
+	return vm_unmapped_area(&info);
+}
+
+static unsigned long
+arch_radix_get_unmapped_area_topdown(struct file *filp,
+				     const unsigned long addr0,
+				     const unsigned long len,
+				     const unsigned long pgoff,
+				     const unsigned long flags)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm = current->mm;
+	unsigned long addr = addr0;
+	struct vm_unmapped_area_info info;
+
+	/* requested length too big for entire address space */
+	if (len > TASK_SIZE - mmap_min_addr)
+		return -ENOMEM;
+
+	if (flags & MAP_FIXED)
+		return addr;
+
+	/* requesting a specific address */
+	if (addr) {
+		addr = PAGE_ALIGN(addr);
+		vma = find_vma(mm, addr);
+		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+				(!vma || addr + len <= vma->vm_start))
+			return addr;
+	}
+
+	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+	info.length = len;
+	info.low_limit = max(PAGE_SIZE, mmap_min_addr);
+	info.high_limit = mm->mmap_base;
+	info.align_mask = 0;
+	addr = vm_unmapped_area(&info);
+
+	/*
+	 * A failed mmap() very likely causes application failure,
+	 * so fall back to the bottom-up function here. This scenario
+	 * can happen with large stack limits and large mmap()
+	 * allocations.
+	 */
+	if (addr & ~PAGE_MASK) {
+		VM_BUG_ON(addr != -ENOMEM);
+		info.flags = 0;
+		info.low_limit = TASK_UNMAPPED_BASE;
+		info.high_limit = TASK_SIZE;
+		addr = vm_unmapped_area(&info);
+	}
+
+	return addr;
+}
+
+static void arch_pick_radix_mmap_layout(struct mm_struct *mm,
+					unsigned long random_factor)
+{
+	if (mmap_is_legacy()) {
+		mm->mmap_base = TASK_UNMAPPED_BASE;
+		mm->get_unmapped_area = arch_radix_get_unmapped_area;
+	} else {
+		mm->mmap_base = mmap_base(random_factor);
+		mm->get_unmapped_area = arch_radix_get_unmapped_area_topdown;
+	}
+}
+#else
+/* dummy */
+extern void arch_pick_radix_mmap_layout(struct mm_struct *mm,
+					unsigned long random_factor);
+#endif
 /*
  * This function, called very early during the creation of a new
  * process VM image, sets up which VM layout function to use:
@@ -89,6 +196,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	if (current->flags & PF_RANDOMIZE)
 		random_factor = arch_mmap_rnd();
 
+	if (radix_enabled())
+		return arch_pick_radix_mmap_layout(mm, random_factor);
 	/*
 	 * Fall back to the standard layout if the personality
 	 * bit is set, or if the expected stack growth is unlimited:
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 44/65] powerpc/mm/radix: Update secondary PTCR
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (41 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 43/65] powerpc/mm/radix: Pick the address layout for radix config Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 45/65] powerpc/mm: Make a copy of pgalloc.h for 32 and 64 book3s Aneesh Kumar K.V
                   ` (21 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/mm/hash_utils_64.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 8dfc6ae85d4a..be5d123b3f61 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -892,9 +892,13 @@ void __init hlearly_init_mmu(void)
 void hlearly_init_mmu_secondary(void)
 {
 	/* Initialize hash table for that CPU */
-	if (!firmware_has_feature(FW_FEATURE_LPAR))
-		mtspr(SPRN_SDR1, _SDR1);
-
+	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+		if (!cpu_has_feature(CPU_FTR_ARCH_300))
+			mtspr(SPRN_SDR1, _SDR1);
+		else
+			mtspr(SPRN_PTCR,
+			      __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
+	}
 	/* Initialize SLB */
 	slb_initialize();
 }
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 45/65] powerpc/mm: Make a copy of pgalloc.h for 32 and 64 book3s
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (42 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 44/65] powerpc/mm/radix: Update secondary PTCR Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 46/65] powerpc/mm: revert changes made to generic pgalloc-64.h Aneesh Kumar K.V
                   ` (20 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

This patch starts to make a book3s variant of the pgalloc headers. We have
multiple book3s-specific changes such as:
* 4-level page table
* store the physical address in higher-level tables
* use pte_t * for pgtable_t

Having a book3s 64 specific variant helps keep the code simpler and
removes lots of #ifdefs around the code.
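
The "store the physical address" point is what __pgtable_ptr_val() hides
in the new headers. A sketch of the two flavours, for reference (the
book3s definition lives in the book3s 64 headers added earlier in this
series; the exact shape shown here is an assumption, not this patch):

	/* book3s 64: upper-level entries hold the physical address */
	#define __pgtable_ptr_val(ptr)	__pa(ptr)

	/* nohash 64: upper-level entries keep the kernel virtual address */
	/* #define __pgtable_ptr_val(ptr)	((unsigned long)(ptr)) */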

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/32/pgalloc.h | 109 +++++++++++
 arch/powerpc/include/asm/book3s/64/pgalloc.h | 266 +++++++++++++++++++++++++++
 2 files changed, 375 insertions(+)
 create mode 100644 arch/powerpc/include/asm/book3s/32/pgalloc.h
 create mode 100644 arch/powerpc/include/asm/book3s/64/pgalloc.h

diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
new file mode 100644
index 000000000000..76d6b9e0c8a9
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -0,0 +1,109 @@
+#ifndef _ASM_POWERPC_PGALLOC_32_H
+#define _ASM_POWERPC_PGALLOC_32_H
+
+#include <linux/threads.h>
+
+/* For 32-bit, all levels of page tables are just drawn from get_free_page() */
+#define MAX_PGTABLE_INDEX_SIZE	0
+
+extern void __bad_pte(pmd_t *pmd);
+
+extern pgd_t *pgd_alloc(struct mm_struct *mm);
+extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
+
+/*
+ * We don't have any real pmd's, and this code never triggers because
+ * the pgd will always be present..
+ */
+/* #define pmd_alloc_one(mm,address)       ({ BUG(); ((pmd_t *)2); }) */
+#define pmd_free(mm, x) 		do { } while (0)
+#define __pmd_free_tlb(tlb,x,a)		do { } while (0)
+/* #define pgd_populate(mm, pmd, pte)      BUG() */
+
+#ifndef CONFIG_BOOKE
+
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
+				       pte_t *pte)
+{
+	*pmdp = __pmd(__pa(pte) | _PMD_PRESENT);
+}
+
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
+				pgtable_t pte_page)
+{
+	*pmdp = __pmd((page_to_pfn(pte_page) << PAGE_SHIFT) | _PMD_PRESENT);
+}
+
+#define pmd_pgtable(pmd) pmd_page(pmd)
+#else
+
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
+				       pte_t *pte)
+{
+	*pmdp = __pmd((unsigned long)pte | _PMD_PRESENT);
+}
+
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
+				pgtable_t pte_page)
+{
+	*pmdp = __pmd((unsigned long)lowmem_page_address(pte_page) | _PMD_PRESENT);
+}
+
+#define pmd_pgtable(pmd) pmd_page(pmd)
+#endif
+
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
+extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+	free_page((unsigned long)pte);
+}
+
+static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
+{
+	pgtable_page_dtor(ptepage);
+	__free_page(ptepage);
+}
+
+static inline void pgtable_free(void *table, unsigned index_size)
+{
+	BUG_ON(index_size); /* 32-bit doesn't use this */
+	free_page((unsigned long)table);
+}
+
+#define check_pgt_cache()	do { } while (0)
+
+#ifdef CONFIG_SMP
+static inline void pgtable_free_tlb(struct mmu_gather *tlb,
+				    void *table, int shift)
+{
+	unsigned long pgf = (unsigned long)table;
+	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+	pgf |= shift;
+	tlb_remove_table(tlb, (void *)pgf);
+}
+
+static inline void __tlb_remove_table(void *_table)
+{
+	void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
+	unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
+
+	pgtable_free(table, shift);
+}
+#else
+static inline void pgtable_free_tlb(struct mmu_gather *tlb,
+				    void *table, int shift)
+{
+	pgtable_free(table, shift);
+}
+#endif
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
+				  unsigned long address)
+{
+	tlb_flush_pgtable(tlb, address);
+	pgtable_page_dtor(table);
+	pgtable_free_tlb(tlb, page_address(table), 0);
+}
+#endif /* _ASM_POWERPC_PGALLOC_32_H */
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
new file mode 100644
index 000000000000..8d5fc3ac43da
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -0,0 +1,266 @@
+#ifndef _ASM_POWERPC_PGALLOC_64_H
+#define _ASM_POWERPC_PGALLOC_64_H
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/slab.h>
+#include <linux/cpumask.h>
+#include <linux/percpu.h>
+
+struct vmemmap_backing {
+	struct vmemmap_backing *list;
+	unsigned long phys;
+	unsigned long virt_addr;
+};
+extern struct vmemmap_backing *vmemmap_list;
+
+/*
+ * Functions that deal with pagetables that could be at any level of
+ * the table need to be passed an "index_size" so they know how to
+ * handle allocation.  For PTE pages (which are linked to a struct
+ * page for now, and drawn from the main get_free_pages() pool), the
+ * allocation size will be (2^index_size * sizeof(pointer)) and
+ * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
+ *
+ * The maximum index size needs to be big enough to allow any
+ * pagetable sizes we need, but small enough to fit in the low bits of
+ * any page table pointer.  In other words all pagetables, even tiny
+ * ones, must be aligned to allow at least enough low 0 bits to
+ * contain this value.  This value is also used as a mask, so it must
+ * be one less than a power of two.
+ */
+#define MAX_PGTABLE_INDEX_SIZE	0xf
+
+extern struct kmem_cache *pgtable_cache[];
+#define PGT_CACHE(shift) ({				\
+			BUG_ON(!(shift));		\
+			pgtable_cache[(shift) - 1];	\
+		})
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL);
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
+}
+
+#ifndef CONFIG_PPC_64K_PAGES
+
+#define pgd_populate(MM, PGD, PUD)	pgd_set(PGD, __pgtable_ptr_val(PUD))
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
+				GFP_KERNEL|__GFP_REPEAT);
+}
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+	kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
+}
+
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+	pud_set(pud, __pgtable_ptr_val(pmd));
+}
+
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
+				       pte_t *pte)
+{
+	pmd_set(pmd, __pgtable_ptr_val(pte));
+}
+
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
+				pgtable_t pte_page)
+{
+	pmd_set(pmd, __pgtable_ptr_val(page_address(pte_page)));
+}
+
+#define pmd_pgtable(pmd) pmd_page(pmd)
+
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+					  unsigned long address)
+{
+	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
+}
+
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+				      unsigned long address)
+{
+	struct page *page;
+	pte_t *pte;
+
+	pte = pte_alloc_one_kernel(mm, address);
+	if (!pte)
+		return NULL;
+	page = virt_to_page(pte);
+	if (!pgtable_page_ctor(page)) {
+		__free_page(page);
+		return NULL;
+	}
+	return page;
+}
+
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+	free_page((unsigned long)pte);
+}
+
+static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
+{
+	pgtable_page_dtor(ptepage);
+	__free_page(ptepage);
+}
+
+static inline void pgtable_free(void *table, unsigned index_size)
+{
+	if (!index_size)
+		free_page((unsigned long)table);
+	else {
+		BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
+		kmem_cache_free(PGT_CACHE(index_size), table);
+	}
+}
+
+#ifdef CONFIG_SMP
+static inline void pgtable_free_tlb(struct mmu_gather *tlb,
+				    void *table, int shift)
+{
+	unsigned long pgf = (unsigned long)table;
+	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+	pgf |= shift;
+	tlb_remove_table(tlb, (void *)pgf);
+}
+
+static inline void __tlb_remove_table(void *_table)
+{
+	void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
+	unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
+
+	pgtable_free(table, shift);
+}
+#else /* !CONFIG_SMP */
+static inline void pgtable_free_tlb(struct mmu_gather *tlb,
+				    void *table, int shift)
+{
+	pgtable_free(table, shift);
+}
+#endif /* CONFIG_SMP */
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
+				  unsigned long address)
+{
+	tlb_flush_pgtable(tlb, address);
+	pgtable_page_dtor(table);
+	pgtable_free_tlb(tlb, page_address(table), 0);
+}
+
+#else /* if CONFIG_PPC_64K_PAGES */
+
+extern pte_t *page_table_alloc(struct mm_struct *, unsigned long, int);
+extern void page_table_free(struct mm_struct *, unsigned long *, int);
+extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
+#ifdef CONFIG_SMP
+extern void __tlb_remove_table(void *_table);
+#endif
+
+#ifndef __PAGETABLE_PUD_FOLDED
+/* book3s 64 is 4 level page table */
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+{
+	pgd_set(pgd, __pgtable_ptr_val(pud));
+}
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
+				GFP_KERNEL|__GFP_REPEAT);
+}
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+	kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
+}
+#endif
+
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+	pud_set(pud, __pgtable_ptr_val(pmd));
+}
+
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
+				       pte_t *pte)
+{
+	pmd_set(pmd, __pgtable_ptr_val(pte));
+}
+
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
+				pgtable_t pte_page)
+{
+	pmd_set(pmd, __pgtable_ptr_val(pte_page));
+}
+
+static inline pgtable_t pmd_pgtable(pmd_t pmd)
+{
+	return (pgtable_t)pmd_page_vaddr(pmd);
+}
+
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+					  unsigned long address)
+{
+	return (pte_t *)page_table_alloc(mm, address, 1);
+}
+
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+					unsigned long address)
+{
+	return (pgtable_t)page_table_alloc(mm, address, 0);
+}
+
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+	page_table_free(mm, (unsigned long *)pte, 1);
+}
+
+static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
+{
+	page_table_free(mm, (unsigned long *)ptepage, 0);
+}
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
+				  unsigned long address)
+{
+	tlb_flush_pgtable(tlb, address);
+	pgtable_free_tlb(tlb, table, 0);
+}
+#endif /* CONFIG_PPC_64K_PAGES */
+
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
+				GFP_KERNEL|__GFP_REPEAT);
+}
+
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+{
+	kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd);
+}
+
+#define __pmd_free_tlb(tlb, pmd, addr)		      \
+	pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX)
+#ifndef __PAGETABLE_PUD_FOLDED
+#define __pud_free_tlb(tlb, pud, addr)		      \
+	pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE)
+
+#endif /* __PAGETABLE_PUD_FOLDED */
+
+#define check_pgt_cache()	do { } while (0)
+
+#endif /* _ASM_POWERPC_PGALLOC_64_H */
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 46/65] powerpc/mm: revert changes made to generic pgalloc-64.h
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (43 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 45/65] powerpc/mm: Make a copy of pgalloc.h for 32 and 64 book3s Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 47/65] powerpc/mm: Copy pgalloc (part 2) Aneesh Kumar K.V
                   ` (19 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

This reverts the pgalloc-related changes made for implementing a 4-level
page table for the 64K Linux page size and for storing physical addresses
in the higher-level page tables, since those changes are only applicable to
the book3s 64 variant and we now have a separate copy of the headers for
book3s 64. This helps keep the generic headers simpler.
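
To illustrate the convention this restores in the generic header, here is a
minimal userspace sketch (purely illustrative; the helper names and the fake
translation are invented, this is not the kernel code): the nohash headers
store the next-level table's kernel virtual address directly in the
upper-level entry, while the book3s 64 copy keeps a translation step so it
can store a physical address instead.

#include <stdio.h>
#include <stdint.h>

/* Purely illustrative virtual -> "physical" translation. */
static uintptr_t fake_virt_to_phys(void *p)
{
	return (uintptr_t)p & 0x0000ffffffffffffUL;
}

/* nohash style after the revert: the entry is the virtual address itself. */
static uintptr_t entry_from_virt(void *next_table)
{
	return (uintptr_t)next_table;
}

/* book3s 64 style: the entry holds a translated (physical-like) address. */
static uintptr_t entry_from_phys(void *next_table)
{
	return fake_virt_to_phys(next_table);
}

int main(void)
{
	static unsigned long pte_table[512];

	printf("virtual-address entry: %#lx\n",
	       (unsigned long)entry_from_virt(pte_table));
	printf("physical-style entry:  %#lx\n",
	       (unsigned long)entry_from_phys(pte_table));
	return 0;
}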

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/nohash/64/pgtable.h |  3 --
 arch/powerpc/include/asm/pgalloc-64.h        | 42 +++++++---------------------
 2 files changed, 10 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index f143d6fb3576..d4d808cf905e 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -108,9 +108,6 @@
 #ifndef __ASSEMBLY__
 /* pte_clear moved to later in this file */
 
-/* Pointers in the page table tree are virtual addresses */
-#define __pgtable_ptr_val(ptr)	((unsigned long)(ptr))
-
 #define PMD_BAD_BITS		(PTE_TABLE_SIZE-1)
 #define PUD_BAD_BITS		(PMD_TABLE_SIZE-1)
 
diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h
index 8d5fc3ac43da..69ef28a81733 100644
--- a/arch/powerpc/include/asm/pgalloc-64.h
+++ b/arch/powerpc/include/asm/pgalloc-64.h
@@ -53,7 +53,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 #ifndef CONFIG_PPC_64K_PAGES
 
-#define pgd_populate(MM, PGD, PUD)	pgd_set(PGD, __pgtable_ptr_val(PUD))
+#define pgd_populate(MM, PGD, PUD)	pgd_set(PGD, (unsigned long)PUD)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
@@ -68,19 +68,19 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 
 static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 {
-	pud_set(pud, __pgtable_ptr_val(pmd));
+	pud_set(pud, (unsigned long)pmd);
 }
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
 				       pte_t *pte)
 {
-	pmd_set(pmd, __pgtable_ptr_val(pte));
+	pmd_set(pmd, (unsigned long)pte);
 }
 
 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 				pgtable_t pte_page)
 {
-	pmd_set(pmd, __pgtable_ptr_val(page_address(pte_page)));
+	pmd_set(pmd, (unsigned long)page_address(pte_page));
 }
 
 #define pmd_pgtable(pmd) pmd_page(pmd)
@@ -171,45 +171,23 @@ extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
 extern void __tlb_remove_table(void *_table);
 #endif
 
-#ifndef __PAGETABLE_PUD_FOLDED
-/* book3s 64 is 4 level page table */
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
-{
-	pgd_set(pgd, __pgtable_ptr_val(pud));
-}
-
-static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
-	return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
-				GFP_KERNEL|__GFP_REPEAT);
-}
-
-static inline void pud_free(struct mm_struct *mm, pud_t *pud)
-{
-	kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
-}
-#endif
-
-static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
-{
-	pud_set(pud, __pgtable_ptr_val(pmd));
-}
+#define pud_populate(mm, pud, pmd)	pud_set(pud, (unsigned long)pmd)
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
 				       pte_t *pte)
 {
-	pmd_set(pmd, __pgtable_ptr_val(pte));
+	pmd_set(pmd, (unsigned long)pte);
 }
 
 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 				pgtable_t pte_page)
 {
-	pmd_set(pmd, __pgtable_ptr_val(pte_page));
+	pmd_set(pmd, (unsigned long)pte_page);
 }
 
 static inline pgtable_t pmd_pgtable(pmd_t pmd)
 {
-	return (pgtable_t)pmd_page_vaddr(pmd);
+	return (pgtable_t)(pmd_val(pmd) & ~PMD_MASKED_BITS);
 }
 
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
@@ -255,11 +233,11 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 
 #define __pmd_free_tlb(tlb, pmd, addr)		      \
 	pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX)
-#ifndef __PAGETABLE_PUD_FOLDED
+#ifndef CONFIG_PPC_64K_PAGES
 #define __pud_free_tlb(tlb, pud, addr)		      \
 	pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE)
 
-#endif /* __PAGETABLE_PUD_FOLDED */
+#endif /* CONFIG_PPC_64K_PAGES */
 
 #define check_pgt_cache()	do { } while (0)
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 47/65] powerpc/mm: Copy pgalloc (part 2)
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (44 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 46/65] powerpc/mm: revert changes made to generic pgalloc-64.h Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 48/65] powerpc/mm: Simplify the code dropping 4 level table #ifdef Aneesh Kumar K.V
                   ` (18 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

This moves the nohash variant of the pgalloc headers to the nohash/ directory.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/32/pgalloc.h       |  6 +++---
 arch/powerpc/include/asm/book3s/64/pgalloc.h       | 17 ++++++++++------
 arch/powerpc/include/asm/book3s/pgalloc.h          | 19 ++++++++++++++++++
 .../asm/{pgalloc-32.h => nohash/32/pgalloc.h}      |  0
 .../asm/{pgalloc-64.h => nohash/64/pgalloc.h}      |  0
 arch/powerpc/include/asm/nohash/pgalloc.h          | 23 ++++++++++++++++++++++
 arch/powerpc/include/asm/pgalloc.h                 | 19 +++---------------
 7 files changed, 59 insertions(+), 25 deletions(-)
 create mode 100644 arch/powerpc/include/asm/book3s/pgalloc.h
 rename arch/powerpc/include/asm/{pgalloc-32.h => nohash/32/pgalloc.h} (100%)
 rename arch/powerpc/include/asm/{pgalloc-64.h => nohash/64/pgalloc.h} (100%)
 create mode 100644 arch/powerpc/include/asm/nohash/pgalloc.h

diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 76d6b9e0c8a9..a2350194fc76 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_POWERPC_PGALLOC_32_H
-#define _ASM_POWERPC_PGALLOC_32_H
+#ifndef _ASM_POWERPC_BOOK3S_32_PGALLOC_H
+#define _ASM_POWERPC_BOOK3S_32_PGALLOC_H
 
 #include <linux/threads.h>
 
@@ -106,4 +106,4 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
 	pgtable_page_dtor(table);
 	pgtable_free_tlb(tlb, page_address(table), 0);
 }
-#endif /* _ASM_POWERPC_PGALLOC_32_H */
+#endif /* _ASM_POWERPC_BOOK3S_32_PGALLOC_H */
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 8d5fc3ac43da..54017260c8bf 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_POWERPC_PGALLOC_64_H
-#define _ASM_POWERPC_PGALLOC_64_H
+#ifndef _ASM_POWERPC_BOOK3S_64_PGALLOC_H
+#define _ASM_POWERPC_BOOK3S_64_PGALLOC_H
 /*
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -52,8 +52,10 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 }
 
 #ifndef CONFIG_PPC_64K_PAGES
-
-#define pgd_populate(MM, PGD, PUD)	pgd_set(PGD, __pgtable_ptr_val(PUD))
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+{
+	pgd_set(pgd, __pgtable_ptr_val(pud));
+}
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
@@ -83,7 +85,10 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 	pmd_set(pmd, __pgtable_ptr_val(page_address(pte_page)));
 }
 
-#define pmd_pgtable(pmd) pmd_page(pmd)
+static inline pgtable_t pmd_pgtable(pmd_t pmd)
+{
+	return pmd_page(pmd);
+}
 
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long address)
@@ -263,4 +268,4 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 
 #define check_pgt_cache()	do { } while (0)
 
-#endif /* _ASM_POWERPC_PGALLOC_64_H */
+#endif /* _ASM_POWERPC_BOOK3S_64_PGALLOC_H */
diff --git a/arch/powerpc/include/asm/book3s/pgalloc.h b/arch/powerpc/include/asm/book3s/pgalloc.h
new file mode 100644
index 000000000000..54f591e9572e
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/pgalloc.h
@@ -0,0 +1,19 @@
+#ifndef _ASM_POWERPC_BOOK3S_PGALLOC_H
+#define _ASM_POWERPC_BOOK3S_PGALLOC_H
+
+#include <linux/mm.h>
+
+extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
+static inline void tlb_flush_pgtable(struct mmu_gather *tlb,
+				     unsigned long address)
+{
+
+}
+
+#ifdef CONFIG_PPC64
+#include <asm/book3s/64/pgalloc.h>
+#else
+#include <asm/book3s/32/pgalloc.h>
+#endif
+
+#endif /* _ASM_POWERPC_BOOK3S_PGALLOC_H */
diff --git a/arch/powerpc/include/asm/pgalloc-32.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
similarity index 100%
rename from arch/powerpc/include/asm/pgalloc-32.h
rename to arch/powerpc/include/asm/nohash/32/pgalloc.h
diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
similarity index 100%
rename from arch/powerpc/include/asm/pgalloc-64.h
rename to arch/powerpc/include/asm/nohash/64/pgalloc.h
diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h b/arch/powerpc/include/asm/nohash/pgalloc.h
new file mode 100644
index 000000000000..b39ec956d71e
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/pgalloc.h
@@ -0,0 +1,23 @@
+#ifndef _ASM_POWERPC_NOHASH_PGALLOC_H
+#define _ASM_POWERPC_NOHASH_PGALLOC_H
+
+#include <linux/mm.h>
+
+extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
+#ifdef CONFIG_PPC64
+extern void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address);
+#else
+/* 44x etc which is BOOKE not BOOK3E */
+static inline void tlb_flush_pgtable(struct mmu_gather *tlb,
+				     unsigned long address)
+{
+
+}
+#endif /* !CONFIG_PPC_BOOK3E */
+
+#ifdef CONFIG_PPC64
+#include <asm/nohash/64/pgalloc.h>
+#else
+#include <asm/nohash/32/pgalloc.h>
+#endif
+#endif /* _ASM_POWERPC_NOHASH_PGALLOC_H */
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
index fc3ee06eab87..0413457ba11d 100644
--- a/arch/powerpc/include/asm/pgalloc.h
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -1,25 +1,12 @@
 #ifndef _ASM_POWERPC_PGALLOC_H
 #define _ASM_POWERPC_PGALLOC_H
-#ifdef __KERNEL__
 
 #include <linux/mm.h>
 
-#ifdef CONFIG_PPC_BOOK3E
-extern void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address);
-#else /* CONFIG_PPC_BOOK3E */
-static inline void tlb_flush_pgtable(struct mmu_gather *tlb,
-				     unsigned long address)
-{
-}
-#endif /* !CONFIG_PPC_BOOK3E */
-
-extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
-
-#ifdef CONFIG_PPC64
-#include <asm/pgalloc-64.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/book3s/pgalloc.h>
 #else
-#include <asm/pgalloc-32.h>
+#include <asm/nohash/pgalloc.h>
 #endif
 
-#endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_PGALLOC_H */
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 48/65] powerpc/mm: Simplify the code dropping 4 level table #ifdef
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (45 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 47/65] powerpc/mm: Copy pgalloc (part 2) Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 49/65] powerpc/mm: Rename function to indicate we are allocating fragments Aneesh Kumar K.V
                   ` (17 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Simplify the code by dropping the 4-level page table #ifdef. We always use a
4-level page table now.
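
For readers who want to see the shape of a fixed 4-level walk outside the
kernel, here is a small self-contained sketch (the 9/9/9/9 index widths and
the 12-bit page offset are illustrative values, not the book3s 64 ones):

#include <stdio.h>

/* Illustrative index widths for a 4-level table with 4K pages. */
#define PAGE_SHIFT	12
#define PTE_INDEX_BITS	9
#define PMD_INDEX_BITS	9
#define PUD_INDEX_BITS	9
#define PGD_INDEX_BITS	9

/* Extract "bits" bits of the effective address starting at "shift". */
static unsigned long field(unsigned long ea, unsigned int shift, unsigned int bits)
{
	return (ea >> shift) & ((1UL << bits) - 1);
}

int main(void)
{
	unsigned long ea = 0x0000700123456789UL;
	unsigned int pte_shift = PAGE_SHIFT;
	unsigned int pmd_shift = pte_shift + PTE_INDEX_BITS;
	unsigned int pud_shift = pmd_shift + PMD_INDEX_BITS;
	unsigned int pgd_shift = pud_shift + PUD_INDEX_BITS;

	printf("pgd index:   %lu\n", field(ea, pgd_shift, PGD_INDEX_BITS));
	printf("pud index:   %lu\n", field(ea, pud_shift, PUD_INDEX_BITS));
	printf("pmd index:   %lu\n", field(ea, pmd_shift, PMD_INDEX_BITS));
	printf("pte index:   %lu\n", field(ea, pte_shift, PTE_INDEX_BITS));
	printf("page offset: %lu\n", field(ea, 0, PAGE_SHIFT));
	return 0;
}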

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/pgalloc.h | 57 +++++++++-------------------
 1 file changed, 18 insertions(+), 39 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 54017260c8bf..2a246245eca0 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -51,7 +51,6 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
 }
 
-#ifndef CONFIG_PPC_64K_PAGES
 static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
 {
 	pgd_set(pgd, __pgtable_ptr_val(pud));
@@ -78,7 +77,13 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
 {
 	pmd_set(pmd, __pgtable_ptr_val(pte));
 }
-
+/*
+ * FIXME!!
+ * Between 4K and 64K pages, we differ in what is stored in pmd. ie.
+ * typedef pte_t *pgtable_t; -> 64K
+ * typedef struct page *pgtable_t; -> 4k
+ */
+#ifdef CONFIG_PPC_4K_PAGES
 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 				pgtable_t pte_page)
 {
@@ -176,36 +181,6 @@ extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
 extern void __tlb_remove_table(void *_table);
 #endif
 
-#ifndef __PAGETABLE_PUD_FOLDED
-/* book3s 64 is 4 level page table */
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
-{
-	pgd_set(pgd, __pgtable_ptr_val(pud));
-}
-
-static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
-	return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
-				GFP_KERNEL|__GFP_REPEAT);
-}
-
-static inline void pud_free(struct mm_struct *mm, pud_t *pud)
-{
-	kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
-}
-#endif
-
-static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
-{
-	pud_set(pud, __pgtable_ptr_val(pmd));
-}
-
-static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
-				       pte_t *pte)
-{
-	pmd_set(pmd, __pgtable_ptr_val(pte));
-}
-
 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 				pgtable_t pte_page)
 {
@@ -245,7 +220,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
 	tlb_flush_pgtable(tlb, address);
 	pgtable_free_tlb(tlb, table, 0);
 }
-#endif /* CONFIG_PPC_64K_PAGES */
+#endif /* CONFIG_PPC_4K_PAGES */
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
@@ -258,13 +233,17 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 	kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd);
 }
 
-#define __pmd_free_tlb(tlb, pmd, addr)		      \
-	pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX)
-#ifndef __PAGETABLE_PUD_FOLDED
-#define __pud_free_tlb(tlb, pud, addr)		      \
-	pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE)
+static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
+                                  unsigned long address)
+{
+        return pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX);
+}
 
-#endif /* __PAGETABLE_PUD_FOLDED */
+static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
+                                  unsigned long address)
+{
+        pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE);
+}
 
 #define check_pgt_cache()	do { } while (0)
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 49/65] powerpc/mm: Rename function to indicate we are allocating fragments
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (46 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 48/65] powerpc/mm: Simplify the code dropping 4 level table #ifdef Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 50/65] powerpc/mm: make 4k and 64k use pte_t for pgtable_t Aneesh Kumar K.V
                   ` (16 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Only code cleanup. No functionality change.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/pgalloc.h | 12 ++++++------
 arch/powerpc/include/asm/nohash/64/pgalloc.h | 12 ++++++------
 arch/powerpc/mm/pgtable_64.c                 | 21 ++++-----------------
 3 files changed, 16 insertions(+), 29 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 2a246245eca0..37283e3d8e56 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -174,8 +174,8 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
 
 #else /* if CONFIG_PPC_64K_PAGES */
 
-extern pte_t *page_table_alloc(struct mm_struct *, unsigned long, int);
-extern void page_table_free(struct mm_struct *, unsigned long *, int);
+extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int);
+extern void pte_fragment_free(unsigned long *, int);
 extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
 #ifdef CONFIG_SMP
 extern void __tlb_remove_table(void *_table);
@@ -195,23 +195,23 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd)
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long address)
 {
-	return (pte_t *)page_table_alloc(mm, address, 1);
+	return (pte_t *)pte_fragment_alloc(mm, address, 1);
 }
 
 static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
 					unsigned long address)
 {
-	return (pgtable_t)page_table_alloc(mm, address, 0);
+	return (pgtable_t)pte_fragment_alloc(mm, address, 0);
 }
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
-	page_table_free(mm, (unsigned long *)pte, 1);
+	pte_fragment_free((unsigned long *)pte, 1);
 }
 
 static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
 {
-	page_table_free(mm, (unsigned long *)ptepage, 0);
+	pte_fragment_free((unsigned long *)ptepage, 0);
 }
 
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index 69ef28a81733..be0cce7f7d4e 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -164,8 +164,8 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
 
 #else /* if CONFIG_PPC_64K_PAGES */
 
-extern pte_t *page_table_alloc(struct mm_struct *, unsigned long, int);
-extern void page_table_free(struct mm_struct *, unsigned long *, int);
+extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int);
+extern void pte_fragment_free(unsigned long *, int);
 extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
 #ifdef CONFIG_SMP
 extern void __tlb_remove_table(void *_table);
@@ -193,23 +193,23 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd)
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long address)
 {
-	return (pte_t *)page_table_alloc(mm, address, 1);
+	return (pte_t *)pte_fragment_alloc(mm, address, 1);
 }
 
 static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
 					unsigned long address)
 {
-	return (pgtable_t)page_table_alloc(mm, address, 0);
+	return (pgtable_t)pte_fragment_alloc(mm, address, 0);
 }
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
-	page_table_free(mm, (unsigned long *)pte, 1);
+	pte_fragment_free((unsigned long *)pte, 1);
 }
 
 static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
 {
-	page_table_free(mm, (unsigned long *)ptepage, 0);
+	pte_fragment_free((unsigned long *)ptepage, 0);
 }
 
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index af6a83a6c90b..2304b9cac59e 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -357,7 +357,7 @@ static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
 	return (pte_t *)ret;
 }
 
-pte_t *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
+pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
 {
 	pte_t *pte;
 
@@ -368,7 +368,7 @@ pte_t *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
 	return __alloc_for_cache(mm, kernel);
 }
 
-void page_table_free(struct mm_struct *mm, unsigned long *table, int kernel)
+void pte_fragment_free(unsigned long *table, int kernel)
 {
 	struct page *page = virt_to_page(table);
 	if (put_page_testzero(page)) {
@@ -379,15 +379,6 @@ void page_table_free(struct mm_struct *mm, unsigned long *table, int kernel)
 }
 
 #ifdef CONFIG_SMP
-static void page_table_free_rcu(void *table)
-{
-	struct page *page = virt_to_page(table);
-	if (put_page_testzero(page)) {
-		pgtable_page_dtor(page);
-		free_hot_cold_page(page, 0);
-	}
-}
-
 void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
 {
 	unsigned long pgf = (unsigned long)table;
@@ -404,7 +395,7 @@ void __tlb_remove_table(void *_table)
 
 	if (!shift)
 		/* PTE page needs special handling */
-		page_table_free_rcu(table);
+		pte_fragment_free(table, 0);
 	else {
 		BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
 		kmem_cache_free(PGT_CACHE(shift), table);
@@ -415,11 +406,7 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
 {
 	if (!shift) {
 		/* PTE page needs special handling */
-		struct page *page = virt_to_page(table);
-		if (put_page_testzero(page)) {
-			pgtable_page_dtor(page);
-			free_hot_cold_page(page, 0);
-		}
+		pte_fragment_free(table, 0);
 	} else {
 		BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
 		kmem_cache_free(PGT_CACHE(shift), table);
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 50/65] powerpc/mm: make 4k and 64k use pte_t for pgtable_t
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (47 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 49/65] powerpc/mm: Rename function to indicate we are allocating fragments Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:23 ` [PATCH 51/65] powerpc/mm: Add radix pgalloc details Aneesh Kumar K.V
                   ` (15 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

The pgtable_page_dtor() call for nohash is now moved to pte_fragment_free_mm().
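
A rough standalone illustration of what the pgtable_t change means for the
populate path (toy types and names, not the kernel definitions): with a
struct page pointer the code must first recover the table's address, whereas
with a pte_t pointer the fragment address is stored as-is.

#include <stdio.h>
#include <stdint.h>

/* Toy stand-ins for the kernel types involved. */
typedef unsigned long pte_entry;

struct toy_page {
	void *backing;			/* what page_address() would return */
};

static void *toy_page_address(struct toy_page *pg)
{
	return pg->backing;
}

/* pgtable_t == struct page *: need the page's address before storing. */
static uintptr_t pmd_value_from_page(struct toy_page *pg)
{
	return (uintptr_t)toy_page_address(pg);
}

/* pgtable_t == pte_t *: the fragment pointer is stored directly. */
static uintptr_t pmd_value_from_fragment(pte_entry *frag)
{
	return (uintptr_t)frag;
}

int main(void)
{
	static pte_entry fragment[32];		/* e.g. a 256-byte table */
	struct toy_page pg = { .backing = fragment };

	printf("from struct page *: %#lx\n",
	       (unsigned long)pmd_value_from_page(&pg));
	printf("from pte_t *:       %#lx\n",
	       (unsigned long)pmd_value_from_fragment(fragment));
	return 0;
}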

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/pgalloc.h | 147 +++++++--------------------
 arch/powerpc/include/asm/nohash/64/pgalloc.h |  38 +------
 arch/powerpc/include/asm/page.h              |  10 +-
 arch/powerpc/mm/pgtable_64.c                 |   2 +-
 4 files changed, 52 insertions(+), 145 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 37283e3d8e56..faad1319ba26 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -41,6 +41,15 @@ extern struct kmem_cache *pgtable_cache[];
 			pgtable_cache[(shift) - 1];	\
 		})
 
+#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
+
+extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int);
+extern void pte_fragment_free(unsigned long *, int);
+extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
+#ifdef CONFIG_SMP
+extern void __tlb_remove_table(void *_table);
+#endif
+
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL);
@@ -72,29 +81,47 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 	pud_set(pud, __pgtable_ptr_val(pmd));
 }
 
+static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
+                                  unsigned long address)
+{
+        pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE);
+}
+
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
+				GFP_KERNEL|__GFP_REPEAT);
+}
+
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+{
+	kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd);
+}
+
+static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
+                                  unsigned long address)
+{
+        return pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX);
+}
+
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
 				       pte_t *pte)
 {
 	pmd_set(pmd, __pgtable_ptr_val(pte));
 }
-/*
- * FIXME!!
- * Between 4K and 64K pages, we differ in what is stored in pmd. ie.
- * typedef pte_t *pgtable_t; -> 64K
- * typedef struct page *pgtable_t; -> 4k
- */
-#ifdef CONFIG_PPC_4K_PAGES
+
 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 				pgtable_t pte_page)
 {
-	pmd_set(pmd, __pgtable_ptr_val(page_address(pte_page)));
+	pmd_set(pmd, __pgtable_ptr_val(pte_page));
 }
 
 static inline pgtable_t pmd_pgtable(pmd_t pmd)
 {
-	return pmd_page(pmd);
+	return (pgtable_t)pmd_page_vaddr(pmd);
 }
 
+#ifdef CONFIG_PPC_4K_PAGES
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long address)
 {
@@ -115,83 +142,10 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
 		__free_page(page);
 		return NULL;
 	}
-	return page;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-	free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
-{
-	pgtable_page_dtor(ptepage);
-	__free_page(ptepage);
-}
-
-static inline void pgtable_free(void *table, unsigned index_size)
-{
-	if (!index_size)
-		free_page((unsigned long)table);
-	else {
-		BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
-		kmem_cache_free(PGT_CACHE(index_size), table);
-	}
-}
-
-#ifdef CONFIG_SMP
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
-				    void *table, int shift)
-{
-	unsigned long pgf = (unsigned long)table;
-	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
-	pgf |= shift;
-	tlb_remove_table(tlb, (void *)pgf);
-}
-
-static inline void __tlb_remove_table(void *_table)
-{
-	void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
-	unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
-
-	pgtable_free(table, shift);
-}
-#else /* !CONFIG_SMP */
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
-				    void *table, int shift)
-{
-	pgtable_free(table, shift);
-}
-#endif /* CONFIG_SMP */
-
-static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
-				  unsigned long address)
-{
-	tlb_flush_pgtable(tlb, address);
-	pgtable_page_dtor(table);
-	pgtable_free_tlb(tlb, page_address(table), 0);
+	return pte;
 }
-
 #else /* if CONFIG_PPC_64K_PAGES */
 
-extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int);
-extern void pte_fragment_free(unsigned long *, int);
-extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
-#ifdef CONFIG_SMP
-extern void __tlb_remove_table(void *_table);
-#endif
-
-static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
-				pgtable_t pte_page)
-{
-	pmd_set(pmd, __pgtable_ptr_val(pte_page));
-}
-
-static inline pgtable_t pmd_pgtable(pmd_t pmd)
-{
-	return (pgtable_t)pmd_page_vaddr(pmd);
-}
-
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long address)
 {
@@ -199,10 +153,11 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 }
 
 static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-					unsigned long address)
+				      unsigned long address)
 {
 	return (pgtable_t)pte_fragment_alloc(mm, address, 0);
 }
+#endif
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
@@ -220,30 +175,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
 	tlb_flush_pgtable(tlb, address);
 	pgtable_free_tlb(tlb, table, 0);
 }
-#endif /* CONFIG_PPC_4K_PAGES */
-
-static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
-	return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
-				GFP_KERNEL|__GFP_REPEAT);
-}
-
-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
-{
-	kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd);
-}
-
-static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
-                                  unsigned long address)
-{
-        return pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX);
-}
-
-static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
-                                  unsigned long address)
-{
-        pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE);
-}
 
 #define check_pgt_cache()	do { } while (0)
 
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index be0cce7f7d4e..0c12a3bfe2ab 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -119,46 +119,14 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
 	__free_page(ptepage);
 }
 
-static inline void pgtable_free(void *table, unsigned index_size)
-{
-	if (!index_size)
-		free_page((unsigned long)table);
-	else {
-		BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
-		kmem_cache_free(PGT_CACHE(index_size), table);
-	}
-}
-
+extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
 #ifdef CONFIG_SMP
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
-				    void *table, int shift)
-{
-	unsigned long pgf = (unsigned long)table;
-	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
-	pgf |= shift;
-	tlb_remove_table(tlb, (void *)pgf);
-}
-
-static inline void __tlb_remove_table(void *_table)
-{
-	void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
-	unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
-
-	pgtable_free(table, shift);
-}
-#else /* !CONFIG_SMP */
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
-				    void *table, int shift)
-{
-	pgtable_free(table, shift);
-}
-#endif /* CONFIG_SMP */
-
+extern void __tlb_remove_table(void *_table);
+#endif
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
 				  unsigned long address)
 {
 	tlb_flush_pgtable(tlb, address);
-	pgtable_page_dtor(table);
 	pgtable_free_tlb(tlb, page_address(table), 0);
 }
 
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 158574d2acf4..51db3a37bced 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -316,12 +316,20 @@ void arch_free_page(struct page *page, int order);
 #endif
 
 struct vm_area_struct;
-
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * For book3s 64 with 4K and 64K Linux page sizes
+ * we want to use pointers, because the page table
+ * actually stores the pfn
+ */
+typedef pte_t *pgtable_t;
+#else
 #if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC64)
 typedef pte_t *pgtable_t;
 #else
 typedef struct page *pgtable_t;
 #endif
+#endif
 
 #include <asm-generic/memory_model.h>
 #endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 2304b9cac59e..78910035cbca 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -367,6 +367,7 @@ pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel
 
 	return __alloc_for_cache(mm, kernel);
 }
+#endif /* CONFIG_PPC_64K_PAGES */
 
 void pte_fragment_free(unsigned long *table, int kernel)
 {
@@ -413,7 +414,6 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
 	}
 }
 #endif
-#endif /* CONFIG_PPC_64K_PAGES */
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 51/65] powerpc/mm: Add radix pgalloc details
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (48 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 50/65] powerpc/mm: make 4k and 64k use pte_t for pgtable_t Aneesh Kumar K.V
@ 2016-03-27  8:23 ` Aneesh Kumar K.V
  2016-03-27  8:24 ` [PATCH 52/65] powerpc/mm: Update pte filter for radix Aneesh Kumar K.V
                   ` (14 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:23 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/pgalloc.h | 34 ++++++++++++++++++++++++----
 arch/powerpc/include/asm/book3s/64/pgtable.h | 10 ++++++--
 arch/powerpc/mm/hash_utils_64.c              |  7 ++++++
 arch/powerpc/mm/pgtable-radix.c              |  5 +++-
 arch/powerpc/mm/pgtable_64.c                 |  6 +++++
 5 files changed, 55 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index faad1319ba26..a282674b2378 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -50,19 +50,45 @@ extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
 extern void __tlb_remove_table(void *_table);
 #endif
 
+static inline pgd_t *rpgd_alloc(struct mm_struct *mm)
+{
+#ifdef CONFIG_PPC_64K_PAGES
+	return (pgd_t *)__get_free_page(PGALLOC_GFP);
+#else
+	struct page *page;
+	page = alloc_pages(PGALLOC_GFP, 4);
+	if (!page)
+		return NULL;
+	return (pgd_t *) page_address(page);
+#endif
+}
+
+static inline void rpgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+#ifdef CONFIG_PPC_64K_PAGES
+	free_page((unsigned long)pgd);
+#else
+	free_pages((unsigned long)pgd, 4);
+#endif
+}
+
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
+	if (radix_enabled())
+		return rpgd_alloc(mm);
 	return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL);
 }
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
+	if (radix_enabled())
+		return rpgd_free(mm, pgd);
 	kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
 }
 
 static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
 {
-	pgd_set(pgd, __pgtable_ptr_val(pud));
+	pgd_set(pgd, __pgtable_ptr_val(pud) | PGD_VAL_BITS);
 }
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
@@ -78,7 +104,7 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 
 static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 {
-	pud_set(pud, __pgtable_ptr_val(pmd));
+	pud_set(pud, __pgtable_ptr_val(pmd) | PUD_VAL_BITS);
 }
 
 static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
@@ -107,13 +133,13 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
 				       pte_t *pte)
 {
-	pmd_set(pmd, __pgtable_ptr_val(pte));
+	pmd_set(pmd, __pgtable_ptr_val(pte) | PMD_VAL_BITS);
 }
 
 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 				pgtable_t pte_page)
 {
-	pmd_set(pmd, __pgtable_ptr_val(pte_page));
+	pmd_set(pmd, __pgtable_ptr_val(pte_page) | PMD_VAL_BITS);
 }
 
 static inline pgtable_t pmd_pgtable(pmd_t pmd)
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index a993b7c820a9..757f78776003 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -170,11 +170,17 @@ extern unsigned long __pgd_table_size;
 #define PMD_TABLE_SIZE	__pmd_table_size
 #define PUD_TABLE_SIZE	__pud_table_size
 #define PGD_TABLE_SIZE	__pgd_table_size
+
+extern unsigned long __pmd_val_bits;
+extern unsigned long __pud_val_bits;
+extern unsigned long __pgd_val_bits;
+#define PMD_VAL_BITS	__pmd_val_bits
+#define PUD_VAL_BITS	__pud_val_bits
+#define PGD_VAL_BITS	__pgd_val_bits
 /*
  * Pgtable size used by swapper, init in asm code
- * We will switch this later to radix PGD
  */
-#define MAX_PGD_TABLE_SIZE (sizeof(pgd_t) << H_PGD_INDEX_SIZE)
+#define MAX_PGD_TABLE_SIZE (sizeof(pgd_t) << R_PGD_INDEX_SIZE)
 
 #define PTRS_PER_PTE	(1 << PTE_INDEX_SIZE)
 #define PTRS_PER_PMD	(1 << PMD_INDEX_SIZE)
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index be5d123b3f61..aef691b75784 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -878,6 +878,13 @@ void __init hlearly_init_mmu(void)
 	__pmd_table_size = H_PMD_TABLE_SIZE;
 	__pud_table_size = H_PUD_TABLE_SIZE;
 	__pgd_table_size = H_PGD_TABLE_SIZE;
+	/*
+	 * 4K uses the hugepd format, so for hash set them to
+	 * zero
+	 */
+	__pmd_val_bits = 0;
+	__pud_val_bits = 0;
+	__pgd_val_bits = 0;
 	/* Initialize the MMU Hash table and create the linear mapping
 	 * of memory. Has to be done before SLB initialization as this is
 	 * currently where the page size encoding is obtained.
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 31c2e717a756..c77cf4c903b1 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -318,8 +318,11 @@ void __init rearly_init_mmu(void)
 	__pud_table_size = R_PUD_TABLE_SIZE;
 	__pgd_table_size = R_PGD_TABLE_SIZE;
 
-	radix_init_page_sizes();
+	__pmd_val_bits = R_PMD_VAL_BITS;
+	__pud_val_bits = R_PUD_VAL_BITS;
+	__pgd_val_bits = R_PGD_VAL_BITS;
 
+	radix_init_page_sizes();
 	if (!firmware_has_feature(FW_FEATURE_LPAR))
 		radix_init_partition_table();
 
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 78910035cbca..a58259793198 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -91,6 +91,12 @@ unsigned long __pud_table_size;
 EXPORT_SYMBOL(__pud_table_size);
 unsigned long __pgd_table_size;
 EXPORT_SYMBOL(__pgd_table_size);
+unsigned long __pmd_val_bits;
+EXPORT_SYMBOL(__pmd_val_bits);
+unsigned long __pud_val_bits;
+EXPORT_SYMBOL(__pud_val_bits);
+unsigned long __pgd_val_bits;
+EXPORT_SYMBOL(__pgd_val_bits);
 
 #endif
 unsigned long ioremap_bot = IOREMAP_BASE;
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 52/65] powerpc/mm: Update pte filter for radix
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (49 preceding siblings ...)
  2016-03-27  8:23 ` [PATCH 51/65] powerpc/mm: Add radix pgalloc details Aneesh Kumar K.V
@ 2016-03-27  8:24 ` Aneesh Kumar K.V
  2016-03-27  8:24 ` [PATCH 53/65] powerpc/mm: VMALLOC abstraction Aneesh Kumar K.V
                   ` (13 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:24 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

---
 arch/powerpc/mm/pgtable.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 115a0a19d5a2..0a9658fbf8b9 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -82,6 +82,9 @@ static struct page *maybe_pte_to_page(pte_t pte)
 
 static pte_t set_pte_filter(pte_t pte)
 {
+	if (radix_enabled())
+		return pte;
+
 	pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
 	if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) ||
 				       cpu_has_feature(CPU_FTR_NOEXECUTE))) {
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 53/65] powerpc/mm: VMALLOC abstraction
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (50 preceding siblings ...)
  2016-03-27  8:24 ` [PATCH 52/65] powerpc/mm: Update pte filter for radix Aneesh Kumar K.V
@ 2016-03-27  8:24 ` Aneesh Kumar K.V
  2016-03-27  8:24 ` [PATCH 54/65] powerpc/radix: update mmu cache Aneesh Kumar K.V
                   ` (12 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:24 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash.h    | 14 +++++++-------
 arch/powerpc/include/asm/book3s/64/pgtable.h | 15 ++++++++++++---
 arch/powerpc/include/asm/book3s/64/radix.h   | 21 +++++++++++++++++++++
 arch/powerpc/kernel/pci_64.c                 |  3 ++-
 arch/powerpc/mm/hash_utils_64.c              |  8 ++++++++
 arch/powerpc/mm/pgtable-radix.c              |  7 +++++++
 arch/powerpc/mm/pgtable_64.c                 | 13 +++++++++++--
 arch/powerpc/mm/slb_low.S                    |  2 +-
 8 files changed, 69 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 43bd7d15f41e..9da410ea7e1a 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -45,17 +45,17 @@
 /*
  * Define the address range of the kernel non-linear virtual area
  */
-#define KERN_VIRT_START ASM_CONST(0xD000000000000000)
-#define KERN_VIRT_SIZE	ASM_CONST(0x0000100000000000)
+#define H_KERN_VIRT_START ASM_CONST(0xD000000000000000)
+#define H_KERN_VIRT_SIZE	ASM_CONST(0x0000100000000000)
 
 /*
  * The vmalloc space starts at the beginning of that region, and
  * occupies half of it on hash CPUs and a quarter of it on Book3E
  * (we keep a quarter for the virtual memmap)
  */
-#define VMALLOC_START	KERN_VIRT_START
-#define VMALLOC_SIZE	(KERN_VIRT_SIZE >> 1)
-#define VMALLOC_END	(VMALLOC_START + VMALLOC_SIZE)
+#define H_VMALLOC_START	H_KERN_VIRT_START
+#define H_VMALLOC_SIZE	(H_KERN_VIRT_SIZE >> 1)
+#define H_VMALLOC_END	(H_VMALLOC_START + H_VMALLOC_SIZE)
 
 /*
  * Region IDs
@@ -64,7 +64,7 @@
 #define REGION_MASK		(0xfUL << REGION_SHIFT)
 #define REGION_ID(ea)		(((unsigned long)(ea)) >> REGION_SHIFT)
 
-#define VMALLOC_REGION_ID	(REGION_ID(VMALLOC_START))
+#define VMALLOC_REGION_ID	(REGION_ID(H_VMALLOC_START))
 #define KERNEL_REGION_ID	(REGION_ID(PAGE_OFFSET))
 #define VMEMMAP_REGION_ID	(0xfUL)	/* Server only */
 #define USER_REGION_ID		(0UL)
@@ -73,7 +73,7 @@
  * Defines the address of the vmemap area, in its own region on
  * hash table CPUs.
  */
-#define VMEMMAP_BASE		(VMEMMAP_REGION_ID << REGION_SHIFT)
+#define H_VMEMMAP_BASE		(VMEMMAP_REGION_ID << REGION_SHIFT)
 
 #ifdef CONFIG_PPC_MM_SLICES
 #define HAVE_ARCH_UNMAPPED_AREA
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 757f78776003..72e314c7f157 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -208,6 +208,18 @@ extern unsigned long __pgd_val_bits;
 #define PUD_MASKED_BITS		0xc0000000000000ffUL
 /* Bits to mask out from a PGD to get to the PUD page */
 #define PGD_MASKED_BITS		0xc0000000000000ffUL
+
+extern unsigned long __vmalloc_start;
+extern unsigned long __vmalloc_end;
+#define VMALLOC_START	__vmalloc_start
+#define VMALLOC_END	__vmalloc_end
+
+extern unsigned long __kernel_virt_start;
+extern unsigned long __kernel_virt_size;
+#define KERN_VIRT_START __kernel_virt_start
+#define KERN_VIRT_SIZE  __kernel_virt_size
+extern struct page *vmemmap;
+extern unsigned long ioremap_bot;
 #endif /* __ASSEMBLY__ */
 
 #include <asm/book3s/64/hash.h>
@@ -220,7 +232,6 @@ extern unsigned long __pgd_val_bits;
 #endif
 
 #include <asm/barrier.h>
-
 /*
  * The second half of the kernel virtual space is used for IO mappings,
  * it's itself carved into the PIO region (ISA and PHB IO space) and
@@ -239,8 +250,6 @@ extern unsigned long __pgd_val_bits;
 #define IOREMAP_BASE	(PHB_IO_END)
 #define IOREMAP_END	(KERN_VIRT_START + KERN_VIRT_SIZE)
 
-#define vmemmap			((struct page *)VMEMMAP_BASE)
-
 /* Advertise special mapping type for AGP */
 #define HAVE_PAGE_AGP
 
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 040c4a56d07b..d0449c0f2166 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -30,6 +30,27 @@
 #define R_PGTABLE_EADDR_SIZE (R_PTE_INDEX_SIZE + R_PMD_INDEX_SIZE +	\
 			      R_PUD_INDEX_SIZE + R_PGD_INDEX_SIZE + PAGE_SHIFT)
 #define R_PGTABLE_RANGE (ASM_CONST(1) << R_PGTABLE_EADDR_SIZE)
+/*
+ * We support a 52-bit address space. Use the top bit for the kernel
+ * virtual mapping. Also make sure the kernel fits in the top
+ * quadrant.
+ */
+#define R_KERN_VIRT_START ASM_CONST(0xc008000000000000)
+#define R_KERN_VIRT_SIZE  ASM_CONST(0x0008000000000000)
+
+/*
+ * The vmalloc space starts at the beginning of that region, and
+ * occupies a quarter of it on radix config.
+ * (we keep a quarter for the virtual memmap)
+ */
+#define R_VMALLOC_START	R_KERN_VIRT_START
+#define R_VMALLOC_SIZE	(R_KERN_VIRT_SIZE >> 2)
+#define R_VMALLOC_END	(R_VMALLOC_START + R_VMALLOC_SIZE)
+/*
+ * Defines the address of the vmemap area, in its own region on
+ * hash table CPUs.
+ */
+#define R_VMEMMAP_BASE		(R_VMALLOC_END)
 
 #ifndef __ASSEMBLY__
 #define R_PTE_TABLE_SIZE	(sizeof(pte_t) << R_PTE_INDEX_SIZE)
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 41503d7d53a1..3759df52bd67 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -38,7 +38,7 @@
  * ISA drivers use hard coded offsets.  If no ISA bus exists nothing
  * is mapped on the first 64K of IO space
  */
-unsigned long pci_io_base = ISA_IO_BASE;
+unsigned long pci_io_base;
 EXPORT_SYMBOL(pci_io_base);
 
 static int __init pcibios_init(void)
@@ -47,6 +47,7 @@ static int __init pcibios_init(void)
 
 	printk(KERN_INFO "PCI: Probing PCI hardware\n");
 
+	pci_io_base = ISA_IO_BASE;
 	/* For now, override phys_mem_access_prot. If we need it,g
 	 * later, we may move that initialization to each ppc_md
 	 */
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index aef691b75784..599c4684e158 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -885,6 +885,14 @@ void __init hlearly_init_mmu(void)
 	__pmd_val_bits = 0;
 	__pud_val_bits = 0;
 	__pgd_val_bits = 0;
+
+	__kernel_virt_start = H_KERN_VIRT_START;
+	__kernel_virt_size = H_KERN_VIRT_SIZE;
+	__vmalloc_start = H_VMALLOC_START;
+	__vmalloc_end = H_VMALLOC_END;
+	vmemmap = (struct page *)H_VMEMMAP_BASE;
+	ioremap_bot = IOREMAP_BASE;
+
 	/* Initialize the MMU Hash table and create the linear mapping
 	 * of memory. Has to be done before SLB initialization as this is
 	 * currently where the page size encoding is obtained.
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index c77cf4c903b1..6fe83f160360 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -322,6 +322,13 @@ void __init rearly_init_mmu(void)
 	__pud_val_bits = R_PUD_VAL_BITS;
 	__pgd_val_bits = R_PGD_VAL_BITS;
 
+	__kernel_virt_start = R_KERN_VIRT_START;
+	__kernel_virt_size = R_KERN_VIRT_SIZE;
+	__vmalloc_start = R_VMALLOC_START;
+	__vmalloc_end = R_VMALLOC_END;
+	vmemmap = (struct page *)R_VMEMMAP_BASE;
+	ioremap_bot = IOREMAP_BASE;
+
 	radix_init_page_sizes();
 	if (!firmware_has_feature(FW_FEATURE_LPAR))
 		radix_init_partition_table();
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index a58259793198..88b6cfc0a3e3 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -97,9 +97,18 @@ unsigned long __pud_val_bits;
 EXPORT_SYMBOL(__pud_val_bits);
 unsigned long __pgd_val_bits;
 EXPORT_SYMBOL(__pgd_val_bits);
-
+unsigned long __kernel_virt_start;
+EXPORT_SYMBOL(__kernel_virt_start);
+unsigned long __kernel_virt_size;
+EXPORT_SYMBOL(__kernel_virt_size);
+unsigned long __vmalloc_start;
+EXPORT_SYMBOL(__vmalloc_start);
+unsigned long __vmalloc_end;
+EXPORT_SYMBOL(__vmalloc_end);
+struct page *vmemmap;
+EXPORT_SYMBOL(vmemmap);
 #endif
-unsigned long ioremap_bot = IOREMAP_BASE;
+unsigned long ioremap_bot;
 
 /**
  * __ioremap_at - Low level function to establish the page tables
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index aade18f3f21f..5d840b249fd4 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -91,7 +91,7 @@ slb_miss_kernel_load_vmemmap:
 	 * can be demoted from 64K -> 4K dynamically on some machines
 	 */
 	clrldi	r11,r10,48
-	cmpldi	r11,(VMALLOC_SIZE >> 28) - 1
+	cmpldi	r11,(H_VMALLOC_SIZE >> 28) - 1
 	bgt	5f
 	lhz	r11,PACAVMALLOCSLLP(r13)
 	b	6f
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 54/65] powerpc/radix: update mmu cache
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (51 preceding siblings ...)
  2016-03-27  8:24 ` [PATCH 53/65] powerpc/mm: VMALLOC abstraction Aneesh Kumar K.V
@ 2016-03-27  8:24 ` Aneesh Kumar K.V
  2016-03-27  8:24 ` [PATCH 55/65] powerpc/mm: pte_frag abstraction Aneesh Kumar K.V
                   ` (11 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:24 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

With radix there is no MMU cache to update, hence we don't need to do much
in update_mmu_cache().

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/mm/mem.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index f980da6d7569..3164ce720b3e 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -499,6 +499,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 	 * called with either mm->page_table_lock held or ptl lock held
 	 */
 	unsigned long access = 0, trap;
+	if (radix_enabled())
+		return;
 
 	/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
 	if (!pte_young(*ptep) || address >= TASK_SIZE)
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 55/65] powerpc/mm: pte_frag abstraction
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (52 preceding siblings ...)
  2016-03-27  8:24 ` [PATCH 54/65] powerpc/radix: update mmu cache Aneesh Kumar K.V
@ 2016-03-27  8:24 ` Aneesh Kumar K.V
  2016-03-27  8:24 ` [PATCH 56/65] powerpc/mm: Fix vma_mmu_pagesize for radix Aneesh Kumar K.V
                   ` (10 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:24 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

In this patch we make the number of PTE fragments per level-4 page table
page a variable. The radix level-4 table size is 256 bytes, so we could have
256 fragments per page. We don't update the fragment count in this patch;
we need to do performance measurements first to find the right value for
the fragment count.
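
To make the arithmetic concrete, a small standalone sketch (plain userspace
C, not kernel code) of where the fragment counts come from: a 64K page is
carved into equal-sized PTE-table fragments, so the count follows directly
from the fragment size. The 4K fragment matches the existing hash 64K layout
(16 fragments); the 256-byte fragment is the radix level-4 table size
mentioned above (256 fragments).

#include <stdio.h>

#define PAGE_SIZE_64K	(64UL * 1024)

/* Number of PTE-table fragments that fit in one 64K page. */
static unsigned long frag_count(unsigned long frag_size)
{
	return PAGE_SIZE_64K / frag_size;
}

int main(void)
{
	/* Hash with 64K pages: 4K fragments (2K of PTEs + 2K of hash slot info). */
	printf("4K fragments per 64K page:   %lu\n", frag_count(4096));
	/* Radix level-4 table: 32 entries * 8 bytes = 256 bytes. */
	printf("256B fragments per 64K page: %lu\n", frag_count(256));
	return 0;
}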

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash-4k.h  | 2 ++
 arch/powerpc/include/asm/book3s/64/hash-64k.h | 4 ++--
 arch/powerpc/include/asm/book3s/64/pgtable.h  | 6 ++++++
 arch/powerpc/mm/hash_utils_64.c               | 3 +++
 arch/powerpc/mm/pgtable-radix.c               | 5 +++++
 arch/powerpc/mm/pgtable_64.c                  | 4 ++++
 6 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index f121f8016d8b..10266ec97a83 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -27,6 +27,8 @@
  */
 #define H_PAGE_4K_PFN	0x0
 #define H_PAGE_THP_HUGE 0x0
+#define H_PTE_FRAG_NR	0
+#define H_PTE_FRAG_SIZE_SHIFT  0
 /*
  * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range()
  */
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index ad797a8cc615..f4a19914fdbe 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -29,12 +29,12 @@
 /*
  * we support 16 fragments per PTE page of 64K size.
  */
-#define PTE_FRAG_NR	16
+#define H_PTE_FRAG_NR	16
 /*
  * We use a 2K PTE page fragment and another 2K for storing
  * real_pte_t hash index
  */
-#define PTE_FRAG_SIZE_SHIFT  12
+#define H_PTE_FRAG_SIZE_SHIFT  12
 #define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT)
 
 #ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 72e314c7f157..5db263a60322 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -177,6 +177,12 @@ extern unsigned long __pgd_val_bits;
 #define PMD_VAL_BITS	__pmd_val_bits
 #define PUD_VAL_BITS	__pud_val_bits
 #define PGD_VAL_BITS	__pgd_val_bits
+
+extern unsigned long __pte_frag_nr;
+#define PTE_FRAG_NR __pte_frag_nr
+extern unsigned long __pte_frag_size_shift;
+#define PTE_FRAG_SIZE_SHIFT __pte_frag_size_shift
+#define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT)
 /*
  * Pgtable size used by swapper, init in asm code
  */
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 599c4684e158..3356a48ca479 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -869,6 +869,9 @@ void __init hlearly_init_mmu(void)
 	/*
 	 * initialize page table size
 	 */
+	__pte_frag_nr = H_PTE_FRAG_NR;
+	__pte_frag_size_shift = H_PTE_FRAG_SIZE_SHIFT;
+
 	__pte_index_size = H_PTE_INDEX_SIZE;
 	__pmd_index_size = H_PMD_INDEX_SIZE;
 	__pud_index_size = H_PUD_INDEX_SIZE;
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 6fe83f160360..2b608c37526c 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -328,6 +328,11 @@ void __init rearly_init_mmu(void)
 	__vmalloc_end = R_VMALLOC_END;
 	vmemmap = (struct page *)R_VMEMMAP_BASE;
 	ioremap_bot = IOREMAP_BASE;
+	/*
+	 * For now radix also uses the same frag size
+	 */
+	__pte_frag_nr = H_PTE_FRAG_NR;
+	__pte_frag_size_shift = H_PTE_FRAG_SIZE_SHIFT;
 
 	radix_init_page_sizes();
 	if (!firmware_has_feature(FW_FEATURE_LPAR))
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 88b6cfc0a3e3..0e980204c4d5 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -107,6 +107,10 @@ unsigned long __vmalloc_end;
 EXPORT_SYMBOL(__vmalloc_end);
 struct page *vmemmap;
 EXPORT_SYMBOL(vmemmap);
+unsigned long __pte_frag_nr;
+EXPORT_SYMBOL(__pte_frag_nr);
+unsigned long __pte_frag_size_shift;
+EXPORT_SYMBOL(__pte_frag_size_shift);
 #endif
 unsigned long ioremap_bot;
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 56/65] powerpc/mm: Fix vma_mmu_pagesize for radix
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (53 preceding siblings ...)
  2016-03-27  8:24 ` [PATCH 55/65] powerpc/mm: pte_frag abstraction Aneesh Kumar K.V
@ 2016-03-27  8:24 ` Aneesh Kumar K.V
  2016-03-27  8:24 ` [PATCH 57/65] powerpc/mm: Add radix support for hugetlb Aneesh Kumar K.V
                   ` (9 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:24 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Radix doesn't use the slice framework to find the page size. Hence use the
vma to find the page size.
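
As an illustration (not part of the patch; example_mmu_pagesize is a made-up
name), the intended radix behaviour is roughly:

/* Sketch only: on radix the MMU page size is derived from the vma itself. */
static unsigned long example_mmu_pagesize(struct vm_area_struct *vma)
{
	if (!is_vm_hugetlb_page(vma))
		return PAGE_SIZE;			/* base page size */
	return huge_page_size(hstate_vma(vma));		/* e.g. 2M or 1G */
}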

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/mm/hugetlbpage.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 7201e9c624d5..9eab82cfbb91 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -719,14 +719,14 @@ unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
 {
 #ifdef CONFIG_PPC_MM_SLICES
 	unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
-
-	return 1UL << mmu_psize_to_shift(psize);
-#else
+	/* With radix we don't use slices, so derive it from the vma */
+	if (!radix_enabled())
+		return 1UL << mmu_psize_to_shift(psize);
+#endif
 	if (!is_vm_hugetlb_page(vma))
 		return PAGE_SIZE;
 
 	return huge_page_size(hstate_vma(vma));
-#endif
 }
 
 static inline bool is_power_of_4(unsigned long x)
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 57/65] powerpc/mm: Add radix support for hugetlb
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (54 preceding siblings ...)
  2016-03-27  8:24 ` [PATCH 56/65] powerpc/mm: Fix vma_mmu_pagesize for radix Aneesh Kumar K.V
@ 2016-03-27  8:24 ` Aneesh Kumar K.V
  2016-03-27  8:24 ` [PATCH 58/65] powerpc/mm/radix: Make sure swapper pgdir is properly aligned Aneesh Kumar K.V
                   ` (8 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:24 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hugetlb-radix.h | 14 ++++
 arch/powerpc/include/asm/hugetlb.h                 | 14 ++++
 arch/powerpc/mm/Makefile                           |  1 +
 arch/powerpc/mm/hugetlbpage-radix.c                | 87 ++++++++++++++++++++++
 arch/powerpc/mm/hugetlbpage.c                      |  8 +-
 arch/powerpc/mm/tlb-radix.c                        | 10 +++
 arch/powerpc/mm/tlb_nohash.c                       |  4 +
 7 files changed, 137 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/include/asm/book3s/64/hugetlb-radix.h
 create mode 100644 arch/powerpc/mm/hugetlbpage-radix.c

diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb-radix.h b/arch/powerpc/include/asm/book3s/64/hugetlb-radix.h
new file mode 100644
index 000000000000..6f8f644bb26e
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb-radix.h
@@ -0,0 +1,14 @@
+#ifndef _ASM_POWERPC_BOOK3S_64_HUGETLB_RADIX_H
+#define _ASM_POWERPC_BOOK3S_64_HUGETLB_RADIX_H
+/*
+ * For radix we want generic code to handle hugetlb. But then if we want
+ * both hash and radix to be enabled together we need to work around the
+ * limitations.
+ */
+void flush_hugetlb_rpage(struct vm_area_struct *vma, unsigned long vmaddr);
+void __local_flush_hugetlb_rpage(struct vm_area_struct *vma, unsigned long vmaddr);
+extern unsigned long
+hugetlb_get_radix_unmapped_area(struct file *file, unsigned long addr,
+				unsigned long len, unsigned long pgoff,
+				unsigned long flags);
+#endif
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 42814f0567cc..221a40ce2a76 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -8,6 +8,8 @@
 extern struct kmem_cache *hugepte_cache;
 
 #ifdef CONFIG_PPC_BOOK3S_64
+
+#include <asm/book3s/64/hugetlb-radix.h>
 /*
  * This should work for other subarchs too. But right now we use the
  * new format only for 64bit book3s
@@ -31,7 +33,19 @@ static inline unsigned int hugepd_shift(hugepd_t hpd)
 {
 	return mmu_psize_to_shift(hugepd_mmu_psize(hpd));
 }
+static inline void flush_hugetlb_page(struct vm_area_struct *vma,
+				      unsigned long vmaddr)
+{
+	if (radix_enabled())
+		return flush_hugetlb_rpage(vma, vmaddr);
+}
 
+static inline void __local_flush_hugetlb_page(struct vm_area_struct *vma,
+					      unsigned long vmaddr)
+{
+	if (radix_enabled())
+		return __local_flush_hugetlb_rpage(vma, vmaddr);
+}
 #else
 
 static inline pte_t *hugepd_page(hugepd_t hpd)
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index cb21d9862748..c110328ce340 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -33,6 +33,7 @@ obj-$(CONFIG_PPC_MM_SLICES)	+= slice.o
 obj-y				+= hugetlbpage.o
 ifeq ($(CONFIG_HUGETLB_PAGE),y)
 obj-$(CONFIG_PPC_STD_MMU_64)	+= hugetlbpage-hash64.o
+obj-$(CONFIG_PPC_RADIX_MMU)	+= hugetlbpage-radix.o
 obj-$(CONFIG_PPC_BOOK3E_MMU)	+= hugetlbpage-book3e.o
 endif
 obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hugepage-hash64.o
diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/hugetlbpage-radix.c
new file mode 100644
index 000000000000..94ff1f23078d
--- /dev/null
+++ b/arch/powerpc/mm/hugetlbpage-radix.c
@@ -0,0 +1,87 @@
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/cacheflush.h>
+#include <asm/machdep.h>
+#include <asm/mman.h>
+
+void flush_hugetlb_rpage(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+	unsigned long ap, shift;
+	struct hstate *hstate = hstate_file(vma->vm_file);
+
+	shift = huge_page_shift(hstate);
+	if (shift == mmu_psize_defs[MMU_PAGE_2M].shift)
+		ap = mmu_get_ap(MMU_PAGE_2M);
+	else if (shift == mmu_psize_defs[MMU_PAGE_1G].shift)
+		ap = mmu_get_ap(MMU_PAGE_1G);
+	else {
+		WARN(1, "Wrong huge page shift\n");
+		return ;
+	}
+	__flush_rtlb_page(vma->vm_mm, vmaddr, ap, 0);
+}
+
+void __local_flush_hugetlb_rpage(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+	unsigned long ap, shift;
+	struct hstate *hstate = hstate_file(vma->vm_file);
+
+	shift = huge_page_shift(hstate);
+	if (shift == mmu_psize_defs[MMU_PAGE_2M].shift)
+		ap = mmu_get_ap(MMU_PAGE_2M);
+	else if (shift == mmu_psize_defs[MMU_PAGE_1G].shift)
+		ap = mmu_get_ap(MMU_PAGE_1G);
+	else {
+		WARN(1, "Wrong huge page shift\n");
+		return ;
+	}
+	__local_flush_rtlb_page(vma->vm_mm, vmaddr, ap, 0);
+}
+
+/*
+ * A variant of hugetlb_get_unmapped_area doing a topdown search
+ * FIXME!! should we do as x86 does, or as the non-hugetlb area does?
+ * ie, use topdown or not based on the mmap_is_legacy check?
+ */
+unsigned long
+hugetlb_get_radix_unmapped_area(struct file *file, unsigned long addr,
+				unsigned long len, unsigned long pgoff,
+				unsigned long flags)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	struct hstate *h = hstate_file(file);
+	struct vm_unmapped_area_info info;
+
+	if (len & ~huge_page_mask(h))
+		return -EINVAL;
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
+	if (flags & MAP_FIXED) {
+		if (prepare_hugepage_range(file, addr, len))
+			return -EINVAL;
+		return addr;
+	}
+
+	if (addr) {
+		addr = ALIGN(addr, huge_page_size(h));
+		vma = find_vma(mm, addr);
+		if (TASK_SIZE - len >= addr &&
+		    (!vma || addr + len <= vma->vm_start))
+			return addr;
+	}
+	/*
+	 * We are always doing a topdown search here. Slice code
+	 * does that too.
+	 */
+	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+	info.length = len;
+	info.low_limit = PAGE_SIZE;
+	info.high_limit = current->mm->mmap_base;
+	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+	info.align_offset = 0;
+	return vm_unmapped_area(&info);
+}
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 9eab82cfbb91..fa242c744ec1 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -711,6 +711,9 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	struct hstate *hstate = hstate_file(file);
 	int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
 
+	if (radix_enabled())
+		return hugetlb_get_radix_unmapped_area(file, addr, len,
+						       pgoff, flags);
 	return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1);
 }
 #endif
@@ -823,7 +826,7 @@ static int __init hugetlbpage_init(void)
 {
 	int psize;
 
-	if (!mmu_has_feature(MMU_FTR_16M_PAGE))
+	if (!radix_enabled() && !mmu_has_feature(MMU_FTR_16M_PAGE))
 		return -ENODEV;
 
 	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
@@ -863,6 +866,9 @@ static int __init hugetlbpage_init(void)
 		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift;
 	else if (mmu_psize_defs[MMU_PAGE_1M].shift)
 		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift;
+	else if (mmu_psize_defs[MMU_PAGE_2M].shift)
+		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_2M].shift;
+
 
 	return 0;
 }
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 39c28d32bd97..5f580291e26b 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -145,6 +145,11 @@ void __local_flush_rtlb_page(struct mm_struct *mm, unsigned long vmaddr,
 
 void local_flush_rtlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
 {
+#ifdef CONFIG_HUGETLB_PAGE
+	/* need the return fix for nohash.c */
+	if (vma && is_vm_hugetlb_page(vma))
+		return __local_flush_hugetlb_page(vma, vmaddr);
+#endif
 	__local_flush_rtlb_page(vma ? vma->vm_mm : NULL, vmaddr,
 			       mmu_get_ap(mmu_virtual_psize), 0);
 }
@@ -206,6 +211,11 @@ bail:
 
 void flush_rtlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
 {
+#ifdef CONFIG_HUGETLB_PAGE
+	/* need the return fix for nohash.c */
+	if (vma && is_vm_hugetlb_page(vma))
+		return flush_hugetlb_page(vma, vmaddr);
+#endif
 	__flush_rtlb_page(vma ? vma->vm_mm : NULL, vmaddr,
 			 mmu_get_ap(mmu_virtual_psize), 0);
 }
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index bb04e4df3100..6266b835c082 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -326,6 +326,10 @@ void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
 
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
 {
+	/*
+	 * FIXME!! do we not need a return here? Also, does the non-smp
+	 * version need to handle hugetlb?
+	 */
 #ifdef CONFIG_HUGETLB_PAGE
 	if (vma && is_vm_hugetlb_page(vma))
 		flush_hugetlb_page(vma, vmaddr);
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 58/65] powerpc/mm/radix: Make sure swapper pgdir is properly aligned
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (55 preceding siblings ...)
  2016-03-27  8:24 ` [PATCH 57/65] powerpc/mm: Add radix support for hugetlb Aneesh Kumar K.V
@ 2016-03-27  8:24 ` Aneesh Kumar K.V
  2016-03-27  8:24 ` [PATCH 59/65] powerpc/mm/radix: Add hugetlb support 4K page size Aneesh Kumar K.V
                   ` (7 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:24 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

With a 4K page size radix config, our level 1 page table size is 64K and it
should be naturally aligned.
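
As a rough check (not taken from the patch; the 13-bit PGD index below is an
assumption based on the radix 4K layout), the size and alignment work out as:

/* Sketch only: 8-byte pgd entries and a 13-bit index give
 * PGD_TABLE_SIZE = 8 << 13 = 0x10000 (64K), and ".align 16" in
 * head_64.S requests 2^16 = 64K alignment for swapper_pg_dir.
 */
#define EXAMPLE_RADIX_PGD_INDEX_SIZE	13	/* assumed value */
#define EXAMPLE_PGD_TABLE_SIZE \
	(sizeof(unsigned long) << EXAMPLE_RADIX_PGD_INDEX_SIZE)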

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/head_64.S | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 1b779560728f..e8870c9bbc11 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -875,13 +875,16 @@ start_here_common:
  * This stuff goes at the beginning of the bss, which is page-aligned.
  */
 	.section ".bss"
+/*
+ * pgd dir should be aligned to PGD_TABLE_SIZE which is 64K.
+ * We will need to find a better way to fix this
+ */
+	.align	16
 
-	.align	PAGE_SHIFT
+	.globl	swapper_pg_dir
+swapper_pg_dir:
+	.space	PGD_TABLE_SIZE
 
 	.globl	empty_zero_page
 empty_zero_page:
 	.space	PAGE_SIZE
-
-	.globl	swapper_pg_dir
-swapper_pg_dir:
-	.space	PGD_TABLE_SIZE
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 59/65] powerpc/mm/radix: Add hugetlb support 4K page size
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (56 preceding siblings ...)
  2016-03-27  8:24 ` [PATCH 58/65] powerpc/mm/radix: Make sure swapper pgdir is properly aligned Aneesh Kumar K.V
@ 2016-03-27  8:24 ` Aneesh Kumar K.V
  2016-03-27  8:24 ` [PATCH 60/65] powerpc/mm: Drop PTE_ATOMIC_UPDATES from pmd_hugepage_update Aneesh Kumar K.V
                   ` (6 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:24 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

We have hugepages at the pmd level with the 4K radix config. Hence we don't
need to use the hugepd format with radix.
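
As a sketch of why hugepd is unnecessary here (illustrative only, not part of
the patch; example_walk_pmd is a made-up helper), a walker can treat the pmd
entry itself as the leaf:

/* Sketch only: on radix a 2M mapping is a leaf PTE at the pmd level. */
static struct page *example_walk_pmd(pmd_t pmd)
{
	if (pmd_huge(pmd))		/* _PAGE_PTE set => leaf entry */
		return pmd_page(pmd);	/* maps the whole 2M region */
	return NULL;			/* otherwise descend to the pte table */
}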

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash-4k.h    | 22 +-----------
 arch/powerpc/include/asm/book3s/64/pgtable-4k.h | 46 +++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 10266ec97a83..4d2ae7fdc4f9 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -36,26 +36,7 @@
 	remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, (prot))
 
 #ifdef CONFIG_HUGETLB_PAGE
-/*
- * For 4k page size, we support explicit hugepage via hugepd
- */
-static inline int pmd_huge(pmd_t pmd)
-{
-	return 0;
-}
-
-static inline int pud_huge(pud_t pud)
-{
-	return 0;
-}
-
-static inline int pgd_huge(pgd_t pgd)
-{
-	return 0;
-}
-#define pgd_huge pgd_huge
-
-static inline int hugepd_ok(hugepd_t hpd)
+static inline int hl_hugepd_ok(hugepd_t hpd)
 {
 	/*
 	 * if it is not a pte and have hugepd shift mask
@@ -66,7 +47,6 @@ static inline int hugepd_ok(hugepd_t hpd)
 		return true;
 	return false;
 }
-#define is_hugepd(hpd)		(hugepd_ok(hpd))
 #endif
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
index 423735f897f5..9860e769099f 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
@@ -3,5 +3,51 @@
 /*
  * hash 4k can't share hugetlb and also doesn't support THP
  */
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_HUGETLB_PAGE
+static inline int pmd_huge(pmd_t pmd)
+{
+	/*
+	 * leaf pte for huge page
+	 */
+	if (radix_enabled())
+		return !!(pmd_val(pmd) & _PAGE_PTE);
+	return 0;
+}
+
+static inline int pud_huge(pud_t pud)
+{
+	/*
+	 * leaf pte for huge page
+	 */
+	if (radix_enabled())
+		return !!(pud_val(pud) & _PAGE_PTE);
+	return 0;
+}
+
+static inline int pgd_huge(pgd_t pgd)
+{
+	/*
+	 * leaf pte for huge page
+	 */
+	if (radix_enabled())
+		return !!(pgd_val(pgd) & _PAGE_PTE);
+	return 0;
+}
+#define pgd_huge pgd_huge
+/*
+ * With radix, we have hugepage ptes in the pud and pmd entries. We don't
+ * need to set up a hugepage directory for them. Our pte and page directory
+ * format enables this.
+ */
+static inline int hugepd_ok(hugepd_t hpd)
+{
+	if (radix_enabled())
+		return 0;
+	return hl_hugepd_ok(hpd);
+}
+#define is_hugepd(hpd)		(hugepd_ok(hpd))
+#endif /* CONFIG_HUGETLB_PAGE */
+#endif /* __ASSEMBLY__ */
 
 #endif /*_ASM_POWERPC_BOOK3S_64_PGTABLE_4K_H */
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 60/65] powerpc/mm: Drop PTE_ATOMIC_UPDATES from pmd_hugepage_update
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (57 preceding siblings ...)
  2016-03-27  8:24 ` [PATCH 59/65] powerpc/mm/radix: Add hugetlb support 4K page size Aneesh Kumar K.V
@ 2016-03-27  8:24 ` Aneesh Kumar K.V
  2016-03-27  8:24 ` [PATCH 61/65] powerpc/mm: THP is only available on hash64 as of now Aneesh Kumar K.V
                   ` (5 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:24 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

THP is only supported by hash64 now, and for that configuration
PTE_ATOMIC_UPDATES is always set. Drop the unnecessary #ifdef
in the code.
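
For readers less familiar with the ldarx/stdcx. sequence that remains, a rough
C equivalent of the atomic path is sketched below (not part of the patch; the
real code also byte-swaps the clr/set masks since the table is big endian):

/* Sketch only: spin while H_PAGE_BUSY is set, then atomically
 * clear/set the requested bits and return the old value.
 */
static unsigned long example_hugepage_update(unsigned long *pmdp,
					     unsigned long clr,
					     unsigned long set)
{
	unsigned long old, new;

	for (;;) {
		old = READ_ONCE(*pmdp);
		if (old & H_PAGE_BUSY)
			continue;	/* hash fault path owns the entry */
		new = (old & ~clr) | set;
		if (cmpxchg(pmdp, old, new) == old)
			break;
	}
	return old;
}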

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/mm/pgtable_64.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 0e980204c4d5..092c07115612 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -475,7 +475,6 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
 	assert_spin_locked(&mm->page_table_lock);
 #endif
 
-#ifdef PTE_ATOMIC_UPDATES
 	clr = cpu_to_be64(clr);
 	set = cpu_to_be64(set);
 	__asm__ __volatile__(
@@ -491,10 +490,7 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
 	: "cc" );
 
 	old = be64_to_cpu(old);
-#else
-	old = pmd_val(*pmdp);
-	*pmdp = __pmd((old & ~clr) | set);
-#endif
+
 	trace_hugepage_update(addr, old, clr, set);
 	if (old & H_PAGE_HASHPTE)
 		hpte_do_hugepage_flush(mm, addr, pmdp, old);
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 61/65] powerpc/mm: THP is only available on hash64 as of now
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (58 preceding siblings ...)
  2016-03-27  8:24 ` [PATCH 60/65] powerpc/mm: Drop PTE_ATOMIC_UPDATES from pmd_hugepage_update Aneesh Kumar K.V
@ 2016-03-27  8:24 ` Aneesh Kumar K.V
  2016-03-27  8:24 ` [PATCH 62/65] powerpc/mm/thp: Abstraction for THP functions Aneesh Kumar K.V
                   ` (4 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:24 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Only code movement in this patch. No functionality change.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h |  24 +-
 arch/powerpc/mm/pgtable-hash64.c             | 363 ++++++++++++++++++++++++++-
 arch/powerpc/mm/pgtable_64.c                 | 360 --------------------------
 3 files changed, 371 insertions(+), 376 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 5db263a60322..d38f402aa648 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -777,18 +777,6 @@ static inline void vmemmap_remove_mapping(unsigned long start,
 #endif
 struct page *realmode_pfn_to_page(unsigned long pfn);
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
-extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
-extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
-extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
-		       pmd_t *pmdp, pmd_t pmd);
-extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
-				 pmd_t *pmd);
-extern int has_transparent_hugepage(void);
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-
-
 static inline pte_t pmd_pte(pmd_t pmd)
 {
 	return __pte(pmd_val(pmd));
@@ -803,7 +791,6 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
 {
 	return (pte_t *)pmd;
 }
-
 #define pmd_pfn(pmd)		pte_pfn(pmd_pte(pmd))
 #define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
 #define pmd_young(pmd)		pte_young(pmd_pte(pmd))
@@ -830,6 +817,16 @@ static inline int pmd_protnone(pmd_t pmd)
 #define __HAVE_ARCH_PMD_WRITE
 #define pmd_write(pmd)		pte_write(pmd_pte(pmd))
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
+extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
+extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
+extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		       pmd_t *pmdp, pmd_t pmd);
+extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+				 pmd_t *pmd);
+extern int has_transparent_hugepage(void);
+
 static inline pmd_t pmd_mkhuge(pmd_t pmd)
 {
 	return __pmd(pmd_val(pmd) | (_PAGE_PTE | H_PAGE_THP_HUGE));
@@ -878,5 +875,6 @@ static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
 	 */
 	return true;
 }
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c
index b926b0a4ae0c..a11fde8ac274 100644
--- a/arch/powerpc/mm/pgtable-hash64.c
+++ b/arch/powerpc/mm/pgtable-hash64.c
@@ -12,15 +12,15 @@
  *
  */
 
-/*
- * PPC64 THP Support for hash based MMUs
- */
 #include <linux/sched.h>
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
 
 #include "mmu_decl.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/thp.h>
+
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 /*
  * On hash-based CPUs, the vmemmap is bolted in the hash table.
@@ -99,3 +99,360 @@ int hlmap_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags)
 	smp_wmb();
 	return 0;
 }
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+/*
+ * This is called when relaxing access to a hugepage. It's also called in the page
+ * fault path when we don't hit any of the major fault cases, ie, a minor
+ * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
+ * handled those two for us; we additionally deal with missing execute
+ * permission here on some processors
+ */
+int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+			  pmd_t *pmdp, pmd_t entry, int dirty)
+{
+	int changed;
+#ifdef CONFIG_DEBUG_VM
+	WARN_ON(!pmd_trans_huge(*pmdp));
+	assert_spin_locked(&vma->vm_mm->page_table_lock);
+#endif
+	changed = !pmd_same(*(pmdp), entry);
+	if (changed) {
+		__ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry));
+		/*
+		 * Since we are not supporting SW TLB systems, we don't
+		 * have anything similar to flush_tlb_page_nohash()
+		 */
+	}
+	return changed;
+}
+
+unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
+				  pmd_t *pmdp, unsigned long clr,
+				  unsigned long set)
+{
+
+	unsigned long old, tmp;
+	unsigned long busy = cpu_to_be64(H_PAGE_BUSY);
+
+#ifdef CONFIG_DEBUG_VM
+	WARN_ON(!pmd_trans_huge(*pmdp));
+	assert_spin_locked(&mm->page_table_lock);
+#endif
+
+	clr = cpu_to_be64(clr);
+	set = cpu_to_be64(set);
+	__asm__ __volatile__(
+	"1:	ldarx	%0,0,%3\n\
+		and.	%1,%0,%6\n\
+		bne-	1b \n\
+		andc	%1,%0,%4 \n\
+		or	%1,%1,%7\n\
+		stdcx.	%1,0,%3 \n\
+		bne-	1b"
+	: "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
+	: "r" (pmdp), "r" (clr), "m" (*pmdp), "r" (busy), "r" (set)
+	: "cc" );
+
+	old = be64_to_cpu(old);
+
+	trace_hugepage_update(addr, old, clr, set);
+	if (old & H_PAGE_HASHPTE)
+		hpte_do_hugepage_flush(mm, addr, pmdp, old);
+	return old;
+}
+
+pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
+			  pmd_t *pmdp)
+{
+	pmd_t pmd;
+
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+	VM_BUG_ON(pmd_trans_huge(*pmdp));
+
+	pmd = *pmdp;
+	pmd_clear(pmdp);
+	/*
+	 * Wait for all pending hash_page to finish. This is needed
+	 * in case of subpage collapse. When we collapse normal pages
+	 * to hugepage, we first clear the pmd, then invalidate all
+	 * the PTE entries. The assumption here is that any low level
+	 * page fault will see a none pmd and take the slow path that
+	 * will wait on mmap_sem. But we could very well be in a
+	 * hash_page with local ptep pointer value. Such a hash page
+	 * can result in adding new HPTE entries for normal subpages.
+	 * That means we could be modifying the page content as we
+	 * copy them to a huge page. So wait for parallel hash_page
+	 * to finish before invalidating HPTE entries. We can do this
+	 * by sending an IPI to all the cpus and executing a dummy
+	 * function there.
+	 */
+	kick_all_cpus_sync();
+	/*
+	 * Now invalidate the hpte entries in the range
+	 * covered by pmd. This make sure we take a
+	 * fault and will find the pmd as none, which will
+	 * result in a major fault which takes mmap_sem and
+	 * hence wait for collapse to complete. Without this
+	 * the __collapse_huge_page_copy can result in copying
+	 * the old content.
+	 */
+	flush_tlb_pmd_range(vma->vm_mm, &pmd, address);
+	return pmd;
+}
+
+/*
+ * We currently remove entries from the hashtable regardless of whether
+ * the entry was young or dirty.
+ *
+ * We should be more intelligent about this but for the moment we override
+ * these functions and force a tlb flush unconditionally
+ */
+int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+			      unsigned long address, pmd_t *pmdp)
+{
+	return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
+}
+
+/*
+ * We want to put the pgtable in pmd and use pgtable for tracking
+ * the base page size hptes
+ */
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+				pgtable_t pgtable)
+{
+	pgtable_t *pgtable_slot;
+	assert_spin_locked(&mm->page_table_lock);
+	/*
+	 * we store the pgtable in the second half of PMD
+	 */
+	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
+	*pgtable_slot = pgtable;
+	/*
+	 * expose the deposited pgtable to other cpus
+	 * before we set the hugepage PTE at the pmd level.
+	 * The hash fault code looks at the deposited pgtable
+	 * to store hash index values.
+	 */
+	smp_wmb();
+}
+
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
+{
+	pgtable_t pgtable;
+	pgtable_t *pgtable_slot;
+
+	assert_spin_locked(&mm->page_table_lock);
+	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
+	pgtable = *pgtable_slot;
+	/*
+	 * Once we withdraw, mark the entry NULL.
+	 */
+	*pgtable_slot = NULL;
+	/*
+	 * We store HPTE information in the deposited PTE fragment.
+	 * zero out the content on withdraw.
+	 */
+	memset(pgtable, 0, PTE_FRAG_SIZE);
+	return pgtable;
+}
+
+void pmdp_huge_split_prepare(struct vm_area_struct *vma,
+			     unsigned long address, pmd_t *pmdp)
+{
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+	VM_BUG_ON(REGION_ID(address) != USER_REGION_ID);
+
+	/*
+	 * We can't mark the pmd none here, because that will cause a race
+	 * against exit_mmap. We need to keep the pmd marked TRANS HUGE while
+	 * we split, but at the same time we want the rest of the ppc64 code
+	 * not to insert hash pte on this, because we will be modifying
+	 * the deposited pgtable in the caller of this function. Hence
+	 * clear the _PAGE_USER so that we move the fault handling to
+	 * higher level function and that will serialize against ptl.
+	 * We need to flush existing hash pte entries here even though
+	 * the translation is still valid, because we will withdraw
+	 * pgtable_t after this.
+	 */
+	pmd_hugepage_update(vma->vm_mm, address, pmdp, 0, _PAGE_PRIVILEGED);
+}
+
+
+/*
+ * set a new huge pmd. We should not be called for updating
+ * an existing pmd entry. That should go via pmd_hugepage_update.
+ */
+void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmdp, pmd_t pmd)
+{
+#ifdef CONFIG_DEBUG_VM
+	WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
+	assert_spin_locked(&mm->page_table_lock);
+	WARN_ON(!pmd_trans_huge(pmd));
+#endif
+	trace_hugepage_set_pmd(addr, pmd_val(pmd));
+	return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
+}
+
+/*
+ * We use this to invalidate a pmdp entry before switching from a
+ * hugepte to regular pmd entry.
+ */
+void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
+		     pmd_t *pmdp)
+{
+	pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
+
+	/*
+	 * This ensures that generic code that relies on IRQ disabling
+	 * to prevent a parallel THP split works as expected.
+	 */
+	kick_all_cpus_sync();
+}
+
+/*
+ * A linux hugepage PMD was changed and the corresponding hash table entries
+ * need to be flushed.
+ */
+void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
+			    pmd_t *pmdp, unsigned long old_pmd)
+{
+	int ssize;
+	unsigned int psize;
+	unsigned long vsid;
+	unsigned long flags = 0;
+	const struct cpumask *tmp;
+
+	/* get the base page size, vsid and segment size */
+#ifdef CONFIG_DEBUG_VM
+	psize = get_slice_psize(mm, addr);
+	BUG_ON(psize == MMU_PAGE_16M);
+#endif
+	if (old_pmd & H_PAGE_COMBO)
+		psize = MMU_PAGE_4K;
+	else
+		psize = MMU_PAGE_64K;
+
+	if (!is_kernel_addr(addr)) {
+		ssize = user_segment_size(addr);
+		vsid = get_vsid(mm->context.id, addr, ssize);
+		WARN_ON(vsid == 0);
+	} else {
+		vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
+		ssize = mmu_kernel_ssize;
+	}
+
+	tmp = cpumask_of(smp_processor_id());
+	if (cpumask_equal(mm_cpumask(mm), tmp))
+		flags |= HPTE_LOCAL_UPDATE;
+
+	return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, flags);
+}
+
+static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
+{
+	return __pmd(pmd_val(pmd) | pgprot_val(pgprot));
+}
+
+pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
+{
+	unsigned long pmdv;
+
+	pmdv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK;
+	return pmd_set_protbits(__pmd(pmdv), pgprot);
+}
+
+pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
+{
+	return pfn_pmd(page_to_pfn(page), pgprot);
+}
+
+pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+	unsigned long pmdv;
+
+	pmdv = pmd_val(pmd);
+	pmdv &= _HPAGE_CHG_MASK;
+	return pmd_set_protbits(__pmd(pmdv), newprot);
+}
+
+/*
+ * This is called at the end of handling a user page fault, when the
+ * fault has been handled by updating a HUGE PMD entry in the linux page tables.
+ * We use it to preload an HPTE into the hash table corresponding to
+ * the updated linux HUGE PMD entry.
+ */
+void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+			  pmd_t *pmd)
+{
+	return;
+}
+
+pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
+			      unsigned long addr, pmd_t *pmdp)
+{
+	pmd_t old_pmd;
+	pgtable_t pgtable;
+	unsigned long old;
+	pgtable_t *pgtable_slot;
+
+	old = pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
+	old_pmd = __pmd(old);
+	/*
+	 * We have pmd == none and we are holding page_table_lock.
+	 * So we can safely go and clear the pgtable hash
+	 * index info.
+	 */
+	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
+	pgtable = *pgtable_slot;
+	/*
+	 * Let's zero out the old valid and hash index details; the
+	 * hash fault path looks at them.
+	 */
+	memset(pgtable, 0, PTE_FRAG_SIZE);
+	/*
+	 * Serialize against find_linux_pte_or_hugepte which does lock-less
+	 * lookup in page tables with local interrupts disabled. For huge pages
+	 * it casts pmd_t to pte_t. Since format of pte_t is different from
+	 * pmd_t we want to prevent transit from pmd pointing to page table
+	 * to pmd pointing to huge page (and back) while interrupts are disabled.
+	 * We clear pmd to possibly replace it with page table pointer in
+	 * different code paths. So make sure we wait for the parallel
+	 * find_linux_pte_or_hugepte to finish.
+	 */
+	kick_all_cpus_sync();
+	return old_pmd;
+}
+
+int has_transparent_hugepage(void)
+{
+
+	if (!mmu_has_feature(MMU_FTR_16M_PAGE))
+		return 0;
+	/*
+	 * We support THP only if PMD_SIZE is 16MB.
+	 */
+	if (mmu_psize_defs[MMU_PAGE_16M].shift != PMD_SHIFT)
+		return 0;
+	/*
+	 * We need to make sure that we support a 16MB hugepage in a segment
+	 * with base page size 64K or 4K. We only enable THP with a PAGE_SIZE
+	 * of 64K.
+	 */
+	/*
+	 * If we have 64K HPTE, we will be using that by default
+	 */
+	if (mmu_psize_defs[MMU_PAGE_64K].shift &&
+	    (mmu_psize_defs[MMU_PAGE_64K].penc[MMU_PAGE_16M] == -1))
+		return 0;
+	/*
+	 * Ok we only have 4K HPTE
+	 */
+	if (mmu_psize_defs[MMU_PAGE_4K].penc[MMU_PAGE_16M] == -1)
+		return 0;
+
+	return 1;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 092c07115612..2ab2fa2aa32a 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -55,9 +55,6 @@
 
 #include "mmu_decl.h"
 
-#define CREATE_TRACE_POINTS
-#include <trace/events/thp.h>
-
 #ifdef CONFIG_PPC_STD_MMU_64
 #if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT))
 #error TASK_SIZE_USER64 exceeds user VSID range
@@ -433,360 +430,3 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
 	}
 }
 #endif
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-
-/*
- * This is called when relaxing access to a hugepage. It's also called in the page
- * fault path when we don't hit any of the major fault cases, ie, a minor
- * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
- * handled those two for us, we additionally deal with missing execute
- * permission here on some processors
- */
-int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
-			  pmd_t *pmdp, pmd_t entry, int dirty)
-{
-	int changed;
-#ifdef CONFIG_DEBUG_VM
-	WARN_ON(!pmd_trans_huge(*pmdp));
-	assert_spin_locked(&vma->vm_mm->page_table_lock);
-#endif
-	changed = !pmd_same(*(pmdp), entry);
-	if (changed) {
-		__ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry));
-		/*
-		 * Since we are not supporting SW TLB systems, we don't
-		 * have any thing similar to flush_tlb_page_nohash()
-		 */
-	}
-	return changed;
-}
-
-unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
-				  pmd_t *pmdp, unsigned long clr,
-				  unsigned long set)
-{
-
-	unsigned long old, tmp;
-	unsigned long busy = cpu_to_be64(H_PAGE_BUSY);
-
-#ifdef CONFIG_DEBUG_VM
-	WARN_ON(!pmd_trans_huge(*pmdp));
-	assert_spin_locked(&mm->page_table_lock);
-#endif
-
-	clr = cpu_to_be64(clr);
-	set = cpu_to_be64(set);
-	__asm__ __volatile__(
-	"1:	ldarx	%0,0,%3\n\
-		and.	%1,%0,%6\n\
-		bne-	1b \n\
-		andc	%1,%0,%4 \n\
-		or	%1,%1,%7\n\
-		stdcx.	%1,0,%3 \n\
-		bne-	1b"
-	: "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
-	: "r" (pmdp), "r" (clr), "m" (*pmdp), "r" (busy), "r" (set)
-	: "cc" );
-
-	old = be64_to_cpu(old);
-
-	trace_hugepage_update(addr, old, clr, set);
-	if (old & H_PAGE_HASHPTE)
-		hpte_do_hugepage_flush(mm, addr, pmdp, old);
-	return old;
-}
-
-pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
-			  pmd_t *pmdp)
-{
-	pmd_t pmd;
-
-	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
-	VM_BUG_ON(pmd_trans_huge(*pmdp));
-
-	pmd = *pmdp;
-	pmd_clear(pmdp);
-	/*
-	 * Wait for all pending hash_page to finish. This is needed
-	 * in case of subpage collapse. When we collapse normal pages
-	 * to hugepage, we first clear the pmd, then invalidate all
-	 * the PTE entries. The assumption here is that any low level
-	 * page fault will see a none pmd and take the slow path that
-	 * will wait on mmap_sem. But we could very well be in a
-	 * hash_page with local ptep pointer value. Such a hash page
-	 * can result in adding new HPTE entries for normal subpages.
-	 * That means we could be modifying the page content as we
-	 * copy them to a huge page. So wait for parallel hash_page
-	 * to finish before invalidating HPTE entries. We can do this
-	 * by sending an IPI to all the cpus and executing a dummy
-	 * function there.
-	 */
-	kick_all_cpus_sync();
-	/*
-	 * Now invalidate the hpte entries in the range
-	 * covered by pmd. This make sure we take a
-	 * fault and will find the pmd as none, which will
-	 * result in a major fault which takes mmap_sem and
-	 * hence wait for collapse to complete. Without this
-	 * the __collapse_huge_page_copy can result in copying
-	 * the old content.
-	 */
-	flush_tlb_pmd_range(vma->vm_mm, &pmd, address);
-	return pmd;
-}
-
-/*
- * We currently remove entries from the hashtable regardless of whether
- * the entry was young or dirty.
- *
- * We should be more intelligent about this but for the moment we override
- * these functions and force a tlb flush unconditionally
- */
-int pmdp_test_and_clear_young(struct vm_area_struct *vma,
-			      unsigned long address, pmd_t *pmdp)
-{
-	return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
-}
-
-/*
- * We want to put the pgtable in pmd and use pgtable for tracking
- * the base page size hptes
- */
-void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
-				pgtable_t pgtable)
-{
-	pgtable_t *pgtable_slot;
-	assert_spin_locked(&mm->page_table_lock);
-	/*
-	 * we store the pgtable in the second half of PMD
-	 */
-	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
-	*pgtable_slot = pgtable;
-	/*
-	 * expose the deposited pgtable to other cpus.
-	 * before we set the hugepage PTE at pmd level
-	 * hash fault code looks at the deposted pgtable
-	 * to store hash index values.
-	 */
-	smp_wmb();
-}
-
-pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
-{
-	pgtable_t pgtable;
-	pgtable_t *pgtable_slot;
-
-	assert_spin_locked(&mm->page_table_lock);
-	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
-	pgtable = *pgtable_slot;
-	/*
-	 * Once we withdraw, mark the entry NULL.
-	 */
-	*pgtable_slot = NULL;
-	/*
-	 * We store HPTE information in the deposited PTE fragment.
-	 * zero out the content on withdraw.
-	 */
-	memset(pgtable, 0, PTE_FRAG_SIZE);
-	return pgtable;
-}
-
-void pmdp_huge_split_prepare(struct vm_area_struct *vma,
-			     unsigned long address, pmd_t *pmdp)
-{
-	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
-	VM_BUG_ON(REGION_ID(address) != USER_REGION_ID);
-
-	/*
-	 * We can't mark the pmd none here, because that will cause a race
-	 * against exit_mmap. We need to continue mark pmd TRANS HUGE, while
-	 * we spilt, but at the same time we wan't rest of the ppc64 code
-	 * not to insert hash pte on this, because we will be modifying
-	 * the deposited pgtable in the caller of this function. Hence
-	 * clear the _PAGE_USER so that we move the fault handling to
-	 * higher level function and that will serialize against ptl.
-	 * We need to flush existing hash pte entries here even though,
-	 * the translation is still valid, because we will withdraw
-	 * pgtable_t after this.
-	 */
-	pmd_hugepage_update(vma->vm_mm, address, pmdp, 0, _PAGE_PRIVILEGED);
-}
-
-
-/*
- * set a new huge pmd. We should not be called for updating
- * an existing pmd entry. That should go via pmd_hugepage_update.
- */
-void set_pmd_at(struct mm_struct *mm, unsigned long addr,
-		pmd_t *pmdp, pmd_t pmd)
-{
-#ifdef CONFIG_DEBUG_VM
-	WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
-	assert_spin_locked(&mm->page_table_lock);
-	WARN_ON(!pmd_trans_huge(pmd));
-#endif
-	trace_hugepage_set_pmd(addr, pmd_val(pmd));
-	return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
-}
-
-/*
- * We use this to invalidate a pmdp entry before switching from a
- * hugepte to regular pmd entry.
- */
-void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
-		     pmd_t *pmdp)
-{
-	pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
-
-	/*
-	 * This ensures that generic code that rely on IRQ disabling
-	 * to prevent a parallel THP split work as expected.
-	 */
-	kick_all_cpus_sync();
-}
-
-/*
- * A linux hugepage PMD was changed and the corresponding hash table entries
- * neesd to be flushed.
- */
-void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
-			    pmd_t *pmdp, unsigned long old_pmd)
-{
-	int ssize;
-	unsigned int psize;
-	unsigned long vsid;
-	unsigned long flags = 0;
-	const struct cpumask *tmp;
-
-	/* get the base page size,vsid and segment size */
-#ifdef CONFIG_DEBUG_VM
-	psize = get_slice_psize(mm, addr);
-	BUG_ON(psize == MMU_PAGE_16M);
-#endif
-	if (old_pmd & H_PAGE_COMBO)
-		psize = MMU_PAGE_4K;
-	else
-		psize = MMU_PAGE_64K;
-
-	if (!is_kernel_addr(addr)) {
-		ssize = user_segment_size(addr);
-		vsid = get_vsid(mm->context.id, addr, ssize);
-		WARN_ON(vsid == 0);
-	} else {
-		vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
-		ssize = mmu_kernel_ssize;
-	}
-
-	tmp = cpumask_of(smp_processor_id());
-	if (cpumask_equal(mm_cpumask(mm), tmp))
-		flags |= HPTE_LOCAL_UPDATE;
-
-	return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, flags);
-}
-
-static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
-{
-	return __pmd(pmd_val(pmd) | pgprot_val(pgprot));
-}
-
-pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
-{
-	unsigned long pmdv;
-
-	pmdv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK;
-	return pmd_set_protbits(__pmd(pmdv), pgprot);
-}
-
-pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
-{
-	return pfn_pmd(page_to_pfn(page), pgprot);
-}
-
-pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
-{
-	unsigned long pmdv;
-
-	pmdv = pmd_val(pmd);
-	pmdv &= _HPAGE_CHG_MASK;
-	return pmd_set_protbits(__pmd(pmdv), newprot);
-}
-
-/*
- * This is called at the end of handling a user page fault, when the
- * fault has been handled by updating a HUGE PMD entry in the linux page tables.
- * We use it to preload an HPTE into the hash table corresponding to
- * the updated linux HUGE PMD entry.
- */
-void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
-			  pmd_t *pmd)
-{
-	return;
-}
-
-pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
-			      unsigned long addr, pmd_t *pmdp)
-{
-	pmd_t old_pmd;
-	pgtable_t pgtable;
-	unsigned long old;
-	pgtable_t *pgtable_slot;
-
-	old = pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
-	old_pmd = __pmd(old);
-	/*
-	 * We have pmd == none and we are holding page_table_lock.
-	 * So we can safely go and clear the pgtable hash
-	 * index info.
-	 */
-	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
-	pgtable = *pgtable_slot;
-	/*
-	 * Let's zero out old valid and hash index details
-	 * hash fault look at them.
-	 */
-	memset(pgtable, 0, PTE_FRAG_SIZE);
-	/*
-	 * Serialize against find_linux_pte_or_hugepte which does lock-less
-	 * lookup in page tables with local interrupts disabled. For huge pages
-	 * it casts pmd_t to pte_t. Since format of pte_t is different from
-	 * pmd_t we want to prevent transit from pmd pointing to page table
-	 * to pmd pointing to huge page (and back) while interrupts are disabled.
-	 * We clear pmd to possibly replace it with page table pointer in
-	 * different code paths. So make sure we wait for the parallel
-	 * find_linux_pte_or_hugepage to finish.
-	 */
-	kick_all_cpus_sync();
-	return old_pmd;
-}
-
-int has_transparent_hugepage(void)
-{
-
-	if (!mmu_has_feature(MMU_FTR_16M_PAGE))
-		return 0;
-	/*
-	 * We support THP only if PMD_SIZE is 16MB.
-	 */
-	if (mmu_psize_defs[MMU_PAGE_16M].shift != PMD_SHIFT)
-		return 0;
-	/*
-	 * We need to make sure that we support 16MB hugepage in a segement
-	 * with base page size 64K or 4K. We only enable THP with a PAGE_SIZE
-	 * of 64K.
-	 */
-	/*
-	 * If we have 64K HPTE, we will be using that by default
-	 */
-	if (mmu_psize_defs[MMU_PAGE_64K].shift &&
-	    (mmu_psize_defs[MMU_PAGE_64K].penc[MMU_PAGE_16M] == -1))
-		return 0;
-	/*
-	 * Ok we only have 4K HPTE
-	 */
-	if (mmu_psize_defs[MMU_PAGE_4K].penc[MMU_PAGE_16M] == -1)
-		return 0;
-
-	return 1;
-}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 62/65] powerpc/mm/thp: Abstraction for THP functions
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (59 preceding siblings ...)
  2016-03-27  8:24 ` [PATCH 61/65] powerpc/mm: THP is only available on hash64 as of now Aneesh Kumar K.V
@ 2016-03-27  8:24 ` Aneesh Kumar K.V
  2016-03-27  8:24 ` [PATCH 63/65] powerpc/mm/radix: Add radix THP callbacks Aneesh Kumar K.V
                   ` (3 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:24 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash-64k.h    |  23 +++-
 arch/powerpc/include/asm/book3s/64/pgtable-64k.h |   7 +-
 arch/powerpc/include/asm/book3s/64/pgtable.h     |  48 +++++---
 arch/powerpc/mm/Makefile                         |   2 +-
 arch/powerpc/mm/pgtable-book3s64.c               | 123 ++++++++++++++++++++
 arch/powerpc/mm/pgtable-hash64.c                 | 137 +++--------------------
 6 files changed, 196 insertions(+), 144 deletions(-)
 create mode 100644 arch/powerpc/mm/pgtable-book3s64.c

diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index f4a19914fdbe..f377177ee060 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -119,11 +119,6 @@ static inline int hlremap_4k_pfn(struct vm_area_struct *vma, unsigned long addr,
 #define H_PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-extern unsigned long pmd_hugepage_update(struct mm_struct *mm,
-					 unsigned long addr,
-					 pmd_t *pmdp,
-					 unsigned long clr,
-					 unsigned long set);
 static inline char *get_hpte_slot_array(pmd_t *pmdp)
 {
 	/*
@@ -193,6 +188,24 @@ static inline int hlpmd_same(pmd_t pmd_a, pmd_t pmd_b)
 	return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0);
 }
 
+static inline pmd_t hlpmd_mkhuge(pmd_t pmd)
+{
+	return __pmd(pmd_val(pmd) | (_PAGE_PTE | H_PAGE_THP_HUGE));
+}
+
+extern unsigned long hlpmd_hugepage_update(struct mm_struct *mm,
+					   unsigned long addr, pmd_t *pmdp,
+					   unsigned long clr, unsigned long set);
+extern pmd_t hlpmdp_collapse_flush(struct vm_area_struct *vma,
+				   unsigned long address, pmd_t *pmdp);
+extern void hlpgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+					 pgtable_t pgtable);
+extern pgtable_t hlpgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+extern void hlpmdp_huge_split_prepare(struct vm_area_struct *vma,
+				      unsigned long address, pmd_t *pmdp);
+extern pmd_t hlpmdp_huge_get_and_clear(struct mm_struct *mm,
+				       unsigned long addr, pmd_t *pmdp);
+extern int hl_has_transparent_hugepage(void);
 #endif /*  CONFIG_TRANSPARENT_HUGEPAGE */
 #endif	/* __ASSEMBLY__ */
 
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
index 028b8f6e002b..47638774baad 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
@@ -103,6 +103,12 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
 		return rpmd_same(pmd_a, pmd_b);
 	return hlpmd_same(pmd_a, pmd_b);
 }
+
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+	return hlpmd_mkhuge(pmd);
+}
+
 #endif /*  CONFIG_TRANSPARENT_HUGEPAGE */
 
 static inline int remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr,
@@ -111,7 +117,6 @@ static inline int remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr,
 	if (radix_enabled())
 		BUG();
 	return hlremap_4k_pfn(vma, addr, pfn, prot);
-
 }
 #endif	/* __ASSEMBLY__ */
 #endif /*_ASM_POWERPC_BOOK3S_64_PGTABLE_64K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index d38f402aa648..e22bd4797fa6 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -769,7 +769,6 @@ static inline int __meminit vmemmap_create_mapping(unsigned long start,
 static inline void vmemmap_remove_mapping(unsigned long start,
 					  unsigned long page_size)
 {
-
 	if (radix_enabled())
 		return rvmemmap_remove_mapping(start, page_size);
 	return hlvmemmap_remove_mapping(start, page_size);
@@ -825,11 +824,17 @@ extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 		       pmd_t *pmdp, pmd_t pmd);
 extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
 				 pmd_t *pmd);
-extern int has_transparent_hugepage(void);
+extern int hl_has_transparent_hugepage(void);
+static inline int has_transparent_hugepage(void)
+{
+	return hl_has_transparent_hugepage();
+}
 
-static inline pmd_t pmd_mkhuge(pmd_t pmd)
+static inline unsigned long
+pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp,
+		    unsigned long clr, unsigned long set)
 {
-	return __pmd(pmd_val(pmd) | (_PAGE_PTE | H_PAGE_THP_HUGE));
+	return hlpmd_hugepage_update(mm, addr, pmdp, clr, set);
 }
 
 #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
@@ -842,26 +847,43 @@ extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 				     unsigned long address, pmd_t *pmdp);
 
 #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
-extern pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
-				     unsigned long addr, pmd_t *pmdp);
+static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
+					    unsigned long addr, pmd_t *pmdp)
+{
+	return hlpmdp_huge_get_and_clear(mm, addr, pmdp);
+}
 
-extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
-				 unsigned long address, pmd_t *pmdp);
+static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
+					unsigned long address, pmd_t *pmdp)
+{
+	return hlpmdp_collapse_flush(vma, address, pmdp);
+}
 #define pmdp_collapse_flush pmdp_collapse_flush
 
 #define __HAVE_ARCH_PGTABLE_DEPOSIT
-extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
-				       pgtable_t pgtable);
+static inline void pgtable_trans_huge_deposit(struct mm_struct *mm,
+					      pmd_t *pmdp, pgtable_t pgtable)
+{
+	return hlpgtable_trans_huge_deposit(mm, pmdp, pgtable);
+}
+
 #define __HAVE_ARCH_PGTABLE_WITHDRAW
-extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+static inline pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm,
+						    pmd_t *pmdp)
+{
+	return hlpgtable_trans_huge_withdraw(mm, pmdp);
+}
 
 #define __HAVE_ARCH_PMDP_INVALIDATE
 extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 			    pmd_t *pmdp);
 
 #define __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE
-extern void pmdp_huge_split_prepare(struct vm_area_struct *vma,
-				    unsigned long address, pmd_t *pmdp);
+static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma,
+					   unsigned long address, pmd_t *pmdp)
+{
+	return hlpmdp_huge_split_prepare(vma, address, pmdp);
+}
 
 #define pmd_move_must_withdraw pmd_move_must_withdraw
 struct spinlock;
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index c110328ce340..67996a379046 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -14,7 +14,7 @@ obj-$(CONFIG_PPC_MMU_NOHASH)	+= mmu_context_nohash.o tlb_nohash.o \
 obj-$(CONFIG_PPC_BOOK3E)	+= tlb_low_$(CONFIG_WORD_SIZE)e.o
 hash64-$(CONFIG_PPC_NATIVE)	:= hash_native_64.o
 obj-$(CONFIG_PPC_BOOK3E_64)   += pgtable-book3e.o
-obj-$(CONFIG_PPC_STD_MMU_64)	+= pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o
+obj-$(CONFIG_PPC_STD_MMU_64)	+= pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o
 obj-$(CONFIG_PPC_RADIX_MMU)	+= pgtable-radix.o tlb-radix.o
 obj-$(CONFIG_PPC_STD_MMU_32)	+= ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o
 obj-$(CONFIG_PPC_STD_MMU)	+= tlb_hash$(CONFIG_WORD_SIZE).o
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
new file mode 100644
index 000000000000..168f2fa92fb1
--- /dev/null
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -0,0 +1,123 @@
+/*
+ * Copyright IBM Corporation, 2016
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include <linux/sched.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+
+#include "mmu_decl.h"
+#include <trace/events/thp.h>
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/*
+ * This is called when relaxing access to a hugepage. It's also called in the page
+ * fault path when we don't hit any of the major fault cases, ie, a minor
+ * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
+ * handled those two for us; we additionally deal with missing execute
+ * permission here on some processors
+ */
+int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+			  pmd_t *pmdp, pmd_t entry, int dirty)
+{
+	int changed;
+#ifdef CONFIG_DEBUG_VM
+	WARN_ON(!pmd_trans_huge(*pmdp));
+	assert_spin_locked(&vma->vm_mm->page_table_lock);
+#endif
+	changed = !pmd_same(*(pmdp), entry);
+	if (changed) {
+		__ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry));
+		/*
+		 * Since we are not supporting SW TLB systems, we don't
+		 * have anything similar to flush_tlb_page_nohash()
+		 */
+	}
+	return changed;
+}
+
+int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+			      unsigned long address, pmd_t *pmdp)
+{
+	return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
+}
+/*
+ * set a new huge pmd. We should not be called for updating
+ * an existing pmd entry. That should go via pmd_hugepage_update.
+ */
+void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmdp, pmd_t pmd)
+{
+#ifdef CONFIG_DEBUG_VM
+	WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
+	assert_spin_locked(&mm->page_table_lock);
+	WARN_ON(!pmd_trans_huge(pmd));
+#endif
+	trace_hugepage_set_pmd(addr, pmd_val(pmd));
+	return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
+}
+/*
+ * We use this to invalidate a pmdp entry before switching from a
+ * hugepte to regular pmd entry.
+ */
+void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
+		     pmd_t *pmdp)
+{
+	pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
+
+	/*
+	 * This ensures that generic code that relies on IRQ disabling
+	 * to prevent a parallel THP split works as expected.
+	 */
+	kick_all_cpus_sync();
+}
+
+static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
+{
+	return __pmd(pmd_val(pmd) | pgprot_val(pgprot));
+}
+
+pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
+{
+	unsigned long pmdv;
+
+	pmdv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK;
+	return pmd_set_protbits(__pmd(pmdv), pgprot);
+}
+
+pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
+{
+	return pfn_pmd(page_to_pfn(page), pgprot);
+}
+
+pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+	unsigned long pmdv;
+
+	pmdv = pmd_val(pmd);
+	pmdv &= _HPAGE_CHG_MASK;
+	return pmd_set_protbits(__pmd(pmdv), newprot);
+}
+
+/*
+ * This is called at the end of handling a user page fault, when the
+ * fault has been handled by updating a HUGE PMD entry in the linux page tables.
+ * We use it to preload an HPTE into the hash table corresponding to
+ * the updated linux HUGE PMD entry.
+ */
+void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+			  pmd_t *pmd)
+{
+	return;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c
index a11fde8ac274..bb5b4815284a 100644
--- a/arch/powerpc/mm/pgtable-hash64.c
+++ b/arch/powerpc/mm/pgtable-hash64.c
@@ -102,35 +102,9 @@ int hlmap_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 
-/*
- * This is called when relaxing access to a hugepage. It's also called in the page
- * fault path when we don't hit any of the major fault cases, ie, a minor
- * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
- * handled those two for us, we additionally deal with missing execute
- * permission here on some processors
- */
-int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
-			  pmd_t *pmdp, pmd_t entry, int dirty)
-{
-	int changed;
-#ifdef CONFIG_DEBUG_VM
-	WARN_ON(!pmd_trans_huge(*pmdp));
-	assert_spin_locked(&vma->vm_mm->page_table_lock);
-#endif
-	changed = !pmd_same(*(pmdp), entry);
-	if (changed) {
-		__ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry));
-		/*
-		 * Since we are not supporting SW TLB systems, we don't
-		 * have any thing similar to flush_tlb_page_nohash()
-		 */
-	}
-	return changed;
-}
-
-unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
-				  pmd_t *pmdp, unsigned long clr,
-				  unsigned long set)
+unsigned long hlpmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
+				    pmd_t *pmdp, unsigned long clr,
+				    unsigned long set)
 {
 
 	unsigned long old, tmp;
@@ -163,8 +137,8 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
 	return old;
 }
 
-pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
-			  pmd_t *pmdp)
+pmd_t hlpmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
+			    pmd_t *pmdp)
 {
 	pmd_t pmd;
 
@@ -203,24 +177,11 @@ pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
 }
 
 /*
- * We currently remove entries from the hashtable regardless of whether
- * the entry was young or dirty.
- *
- * We should be more intelligent about this but for the moment we override
- * these functions and force a tlb flush unconditionally
- */
-int pmdp_test_and_clear_young(struct vm_area_struct *vma,
-			      unsigned long address, pmd_t *pmdp)
-{
-	return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
-}
-
-/*
  * We want to put the pgtable in pmd and use pgtable for tracking
  * the base page size hptes
  */
-void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
-				pgtable_t pgtable)
+void hlpgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+				  pgtable_t pgtable)
 {
 	pgtable_t *pgtable_slot;
 	assert_spin_locked(&mm->page_table_lock);
@@ -238,7 +199,7 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
 	smp_wmb();
 }
 
-pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
+pgtable_t hlpgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
 {
 	pgtable_t pgtable;
 	pgtable_t *pgtable_slot;
@@ -258,8 +219,8 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
 	return pgtable;
 }
 
-void pmdp_huge_split_prepare(struct vm_area_struct *vma,
-			     unsigned long address, pmd_t *pmdp)
+void hlpmdp_huge_split_prepare(struct vm_area_struct *vma,
+			       unsigned long address, pmd_t *pmdp)
 {
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 	VM_BUG_ON(REGION_ID(address) != USER_REGION_ID);
@@ -279,39 +240,6 @@ void pmdp_huge_split_prepare(struct vm_area_struct *vma,
 	pmd_hugepage_update(vma->vm_mm, address, pmdp, 0, _PAGE_PRIVILEGED);
 }
 
-
-/*
- * set a new huge pmd. We should not be called for updating
- * an existing pmd entry. That should go via pmd_hugepage_update.
- */
-void set_pmd_at(struct mm_struct *mm, unsigned long addr,
-		pmd_t *pmdp, pmd_t pmd)
-{
-#ifdef CONFIG_DEBUG_VM
-	WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
-	assert_spin_locked(&mm->page_table_lock);
-	WARN_ON(!pmd_trans_huge(pmd));
-#endif
-	trace_hugepage_set_pmd(addr, pmd_val(pmd));
-	return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
-}
-
-/*
- * We use this to invalidate a pmdp entry before switching from a
- * hugepte to regular pmd entry.
- */
-void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
-		     pmd_t *pmdp)
-{
-	pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
-
-	/*
-	 * This ensures that generic code that rely on IRQ disabling
-	 * to prevent a parallel THP split work as expected.
-	 */
-	kick_all_cpus_sync();
-}
-
 /*
  * A linux hugepage PMD was changed and the corresponding hash table entries
  * needs to be flushed.
@@ -351,47 +279,8 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
 	return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, flags);
 }
 
-static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
-{
-	return __pmd(pmd_val(pmd) | pgprot_val(pgprot));
-}
-
-pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
-{
-	unsigned long pmdv;
-
-	pmdv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK;
-	return pmd_set_protbits(__pmd(pmdv), pgprot);
-}
-
-pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
-{
-	return pfn_pmd(page_to_pfn(page), pgprot);
-}
-
-pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
-{
-	unsigned long pmdv;
-
-	pmdv = pmd_val(pmd);
-	pmdv &= _HPAGE_CHG_MASK;
-	return pmd_set_protbits(__pmd(pmdv), newprot);
-}
-
-/*
- * This is called at the end of handling a user page fault, when the
- * fault has been handled by updating a HUGE PMD entry in the linux page tables.
- * We use it to preload an HPTE into the hash table corresponding to
- * the updated linux HUGE PMD entry.
- */
-void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
-			  pmd_t *pmd)
-{
-	return;
-}
-
-pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
-			      unsigned long addr, pmd_t *pmdp)
+pmd_t hlpmdp_huge_get_and_clear(struct mm_struct *mm,
+				unsigned long addr, pmd_t *pmdp)
 {
 	pmd_t old_pmd;
 	pgtable_t pgtable;
@@ -426,7 +315,7 @@ pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
 	return old_pmd;
 }
 
-int has_transparent_hugepage(void)
+int hl_has_transparent_hugepage(void)
 {
 
 	if (!mmu_has_feature(MMU_FTR_16M_PAGE))
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 63/65] powerpc/mm/radix: Add radix THP callbacks
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (60 preceding siblings ...)
  2016-03-27  8:24 ` [PATCH 62/65] powerpc/mm/thp: Abstraction for THP functions Aneesh Kumar K.V
@ 2016-03-27  8:24 ` Aneesh Kumar K.V
  2016-03-27  8:24 ` [PATCH 64/65] powerpc/mm/radix: Add THP support for 4k linux page size Aneesh Kumar K.V
                   ` (2 subsequent siblings)
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:24 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

The deposited pgtable_t is a pte fragment, hence we cannot use page->lru
for linking them together. We instead use the first two 64-bit words of
the pte fragment as a list_head to link all deposited fragments together.
On withdraw we properly zero them out.
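
A rough user-space sketch of that layout (the struct name, FRAG_SIZE and the
LIFO ordering below are invented for illustration; the real code keeps FIFO
order with a proper struct list_head, see rpgtable_trans_huge_deposit() and
rpgtable_trans_huge_withdraw() in the patch):

#include <assert.h>
#include <stdlib.h>
#include <string.h>

#define FRAG_SIZE 4096			/* assumed fragment size, sketch only */

/*
 * A deposited "page table" is just FRAG_SIZE bytes of pte memory.
 * While it sits on the deposit list we borrow its first word as a
 * next pointer, precisely because a pte fragment has no page->lru
 * of its own to link with.
 */
struct frag {
	struct frag *next;
};

static struct frag *deposited;		/* stand-in for pmd_huge_pte(mm, pmdp) */

static void deposit(void *pgtable)
{
	struct frag *f = pgtable;

	f->next = deposited;
	deposited = f;
}

static void *withdraw(void)
{
	struct frag *f = deposited;

	if (!f)
		return NULL;
	deposited = f->next;
	/* zero the first two words, as the real list_head based code does
	 * (assumes 64-bit longs, as on ppc64) */
	memset(f, 0, 2 * sizeof(unsigned long));
	return f;
}

int main(void)
{
	void *a = calloc(1, FRAG_SIZE);
	void *b = calloc(1, FRAG_SIZE);

	deposit(a);
	deposit(b);
	assert(withdraw() == b);	/* LIFO here; the kernel code is FIFO */
	assert(withdraw() == a);
	assert(withdraw() == NULL);
	free(a);
	free(b);
	return 0;
}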

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/pgtable-64k.h |   2 +
 arch/powerpc/include/asm/book3s/64/pgtable.h     |  16 ++++
 arch/powerpc/include/asm/book3s/64/radix.h       |  22 +++++
 arch/powerpc/mm/pgtable-book3s64.c               |   2 +-
 arch/powerpc/mm/pgtable-radix.c                  | 117 +++++++++++++++++++++++
 5 files changed, 158 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
index 47638774baad..1803458a3297 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
@@ -106,6 +106,8 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
 
 static inline pmd_t pmd_mkhuge(pmd_t pmd)
 {
+	if (radix_enabled())
+		return rpmd_mkhuge(pmd);
 	return hlpmd_mkhuge(pmd);
 }
 
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index e22bd4797fa6..7eda6d2e5124 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -827,6 +827,8 @@ extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
 extern int hl_has_transparent_hugepage(void);
 static inline int has_transparent_hugepage(void)
 {
+	if (radix_enabled())
+		return r_has_transparent_hugepage();
 	return hl_has_transparent_hugepage();
 }
 
@@ -834,6 +836,8 @@ static inline unsigned long
 pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp,
 		    unsigned long clr, unsigned long set)
 {
+	if (radix_enabled())
+		return rpmd_hugepage_update(mm, addr, pmdp, clr, set);
 	return hlpmd_hugepage_update(mm, addr, pmdp, clr, set);
 }
 
@@ -850,12 +854,16 @@ extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
 					    unsigned long addr, pmd_t *pmdp)
 {
+	if (radix_enabled())
+		return rpmdp_huge_get_and_clear(mm, addr, pmdp);
 	return hlpmdp_huge_get_and_clear(mm, addr, pmdp);
 }
 
 static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
 					unsigned long address, pmd_t *pmdp)
 {
+	if (radix_enabled())
+		return rpmdp_collapse_flush(vma, address, pmdp);
 	return hlpmdp_collapse_flush(vma, address, pmdp);
 }
 #define pmdp_collapse_flush pmdp_collapse_flush
@@ -864,6 +872,8 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
 static inline void pgtable_trans_huge_deposit(struct mm_struct *mm,
 					      pmd_t *pmdp, pgtable_t pgtable)
 {
+	if (radix_enabled())
+		return rpgtable_trans_huge_deposit(mm, pmdp, pgtable);
 	return hlpgtable_trans_huge_deposit(mm, pmdp, pgtable);
 }
 
@@ -871,6 +881,8 @@ static inline void pgtable_trans_huge_deposit(struct mm_struct *mm,
 static inline pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm,
 						    pmd_t *pmdp)
 {
+	if (radix_enabled())
+		return rpgtable_trans_huge_withdraw(mm, pmdp);
 	return hlpgtable_trans_huge_withdraw(mm, pmdp);
 }
 
@@ -882,6 +894,8 @@ extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma,
 					   unsigned long address, pmd_t *pmdp)
 {
+	if (radix_enabled())
+		return rpmdp_huge_split_prepare(vma, address, pmdp);
 	return hlpmdp_huge_split_prepare(vma, address, pmdp);
 }
 
@@ -890,6 +904,8 @@ struct spinlock;
 static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
 					 struct spinlock *old_pmd_ptl)
 {
+	if (radix_enabled())
+		return false;
 	/*
 	 * Archs like ppc64 use pgtable to store per pmd
 	 * specific information. So when we switch the pmd,
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index d0449c0f2166..b67c593655f2 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -152,6 +152,28 @@ static inline int rpmd_trans_huge(pmd_t pmd)
 	return !!(pmd_val(pmd) & _PAGE_PTE);
 }
 
+static inline pmd_t rpmd_mkhuge(pmd_t pmd)
+{
+	return __pmd(pmd_val(pmd) | _PAGE_PTE);
+}
+static inline void rpmdp_huge_split_prepare(struct vm_area_struct *vma,
+					    unsigned long address, pmd_t *pmdp)
+{
+	/* Nothing to do for radix. */
+	return;
+}
+
+extern unsigned long rpmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
+					  pmd_t *pmdp, unsigned long clr,
+					  unsigned long set);
+extern pmd_t rpmdp_collapse_flush(struct vm_area_struct *vma,
+				  unsigned long address, pmd_t *pmdp);
+extern void rpgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+					pgtable_t pgtable);
+extern pgtable_t rpgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+extern pmd_t rpmdp_huge_get_and_clear(struct mm_struct *mm,
+				      unsigned long addr, pmd_t *pmdp);
+extern int r_has_transparent_hugepage(void);
 #endif
 
 extern int __meminit rvmemmap_create_mapping(unsigned long start,
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 168f2fa92fb1..3ce8c8712b1e 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -74,7 +74,7 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 		     pmd_t *pmdp)
 {
 	pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
-
+	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
 	/*
 	 * This ensures that generic code that relies on IRQ disabling
 	 * to prevent a parallel THP split works as expected.
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 2b608c37526c..bb1eb7d0911c 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -20,6 +20,8 @@
 #include <asm/mmu.h>
 #include <asm/firmware.h>
 
+#include <trace/events/thp.h>
+
 static int native_update_partition_table(u64 patb1)
 {
 	partition_tb->patb1 = cpu_to_be64(patb1);
@@ -377,3 +379,118 @@ void rvmemmap_remove_mapping(unsigned long start, unsigned long page_size)
 }
 #endif
 #endif
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+unsigned long rpmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
+				  pmd_t *pmdp, unsigned long clr,
+				  unsigned long set)
+{
+	unsigned long old;
+
+#ifdef CONFIG_DEBUG_VM
+	WARN_ON(!rpmd_trans_huge(*pmdp));
+	assert_spin_locked(&mm->page_table_lock);
+#endif
+
+	old = rpte_update(mm, addr, (pte_t *)pmdp, clr, set, 1);
+	trace_hugepage_update(addr, old, clr, set);
+
+	return old;
+}
+
+pmd_t rpmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
+			pmd_t *pmdp)
+
+{
+	pmd_t pmd;
+
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+	VM_BUG_ON(rpmd_trans_huge(*pmdp));
+	/*
+	 * khugepaged calls this for normal pmd
+	 */
+	pmd = *pmdp;
+	pmd_clear(pmdp);
+	/*FIXME!!  Verify whether we need this kick below */
+	kick_all_cpus_sync();
+	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+	return pmd;
+}
+
+/*
+ * For us pgtable_t is pte_t *. In order to save the deposited
+ * page table, we consider the allocated page table as a list
+ * head. On withdraw we need to make sure we zero out the used
+ * list_head memory area.
+ */
+void rpgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+				 pgtable_t pgtable)
+{
+        struct list_head *lh = (struct list_head *) pgtable;
+
+        assert_spin_locked(pmd_lockptr(mm, pmdp));
+
+        /* FIFO */
+        if (!pmd_huge_pte(mm, pmdp))
+                INIT_LIST_HEAD(lh);
+        else
+                list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
+        pmd_huge_pte(mm, pmdp) = pgtable;
+}
+
+pgtable_t rpgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
+{
+        pte_t *ptep;
+        pgtable_t pgtable;
+        struct list_head *lh;
+
+        assert_spin_locked(pmd_lockptr(mm, pmdp));
+
+        /* FIFO */
+        pgtable = pmd_huge_pte(mm, pmdp);
+        lh = (struct list_head *) pgtable;
+        if (list_empty(lh))
+                pmd_huge_pte(mm, pmdp) = NULL;
+        else {
+                pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
+                list_del(lh);
+        }
+        ptep = (pte_t *) pgtable;
+        *ptep = __pte(0);
+        ptep++;
+        *ptep = __pte(0);
+        return pgtable;
+}
+
+
+pmd_t rpmdp_huge_get_and_clear(struct mm_struct *mm,
+			       unsigned long addr, pmd_t *pmdp)
+{
+	pmd_t old_pmd;
+	unsigned long old;
+
+	old = rpmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
+	old_pmd = __pmd(old);
+	/*
+	 * Serialize against find_linux_pte_or_hugepte which does lock-less
+	 * lookup in page tables with local interrupts disabled. For huge pages
+	 * it casts pmd_t to pte_t. Since format of pte_t is different from
+	 * pmd_t we want to prevent transit from pmd pointing to page table
+	 * to pmd pointing to huge page (and back) while interrupts are disabled.
+	 * We clear pmd to possibly replace it with page table pointer in
+	 * different code paths. So make sure we wait for the parallel
+	 * find_linux_pte_or_hugepte to finish.
+	 */
+	kick_all_cpus_sync();
+	return old_pmd;
+}
+
+int r_has_transparent_hugepage(void)
+{
+	/* For radix 2M at PMD level means thp */
+	if (mmu_psize_defs[MMU_PAGE_2M].shift == PMD_SHIFT)
+		return 1;
+	return 0;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 64/65] powerpc/mm/radix: Add THP support for 4k linux page size
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (61 preceding siblings ...)
  2016-03-27  8:24 ` [PATCH 63/65] powerpc/mm/radix: Add radix THP callbacks Aneesh Kumar K.V
@ 2016-03-27  8:24 ` Aneesh Kumar K.V
  2016-03-27  8:24 ` [PATCH 65/65] powerpc/mm/radix: Cputable update for radix Aneesh Kumar K.V
  2016-03-30 10:53 ` [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Balbir Singh
  64 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:24 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash-4k.h     | 60 ++++++++++++++++++++++++
 arch/powerpc/include/asm/book3s/64/pgtable-64k.h | 60 ------------------------
 arch/powerpc/include/asm/book3s/64/pgtable.h     | 57 ++++++++++++++++++++++
 arch/powerpc/platforms/Kconfig.cputype           |  2 +-
 4 files changed, 118 insertions(+), 61 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 4d2ae7fdc4f9..add4d554312c 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -29,6 +29,7 @@
 #define H_PAGE_THP_HUGE 0x0
 #define H_PTE_FRAG_NR	0
 #define H_PTE_FRAG_SIZE_SHIFT  0
+#define H_PAGE_COMBO	0x0
 /*
  * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range()
  */
@@ -49,6 +50,65 @@ static inline int hl_hugepd_ok(hugepd_t hpd)
 }
 #endif
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+static inline char *get_hpte_slot_array(pmd_t *pmdp)
+{
+	BUG();
+	return NULL;
+}
+
+static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
+{
+	BUG();
+	return 0;
+}
+
+static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
+					   int index)
+{
+	BUG();
+	return 0;
+}
+
+static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
+					unsigned int index, unsigned int hidx)
+{
+	BUG();
+}
+
+static inline int hlpmd_trans_huge(pmd_t pmd)
+{
+	return 0;
+}
+
+static inline int hlpmd_same(pmd_t pmd_a, pmd_t pmd_b)
+{
+	BUG();
+	return 0;
+}
+
+static inline pmd_t hlpmd_mkhuge(pmd_t pmd)
+{
+	BUG();
+	return pmd;
+}
+
+extern unsigned long hlpmd_hugepage_update(struct mm_struct *mm,
+					   unsigned long addr, pmd_t *pmdp,
+					   unsigned long clr, unsigned long set);
+extern pmd_t hlpmdp_collapse_flush(struct vm_area_struct *vma,
+				   unsigned long address, pmd_t *pmdp);
+extern void hlpgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+					 pgtable_t pgtable);
+extern pgtable_t hlpgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+extern void hlpmdp_huge_split_prepare(struct vm_area_struct *vma,
+				      unsigned long address, pmd_t *pmdp);
+extern pmd_t hlpmdp_huge_get_and_clear(struct mm_struct *mm,
+				       unsigned long addr, pmd_t *pmdp);
+extern int hl_has_transparent_hugepage(void);
+#endif
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_BOOK3S_64_HASH_4K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
index 1803458a3297..888c25a491b0 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
@@ -53,66 +53,6 @@ static inline int hugepd_ok(hugepd_t hpd)
 
 #endif /* CONFIG_HUGETLB_PAGE */
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static inline int pmd_large(pmd_t pmd)
-{
-	return !!(pmd_val(pmd) & _PAGE_PTE);
-}
-
-static inline pmd_t pmd_mknotpresent(pmd_t pmd)
-{
-	return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT);
-}
-/*
- * For radix we should always find H_PAGE_HASHPTE zero. Hence
- * the below will work for radix too
- */
-static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
-					      unsigned long addr, pmd_t *pmdp)
-{
-	unsigned long old;
-
-	if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
-		return 0;
-	old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
-	return ((old & _PAGE_ACCESSED) != 0);
-}
-
-#define __HAVE_ARCH_PMDP_SET_WRPROTECT
-static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
-				      pmd_t *pmdp)
-{
-
-	if ((pmd_val(*pmdp) & _PAGE_WRITE) == 0)
-		return;
-
-	pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0);
-}
-
-static inline int pmd_trans_huge(pmd_t pmd)
-{
-	if (radix_enabled())
-		return rpmd_trans_huge(pmd);
-	return hlpmd_trans_huge(pmd);
-}
-
-#define __HAVE_ARCH_PMD_SAME
-static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
-{
-	if (radix_enabled())
-		return rpmd_same(pmd_a, pmd_b);
-	return hlpmd_same(pmd_a, pmd_b);
-}
-
-static inline pmd_t pmd_mkhuge(pmd_t pmd)
-{
-	if (radix_enabled())
-		return rpmd_mkhuge(pmd);
-	return hlpmd_mkhuge(pmd);
-}
-
-#endif /*  CONFIG_TRANSPARENT_HUGEPAGE */
-
 static inline int remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr,
 			       unsigned long pfn, pgprot_t prot)
 {
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 7eda6d2e5124..24d89e9001e0 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -841,6 +841,63 @@ pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp,
 	return hlpmd_hugepage_update(mm, addr, pmdp, clr, set);
 }
 
+static inline int pmd_large(pmd_t pmd)
+{
+	return !!(pmd_val(pmd) & _PAGE_PTE);
+}
+
+static inline pmd_t pmd_mknotpresent(pmd_t pmd)
+{
+	return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT);
+}
+/*
+ * For radix we should always find H_PAGE_HASHPTE zero. Hence
+ * the below will work for radix too
+ */
+static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
+					      unsigned long addr, pmd_t *pmdp)
+{
+	unsigned long old;
+
+	if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
+		return 0;
+	old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
+	return ((old & _PAGE_ACCESSED) != 0);
+}
+
+#define __HAVE_ARCH_PMDP_SET_WRPROTECT
+static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+				      pmd_t *pmdp)
+{
+
+	if ((pmd_val(*pmdp) & _PAGE_WRITE) == 0)
+		return;
+
+	pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0);
+}
+
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+	if (radix_enabled())
+		return rpmd_trans_huge(pmd);
+	return hlpmd_trans_huge(pmd);
+}
+
+#define __HAVE_ARCH_PMD_SAME
+static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
+{
+	if (radix_enabled())
+		return rpmd_same(pmd_a, pmd_b);
+	return hlpmd_same(pmd_a, pmd_b);
+}
+
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+	if (radix_enabled())
+		return rpmd_mkhuge(pmd);
+	return hlpmd_mkhuge(pmd);
+}
+
 #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
 extern int pmdp_set_access_flags(struct vm_area_struct *vma,
 				 unsigned long address, pmd_t *pmdp,
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 53299182dba9..ad9c77399aab 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -72,7 +72,7 @@ config PPC_BOOK3S_64
 	select PPC_FPU
 	select PPC_HAVE_PMU_SUPPORT
 	select SYS_SUPPORTS_HUGETLBFS
-	select HAVE_ARCH_TRANSPARENT_HUGEPAGE if PPC_64K_PAGES
+	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select ARCH_SUPPORTS_NUMA_BALANCING
 	select IRQ_WORK
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 65/65] powerpc/mm/radix: Cputable update for radix
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (62 preceding siblings ...)
  2016-03-27  8:24 ` [PATCH 64/65] powerpc/mm/radix: Add THP support for 4k linux page size Aneesh Kumar K.V
@ 2016-03-27  8:24 ` Aneesh Kumar K.V
  2016-03-30  1:01   ` Michael Neuling
  2016-04-01  6:25   ` Michael Ellerman
  2016-03-30 10:53 ` [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Balbir Singh
  64 siblings, 2 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-27  8:24 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

This patch moves the existing P9 hash entry to a different PVR and adds
the radix feature to the P9 PVR entry. That implies we will not be able
to select P9 hash at runtime. With P9 radix we need to:

* set UPRT = 0 in cpu setup
* set different TLB set count

We ideally want to use ibm,pa-features to enable/disable radix, but
we have already done the cpu setup by the time we reach the pa-features check.

So for now use this hack.
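
For context, cpu table selection is a first-match walk over cpu_specs[]
comparing (pvr & pvr_mask) == pvr_value, which is why the hash entry below
gets the narrower 0xffffff00 mask and sits in front of the broad 0x004e0000
radix entry. A stand-alone sketch of that selection (struct and function
names here are invented for illustration, this is not the kernel code):

#include <stdio.h>

/* Cut-down stand-in for struct cpu_spec: only the PVR matching fields. */
struct spec {
	unsigned int pvr_mask;
	unsigned int pvr_value;
	const char *cpu_name;
};

/* Same order as the patch: narrow hash match first, broad radix match second. */
static const struct spec specs[] = {
	{ 0xffffff00, 0x004e0200, "POWER9/hash (raw)" },
	{ 0xffff0000, 0x004e0000, "POWER9/radix (raw)" },
};

/* First match wins, as in identify_cpu(). */
static const struct spec *identify(unsigned int pvr)
{
	unsigned int i;

	for (i = 0; i < sizeof(specs) / sizeof(specs[0]); i++)
		if ((pvr & specs[i].pvr_mask) == specs[i].pvr_value)
			return &specs[i];
	return NULL;
}

int main(void)
{
	/* PVRs of the form 0x004e02xx pick hash, any other P9 PVR picks radix. */
	printf("%s\n", identify(0x004e0201)->cpu_name);
	printf("%s\n", identify(0x004e0100)->cpu_name);
	return 0;
}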

Not-Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h |  1 +
 arch/powerpc/include/asm/reg.h                |  4 +++
 arch/powerpc/kernel/cpu_setup_power.S         | 35 +++++++++++++++++++++++++++
 arch/powerpc/kernel/cputable.c                | 29 +++++++++++++++++++---
 arch/powerpc/kernel/mce_power.c               |  4 +++
 5 files changed, 70 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 7da61b85406b..290157e8d5b2 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -119,6 +119,7 @@
 #define POWER7_TLB_SETS		128	/* # sets in POWER7 TLB */
 #define POWER8_TLB_SETS		512	/* # sets in POWER8 TLB */
 #define POWER9_TLB_SETS_HASH	256	/* # sets in POWER9 TLB Hash mode */
+#define POWER9_TLB_SETS_RADIX	128	/* # sets in POWER9 TLB Radix mode */
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 257251ada3a3..d39b5fedabfd 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -347,6 +347,10 @@
 #define   LPCR_LPES_SH	2
 #define   LPCR_RMI     0x00000002      /* real mode is cache inhibit */
 #define   LPCR_HDICE   0x00000001      /* Hyp Decr enable (HV,PR,EE) */
+/*
+ * Used in asm code, hence we don't want to use PPC_BITCOUNT
+ */
+#define	  LPCR_UPRT	(ASM_CONST(0x1) << (64 - 42))
 #ifndef SPRN_LPID
 #define SPRN_LPID	0x13F	/* Logical Partition Identifier */
 #endif
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 584e119fa8b0..e9b76c651bd1 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -117,6 +117,41 @@ _GLOBAL(__restore_cpu_power9)
 	mtlr	r11
 	blr
 
+_GLOBAL(__setup_cpu_power9_uprt)
+	mflr	r11
+	bl	__init_FSCR
+	bl	__init_hvmode_206
+	mtlr	r11
+	beqlr
+	li	r0,0
+	mtspr	SPRN_LPID,r0
+	mfspr	r3,SPRN_LPCR
+	ori	r3, r3, LPCR_PECEDH
+	oris	r3,r3,(LPCR_UPRT >> 16)
+	bl	__init_LPCR
+	bl	__init_HFSCR
+	bl	__init_tlb_power7
+	mtlr	r11
+	blr
+
+_GLOBAL(__restore_cpu_power9_uprt)
+	mflr	r11
+	bl	__init_FSCR
+	mfmsr	r3
+	rldicl.	r0,r3,4,63
+	mtlr	r11
+	beqlr
+	li	r0,0
+	mtspr	SPRN_LPID,r0
+	mfspr   r3,SPRN_LPCR
+	ori	r3, r3, LPCR_PECEDH
+	oris	r3,r3,(LPCR_UPRT >> 16)
+	bl	__init_LPCR
+	bl	__init_HFSCR
+	bl	__init_tlb_power7
+	mtlr	r11
+	blr
+
 __init_hvmode_206:
 	/* Disable CPU_FTR_HVMODE and exit if MSR:HV is not set */
 	mfmsr	r3
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index be4d73053bed..878bcc46ea04 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -72,10 +72,13 @@ extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec);
 extern void __restore_cpu_power8(void);
 extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec);
 extern void __restore_cpu_power9(void);
+extern void __setup_cpu_power9_uprt(unsigned long offset, struct cpu_spec* spec);
+extern void __restore_cpu_power9_uprt(void);
 extern void __restore_cpu_a2(void);
 extern void __flush_tlb_power7(unsigned int action);
 extern void __flush_tlb_power8(unsigned int action);
 extern void __flush_tlb_power9(unsigned int action);
+extern void __flush_tlb_power9_radix(unsigned int action);
 extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
 extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
 #endif /* CONFIG_PPC64 */
@@ -508,9 +511,9 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.platform		= "power8",
 	},
 	{	/* Power9 */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x004e0000,
-		.cpu_name		= "POWER9 (raw)",
+		.pvr_mask		= 0xffffff00,
+		.pvr_value		= 0x004e0200,
+		.cpu_name		= "POWER9/hash (raw)",
 		.cpu_features		= CPU_FTRS_POWER9,
 		.cpu_user_features	= COMMON_USER_POWER9,
 		.cpu_user_features2	= COMMON_USER2_POWER9,
@@ -526,6 +529,26 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.flush_tlb		= __flush_tlb_power9,
 		.platform		= "power9",
 	},
+	{	/*  Power9 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x004e0000,
+		.cpu_name		= "POWER9/radix (raw)",
+		.cpu_features		= CPU_FTRS_POWER9,
+		.cpu_user_features	= COMMON_USER_POWER9,
+		.cpu_user_features2	= COMMON_USER2_POWER9,
+		.mmu_features		= MMU_FTRS_POWER9 | MMU_FTR_RADIX,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.oprofile_cpu_type	= "ppc64/power8",
+		.oprofile_type		= PPC_OPROFILE_INVALID,
+		.cpu_setup		= __setup_cpu_power9_uprt,
+		.cpu_restore		= __restore_cpu_power9_uprt,
+		.flush_tlb		= __flush_tlb_power9_radix,
+		.machine_check_early	= __machine_check_early_realmode_p8,
+		.platform		= "power9",
+	},
 	{	/* Cell Broadband Engine */
 		.pvr_mask		= 0xffff0000,
 		.pvr_value		= 0x00700000,
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 92a66a2a9b85..f902ede263ab 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -75,6 +75,10 @@ void __flush_tlb_power9(unsigned int action)
 	flush_tlb_206(POWER9_TLB_SETS_HASH, action);
 }
 
+void __flush_tlb_power9_radix(unsigned int action)
+{
+	flush_tlb_206(POWER9_TLB_SETS_RADIX, action);
+}
 
 /* flush SLBs and reload */
 #ifdef CONFIG_PPC_MMU_STD_64
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* Re: [PATCH 37/65] powerpc/mm/radix: Add tlbflush routines
  2016-03-27  8:23 ` [PATCH 37/65] powerpc/mm/radix: Add " Aneesh Kumar K.V
@ 2016-03-27 10:00   ` kbuild test robot
  2016-03-28 17:58     ` Aneesh Kumar K.V
  2016-03-27 10:09   ` kbuild test robot
  2016-03-27 10:11   ` kbuild test robot
  2 siblings, 1 reply; 92+ messages in thread
From: kbuild test robot @ 2016-03-27 10:00 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: kbuild-all, benh, paulus, mpe, linuxppc-dev, Aneesh Kumar K.V

[-- Attachment #1: Type: text/plain, Size: 1489 bytes --]

Hi Aneesh,

[auto build test ERROR on powerpc/next]
[also build test ERROR on v4.6-rc1 next-20160324]
[if your patch is applied to the wrong git tree, please drop us a note to help improving the system]

url:    https://github.com/0day-ci/linux/commits/Aneesh-Kumar-K-V/powerpc-mm-Use-big-endian-page-table-for-book3s-64/20160327-174557
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: x86_64-randconfig-x001-201613 (attached as .config)
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All errors (new ones prefixed by >>):

   kernel/fork.c: In function 'mm_alloc':
>> kernel/fork.c:675:13: error: 'mm_context_t {aka struct <anonymous>}' has no member named 'id'
     mm->context.id = MMU_NO_CONTEXT;
                ^
>> kernel/fork.c:675:19: error: 'MMU_NO_CONTEXT' undeclared (first use in this function)
     mm->context.id = MMU_NO_CONTEXT;
                      ^
   kernel/fork.c:675:19: note: each undeclared identifier is reported only once for each function it appears in

vim +675 kernel/fork.c

   669			return NULL;
   670	
   671		memset(mm, 0, sizeof(*mm));
   672		/*
   673		 * FIXME!! we need a better way handle this
   674		 */
 > 675		mm->context.id = MMU_NO_CONTEXT;
   676		return mm_init(mm, current);
   677	}
   678	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/octet-stream, Size: 21441 bytes --]

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 37/65] powerpc/mm/radix: Add tlbflush routines
  2016-03-27  8:23 ` [PATCH 37/65] powerpc/mm/radix: Add " Aneesh Kumar K.V
  2016-03-27 10:00   ` kbuild test robot
@ 2016-03-27 10:09   ` kbuild test robot
  2016-03-27 10:11   ` kbuild test robot
  2 siblings, 0 replies; 92+ messages in thread
From: kbuild test robot @ 2016-03-27 10:09 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: kbuild-all, benh, paulus, mpe, linuxppc-dev, Aneesh Kumar K.V

[-- Attachment #1: Type: text/plain, Size: 1612 bytes --]

Hi Aneesh,

[auto build test ERROR on powerpc/next]
[also build test ERROR on v4.6-rc1 next-20160324]
[if your patch is applied to the wrong git tree, please drop us a note to help improving the system]

url:    https://github.com/0day-ci/linux/commits/Aneesh-Kumar-K-V/powerpc-mm-Use-big-endian-page-table-for-book3s-64/20160327-174557
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: xtensa-allmodconfig (attached as .config)
reproduce:
        wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=xtensa 

All errors (new ones prefixed by >>):

   kernel/fork.c: In function 'mm_alloc':
>> kernel/fork.c:675:13: error: 'mm_context_t' has no member named 'id'
     mm->context.id = MMU_NO_CONTEXT;
                ^
   kernel/fork.c:675:19: error: 'MMU_NO_CONTEXT' undeclared (first use in this function)
     mm->context.id = MMU_NO_CONTEXT;
                      ^
   kernel/fork.c:675:19: note: each undeclared identifier is reported only once for each function it appears in

vim +675 kernel/fork.c

   669			return NULL;
   670	
   671		memset(mm, 0, sizeof(*mm));
   672		/*
   673		 * FIXME!! we need a better way handle this
   674		 */
 > 675		mm->context.id = MMU_NO_CONTEXT;
   676		return mm_init(mm, current);
   677	}
   678	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/octet-stream, Size: 43795 bytes --]

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 37/65] powerpc/mm/radix: Add tlbflush routines
  2016-03-27  8:23 ` [PATCH 37/65] powerpc/mm/radix: Add " Aneesh Kumar K.V
  2016-03-27 10:00   ` kbuild test robot
  2016-03-27 10:09   ` kbuild test robot
@ 2016-03-27 10:11   ` kbuild test robot
  2 siblings, 0 replies; 92+ messages in thread
From: kbuild test robot @ 2016-03-27 10:11 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: kbuild-all, benh, paulus, mpe, linuxppc-dev, Aneesh Kumar K.V

[-- Attachment #1: Type: text/plain, Size: 1637 bytes --]

Hi Aneesh,

[auto build test ERROR on powerpc/next]
[also build test ERROR on v4.6-rc1 next-20160324]
[if your patch is applied to the wrong git tree, please drop us a note to help improving the system]

url:    https://github.com/0day-ci/linux/commits/Aneesh-Kumar-K-V/powerpc-mm-Use-big-endian-page-table-for-book3s-64/20160327-174557
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: alpha-allmodconfig (attached as .config)
reproduce:
        wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=alpha 

All errors (new ones prefixed by >>):

   kernel/fork.c: In function 'mm_alloc':
>> kernel/fork.c:675:13: error: request for member 'id' in something not a structure or union
     mm->context.id = MMU_NO_CONTEXT;
                ^
   kernel/fork.c:675:19: error: 'MMU_NO_CONTEXT' undeclared (first use in this function)
     mm->context.id = MMU_NO_CONTEXT;
                      ^
   kernel/fork.c:675:19: note: each undeclared identifier is reported only once for each function it appears in

vim +/id +675 kernel/fork.c

   669			return NULL;
   670	
   671		memset(mm, 0, sizeof(*mm));
   672		/*
   673		 * FIXME!! we need a better way handle this
   674		 */
 > 675		mm->context.id = MMU_NO_CONTEXT;
   676		return mm_init(mm, current);
   677	}
   678	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/octet-stream, Size: 44739 bytes --]

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 02/65] powerpc/mm: use _PAGE_READ to indicate Read access
  2016-03-27  8:23 ` [PATCH 02/65] powerpc/mm: use _PAGE_READ to indicate Read access Aneesh Kumar K.V
@ 2016-03-27 15:35   ` Ian Munsie
  2016-03-31  0:57   ` Balbir Singh
  1 sibling, 0 replies; 92+ messages in thread
From: Ian Munsie @ 2016-03-27 15:35 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: benh, paulus, mpe, linuxppc-dev

I'll leave it to others to work out whether the general approach & code
in /arch/powerpc is right, but at least for the cxl driver:

Acked-by: Ian Munsie <imunsie@au1.ibm.com>

Excerpts from Aneesh Kumar K.V's message of 2016-03-27 03:23:10 -0500:
> diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c
> index 81c3f75b7330..a3d5e1e16c21 100644
> --- a/drivers/misc/cxl/fault.c
> +++ b/drivers/misc/cxl/fault.c
> @@ -149,9 +149,9 @@ static void cxl_handle_page_fault(struct cxl_context *ctx,
>       * update_mmu_cache() will not have loaded the hash since current->trap
>       * is not a 0x400 or 0x300, so just call hash_page_mm() here.
>       */
> -    access = _PAGE_PRESENT;
> +    access = _PAGE_PRESENT | _PAGE_READ;
>      if (dsisr & CXL_PSL_DSISR_An_S)
> -        access |= _PAGE_RW;
> +        access |= _PAGE_WRITE;
>      if ((!ctx->kernel) || ~(dar & (1ULL << 63)))
>          access |= _PAGE_USER;

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 05/65] powerpc/mm: Replace _PAGE_USER with _PAGE_PRIVILEGED
  2016-03-27  8:23 ` [PATCH 05/65] powerpc/mm: Replace _PAGE_USER with _PAGE_PRIVILEGED Aneesh Kumar K.V
@ 2016-03-27 15:41   ` Ian Munsie
  0 siblings, 0 replies; 92+ messages in thread
From: Ian Munsie @ 2016-03-27 15:41 UTC (permalink / raw)
  To: Aneesh Kumar K.V, Manoj Kumar, Matthew R. Ochs, andrew.donnellan
  Cc: benh, paulus, mpe, linuxppc-dev

Excerpts from Aneesh Kumar K.V's message of 2016-03-27 03:23:13 -0500:
> _PAGE_PRIVILEGED means the page can be accessed only by kernel. This is done
> to keep pte bits similar to PowerISA 3.0 radix PTE format. User
> pages are now makred by clearing _PAGE_PRIVILEGED bit.
> 
> Previously we allowed kernel to have a privileged page
> in the lower address range(USER_REGION). With this patch such access
> is denied.

Copying in cxlflash folks - as the only in-kernel user of the cxl driver
can you do some testing and make sure that the driver still works with
this change?

Copying in Andrew Donnellan since he has a patch to fix the kernel test
that hasn't been merged yet that might be impacted by this.

> We also prevent a kernel access to a non-privileged page in
> higher address range (ie, REGION_ID != 0). Both the above access
> scenario should never happen.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

Acked-by: Ian Munsie <imunsie@au1.ibm.com>

> diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c
> index a3d5e1e16c21..33bd0ee30edd 100644
> --- a/drivers/misc/cxl/fault.c
> +++ b/drivers/misc/cxl/fault.c
> @@ -152,8 +152,10 @@ static void cxl_handle_page_fault(struct cxl_context *ctx,
>      access = _PAGE_PRESENT | _PAGE_READ;
>      if (dsisr & CXL_PSL_DSISR_An_S)
>          access |= _PAGE_WRITE;
> +
> +    access |= _PAGE_PRIVILEGED;
>      if ((!ctx->kernel) || ~(dar & (1ULL << 63)))
> -        access |= _PAGE_USER;
> +        access &= ~_PAGE_PRIVILEGED;
>  
>      if (dsisr & DSISR_NOHPTE)
>          inv_flags |= HPTE_NOHPTE_UPDATE;

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 06/65] powerpc/cxl: Use REGION_ID instead of opencoding
  2016-03-27  8:23 ` [PATCH 06/65] powerpc/cxl: Use REGION_ID instead of opencoding Aneesh Kumar K.V
@ 2016-03-27 15:48   ` Ian Munsie
  0 siblings, 0 replies; 92+ messages in thread
From: Ian Munsie @ 2016-03-27 15:48 UTC (permalink / raw)
  To: Aneesh Kumar K.V, andrew.donnellan, Manoj Kumar, Matthew R. Ochs
  Cc: benh, paulus, mpe, linuxppc-dev

Copying in Andrew Donnellan - this looks to be roughly equivalent to
your patch:

[PATCH] cxl: fix setting of _PAGE_USER bit when handling page faults

I think we were waiting on a v2 of that patch - does the below replace
the need for that?

Also copying cxlflash folk as an FYI

Acked-by: Ian Munsie <imunsie@au1.ibm.com>

Excerpts from Aneesh Kumar K.V's message of 2016-03-27 03:23:14 -0500:
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
>  drivers/misc/cxl/fault.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c
> index 33bd0ee30edd..67cc657e96cc 100644
> --- a/drivers/misc/cxl/fault.c
> +++ b/drivers/misc/cxl/fault.c
> @@ -154,7 +154,7 @@ static void cxl_handle_page_fault(struct cxl_context *ctx,
>          access |= _PAGE_WRITE;
>  
>      access |= _PAGE_PRIVILEGED;
> -    if ((!ctx->kernel) || ~(dar & (1ULL << 63)))
> +    if ((!ctx->kernel) || (REGION_ID(dar) == USER_REGION_ID))
>          access &= ~_PAGE_PRIVILEGED;
>  
>      if (dsisr & DSISR_NOHPTE)

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 37/65] powerpc/mm/radix: Add tlbflush routines
  2016-03-27 10:00   ` kbuild test robot
@ 2016-03-28 17:58     ` Aneesh Kumar K.V
  0 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-28 17:58 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: benh, paulus, mpe, linuxppc-dev

kbuild test robot <lkp@intel.com> writes:

> [ text/plain ]
> Hi Aneesh,
>
> [auto build test ERROR on powerpc/next]
> [also build test ERROR on v4.6-rc1 next-20160324]
> [if your patch is applied to the wrong git tree, please drop us a note to help improving the system]
>
> url:    https://github.com/0day-ci/linux/commits/Aneesh-Kumar-K-V/powerpc-mm-Use-big-endian-page-table-for-book3s-64/20160327-174557
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
> config: x86_64-randconfig-x001-201613 (attached as .config)
> reproduce:
>         # save the attached .config to linux build tree
>         make ARCH=x86_64 
>
> All errors (new ones prefixed by >>):
>
>    kernel/fork.c: In function 'mm_alloc':
>>> kernel/fork.c:675:13: error: 'mm_context_t {aka struct <anonymous>}' has no member named 'id'
>      mm->context.id = MMU_NO_CONTEXT;
>                 ^
>>> kernel/fork.c:675:19: error: 'MMU_NO_CONTEXT' undeclared (first use in this function)
>      mm->context.id = MMU_NO_CONTEXT;
>                       ^
>    kernel/fork.c:675:19: note: each undeclared identifier is reported only once for each function it appears in
>
> vim +675 kernel/fork.c
>
>    669			return NULL;
>    670	
>    671		memset(mm, 0, sizeof(*mm));
>    672		/*
>    673		 * FIXME!! we need a better way handle this
>    674		 */
>  > 675		mm->context.id = MMU_NO_CONTEXT;
>    676		return mm_init(mm, current);
>    677	}
>    678	
>

I added that change during development, based on some issue I hit at the
time (with a note to fix this correctly later). Now looking back I don't
recollect what the bug was about. AFAIU the following mm_init() should
properly initialize the context.id and we should not hit a tlbflush in between.

Hence for now I will remove this hunk.

-aneesh

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 65/65] powerpc/mm/radix: Cputable update for radix
  2016-03-27  8:24 ` [PATCH 65/65] powerpc/mm/radix: Cputable update for radix Aneesh Kumar K.V
@ 2016-03-30  1:01   ` Michael Neuling
  2016-04-01  6:25   ` Michael Ellerman
  1 sibling, 0 replies; 92+ messages in thread
From: Michael Neuling @ 2016-03-30  1:01 UTC (permalink / raw)
  To: Aneesh Kumar K.V, benh, paulus, mpe; +Cc: linuxppc-dev

On Sun, 2016-03-27 at 13:54 +0530, Aneesh Kumar K.V wrote:
> This patch move the existing p9 hash to a different PVR and add
> radix feature with p9 PVR. That implies we will not be able to
> runtime select P9 hash. With P9 Radix we need to do
>
> * set UPRT = 0 in cpu setup
> * set different TLB set count
>
> We ideally want to use ibm,pa-features to enable disable radix. But
> we have already done setup cpu by the time we reach pa-features check.

What would we need to change to disable radix after? Can't we just update
the cur_cpu_spec if we hit this and change some LPCR bits?

> So for now use this hack.
>
> Not-Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

Good, indeed this is a pretty gross hack :-)

Why not change the existing cputable entry to do radix and let's
forget about hash.

Mikey

> ---
>  arch/powerpc/include/asm/book3s/64/mmu-hash.h |  1 +
>  arch/powerpc/include/asm/reg.h                |  4 +++
>  arch/powerpc/kernel/cpu_setup_power.S         | 35 +++++++++++++++++++++++++++
>  arch/powerpc/kernel/cputable.c                | 29 +++++++++++++++++++---
>  arch/powerpc/kernel/mce_power.c               |  4 +++
>  5 files changed, 70 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> index 7da61b85406b..290157e8d5b2 100644
> --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> @@ -119,6 +119,7 @@
>  #define POWER7_TLB_SETS		128	/* # sets in POWER7 TLB */
>  #define POWER8_TLB_SETS		512	/* # sets in POWER8 TLB */
>  #define POWER9_TLB_SETS_HASH	256	/* # sets in POWER9 TLB Hash mode */
> +#define POWER9_TLB_SETS_RADIX	128	/* # sets in POWER9 TLB Radix mode */
>  
>  #ifndef __ASSEMBLY__
>  
> diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
> index 257251ada3a3..d39b5fedabfd 100644
> --- a/arch/powerpc/include/asm/reg.h
> +++ b/arch/powerpc/include/asm/reg.h
> @@ -347,6 +347,10 @@
>  #define   LPCR_LPES_SH	2
>  #define   LPCR_RMI     0x00000002      /* real mode is cache inhibit */
>  #define   LPCR_HDICE   0x00000001      /* Hyp Decr enable (HV,PR,EE) */
> +/*
> + * Used in asm code, hence we don't want to use PPC_BITCOUNT
> + */
> +#define	  LPCR_UPRT	(ASM_CONST(0x1) << (64 - 42))
>  #ifndef SPRN_LPID
>  #define SPRN_LPID	0x13F	/* Logical Partition Identifier */
>  #endif
> diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
> index 584e119fa8b0..e9b76c651bd1 100644
> --- a/arch/powerpc/kernel/cpu_setup_power.S
> +++ b/arch/powerpc/kernel/cpu_setup_power.S
> @@ -117,6 +117,41 @@ _GLOBAL(__restore_cpu_power9)
>  	mtlr	r11
>  	blr
>  
> +_GLOBAL(__setup_cpu_power9_uprt)
> +	mflr	r11
> +	bl	__init_FSCR
> +	bl	__init_hvmode_206
> +	mtlr	r11
> +	beqlr
> +	li	r0,0
> +	mtspr	SPRN_LPID,r0
> +	mfspr	r3,SPRN_LPCR
> +	ori	r3, r3, LPCR_PECEDH
> +	oris	r3,r3,(LPCR_UPRT >> 16)
> +	bl	__init_LPCR
> +	bl	__init_HFSCR
> +	bl	__init_tlb_power7
> +	mtlr	r11
> +	blr
> +
> +_GLOBAL(__restore_cpu_power9_uprt)
> +	mflr	r11
> +	bl	__init_FSCR
> +	mfmsr	r3
> +	rldicl.	r0,r3,4,63
> +	mtlr	r11
> +	beqlr
> +	li	r0,0
> +	mtspr	SPRN_LPID,r0
> +	mfspr   r3,SPRN_LPCR
> +	ori	r3, r3, LPCR_PECEDH
> +	oris	r3,r3,(LPCR_UPRT >> 16)
> +	bl	__init_LPCR
> +	bl	__init_HFSCR
> +	bl	__init_tlb_power7
> +	mtlr	r11
> +	blr
> +
>  __init_hvmode_206:
>  	/* Disable CPU_FTR_HVMODE and exit if MSR:HV is not set */
>  	mfmsr	r3
> diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
> index be4d73053bed..878bcc46ea04 100644
> --- a/arch/powerpc/kernel/cputable.c
> +++ b/arch/powerpc/kernel/cputable.c
> @@ -72,10 +72,13 @@ extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec);
>  extern void __restore_cpu_power8(void);
>  extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec);
>  extern void __restore_cpu_power9(void);
> +extern void __setup_cpu_power9_uprt(unsigned long offset, struct cpu_spec* spec);
> +extern void __restore_cpu_power9_uprt(void);
>  extern void __restore_cpu_a2(void);
>  extern void __flush_tlb_power7(unsigned int action);
>  extern void __flush_tlb_power8(unsigned int action);
>  extern void __flush_tlb_power9(unsigned int action);
> +extern void __flush_tlb_power9_radix(unsigned int action);
>  extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
>  extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
>  #endif /* CONFIG_PPC64 */
> @@ -508,9 +511,9 @@ static struct cpu_spec __initdata cpu_specs[] = {
>  		.platform		= "power8",
>  	},
>  	{	/* Power9 */
> -		.pvr_mask		= 0xffff0000,
> -		.pvr_value		= 0x004e0000,
> -		.cpu_name		= "POWER9 (raw)",
> +		.pvr_mask		= 0xffffff00,
> +		.pvr_value		= 0x004e0200,
> +		.cpu_name		= "POWER9/hash (raw)",
>  		.cpu_features		= CPU_FTRS_POWER9,
>  		.cpu_user_features	= COMMON_USER_POWER9,
>  		.cpu_user_features2	= COMMON_USER2_POWER9,
> @@ -526,6 +529,26 @@ static struct cpu_spec __initdata cpu_specs[] = {
>  		.flush_tlb		= __flush_tlb_power9,
>  		.platform		= "power9",
>  	},
> +	{	/*  Power9 */
> +		.pvr_mask		= 0xffff0000,
> +		.pvr_value		= 0x004e0000,
> +		.cpu_name		= "POWER9/radix (raw)",
> +		.cpu_features		= CPU_FTRS_POWER9,
> +		.cpu_user_features	= COMMON_USER_POWER9,
> +		.cpu_user_features2	= COMMON_USER2_POWER9,
> +		.mmu_features		= MMU_FTRS_POWER9 | MMU_FTR_RADIX,
> +		.icache_bsize		= 128,
> +		.dcache_bsize		= 128,
> +		.num_pmcs		= 6,
> +		.pmc_type		= PPC_PMC_IBM,
> +		.oprofile_cpu_type	= "ppc64/power8",
> +		.oprofile_type		= PPC_OPROFILE_INVALID,
> +		.cpu_setup		= __setup_cpu_power9_uprt,
> +		.cpu_restore		= __restore_cpu_power9_uprt,
> +		.flush_tlb		= __flush_tlb_power9_radix,
> +		.machine_check_early	= __machine_check_early_realmode_p8,
> +		.platform		= "power9",
> +	},
>  	{	/* Cell Broadband Engine */
>  		.pvr_mask		= 0xffff0000,
>  		.pvr_value		= 0x00700000,
> diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
> index 92a66a2a9b85..f902ede263ab 100644
> --- a/arch/powerpc/kernel/mce_power.c
> +++ b/arch/powerpc/kernel/mce_power.c
> @@ -75,6 +75,10 @@ void __flush_tlb_power9(unsigned int action)
>  	flush_tlb_206(POWER9_TLB_SETS_HASH, action);
>  }
>  
> +void __flush_tlb_power9_radix(unsigned int action)
> +{
> +	flush_tlb_206(POWER9_TLB_SETS_RADIX, action);
> +}
>  
>  /* flush SLBs and reload */
>  #ifdef CONFIG_PPC_MMU_STD_64

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64
  2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
                   ` (63 preceding siblings ...)
  2016-03-27  8:24 ` [PATCH 65/65] powerpc/mm/radix: Cputable update for radix Aneesh Kumar K.V
@ 2016-03-30 10:53 ` Balbir Singh
  2016-04-04 15:03   ` Aneesh Kumar K.V
  64 siblings, 1 reply; 92+ messages in thread
From: Balbir Singh @ 2016-03-30 10:53 UTC (permalink / raw)
  To: Aneesh Kumar K.V, benh, paulus, mpe; +Cc: linuxppc-dev


On 27/03/16 19:23, Aneesh Kumar K.V wrote:
> This enables us to share the same page table code for
> both radix and hash. Radix use a hardware defined big endian
> page table
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/book3s/64/hash.h   |  16 +++--
>  arch/powerpc/include/asm/kvm_book3s_64.h    |  13 ++--
>  arch/powerpc/include/asm/page.h             |   4 ++
>  arch/powerpc/include/asm/pgtable-be-types.h | 104 ++++++++++++++++++++++++++++
>  arch/powerpc/mm/hash64_4k.c                 |   7 +-
>  arch/powerpc/mm/hash64_64k.c                |  14 ++--
>  arch/powerpc/mm/hugepage-hash64.c           |   7 +-
>  arch/powerpc/mm/hugetlbpage-hash64.c        |   7 +-
>  arch/powerpc/mm/pgtable_64.c                |   9 ++-
>  9 files changed, 159 insertions(+), 22 deletions(-)
>  create mode 100644 arch/powerpc/include/asm/pgtable-be-types.h
>
> diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
> index d0ee6fcef823..2113de051824 100644
> --- a/arch/powerpc/include/asm/book3s/64/hash.h
> +++ b/arch/powerpc/include/asm/book3s/64/hash.h
> @@ -250,22 +250,27 @@ static inline unsigned long pte_update(struct mm_struct *mm,
>  				       int huge)
>  {
>  	unsigned long old, tmp;
> +	unsigned long busy = cpu_to_be64(_PAGE_BUSY);
> +
> +	clr = cpu_to_be64(clr);
> +	set = cpu_to_be64(set);
So clr and set come in native endian, and the page flags (_PAGE_BUSY, etc.) are also native endian?
I think the changelog should mention this:

1. The bits are stored big endian, but when dealing with them we convert them
to native endian
2. Native endian is for portability across platforms
3. pte/pmd/pud/pgd_val() will continue to return the value in native endian

You may also want to check/warn about user tools breakage and cc the relevant lists (I think the kdump ML
and any other relevant tool lists), and also check kvm migration. I think if everything ends up
native endian (like pte/pmd/pgd/pud_val) we might be good.
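
A tiny user-space sketch of that convention (glibc's htobe64/be64toh stand in
for the kernel's cpu_to_be64/be64_to_cpu, and the pte_t below is a toy type,
not the kernel one): the pte is stored big endian in memory, and
__pte()/pte_val() convert at the boundary so callers only ever see
native-endian values.

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

typedef struct { uint64_t pte; } pte_t;	/* stored big endian in memory */

static pte_t mk_pte_val(uint64_t val)	/* models __pte() */
{
	pte_t p = { htobe64(val) };
	return p;
}

static uint64_t pte_val(pte_t p)	/* models pte_val() */
{
	return be64toh(p.pte);
}

int main(void)
{
	pte_t p = mk_pte_val(0x8000000000000105ULL);

	/* On a little-endian host the raw value prints byte-swapped,
	 * but pte_val() always hands back the native-endian value. */
	printf("raw (as stored): %#llx\n", (unsigned long long)p.pte);
	printf("pte_val():       %#llx\n", (unsigned long long)pte_val(p));
	return 0;
}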

>  
>  	__asm__ __volatile__(
>  	"1:	ldarx	%0,0,%3		# pte_update\n\
> -	andi.	%1,%0,%6\n\
> +	and.	%1,%0,%6\n\
>  	bne-	1b \n\
>  	andc	%1,%0,%4 \n\
>  	or	%1,%1,%7\n\
>  	stdcx.	%1,0,%3 \n\
>  	bne-	1b"
>  	: "=&r" (old), "=&r" (tmp), "=m" (*ptep)
> -	: "r" (ptep), "r" (clr), "m" (*ptep), "i" (_PAGE_BUSY), "r" (set)
> +	: "r" (ptep), "r" (clr), "m" (*ptep), "r" (busy), "r" (set)
>  	: "cc" );
>  	/* huge pages use the old page table lock */
>  	if (!huge)
>  		assert_pte_locked(mm, addr);
>  
> +	old = be64_to_cpu(old);
>  	if (old & _PAGE_HASHPTE)
>  		hpte_need_flush(mm, addr, ptep, old, huge);
>  
> @@ -351,16 +356,19 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
>  		 _PAGE_SOFT_DIRTY);
>  
>  	unsigned long old, tmp;
> +	unsigned long busy = cpu_to_be64(_PAGE_BUSY);
> +
> +	bits = cpu_to_be64(bits);
>  
>  	__asm__ __volatile__(
>  	"1:	ldarx	%0,0,%4\n\
> -		andi.	%1,%0,%6\n\
> +		and.	%1,%0,%6\n\
>  		bne-	1b \n\
>  		or	%0,%3,%0\n\
>  		stdcx.	%0,0,%4\n\
>  		bne-	1b"
>  	:"=&r" (old), "=&r" (tmp), "=m" (*ptep)
> -	:"r" (bits), "r" (ptep), "m" (*ptep), "i" (_PAGE_BUSY)
> +	:"r" (bits), "r" (ptep), "m" (*ptep), "r" (busy)
>  	:"cc");
>  }
>  
> diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
> index 2aa79c864e91..f9a7a89a3e4f 100644
> --- a/arch/powerpc/include/asm/kvm_book3s_64.h
> +++ b/arch/powerpc/include/asm/kvm_book3s_64.h
> @@ -299,6 +299,8 @@ static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
>   */
>  static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing)
>  {
> +	__be64 opte, npte;
> +	unsigned long old_ptev;
>  	pte_t old_pte, new_pte = __pte(0);
>  
>  	while (1) {
> @@ -306,24 +308,25 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing)
>  		 * Make sure we don't reload from ptep
>  		 */
>  		old_pte = READ_ONCE(*ptep);
> +		old_ptev = pte_val(old_pte);
>  		/*
>  		 * wait until _PAGE_BUSY is clear then set it atomically
>  		 */
> -		if (unlikely(pte_val(old_pte) & _PAGE_BUSY)) {
> +		if (unlikely(old_ptev & _PAGE_BUSY)) {
>  			cpu_relax();
>  			continue;
>  		}
>  		/* If pte is not present return None */
> -		if (unlikely(!(pte_val(old_pte) & _PAGE_PRESENT)))
> +		if (unlikely(!(old_ptev & _PAGE_PRESENT)))
>  			return __pte(0);
>  
>  		new_pte = pte_mkyoung(old_pte);
>  		if (writing && pte_write(old_pte))
>  			new_pte = pte_mkdirty(new_pte);
>  
> -		if (pte_val(old_pte) == __cmpxchg_u64((unsigned long *)ptep,
> -						      pte_val(old_pte),
> -						      pte_val(new_pte))) {
> +		npte = cpu_to_be64(pte_val(new_pte));
> +		opte = cpu_to_be64(old_ptev);
> +		if (opte == __cmpxchg_u64((unsigned long *)ptep, opte, npte)) {
>  			break;
>  		}
>  	}
> diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
> index ab3d8977bacd..158574d2acf4 100644
> --- a/arch/powerpc/include/asm/page.h
> +++ b/arch/powerpc/include/asm/page.h
> @@ -288,7 +288,11 @@ extern long long virt_phys_offset;
>  
>  #ifndef __ASSEMBLY__
>  
> +#ifdef CONFIG_PPC_BOOK3S_64
> +#include <asm/pgtable-be-types.h>
> +#else
>  #include <asm/pgtable-types.h>
Can this be abstracted into a single header?
> +#endif
>  
>  typedef struct { signed long pd; } hugepd_t;
>  
> diff --git a/arch/powerpc/include/asm/pgtable-be-types.h b/arch/powerpc/include/asm/pgtable-be-types.h
> new file mode 100644
> index 000000000000..20527200d6ae
> --- /dev/null
> +++ b/arch/powerpc/include/asm/pgtable-be-types.h
> @@ -0,0 +1,104 @@
> +#ifndef _ASM_POWERPC_PGTABLE_BE_TYPES_H
> +#define _ASM_POWERPC_PGTABLE_BE_TYPES_H
> +
> +#ifdef CONFIG_STRICT_MM_TYPECHECKS
> +/* These are used to make use of C type-checking. */
> +
> +/* PTE level */
> +typedef struct { __be64 pte; } pte_t;
> +#define __pte(x)	((pte_t) { cpu_to_be64(x) })
> +static inline unsigned long pte_val(pte_t x)
> +{
> +	return be64_to_cpu(x.pte);
> +}
> +
> +/* PMD level */
> +#ifdef CONFIG_PPC64
> +typedef struct { __be64 pmd; } pmd_t;
> +#define __pmd(x)	((pmd_t) { cpu_to_be64(x) })
> +static inline unsigned long pmd_val(pmd_t x)
> +{
> +	return be64_to_cpu(x.pmd);
> +}
> +
> +/*
> + * 64 bit hash always use 4 level table. Everybody else use 4 level
> + * only for 4K page size.
> + */
> +#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES)
Doesn't this header get included only if CONFIG_PPC_BOOK3S_64 is defined?
Do we need the #if defined for it?

> +typedef struct { __be64 pud; } pud_t;
> +#define __pud(x)	((pud_t) { cpu_to_be64(x) })
> +static inline unsigned long pud_val(pud_t x)
> +{
> +	return be64_to_cpu(x.pud);
> +}
> +#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */
> +#endif /* CONFIG_PPC64 */
> +
> +/* PGD level */
> +typedef struct { __be64 pgd; } pgd_t;
> +#define __pgd(x)	((pgd_t) { cpu_to_be64(x) })
> +static inline unsigned long pgd_val(pgd_t x)
> +{
> +	return be64_to_cpu(x.pgd);
> +}
> +
> +/* Page protection bits */
> +typedef struct { unsigned long pgprot; } pgprot_t;
> +#define pgprot_val(x)	((x).pgprot)
> +#define __pgprot(x)	((pgprot_t) { (x) })
> +
> +#else
> +
> +/*
> + * .. while these make it easier on the compiler
> + */
> +
> +typedef __be64 pte_t;
> +#define __pte(x)	cpu_to_be64(x)
> +static inline unsigned long pte_val(pte_t pte)
> +{
> +	return be64_to_cpu(pte);
> +}
> +
> +#ifdef CONFIG_PPC64
> +typedef __be64 pmd_t;
> +#define __pmd(x)	cpu_to_be64(x)
> +static inline unsigned long pmd_val(pmd_t pmd)
> +{
> +	return be64_to_cpu(pmd);
> +}
> +
> +#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES)
> +typedef __be64 pud_t;
> +#define __pud(x)	cpu_to_be64(x)
> +static inline unsigned long pud_val(pud_t pud)
> +{
> +	return be64_to_cpu(pud);
> +}
> +#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */
> +#endif /* CONFIG_PPC64 */
> +
> +typedef __be64 pgd_t;
> +#define __pgd(x)	cpu_to_be64(x)
> +static inline unsigned long pgd_val(pgd_t pgd)
> +{
> +	return be64_to_cpu(pgd);
> +}
> +
> +typedef unsigned long pgprot_t;
> +#define pgprot_val(x)	(x)
> +#define __pgprot(x)	(x)
> +
> +#endif /* CONFIG_STRICT_MM_TYPECHECKS */
> +/*
> + * With hash config 64k pages additionally define a bigger "real PTE" type that
> + * gathers the "second half" part of the PTE for pseudo 64k pages
> + */
> +#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
> +typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
> +#else
> +typedef struct { pte_t pte; } real_pte_t;
> +#endif
> +
> +#endif /* _ASM_POWERPC_PGTABLE_TYPES_H */
> diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
> index 47d1b26effc6..71abd4c44c27 100644
> --- a/arch/powerpc/mm/hash64_4k.c
> +++ b/arch/powerpc/mm/hash64_4k.c
> @@ -20,6 +20,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
>  		   pte_t *ptep, unsigned long trap, unsigned long flags,
>  		   int ssize, int subpg_prot)
>  {
> +	__be64 opte, npte;
>  	unsigned long hpte_group;
>  	unsigned long rflags, pa;
>  	unsigned long old_pte, new_pte;
> @@ -47,8 +48,10 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
>  		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
>  		if (access & _PAGE_RW)
>  			new_pte |= _PAGE_DIRTY;
> -	} while (old_pte != __cmpxchg_u64((unsigned long *)ptep,
> -					  old_pte, new_pte));
> +
> +		opte = cpu_to_be64(old_pte);
> +		npte = cpu_to_be64(new_pte);
> +	} while (opte != __cmpxchg_u64((unsigned long *)ptep, opte, npte));
>  	/*
>  	 * PP bits. _PAGE_USER is already PP bit 0x2, so we only
>  	 * need to add in 0x1 if it's a read-only user page
> diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
> index b2d659cf51c6..6f9b3c34a5c0 100644
> --- a/arch/powerpc/mm/hash64_64k.c
> +++ b/arch/powerpc/mm/hash64_64k.c
> @@ -49,6 +49,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
>  		   pte_t *ptep, unsigned long trap, unsigned long flags,
>  		   int ssize, int subpg_prot)
>  {
> +	__be64 opte, npte;
>  	real_pte_t rpte;
>  	unsigned long *hidxp;
>  	unsigned long hpte_group;
> @@ -79,8 +80,10 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
>  		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED | _PAGE_COMBO;
>  		if (access & _PAGE_RW)
>  			new_pte |= _PAGE_DIRTY;
> -	} while (old_pte != __cmpxchg_u64((unsigned long *)ptep,
> -					  old_pte, new_pte));
> +
> +		opte = cpu_to_be64(old_pte);
> +		npte = cpu_to_be64(new_pte);
> +	} while (opte != __cmpxchg_u64((unsigned long *)ptep, opte, npte));
>  	/*
>  	 * Handle the subpage protection bits
>  	 */
> @@ -220,7 +223,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
>  		    unsigned long vsid, pte_t *ptep, unsigned long trap,
>  		    unsigned long flags, int ssize)
>  {
> -
> +	__be64 opte, npte;
>  	unsigned long hpte_group;
>  	unsigned long rflags, pa;
>  	unsigned long old_pte, new_pte;
> @@ -254,8 +257,9 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
>  		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
>  		if (access & _PAGE_RW)
>  			new_pte |= _PAGE_DIRTY;
> -	} while (old_pte != __cmpxchg_u64((unsigned long *)ptep,
> -					  old_pte, new_pte));
> +		opte = cpu_to_be64(old_pte);
> +		npte = cpu_to_be64(new_pte);
> +	} while (opte != __cmpxchg_u64((unsigned long *)ptep, opte, npte));
>  
>  	rflags = htab_convert_pte_flags(new_pte);
>  
> diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
> index eb2accdd76fd..98891139c044 100644
> --- a/arch/powerpc/mm/hugepage-hash64.c
> +++ b/arch/powerpc/mm/hugepage-hash64.c
> @@ -22,6 +22,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
>  		    pmd_t *pmdp, unsigned long trap, unsigned long flags,
>  		    int ssize, unsigned int psize)
>  {
> +	__be64 opmd, npmd;
>  	unsigned int index, valid;
>  	unsigned char *hpte_slot_array;
>  	unsigned long rflags, pa, hidx;
> @@ -49,8 +50,10 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
>  		new_pmd = old_pmd | _PAGE_BUSY | _PAGE_ACCESSED;
>  		if (access & _PAGE_RW)
>  			new_pmd |= _PAGE_DIRTY;
> -	} while (old_pmd != __cmpxchg_u64((unsigned long *)pmdp,
> -					  old_pmd, new_pmd));
> +		opmd = cpu_to_be64(old_pmd);
> +		npmd = cpu_to_be64(new_pmd);
> +	} while (opmd != __cmpxchg_u64((unsigned long *)pmdp, opmd, npmd));
> +
>  	rflags = htab_convert_pte_flags(new_pmd);
>  
>  #if 0
> diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
> index 8555fce902fe..5bcb28606158 100644
> --- a/arch/powerpc/mm/hugetlbpage-hash64.c
> +++ b/arch/powerpc/mm/hugetlbpage-hash64.c
> @@ -22,6 +22,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
>  		     pte_t *ptep, unsigned long trap, unsigned long flags,
>  		     int ssize, unsigned int shift, unsigned int mmu_psize)
>  {
> +	__be64 opte, npte;
>  	unsigned long vpn;
>  	unsigned long old_pte, new_pte;
>  	unsigned long rflags, pa, sz;
> @@ -57,8 +58,10 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
>  		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
>  		if (access & _PAGE_RW)
>  			new_pte |= _PAGE_DIRTY;
> -	} while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
> -					 old_pte, new_pte));
> +		opte = cpu_to_be64(old_pte);
> +		npte = cpu_to_be64(new_pte);
> +	} while (opte != __cmpxchg_u64((unsigned long *)ptep, opte, npte));
> +
>  	rflags = htab_convert_pte_flags(new_pte);
>  
>  	sz = ((1UL) << shift);
> diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
> index 0eb53128ca2a..aa742aa35b64 100644
> --- a/arch/powerpc/mm/pgtable_64.c
> +++ b/arch/powerpc/mm/pgtable_64.c
> @@ -516,6 +516,7 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
>  {
>  
>  	unsigned long old, tmp;
> +	unsigned long busy = cpu_to_be64(_PAGE_BUSY);
>  
>  #ifdef CONFIG_DEBUG_VM
>  	WARN_ON(!pmd_trans_huge(*pmdp));
> @@ -523,17 +524,21 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
>  #endif
>  
>  #ifdef PTE_ATOMIC_UPDATES
> +	clr = cpu_to_be64(clr);
> +	set = cpu_to_be64(set);
>  	__asm__ __volatile__(
>  	"1:	ldarx	%0,0,%3\n\
> -		andi.	%1,%0,%6\n\
> +		and.	%1,%0,%6\n\
>  		bne-	1b \n\
>  		andc	%1,%0,%4 \n\
>  		or	%1,%1,%7\n\
>  		stdcx.	%1,0,%3 \n\
>  		bne-	1b"
>  	: "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
> -	: "r" (pmdp), "r" (clr), "m" (*pmdp), "i" (_PAGE_BUSY), "r" (set)
> +	: "r" (pmdp), "r" (clr), "m" (*pmdp), "r" (busy), "r" (set)
>  	: "cc" );
> +
> +	old = be64_to_cpu(old);
>  #else
>  	old = pmd_val(*pmdp);
>  	*pmdp = __pmd((old & ~clr) | set);

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 02/65] powerpc/mm: use _PAGE_READ to indicate Read access
  2016-03-27  8:23 ` [PATCH 02/65] powerpc/mm: use _PAGE_READ to indicate Read access Aneesh Kumar K.V
  2016-03-27 15:35   ` Ian Munsie
@ 2016-03-31  0:57   ` Balbir Singh
  2016-04-04 14:55     ` Aneesh Kumar K.V
  1 sibling, 1 reply; 92+ messages in thread
From: Balbir Singh @ 2016-03-31  0:57 UTC (permalink / raw)
  To: Aneesh Kumar K.V, benh, paulus, mpe; +Cc: linuxppc-dev



On 27/03/16 19:23, Aneesh Kumar K.V wrote:
> This split _PAGE_RW bit to _PAGE_READ and _PAGE_WRITE. It also remove
> the dependency on _PAGE_USER for implying read only. Few things to note
> here is that, we have read implied with write and execute permission.
> Hence we should always find _PAGE_READ set on hash pte fault.
>
> We still can't switch PROT_NONE to !(_PAGE_RWX). Auto numa do depend
> on marking a prot none pte _PAGE_WRITE. (For more details look at
> b191f9b106ea "mm: numa: preserve PTE write permissions across a NUMA hinting fault")
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/book3s/64/hash-64k.h |  4 +--
>  arch/powerpc/include/asm/book3s/64/hash.h     | 35 ++++++++++++++++-----------
>  arch/powerpc/include/asm/pte-common.h         |  5 ++++
>  arch/powerpc/mm/hash64_4k.c                   |  2 +-
>  arch/powerpc/mm/hash64_64k.c                  |  4 +--
>  arch/powerpc/mm/hash_utils_64.c               |  9 ++++---
>  arch/powerpc/mm/hugepage-hash64.c             |  2 +-
>  arch/powerpc/mm/hugetlbpage-hash64.c          |  2 +-
>  arch/powerpc/mm/hugetlbpage.c                 |  4 +--
>  arch/powerpc/mm/pgtable.c                     |  4 +--
>  arch/powerpc/mm/pgtable_64.c                  |  5 ++--
>  arch/powerpc/platforms/cell/spu_base.c        |  2 +-
>  arch/powerpc/platforms/cell/spufs/fault.c     |  4 +--
>  drivers/misc/cxl/fault.c                      |  4 +--
>  14 files changed, 49 insertions(+), 37 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
> index 0a7956a80a08..279ded72f1db 100644
> --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
> +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
> @@ -291,10 +291,10 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
>  				      pmd_t *pmdp)
>  {
>  
> -	if ((pmd_val(*pmdp) & _PAGE_RW) == 0)
> +	if ((pmd_val(*pmdp) & _PAGE_WRITE) == 0)
>  		return;
>  
> -	pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0);
> +	pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0);
>  }
>  
>  #endif /*  CONFIG_TRANSPARENT_HUGEPAGE */
> diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
> index 2113de051824..f092d83fa623 100644
> --- a/arch/powerpc/include/asm/book3s/64/hash.h
> +++ b/arch/powerpc/include/asm/book3s/64/hash.h
> @@ -16,8 +16,10 @@
>  #define _PAGE_BIT_SWAP_TYPE	0
>  
>  #define _PAGE_EXEC		0x00001 /* execute permission */
> -#define _PAGE_RW		0x00002 /* read & write access allowed */
> +#define _PAGE_WRITE		0x00002 /* write access allowed */
>  #define _PAGE_READ		0x00004	/* read access allowed */
> +#define _PAGE_RW		(_PAGE_READ | _PAGE_WRITE)
> +#define _PAGE_RWX		(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
>  #define _PAGE_USER		0x00008 /* page may be accessed by userspace */
>  #define _PAGE_GUARDED		0x00010 /* G: guarded (side-effect) page */
>  /* M (memory coherence) is always set in the HPTE, so we don't need it here */
> @@ -147,8 +149,8 @@
>   */
>  #define PAGE_PROT_BITS	(_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
>  			 _PAGE_WRITETHRU | _PAGE_4K_PFN | \
> -			 _PAGE_USER | _PAGE_ACCESSED |  \
> -			 _PAGE_RW |  _PAGE_DIRTY | _PAGE_EXEC | \
> +			 _PAGE_USER | _PAGE_ACCESSED |  _PAGE_READ |\
> +			 _PAGE_WRITE |  _PAGE_DIRTY | _PAGE_EXEC | \
>  			 _PAGE_SOFT_DIRTY)
>  /*
>   * We define 2 sets of base prot bits, one for basic pages (ie,
> @@ -173,10 +175,12 @@
>  #define PAGE_SHARED	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
>  #define PAGE_SHARED_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \
>  				 _PAGE_EXEC)
> -#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_USER )
> -#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
> -#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_USER )
> -#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
> +#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ)
> +#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ| \
> +				 _PAGE_EXEC)
> +#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ)
> +#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ| \
> +				 _PAGE_EXEC)
>  
>  #define __P000	PAGE_NONE
>  #define __P001	PAGE_READONLY
> @@ -300,19 +304,19 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
>  				      pte_t *ptep)
>  {
>  
> -	if ((pte_val(*ptep) & _PAGE_RW) == 0)
> +	if ((pte_val(*ptep) & _PAGE_WRITE) == 0)
>  		return;
>  
> -	pte_update(mm, addr, ptep, _PAGE_RW, 0, 0);
> +	pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0);
>  }
>  
>  static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
>  					   unsigned long addr, pte_t *ptep)
>  {
> -	if ((pte_val(*ptep) & _PAGE_RW) == 0)
> +	if ((pte_val(*ptep) & _PAGE_WRITE) == 0)
>  		return;
>  
> -	pte_update(mm, addr, ptep, _PAGE_RW, 0, 1);
> +	pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1);
>  }
>  
>  /*
> @@ -352,7 +356,7 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
>  static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
>  {
>  	unsigned long bits = pte_val(entry) &
> -		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC |
> +		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_READ | _PAGE_WRITE | _PAGE_EXEC |
>  		 _PAGE_SOFT_DIRTY);
>  
>  	unsigned long old, tmp;
> @@ -386,7 +390,7 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd)
>  
>  
>  /* Generic accessors to PTE bits */
> -static inline int pte_write(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_RW);}
> +static inline int pte_write(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_WRITE);}
>  static inline int pte_dirty(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_DIRTY); }
>  static inline int pte_young(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_ACCESSED); }
>  static inline int pte_special(pte_t pte)	{ return !!(pte_val(pte) & _PAGE_SPECIAL); }
> @@ -447,7 +451,7 @@ static inline unsigned long pte_pfn(pte_t pte)
>  /* Generic modifiers for PTE bits */
>  static inline pte_t pte_wrprotect(pte_t pte)
>  {
> -	return __pte(pte_val(pte) & ~_PAGE_RW);
> +	return __pte(pte_val(pte) & ~_PAGE_WRITE);
>  }
>  
>  static inline pte_t pte_mkclean(pte_t pte)
> @@ -462,6 +466,9 @@ static inline pte_t pte_mkold(pte_t pte)
>  
>  static inline pte_t pte_mkwrite(pte_t pte)
>  {
> +	/*
> +	 * write implies read, hence set both
> +	 */
>  	return __pte(pte_val(pte) | _PAGE_RW);
>  }
>  
> diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h
> index 1ec67b043065..9f5dea58b0db 100644
> --- a/arch/powerpc/include/asm/pte-common.h
> +++ b/arch/powerpc/include/asm/pte-common.h
> @@ -198,3 +198,8 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void);
>  /* Advertise support for _PAGE_SPECIAL */
>  #define __HAVE_ARCH_PTE_SPECIAL
>  
> +#ifndef _PAGE_READ
> +/* if not defined, we should not find _PAGE_WRITE too */
> +#define _PAGE_READ 0
> +#define _PAGE_WRITE _PAGE_RW
> +#endif
> diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
> index 71abd4c44c27..7ebac279d38e 100644
> --- a/arch/powerpc/mm/hash64_4k.c
> +++ b/arch/powerpc/mm/hash64_4k.c
> @@ -46,7 +46,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
>  		 * also add _PAGE_COMBO
>  		 */
>  		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
> -		if (access & _PAGE_RW)
> +		if (access & _PAGE_WRITE)
>  			new_pte |= _PAGE_DIRTY;
>  
>  		opte = cpu_to_be64(old_pte);
> diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
> index 6f9b3c34a5c0..83ac9f658733 100644
> --- a/arch/powerpc/mm/hash64_64k.c
> +++ b/arch/powerpc/mm/hash64_64k.c
> @@ -78,7 +78,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
>  		 * also add _PAGE_COMBO
>  		 */
>  		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED | _PAGE_COMBO;
> -		if (access & _PAGE_RW)
> +		if (access & _PAGE_WRITE)
>  			new_pte |= _PAGE_DIRTY;
>  
>  		opte = cpu_to_be64(old_pte);
> @@ -255,7 +255,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
>  		 * a write access.
>  		 */
>  		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
> -		if (access & _PAGE_RW)
> +		if (access & _PAGE_WRITE)
>  			new_pte |= _PAGE_DIRTY;
>  		opte = cpu_to_be64(old_pte);
>  		npte = cpu_to_be64(new_pte);
> diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
> index 90dd9280894f..ea23403b3fc0 100644
> --- a/arch/powerpc/mm/hash_utils_64.c
> +++ b/arch/powerpc/mm/hash_utils_64.c
> @@ -175,8 +175,9 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
>  	 * or PP=0x3 for read-only (including writeable but clean pages).
>  	 */
>  	if (pteflags & _PAGE_USER) {
> -		rflags |= 0x2;
> -		if (!((pteflags & _PAGE_RW) && (pteflags & _PAGE_DIRTY)))
> +		if (pteflags & _PAGE_RWX)
Should this be pteflags & _PAGE_RW?
> +			rflags |= 0x2;
> +		if (!((pteflags & _PAGE_WRITE) && (pteflags & _PAGE_DIRTY)))
>  			rflags |= 0x1;
if pteflags == _PAGE_USER | _PAGE_WRITE | _PAGE_DIRTY, what is rflags set to?



>  	}
>  	/*
> @@ -1205,7 +1206,7 @@ EXPORT_SYMBOL_GPL(hash_page);
>  int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
>  		unsigned long dsisr)
>  {
> -	unsigned long access = _PAGE_PRESENT;
> +	unsigned long access = _PAGE_PRESENT | _PAGE_READ;
>  	unsigned long flags = 0;
>  	struct mm_struct *mm = current->mm;
>  
> @@ -1216,7 +1217,7 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
>  		flags |= HPTE_NOHPTE_UPDATE;
>  
>  	if (dsisr & DSISR_ISSTORE)
> -		access |= _PAGE_RW;
> +		access |= _PAGE_WRITE;
>  	/*
>  	 * We need to set the _PAGE_USER bit if MSR_PR is set or if we are
>  	 * accessing a userspace segment (even from the kernel). We assume
> diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
> index 98891139c044..39342638a498 100644
> --- a/arch/powerpc/mm/hugepage-hash64.c
> +++ b/arch/powerpc/mm/hugepage-hash64.c
> @@ -48,7 +48,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
>  		 * a write access
>  		 */
>  		new_pmd = old_pmd | _PAGE_BUSY | _PAGE_ACCESSED;
> -		if (access & _PAGE_RW)
> +		if (access & _PAGE_WRITE)
>  			new_pmd |= _PAGE_DIRTY;
>  		opmd = cpu_to_be64(old_pmd);
>  		npmd = cpu_to_be64(new_pmd);
> diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
> index 5bcb28606158..e6e54a04bd32 100644
> --- a/arch/powerpc/mm/hugetlbpage-hash64.c
> +++ b/arch/powerpc/mm/hugetlbpage-hash64.c
> @@ -56,7 +56,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
>  		/* Try to lock the PTE, add ACCESSED and DIRTY if it was
>  		 * a write access */
>  		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
> -		if (access & _PAGE_RW)
> +		if (access & _PAGE_WRITE)
>  			new_pte |= _PAGE_DIRTY;
>  		opte = cpu_to_be64(old_pte);
>  		npte = cpu_to_be64(new_pte);
> diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
> index 6dd272b6196f..6e52e722d3f2 100644
> --- a/arch/powerpc/mm/hugetlbpage.c
> +++ b/arch/powerpc/mm/hugetlbpage.c
> @@ -1003,9 +1003,9 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
>  		end = pte_end;
>  
>  	pte = READ_ONCE(*ptep);
> -	mask = _PAGE_PRESENT | _PAGE_USER;
> +	mask = _PAGE_PRESENT | _PAGE_USER | _PAGE_READ;
>  	if (write)
> -		mask |= _PAGE_RW;
> +		mask |= _PAGE_WRITE;
>  
>  	if ((pte_val(pte) & mask) != mask)
>  		return 0;
> diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
> index 83dfd7925c72..98b5c03e344d 100644
> --- a/arch/powerpc/mm/pgtable.c
> +++ b/arch/powerpc/mm/pgtable.c
> @@ -177,8 +177,8 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
>  	 * _PAGE_PRESENT, but we can be sure that it is not in hpte.
>  	 * Hence we can use set_pte_at for them.
>  	 */
> -	VM_WARN_ON((pte_val(*ptep) & (_PAGE_PRESENT | _PAGE_USER)) ==
> -		(_PAGE_PRESENT | _PAGE_USER));
> +	VM_WARN_ON(pte_present(*ptep) && !pte_protnone(*ptep));
> +
What was wrong with the previous check? The compiler will optimize it, but the new
check uses two pte_val() calls on *ptep
>  	/*
>  	 * Add the pte bit when tryint set a pte
>  	 */
> diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
> index aa742aa35b64..00d8d985bba3 100644
> --- a/arch/powerpc/mm/pgtable_64.c
> +++ b/arch/powerpc/mm/pgtable_64.c
> @@ -277,7 +277,7 @@ void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
>  	void *caller = __builtin_return_address(0);
>  
>  	/* writeable implies dirty for kernel addresses */
> -	if (flags & _PAGE_RW)
> +	if (flags & _PAGE_WRITE)
>  		flags |= _PAGE_DIRTY;
>  
>  	/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
> @@ -681,8 +681,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
>  		pmd_t *pmdp, pmd_t pmd)
>  {
>  #ifdef CONFIG_DEBUG_VM
> -	WARN_ON((pmd_val(*pmdp) & (_PAGE_PRESENT | _PAGE_USER)) ==
> -		(_PAGE_PRESENT | _PAGE_USER));
> +	WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
Same as above, plus I think we can move these to VM_WARN_ON just to be consistent

snip...
Balbir

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 03/65] powerpc/mm/subpage: Clear RWX bit to indicate no access
  2016-03-27  8:23 ` [PATCH 03/65] powerpc/mm/subpage: Clear RWX bit to indicate no access Aneesh Kumar K.V
@ 2016-03-31  1:42   ` Balbir Singh
  2016-04-04 14:59     ` Aneesh Kumar K.V
  0 siblings, 1 reply; 92+ messages in thread
From: Balbir Singh @ 2016-03-31  1:42 UTC (permalink / raw)
  To: Aneesh Kumar K.V, benh, paulus, mpe; +Cc: linuxppc-dev



On 27/03/16 19:23, Aneesh Kumar K.V wrote:
> Subpage protection used to depend on _PAGE_USER bit to implement no
> access mode. This patch switch that to use _PAGE_RWX. We clear READ,
> Write and Execute access from pte instead of clearing _PAGE_USER now.
> This was done so that we can switch to _PAGE_PRIVILEGED in later patch.
> subpage_protection() returns pte bits that need to be cleared.
Could you please clarify what bit needs to be cleared. I think the underlying
assumption is that when this routine is called access cannot be _PAGE_RWX
> Instead of updating the interface to handle no-access in a separate way,
> it appears simple to clear RWX acecss to indicate no access.
>
> We still don't insert hash pte for no access implied by !_PAGE_RWX.
> Hence we should not get PROT_FAULT with change.
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
>  arch/powerpc/mm/hash_utils_64.c | 11 ++++++++---
>  1 file changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
> index ea23403b3fc0..ec37f4b0a8ff 100644
> --- a/arch/powerpc/mm/hash_utils_64.c
> +++ b/arch/powerpc/mm/hash_utils_64.c
> @@ -917,7 +917,7 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
>   * Userspace sets the subpage permissions using the subpage_prot system call.
>   *
>   * Result is 0: full permissions, _PAGE_RW: read-only,
> - * _PAGE_USER or _PAGE_USER|_PAGE_RW: no access.
> + * _PAGE_RWX: no access.
>   */
>  static int subpage_protection(struct mm_struct *mm, unsigned long ea)
>  {
> @@ -943,8 +943,13 @@ static int subpage_protection(struct mm_struct *mm, unsigned long ea)
>  	/* extract 2-bit bitfield for this 4k subpage */
>  	spp >>= 30 - 2 * ((ea >> 12) & 0xf);
>  
> -	/* turn 0,1,2,3 into combination of _PAGE_USER and _PAGE_RW */
> -	spp = ((spp & 2) ? _PAGE_USER : 0) | ((spp & 1) ? _PAGE_RW : 0);
> +	/*
> +	 * 0 -> full premission
> +	 * 1 -> Read only
> +	 * 2 -> no access.
> +	 * We return the flag that need to be cleared.
> +	 */
> +	spp = ((spp & 2) ? _PAGE_RWX : 0) | ((spp & 1) ? _PAGE_WRITE : 0);
>  	return spp;
>  }
>  

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 04/65] powerpc/mm: Use pte_user instead of opencoding
  2016-03-27  8:23 ` [PATCH 04/65] powerpc/mm: Use pte_user instead of opencoding Aneesh Kumar K.V
@ 2016-03-31  1:45   ` Balbir Singh
  2016-04-04 15:00     ` Aneesh Kumar K.V
  0 siblings, 1 reply; 92+ messages in thread
From: Balbir Singh @ 2016-03-31  1:45 UTC (permalink / raw)
  To: Aneesh Kumar K.V, benh, paulus, mpe; +Cc: linuxppc-dev



On 27/03/16 19:23, Aneesh Kumar K.V wrote:
> We have common declaration in pte-common.h Add book3s specific one
> and switch to pte_user. In the later patch we will be switching
> _PAGE_USER to _PAGE_PRIVILEGED
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/book3s/64/pgtable.h | 5 +++++
>  arch/powerpc/perf/callchain.c                | 2 +-
>  2 files changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
> index 77d3ce05798e..4ac6221802ad 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
> @@ -185,6 +185,11 @@ extern struct page *pgd_page(pgd_t pgd);
>  #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE })
>  #define __swp_entry_to_pte(x)	__pte((x).val | _PAGE_PTE)
>  
> +static inline bool pte_user(pte_t pte)
> +{
> +	return (pte_val(pte) & _PAGE_USER);
> +}
Ideally this should be
    return !!(pte_val(pte) & _PAGE_USER)
for consistency with pte_present() and the fact that it returns a bool

> +
>  #ifdef CONFIG_MEM_SOFT_DIRTY
>  #define _PAGE_SWP_SOFT_DIRTY   (1UL << (SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE))
>  #else
> diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
> index e04a6752b399..0071de76d776 100644
> --- a/arch/powerpc/perf/callchain.c
> +++ b/arch/powerpc/perf/callchain.c
> @@ -137,7 +137,7 @@ static int read_user_stack_slow(void __user *ptr, void *buf, int nb)
>  	offset = addr & ((1UL << shift) - 1);
>  
>  	pte = READ_ONCE(*ptep);
> -	if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER))
> +	if (!pte_present(pte) || !pte_user(pte))
>  		goto err_out;
>  	pfn = pte_pfn(pte);
>  	if (!page_is_ram(pfn))

Makes sense

Acked-by: Balbir Singh <bsingharora@gmail.com>

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 65/65] powerpc/mm/radix: Cputable update for radix
  2016-03-27  8:24 ` [PATCH 65/65] powerpc/mm/radix: Cputable update for radix Aneesh Kumar K.V
  2016-03-30  1:01   ` Michael Neuling
@ 2016-04-01  6:25   ` Michael Ellerman
  2016-04-01  9:34     ` Aneesh Kumar K.V
  1 sibling, 1 reply; 92+ messages in thread
From: Michael Ellerman @ 2016-04-01  6:25 UTC (permalink / raw)
  To: Aneesh Kumar K.V, benh, paulus, distroguy; +Cc: linuxppc-dev

On Sun, 2016-03-27 at 13:54 +0530, Aneesh Kumar K.V wrote:

> This patch move the existing p9 hash to a different PVR and add
> radix feature with p9 PVR. That implies we will not be able to
> runtime select P9 hash. With P9 Radix we need to do
> 
> * set UPRT = 0 in cpu setup
> * set different TLB set count
> 
> We ideally want to use ibm,pa-features to enable disable radix. But
> we have already done setup cpu by the time we reach pa-features check.
> 
> So for now use this hack.
> 
> Not-Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
...
> diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
> index 584e119fa8b0..e9b76c651bd1 100644
> --- a/arch/powerpc/kernel/cpu_setup_power.S
> +++ b/arch/powerpc/kernel/cpu_setup_power.S
> @@ -117,6 +117,41 @@ _GLOBAL(__restore_cpu_power9)
>  	mtlr	r11
>  	blr
>  
> +_GLOBAL(__setup_cpu_power9_uprt)
> +	mflr	r11
> +	bl	__init_FSCR
> +	bl	__init_hvmode_206
> +	mtlr	r11
> +	beqlr
> +	li	r0,0
> +	mtspr	SPRN_LPID,r0
> +	mfspr	r3,SPRN_LPCR
> +	ori	r3, r3, LPCR_PECEDH
> +	oris	r3,r3,(LPCR_UPRT >> 16)
> +	bl	__init_LPCR

I don't see why we *have* to initialise this here.

ie. could we do it later in early_init_mmu() or similar ?

cheers

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 65/65] powerpc/mm/radix: Cputable update for radix
  2016-04-01  6:25   ` Michael Ellerman
@ 2016-04-01  9:34     ` Aneesh Kumar K.V
  2018-06-28 23:55       ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-04-01  9:34 UTC (permalink / raw)
  To: Michael Ellerman, benh, paulus, distroguy; +Cc: linuxppc-dev

Michael Ellerman <mpe@ellerman.id.au> writes:

> [ text/plain ]
> On Sun, 2016-03-27 at 13:54 +0530, Aneesh Kumar K.V wrote:
>
>> This patch move the existing p9 hash to a different PVR and add
>> radix feature with p9 PVR. That implies we will not be able to
>> runtime select P9 hash. With P9 Radix we need to do
>> 
>> * set UPRT = 0 in cpu setup
>> * set different TLB set count
>> 
>> We ideally want to use ibm,pa-features to enable disable radix. But
>> we have already done setup cpu by the time we reach pa-features check.
>> 
>> So for now use this hack.
>> 
>> Not-Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ...
>> diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
>> index 584e119fa8b0..e9b76c651bd1 100644
>> --- a/arch/powerpc/kernel/cpu_setup_power.S
>> +++ b/arch/powerpc/kernel/cpu_setup_power.S
>> @@ -117,6 +117,41 @@ _GLOBAL(__restore_cpu_power9)
>>  	mtlr	r11
>>  	blr
>>  
>> +_GLOBAL(__setup_cpu_power9_uprt)
>> +	mflr	r11
>> +	bl	__init_FSCR
>> +	bl	__init_hvmode_206
>> +	mtlr	r11
>> +	beqlr
>> +	li	r0,0
>> +	mtspr	SPRN_LPID,r0
>> +	mfspr	r3,SPRN_LPCR
>> +	ori	r3, r3, LPCR_PECEDH
>> +	oris	r3,r3,(LPCR_UPRT >> 16)
>> +	bl	__init_LPCR
>
> I don't see why we *have* to initialise this here.
>
> ie. could we do it later in early_init_mmu() or similar ?

That helped. This works for me.

commit 9c9d8b4f6a2c2210c90cbb3f5c6d33b2a642e8d2
Author: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Date:   Mon Feb 15 13:44:01 2016 +0530

    powerpc/mm/radix: Cputable update for radix
    
    With P9 Radix we need to do
    
    * set UPRT = 1
    * set different TLB set count
    
    In this patch we delay setting UPRT=1 until early mmu init. We also update
    other cpu_spec callbacks there. The restore cpu callback is used to
    init secondary cpus and also during opal init. So we do a full
    radix variant for that, even though the only difference is UPRT=1.
    
    Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index b546e6f28d44..3400ed884f10 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -347,6 +347,10 @@
 #define   LPCR_LPES_SH	2
 #define   LPCR_RMI     0x00000002      /* real mode is cache inhibit */
 #define   LPCR_HDICE   0x00000001      /* Hyp Decr enable (HV,PR,EE) */
+/*
+ * Used in asm code, hence we don't want to use PPC_BITCOUNT
+ */
+#define	  LPCR_UPRT	(ASM_CONST(0x1) << 22)
 #ifndef SPRN_LPID
 #define SPRN_LPID	0x13F	/* Logical Partition Identifier */
 #endif
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 584e119fa8b0..8d717954d0ca 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -117,6 +117,24 @@ _GLOBAL(__restore_cpu_power9)
 	mtlr	r11
 	blr
 
+_GLOBAL(__restore_cpu_power9_uprt)
+	mflr	r11
+	bl	__init_FSCR
+	mfmsr	r3
+	rldicl.	r0,r3,4,63
+	mtlr	r11
+	beqlr
+	li	r0,0
+	mtspr	SPRN_LPID,r0
+	mfspr   r3,SPRN_LPCR
+	ori	r3, r3, LPCR_PECEDH
+	oris	r3,r3,LPCR_UPRT@h
+	bl	__init_LPCR
+	bl	__init_HFSCR
+	bl	__init_tlb_power7
+	mtlr	r11
+	blr
+
 __init_hvmode_206:
 	/* Disable CPU_FTR_HVMODE and exit if MSR:HV is not set */
 	mfmsr	r3
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 6c662b8de90d..e009722d5914 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -514,7 +514,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_features		= CPU_FTRS_POWER9,
 		.cpu_user_features	= COMMON_USER_POWER9,
 		.cpu_user_features2	= COMMON_USER2_POWER9,
-		.mmu_features		= MMU_FTRS_POWER9,
+		.mmu_features		= MMU_FTRS_POWER9 | MMU_FTR_RADIX,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 6,
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 92a66a2a9b85..f902ede263ab 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -75,6 +75,10 @@ void __flush_tlb_power9(unsigned int action)
 	flush_tlb_206(POWER9_TLB_SETS_HASH, action);
 }
 
+void __flush_tlb_power9_radix(unsigned int action)
+{
+	flush_tlb_206(POWER9_TLB_SETS_RADIX, action);
+}
 
 /* flush SLBs and reload */
 #ifdef CONFIG_PPC_MMU_STD_64
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index bb1eb7d0911c..6e56051bf825 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -294,8 +294,20 @@ found:
 	return;
 }
 
+extern void __restore_cpu_power9_uprt(void);
+extern void __flush_tlb_power9_radix(unsigned int action);
 void __init rearly_init_mmu(void)
 {
+	unsigned long lpcr;
+	/*
+	 * setup LPCR UPRT based on mmu_features
+	 */
+	lpcr = mfspr(SPRN_LPCR);
+	mtspr(SPRN_LPCR, lpcr | LPCR_UPRT);
+	/* update cpu_spec to point to radix enabled callbacks */
+	cur_cpu_spec->cpu_restore = __restore_cpu_power9_uprt;
+	cur_cpu_spec->flush_tlb   = __flush_tlb_power9_radix;
+
 #ifdef CONFIG_PPC_64K_PAGES
 	/* PAGE_SIZE mappings */
 	mmu_virtual_psize = MMU_PAGE_64K;

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* Re: [PATCH 02/65] powerpc/mm: use _PAGE_READ to indicate Read access
  2016-03-31  0:57   ` Balbir Singh
@ 2016-04-04 14:55     ` Aneesh Kumar K.V
  2016-04-05  3:25       ` Balbir Singh
  0 siblings, 1 reply; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-04-04 14:55 UTC (permalink / raw)
  To: Balbir Singh, benh, paulus, mpe; +Cc: linuxppc-dev

Balbir Singh <bsingharora@gmail.com> writes:

> [ text/plain ]
>
>
> On 27/03/16 19:23, Aneesh Kumar K.V wrote:
>> This split _PAGE_RW bit to _PAGE_READ and _PAGE_WRITE. It also remove
>> the dependency on _PAGE_USER for implying read only. Few things to note
>> here is that, we have read implied with write and execute permission.
>> Hence we should always find _PAGE_READ set on hash pte fault.
>>
>> We still can't switch PROT_NONE to !(_PAGE_RWX). Auto numa do depend
>> on marking a prot none pte _PAGE_WRITE. (For more details look at
>> b191f9b106ea "mm: numa: preserve PTE write permissions across a NUMA hinting fault")
>>

......


>> diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
>> index 90dd9280894f..ea23403b3fc0 100644
>> --- a/arch/powerpc/mm/hash_utils_64.c
>> +++ b/arch/powerpc/mm/hash_utils_64.c
>> @@ -175,8 +175,9 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
>>  	 * or PP=0x3 for read-only (including writeable but clean pages).
>>  	 */
>>  	if (pteflags & _PAGE_USER) {
>> -		rflags |= 0x2;
>> -		if (!((pteflags & _PAGE_RW) && (pteflags & _PAGE_DIRTY)))
>> +		if (pteflags & _PAGE_RWX)
> Should this be pteflags & _PAGE_RW?

That will work, because we always have _PAGE_READ set for _PAGE_EXEC.
But what we really need to check here is READ/WRITE/EXEC.
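
(For reference, the relevant defines from this patch's hash.h:

	#define _PAGE_RW	(_PAGE_READ | _PAGE_WRITE)
	#define _PAGE_RWX	(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)

so testing _PAGE_RWX also covers an execute-only pte, not just read/write.)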


>> +			rflags |= 0x2;
>> +		if (!((pteflags & _PAGE_WRITE) && (pteflags & _PAGE_DIRTY)))
>>  			rflags |= 0x1;
> if pteflags == _PAGE_USER | _PAGE_WRITE | _PAGE_DIRTY, what is rflags set to?
>


rflags will be 0x2, and for read-only it will be 0x3. That is documented above.
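
As a rough sketch of the flow being described (simplified from the hunk
quoted above, not the exact kernel code):

	if (pteflags & _PAGE_USER) {
		if (pteflags & _PAGE_RWX)	/* any of read/write/exec */
			rflags |= 0x2;		/* PP = 0x2, user read-write */
		if (!((pteflags & _PAGE_WRITE) && (pteflags & _PAGE_DIRTY)))
			rflags |= 0x1;		/* PP = 0x3, user read-only */
	}

so _PAGE_USER | _PAGE_WRITE | _PAGE_DIRTY ends up with 0x2, while a
read-only (or writable but clean) user pte ends up with 0x3.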



>
>
>>  	}
>>  	/*
>> @@ -1205,7 +1206,7 @@ EXPORT_SYMBOL_GPL(hash_page);
>>  int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
>>  		unsigned long dsisr)
>>  {
>> -	unsigned long access = _PAGE_PRESENT;
>> +	unsigned long access = _PAGE_PRESENT | _PAGE_READ;
>>  	unsigned long flags = 0;
>>  	struct mm_struct *mm = current->mm;


....

>> diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
>> index 83dfd7925c72..98b5c03e344d 100644
>> --- a/arch/powerpc/mm/pgtable.c
>> +++ b/arch/powerpc/mm/pgtable.c
>> @@ -177,8 +177,8 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
>>  	 * _PAGE_PRESENT, but we can be sure that it is not in hpte.
>>  	 * Hence we can use set_pte_at for them.
>>  	 */
>> -	VM_WARN_ON((pte_val(*ptep) & (_PAGE_PRESENT | _PAGE_USER)) ==
>> -		(_PAGE_PRESENT | _PAGE_USER));
>> +	VM_WARN_ON(pte_present(*ptep) && !pte_protnone(*ptep));
>> +
> What was wrong with the previous check? The compiler will optimize it, but the new
> check uses two pte_val() calls on *ptep

That was confusing; even though we documented it clearly above, it still
confuses, because checking for _PAGE_USER is only an indirect hint for prot
numa ptes.
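
Side by side, the two checks from the diff (sketch):

	/* old: uses _PAGE_USER as an indirect hint for prot numa ptes */
	VM_WARN_ON((pte_val(*ptep) & (_PAGE_PRESENT | _PAGE_USER)) ==
		   (_PAGE_PRESENT | _PAGE_USER));

	/* new: states the intent directly - pte is present but not prot none */
	VM_WARN_ON(pte_present(*ptep) && !pte_protnone(*ptep));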


>>  	/*
>>  	 * Add the pte bit when tryint set a pte
>>  	 */
>> diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
>> index aa742aa35b64..00d8d985bba3 100644
>> --- a/arch/powerpc/mm/pgtable_64.c
>> +++ b/arch/powerpc/mm/pgtable_64.c
>> @@ -277,7 +277,7 @@ void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
>>  	void *caller = __builtin_return_address(0);
>>  
>>  	/* writeable implies dirty for kernel addresses */
>> -	if (flags & _PAGE_RW)
>> +	if (flags & _PAGE_WRITE)
>>  		flags |= _PAGE_DIRTY;
>>  
>>  	/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
>> @@ -681,8 +681,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
>>  		pmd_t *pmdp, pmd_t pmd)
>>  {
>>  #ifdef CONFIG_DEBUG_VM
>> -	WARN_ON((pmd_val(*pmdp) & (_PAGE_PRESENT | _PAGE_USER)) ==
>> -		(_PAGE_PRESENT | _PAGE_USER));
>> +	WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
> Same as above, plus I think we can move these to VM_WARN_ON just to be consistent

We already have #ifdef CONFIG_DEBUG_VM around, hence we can avoid using
VM_WARN_ON.
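
i.e. the hunk already reads (sketch):

#ifdef CONFIG_DEBUG_VM
	/* this block only builds with DEBUG_VM, so a plain WARN_ON is enough */
	WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
#endif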

-aneesh

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 03/65] powerpc/mm/subpage: Clear RWX bit to indicate no access
  2016-03-31  1:42   ` Balbir Singh
@ 2016-04-04 14:59     ` Aneesh Kumar K.V
  2016-04-05  3:30       ` Balbir Singh
  0 siblings, 1 reply; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-04-04 14:59 UTC (permalink / raw)
  To: Balbir Singh, benh, paulus, mpe; +Cc: linuxppc-dev

Balbir Singh <bsingharora@gmail.com> writes:

> [ text/plain ]
>
>
> On 27/03/16 19:23, Aneesh Kumar K.V wrote:
>> Subpage protection used to depend on _PAGE_USER bit to implement no
>> access mode. This patch switch that to use _PAGE_RWX. We clear READ,
>> Write and Execute access from pte instead of clearing _PAGE_USER now.
>> This was done so that we can switch to _PAGE_PRIVILEGED in later patch.
>> subpage_protection() returns pte bits that need to be cleared.
> Could you please clarify what bit needs to be cleared. I think the underlying
> assumption is that when this routine is called access cannot be _PAGE_RWX


I didn't follow the question. subpage_protection() returns the pte bits
that need to be cleared for a specific access depending on the subpage
prot mask we set using subpage_prot syscall. 
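
For example, condensing the hunk from this patch (illustrative only):

	/* extract 2-bit bitfield for this 4k subpage */
	spp >>= 30 - 2 * ((ea >> 12) & 0xf);
	/*
	 * 0 -> full permission (nothing to clear)
	 * 1 -> read only       (clear _PAGE_WRITE)
	 * 2 -> no access       (clear _PAGE_RWX)
	 */
	spp = ((spp & 2) ? _PAGE_RWX : 0) | ((spp & 1) ? _PAGE_WRITE : 0);
	return spp;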

>> Instead of updating the interface to handle no-access in a separate way,
>> it appears simple to clear RWX acecss to indicate no access.
>>

-aneesh

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 04/65] powerpc/mm: Use pte_user instead of opencoding
  2016-03-31  1:45   ` Balbir Singh
@ 2016-04-04 15:00     ` Aneesh Kumar K.V
  0 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-04-04 15:00 UTC (permalink / raw)
  To: Balbir Singh, benh, paulus, mpe; +Cc: linuxppc-dev

Balbir Singh <bsingharora@gmail.com> writes:

> [ text/plain ]
>
>
> On 27/03/16 19:23, Aneesh Kumar K.V wrote:
>> We have common declaration in pte-common.h Add book3s specific one
>> and switch to pte_user. In the later patch we will be switching
>> _PAGE_USER to _PAGE_PRIVILEGED
>>
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
>> ---
>>  arch/powerpc/include/asm/book3s/64/pgtable.h | 5 +++++
>>  arch/powerpc/perf/callchain.c                | 2 +-
>>  2 files changed, 6 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> index 77d3ce05798e..4ac6221802ad 100644
>> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
>> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> @@ -185,6 +185,11 @@ extern struct page *pgd_page(pgd_t pgd);
>>  #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE })
>>  #define __swp_entry_to_pte(x)	__pte((x).val | _PAGE_PTE)
>>  
>> +static inline bool pte_user(pte_t pte)
>> +{
>> +	return (pte_val(pte) & _PAGE_USER);
>> +}
> Ideally this should be
>     return !!(pte_val(pte) & _PAGE_USER)
> for consistency with pte_present() and the fact that it returns a bool

Fixed. I missed that because later patch convert this to

!(pte_val(pte) & _PAGE_PRIVILEGED). 


-aneesh

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64
  2016-03-30 10:53 ` [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Balbir Singh
@ 2016-04-04 15:03   ` Aneesh Kumar K.V
  0 siblings, 0 replies; 92+ messages in thread
From: Aneesh Kumar K.V @ 2016-04-04 15:03 UTC (permalink / raw)
  To: Balbir Singh, benh, paulus, mpe; +Cc: linuxppc-dev

Balbir Singh <bsingharora@gmail.com> writes:

> [ text/plain ]
>
> On 27/03/16 19:23, Aneesh Kumar K.V wrote:
>> This enables us to share the same page table code for
>> both radix and hash. Radix use a hardware defined big endian
>> page table
>>
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

.....

>> diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
>> index d0ee6fcef823..2113de051824 100644
>> --- a/arch/powerpc/include/asm/book3s/64/hash.h
>> +++ b/arch/powerpc/include/asm/book3s/64/hash.h
>> @@ -250,22 +250,27 @@ static inline unsigned long pte_update(struct mm_struct *mm,
>>  				       int huge)
>>  {
>>  	unsigned long old, tmp;
>> +	unsigned long busy = cpu_to_be64(_PAGE_BUSY);
>> +
>> +	clr = cpu_to_be64(clr);
>> +	set = cpu_to_be64(set);
> so clr and set come in in native endian and the page flags (_PAGE_BUSY, etc) are also native endian?
> I think the changelog should mention this
>
> 1. The bits are stored big endian, but when dealing with them we convert them
> to native endian
> 2. Native endian is for portability across platforms
> 3. pte/pmd/pud/pgd_val() will continue toreturn the value in native endian
>
> You may also want to check/warn about user tools breakage and cc relevant lists. I think the kdump ML
> and any other relevant tools and also check for kvm migration. I think if everything ends up
> native endian (like pte/pmd/pgd/pud_val) we might be good.

Hari is already looking at this.

>
>>  
>>  	__asm__ __volatile__(
>>  	"1:	ldarx	%0,0,%3		# pte_update\n\
>> -	andi.	%1,%0,%6\n\
>> +	and.	%1,%0,%6\n\
>>  	bne-	1b \n\
>>  	andc	%1,%0,%4 \n\
>>  	or	%1,%1,%7\n\
>>  	stdcx.	%1,0,%3 \n\
>>  	bne-	1b"
>>  	: "=&r" (old), "=&r" (tmp), "=m" (*ptep)
>> -	: "r" (ptep), "r" (clr), "m" (*ptep), "i" (_PAGE_BUSY), "r" (set)
>> +	: "r" (ptep), "r" (clr), "m" (*ptep), "r" (busy), "r" (set)
>>  	: "cc" );
>>  	/* huge pages use the old page table lock */
>>  	if (!huge)
>>  		assert_pte_locked(mm, addr);
>>  
>> +	old = be64_to_cpu(old);
>>  	if (old & _PAGE_HASHPTE)
>>  		hpte_need_flush(mm, addr, ptep, old, huge);
>>  
.....

>> +/*
>> + * 64 bit hash always use 4 level table. Everybody else use 4 level
>> + * only for 4K page size.
>> + */
>> +#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES)
> Doesn't this header get included only if CONFIG_PPC_BOOK3S_64 is defined?
> Do we need the #if defined for it?

If other platforms want to implement a big endian page table they can just
include this header. But yes, for now this will be included only by
book3s 64.
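
Any platform that does include it gets the same contract as book3s 64 here:
callers keep working in native endian, only the in-memory representation is
big endian (sketch, using the accessors from pgtable-be-types.h):

	pte_t pte = __pte(_PAGE_PRESENT | _PAGE_READ);	/* stored as cpu_to_be64() */
	unsigned long val = pte_val(pte);		/* be64_to_cpu() back */
	/* val == (_PAGE_PRESENT | _PAGE_READ) on both LE and BE hosts */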



>
>> +typedef struct { __be64 pud; } pud_t;
>> +#define __pud(x)	((pud_t) { cpu_to_be64(x) })
>> +static inline unsigned long pud_val(pud_t x)
>> +{
>> +	return be64_to_cpu(x.pud);
>> +}
>> +#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */
>> +#endif /* CONFIG_PPC64 */
>> +
>> +/* PGD level */
>> +typedef struct { __be64 pgd; } pgd_t;
>> +#define __pgd(x)	((pgd_t) { cpu_to_be64(x) })
>> +static inline unsigned long pgd_val(pgd_t x)
>> +{
>> +	return be64_to_cpu(x.pgd);
>> +}
>> +
>> +/* Page protection bits */
>> +typedef struct { unsigned long pgprot; } pgprot_t;
>> +#define pgprot_val(x)	((x).pgprot)
>> +#define __pgprot(x)	((pgprot_t) { (x) })
>> +

-aneesh

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 02/65] powerpc/mm: use _PAGE_READ to indicate Read access
  2016-04-04 14:55     ` Aneesh Kumar K.V
@ 2016-04-05  3:25       ` Balbir Singh
  0 siblings, 0 replies; 92+ messages in thread
From: Balbir Singh @ 2016-04-05  3:25 UTC (permalink / raw)
  To: Aneesh Kumar K.V, benh, paulus, mpe; +Cc: linuxppc-dev



On 05/04/16 00:55, Aneesh Kumar K.V wrote:
> Balbir Singh <bsingharora@gmail.com> writes:
>
>> [ text/plain ]
>>
>>
>> On 27/03/16 19:23, Aneesh Kumar K.V wrote:
>>> This split _PAGE_RW bit to _PAGE_READ and _PAGE_WRITE. It also remove
>>> the dependency on _PAGE_USER for implying read only. Few things to note
>>> here is that, we have read implied with write and execute permission.
>>> Hence we should always find _PAGE_READ set on hash pte fault.
>>>
>>> We still can't switch PROT_NONE to !(_PAGE_RWX). Auto numa do depend
>>> on marking a prot none pte _PAGE_WRITE. (For more details look at
>>> b191f9b106ea "mm: numa: preserve PTE write permissions across a NUMA hinting fault")
>>>
> ......
>
>
>>> diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
>>> index 90dd9280894f..ea23403b3fc0 100644
>>> --- a/arch/powerpc/mm/hash_utils_64.c
>>> +++ b/arch/powerpc/mm/hash_utils_64.c
>>> @@ -175,8 +175,9 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
>>>  	 * or PP=0x3 for read-only (including writeable but clean pages).
>>>  	 */
>>>  	if (pteflags & _PAGE_USER) {
>>> -		rflags |= 0x2;
>>> -		if (!((pteflags & _PAGE_RW) && (pteflags & _PAGE_DIRTY)))
>>> +		if (pteflags & _PAGE_RWX)
>> Should this be pteflags & _PAGE_RW?
> That will work, because we always have _PAGE_READ set for _PAGE_EXEC.
> But what we really need to check here is READ/WRITE/EXEC.
Does READ/WRITE matter for pp bits?

>
>>> +			rflags |= 0x2;
>>> +		if (!((pteflags & _PAGE_WRITE) && (pteflags & _PAGE_DIRTY)))
>>>  			rflags |= 0x1;
>> if pteflags == _PAGE_USER | _PAGE_WRITE | _PAGE_DIRTY, what is rflags set to?
>>
>
> rflags will be 0x2, and for read-only it will be 0x3. That is documented above.
Yep, the comment says that; I think the comment can be enhanced to add more information.

For example

pp = 0x3 for PAGE_READ | PAGE_EXEC (read-only)
pp = 0x2 if the page is writable (read-write)
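
In comment form that could be something like (suggested wording only, not
from the patch):

	/*
	 * PP bits for user ptes:
	 *   0x2 -> user read-write (writable and dirty)
	 *   0x3 -> user read-only  (read/exec, or writable but clean)
	 */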



>
>
>>
>>>  	}
>>>  	/*
>>> @@ -1205,7 +1206,7 @@ EXPORT_SYMBOL_GPL(hash_page);
>>>  int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
>>>  		unsigned long dsisr)
>>>  {
>>> -	unsigned long access = _PAGE_PRESENT;
>>> +	unsigned long access = _PAGE_PRESENT | _PAGE_READ;
>>>  	unsigned long flags = 0;
>>>  	struct mm_struct *mm = current->mm;
>
> ....
>
>>> diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
>>> index 83dfd7925c72..98b5c03e344d 100644
>>> --- a/arch/powerpc/mm/pgtable.c
>>> +++ b/arch/powerpc/mm/pgtable.c
>>> @@ -177,8 +177,8 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
>>>  	 * _PAGE_PRESENT, but we can be sure that it is not in hpte.
>>>  	 * Hence we can use set_pte_at for them.
>>>  	 */
>>> -	VM_WARN_ON((pte_val(*ptep) & (_PAGE_PRESENT | _PAGE_USER)) ==
>>> -		(_PAGE_PRESENT | _PAGE_USER));
>>> +	VM_WARN_ON(pte_present(*ptep) && !pte_protnone(*ptep));
>>> +
>> What was wrong with the previous check? The compiler will optimize it, but the new
>> check uses two pte_val() calls on *ptep
> That was confusing; even though we documented it clearly above, it still
> confuses, because checking for _PAGE_USER is only an indirect hint for prot
> numa ptes.
>
>
>>>  	/*
>>>  	 * Add the pte bit when tryint set a pte
>>>  	 */
>>> diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
>>> index aa742aa35b64..00d8d985bba3 100644
>>> --- a/arch/powerpc/mm/pgtable_64.c
>>> +++ b/arch/powerpc/mm/pgtable_64.c
>>> @@ -277,7 +277,7 @@ void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
>>>  	void *caller = __builtin_return_address(0);
>>>  
>>>  	/* writeable implies dirty for kernel addresses */
>>> -	if (flags & _PAGE_RW)
>>> +	if (flags & _PAGE_WRITE)
>>>  		flags |= _PAGE_DIRTY;
>>>  
>>>  	/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
>>> @@ -681,8 +681,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
>>>  		pmd_t *pmdp, pmd_t pmd)
>>>  {
>>>  #ifdef CONFIG_DEBUG_VM
>>> -	WARN_ON((pmd_val(*pmdp) & (_PAGE_PRESENT | _PAGE_USER)) ==
>>> -		(_PAGE_PRESENT | _PAGE_USER));
>>> +	WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
>> Same as above, plus I think we can move these to VM_WARN_ON just to be consistent
> We already have #ifdef CONFIG_DEBUG_VM around, hence we can avoid using
> VM_WARN_ON.
-just nit-picking
Balbir Singh

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 03/65] powerpc/mm/subpage: Clear RWX bit to indicate no access
  2016-04-04 14:59     ` Aneesh Kumar K.V
@ 2016-04-05  3:30       ` Balbir Singh
  0 siblings, 0 replies; 92+ messages in thread
From: Balbir Singh @ 2016-04-05  3:30 UTC (permalink / raw)
  To: Aneesh Kumar K.V, benh, paulus, mpe; +Cc: linuxppc-dev



On 05/04/16 00:59, Aneesh Kumar K.V wrote:
> Balbir Singh <bsingharora@gmail.com> writes:
>
>> [ text/plain ]
>>
>>
>> On 27/03/16 19:23, Aneesh Kumar K.V wrote:
>>> Subpage protection used to depend on _PAGE_USER bit to implement no
>>> access mode. This patch switch that to use _PAGE_RWX. We clear READ,
>>> Write and Execute access from pte instead of clearing _PAGE_USER now.
>>> This was done so that we can switch to _PAGE_PRIVILEGED in later patch.
>>> subpage_protection() returns pte bits that need to be cleared.
>> Could you please clarify what bit needs to be cleared. I think the underlying
>> assumption is that when this routine is called access cannot be _PAGE_RWX
>
> I didn't follow the question. subpage_protection() returns the pte bits
> that need to be cleared for a specific access depending on the subpage
> prot mask we set using subpage_prot syscall. 
What I meant is that if the subpage protection was no access, then _PAGE_RWX cannot
be set in the access value.
>>> Instead of updating the interface to handle no-access in a separate way,
>>> it appears simple to clear RWX acecss to indicate no access.
>>>
> -aneesh
>

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 08/65] powerpc/mm: Update _PAGE_KERNEL_RO
  2016-03-27  8:23 ` [PATCH 08/65] powerpc/mm: Update _PAGE_KERNEL_RO Aneesh Kumar K.V
@ 2016-04-05  7:45   ` Balbir Singh
  0 siblings, 0 replies; 92+ messages in thread
From: Balbir Singh @ 2016-04-05  7:45 UTC (permalink / raw)
  To: Aneesh Kumar K.V, benh, paulus, mpe; +Cc: linuxppc-dev



On 27/03/16 19:23, Aneesh Kumar K.V wrote:
> PS3 had used PPP bit hack to implement a read only mapping in the
> kernel area. Since we are bolt mapping the ioremap area, it used
> the pte flags _PAGE_PRESENT | _PAGE_USER to get a PPP value of 0x3
> there by resulting in a read only mapping. This means the area
> can be accessed by user space, but kernel will never return such an
> address to user space.
>
> But we can do better by implementing a read only kernel mapping using
> PPP bits 0b110
Looks good!

Acked-by: Balbir Singh <bsingharora@gmail.com>

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 09/65] powerpc/mm: Use helper for finding pte bits mapping I/O area
  2016-03-27  8:23 ` [PATCH 09/65] powerpc/mm: Use helper for finding pte bits mapping I/O area Aneesh Kumar K.V
@ 2016-04-05 11:47   ` Balbir Singh
  0 siblings, 0 replies; 92+ messages in thread
From: Balbir Singh @ 2016-04-05 11:47 UTC (permalink / raw)
  To: Aneesh Kumar K.V, benh, paulus, mpe; +Cc: linuxppc-dev



On 27/03/16 19:23, Aneesh Kumar K.V wrote:
> Use helper instead of opencoding with constants. Later patch will
> drop the WIMG bits and use PowerISA 3.0 defines
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
>  arch/powerpc/kernel/btext.c      | 2 +-
>  arch/powerpc/kernel/isa-bridge.c | 4 ++--
>  arch/powerpc/kernel/pci_64.c     | 2 +-
>  arch/powerpc/mm/pgtable_64.c     | 4 ++--
>  arch/powerpc/platforms/ps3/spu.c | 2 +-
>  drivers/pcmcia/electra_cf.c      | 2 +-
>  6 files changed, 8 insertions(+), 8 deletions(-)
>
>

To be honest the code is harder to read with these changes

Balbir

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 10/65] powerpc/mm: Drop WIMG in favour of new constants
  2016-03-27  8:23 ` [PATCH 10/65] powerpc/mm: Drop WIMG in favour of new constants Aneesh Kumar K.V
@ 2016-04-05 13:25   ` Balbir Singh
  0 siblings, 0 replies; 92+ messages in thread
From: Balbir Singh @ 2016-04-05 13:25 UTC (permalink / raw)
  To: Aneesh Kumar K.V, benh, paulus, mpe; +Cc: linuxppc-dev



On 27/03/16 19:23, Aneesh Kumar K.V wrote:
> PowerISA 3.0 introduce two pte bits with the below meaning w.r.t Radix
> 00 ->  Normal Memory
> 01 ->  Strong Access Order
> 10 -> Non idempotent I/O (Cache inhibited and guarded)
> 11 -> Tolerant I/O (Cache inhibited)
>
> We drop the existing WIMG bits in linux page table in favour of above
> constants. We loose _PAGE_WRITETHRU with this conversion. We only use
> writethru via pgprot_cached_wthru() which is used by fbdev/controlfb.c
> which is Apple control display and also PPC32.
>
> With respect to _PAGE_COHERENCE, we have been marking hpte
> always coherent for some time now. htab_convert_pte_flags always added
> HPTE_R_M.
>
> NOTE: KVM changes need closer review.
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/book3s/64/hash.h | 72 ++++++++++++++-----------------
>  arch/powerpc/include/asm/kvm_book3s_64.h  | 27 +++++-------
>  arch/powerpc/kvm/book3s_64_mmu_hv.c       | 11 +++--
>  arch/powerpc/kvm/book3s_hv_rm_mmu.c       | 12 +++---
>  arch/powerpc/mm/hash64_64k.c              |  2 +-
>  arch/powerpc/mm/hash_utils_64.c           | 18 ++++----
>  arch/powerpc/mm/pgtable.c                 |  8 ++--
>  arch/powerpc/mm/pgtable_64.c              |  4 --
>  arch/powerpc/platforms/pseries/lpar.c     |  4 --
>  9 files changed, 67 insertions(+), 91 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
> index 2a80981f1b0b..fd2d0ebfc49c 100644
> --- a/arch/powerpc/include/asm/book3s/64/hash.h
> +++ b/arch/powerpc/include/asm/book3s/64/hash.h
> @@ -20,12 +20,10 @@
>  #define _PAGE_READ		0x00004	/* read access allowed */
>  #define _PAGE_RW		(_PAGE_READ | _PAGE_WRITE)
>  #define _PAGE_RWX		(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
> -#define _PAGE_PRIVILEGED	0x00008 /* kernel access only */ 
This change is redundant
> -#define _PAGE_GUARDED		0x00010 /* G: guarded (side-effect) page */
> -/* M (memory coherence) is always set in the HPTE, so we don't need it here */
> -#define _PAGE_COHERENT		0x0
> -#define _PAGE_NO_CACHE		0x00020 /* I: cache inhibit */
> -#define _PAGE_WRITETHRU		0x00040 /* W: cache write-through */
> +#define _PAGE_PRIVILEGED	0x00008 /* kernel access only */
We add it right back here
> +#define _PAGE_SAO		0x00010 /* Strong access order */
> +#define _PAGE_NON_IDEMPOTENT	0x00020 /* non idempotent memory */
I think the comment is not very useful, it just calls out the defines
> +#define _PAGE_TOLERANT		0x00030 /* tolerant memory, cache inhibited */
>  #define _PAGE_DIRTY		0x00080 /* C: page changed */
>  #define _PAGE_ACCESSED		0x00100 /* R: page referenced */
>  #define _PAGE_SPECIAL		0x00400 /* software: special page */
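
As an aside, a minimal sketch (not part of the patch; the macro name is
invented) of how the new constants line up with the two-bit encoding
quoted in the changelog:

/* Illustration only: the new cache-control constants occupy a two-bit
 * software field at bits 4-5 of the Linux PTE defined above. */
#define EXAMPLE_CACHE_FIELD(pteflags)	(((pteflags) & 0x00030) >> 4)
/*
 * EXAMPLE_CACHE_FIELD(0)                    == 0b00  normal memory
 * EXAMPLE_CACHE_FIELD(_PAGE_SAO)            == 0b01  strong access order
 * EXAMPLE_CACHE_FIELD(_PAGE_NON_IDEMPOTENT) == 0b10  non-idempotent I/O
 * EXAMPLE_CACHE_FIELD(_PAGE_TOLERANT)       == 0b11  tolerant I/O
 */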
> @@ -43,7 +41,12 @@
>  #define _PAGE_HASHPTE		(1ul << 61)	/* PTE has associated HPTE */
>  #define _PAGE_PTE		(1ul << 62)	/* distinguishes PTEs from pointers */
>  #define _PAGE_PRESENT		(1ul << 63)	/* pte contains a translation */
> -
> +/*
> + * Drivers request for cache inhibited pte mapping using _PAGE_NO_CACHE
> + * Instead of fixing all of them, add an alternate define which
> + * maps CI pte mapping.
> + */
> +#define _PAGE_NO_CACHE		_PAGE_TOLERANT
>  /*
>   * We need to differentiate between explicit huge page and THP huge
>   * page, since THP huge page also need to track real subpage details
> @@ -122,9 +125,6 @@
>  #define _PAGE_KERNEL_RWX	(_PAGE_PRIVILEGED | _PAGE_DIRTY | \
>  				 _PAGE_RW | _PAGE_EXEC)
>  
> -/* Strong Access Ordering */
> -#define _PAGE_SAO		(_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT)
> -
>  /* No page size encoding in the linux PTE */
>  #define _PAGE_PSIZE		0
>  
> @@ -150,10 +150,9 @@
>  /*
>   * Mask of bits returned by pte_pgprot()
>   */
> -#define PAGE_PROT_BITS	(_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
> -			 _PAGE_WRITETHRU | _PAGE_4K_PFN | \
> -			 _PAGE_PRIVILEGED | _PAGE_ACCESSED |  _PAGE_READ |\
> -			 _PAGE_WRITE |  _PAGE_DIRTY | _PAGE_EXEC | \
> +#define PAGE_PROT_BITS  (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT | \
> +			 _PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \
> +			 _PAGE_READ | _PAGE_WRITE |  _PAGE_DIRTY | _PAGE_EXEC | \
>  			 _PAGE_SOFT_DIRTY)
>  /*
>   * We define 2 sets of base prot bits, one for basic pages (ie,
> @@ -162,7 +161,7 @@
>   * the processor might need it for DMA coherency.
>   */
>  #define _PAGE_BASE_NC	(_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
> -#define _PAGE_BASE	(_PAGE_BASE_NC | _PAGE_COHERENT)
> +#define _PAGE_BASE	(_PAGE_BASE_NC)
>  
>  /* Permission masks used to generate the __P and __S table,
>   *
> @@ -203,9 +202,9 @@
>  /* Permission masks used for kernel mappings */
>  #define PAGE_KERNEL	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
>  #define PAGE_KERNEL_NC	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
> -				 _PAGE_NO_CACHE)
> +				 _PAGE_TOLERANT)
I don't think this change is required given that _PAGE_NO_CACHE is _PAGE_TOLERANT
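i.e. with the alias added earlier in this patch, both spellings resolve to
the same value; a sketch (the EXAMPLE_* names are only for illustration):

/* _PAGE_NO_CACHE is now an alias for _PAGE_TOLERANT, so these two
 * definitions are identical after preprocessing and the hunk above is
 * purely cosmetic: */
#define EXAMPLE_PAGE_KERNEL_NC_OLD	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE)
#define EXAMPLE_PAGE_KERNEL_NC_NEW	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_TOLERANT)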
>  #define PAGE_KERNEL_NCG	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
> -				 _PAGE_NO_CACHE | _PAGE_GUARDED)
> +				 _PAGE_NON_IDEMPOTENT)
>  #define PAGE_KERNEL_X	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
>  #define PAGE_KERNEL_RO	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
>  #define PAGE_KERNEL_ROX	__pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
> @@ -512,45 +511,26 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
>  	*ptep = pte;
>  }
>  
> -/*
> - * Macro to mark a page protection value as "uncacheable".
> - */
> -
> -#define _PAGE_CACHE_CTL	(_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \
> -			 _PAGE_WRITETHRU)
> +#define _PAGE_CACHE_CTL	(_PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT)
>  
>  #define pgprot_noncached pgprot_noncached
>  static inline pgprot_t pgprot_noncached(pgprot_t prot)
>  {
>  	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
> -			_PAGE_NO_CACHE | _PAGE_GUARDED);
> +			_PAGE_NON_IDEMPOTENT);
>  }
>  
>  #define pgprot_noncached_wc pgprot_noncached_wc
>  static inline pgprot_t pgprot_noncached_wc(pgprot_t prot)
>  {
>  	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
> -			_PAGE_NO_CACHE);
> +			_PAGE_TOLERANT);
Same as before
>  }
>  
>  #define pgprot_cached pgprot_cached
>  static inline pgprot_t pgprot_cached(pgprot_t prot)
>  {
> -	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
> -			_PAGE_COHERENT);
> -}
> -
> -#define pgprot_cached_wthru pgprot_cached_wthru
> -static inline pgprot_t pgprot_cached_wthru(pgprot_t prot)
> -{
> -	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
> -			_PAGE_COHERENT | _PAGE_WRITETHRU);
> -}
> -
> -#define pgprot_cached_noncoherent pgprot_cached_noncoherent
> -static inline pgprot_t pgprot_cached_noncoherent(pgprot_t prot)
> -{
> -	return __pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL);
> +	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL));
>  }
>  
>  #define pgprot_writecombine pgprot_writecombine
> @@ -558,6 +538,18 @@ static inline pgprot_t pgprot_writecombine(pgprot_t prot)
>  {
>  	return pgprot_noncached_wc(prot);
>  }
> +/*
> + * check a pte mapping have cache inhibited property
> + */
> +static inline bool pte_ci(pte_t pte)
> +{
> +	unsigned long pte_v = pte_val(pte);
> +
> +	if (((pte_v & _PAGE_CACHE_CTL) == _PAGE_TOLERANT) ||
> +	    ((pte_v & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT))
> +		return true;
> +	return false;
> +}
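
For illustration (not part of the patch; the helper name is made up), what
pte_ci() is expected to report for each of the four encodings:

/* Sketch only: the two I/O encodings are cache inhibited, SAO and
 * normal memory are not. */
static inline bool pte_ci_examples_hold(void)
{
	return  pte_ci(__pte(_PAGE_TOLERANT)) &&	/* tolerant I/O        -> CI     */
		pte_ci(__pte(_PAGE_NON_IDEMPOTENT)) &&	/* non-idempotent I/O  -> CI     */
		!pte_ci(__pte(_PAGE_SAO)) &&		/* strong access order -> not CI */
		!pte_ci(__pte(0));			/* normal memory       -> not CI */
}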
>  
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>  extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
> diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
> index f9a7a89a3e4f..ebdaf576cf26 100644
> --- a/arch/powerpc/include/asm/kvm_book3s_64.h
> +++ b/arch/powerpc/include/asm/kvm_book3s_64.h
> @@ -278,19 +278,24 @@ static inline unsigned long hpte_make_readonly(unsigned long ptel)
>  	return ptel;
>  }
>  
> -static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
> +static inline bool hpte_cache_flags_ok(unsigned long hptel, bool is_ci)
>  {
> -	unsigned int wimg = ptel & HPTE_R_WIMG;
> +	unsigned int wimg = hptel & HPTE_R_WIMG;
>  
>  	/* Handle SAO */
>  	if (wimg == (HPTE_R_W | HPTE_R_I | HPTE_R_M) &&
>  	    cpu_has_feature(CPU_FTR_ARCH_206))
>  		wimg = HPTE_R_M;
>  
> -	if (!io_type)
> +	if (!is_ci)
>  		return wimg == HPTE_R_M;
> -
> -	return (wimg & (HPTE_R_W | HPTE_R_I)) == io_type;
> +	/*
> +	 * if host is mapped cache inhibited, make sure hptel also have
> +	 * cache inhibited.
> +	 */
The comment applies to the !!(wimg & HPTE_R_I) check below.
> +	if (wimg & HPTE_R_W) /* FIXME!! is this ok for all guest. ? */
> +		return false;
This says the page cannot be both cache inhibited and write-through?
> +	return !!(wimg & HPTE_R_I);
>  }
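
To make the question concrete, a rough case table for the check as written
(a sketch, not authoritative):

/*
 * hpte_cache_flags_ok(hptel, is_ci), case by case:
 *
 *   is_ci   guest WIMG in hptel             result
 *   -----   -------------------             ------
 *   false   M only (incl. SAO on >= 2.06)   ok
 *   false   anything else                   reject
 *   true    W set                           reject  <- the case questioned above
 *   true    I set, W clear                  ok
 *   true    neither I nor W                 reject
 */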
>  
>  /*
> @@ -333,18 +338,6 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing)
>  	return new_pte;
>  }
>  
> -
> -/* Return HPTE cache control bits corresponding to Linux pte bits */
> -static inline unsigned long hpte_cache_bits(unsigned long pte_val)
> -{
> -#if _PAGE_NO_CACHE == HPTE_R_I && _PAGE_WRITETHRU == HPTE_R_W
> -	return pte_val & (HPTE_R_W | HPTE_R_I);
> -#else
> -	return ((pte_val & _PAGE_NO_CACHE) ? HPTE_R_I : 0) +
> -		((pte_val & _PAGE_WRITETHRU) ? HPTE_R_W : 0);
> -#endif
> -}
> -
>  static inline bool hpte_read_permission(unsigned long pp, unsigned long key)
>  {
>  	if (key)
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> index c7b78d8336b2..05f09ae82587 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> @@ -447,7 +447,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
>  	struct revmap_entry *rev;
>  	struct page *page, *pages[1];
>  	long index, ret, npages;
> -	unsigned long is_io;
> +	bool is_ci;
>  	unsigned int writing, write_ok;
>  	struct vm_area_struct *vma;
>  	unsigned long rcbits;
> @@ -503,7 +503,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
>  	smp_rmb();
>  
>  	ret = -EFAULT;
> -	is_io = 0;
> +	is_ci = false;
>  	pfn = 0;
>  	page = NULL;
>  	pte_size = PAGE_SIZE;
> @@ -521,7 +521,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
>  			pfn = vma->vm_pgoff +
>  				((hva - vma->vm_start) >> PAGE_SHIFT);
>  			pte_size = psize;
> -			is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
> +			is_ci = pte_ci(__pte((pgprot_val(vma->vm_page_prot))));
>  			write_ok = vma->vm_flags & VM_WRITE;
>  		}
>  		up_read(&current->mm->mmap_sem);
> @@ -558,10 +558,9 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
>  		goto out_put;
>  
>  	/* Check WIMG vs. the actual page we're accessing */
> -	if (!hpte_cache_flags_ok(r, is_io)) {
> -		if (is_io)
> +	if (!hpte_cache_flags_ok(r, is_ci)) {
> +		if (is_ci)
>  			goto out_put;
> -
>  		/*
>  		 * Allow guest to map emulated device memory as
>  		 * uncacheable, but actually make it cacheable.
> diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> index 4cb8db05f3e5..99b4e9d5dd23 100644
> --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> @@ -175,7 +175,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
>  	unsigned long g_ptel;
>  	struct kvm_memory_slot *memslot;
>  	unsigned hpage_shift;
> -	unsigned long is_io;
> +	bool is_ci;
>  	unsigned long *rmap;
>  	pte_t *ptep;
>  	unsigned int writing;
> @@ -199,7 +199,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
>  	gfn = gpa >> PAGE_SHIFT;
>  	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
>  	pa = 0;
> -	is_io = ~0ul;
> +	is_ci = false;
>  	rmap = NULL;
>  	if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
>  		/* Emulated MMIO - mark this with key=31 */
> @@ -250,7 +250,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
>  			if (writing && !pte_write(pte))
>  				/* make the actual HPTE be read-only */
>  				ptel = hpte_make_readonly(ptel);
> -			is_io = hpte_cache_bits(pte_val(pte));
> +			is_ci = pte_ci(pte);
>  			pa = pte_pfn(pte) << PAGE_SHIFT;
>  			pa |= hva & (host_pte_size - 1);
>  			pa |= gpa & ~PAGE_MASK;
> @@ -267,9 +267,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
>  	else
>  		pteh |= HPTE_V_ABSENT;
>  
> -	/* Check WIMG */
> -	if (is_io != ~0ul && !hpte_cache_flags_ok(ptel, is_io)) {
> -		if (is_io)
> +	/*If we had host pte mapping then  Check WIMG */
> +	if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) {
> +		if (is_ci)
>  			return H_PARAMETER;
>  		/*
>  		 * Allow guest to map emulated device memory as
> diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
> index f33b410d6c8a..419562b0e9c8 100644
> --- a/arch/powerpc/mm/hash64_64k.c
> +++ b/arch/powerpc/mm/hash64_64k.c
> @@ -248,7 +248,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
>  		 * If so, bail out and refault as a 4k page
>  		 */
>  		if (!mmu_has_feature(MMU_FTR_CI_LARGE_PAGE) &&
> -		    unlikely(old_pte & _PAGE_NO_CACHE))
> +		    unlikely(pte_ci(pte)))
>  			return 0;
>  		/*
>  		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
> diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
> index 59d4600bacd5..e924690a5a0e 100644
> --- a/arch/powerpc/mm/hash_utils_64.c
> +++ b/arch/powerpc/mm/hash_utils_64.c
> @@ -192,12 +192,13 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
>  	/*
>  	 * Add in WIG bits
>  	 */
> -	if (pteflags & _PAGE_WRITETHRU)
> -		rflags |= HPTE_R_W;
> -	if (pteflags & _PAGE_NO_CACHE)
> +
> +	if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_TOLERANT)
>  		rflags |= HPTE_R_I;
> -	if (pteflags & _PAGE_GUARDED)
> -		rflags |= HPTE_R_G;
> +	if ((pteflags & _PAGE_CACHE_CTL ) == _PAGE_NON_IDEMPOTENT)
> +		rflags |= (HPTE_R_I | HPTE_R_G);
> +	if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO)
> +		rflags |= (HPTE_R_I | HPTE_R_W);
>  
>  	return rflags;
>  }
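
For what it's worth, a sketch (not from the patch; the function name is
made up) of how a driver's non-cached request now flows through to the
HPTE bits:

/* pgprot_noncached() now sets _PAGE_NON_IDEMPOTENT, which
 * htab_convert_pte_flags() turns into I | G, the same HPTE bits the old
 * _PAGE_NO_CACHE | _PAGE_GUARDED combination produced. */
static unsigned long example_noncached_rflags(void)
{
	pgprot_t prot = pgprot_noncached(PAGE_KERNEL);

	/* expected to include HPTE_R_I | HPTE_R_G */
	return htab_convert_pte_flags(pgprot_val(prot));
}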
> @@ -1138,8 +1139,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
>  	/* If this PTE is non-cacheable and we have restrictions on
>  	 * using non cacheable large pages, then we switch to 4k
>  	 */
> -	if (mmu_ci_restrictions && psize == MMU_PAGE_64K &&
> -	    (pte_val(*ptep) & _PAGE_NO_CACHE)) {
> +	if (mmu_ci_restrictions && psize == MMU_PAGE_64K && pte_ci(*ptep)) {
>  		if (user_region) {
>  			demote_segment_4k(mm, ea);
>  			psize = MMU_PAGE_4K;
> @@ -1293,13 +1293,13 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
>  
>  	WARN_ON(hugepage_shift);
>  #ifdef CONFIG_PPC_64K_PAGES
> -	/* If either _PAGE_4K_PFN or _PAGE_NO_CACHE is set (and we are on
> +	/* If either _PAGE_4K_PFN or cache inhibited is set (and we are on
>  	 * a 64K kernel), then we don't preload, hash_page() will take
>  	 * care of it once we actually try to access the page.
>  	 * That way we don't have to duplicate all of the logic for segment
>  	 * page size demotion here
>  	 */
> -	if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE))
> +	if ((pte_val(*ptep) & _PAGE_4K_PFN) || pte_ci(*ptep))
>  		goto out_exit;
>  #endif /* CONFIG_PPC_64K_PAGES */
>  
> diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
> index a34884beaa47..115a0a19d5a2 100644
> --- a/arch/powerpc/mm/pgtable.c
> +++ b/arch/powerpc/mm/pgtable.c
> @@ -38,16 +38,16 @@ static inline int is_exec_fault(void)
>  
>  /* We only try to do i/d cache coherency on stuff that looks like
>   * reasonably "normal" PTEs. We currently require a PTE to be present
> - * and we avoid _PAGE_SPECIAL and _PAGE_NO_CACHE. We also only do that
> + * and we avoid _PAGE_SPECIAL and cache inhibited pte. We also only do that
>   * on userspace PTEs
>   */
>  static inline int pte_looks_normal(pte_t pte)
>  {
>  
>  #if defined(CONFIG_PPC_BOOK3S_64)
> -	if ((pte_val(pte) &
> -	     (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE)) ==
> -	    _PAGE_PRESENT) {
> +	if ((pte_val(pte) & (_PAGE_PRESENT | _PAGE_SPECIAL)) == _PAGE_PRESENT) {
> +		if (pte_ci(pte))
> +			return 0;
>  		if (pte_user(pte))
>  			return 1;
>  	}
> diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
> index 6f1b7064f822..db924c54f370 100644
> --- a/arch/powerpc/mm/pgtable_64.c
> +++ b/arch/powerpc/mm/pgtable_64.c
> @@ -167,10 +167,6 @@ void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
>  	if ((flags & _PAGE_PRESENT) == 0)
>  		flags |= pgprot_val(PAGE_KERNEL);
>  
> -	/* Non-cacheable page cannot be coherent */
> -	if (flags & _PAGE_NO_CACHE)
> -		flags &= ~_PAGE_COHERENT;
> -
>  	/* We don't support the 4K PFN hack with ioremap */
>  	if (flags & _PAGE_4K_PFN)
>  		return NULL;
> diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
> index 2415a0d31f8f..0d4608990702 100644
> --- a/arch/powerpc/platforms/pseries/lpar.c
> +++ b/arch/powerpc/platforms/pseries/lpar.c
> @@ -152,10 +152,6 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
>  	/* Exact = 0                   */
>  	flags = 0;
>  
> -	/* Make pHyp happy */
> -	if ((rflags & _PAGE_NO_CACHE) && !(rflags & _PAGE_WRITETHRU))
> -		hpte_r &= ~HPTE_R_M;
> -
>  	if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N))
>  		flags |= H_COALESCE_CAND;
>  

Balbir

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 11/65] powerpc/mm: Use generic version of pmdp_clear_flush_young
  2016-03-27  8:23 ` [PATCH 11/65] powerpc/mm: Use generic version of pmdp_clear_flush_young Aneesh Kumar K.V
@ 2016-04-05 23:51   ` Balbir Singh
  0 siblings, 0 replies; 92+ messages in thread
From: Balbir Singh @ 2016-04-05 23:51 UTC (permalink / raw)
  To: Aneesh Kumar K.V, benh, paulus, mpe; +Cc: linuxppc-dev



On 27/03/16 19:23, Aneesh Kumar K.V wrote:
> The radix variant is going to require a flush_pmd_tlb_range. With
> flush_pmd_tlb_range added, pmdp_clear_flush_young is same as the generic
> version. So drop the powerpc specific variant
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
>
Makes sense. What happens to the HPTE variant?

Balbir Singh.

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 12/65] powerpc/mm: Use generic version of ptep_clear_flush_young
  2016-03-27  8:23 ` [PATCH 12/65] powerpc/mm: Use generic version of ptep_clear_flush_young Aneesh Kumar K.V
@ 2016-04-05 23:53   ` Balbir Singh
  0 siblings, 0 replies; 92+ messages in thread
From: Balbir Singh @ 2016-04-05 23:53 UTC (permalink / raw)
  To: Aneesh Kumar K.V, benh, paulus, mpe; +Cc: linuxppc-dev



On 27/03/16 19:23, Aneesh Kumar K.V wrote:
> The radix variant is going to require a flush_tlb_range. With
> flush_tlb_range added, ptep_clear_flush_young is same as the generic
> version. So drop the powerpc specific variant
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
>
The changelog is the same as 11/65's. Maybe you could merge 11 and 12, or
help us understand the difference better.

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 14/65] powerpc/mm/power9: Add partition table format
  2016-03-27  8:23 ` [PATCH 14/65] powerpc/mm/power9: Add partition table format Aneesh Kumar K.V
@ 2016-04-06  6:11   ` Balbir Singh
  0 siblings, 0 replies; 92+ messages in thread
From: Balbir Singh @ 2016-04-06  6:11 UTC (permalink / raw)
  To: Aneesh Kumar K.V, benh, paulus, mpe; +Cc: linuxppc-dev



On 27/03/16 19:23, Aneesh Kumar K.V wrote:
> We also add mach dep call back for updating partition table entry.
The changelog needs to be enhanced.
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/book3s/64/mmu.h | 31 +++++++++++++++++++++++++++++--
>  arch/powerpc/include/asm/machdep.h       |  1 +
>  arch/powerpc/include/asm/reg.h           |  1 +
>  3 files changed, 31 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
> index aadb0bbc5c71..b86786f2521c 100644
> --- a/arch/powerpc/include/asm/book3s/64/mmu.h
> +++ b/arch/powerpc/include/asm/book3s/64/mmu.h
> @@ -21,12 +21,39 @@ struct mmu_psize_def {
>  extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
>  #endif /* __ASSEMBLY__ */
>  
> -#ifdef CONFIG_PPC_STD_MMU_64
>  /* 64-bit classic hash table MMU */
>  #include <asm/book3s/64/mmu-hash.h>
> -#endif
>  
>  #ifndef __ASSEMBLY__
> +/*
> + * ISA 3.0 partiton and process table entry format
> + */
> +struct prtb_entry {
> +	__be64 prtb0;
> +	__be64 prtb1;
> +};
> +extern struct prtb_entry *process_tb;
> +
> +struct patb_entry {
> +	__be64 patb0;
> +	__be64 patb1;
> +};
> +extern struct patb_entry *partition_tb;
> +
> +#define PATB_HR		(1UL << 63)
> +#define PATB_GR		(1UL << 63)
> +#define RPDB_MASK	0x0ffffffffffff00fUL
> +#define RPDB_SHIFT	(1UL << 8)
Some comments on the fields and constants?
> +/*
> + * Limit process table to PAGE_SIZE table. This
> + * also limit the max pid we can support.
> + * MAX_USER_CONTEXT * 16 bytes of space.
> + */
> +#define PRTB_SIZE_SHIFT	(CONTEXT_BITS + 4)
> +/*
> + * Power9 currently only support 64K partition table size.
> + */
> +#define PATB_SIZE_SHIFT	16
>  
>  typedef unsigned long mm_context_id_t;
>  struct spinlock;
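
As an illustration of what the two size shifts above work out to (a sketch
only; the helper names are invented and CONTEXT_BITS comes from the
existing hash MMU headers):

/* One 16-byte prtb_entry per context id, so this is 1UL << PRTB_SIZE_SHIFT. */
static inline unsigned long example_process_table_bytes(void)
{
	return sizeof(struct prtb_entry) << CONTEXT_BITS;
}

/* 64K, the only partition table size POWER9 supports here. */
static inline unsigned long example_partition_table_bytes(void)
{
	return 1UL << PATB_SIZE_SHIFT;
}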
> diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
> index fd22442d30a9..6bdcd0da9e21 100644
> --- a/arch/powerpc/include/asm/machdep.h
> +++ b/arch/powerpc/include/asm/machdep.h
> @@ -256,6 +256,7 @@ struct machdep_calls {
>  #ifdef CONFIG_ARCH_RANDOM
>  	int (*get_random_seed)(unsigned long *v);
>  #endif
> +	int (*update_partition_table)(u64);
>  };
>  
>  extern void e500_idle(void);
> diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
> index 52ed654d01ba..257251ada3a3 100644
> --- a/arch/powerpc/include/asm/reg.h
> +++ b/arch/powerpc/include/asm/reg.h
> @@ -587,6 +587,7 @@
>  #define SPRN_PIR	0x3FF	/* Processor Identification Register */
>  #endif
>  #define SPRN_TIR	0x1BE	/* Thread Identification Register */
> +#define SPRN_PTCR	0x1D0	/* Partition table control Register */
>  #define SPRN_PSPB	0x09F	/* Problem State Priority Boost reg */
>  #define SPRN_PTEHI	0x3D5	/* 981 7450 PTE HI word (S/W TLB load) */
>  #define SPRN_PTELO	0x3D6	/* 982 7450 PTE LO word (S/W TLB load) */

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 65/65] powerpc/mm/radix: Cputable update for radix
  2016-04-01  9:34     ` Aneesh Kumar K.V
@ 2018-06-28 23:55       ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 92+ messages in thread
From: Benjamin Herrenschmidt @ 2018-06-28 23:55 UTC (permalink / raw)
  To: Aneesh Kumar K.V, Michael Ellerman, paulus, distroguy; +Cc: linuxppc-dev

On Fri, 2016-04-01 at 15:04 +0530, Aneesh Kumar K.V wrote:
> 
> commit 9c9d8b4f6a2c2210c90cbb3f5c6d33b2a642e8d2
> Author: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> Date:   Mon Feb 15 13:44:01 2016 +0530
> 
>     powerpc/mm/radix: Cputable update for radix
>     
>     With P9 Radix we need to do
>     
>     * set UPRT = 1
>     * set different TLB set count
>     
>     In this patch we delay the UPRT=1 to early mmu init. We also update
>     other cpu_spec callback there. The restore cpu callback is used to
>     init secondary cpus and also during opal init. So we do a full
>     radix variant for that, even though the only difference is UPRT=1
>     
>     Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

How are things working in the absence of a cputable/PVR match?

Remember we have a requirement to be able to boot existing OSes on
future chips, so Nick's new cpu-features node needs to be what we
test against.

> diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
> index b546e6f28d44..3400ed884f10 100644
> --- a/arch/powerpc/include/asm/reg.h
> +++ b/arch/powerpc/include/asm/reg.h
> @@ -347,6 +347,10 @@
>  #define   LPCR_LPES_SH	2
>  #define   LPCR_RMI     0x00000002      /* real mode is cache inhibit */
>  #define   LPCR_HDICE   0x00000001      /* Hyp Decr enable (HV,PR,EE) */
> +/*
> + * Used in asm code, hence we don't want to use PPC_BITCOUNT
> + */
> +#define	  LPCR_UPRT	(ASM_CONST(0x1) << 22)
>  #ifndef SPRN_LPID
>  #define SPRN_LPID	0x13F	/* Logical Partition Identifier */
>  #endif
> diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
> index 584e119fa8b0..8d717954d0ca 100644
> --- a/arch/powerpc/kernel/cpu_setup_power.S
> +++ b/arch/powerpc/kernel/cpu_setup_power.S
> @@ -117,6 +117,24 @@ _GLOBAL(__restore_cpu_power9)
>  	mtlr	r11
>  	blr
>  
> +_GLOBAL(__restore_cpu_power9_uprt)
> +	mflr	r11
> +	bl	__init_FSCR
> +	mfmsr	r3
> +	rldicl.	r0,r3,4,63
> +	mtlr	r11
> +	beqlr
> +	li	r0,0
> +	mtspr	SPRN_LPID,r0
> +	mfspr   r3,SPRN_LPCR
> +	ori	r3, r3, LPCR_PECEDH
> +	oris	r3,r3,LPCR_UPRT@h
> +	bl	__init_LPCR
> +	bl	__init_HFSCR
> +	bl	__init_tlb_power7
> +	mtlr	r11
> +	blr
> +
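
For readers less fluent in the asm, roughly what the new restore path does
in C pseudocode (a sketch only; the helpers stand in for the asm routines
called above and the exact register flow is glossed over):

/* Sketch of __restore_cpu_power9_uprt: bail out unless running with
 * MSR[HV], then clear the LPID and add PECEDH and UPRT to the LPCR
 * before the usual LPCR/HFSCR/TLB init. */
static void restore_cpu_power9_uprt_sketch(void)
{
	unsigned long lpcr;

	__init_FSCR();
	if (!(mfmsr() & MSR_HV))
		return;
	mtspr(SPRN_LPID, 0);
	lpcr = mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_UPRT;
	__init_LPCR(lpcr);
	__init_HFSCR();
	__init_tlb_power7();
}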
>  __init_hvmode_206:
>  	/* Disable CPU_FTR_HVMODE and exit if MSR:HV is not set */
>  	mfmsr	r3
> diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
> index 6c662b8de90d..e009722d5914 100644
> --- a/arch/powerpc/kernel/cputable.c
> +++ b/arch/powerpc/kernel/cputable.c
> @@ -514,7 +514,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
>  		.cpu_features		= CPU_FTRS_POWER9,
>  		.cpu_user_features	= COMMON_USER_POWER9,
>  		.cpu_user_features2	= COMMON_USER2_POWER9,
> -		.mmu_features		= MMU_FTRS_POWER9,
> +		.mmu_features		= MMU_FTRS_POWER9 | MMU_FTR_RADIX,
>  		.icache_bsize		= 128,
>  		.dcache_bsize		= 128,
>  		.num_pmcs		= 6,
> diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
> index 92a66a2a9b85..f902ede263ab 100644
> --- a/arch/powerpc/kernel/mce_power.c
> +++ b/arch/powerpc/kernel/mce_power.c
> @@ -75,6 +75,10 @@ void __flush_tlb_power9(unsigned int action)
>  	flush_tlb_206(POWER9_TLB_SETS_HASH, action);
>  }
>  
> +void __flush_tlb_power9_radix(unsigned int action)
> +{
> +	flush_tlb_206(POWER9_TLB_SETS_RADIX, action);
> +}
>  
>  /* flush SLBs and reload */
>  #ifdef CONFIG_PPC_MMU_STD_64
> diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
> index bb1eb7d0911c..6e56051bf825 100644
> --- a/arch/powerpc/mm/pgtable-radix.c
> +++ b/arch/powerpc/mm/pgtable-radix.c
> @@ -294,8 +294,20 @@ found:
>  	return;
>  }
>  
> +extern void __restore_cpu_power9_uprt(void);
> +extern void __flush_tlb_power9_radix(unsigned int action);
>  void __init rearly_init_mmu(void)
>  {
> +	unsigned long lpcr;
> +	/*
> +	 * setup LPCR UPRT based on mmu_features
> +	 */
> +	lpcr = mfspr(SPRN_LPCR);
> +	mtspr(SPRN_LPCR, lpcr | LPCR_UPRT);
> +	/* update cpu_spec to point to radix enabled callbacks */
> +	cur_cpu_spec->cpu_restore = __restore_cpu_power9_uprt;
> +	cur_cpu_spec->flush_tlb   = __flush_tlb_power9_radix;
> +
>  #ifdef CONFIG_PPC_64K_PAGES
>  	/* PAGE_SIZE mappings */
>  	mmu_virtual_psize = MMU_PAGE_64K;

^ permalink raw reply	[flat|nested] 92+ messages in thread

end of thread, newest message: 2018-06-28 23:55 UTC

Thread overview: 92+ messages
2016-03-27  8:23 [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 02/65] powerpc/mm: use _PAGE_READ to indicate Read access Aneesh Kumar K.V
2016-03-27 15:35   ` Ian Munsie
2016-03-31  0:57   ` Balbir Singh
2016-04-04 14:55     ` Aneesh Kumar K.V
2016-04-05  3:25       ` Balbir Singh
2016-03-27  8:23 ` [PATCH 03/65] powerpc/mm/subpage: Clear RWX bit to indicate no access Aneesh Kumar K.V
2016-03-31  1:42   ` Balbir Singh
2016-04-04 14:59     ` Aneesh Kumar K.V
2016-04-05  3:30       ` Balbir Singh
2016-03-27  8:23 ` [PATCH 04/65] powerpc/mm: Use pte_user instead of opencoding Aneesh Kumar K.V
2016-03-31  1:45   ` Balbir Singh
2016-04-04 15:00     ` Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 05/65] powerpc/mm: Replace _PAGE_USER with _PAGE_PRIVILEGED Aneesh Kumar K.V
2016-03-27 15:41   ` Ian Munsie
2016-03-27  8:23 ` [PATCH 06/65] powerpc/cxl: Use REGION_ID instead of opencoding Aneesh Kumar K.V
2016-03-27 15:48   ` Ian Munsie
2016-03-27  8:23 ` [PATCH 07/65] powerpc/mm: Remove RPN_SHIFT and RPN_SIZE Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 08/65] powerpc/mm: Update _PAGE_KERNEL_RO Aneesh Kumar K.V
2016-04-05  7:45   ` Balbir Singh
2016-03-27  8:23 ` [PATCH 09/65] powerpc/mm: Use helper for finding pte bits mapping I/O area Aneesh Kumar K.V
2016-04-05 11:47   ` Balbir Singh
2016-03-27  8:23 ` [PATCH 10/65] powerpc/mm: Drop WIMG in favour of new constants Aneesh Kumar K.V
2016-04-05 13:25   ` Balbir Singh
2016-03-27  8:23 ` [PATCH 11/65] powerpc/mm: Use generic version of pmdp_clear_flush_young Aneesh Kumar K.V
2016-04-05 23:51   ` Balbir Singh
2016-03-27  8:23 ` [PATCH 12/65] powerpc/mm: Use generic version of ptep_clear_flush_young Aneesh Kumar K.V
2016-04-05 23:53   ` Balbir Singh
2016-03-27  8:23 ` [PATCH 13/65] powerpc/mm: Move common data structure between radix and hash to book3s 64 generic headers Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 14/65] powerpc/mm/power9: Add partition table format Aneesh Kumar K.V
2016-04-06  6:11   ` Balbir Singh
2016-03-27  8:23 ` [PATCH 15/65] powerpc/mm/hash: Add support for POWER9 hash Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 16/65] powerpc/mm: Move hash and no hash code to separate files Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 17/65] powerpc/mm/book3s: Rename hash specific PTE bits to carry H_ prefix Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 18/65] powerpc/mm: Handle _PTE_NONE_MASK Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 19/65] powerpc/mm: Move common pte bits and accessors to book3s/64/pgtable.h Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 20/65] powerpc/mm: Move pte accessors that operate on common pte bits to pgtable.h Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 21/65] powerpc/mm: Make page table size a variable Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 22/65] powerpc/mm: Move page table index and and vaddr to pgtable.h Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 23/65] powerpc/mm: Move pte related function together Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 24/65] powerpc/mm/radix: Add radix pte defines Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 25/65] powerpc/mm/radix: Dummy radix_enabled() Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 26/65] powerpc/mm: Add radix callbacks to pte accessors Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 27/65] powerpc/mm: Move hugetlb and THP related pmd accessors to pgtable.h Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 28/65] powerpc/mm/radix: Add radix callback for pmd accessors Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 29/65] powerpc/mm: Abstraction for early init routines Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 30/65] powerpc/mm/radix: Add radix callback " Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 31/65] powerpc/mm: Abstraction for vmemmap and map_kernel_page Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 32/65] powerpc/mm/radix: Add radix callback for vmemmap and map_kernel page Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 33/65] powerpc/mm: Abstraction for switch_mmu_context Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 34/65] powerpc/mm/radix: Add mmu context handling callback for radix Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 35/65] powerpc/mm: Rename mmu_context_hash64.c to mmu_context_book3s64.c Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 36/65] powerpc/mm: Hash linux abstraction for tlbflush routines Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 37/65] powerpc/mm/radix: Add " Aneesh Kumar K.V
2016-03-27 10:00   ` kbuild test robot
2016-03-28 17:58     ` Aneesh Kumar K.V
2016-03-27 10:09   ` kbuild test robot
2016-03-27 10:11   ` kbuild test robot
2016-03-27  8:23 ` [PATCH 38/65] powerpc/mm/radix: Add MMU_FTR_RADIX Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 39/65] powerpc/mm/radix: Use STD_MMU_64 to properly isolate hash related code Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 40/65] powerpc/mm/radix: Isolate hash table function from pseries guest code Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 41/65] powerpc/mm/radix: Add checks in slice code to catch radix usage Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 42/65] powerpc/mm/radix: Limit paca allocation in radix Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 43/65] powerpc/mm/radix: Pick the address layout for radix config Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 44/65] powerpc/mm/radix: Update secondary PTCR Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 45/65] powerpc/mm: Make a copy of pgalloc.h for 32 and 64 book3s Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 46/65] powerpc/mm: revert changes made to generic pgalloc-64.h Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 47/65] powerpc/mm: Copy pgalloc (part 2) Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 48/65] powerpc/mm: Simplify the code dropping 4 level table #ifdef Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 49/65] powerpc/mm: Rename function to indicate we are allocating fragments Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 50/65] powerpc/mm: make 4k and 64k use pte_t for pgtable_t Aneesh Kumar K.V
2016-03-27  8:23 ` [PATCH 51/65] powerpc/mm: Add radix pgalloc details Aneesh Kumar K.V
2016-03-27  8:24 ` [PATCH 52/65] powerpc/mm: Update pte filter for radix Aneesh Kumar K.V
2016-03-27  8:24 ` [PATCH 53/65] powerpc/mm: VMALLOC abstraction Aneesh Kumar K.V
2016-03-27  8:24 ` [PATCH 54/65] powerpc/radix: update mmu cache Aneesh Kumar K.V
2016-03-27  8:24 ` [PATCH 55/65] powerpc/mm: pte_frag abstraction Aneesh Kumar K.V
2016-03-27  8:24 ` [PATCH 56/65] powerpc/mm: Fix vma_mmu_pagesize for radix Aneesh Kumar K.V
2016-03-27  8:24 ` [PATCH 57/65] powerpc/mm: Add radix support for hugetlb Aneesh Kumar K.V
2016-03-27  8:24 ` [PATCH 58/65] powerpc/mm/radix: Make sure swapper pgdir is properly aligned Aneesh Kumar K.V
2016-03-27  8:24 ` [PATCH 59/65] powerpc/mm/radix: Add hugetlb support 4K page size Aneesh Kumar K.V
2016-03-27  8:24 ` [PATCH 60/65] powerpc/mm: Drop PTE_ATOMIC_UPDATES from pmd_hugepage_update Aneesh Kumar K.V
2016-03-27  8:24 ` [PATCH 61/65] powerpc/mm: THP is only available on hash64 as of now Aneesh Kumar K.V
2016-03-27  8:24 ` [PATCH 62/65] powerpc/mm/thp: Abstraction for THP functions Aneesh Kumar K.V
2016-03-27  8:24 ` [PATCH 63/65] powerpc/mm/radix: Add radix THP callbacks Aneesh Kumar K.V
2016-03-27  8:24 ` [PATCH 64/65] powerpc/mm/radix: Add THP support for 4k linux page size Aneesh Kumar K.V
2016-03-27  8:24 ` [PATCH 65/65] powerpc/mm/radix: Cputable update for radix Aneesh Kumar K.V
2016-03-30  1:01   ` Michael Neuling
2016-04-01  6:25   ` Michael Ellerman
2016-04-01  9:34     ` Aneesh Kumar K.V
2018-06-28 23:55       ` Benjamin Herrenschmidt
2016-03-30 10:53 ` [PATCH 01/65] powerpc/mm: Use big endian page table for book3s 64 Balbir Singh
2016-04-04 15:03   ` Aneesh Kumar K.V
