linuxppc-dev.lists.ozlabs.org archive mirror
* [PATCH 0/4] Add 4 level page table for 64k book3s config
@ 2016-03-01  4:15 Aneesh Kumar K.V
  2016-03-01  4:15 ` [PATCH 1/4] powerpc/mm: Split pgtable types to separate header Aneesh Kumar K.V
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-01  4:15 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

This series moves the book3s config with 64K Linux page size to a 4-level
Linux page table setup. This makes it similar to the PowerISA 3.0 radix
setup, which in turn will enable us to support both the hash and radix
MMU models with a single kernel.
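
For reference, a back-of-the-envelope sketch of the resulting geometry,
using the index sizes patch 3 introduces in hash-64k.h (PTE 8, PMD 5,
PUD 5, PGD 12 bits on top of the 16-bit 64K page offset). This is
illustrative arithmetic only, not new code:

	PMD_SHIFT   = PAGE_SHIFT(16) + PTE_INDEX_SIZE(8)  = 24  -> a PMD entry maps 16MB
	PUD_SHIFT   = PMD_SHIFT(24)  + PMD_INDEX_SIZE(5)  = 29  -> a PUD entry maps 512MB
	PGDIR_SHIFT = PUD_SHIFT(29)  + PUD_INDEX_SIZE(5)  = 34  -> a PGD entry maps 16GB
	              PGDIR_SHIFT(34) + PGD_INDEX_SIZE(12) = 46 -> 46-bit (64TB) address space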


Aneesh Kumar K.V (3):
  powerpc/mm: Split pgtable types to separate header
  powerpc/mm: Don't have conditional defines for real_pte_t
  powerpc/mm: Switch book3s 64 with 64K page size to 4 level page table

Kirill A. Shutemov (1):
  mm: Some arch may want to use HPAGE_PMD related values as variables

 arch/powerpc/Kconfig                          |   1 +
 arch/powerpc/include/asm/book3s/64/hash-4k.h  |  33 +-------
 arch/powerpc/include/asm/book3s/64/hash-64k.h |  31 ++++----
 arch/powerpc/include/asm/book3s/64/hash.h     |  11 +++
 arch/powerpc/include/asm/book3s/64/pgtable.h  |  30 ++++++--
 arch/powerpc/include/asm/page.h               | 104 +-------------------------
 arch/powerpc/include/asm/pgalloc-64.h         |  28 ++++++-
 arch/powerpc/include/asm/pgtable-types.h      | 103 +++++++++++++++++++++++++
 arch/powerpc/mm/init_64.c                     |  21 ++++--
 arch/powerpc/mm/pgtable_64.c                  |   7 ++
 include/linux/bug.h                           |   9 +++
 include/linux/huge_mm.h                       |   3 -
 mm/huge_memory.c                              |  17 ++++-
 13 files changed, 227 insertions(+), 171 deletions(-)
 create mode 100644 arch/powerpc/include/asm/pgtable-types.h

-- 
2.5.0


* [PATCH 1/4] powerpc/mm: Split pgtable types to separate header
  2016-03-01  4:15 [PATCH 0/4] Add 4 level page table for 64k book3s config Aneesh Kumar K.V
@ 2016-03-01  4:15 ` Aneesh Kumar K.V
  2016-03-04  0:40   ` [1/4] " Michael Ellerman
  2016-03-01  4:15 ` [PATCH 2/4] powerpc/mm: Don't have conditional defines for real_pte_t Aneesh Kumar K.V
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 9+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-01  4:15 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

We move the page table accessors into a separate header. We will
later add a big-endian variant of the table, which is needed for radix.
No functionality change, only code movement.
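
As a quick aside (not part of this patch), the reason both variants exist:
with STRICT_MM_TYPECHECKS the wrapper structs turn accidental mixing of
page table levels into a compile error. Illustrative fragment only:

	/* illustrative only, not part of this patch */
	pte_t pte = __pte(0);
	pmd_t pmd = __pmd(0);
	pte = pmd;	/* compile error with the STRICT_MM_TYPECHECKS struct types,
			   compiles silently when both are plain unsigned long */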

Reviewed-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/page.h          | 104 +----------------------------
 arch/powerpc/include/asm/pgtable-types.h | 108 +++++++++++++++++++++++++++++++
 2 files changed, 109 insertions(+), 103 deletions(-)
 create mode 100644 arch/powerpc/include/asm/pgtable-types.h

diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index af7a3422a3ef..ab3d8977bacd 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -288,109 +288,7 @@ extern long long virt_phys_offset;
 
 #ifndef __ASSEMBLY__
 
-#ifdef CONFIG_STRICT_MM_TYPECHECKS
-/* These are used to make use of C type-checking. */
-
-/* PTE level */
-typedef struct { pte_basic_t pte; } pte_t;
-#define __pte(x)	((pte_t) { (x) })
-static inline pte_basic_t pte_val(pte_t x)
-{
-	return x.pte;
-}
-
-/* 64k pages additionally define a bigger "real PTE" type that gathers
- * the "second half" part of the PTE for pseudo 64k pages
- */
-#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
-typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
-#else
-typedef struct { pte_t pte; } real_pte_t;
-#endif
-
-/* PMD level */
-#ifdef CONFIG_PPC64
-typedef struct { unsigned long pmd; } pmd_t;
-#define __pmd(x)	((pmd_t) { (x) })
-static inline unsigned long pmd_val(pmd_t x)
-{
-	return x.pmd;
-}
-
-/* PUD level exists only on 4k pages */
-#ifndef CONFIG_PPC_64K_PAGES
-typedef struct { unsigned long pud; } pud_t;
-#define __pud(x)	((pud_t) { (x) })
-static inline unsigned long pud_val(pud_t x)
-{
-	return x.pud;
-}
-#endif /* !CONFIG_PPC_64K_PAGES */
-#endif /* CONFIG_PPC64 */
-
-/* PGD level */
-typedef struct { unsigned long pgd; } pgd_t;
-#define __pgd(x)	((pgd_t) { (x) })
-static inline unsigned long pgd_val(pgd_t x)
-{
-	return x.pgd;
-}
-
-/* Page protection bits */
-typedef struct { unsigned long pgprot; } pgprot_t;
-#define pgprot_val(x)	((x).pgprot)
-#define __pgprot(x)	((pgprot_t) { (x) })
-
-#else
-
-/*
- * .. while these make it easier on the compiler
- */
-
-typedef pte_basic_t pte_t;
-#define __pte(x)	(x)
-static inline pte_basic_t pte_val(pte_t pte)
-{
-	return pte;
-}
-
-#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
-typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
-#else
-typedef pte_t real_pte_t;
-#endif
-
-
-#ifdef CONFIG_PPC64
-typedef unsigned long pmd_t;
-#define __pmd(x)	(x)
-static inline unsigned long pmd_val(pmd_t pmd)
-{
-	return pmd;
-}
-
-#ifndef CONFIG_PPC_64K_PAGES
-typedef unsigned long pud_t;
-#define __pud(x)	(x)
-static inline unsigned long pud_val(pud_t pud)
-{
-	return pud;
-}
-#endif /* !CONFIG_PPC_64K_PAGES */
-#endif /* CONFIG_PPC64 */
-
-typedef unsigned long pgd_t;
-#define __pgd(x)	(x)
-static inline unsigned long pgd_val(pgd_t pgd)
-{
-	return pgd;
-}
-
-typedef unsigned long pgprot_t;
-#define pgprot_val(x)	(x)
-#define __pgprot(x)	(x)
-
-#endif
+#include <asm/pgtable-types.h>
 
 typedef struct { signed long pd; } hugepd_t;
 
diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h
new file mode 100644
index 000000000000..2fac0c4acfa4
--- /dev/null
+++ b/arch/powerpc/include/asm/pgtable-types.h
@@ -0,0 +1,108 @@
+#ifndef _ASM_POWERPC_PGTABLE_TYPES_H
+#define _ASM_POWERPC_PGTABLE_TYPES_H
+
+#ifdef CONFIG_STRICT_MM_TYPECHECKS
+/* These are used to make use of C type-checking. */
+
+/* PTE level */
+typedef struct { pte_basic_t pte; } pte_t;
+#define __pte(x)	((pte_t) { (x) })
+static inline pte_basic_t pte_val(pte_t x)
+{
+	return x.pte;
+}
+
+/* 64k pages additionally define a bigger "real PTE" type that gathers
+ * the "second half" part of the PTE for pseudo 64k pages
+ */
+#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
+typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
+#else
+typedef struct { pte_t pte; } real_pte_t;
+#endif
+
+/* PMD level */
+#ifdef CONFIG_PPC64
+typedef struct { unsigned long pmd; } pmd_t;
+#define __pmd(x)	((pmd_t) { (x) })
+static inline unsigned long pmd_val(pmd_t x)
+{
+	return x.pmd;
+}
+
+/* PUD level exists only on 4k pages */
+#ifndef CONFIG_PPC_64K_PAGES
+typedef struct { unsigned long pud; } pud_t;
+#define __pud(x)	((pud_t) { (x) })
+static inline unsigned long pud_val(pud_t x)
+{
+	return x.pud;
+}
+#endif /* !CONFIG_PPC_64K_PAGES */
+#endif /* CONFIG_PPC64 */
+
+/* PGD level */
+typedef struct { unsigned long pgd; } pgd_t;
+#define __pgd(x)	((pgd_t) { (x) })
+static inline unsigned long pgd_val(pgd_t x)
+{
+	return x.pgd;
+}
+
+/* Page protection bits */
+typedef struct { unsigned long pgprot; } pgprot_t;
+#define pgprot_val(x)	((x).pgprot)
+#define __pgprot(x)	((pgprot_t) { (x) })
+
+#else
+
+/*
+ * .. while these make it easier on the compiler
+ */
+
+typedef pte_basic_t pte_t;
+#define __pte(x)	(x)
+static inline pte_basic_t pte_val(pte_t pte)
+{
+	return pte;
+}
+
+#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
+typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
+#else
+typedef pte_t real_pte_t;
+#endif
+
+
+#ifdef CONFIG_PPC64
+typedef unsigned long pmd_t;
+#define __pmd(x)	(x)
+static inline unsigned long pmd_val(pmd_t pmd)
+{
+	return pmd;
+}
+
+#ifndef CONFIG_PPC_64K_PAGES
+typedef unsigned long pud_t;
+#define __pud(x)	(x)
+static inline unsigned long pud_val(pud_t pud)
+{
+	return pud;
+}
+#endif /* !CONFIG_PPC_64K_PAGES */
+#endif /* CONFIG_PPC64 */
+
+typedef unsigned long pgd_t;
+#define __pgd(x)	(x)
+static inline unsigned long pgd_val(pgd_t pgd)
+{
+	return pgd;
+}
+
+typedef unsigned long pgprot_t;
+#define pgprot_val(x)	(x)
+#define __pgprot(x)	(x)
+
+#endif
+
+#endif /* _ASM_POWERPC_PGTABLE_TYPES_H */
-- 
2.5.0


* [PATCH 2/4] powerpc/mm: Don't have conditional defines for real_pte_t
  2016-03-01  4:15 [PATCH 0/4] Add 4 level page table for 64k book3s config Aneesh Kumar K.V
  2016-03-01  4:15 ` [PATCH 1/4] powerpc/mm: Split pgtable types to separate header Aneesh Kumar K.V
@ 2016-03-01  4:15 ` Aneesh Kumar K.V
  2016-03-04  0:40   ` [2/4] " Michael Ellerman
  2016-03-01  4:15 ` [PATCH 3/4] powerpc/mm: Switch book3s 64 with 64K page size to 4 level page table Aneesh Kumar K.V
  2016-03-01  4:15 ` [PATCH 4/4] mm: Some arch may want to use HPAGE_PMD related values as variables Aneesh Kumar K.V
  3 siblings, 1 reply; 9+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-01  4:15 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

We move real_pte_t out of STRICT_MM_TYPECHECKS.
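
With real_pte_t always a struct, the __real_pte()/__rpte_to_pte() helpers
no longer need a non-typechecked fallback. A minimal usage sketch, not
taken from the patch:

	/* illustrative only: on 64K hash real_pte_t carries the pte plus the hidx half */
	static inline pte_t example_rpte_roundtrip(pte_t *ptep)
	{
		real_pte_t rpte = __real_pte(*ptep, ptep);
		return __rpte_to_pte(rpte);
	}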

Reviewed-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h |  5 -----
 arch/powerpc/include/asm/pgtable-types.h     | 26 +++++++++-----------------
 2 files changed, 9 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index c8240b737d11..7482f69117b6 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -43,13 +43,8 @@
  */
 #ifndef __real_pte
 
-#ifdef CONFIG_STRICT_MM_TYPECHECKS
 #define __real_pte(e,p)		((real_pte_t){(e)})
 #define __rpte_to_pte(r)	((r).pte)
-#else
-#define __real_pte(e,p)		(e)
-#define __rpte_to_pte(r)	(__pte(r))
-#endif
 #define __rpte_to_hidx(r,index)	(pte_val(__rpte_to_pte(r)) >>_PAGE_F_GIX_SHIFT)
 
 #define pte_iterate_hashed_subpages(rpte, psize, va, index, shift)       \
diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h
index 2fac0c4acfa4..71487e1ca638 100644
--- a/arch/powerpc/include/asm/pgtable-types.h
+++ b/arch/powerpc/include/asm/pgtable-types.h
@@ -12,15 +12,6 @@ static inline pte_basic_t pte_val(pte_t x)
 	return x.pte;
 }
 
-/* 64k pages additionally define a bigger "real PTE" type that gathers
- * the "second half" part of the PTE for pseudo 64k pages
- */
-#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
-typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
-#else
-typedef struct { pte_t pte; } real_pte_t;
-#endif
-
 /* PMD level */
 #ifdef CONFIG_PPC64
 typedef struct { unsigned long pmd; } pmd_t;
@@ -67,13 +58,6 @@ static inline pte_basic_t pte_val(pte_t pte)
 	return pte;
 }
 
-#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
-typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
-#else
-typedef pte_t real_pte_t;
-#endif
-
-
 #ifdef CONFIG_PPC64
 typedef unsigned long pmd_t;
 #define __pmd(x)	(x)
@@ -103,6 +87,14 @@ typedef unsigned long pgprot_t;
 #define pgprot_val(x)	(x)
 #define __pgprot(x)	(x)
 
+#endif /* CONFIG_STRICT_MM_TYPECHECKS */
+/*
+ * With the hash config, 64k pages additionally define a bigger "real PTE" type that
+ * gathers the "second half" part of the PTE for pseudo 64k pages
+ */
+#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
+typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
+#else
+typedef struct { pte_t pte; } real_pte_t;
 #endif
-
 #endif /* _ASM_POWERPC_PGTABLE_TYPES_H */
-- 
2.5.0


* [PATCH 3/4] powerpc/mm: Switch book3s 64 with 64K page size to 4 level page table
  2016-03-01  4:15 [PATCH 0/4] Add 4 level page table for 64k book3s config Aneesh Kumar K.V
  2016-03-01  4:15 ` [PATCH 1/4] powerpc/mm: Split pgtable types to separate header Aneesh Kumar K.V
  2016-03-01  4:15 ` [PATCH 2/4] powerpc/mm: Don't have conditional defines for real_pte_t Aneesh Kumar K.V
@ 2016-03-01  4:15 ` Aneesh Kumar K.V
  2016-03-04  0:40   ` [3/4] " Michael Ellerman
  2016-03-01  4:15 ` [PATCH 4/4] mm: Some arch may want to use HPAGE_PMD related values as variables Aneesh Kumar K.V
  3 siblings, 1 reply; 9+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-01  4:15 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

This is needed so that we can support both hash and radix page tables
using a single kernel. A radix kernel uses a 4-level table.

We now use physical addresses in the upper page table tree levels. Even
though they are aligned to their size, for the masked bits we use the
bit positions as per PowerISA 3.0.
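
To make the masked-bits point concrete, the walk now looks roughly as
below. This is a sketch stitched together from helpers added in this
patch (pgd_page_vaddr(), pud_index(), the 0xc0000000000000ffUL masks in
hash-64k.h), not new code:

	/* illustrative only: upper levels now hold __pa() of the next table plus flag bits */
	static inline pud_t *example_pud_lookup(pgd_t *pgdp, unsigned long addr)
	{
		/* pgd_populate() stored __pgtable_ptr_val(pud), i.e. __pa(pud);
		 * pgd_page_vaddr() does __va(pgd_val(pgd) & ~PGD_MASKED_BITS), where
		 * PGD_MASKED_BITS uses the PowerISA 3.0 bit positions */
		return (pud_t *)pgd_page_vaddr(*pgdp) + pud_index(addr);
	}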

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/Kconfig                          |  1 +
 arch/powerpc/include/asm/book3s/64/hash-4k.h  | 33 +--------------------------
 arch/powerpc/include/asm/book3s/64/hash-64k.h | 31 +++++++++++++------------
 arch/powerpc/include/asm/book3s/64/hash.h     | 11 +++++++++
 arch/powerpc/include/asm/book3s/64/pgtable.h  | 25 +++++++++++++++++++-
 arch/powerpc/include/asm/pgalloc-64.h         | 28 ++++++++++++++++++++---
 arch/powerpc/include/asm/pgtable-types.h      | 13 +++++++----
 arch/powerpc/mm/init_64.c                     | 21 ++++++++++++-----
 8 files changed, 101 insertions(+), 62 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9faa18c4f3f7..599329332613 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -303,6 +303,7 @@ config ZONE_DMA32
 config PGTABLE_LEVELS
 	int
 	default 2 if !PPC64
+	default 4 if PPC_BOOK3S_64
 	default 3 if PPC_64K_PAGES
 	default 4
 
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 7f60f7e814d4..5f08a0832238 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -58,39 +58,8 @@
 #define _PAGE_4K_PFN		0
 #ifndef __ASSEMBLY__
 /*
- * 4-level page tables related bits
+ * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range()
  */
-
-#define pgd_none(pgd)		(!pgd_val(pgd))
-#define pgd_bad(pgd)		(pgd_val(pgd) == 0)
-#define pgd_present(pgd)	(pgd_val(pgd) != 0)
-#define pgd_page_vaddr(pgd)	__va(pgd_val(pgd) & ~PGD_MASKED_BITS)
-
-static inline void pgd_clear(pgd_t *pgdp)
-{
-	*pgdp = __pgd(0);
-}
-
-static inline pte_t pgd_pte(pgd_t pgd)
-{
-	return __pte(pgd_val(pgd));
-}
-
-static inline pgd_t pte_pgd(pte_t pte)
-{
-	return __pgd(pte_val(pte));
-}
-extern struct page *pgd_page(pgd_t pgd);
-
-#define pud_offset(pgdp, addr)	\
-  (((pud_t *) pgd_page_vaddr(*(pgdp))) + \
-    (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
-
-#define pud_ERROR(e) \
-	pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
-
-/*
- * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() */
 #define remap_4k_pfn(vma, addr, pfn, prot)	\
 	remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, (prot))
 
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 8bb03251f34c..60c2c912c3a7 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -1,15 +1,14 @@
 #ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H
 #define _ASM_POWERPC_BOOK3S_64_HASH_64K_H
 
-#include <asm-generic/pgtable-nopud.h>
-
 #define PTE_INDEX_SIZE  8
-#define PMD_INDEX_SIZE  10
-#define PUD_INDEX_SIZE	0
+#define PMD_INDEX_SIZE  5
+#define PUD_INDEX_SIZE	5
 #define PGD_INDEX_SIZE  12
 
 #define PTRS_PER_PTE	(1 << PTE_INDEX_SIZE)
 #define PTRS_PER_PMD	(1 << PMD_INDEX_SIZE)
+#define PTRS_PER_PUD	(1 << PUD_INDEX_SIZE)
 #define PTRS_PER_PGD	(1 << PGD_INDEX_SIZE)
 
 /* With 4k base page size, hugepage PTEs go at the PMD level */
@@ -20,8 +19,13 @@
 #define PMD_SIZE	(1UL << PMD_SHIFT)
 #define PMD_MASK	(~(PMD_SIZE-1))
 
+/* PUD_SHIFT determines what a third-level page table entry can map */
+#define PUD_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
+#define PUD_SIZE	(1UL << PUD_SHIFT)
+#define PUD_MASK	(~(PUD_SIZE-1))
+
 /* PGDIR_SHIFT determines what a third-level page table entry can map */
-#define PGDIR_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
+#define PGDIR_SHIFT	(PUD_SHIFT + PUD_INDEX_SIZE)
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 
@@ -56,13 +60,12 @@
 #define PTE_FRAG_SIZE_SHIFT  12
 #define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT)
 
-/*
- * Bits to mask out from a PMD to get to the PTE page
- * PMDs point to PTE table fragments which are PTE_FRAG_SIZE aligned.
- */
-#define PMD_MASKED_BITS		(PTE_FRAG_SIZE - 1)
-/* Bits to mask out from a PGD/PUD to get to the PMD page */
-#define PUD_MASKED_BITS		0x1ff
+/* Bits to mask out from a PMD to get to the PTE page */
+#define PMD_MASKED_BITS		0xc0000000000000ffUL
+/* Bits to mask out from a PUD to get to the PMD page */
+#define PUD_MASKED_BITS		0xc0000000000000ffUL
+/* Bits to mask out from a PGD to get to the PUD page */
+#define PGD_MASKED_BITS		0xc0000000000000ffUL
 
 #ifndef __ASSEMBLY__
 
@@ -132,11 +135,9 @@ extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index);
 #else
 #define PMD_TABLE_SIZE	(sizeof(pmd_t) << PMD_INDEX_SIZE)
 #endif
+#define PUD_TABLE_SIZE	(sizeof(pud_t) << PUD_INDEX_SIZE)
 #define PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)
 
-#define pgd_pte(pgd)	(pud_pte(((pud_t){ pgd })))
-#define pte_pgd(pte)	((pgd_t)pte_pud(pte))
-
 #ifdef CONFIG_HUGETLB_PAGE
 /*
  * We have PGD_INDEX_SIZE = 12 and PTE_INDEX_SIZE = 8, so that we can have
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index ef9bd68f7e6d..d0ee6fcef823 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -235,6 +235,7 @@
 #define __pgtable_ptr_val(ptr)	__pa(ptr)
 
 #define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1))
+#define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1))
 #define pmd_index(address) (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1))
 #define pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1))
 
@@ -363,8 +364,18 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
 	:"cc");
 }
 
+static inline int pgd_bad(pgd_t pgd)
+{
+	return (pgd_val(pgd) == 0);
+}
+
 #define __HAVE_ARCH_PTE_SAME
 #define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
+static inline unsigned long pgd_page_vaddr(pgd_t pgd)
+{
+	return (unsigned long)__va(pgd_val(pgd) & ~PGD_MASKED_BITS);
+}
+
 
 /* Generic accessors to PTE bits */
 static inline int pte_write(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_RW);}
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 7482f69117b6..77d3ce05798e 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -106,6 +106,26 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
 	*pgdp = __pgd(val);
 }
 
+static inline void pgd_clear(pgd_t *pgdp)
+{
+	*pgdp = __pgd(0);
+}
+
+#define pgd_none(pgd)		(!pgd_val(pgd))
+#define pgd_present(pgd)	(!pgd_none(pgd))
+
+static inline pte_t pgd_pte(pgd_t pgd)
+{
+	return __pte(pgd_val(pgd));
+}
+
+static inline pgd_t pte_pgd(pte_t pte)
+{
+	return __pgd(pte_val(pte));
+}
+
+extern struct page *pgd_page(pgd_t pgd);
+
 /*
  * Find an entry in a page-table-directory.  We combine the address region
  * (the high order N bits) and the pgd portion of the address.
@@ -113,9 +133,10 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
 
 #define pgd_offset(mm, address)	 ((mm)->pgd + pgd_index(address))
 
+#define pud_offset(pgdp, addr)	\
+	(((pud_t *) pgd_page_vaddr(*(pgdp))) + pud_index(addr))
 #define pmd_offset(pudp,addr) \
 	(((pmd_t *) pud_page_vaddr(*(pudp))) + pmd_index(addr))
-
 #define pte_offset_kernel(dir,addr) \
 	(((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr))
 
@@ -130,6 +151,8 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
 	pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
 #define pmd_ERROR(e) \
 	pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pud_ERROR(e) \
+	pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
 #define pgd_ERROR(e) \
 	pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
 
diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h
index 7ac59a32c66a..8d5fc3ac43da 100644
--- a/arch/powerpc/include/asm/pgalloc-64.h
+++ b/arch/powerpc/include/asm/pgalloc-64.h
@@ -171,7 +171,29 @@ extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
 extern void __tlb_remove_table(void *_table);
 #endif
 
-#define pud_populate(mm, pud, pmd)	pud_set(pud, __pgtable_ptr_val(pmd))
+#ifndef __PAGETABLE_PUD_FOLDED
+/* book3s 64 uses a 4-level page table */
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+{
+	pgd_set(pgd, __pgtable_ptr_val(pud));
+}
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
+				GFP_KERNEL|__GFP_REPEAT);
+}
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+	kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
+}
+#endif
+
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+	pud_set(pud, __pgtable_ptr_val(pmd));
+}
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
 				       pte_t *pte)
@@ -233,11 +255,11 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 
 #define __pmd_free_tlb(tlb, pmd, addr)		      \
 	pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX)
-#ifndef CONFIG_PPC_64K_PAGES
+#ifndef __PAGETABLE_PUD_FOLDED
 #define __pud_free_tlb(tlb, pud, addr)		      \
 	pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE)
 
-#endif /* CONFIG_PPC_64K_PAGES */
+#endif /* __PAGETABLE_PUD_FOLDED */
 
 #define check_pgt_cache()	do { } while (0)
 
diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h
index 71487e1ca638..43140f8b0592 100644
--- a/arch/powerpc/include/asm/pgtable-types.h
+++ b/arch/powerpc/include/asm/pgtable-types.h
@@ -21,15 +21,18 @@ static inline unsigned long pmd_val(pmd_t x)
 	return x.pmd;
 }
 
-/* PUD level exists only on 4k pages */
-#ifndef CONFIG_PPC_64K_PAGES
+/*
+ * 64-bit hash always uses a 4-level table. Everybody else uses 4 levels
+ * only for 4K page size.
+ */
+#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES)
 typedef struct { unsigned long pud; } pud_t;
 #define __pud(x)	((pud_t) { (x) })
 static inline unsigned long pud_val(pud_t x)
 {
 	return x.pud;
 }
-#endif /* !CONFIG_PPC_64K_PAGES */
+#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */
 #endif /* CONFIG_PPC64 */
 
 /* PGD level */
@@ -66,14 +69,14 @@ static inline unsigned long pmd_val(pmd_t pmd)
 	return pmd;
 }
 
-#ifndef CONFIG_PPC_64K_PAGES
+#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES)
 typedef unsigned long pud_t;
 #define __pud(x)	(x)
 static inline unsigned long pud_val(pud_t pud)
 {
 	return pud;
 }
-#endif /* !CONFIG_PPC_64K_PAGES */
+#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */
 #endif /* CONFIG_PPC64 */
 
 typedef unsigned long pgd_t;
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 379a6a90644b..8ce1ec24d573 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -85,6 +85,11 @@ static void pgd_ctor(void *addr)
 	memset(addr, 0, PGD_TABLE_SIZE);
 }
 
+static void pud_ctor(void *addr)
+{
+	memset(addr, 0, PUD_TABLE_SIZE);
+}
+
 static void pmd_ctor(void *addr)
 {
 	memset(addr, 0, PMD_TABLE_SIZE);
@@ -138,14 +143,18 @@ void pgtable_cache_init(void)
 {
 	pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
 	pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor);
+	/*
+	 * In all current configs, when the PUD index exists it's the
+	 * same size as either the pgd or pmd index except with THP enabled
+	 * on book3s 64
+	 */
+	if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
+		pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor);
+
 	if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_CACHE_INDEX))
 		panic("Couldn't allocate pgtable caches");
-	/* In all current configs, when the PUD index exists it's the
-	 * same size as either the pgd or pmd index.  Verify that the
-	 * initialization above has also created a PUD cache.  This
-	 * will need re-examiniation if we add new possibilities for
-	 * the pagetable layout. */
-	BUG_ON(PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE));
+	if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
+		panic("Couldn't allocate pud pgtable caches");
 }
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-- 
2.5.0


* [PATCH 4/4] mm: Some arch may want to use HPAGE_PMD related values as variables
  2016-03-01  4:15 [PATCH 0/4] Add 4 level page table for 64k book3s config Aneesh Kumar K.V
                   ` (2 preceding siblings ...)
  2016-03-01  4:15 ` [PATCH 3/4] powerpc/mm: Switch book3s 64 with 64K page size to 4 level page table Aneesh Kumar K.V
@ 2016-03-01  4:15 ` Aneesh Kumar K.V
  2016-03-04  0:40   ` [4/4] " Michael Ellerman
  3 siblings, 1 reply; 9+ messages in thread
From: Aneesh Kumar K.V @ 2016-03-01  4:15 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Kirill A. Shutemov, Aneesh Kumar K.V

From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>

The next generation Power processor brings a new MMU model [1] that
requires us to maintain a different Linux page table format.

In order to support both current and future ppc64 systems with a single
kernel, we need to make sure the kernel can select between the different
page table formats at runtime. With the new MMU (radix MMU) added, we
will have two different PMD hugepage sizes: 16MB for the hash model and
2MB for the radix model. Hence make the HPAGE_PMD related values
variables.

The actual conversion of HPAGE_PMD to a variable for ppc64 happens in a
followup patch.

[1] http://ibm.biz/power-isa3 (Needs registration).
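
A minimal sketch of what that follow-up ppc64 conversion could look like,
assuming the shift is picked at boot depending on the MMU model. The names
below are hypothetical and not part of this patch:

	/* hypothetical sketch only; names are made up for illustration */
	extern unsigned long hpage_pmd_shift;	/* set at boot: 24 (16MB) for hash, 21 (2MB) for radix */
	#define HPAGE_PMD_SHIFT		hpage_pmd_shift
	#define HPAGE_PMD_ORDER		(HPAGE_PMD_SHIFT - PAGE_SHIFT)
	#define HPAGE_PMD_NR		(1UL << HPAGE_PMD_ORDER)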

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/mm/pgtable_64.c |  7 +++++++
 include/linux/bug.h          |  9 +++++++++
 include/linux/huge_mm.h      |  3 ---
 mm/huge_memory.c             | 17 ++++++++++++++---
 4 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index af304e6d5a89..0eb53128ca2a 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -817,6 +817,13 @@ pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
 
 int has_transparent_hugepage(void)
 {
+
+	BUILD_BUG_ON_MSG((PMD_SHIFT - PAGE_SHIFT) >= MAX_ORDER,
+		"hugepages can't be allocated by the buddy allocator");
+
+	BUILD_BUG_ON_MSG((PMD_SHIFT - PAGE_SHIFT) < 2,
+			 "We need more than 2 pages to do deferred thp split");
+
 	if (!mmu_has_feature(MMU_FTR_16M_PAGE))
 		return 0;
 	/*
diff --git a/include/linux/bug.h b/include/linux/bug.h
index 7f4818673c41..e51b0709e78d 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -20,6 +20,7 @@ struct pt_regs;
 #define BUILD_BUG_ON_MSG(cond, msg) (0)
 #define BUILD_BUG_ON(condition) (0)
 #define BUILD_BUG() (0)
+#define MAYBE_BUILD_BUG_ON(cond) (0)
 #else /* __CHECKER__ */
 
 /* Force a compilation error if a constant expression is not a power of 2 */
@@ -83,6 +84,14 @@ struct pt_regs;
  */
 #define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
 
+#define MAYBE_BUILD_BUG_ON(cond)			\
+	do {						\
+		if (__builtin_constant_p((cond)))       \
+			BUILD_BUG_ON(cond);             \
+		else                                    \
+			BUG_ON(cond);                   \
+	} while (0)
+
 #endif	/* __CHECKER__ */
 
 #ifdef CONFIG_GENERIC_BUG
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 459fd25b378e..f12513a20a06 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -111,9 +111,6 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			__split_huge_pmd(__vma, __pmd, __address);	\
 	}  while (0)
 
-#if HPAGE_PMD_ORDER >= MAX_ORDER
-#error "hugepages can't be allocated by the buddy allocator"
-#endif
 extern int hugepage_madvise(struct vm_area_struct *vma,
 			    unsigned long *vm_flags, int advice);
 extern void vma_adjust_trans_huge(struct vm_area_struct *vma,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index aea8f7a42df9..36c22a89df61 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -83,7 +83,7 @@ unsigned long transparent_hugepage_flags __read_mostly =
 	(1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
 
 /* default scan 8*512 pte (or vmas) every 30 second */
-static unsigned int khugepaged_pages_to_scan __read_mostly = HPAGE_PMD_NR*8;
+static unsigned int khugepaged_pages_to_scan __read_mostly;
 static unsigned int khugepaged_pages_collapsed;
 static unsigned int khugepaged_full_scans;
 static unsigned int khugepaged_scan_sleep_millisecs __read_mostly = 10000;
@@ -98,7 +98,7 @@ static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait);
  * it would have happened if the vma was large enough during page
  * fault.
  */
-static unsigned int khugepaged_max_ptes_none __read_mostly = HPAGE_PMD_NR-1;
+static unsigned int khugepaged_max_ptes_none __read_mostly;
 
 static int khugepaged(void *none);
 static int khugepaged_slab_init(void);
@@ -660,6 +660,18 @@ static int __init hugepage_init(void)
 		return -EINVAL;
 	}
 
+	khugepaged_pages_to_scan = HPAGE_PMD_NR * 8;
+	khugepaged_max_ptes_none = HPAGE_PMD_NR - 1;
+	/*
+	 * hugepages can't be allocated by the buddy allocator
+	 */
+	MAYBE_BUILD_BUG_ON(HPAGE_PMD_ORDER >= MAX_ORDER);
+	/*
+	 * we use page->mapping and page->index in second tail page
+	 * as list_head: assuming THP order >= 2
+	 */
+	MAYBE_BUILD_BUG_ON(HPAGE_PMD_ORDER < 2);
+
 	err = hugepage_init_sysfs(&hugepage_kobj);
 	if (err)
 		goto err_sysfs;
@@ -764,7 +776,6 @@ void prep_transhuge_page(struct page *page)
 	 * we use page->mapping and page->indexlru in second tail page
 	 * as list_head: assuming THP order >= 2
 	 */
-	BUILD_BUG_ON(HPAGE_PMD_ORDER < 2);
 
 	INIT_LIST_HEAD(page_deferred_list(page));
 	set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR);
-- 
2.5.0


* Re: [1/4] powerpc/mm: Split pgtable types to separate header
  2016-03-01  4:15 ` [PATCH 1/4] powerpc/mm: Split pgtable types to separate header Aneesh Kumar K.V
@ 2016-03-04  0:40   ` Michael Ellerman
  0 siblings, 0 replies; 9+ messages in thread
From: Michael Ellerman @ 2016-03-04  0:40 UTC (permalink / raw)
  To: Aneesh Kumar K.V, benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V

On Tue, 2016-03-01 at 04:15:11 UTC, "Aneesh Kumar K.V" wrote:
> We move the page table accessors into a separate header. We will
> later add a big-endian variant of the table, which is needed for radix.
> No functionality change, only code movement.
> 
> Reviewed-by: Paul Mackerras <paulus@samba.org>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/2bf59916ef033edb9f8e968ee2

cheers


* Re: [2/4] powerpc/mm: Don't have conditional defines for real_pte_t
  2016-03-01  4:15 ` [PATCH 2/4] powerpc/mm: Don't have conditional defines for real_pte_t Aneesh Kumar K.V
@ 2016-03-04  0:40   ` Michael Ellerman
  0 siblings, 0 replies; 9+ messages in thread
From: Michael Ellerman @ 2016-03-04  0:40 UTC (permalink / raw)
  To: Aneesh Kumar K.V, benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V

On Tue, 2016-03-01 at 04:15:12 UTC, "Aneesh Kumar K.V" wrote:
> We move real_pte_t out of STRICT_MM_TYPECHECKS.
> 
> Reviewed-by: Paul Mackerras <paulus@samba.org>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/ae9a71afa4d7cf29a816fbc387

cheers


* Re: [3/4] powerpc/mm: Switch book3s 64 with 64K page size to 4 level page table
  2016-03-01  4:15 ` [PATCH 3/4] powerpc/mm: Switch book3s 64 with 64K page size to 4 level page table Aneesh Kumar K.V
@ 2016-03-04  0:40   ` Michael Ellerman
  0 siblings, 0 replies; 9+ messages in thread
From: Michael Ellerman @ 2016-03-04  0:40 UTC (permalink / raw)
  To: Aneesh Kumar K.V, benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V

On Tue, 2016-03-01 at 04:15:13 UTC, "Aneesh Kumar K.V" wrote:
> This is needed so that we can support both hash and radix page tables
> using a single kernel. A radix kernel uses a 4-level table.
> 
> We now use physical addresses in the upper page table tree levels. Even
> though they are aligned to their size, for the masked bits we use the
> bit positions as per PowerISA 3.0.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/368ced78e6ed3d72c2acc61233

cheers


* Re: [4/4] mm: Some arch may want to use HPAGE_PMD related values as variables
  2016-03-01  4:15 ` [PATCH 4/4] mm: Some arch may want to use HPAGE_PMD related values as variables Aneesh Kumar K.V
@ 2016-03-04  0:40   ` Michael Ellerman
  0 siblings, 0 replies; 9+ messages in thread
From: Michael Ellerman @ 2016-03-04  0:40 UTC (permalink / raw)
  To: Aneesh Kumar K.V, benh, paulus
  Cc: linuxppc-dev, Aneesh Kumar K.V, Kirill A. Shutemov

On Tue, 2016-03-01 at 04:15:14 UTC, "Aneesh Kumar K.V" wrote:
> From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
> 
> The next generation Power processor brings a new MMU model [1] that
> requires us to maintain a different Linux page table format.
> 
> In order to support both current and future ppc64 systems with a single
> kernel, we need to make sure the kernel can select between the different
> page table formats at runtime. With the new MMU (radix MMU) added, we
> will have two different PMD hugepage sizes: 16MB for the hash model and
> 2MB for the radix model. Hence make the HPAGE_PMD related values
> variables.
> 
> The actual conversion of HPAGE_PMD to a variable for ppc64 happens in a
> followup patch.
> 
> [1] http://ibm.biz/power-isa3 (Needs registration).
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/ff20c2e0acc5ad7e27c68592ad

cheers

