linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
To: Linus Torvalds <torvalds@linux-foundation.org>,
	x86@kernel.org, Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, "H. Peter Anvin" <hpa@zytor.com>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	Andy Lutomirski <luto@amacapital.net>,
	Dmitry Safonov <dsafonov@virtuozzo.com>,
	Cyrill Gorcunov <gorcunov@openvz.org>,
	Borislav Petkov <bp@suse.de>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Subject: [PATCHv5 08/19] x86/mm: Make PGDIR_SHIFT and PTRS_PER_P4D variable
Date: Mon, 21 Aug 2017 18:29:05 +0300	[thread overview]
Message-ID: <20170821152916.40124-9-kirill.shutemov@linux.intel.com> (raw)
In-Reply-To: <20170821152916.40124-1-kirill.shutemov@linux.intel.com>

For boot-time switching between 4- and 5-level paging we need to be able
to fold the p4d page table level at runtime. This requires PGDIR_SHIFT
and PTRS_PER_P4D to be variable.

The change doesn't affect the kernel image size much:

   text    data     bss     dec     hex filename
10710172        4879964  860160 16450296         fb02f8 vmlinux.before
10710340        4880000  860160 16450500         fb03c4 vmlinux.after

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/boot/compressed/kaslr.c        |  6 ++++++
 arch/x86/include/asm/pgtable_32.h       |  2 ++
 arch/x86/include/asm/pgtable_32_types.h |  2 ++
 arch/x86/include/asm/pgtable_64_types.h | 15 +++++++++++++--
 arch/x86/kernel/head64.c                | 11 ++++++++++-
 arch/x86/mm/dump_pagetables.c           | 12 +++++-------
 arch/x86/mm/init_64.c                   |  2 +-
 arch/x86/mm/kasan_init_64.c             |  2 +-
 arch/x86/platform/efi/efi_64.c          |  4 ++--
 include/asm-generic/5level-fixup.h      |  1 +
 include/asm-generic/pgtable-nop4d.h     |  1 +
 include/linux/kasan.h                   |  2 +-
 mm/kasan/kasan_init.c                   |  2 +-
 13 files changed, 46 insertions(+), 16 deletions(-)

diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 99c7194f7ea6..06837cf8445e 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -43,6 +43,12 @@
 #define STATIC
 #include <linux/decompress/mm.h>
 
+#ifdef CONFIG_X86_5LEVEL
+unsigned int pgtable_l5_enabled __read_mostly = 1;
+unsigned int pgdir_shift __read_mostly = 48;
+unsigned int ptrs_per_p4d __read_mostly = 512;
+#endif
+
 extern unsigned long get_cmd_line_ptr(void);
 
 /* Simplified build-specific string for starting entropy. */
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index bfab55675c16..9c3c811347b0 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -32,6 +32,8 @@ static inline void pgtable_cache_init(void) { }
 static inline void check_pgt_cache(void) { }
 void paging_init(void);
 
+static inline int pgd_large(pgd_t pgd) { return 0; }
+
 /*
  * Define this if things work differently on an i386 and an i486:
  * it will (on an i486) warn about kernel memory accesses that are
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index 9fb2f2bc8245..eab5418d82d2 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -14,6 +14,8 @@
 # include <asm/pgtable-2level_types.h>
 #endif
 
+#define pgtable_l5_enabled 0
+
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE - 1))
 
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index a9f77ead7088..163a049bbb56 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -19,6 +19,15 @@ typedef unsigned long	pgprotval_t;
 
 typedef struct { pteval_t pte; } pte_t;
 
+#ifdef CONFIG_X86_5LEVEL
+extern unsigned int pgtable_l5_enabled;
+#else
+#define pgtable_l5_enabled 0
+#endif
+
+extern unsigned int pgdir_shift;
+extern unsigned int ptrs_per_p4d;
+
 #endif	/* !__ASSEMBLY__ */
 
 #define SHARED_KERNEL_PMD	0
@@ -28,14 +37,15 @@ typedef struct { pteval_t pte; } pte_t;
 /*
  * PGDIR_SHIFT determines what a top-level page table entry can map
  */
-#define PGDIR_SHIFT	48
+#define PGDIR_SHIFT	pgdir_shift
 #define PTRS_PER_PGD	512
 
 /*
  * 4th level page in 5-level paging case
  */
 #define P4D_SHIFT	39
-#define PTRS_PER_P4D	512
+#define __PTRS_PER_P4D	512
+#define PTRS_PER_P4D	ptrs_per_p4d
 #define P4D_SIZE	(_AC(1, UL) << P4D_SHIFT)
 #define P4D_MASK	(~(P4D_SIZE - 1))
 
@@ -46,6 +56,7 @@ typedef struct { pteval_t pte; } pte_t;
  */
 #define PGDIR_SHIFT	39
 #define PTRS_PER_PGD	512
+#define __PTRS_PER_P4D	1
 
 #endif /* CONFIG_X86_5LEVEL */
 
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index aa163bda4a29..98d9788969cc 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -38,6 +38,15 @@ extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
 static unsigned int __initdata next_early_pgt;
 pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
 
+#ifdef CONFIG_X86_5LEVEL
+unsigned int pgtable_l5_enabled __read_mostly = 1;
+EXPORT_SYMBOL(pgtable_l5_enabled);
+unsigned int pgdir_shift __read_mostly = 48;
+EXPORT_SYMBOL(pgdir_shift);
+unsigned int ptrs_per_p4d __read_mostly = 512;
+EXPORT_SYMBOL(ptrs_per_p4d);
+#endif
+
 #if defined(CONFIG_RANDOMIZE_MEMORY) || defined(CONFIG_X86_5LEVEL)
 unsigned long page_offset_base __read_mostly = __PAGE_OFFSET_BASE;
 EXPORT_SYMBOL(page_offset_base);
@@ -329,7 +338,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
 	BUILD_BUG_ON((__START_KERNEL_map & ~PMD_MASK) != 0);
 	BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0);
 	BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
-	BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
+	MAYBE_BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
 				(__START_KERNEL & PGDIR_MASK)));
 	BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
 
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 5e3ac6fe6c9e..2eabd07ae2d2 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -399,14 +399,15 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
 #define p4d_none(a)  pud_none(__pud(p4d_val(a)))
 #endif
 
-#if PTRS_PER_P4D > 1
-
 static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, unsigned long P)
 {
 	int i;
 	p4d_t *start, *p4d_start;
 	pgprotval_t prot;
 
+	if (PTRS_PER_P4D == 1)
+		return walk_pud_level(m, st, __p4d(pgd_val(addr)), P);
+
 	p4d_start = start = (p4d_t *)pgd_page_vaddr(addr);
 
 	for (i = 0; i < PTRS_PER_P4D; i++) {
@@ -426,11 +427,8 @@ static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
 	}
 }
 
-#else
-#define walk_p4d_level(m,s,a,p) walk_pud_level(m,s,__p4d(pgd_val(a)),p)
-#define pgd_large(a) p4d_large(__p4d(pgd_val(a)))
-#define pgd_none(a)  p4d_none(__p4d(pgd_val(a)))
-#endif
+#define pgd_large(a) (pgtable_l5_enabled ? pgd_large(a) : p4d_large(__p4d(pgd_val(a))))
+#define pgd_none(a)  (pgtable_l5_enabled ? pgd_none(a) : p4d_none(__p4d(pgd_val(a))))
 
 static inline bool is_hypervisor_range(int idx)
 {
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 136422d7d539..649b8df485ad 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -143,7 +143,7 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 		 * With folded p4d, pgd_none() is always false, we need to
 		 * handle synchonization on p4d level.
 		 */
-		BUILD_BUG_ON(pgd_none(*pgd_ref));
+		MAYBE_BUILD_BUG_ON(pgd_none(*pgd_ref));
 		p4d_ref = p4d_offset(pgd_ref, addr);
 
 		if (p4d_none(*p4d_ref))
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index f6b4db2647b5..e1e2cca88567 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -15,7 +15,7 @@
 
 extern struct range pfn_mapped[E820_MAX_ENTRIES];
 
-static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
+static p4d_t tmp_p4d_table[__PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
 
 static int __init map_range(struct range *range)
 {
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 12e83888e5b9..970a0f5f787d 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -249,8 +249,8 @@ void efi_sync_low_kernel_mappings(void)
 	 * only span a single PGD entry and that the entry also maps
 	 * other important kernel regions.
 	 */
-	BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END));
-	BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) !=
+	MAYBE_BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END));
+	MAYBE_BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) !=
 			(EFI_VA_END & PGDIR_MASK));
 
 	pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET);
diff --git a/include/asm-generic/5level-fixup.h b/include/asm-generic/5level-fixup.h
index b5ca82dc4175..e9fcfc6b2518 100644
--- a/include/asm-generic/5level-fixup.h
+++ b/include/asm-generic/5level-fixup.h
@@ -7,6 +7,7 @@
 #define P4D_SHIFT			PGDIR_SHIFT
 #define P4D_SIZE			PGDIR_SIZE
 #define P4D_MASK			PGDIR_MASK
+#define __PTRS_PER_P4D			1
 #define PTRS_PER_P4D			1
 
 #define p4d_t				pgd_t
diff --git a/include/asm-generic/pgtable-nop4d.h b/include/asm-generic/pgtable-nop4d.h
index de364ecb8df6..99cb2fa61cef 100644
--- a/include/asm-generic/pgtable-nop4d.h
+++ b/include/asm-generic/pgtable-nop4d.h
@@ -8,6 +8,7 @@
 typedef struct { pgd_t pgd; } p4d_t;
 
 #define P4D_SHIFT	PGDIR_SHIFT
+#define __PTRS_PER_P4D	1
 #define PTRS_PER_P4D	1
 #define P4D_SIZE	(1UL << P4D_SHIFT)
 #define P4D_MASK	(~(P4D_SIZE-1))
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index a5c7046f26b4..d27787ab2b84 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -19,7 +19,7 @@ extern unsigned char kasan_zero_page[PAGE_SIZE];
 extern pte_t kasan_zero_pte[PTRS_PER_PTE];
 extern pmd_t kasan_zero_pmd[PTRS_PER_PMD];
 extern pud_t kasan_zero_pud[PTRS_PER_PUD];
-extern p4d_t kasan_zero_p4d[PTRS_PER_P4D];
+extern p4d_t kasan_zero_p4d[__PTRS_PER_P4D];
 
 void kasan_populate_zero_shadow(const void *shadow_start,
 				const void *shadow_end);
diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
index 554e4c0f23a2..419e0d33f9be 100644
--- a/mm/kasan/kasan_init.c
+++ b/mm/kasan/kasan_init.c
@@ -31,7 +31,7 @@
 unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss;
 
 #if CONFIG_PGTABLE_LEVELS > 4
-p4d_t kasan_zero_p4d[PTRS_PER_P4D] __page_aligned_bss;
+p4d_t kasan_zero_p4d[__PTRS_PER_P4D] __page_aligned_bss;
 #endif
 #if CONFIG_PGTABLE_LEVELS > 3
 pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss;
-- 
2.14.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2017-08-21 15:30 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-21 15:28 [PATCHv5 00/19] Boot-time switching between 4- and 5-level paging Kirill A. Shutemov
2017-08-21 15:28 ` [PATCHv5 01/19] mm/sparsemem: Allocate mem_section at runtime for SPARSEMEM_EXTREME Kirill A. Shutemov
2017-08-22 16:28   ` Borislav Petkov
2017-08-22 17:50     ` Kirill A. Shutemov
2017-08-21 15:28 ` [PATCHv5 02/19] mm/zsmalloc: Prepare to variable MAX_PHYSMEM_BITS Kirill A. Shutemov
2017-08-21 15:29 ` [PATCHv5 03/19] x86/kasan: Use the same shadow offset for 4- and 5-level paging Kirill A. Shutemov
2017-08-21 15:29 ` [PATCHv5 04/19] x86/xen: Provide pre-built page tables only for XEN_PV and XEN_PVH Kirill A. Shutemov
2017-08-21 15:29 ` [PATCHv5 05/19] x86/xen: Drop 5-level paging support code from XEN_PV code Kirill A. Shutemov
2017-08-21 15:29 ` [PATCHv5 06/19] x86/boot/compressed/64: Detect and handle 5-level paging at boot-time Kirill A. Shutemov
2017-08-27 11:29   ` Cyrill Gorcunov
2017-09-04 13:02     ` Kirill A. Shutemov
2017-08-21 15:29 ` [PATCHv5 07/19] x86/mm: Make virtual memory layout movable for CONFIG_X86_5LEVEL Kirill A. Shutemov
2017-08-21 15:29 ` Kirill A. Shutemov [this message]
2017-08-21 15:29 ` [PATCHv5 09/19] x86/mm: Make MAX_PHYSADDR_BITS and MAX_PHYSMEM_BITS dynamic Kirill A. Shutemov
2017-08-21 15:29 ` [PATCHv5 10/19] x86/mm: Make __PHYSICAL_MASK_SHIFT and __VIRTUAL_MASK_SHIFT dynamic Kirill A. Shutemov
2017-08-21 15:29 ` [PATCHv5 11/19] x86/mm: Make STACK_TOP_MAX dynamic Kirill A. Shutemov
2017-08-21 15:29 ` [PATCHv5 12/19] x86/mm: Adjust virtual address space layout in early boot Kirill A. Shutemov
2017-08-21 15:29 ` [PATCHv5 13/19] x86/mm: Make early boot code support boot-time switching of paging modes Kirill A. Shutemov
2017-08-21 15:29 ` [PATCHv5 14/19] x86/mm: Fold p4d page table layer at runtime Kirill A. Shutemov
2017-08-21 15:29 ` [PATCHv5 15/19] x86/mm: Replace compile-time checks for 5-level with runtime-time Kirill A. Shutemov
2017-08-21 15:29 ` [PATCHv5 16/19] x86/mm: Allow to boot without la57 if CONFIG_X86_5LEVEL=y Kirill A. Shutemov
2017-08-21 15:29 ` [PATCHv5 17/19] x86/xen: Allow XEN_PV and XEN_PVH to be enabled with X86_5LEVEL Kirill A. Shutemov
2017-08-21 15:29 ` [PATCHv5 18/19] x86/mm: Redefine some of page table helpers as macros Kirill A. Shutemov
2017-08-21 15:29 ` [PATCHv5 19/19] x86/mm: Offset boot-time paging mode switching cost Kirill A. Shutemov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170821152916.40124-9-kirill.shutemov@linux.intel.com \
    --to=kirill.shutemov@linux.intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=bp@suse.de \
    --cc=dsafonov@virtuozzo.com \
    --cc=gorcunov@openvz.org \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@amacapital.net \
    --cc=mingo@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).