linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v5 0/6] Move swapper_pg_dir to rodata section.
@ 2018-09-17  4:43 Jun Yao
  2018-09-17  4:43 ` [PATCH v5 1/6] arm64/mm: Introduce the init_pg_dir Jun Yao
                   ` (7 more replies)
  0 siblings, 8 replies; 23+ messages in thread
From: Jun Yao @ 2018-09-17  4:43 UTC (permalink / raw)
  To: linux-arm-kernel; +Cc: catalin.marinas, will.deacon, james.morse, linux-kernel

Version 5 changes:
	1. Correct spelling and indentation errors[1].
	2. Update init_mm.pgd by assembly[2].
	3. Simplify set_p?d() by introducing set_swapper_pgd()[3].
	4. Reduce unnecessary tlbi for every write to swapper_pg_dir
	   during paging_init()[3].

v4: https://www.spinics.net/lists/arm-kernel/msg672195.html
v3: https://www.spinics.net/lists/arm-kernel/msg662537.html
v2: https://patchwork.kernel.org/patch/10485641/
v1: https://patchwork.kernel.org/patch/10476595/

[1] https://www.spinics.net/lists/arm-kernel/msg675189.html
[2] https://www.spinics.net/lists/arm-kernel/msg675193.html
[3] https://www.spinics.net/lists/arm-kernel/msg675196.html

Jun Yao (6):
  arm64/mm: Introduce the init_pg_dir.
  arm64/mm: Pass ttbr1 as a parameter to __enable_mmu().
  arm64/mm: Create the initial page table in the init_pg_dir.
  arm64/mm: Create the final page table directly in swapper_pg_dir.
  arm64/mm: Populate the swapper_pg_dir by fixmap.
  arm64/mm: Move {idmap_pg_dir .. swapper_pg_dir} to rodata section.

 arch/arm64/include/asm/assembler.h | 29 ++++++++++++++++++
 arch/arm64/include/asm/pgtable.h   | 36 ++++++++++++++++++----
 arch/arm64/kernel/asm-offsets.c    |  1 +
 arch/arm64/kernel/head.S           | 48 ++++++++++++++++++++----------
 arch/arm64/kernel/sleep.S          |  1 +
 arch/arm64/kernel/vmlinux.lds.S    | 47 +++++++++++++++++++----------
 arch/arm64/mm/mmu.c                | 45 +++++++++++++---------------
 7 files changed, 147 insertions(+), 60 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH v5 1/6] arm64/mm: Introduce the init_pg_dir.
  2018-09-17  4:43 [PATCH v5 0/6] Move swapper_pg_dir to rodata section Jun Yao
@ 2018-09-17  4:43 ` Jun Yao
  2018-09-24 13:01   ` Mark Rutland
  2018-09-17  4:43 ` [PATCH v5 2/6] arm64/mm: Pass ttbr1 as a parameter to __enable_mmu() Jun Yao
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 23+ messages in thread
From: Jun Yao @ 2018-09-17  4:43 UTC (permalink / raw)
  To: linux-arm-kernel; +Cc: catalin.marinas, will.deacon, james.morse, linux-kernel

To make the swapper_pg_dir read only, we will move it to the rodata
section. And force the kernel to set up the initial page table in
the init_pg_dir. After generating all levels of page tables, we copy
only the top level into the swapper_pg_dir during paging_init().

In this patch, just add the init_pg_dir to vmlinux.lds.S and
boiler-plate clearing/cleaning/invalidating it in head.S.

Signed-off-by: Jun Yao <yaojun8558363@gmail.com>
---
 arch/arm64/include/asm/assembler.h | 29 +++++++++++++++++++++++++++++
 arch/arm64/kernel/head.S           | 22 +++++++++++++++-------
 arch/arm64/kernel/vmlinux.lds.S    |  8 ++++++++
 3 files changed, 52 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 0bcc98dbba56..e7bdc324d538 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -456,6 +456,35 @@ USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
 	b.ne	9998b
 	.endm
 
+/*
+ * clear_page - clear one page
+ *
+ *	start:	page aligned virtual address
+ */
+	.macro clear_page, start:req
+9996:	stp	xzr, xzr, [\start], #16
+	stp	xzr, xzr, [\start], #16
+	stp	xzr, xzr, [\start], #16
+	stp	xzr, xzr, [\start], #16
+	tst	\start, #(PAGE_SIZE - 1)
+	b.ne	9996b
+	.endm
+
+/*
+ * clear_pages - clear contiguous pages
+ *
+ *	start, end:	page aligned virtual addresses
+ */
+	.macro clear_pages, start:req, end:req
+	sub	\end, \end, \start
+	lsr	\end, \end, #(PAGE_SHIFT)
+9997:	cbz	\end, 9998f
+	clear_page \start
+	sub	\end, \end, #1
+	b	9997b
+9998:
+	.endm
+
 /*
  * Annotate a function as position independent, i.e., safe to be called before
  * the kernel virtual mapping is activated.
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index b0853069702f..2c83a8c47e3f 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -295,18 +295,21 @@ __create_page_tables:
 	sub	x1, x1, x0
 	bl	__inval_dcache_area
 
+	adrp	x0, init_pg_dir
+	adrp	x1, init_pg_end
+	sub	x1, x1, x0
+	bl	__inval_dcache_area
+
 	/*
 	 * Clear the idmap and swapper page tables.
 	 */
 	adrp	x0, idmap_pg_dir
 	adrp	x1, swapper_pg_end
-	sub	x1, x1, x0
-1:	stp	xzr, xzr, [x0], #16
-	stp	xzr, xzr, [x0], #16
-	stp	xzr, xzr, [x0], #16
-	stp	xzr, xzr, [x0], #16
-	subs	x1, x1, #64
-	b.ne	1b
+	clear_pages x0, x1
+
+	adrp	x0, init_pg_dir
+	adrp	x1, init_pg_end
+	clear_pages x0, x1
 
 	mov	x7, SWAPPER_MM_MMUFLAGS
 
@@ -395,6 +398,11 @@ __create_page_tables:
 	dmb	sy
 	bl	__inval_dcache_area
 
+	adrp	x0, init_pg_dir
+	adrp	x1, init_pg_end
+	sub	x1, x1, x0
+	bl	__inval_dcache_area
+
 	ret	x28
 ENDPROC(__create_page_tables)
 	.ltorg
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 605d1b60469c..612ffc0bbe11 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -68,6 +68,12 @@ jiffies = jiffies_64;
 #define TRAMP_TEXT
 #endif
 
+#define INIT_PG_TABLES					\
+	. = ALIGN(PAGE_SIZE);				\
+	init_pg_dir = .;				\
+	. += SWAPPER_DIR_SIZE;				\
+	init_pg_end = .;
+
 /*
  * The size of the PE/COFF section that covers the kernel image, which
  * runs from stext to _edata, must be a round multiple of the PE/COFF
@@ -161,6 +167,8 @@ SECTIONS
 	__inittext_end = .;
 	__initdata_begin = .;
 
+	INIT_PG_TABLES
+
 	.init.data : {
 		INIT_DATA
 		INIT_SETUP(16)
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v5 2/6] arm64/mm: Pass ttbr1 as a parameter to __enable_mmu().
  2018-09-17  4:43 [PATCH v5 0/6] Move swapper_pg_dir to rodata section Jun Yao
  2018-09-17  4:43 ` [PATCH v5 1/6] arm64/mm: Introduce the init_pg_dir Jun Yao
@ 2018-09-17  4:43 ` Jun Yao
  2018-09-24 13:26   ` Mark Rutland
  2018-09-17  4:43 ` [PATCH v5 3/6] arm64/mm: Create the initial page table in the init_pg_dir Jun Yao
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 23+ messages in thread
From: Jun Yao @ 2018-09-17  4:43 UTC (permalink / raw)
  To: linux-arm-kernel; +Cc: catalin.marinas, will.deacon, james.morse, linux-kernel

The kernel will set up the initial page table in the init_pg_dir.
However, it will create the final page table in the swapper_pg_dir
during the initialization process. We need to let __enable_mmu()
know which page table to use.

Signed-off-by: Jun Yao <yaojun8558363@gmail.com>
---
 arch/arm64/kernel/head.S  | 19 +++++++++++--------
 arch/arm64/kernel/sleep.S |  1 +
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 2c83a8c47e3f..de2aaea00bd2 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -714,6 +714,7 @@ secondary_startup:
 	 * Common entry point for secondary CPUs.
 	 */
 	bl	__cpu_setup			// initialise processor
+	adrp	x1, swapper_pg_dir
 	bl	__enable_mmu
 	ldr	x8, =__secondary_switched
 	br	x8
@@ -756,6 +757,7 @@ ENDPROC(__secondary_switched)
  * Enable the MMU.
  *
  *  x0  = SCTLR_EL1 value for turning on the MMU.
+ *  x1  = TTBR1_EL1 value for turning on the MMU.
  *
  * Returns to the caller via x30/lr. This requires the caller to be covered
  * by the .idmap.text section.
@@ -764,15 +766,15 @@ ENDPROC(__secondary_switched)
  * If it isn't, park the CPU
  */
 ENTRY(__enable_mmu)
-	mrs	x1, ID_AA64MMFR0_EL1
-	ubfx	x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
-	cmp	x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
+	mrs	x5, ID_AA64MMFR0_EL1
+	ubfx	x6, x5, #ID_AA64MMFR0_TGRAN_SHIFT, 4
+	cmp	x6, #ID_AA64MMFR0_TGRAN_SUPPORTED
 	b.ne	__no_granule_support
-	update_early_cpu_boot_status 0, x1, x2
-	adrp	x1, idmap_pg_dir
-	adrp	x2, swapper_pg_dir
-	phys_to_ttbr x3, x1
-	phys_to_ttbr x4, x2
+	update_early_cpu_boot_status 0, x5, x6
+	adrp	x5, idmap_pg_dir
+	mov	x6, x1
+	phys_to_ttbr x3, x5
+	phys_to_ttbr x4, x6
 	msr	ttbr0_el1, x3			// load TTBR0
 	msr	ttbr1_el1, x4			// load TTBR1
 	isb
@@ -831,6 +833,7 @@ __primary_switch:
 	mrs	x20, sctlr_el1			// preserve old SCTLR_EL1 value
 #endif
 
+	adrp	x1, swapper_pg_dir
 	bl	__enable_mmu
 #ifdef CONFIG_RELOCATABLE
 	bl	__relocate_kernel
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index bebec8ef9372..3e53ffa07994 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -101,6 +101,7 @@ ENTRY(cpu_resume)
 	bl	el2_setup		// if in EL2 drop to EL1 cleanly
 	bl	__cpu_setup
 	/* enable the MMU early - so we can access sleep_save_stash by va */
+	adrp	x1, swapper_pg_dir
 	bl	__enable_mmu
 	ldr	x8, =_cpu_resume
 	br	x8
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v5 3/6] arm64/mm: Create the initial page table in the init_pg_dir.
  2018-09-17  4:43 [PATCH v5 0/6] Move swapper_pg_dir to rodata section Jun Yao
  2018-09-17  4:43 ` [PATCH v5 1/6] arm64/mm: Introduce the init_pg_dir Jun Yao
  2018-09-17  4:43 ` [PATCH v5 2/6] arm64/mm: Pass ttbr1 as a parameter to __enable_mmu() Jun Yao
@ 2018-09-17  4:43 ` Jun Yao
  2018-09-24 13:34   ` Mark Rutland
  2018-09-17  4:43 ` [PATCH v5 4/6] arm64/mm: Create the final page table directly in swapper_pg_dir Jun Yao
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 23+ messages in thread
From: Jun Yao @ 2018-09-17  4:43 UTC (permalink / raw)
  To: linux-arm-kernel; +Cc: catalin.marinas, will.deacon, james.morse, linux-kernel

Create the initial page table in the init_pg_dir. And update the
init_mm.pgd to make sure that pgd_offset_k() works correctly. When
the final page table is created, we redirect the init_mm.pgd to the
swapper_pg_dir.

Signed-off-by: Jun Yao <yaojun8558363@gmail.com>
---
 arch/arm64/include/asm/pgtable.h | 2 ++
 arch/arm64/kernel/asm-offsets.c  | 1 +
 arch/arm64/kernel/head.S         | 9 +++++++--
 arch/arm64/mm/mmu.c              | 1 +
 4 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 2ab2031b778c..b11d6fc62a62 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -718,6 +718,8 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
 }
 #endif
 
+extern pgd_t init_pg_dir[PTRS_PER_PGD];
+extern pgd_t init_pg_end[];
 extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
 extern pgd_t swapper_pg_end[];
 extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 323aeb5f2fe6..43f52cfdfad4 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -82,6 +82,7 @@ int main(void)
   DEFINE(S_FRAME_SIZE,		sizeof(struct pt_regs));
   BLANK();
   DEFINE(MM_CONTEXT_ID,		offsetof(struct mm_struct, context.id.counter));
+  DEFINE(MM_PGD,		offsetof(struct mm_struct, pgd));
   BLANK();
   DEFINE(VMA_VM_MM,		offsetof(struct vm_area_struct, vm_mm));
   DEFINE(VMA_VM_FLAGS,		offsetof(struct vm_area_struct, vm_flags));
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index de2aaea00bd2..cf8a58211b80 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -376,7 +376,7 @@ __create_page_tables:
 	/*
 	 * Map the kernel image (starting with PHYS_OFFSET).
 	 */
-	adrp	x0, swapper_pg_dir
+	adrp	x0, init_pg_dir
 	mov_q	x5, KIMAGE_VADDR + TEXT_OFFSET	// compile time __va(_text)
 	add	x5, x5, x23			// add KASLR displacement
 	mov	x4, PTRS_PER_PGD
@@ -439,6 +439,11 @@ __primary_switched:
 	bl	__pi_memset
 	dsb	ishst				// Make zero page visible to PTW
 
+	// Update init_mm.pgd
+	adrp	x0, init_pg_dir
+	adr_l	x1, init_mm
+	str	x0, [x1, #MM_PGD]
+
 #ifdef CONFIG_KASAN
 	bl	kasan_early_init
 #endif
@@ -833,7 +838,7 @@ __primary_switch:
 	mrs	x20, sctlr_el1			// preserve old SCTLR_EL1 value
 #endif
 
-	adrp	x1, swapper_pg_dir
+	adrp	x1, init_pg_dir
 	bl	__enable_mmu
 #ifdef CONFIG_RELOCATABLE
 	bl	__relocate_kernel
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 65f86271f02b..af80dca335ce 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -646,6 +646,7 @@ void __init paging_init(void)
 	cpu_replace_ttbr1(__va(pgd_phys));
 	memcpy(swapper_pg_dir, pgdp, PGD_SIZE);
 	cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
+	init_mm.pgd = swapper_pg_dir;
 
 	pgd_clear_fixmap();
 	memblock_free(pgd_phys, PAGE_SIZE);
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v5 4/6] arm64/mm: Create the final page table directly in swapper_pg_dir.
  2018-09-17  4:43 [PATCH v5 0/6] Move swapper_pg_dir to rodata section Jun Yao
                   ` (2 preceding siblings ...)
  2018-09-17  4:43 ` [PATCH v5 3/6] arm64/mm: Create the initial page table in the init_pg_dir Jun Yao
@ 2018-09-17  4:43 ` Jun Yao
  2018-09-17  4:43 ` [PATCH v5 5/6] arm64/mm: Populate the swapper_pg_dir by fixmap Jun Yao
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 23+ messages in thread
From: Jun Yao @ 2018-09-17  4:43 UTC (permalink / raw)
  To: linux-arm-kernel; +Cc: catalin.marinas, will.deacon, james.morse, linux-kernel

As the initial page table is created in the init_pg_dir, we can set
up the final page table directly in the swapper_pg_dir. And it only
contains the top level page table, so we can reduce it to a page
size.

Signed-off-by: Jun Yao <yaojun8558363@gmail.com>
---
 arch/arm64/kernel/vmlinux.lds.S |  2 +-
 arch/arm64/mm/mmu.c             | 29 ++---------------------------
 2 files changed, 3 insertions(+), 28 deletions(-)

diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 612ffc0bbe11..a634def77442 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -237,7 +237,7 @@ SECTIONS
 	. += RESERVED_TTBR0_SIZE;
 #endif
 	swapper_pg_dir = .;
-	. += SWAPPER_DIR_SIZE;
+	. += PAGE_SIZE;
 	swapper_pg_end = .;
 
 	__pecoff_data_size = ABSOLUTE(. - __initdata_begin);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index af80dca335ce..71532bcd76c1 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -629,35 +629,10 @@ static void __init map_kernel(pgd_t *pgdp)
  */
 void __init paging_init(void)
 {
-	phys_addr_t pgd_phys = early_pgtable_alloc();
-	pgd_t *pgdp = pgd_set_fixmap(pgd_phys);
-
-	map_kernel(pgdp);
-	map_mem(pgdp);
-
-	/*
-	 * We want to reuse the original swapper_pg_dir so we don't have to
-	 * communicate the new address to non-coherent secondaries in
-	 * secondary_entry, and so cpu_switch_mm can generate the address with
-	 * adrp+add rather than a load from some global variable.
-	 *
-	 * To do this we need to go via a temporary pgd.
-	 */
-	cpu_replace_ttbr1(__va(pgd_phys));
-	memcpy(swapper_pg_dir, pgdp, PGD_SIZE);
+	map_kernel(swapper_pg_dir);
+	map_mem(swapper_pg_dir);
 	cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
 	init_mm.pgd = swapper_pg_dir;
-
-	pgd_clear_fixmap();
-	memblock_free(pgd_phys, PAGE_SIZE);
-
-	/*
-	 * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd
-	 * allocated with it.
-	 */
-	memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE,
-		      __pa_symbol(swapper_pg_end) - __pa_symbol(swapper_pg_dir)
-		      - PAGE_SIZE);
 }
 
 /*
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v5 5/6] arm64/mm: Populate the swapper_pg_dir by fixmap.
  2018-09-17  4:43 [PATCH v5 0/6] Move swapper_pg_dir to rodata section Jun Yao
                   ` (3 preceding siblings ...)
  2018-09-17  4:43 ` [PATCH v5 4/6] arm64/mm: Create the final page table directly in swapper_pg_dir Jun Yao
@ 2018-09-17  4:43 ` Jun Yao
  2018-09-24 16:36   ` Mark Rutland
  2018-09-24 16:54   ` Mark Rutland
  2018-09-17  4:43 ` [PATCH v5 6/6] arm64/mm: Move {idmap_pg_dir .. swapper_pg_dir} to rodata section Jun Yao
                   ` (2 subsequent siblings)
  7 siblings, 2 replies; 23+ messages in thread
From: Jun Yao @ 2018-09-17  4:43 UTC (permalink / raw)
  To: linux-arm-kernel; +Cc: catalin.marinas, will.deacon, james.morse, linux-kernel

Since we will move the swapper_pg_dir to rodata section, we need a
way to update it. The fixmap can handle it. When the swapper_pg_dir
needs to be updated, we map it dynamically. The map will be
canceled after the update is complete. In this way, we can defend
against KSMA (Kernel Space Mirror Attack).

Signed-off-by: Jun Yao <yaojun8558363@gmail.com>
---
 arch/arm64/include/asm/pgtable.h | 38 ++++++++++++++++++++++++++------
 arch/arm64/mm/mmu.c              | 25 +++++++++++++++++++--
 2 files changed, 54 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index b11d6fc62a62..9e643fc2453d 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -429,8 +429,29 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 				 PUD_TYPE_TABLE)
 #endif
 
+extern pgd_t init_pg_dir[PTRS_PER_PGD];
+extern pgd_t init_pg_end[];
+extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+extern pgd_t swapper_pg_end[];
+extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
+extern pgd_t tramp_pg_dir[PTRS_PER_PGD];
+
+extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd);
+
+static inline bool in_swapper_pgdir(void *addr)
+{
+	return ((unsigned long)addr & PAGE_MASK) ==
+		((unsigned long)swapper_pg_dir & PAGE_MASK);
+}
+
 static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
+#ifdef __PAGETABLE_PMD_FOLDED
+	if (in_swapper_pgdir(pmdp)) {
+		set_swapper_pgd((pgd_t *)pmdp, __pgd(pmd_val(pmd)));
+		return;
+	}
+#endif
 	WRITE_ONCE(*pmdp, pmd);
 
 	if (pmd_valid(pmd))
@@ -484,6 +505,12 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
 
 static inline void set_pud(pud_t *pudp, pud_t pud)
 {
+#ifdef __PAGETABLE_PUD_FOLDED
+	if (in_swapper_pgdir(pudp)) {
+		set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud)));
+		return;
+	}
+#endif
 	WRITE_ONCE(*pudp, pud);
 
 	if (pud_valid(pud))
@@ -538,6 +565,10 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
 
 static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
 {
+	if (in_swapper_pgdir(pgdp)) {
+		set_swapper_pgd(pgdp, pgd);
+		return;
+	}
 	WRITE_ONCE(*pgdp, pgd);
 	dsb(ishst);
 }
@@ -718,13 +749,6 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
 }
 #endif
 
-extern pgd_t init_pg_dir[PTRS_PER_PGD];
-extern pgd_t init_pg_end[];
-extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
-extern pgd_t swapper_pg_end[];
-extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
-extern pgd_t tramp_pg_dir[PTRS_PER_PGD];
-
 /*
  * Encode and decode a swap entry:
  *	bits 0-1:	present (must be zero)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 71532bcd76c1..a8a60927f716 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -67,6 +67,24 @@ static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
 static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
 static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;
 
+static DEFINE_SPINLOCK(swapper_pgdir_lock);
+
+void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+	pgd_t *fixmap_pgdp;
+
+	spin_lock(&swapper_pgdir_lock);
+	fixmap_pgdp = pgd_set_fixmap(__pa(pgdp));
+	WRITE_ONCE(*fixmap_pgdp, pgd);
+	/*
+	 * We need dsb(ishst) here to ensure the page-table-walker sees
+	 * our new entry before set_p?d() returns. The fixmap's
+	 * flush_tlb_kernel_range() via clear_fixmap() does this for us.
+	 */
+	pgd_clear_fixmap();
+	spin_unlock(&swapper_pgdir_lock);
+}
+
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 			      unsigned long size, pgprot_t vma_prot)
 {
@@ -629,8 +647,11 @@ static void __init map_kernel(pgd_t *pgdp)
  */
 void __init paging_init(void)
 {
-	map_kernel(swapper_pg_dir);
-	map_mem(swapper_pg_dir);
+	pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir));
+
+	map_kernel(pgdp);
+	map_mem(pgdp);
+	pgd_clear_fixmap();
 	cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
 	init_mm.pgd = swapper_pg_dir;
 }
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v5 6/6] arm64/mm: Move {idmap_pg_dir .. swapper_pg_dir} to rodata section.
  2018-09-17  4:43 [PATCH v5 0/6] Move swapper_pg_dir to rodata section Jun Yao
                   ` (4 preceding siblings ...)
  2018-09-17  4:43 ` [PATCH v5 5/6] arm64/mm: Populate the swapper_pg_dir by fixmap Jun Yao
@ 2018-09-17  4:43 ` Jun Yao
  2018-09-21 22:26 ` [PATCH v5 0/6] Move swapper_pg_dir " James Morse
  2018-09-24 17:19 ` Mark Rutland
  7 siblings, 0 replies; 23+ messages in thread
From: Jun Yao @ 2018-09-17  4:43 UTC (permalink / raw)
  To: linux-arm-kernel; +Cc: catalin.marinas, will.deacon, james.morse, linux-kernel

Move the idmap_pg_dir/tramp_pg_dir/reserved_ttbr0/swapper_pg_dir to
the rodata section. When the kernel is initialized, the
idmap_pg_dir, tramp_pg_dir and reserved_ttbr0 will not change. And
it's safe to move them to rodata section.

Signed-off-by: Jun Yao <yaojun8558363@gmail.com>
---
 arch/arm64/kernel/vmlinux.lds.S | 39 ++++++++++++++++++++-------------
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index a634def77442..328925b308fc 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -64,8 +64,13 @@ jiffies = jiffies_64;
 	*(.entry.tramp.text)				\
 	. = ALIGN(PAGE_SIZE);				\
 	__entry_tramp_text_end = .;
+
+#define TRAMP_PG_TABLE					\
+	tramp_pg_dir = .;				\
+	. += PAGE_SIZE;
 #else
 #define TRAMP_TEXT
+#define TRAMP_PG_TABLE
 #endif
 
 #define INIT_PG_TABLES					\
@@ -74,6 +79,24 @@ jiffies = jiffies_64;
 	. += SWAPPER_DIR_SIZE;				\
 	init_pg_end = .;
 
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+#define RESERVED_PG_TABLE				\
+	reserved_ttbr0 = .;				\
+	. += RESERVED_TTBR0_SIZE;
+#else
+#define RESERVED_PG_TABLE
+#endif
+
+#define KERNEL_PG_TABLES				\
+	. = ALIGN(PAGE_SIZE);                           \
+	idmap_pg_dir = .;				\
+	. += IDMAP_DIR_SIZE;				\
+	TRAMP_PG_TABLE					\
+	RESERVED_PG_TABLE				\
+	swapper_pg_dir = .;				\
+	. += PAGE_SIZE;					\
+	swapper_pg_end = .;
+
 /*
  * The size of the PE/COFF section that covers the kernel image, which
  * runs from stext to _edata, must be a round multiple of the PE/COFF
@@ -143,6 +166,7 @@ SECTIONS
 	RO_DATA(PAGE_SIZE)		/* everything from this point to     */
 	EXCEPTION_TABLE(8)		/* __init_begin will be marked RO NX */
 	NOTES
+	KERNEL_PG_TABLES
 
 	. = ALIGN(SEGMENT_ALIGN);
 	__init_begin = .;
@@ -224,21 +248,6 @@ SECTIONS
 	BSS_SECTION(0, 0, 0)
 
 	. = ALIGN(PAGE_SIZE);
-	idmap_pg_dir = .;
-	. += IDMAP_DIR_SIZE;
-
-#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-	tramp_pg_dir = .;
-	. += PAGE_SIZE;
-#endif
-
-#ifdef CONFIG_ARM64_SW_TTBR0_PAN
-	reserved_ttbr0 = .;
-	. += RESERVED_TTBR0_SIZE;
-#endif
-	swapper_pg_dir = .;
-	. += PAGE_SIZE;
-	swapper_pg_end = .;
 
 	__pecoff_data_size = ABSOLUTE(. - __initdata_begin);
 	_end = .;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH v5 0/6] Move swapper_pg_dir to rodata section.
  2018-09-17  4:43 [PATCH v5 0/6] Move swapper_pg_dir to rodata section Jun Yao
                   ` (5 preceding siblings ...)
  2018-09-17  4:43 ` [PATCH v5 6/6] arm64/mm: Move {idmap_pg_dir .. swapper_pg_dir} to rodata section Jun Yao
@ 2018-09-21 22:26 ` James Morse
  2018-09-25  8:56   ` Jun Yao
  2018-09-24 17:19 ` Mark Rutland
  7 siblings, 1 reply; 23+ messages in thread
From: James Morse @ 2018-09-21 22:26 UTC (permalink / raw)
  To: Jun Yao; +Cc: linux-arm-kernel, catalin.marinas, will.deacon, linux-kernel

Hi Jun,

On 09/17/2018 05:43 AM, Jun Yao wrote:
> Version 5 changes:
> 	1. Correct spelling and indentation errors[1].
> 	2. Update init_mm.pgd by assembly[2].
> 	3. Simplify set_p?d() by introducing set_swapper_pgd()[3].
> 	4. Reduce unnecessary tlbi for every write to swapper_pg_dir
> 	   during paging_init()[3].


For the series:
Reviewed-by: James Morse <james.morse@arm.com>


Thanks,

James

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v5 1/6] arm64/mm: Introduce the init_pg_dir.
  2018-09-17  4:43 ` [PATCH v5 1/6] arm64/mm: Introduce the init_pg_dir Jun Yao
@ 2018-09-24 13:01   ` Mark Rutland
  2018-09-24 14:03     ` Mark Rutland
  0 siblings, 1 reply; 23+ messages in thread
From: Mark Rutland @ 2018-09-24 13:01 UTC (permalink / raw)
  To: Jun Yao
  Cc: linux-arm-kernel, catalin.marinas, will.deacon, james.morse,
	linux-kernel

On Mon, Sep 17, 2018 at 12:43:28PM +0800, Jun Yao wrote:
> To make the swapper_pg_dir read only, we will move it to the rodata
> section. And force the kernel to set up the initial page table in
> the init_pg_dir. After generating all levels of page tables, we copy
> only the top level into the swapper_pg_dir during paging_init().
> 
> In this patch, just add the init_pg_dir to vmlinux.lds.S and
> boiler-plate clearing/cleaning/invalidating it in head.S.

I don't believe it is necessary to explicitly zero init_pg_dir.

We have to clear swapper_pg_dir and idmap_pg_dir since they're after
_edata, and the bootloader won't have zeroed the relevant memory (nor
will it have cleaned that memory to the PoC).

However, anything in .init must have been loaded and cleaned to the PoC
by the bootloader per the arm64 Linux boot protocol. So as long as
that's zero in the kernel Image, that must be zero in memory (and any
caches).

> 
> Signed-off-by: Jun Yao <yaojun8558363@gmail.com>
> ---
>  arch/arm64/include/asm/assembler.h | 29 +++++++++++++++++++++++++++++
>  arch/arm64/kernel/head.S           | 22 +++++++++++++++-------
>  arch/arm64/kernel/vmlinux.lds.S    |  8 ++++++++
>  3 files changed, 52 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
> index 0bcc98dbba56..e7bdc324d538 100644
> --- a/arch/arm64/include/asm/assembler.h
> +++ b/arch/arm64/include/asm/assembler.h
> @@ -456,6 +456,35 @@ USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
>  	b.ne	9998b
>  	.endm
>  
> +/*
> + * clear_page - clear one page
> + *
> + *	start:	page aligned virtual address
> + */
> +	.macro clear_page, start:req
> +9996:	stp	xzr, xzr, [\start], #16
> +	stp	xzr, xzr, [\start], #16
> +	stp	xzr, xzr, [\start], #16
> +	stp	xzr, xzr, [\start], #16
> +	tst	\start, #(PAGE_SIZE - 1)
> +	b.ne	9996b
> +	.endm
> +
> +/*
> + * clear_pages - clear contiguous pages
> + *
> + *	start, end:	page aligned virtual addresses
> + */
> +	.macro clear_pages, start:req, end:req
> +	sub	\end, \end, \start
> +	lsr	\end, \end, #(PAGE_SHIFT)
> +9997:	cbz	\end, 9998f
> +	clear_page \start
> +	sub	\end, \end, #1
> +	b	9997b
> +9998:
> +	.endm

Given the above, I don't think that we need these macros. In a
subsequent patch, we can just limit the range of pages that we zero.

> +
>  /*
>   * Annotate a function as position independent, i.e., safe to be called before
>   * the kernel virtual mapping is activated.
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index b0853069702f..2c83a8c47e3f 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -295,18 +295,21 @@ __create_page_tables:
>  	sub	x1, x1, x0
>  	bl	__inval_dcache_area
>  
> +	adrp	x0, init_pg_dir
> +	adrp	x1, init_pg_end
> +	sub	x1, x1, x0
> +	bl	__inval_dcache_area
> +
>  	/*
>  	 * Clear the idmap and swapper page tables.
>  	 */
>  	adrp	x0, idmap_pg_dir
>  	adrp	x1, swapper_pg_end
> -	sub	x1, x1, x0
> -1:	stp	xzr, xzr, [x0], #16
> -	stp	xzr, xzr, [x0], #16
> -	stp	xzr, xzr, [x0], #16
> -	stp	xzr, xzr, [x0], #16
> -	subs	x1, x1, #64
> -	b.ne	1b
> +	clear_pages x0, x1
> +
> +	adrp	x0, init_pg_dir
> +	adrp	x1, init_pg_end
> +	clear_pages x0, x1
>  
>  	mov	x7, SWAPPER_MM_MMUFLAGS
>  
> @@ -395,6 +398,11 @@ __create_page_tables:
>  	dmb	sy
>  	bl	__inval_dcache_area
>  
> +	adrp	x0, init_pg_dir
> +	adrp	x1, init_pg_end
> +	sub	x1, x1, x0
> +	bl	__inval_dcache_area
> +
>  	ret	x28
>  ENDPROC(__create_page_tables)
>  	.ltorg
> diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
> index 605d1b60469c..612ffc0bbe11 100644
> --- a/arch/arm64/kernel/vmlinux.lds.S
> +++ b/arch/arm64/kernel/vmlinux.lds.S
> @@ -68,6 +68,12 @@ jiffies = jiffies_64;
>  #define TRAMP_TEXT
>  #endif
>  
> +#define INIT_PG_TABLES					\
> +	. = ALIGN(PAGE_SIZE);				\
> +	init_pg_dir = .;				\
> +	. += SWAPPER_DIR_SIZE;				\
> +	init_pg_end = .;

This will allocate a full set of PGD + PUD + PMD (+ PTE) levels, but the
commit message says we only copy the top level table.

The memory for otjher levels of table can be re-allocated once the
kernel init phase is over, so that seems a bit worrying.

Do we switch over to new copies of those levels, or do we continue using
the entries from INIT_PG_TABLES?

Thanks,
Mark.

> +
>  /*
>   * The size of the PE/COFF section that covers the kernel image, which
>   * runs from stext to _edata, must be a round multiple of the PE/COFF
> @@ -161,6 +167,8 @@ SECTIONS
>  	__inittext_end = .;
>  	__initdata_begin = .;
>  
> +	INIT_PG_TABLES
> +
>  	.init.data : {
>  		INIT_DATA
>  		INIT_SETUP(16)
> -- 
> 2.17.1
> 

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v5 2/6] arm64/mm: Pass ttbr1 as a parameter to __enable_mmu().
  2018-09-17  4:43 ` [PATCH v5 2/6] arm64/mm: Pass ttbr1 as a parameter to __enable_mmu() Jun Yao
@ 2018-09-24 13:26   ` Mark Rutland
  0 siblings, 0 replies; 23+ messages in thread
From: Mark Rutland @ 2018-09-24 13:26 UTC (permalink / raw)
  To: Jun Yao
  Cc: linux-arm-kernel, catalin.marinas, will.deacon, james.morse,
	linux-kernel

On Mon, Sep 17, 2018 at 12:43:29PM +0800, Jun Yao wrote:
> The kernel will set up the initial page table in the init_pg_dir.
> However, it will create the final page table in the swapper_pg_dir
> during the initialization process. We need to let __enable_mmu()
> know which page table to use.
> 
> Signed-off-by: Jun Yao <yaojun8558363@gmail.com>
> ---
>  arch/arm64/kernel/head.S  | 19 +++++++++++--------
>  arch/arm64/kernel/sleep.S |  1 +
>  2 files changed, 12 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 2c83a8c47e3f..de2aaea00bd2 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -714,6 +714,7 @@ secondary_startup:
>  	 * Common entry point for secondary CPUs.
>  	 */
>  	bl	__cpu_setup			// initialise processor
> +	adrp	x1, swapper_pg_dir
>  	bl	__enable_mmu
>  	ldr	x8, =__secondary_switched
>  	br	x8
> @@ -756,6 +757,7 @@ ENDPROC(__secondary_switched)
>   * Enable the MMU.
>   *
>   *  x0  = SCTLR_EL1 value for turning on the MMU.
> + *  x1  = TTBR1_EL1 value for turning on the MMU.
>   *
>   * Returns to the caller via x30/lr. This requires the caller to be covered
>   * by the .idmap.text section.
> @@ -764,15 +766,15 @@ ENDPROC(__secondary_switched)
>   * If it isn't, park the CPU
>   */
>  ENTRY(__enable_mmu)
> -	mrs	x1, ID_AA64MMFR0_EL1
> -	ubfx	x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
> -	cmp	x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
> +	mrs	x5, ID_AA64MMFR0_EL1
> +	ubfx	x6, x5, #ID_AA64MMFR0_TGRAN_SHIFT, 4
> +	cmp	x6, #ID_AA64MMFR0_TGRAN_SUPPORTED
>  	b.ne	__no_granule_support
> -	update_early_cpu_boot_status 0, x1, x2
> -	adrp	x1, idmap_pg_dir
> -	adrp	x2, swapper_pg_dir
> -	phys_to_ttbr x3, x1
> -	phys_to_ttbr x4, x2
> +	update_early_cpu_boot_status 0, x5, x6
> +	adrp	x5, idmap_pg_dir
> +	mov	x6, x1
> +	phys_to_ttbr x3, x5
> +	phys_to_ttbr x4, x6
>  	msr	ttbr0_el1, x3			// load TTBR0
>  	msr	ttbr1_el1, x4			// load TTBR1
>  	isb

I think that the register shuffling here is unnecessarily confusing, as
this can be reduced to:

ENTRY(__enable_mmu)
	mrs	x2, ID_AA64MMFR0_EL1
	ubfx	x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4
	cmp	x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
	b.ne	__no_granule_support
	update_early_cpu_boot_status 0, x2, x3
	adrp	x2, idmap_pg_dir
	phys_to_ttbr x1, x1
	phys_to_ttbr x2, x2
	msr	ttbr0_el1, x2			// load TTBR0
	msr	ttbr1_el1, x1			// load TTBR1
	isb

... otherwise, this patch looks sane to me, so with the above change:

Reviewed-by: Mark Rutland <mark.rutland@arm.com>

Thanks,
Mark.

> @@ -831,6 +833,7 @@ __primary_switch:
>  	mrs	x20, sctlr_el1			// preserve old SCTLR_EL1 value
>  #endif
>  
> +	adrp	x1, swapper_pg_dir
>  	bl	__enable_mmu
>  #ifdef CONFIG_RELOCATABLE
>  	bl	__relocate_kernel
> diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
> index bebec8ef9372..3e53ffa07994 100644
> --- a/arch/arm64/kernel/sleep.S
> +++ b/arch/arm64/kernel/sleep.S
> @@ -101,6 +101,7 @@ ENTRY(cpu_resume)
>  	bl	el2_setup		// if in EL2 drop to EL1 cleanly
>  	bl	__cpu_setup
>  	/* enable the MMU early - so we can access sleep_save_stash by va */
> +	adrp	x1, swapper_pg_dir
>  	bl	__enable_mmu
>  	ldr	x8, =_cpu_resume
>  	br	x8
> -- 
> 2.17.1
> 

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v5 3/6] arm64/mm: Create the initial page table in the init_pg_dir.
  2018-09-17  4:43 ` [PATCH v5 3/6] arm64/mm: Create the initial page table in the init_pg_dir Jun Yao
@ 2018-09-24 13:34   ` Mark Rutland
  2018-10-01 13:49     ` James Morse
  0 siblings, 1 reply; 23+ messages in thread
From: Mark Rutland @ 2018-09-24 13:34 UTC (permalink / raw)
  To: Jun Yao
  Cc: linux-arm-kernel, catalin.marinas, will.deacon, james.morse,
	linux-kernel

On Mon, Sep 17, 2018 at 12:43:30PM +0800, Jun Yao wrote:
> Create the initial page table in the init_pg_dir. And update the
> init_mm.pgd to make sure that pgd_offset_k() works correctly. When
> the final page table is created, we redirect the init_mm.pgd to the
> swapper_pg_dir.
> 
> Signed-off-by: Jun Yao <yaojun8558363@gmail.com>
> ---
>  arch/arm64/include/asm/pgtable.h | 2 ++
>  arch/arm64/kernel/asm-offsets.c  | 1 +
>  arch/arm64/kernel/head.S         | 9 +++++++--
>  arch/arm64/mm/mmu.c              | 1 +
>  4 files changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index 2ab2031b778c..b11d6fc62a62 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -718,6 +718,8 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
>  }
>  #endif
>  
> +extern pgd_t init_pg_dir[PTRS_PER_PGD];
> +extern pgd_t init_pg_end[];
>  extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
>  extern pgd_t swapper_pg_end[];
>  extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
> diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
> index 323aeb5f2fe6..43f52cfdfad4 100644
> --- a/arch/arm64/kernel/asm-offsets.c
> +++ b/arch/arm64/kernel/asm-offsets.c
> @@ -82,6 +82,7 @@ int main(void)
>    DEFINE(S_FRAME_SIZE,		sizeof(struct pt_regs));
>    BLANK();
>    DEFINE(MM_CONTEXT_ID,		offsetof(struct mm_struct, context.id.counter));
> +  DEFINE(MM_PGD,		offsetof(struct mm_struct, pgd));
>    BLANK();
>    DEFINE(VMA_VM_MM,		offsetof(struct vm_area_struct, vm_mm));
>    DEFINE(VMA_VM_FLAGS,		offsetof(struct vm_area_struct, vm_flags));
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index de2aaea00bd2..cf8a58211b80 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -376,7 +376,7 @@ __create_page_tables:
>  	/*
>  	 * Map the kernel image (starting with PHYS_OFFSET).
>  	 */
> -	adrp	x0, swapper_pg_dir
> +	adrp	x0, init_pg_dir
>  	mov_q	x5, KIMAGE_VADDR + TEXT_OFFSET	// compile time __va(_text)
>  	add	x5, x5, x23			// add KASLR displacement
>  	mov	x4, PTRS_PER_PGD
> @@ -439,6 +439,11 @@ __primary_switched:
>  	bl	__pi_memset
>  	dsb	ishst				// Make zero page visible to PTW
>  
> +	// Update init_mm.pgd
> +	adrp	x0, init_pg_dir
> +	adr_l	x1, init_mm
> +	str	x0, [x1, #MM_PGD]
> +

We should be able to set this up statically with INIT_MM_CONTEXT(). i.e.
in <asm/mmu.h> have:

#define INIT_MM_CONTEXT(name)	\
	.pgd = init_pg_dir

Thanks,
Mark.

>  #ifdef CONFIG_KASAN
>  	bl	kasan_early_init
>  #endif
> @@ -833,7 +838,7 @@ __primary_switch:
>  	mrs	x20, sctlr_el1			// preserve old SCTLR_EL1 value
>  #endif
>  
> -	adrp	x1, swapper_pg_dir
> +	adrp	x1, init_pg_dir
>  	bl	__enable_mmu
>  #ifdef CONFIG_RELOCATABLE
>  	bl	__relocate_kernel
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index 65f86271f02b..af80dca335ce 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -646,6 +646,7 @@ void __init paging_init(void)
>  	cpu_replace_ttbr1(__va(pgd_phys));
>  	memcpy(swapper_pg_dir, pgdp, PGD_SIZE);
>  	cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
> +	init_mm.pgd = swapper_pg_dir;
>  
>  	pgd_clear_fixmap();
>  	memblock_free(pgd_phys, PAGE_SIZE);
> -- 
> 2.17.1
> 

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v5 1/6] arm64/mm: Introduce the init_pg_dir.
  2018-09-24 13:01   ` Mark Rutland
@ 2018-09-24 14:03     ` Mark Rutland
  0 siblings, 0 replies; 23+ messages in thread
From: Mark Rutland @ 2018-09-24 14:03 UTC (permalink / raw)
  To: Jun Yao
  Cc: linux-arm-kernel, catalin.marinas, will.deacon, james.morse,
	linux-kernel

On Mon, Sep 24, 2018 at 02:01:37PM +0100, Mark Rutland wrote:
> On Mon, Sep 17, 2018 at 12:43:28PM +0800, Jun Yao wrote:
> > To make the swapper_pg_dir read only, we will move it to the rodata
> > section. And force the kernel to set up the initial page table in
> > the init_pg_dir. After generating all levels page table, we copy
> > only the top level into the swapper_pg_dir during paging_init().

> > +#define INIT_PG_TABLES					\
> > +	. = ALIGN(PAGE_SIZE);				\
> > +	init_pg_dir = .;				\
> > +	. += SWAPPER_DIR_SIZE;				\
> > +	init_pg_end = .;
> 
> This will allocate a full set of PGD + PUD + PMD (+ PTE) levels, but the
> commit message says we only copy the top level table.
> 
> The memory for other levels of table can be re-allocated once the
> kernel init phase is over, so that seems a bit worrying.
> 
> Do we switch over to new copies of those levels, or do we continue using
> the entries from INIT_PG_TABLES?

I now see that we already allocate copies of those levels of table (and
free the originals in paging_init), so this is not a problem.

Sorry for the noise!

Mark.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v5 5/6] arm64/mm: Populate the swapper_pg_dir by fixmap.
  2018-09-17  4:43 ` [PATCH v5 5/6] arm64/mm: Populate the swapper_pg_dir by fixmap Jun Yao
@ 2018-09-24 16:36   ` Mark Rutland
  2018-10-01 10:41     ` James Morse
  2018-09-24 16:54   ` Mark Rutland
  1 sibling, 1 reply; 23+ messages in thread
From: Mark Rutland @ 2018-09-24 16:36 UTC (permalink / raw)
  To: Jun Yao
  Cc: linux-arm-kernel, catalin.marinas, will.deacon, james.morse,
	linux-kernel

On Mon, Sep 17, 2018 at 12:43:32PM +0800, Jun Yao wrote:
> Since we will move the swapper_pg_dir to rodata section, we need a
> way to update it. The fixmap can handle it. When the swapper_pg_dir
> needs to be updated, we map it dynamically. The map will be
> canceled after the update is complete. In this way, we can defend
> against KSMA (Kernel Space Mirror Attack).
> 
> Signed-off-by: Jun Yao <yaojun8558363@gmail.com>
> ---
>  arch/arm64/include/asm/pgtable.h | 38 ++++++++++++++++++++++++++------
>  arch/arm64/mm/mmu.c              | 25 +++++++++++++++++++--
>  2 files changed, 54 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index b11d6fc62a62..9e643fc2453d 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -429,8 +429,29 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
>  				 PUD_TYPE_TABLE)
>  #endif
>  
> +extern pgd_t init_pg_dir[PTRS_PER_PGD];
> +extern pgd_t init_pg_end[];
> +extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
> +extern pgd_t swapper_pg_end[];
> +extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
> +extern pgd_t tramp_pg_dir[PTRS_PER_PGD];
> +
> +extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd);
> +
> +static inline bool in_swapper_pgdir(void *addr)
> +{
> +	return ((unsigned long)addr & PAGE_MASK) ==
> +		((unsigned long)swapper_pg_dir & PAGE_MASK);
> +}
> +
>  static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
>  {
> +#ifdef __PAGETABLE_PMD_FOLDED
> +	if (in_swapper_pgdir(pmdp)) {
> +		set_swapper_pgd((pgd_t *)pmdp, __pgd(pmd_val(pmd)));
> +		return;
> +	}
> +#endif
>  	WRITE_ONCE(*pmdp, pmd);
>  
>  	if (pmd_valid(pmd))
> @@ -484,6 +505,12 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
>  
>  static inline void set_pud(pud_t *pudp, pud_t pud)
>  {
> +#ifdef __PAGETABLE_PUD_FOLDED
> +	if (in_swapper_pgdir(pudp)) {
> +		set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud)));
> +		return;
> +	}
> +#endif
>  	WRITE_ONCE(*pudp, pud);
>  
>  	if (pud_valid(pud))
> @@ -538,6 +565,10 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
>  
>  static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
>  {
> +	if (in_swapper_pgdir(pgdp)) {
> +		set_swapper_pgd(pgdp, pgd);
> +		return;
> +	}

It's somewhat frustrating that we have to duplicate this logic across
all of set_p{m,u,g}d(), rather than this living in set_pgd(), passing
the value up set_pmd() -> set_pud() -> set_pgd().

I see that the generic no-p{m,u}d headers force this structure, and I
haven't come up with anything better. :/

>  	WRITE_ONCE(*pgdp, pgd);
>  	dsb(ishst);
>  }
> @@ -718,13 +749,6 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
>  }
>  #endif
>  
> -extern pgd_t init_pg_dir[PTRS_PER_PGD];
> -extern pgd_t init_pg_end[];
> -extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
> -extern pgd_t swapper_pg_end[];
> -extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
> -extern pgd_t tramp_pg_dir[PTRS_PER_PGD];
> -
>  /*
>   * Encode and decode a swap entry:
>   *	bits 0-1:	present (must be zero)
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index 71532bcd76c1..a8a60927f716 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -67,6 +67,24 @@ static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
>  static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
>  static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;
>  
> +static DEFINE_SPINLOCK(swapper_pgdir_lock);
> +
> +void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
> +{
> +	pgd_t *fixmap_pgdp;
> +
> +	spin_lock(&swapper_pgdir_lock);
> +	fixmap_pgdp = pgd_set_fixmap(__pa(pgdp));
> +	WRITE_ONCE(*fixmap_pgdp, pgd);
> +	/*
> +	 * We need dsb(ishst) here to ensure the page-table-walker sees
> +	 * our new entry before set_p?d() returns. The fixmap's
> +	 * flush_tlb_kernel_range() via clear_fixmap() does this for us.
> +	 */
> +	pgd_clear_fixmap();
> +	spin_unlock(&swapper_pgdir_lock);
> +}

I'm rather worried that we could deadlock here.

Are we certain we never poke the kernel page tables in IRQ context?

Otherwise, this looks fine to me.

Thanks,
Mark.

> +
>  pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
>  			      unsigned long size, pgprot_t vma_prot)
>  {
> @@ -629,8 +647,11 @@ static void __init map_kernel(pgd_t *pgdp)
>   */
>  void __init paging_init(void)
>  {
> -	map_kernel(swapper_pg_dir);
> -	map_mem(swapper_pg_dir);
> +	pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir));
> +
> +	map_kernel(pgdp);
> +	map_mem(pgdp);
> +	pgd_clear_fixmap();
>  	cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
>  	init_mm.pgd = swapper_pg_dir;
>  }
> -- 
> 2.17.1
> 

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v5 5/6] arm64/mm: Populate the swapper_pg_dir by fixmap.
  2018-09-17  4:43 ` [PATCH v5 5/6] arm64/mm: Populate the swapper_pg_dir by fixmap Jun Yao
  2018-09-24 16:36   ` Mark Rutland
@ 2018-09-24 16:54   ` Mark Rutland
  1 sibling, 0 replies; 23+ messages in thread
From: Mark Rutland @ 2018-09-24 16:54 UTC (permalink / raw)
  To: Jun Yao
  Cc: linux-arm-kernel, catalin.marinas, will.deacon, james.morse,
	linux-kernel

On Mon, Sep 17, 2018 at 12:43:32PM +0800, Jun Yao wrote:
> Since we will move the swapper_pg_dir to rodata section, we need a
> way to update it. The fixmap can handle it. When the swapper_pg_dir
> needs to be updated, we map it dynamically. The map will be
> canceled after the update is complete. In this way, we can defend
> against KSMA (Kernel Space Mirror Attack).
> 
> Signed-off-by: Jun Yao <yaojun8558363@gmail.com>
> ---
>  arch/arm64/include/asm/pgtable.h | 38 ++++++++++++++++++++++++++------
>  arch/arm64/mm/mmu.c              | 25 +++++++++++++++++++--
>  2 files changed, 54 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index b11d6fc62a62..9e643fc2453d 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -429,8 +429,29 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
>  				 PUD_TYPE_TABLE)
>  #endif
>  
> +extern pgd_t init_pg_dir[PTRS_PER_PGD];
> +extern pgd_t init_pg_end[];
> +extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
> +extern pgd_t swapper_pg_end[];
> +extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
> +extern pgd_t tramp_pg_dir[PTRS_PER_PGD];
> +
> +extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd);
> +
> +static inline bool in_swapper_pgdir(void *addr)
> +{
> +	return ((unsigned long)addr & PAGE_MASK) ==
> +		((unsigned long)swapper_pg_dir & PAGE_MASK);
> +}
> +
>  static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
>  {
> +#ifdef __PAGETABLE_PMD_FOLDED
> +	if (in_swapper_pgdir(pmdp)) {
> +		set_swapper_pgd((pgd_t *)pmdp, __pgd(pmd_val(pmd)));
> +		return;
> +	}
> +#endif

So that we can get consistent build coverage, could we make this:

	if (__is_defined(__PAGETABLE_PMD_FOLDED) && in_swapper_pgdir(pmdp)) {
		set_swapper_pgd((pgd_t *)pmdp, __pgd(pmd_val(pmd)));
		return;
	}

>  	WRITE_ONCE(*pmdp, pmd);
>  
>  	if (pmd_valid(pmd))
> @@ -484,6 +505,12 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
>  
>  static inline void set_pud(pud_t *pudp, pud_t pud)
>  {
> +#ifdef __PAGETABLE_PUD_FOLDED
> +	if (in_swapper_pgdir(pudp)) {
> +		set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud)));
> +		return;
> +	}
> +#endif

... and likewise:

	if (__is_defined(__PAGETABLE_PUD_FOLDED) && in_swapper_pgdir(pudp)) {
		set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud)));
		return;
	}

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v5 0/6] Move swapper_pg_dir to rodata section.
  2018-09-17  4:43 [PATCH v5 0/6] Move swapper_pg_dir to rodata section Jun Yao
                   ` (6 preceding siblings ...)
  2018-09-21 22:26 ` [PATCH v5 0/6] Move swapper_pg_dir " James Morse
@ 2018-09-24 17:19 ` Mark Rutland
  2018-09-25  9:53   ` Jun Yao
  7 siblings, 1 reply; 23+ messages in thread
From: Mark Rutland @ 2018-09-24 17:19 UTC (permalink / raw)
  To: Jun Yao
  Cc: linux-arm-kernel, catalin.marinas, will.deacon, james.morse,
	linux-kernel

Hi,

On Mon, Sep 17, 2018 at 12:43:27PM +0800, Jun Yao wrote:
> Version 5 changes:
> 	1. Correct spelling and indentation errors[1].
> 	2. Update init_mm.pgd by assembly[2].
> 	3. Simplify set_p?d() by introducing set_swapper_pgd()[3].
> 	4. Reduce unnecessary tlbi for every write to swapper_pg_dir
> 	   during paging_init()[3].
> 
> v4: https://www.spinics.net/lists/arm-kernel/msg672195.html
> v3: https://www.spinics.net/lists/arm-kernel/msg662537.html
> v2: https://patchwork.kernel.org/patch/10485641/
> v1: https://patchwork.kernel.org/patch/10476595/
> 
> [1] https://www.spinics.net/lists/arm-kernel/msg675189.html
> [2] https://www.spinics.net/lists/arm-kernel/msg675193.html
> [3] https://www.spinics.net/lists/arm-kernel/msg675196.html
> 
> Jun Yao (6):
>   arm64/mm: Introduce the init_pg_dir.
>   arm64/mm: Pass ttbr1 as a parameter to __enable_mmu().
>   arm64/mm: Create the initial page table in the init_pg_dir.
>   arm64/mm: Create the final page table directly in swapper_pg_dir.
>   arm64/mm: Populate the swapper_pg_dir by fixmap.
>   arm64/mm: Move {idmap_pg_dir .. swapper_pg_dir} to rodata section.
> 
>  arch/arm64/include/asm/assembler.h | 29 ++++++++++++++++++
>  arch/arm64/include/asm/pgtable.h   | 36 ++++++++++++++++++----
>  arch/arm64/kernel/asm-offsets.c    |  1 +
>  arch/arm64/kernel/head.S           | 48 ++++++++++++++++++++----------
>  arch/arm64/kernel/sleep.S          |  1 +
>  arch/arm64/kernel/vmlinux.lds.S    | 47 +++++++++++++++++++----------
>  arch/arm64/mm/mmu.c                | 45 +++++++++++++---------------
>  7 files changed, 147 insertions(+), 60 deletions(-)

I've pushed a branch with the cleanups I requested [1] folded in.

I'm still a bit worried about the pgd lock, but otherwise I think this
is sound. I intend to throw some testing at it, just in case.

If you're happy with that branch, I'll ask Will and Catalin to consider
picking it up.

Thanks,
Mark.

[1] git://git.kernel.org/pub/scm/linux/kernel/git/mark/linux.git arm64/ro-swapper

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v5 0/6] Move swapper_pg_dir to rodata section.
  2018-09-21 22:26 ` [PATCH v5 0/6] Move swapper_pg_dir " James Morse
@ 2018-09-25  8:56   ` Jun Yao
  0 siblings, 0 replies; 23+ messages in thread
From: Jun Yao @ 2018-09-25  8:56 UTC (permalink / raw)
  To: James Morse; +Cc: linux-arm-kernel, catalin.marinas, will.deacon, linux-kernel

Hi James,

On Fri, Sep 21, 2018 at 11:26:39PM +0100, James Morse wrote:
> Hi Jun,
> 
> On 09/17/2018 05:43 AM, Jun Yao wrote:
> > Version 5 changes:
> > 	1. Correct spelling and indentation errors[1].
> > 	2. Update init_mm.pgd by assembly[2].
> > 	3. Simplify set_p?d() by introducing set_swapper_pgd()[3].
> > 	4. Reduce unnecessary tlbi for every write to swapper_pg_dir
> > 	   during paging_init()[3].
> 
> 
> For the series:
> Reviewed-by: James Morse <james.morse@arm.com>

Thank you very much for helping me improve the patch and tolerate the
mistakes I made.

Thanks,

Jun

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v5 0/6] Move swapper_pg_dir to rodata section.
  2018-09-24 17:19 ` Mark Rutland
@ 2018-09-25  9:53   ` Jun Yao
  2018-09-25 14:06     ` Mark Rutland, catalin.marinas
  0 siblings, 1 reply; 23+ messages in thread
From: Jun Yao @ 2018-09-25  9:53 UTC (permalink / raw)
  To: Mark Rutland
  Cc: linux-arm-kernel, catalin.marinas, will.deacon, james.morse,
	linux-kernel

Hi Mark,

On Mon, Sep 24, 2018 at 06:19:36PM +0100, Mark Rutland wrote:
> Hi,
> 
> On Mon, Sep 17, 2018 at 12:43:27PM +0800, Jun Yao wrote:
> > Version 5 changes:
> > 	1. Correct spelling and indentation errors[1].
> > 	2. Update init_mm.pgd by assembly[2].
> > 	3. Simplify set_p?d() by introducing set_swapper_pgd()[3].
> > 	4. Reduce unnecessary tlbi for every write to swapper_pg_dir
> > 	   during paging_init()[3].
> > 
> > v4: https://www.spinics.net/lists/arm-kernel/msg672195.html
> > v3: https://www.spinics.net/lists/arm-kernel/msg662537.html
> > v2: https://patchwork.kernel.org/patch/10485641/
> > v1: https://patchwork.kernel.org/patch/10476595/
> > 
> > [1] https://www.spinics.net/lists/arm-kernel/msg675189.html
> > [2] https://www.spinics.net/lists/arm-kernel/msg675193.html
> > [3] https://www.spinics.net/lists/arm-kernel/msg675196.html
> > 
> > Jun Yao (6):
> >   arm64/mm: Introduce the init_pg_dir.
> >   arm64/mm: Pass ttbr1 as a parameter to __enable_mmu().
> >   arm64/mm: Create the initial page table in the init_pg_dir.
> >   arm64/mm: Create the final page table directly in swapper_pg_dir.
> >   arm64/mm: Populate the swapper_pg_dir by fixmap.
> >   arm64/mm: Move {idmap_pg_dir .. swapper_pg_dir} to rodata section.
> > 
> >  arch/arm64/include/asm/assembler.h | 29 ++++++++++++++++++
> >  arch/arm64/include/asm/pgtable.h   | 36 ++++++++++++++++++----
> >  arch/arm64/kernel/asm-offsets.c    |  1 +
> >  arch/arm64/kernel/head.S           | 48 ++++++++++++++++++++----------
> >  arch/arm64/kernel/sleep.S          |  1 +
> >  arch/arm64/kernel/vmlinux.lds.S    | 47 +++++++++++++++++++----------
> >  arch/arm64/mm/mmu.c                | 45 +++++++++++++---------------
> >  7 files changed, 147 insertions(+), 60 deletions(-)
> 
> I've pushed a branch with the cleanups I requested [1] folded in.
> 
> I'm still a bit worried about the pgd lock, but otherwise I think this
> is sound. I intend to throw some testing at it, just in case.
> 
> If you're happy with that branch, I'll ask Will and Catalin to consider
> picking it up.

This branch looks great. Thank you for improving my patch.

Is there anything I need to do? This is my first time to submit a patch,
so I am a bit overwhelmed. I think I should wait for other people to
review my patch. If there is a problem, I need to fix it.

Thanks,

Jun

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v5 0/6] Move swapper_pg_dir to rodata section.
  2018-09-25  9:53   ` Jun Yao
@ 2018-09-25 14:06     ` Mark Rutland, catalin.marinas
  2018-09-25 14:38       ` Catalin Marinas
  2018-10-03 13:33       ` James Morse
  0 siblings, 2 replies; 23+ messages in thread
From: Mark Rutland, catalin.marinas @ 2018-09-25 14:06 UTC (permalink / raw)
  To: linux-arm-kernel, will.deacon, james.morse, linux-kernel

On Tue, Sep 25, 2018 at 05:53:16PM +0800, Jun Yao wrote:
> On Mon, Sep 24, 2018 at 06:19:36PM +0100, Mark Rutland wrote:
> > I've pushed a branch with the cleanups I requested [1] folded in.
> > 
> > I'm still a bit worried about the pgd lock, but otherwise I think this
> > is sound. I intend to throw some testing at it, just in case.
> > 
> > If you're happy with that branch, I'll ask Will and Catalin to consider
> > picking it up.
> 
> This branch looks great. Thank you for improving my patch.

Thanks for your patches!

> Is there anything I need to do? This is my first time to submit a patch,
> so I am a bit overwhelmed. I think I should wait for other people to
> review my patch. If there is a problem, I need to fix it.

Hopefully Catalin is happy to pick this up from my branch, and neither
of us have to do anything. :)

Catalin, are you happy to pick the patches from:

  git://git.kernel.org/pub/scm/linux/kernel/git/mark/linux.git arm64/ro-swapper

... that should be based on the arm64 for-next/core branch, and has
James's R-B prior to my commit fixup notes.

I've given this some basic testing with defconfig, 48-bit/4k and
42-bit/64k (with lockdep on in both cases), and things seem fine.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v5 0/6] Move swapper_pg_dir to rodata section.
  2018-09-25 14:06     ` Mark Rutland, catalin.marinas
@ 2018-09-25 14:38       ` Catalin Marinas
  2018-10-03 13:33       ` James Morse
  1 sibling, 0 replies; 23+ messages in thread
From: Catalin Marinas @ 2018-09-25 14:38 UTC (permalink / raw)
  To: Mark Rutland; +Cc: linux-arm-kernel, will.deacon, james.morse, linux-kernel

On Tue, Sep 25, 2018 at 03:06:25PM +0100, Mark Rutland wrote:
> On Tue, Sep 25, 2018 at 05:53:16PM +0800, Jun Yao wrote:
> > On Mon, Sep 24, 2018 at 06:19:36PM +0100, Mark Rutland wrote:
> > > I've pushed a branch with the cleanups I requested [1] folded in.
> > > 
> > > I'm still a bit worried about the pgd lock, but otherwise I think this
> > > is sound. I intend to throw some testing at it, just in case.
> > > 
> > > If you're happy with that branch, I'll ask Will and Catalin to consider
> > > picking it up.
> > 
> > This branch looks great. Thank you for improving my patch.
> 
> Thanks for your patches!
> 
> > Is there anything I need to do? This is my first time to submit a patch,
> > so I am a bit overwhelmed. I think I should wait for other people to
> > review my patch. If there is a problem, I need to fix it.
> 
> Hopefully Catalin is happy to pick this up from my branch, and neither
> of us have to do anything. :)
> 
> Catalin, are you happy to pick the patches from:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/mark/linux.git arm64/ro-swapper

I queued them for 4.20. Thanks.

-- 
Catalin

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v5 5/6] arm64/mm: Populate the swapper_pg_dir by fixmap.
  2018-09-24 16:36   ` Mark Rutland
@ 2018-10-01 10:41     ` James Morse
  2018-10-01 13:49       ` James Morse
  0 siblings, 1 reply; 23+ messages in thread
From: James Morse @ 2018-10-01 10:41 UTC (permalink / raw)
  To: Mark Rutland, Jun Yao
  Cc: linux-arm-kernel, catalin.marinas, will.deacon, linux-kernel

Hi Mark,

On 24/09/18 17:36, Mark Rutland wrote:
> On Mon, Sep 17, 2018 at 12:43:32PM +0800, Jun Yao wrote:
>> Since we will move the swapper_pg_dir to rodata section, we need a
>> way to update it. The fixmap can handle it. When the swapper_pg_dir
>> needs to be updated, we map it dynamically. The map will be
>> canceled after the update is complete. In this way, we can defend
>> against KSMA (Kernel Space Mirror Attack).

>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>> index 71532bcd76c1..a8a60927f716 100644
>> --- a/arch/arm64/mm/mmu.c
>> +++ b/arch/arm64/mm/mmu.c
>> @@ -67,6 +67,24 @@ static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
>>  static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
>>  static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;
>>  
>> +static DEFINE_SPINLOCK(swapper_pgdir_lock);
>> +
>> +void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
>> +{
>> +	pgd_t *fixmap_pgdp;
>> +
>> +	spin_lock(&swapper_pgdir_lock);
>> +	fixmap_pgdp = pgd_set_fixmap(__pa(pgdp));
>> +	WRITE_ONCE(*fixmap_pgdp, pgd);
>> +	/*
>> +	 * We need dsb(ishst) here to ensure the page-table-walker sees
>> +	 * our new entry before set_p?d() returns. The fixmap's
>> +	 * flush_tlb_kernel_range() via clear_fixmap() does this for us.
>> +	 */
>> +	pgd_clear_fixmap();
>> +	spin_unlock(&swapper_pgdir_lock);
>> +}
> 
> I'm rather worried that we could deadlock here.

We can use the irqsave versions if you're worried, but I think any code doing
this is already broken.

(I'd like to eventually depend on the init_mm.page_table_lock for this, but it
isn't held when the vmemmap is being populated.)


> Are we certain we never poke the kernel page tables in IRQ context?

The RAS code was doing this, but was deemed unsafe, and changed to use the
fixmap: https://lkml.org/lkml/2017/10/30/500
The fixmap only ever touches the last level, so can't hit this.

x86 can't do its IPI tlb-maintenance from IRQ context, so anything trying to
unmap from irq context is already broken: https://lkml.org/lkml/2018/9/6/324

vunmap()/vfree() is allowed from irq context, but it defers its work.

I can't find any way to pass GFP_ATOMIC into ioremap(),
I didn't think vmalloc() could either, ...  but now I spot __vmalloc() does...

This __vmalloc() path is used by the percpu allocator, which starting from
pcpu_alloc() can be passed something other than GFP_KERNEL, and uses
spin_lock_irqsave(), so it is expecting to be called in irq context.

... so yes it looks like this can happen.

I'll post a fix


Thanks!

James

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v5 5/6] arm64/mm: Populate the swapper_pg_dir by fixmap.
  2018-10-01 10:41     ` James Morse
@ 2018-10-01 13:49       ` James Morse
  0 siblings, 0 replies; 23+ messages in thread
From: James Morse @ 2018-10-01 13:49 UTC (permalink / raw)
  To: Mark Rutland, Jun Yao
  Cc: linux-arm-kernel, catalin.marinas, will.deacon, linux-kernel

Hi Mark,

On 01/10/18 11:41, James Morse wrote:
> On 24/09/18 17:36, Mark Rutland wrote:
>> On Mon, Sep 17, 2018 at 12:43:32PM +0800, Jun Yao wrote:
>>> Since we will move the swapper_pg_dir to rodata section, we need a
>>> way to update it. The fixmap can handle it. When the swapper_pg_dir
>>> needs to be updated, we map it dynamically. The map will be
>>> canceled after the update is complete. In this way, we can defend
>>> against KSMA (Kernel Space Mirror Attack).
> 
>>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>>> index 71532bcd76c1..a8a60927f716 100644
>>> --- a/arch/arm64/mm/mmu.c
>>> +++ b/arch/arm64/mm/mmu.c
>>> @@ -67,6 +67,24 @@ static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
>>>  static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
>>>  static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;
>>>  
>>> +static DEFINE_SPINLOCK(swapper_pgdir_lock);
>>> +
>>> +void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
>>> +{
>>> +	pgd_t *fixmap_pgdp;
>>> +
>>> +	spin_lock(&swapper_pgdir_lock);
>>> +	fixmap_pgdp = pgd_set_fixmap(__pa(pgdp));
>>> +	WRITE_ONCE(*fixmap_pgdp, pgd);
>>> +	/*
>>> +	 * We need dsb(ishst) here to ensure the page-table-walker sees
>>> +	 * our new entry before set_p?d() returns. The fixmap's
>>> +	 * flush_tlb_kernel_range() via clear_fixmap() does this for us.
>>> +	 */
>>> +	pgd_clear_fixmap();
>>> +	spin_unlock(&swapper_pgdir_lock);
>>> +}

>> Are we certain we never poke the kernel page tables in IRQ context?
> 
> The RAS code was doing this, but was deemed unsafe, and changed to use the
> fixmap: https://lkml.org/lkml/2017/10/30/500
> The fixmap only ever touches the last level, so can't hit this.
> 
> x86 can't do its IPI tlb-maintenance from IRQ context, so anything trying to
> unmap from irq context is already broken: https://lkml.org/lkml/2018/9/6/324
> 
> vunmap()/vfree() is allowed from irq context, but it defers its work.
> 
> I can't find any way to pass GFP_ATOMIC into ioremap(),
> I didn't think vmalloc() could either, ...  but now I spot __vmalloc() does...
> 
> This __vmalloc() path is used by the percpu allocator, which starting from
> pcpu_alloc() can be passed something other than GFP_KERNEL, and uses
> spin_lock_irqsave(), so it is expecting to be called in irq context.
> 
> ... so yes it looks like this can happen.

But! These two things (irq-context and calls-__vmalloc()) can't happen at the
same time. If pcpu_alloc() is passed GFP_ATOMIC, and pcpu_alloc_area() fails,
(so a new chunk needs to be allocated), it will fail instead.

(This explains the scary looking "if (!in_atomic) mutex_lock()", in that code).


If you try it, you hit the "BUG_ON(in_interrupt())", in
__get_vm_area_node(). So even if you do pass GFP_ATOMIC in here, you can't call
it from interrupt context. (sanity prevails!)

I was wrong, it doesn't need fixing.


James

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v5 3/6] arm64/mm: Create the initial page table in the init_pg_dir.
  2018-09-24 13:34   ` Mark Rutland
@ 2018-10-01 13:49     ` James Morse
  0 siblings, 0 replies; 23+ messages in thread
From: James Morse @ 2018-10-01 13:49 UTC (permalink / raw)
  To: Mark Rutland
  Cc: Jun Yao, linux-arm-kernel, catalin.marinas, will.deacon, linux-kernel

Hi Mark,

On 24/09/18 14:34, Mark Rutland wrote:
> On Mon, Sep 17, 2018 at 12:43:30PM +0800, Jun Yao wrote:
>> Create the initial page table in the init_pg_dir. And update the
>> init_mm.pgd to make sure that pgd_offset_k() works correctly. When
>> the final page table is created, we redirect the init_mm.pgd to the
>> swapper_pg_dir.

>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
>> index de2aaea00bd2..cf8a58211b80 100644
>> --- a/arch/arm64/kernel/head.S
>> +++ b/arch/arm64/kernel/head.S
>> @@ -376,7 +376,7 @@ __create_page_tables:
>>  	/*
>>  	 * Map the kernel image (starting with PHYS_OFFSET).
>>  	 */
>> -	adrp	x0, swapper_pg_dir
>> +	adrp	x0, init_pg_dir
>>  	mov_q	x5, KIMAGE_VADDR + TEXT_OFFSET	// compile time __va(_text)
>>  	add	x5, x5, x23			// add KASLR displacement
>>  	mov	x4, PTRS_PER_PGD
>> @@ -439,6 +439,11 @@ __primary_switched:
>>  	bl	__pi_memset
>>  	dsb	ishst				// Make zero page visible to PTW
>>  
>> +	// Update init_mm.pgd
>> +	adrp	x0, init_pg_dir
>> +	adr_l	x1, init_mm
>> +	str	x0, [x1, #MM_PGD]
>> +
> 
> We should be able to set this up statically with INIT_MM_CONTEXT(). i.e.
> in <asm/mmu.h> have:
> 
> #define INIT_MM_CONTEXT(name)	\
> 	.pgd = init_pg_dir

Aha, I didn't think this would be possible because we end up setting .pgd twice,
but gcc says that's fine, it gets the last value:
https://gcc.gnu.org/onlinedocs/gcc/Designated-Inits.html

(and it looks like clang has a test for this)


Thanks,

James

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v5 0/6] Move swapper_pg_dir to rodata section.
  2018-09-25 14:06     ` Mark Rutland, catalin.marinas
  2018-09-25 14:38       ` Catalin Marinas
@ 2018-10-03 13:33       ` James Morse
  1 sibling, 0 replies; 23+ messages in thread
From: James Morse @ 2018-10-03 13:33 UTC (permalink / raw)
  To: Mark Rutland, catalin.marinas; +Cc: linux-arm-kernel, will.deacon, linux-kernel

Hi Catalin, Mark,

On 25/09/18 15:06, Mark Rutland wrote:
> On Tue, Sep 25, 2018 at 05:53:16PM +0800, Jun Yao wrote:
>> On Mon, Sep 24, 2018 at 06:19:36PM +0100, Mark Rutland wrote:
>>> I've pushed a branch with the cleanups I requested [1] folded in.
>>>
>>> I'm still a bit worried about the pgd lock, but otherwise I think this
>>> is sound. I intend to throw some testing at it, just in case.
>>>
>>> If you're happy with that branch, I'll ask Will and Catalin to consider
>>> picking it up.
>>
>> This branch looks great. Thank you for improving my patch.
> 
> Thanks for your patches!
> 
>> Is there anything I need to do? This is my first time to submit a patch,
>> so I am a bit overwhelmed. I think I should wait for other people to
>> review my patch. If there is a problem, I need to fix it.
> 
> Hopefully Catalin is happy to pick this up from my branch, and neither
> of us have to do anything. :)
> 
> Catalin, are you happy to pick the patches from:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/mark/linux.git arm64/ro-swapper
> 
> ... that should be based on the arm64 for-next/core branch, and has
> James's R-B prior to my commit fixup notes.
> 
> I've given this some basic testing with defconfig, 48-bit/4k and
> 42-bit/64k (with lockdep on in both cases), and things seem fine.

Re-running my hacky tests for this series knocks for-next/core over:
| Unable to handle kernel write to read-only memory at virtual address
fffffc00092b0008
| Mem abort info:
|   ESR = 0x9600004f
|   Exception class = DABT (current EL), IL = 32 bits
|   SET = 0, FnV = 0
|   EA = 0, S1PTW = 0
| Data abort info:
|   ISV = 0, ISS = 0x0000004f
|   CM = 0, WnR = 1
| swapper pgtable: 64k pages, 42-bit VAs, pgdp = 00000000992f9c42
| [fffffc00092b0008] pgd=00000087ffff0803, pud=00000087ffff0803,
pmd=00000087ffff0803, pte=00e00080032b0f93
| Internal error: Oops: 9600004f [#1] PREEMPT SMP
| Modules linked in:
| CPU: 4 PID: 13938 Comm: hackbench Tainted: G        W
4.19.0-rc2-00063-gc219bc4e9205 #10628
| Hardware name: AMD Seattle (Rev.B0) Development Board (Overdrive) (DT)
| pstate: a0400005 (NzCv daif +PAN -UAO)
| pc : __pte_alloc_kernel+0x60/0xd8
| lr : __pte_alloc_kernel+0x44/0xd8

| Process hackbench (pid: 13938, stack limit = 0x000000007ca756ec)
| Call trace:
|  __pte_alloc_kernel+0x60/0xd8
|  vmap_page_range_noflush+0x1bc/0x1d8
|  map_vm_area+0x30/0x40
|  __vmalloc_node_range+0x1e4/0x2e8
|  copy_process.isra.3.part.4+0x1e8/0x1ae8
|  _do_fork+0xd0/0x7f0
|  __arm64_sys_clone+0x1c/0x28

|  el0_svc_handler+0x7c/0x130

|  el0_svc+0x8/0xc

| Code: 90007b60 f946e000 cb000273 b2400673 (f9000293)

|  ---[ end trace 7bf2d2ffce498c7a ]---

| Kernel panic - not syncing: Fatal exception


The below patch fixes it: I'll post it properly shortly...
-----------------------%<-----------------------
Author: James Morse <james.morse@arm.com>
Date:   Wed Oct 3 12:03:38 2018 +0100

    asm-generic/pgtable-nop?d.h: define folded with a value for use in C

    It turns out "if (__is_defined(__PAGETABLE_PMD_FOLDED))" isn't equivalent
    to "#ifdef __PAGETABLE_PMD_FOLDED". (who knew!)

    kconfig.h's __is_defined() expects a define of the form
    "#define CONFIG_BOOGER 1". But these nop?d headers just have
    "#define __PAGETABLE_PMD_FOLDED".

    This means ____is_defined()'s triplet passed to __take_second_arg() is
    'empty-string 1 0' in both cases, meaning these symbols can't be used
    from C. (they are always false).

    asm-generic gets away with this as it's using the pre-processor's
    defined() macro on this, not the C __is_defined().

    Add the expected '1'.

    Signed-off-by: James Morse <james.morse@arm.com>

diff --git a/include/asm-generic/pgtable-nopmd.h
b/include/asm-generic/pgtable-nopmd.h
index f35f6e8149e4..fcb4769a075a 100644
--- a/include/asm-generic/pgtable-nopmd.h
+++ b/include/asm-generic/pgtable-nopmd.h
@@ -8,7 +8,7 @@

 struct mm_struct;

-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1

 /*
  * Having the pmd type consist of a pud gets the size right, and allows
diff --git a/include/asm-generic/pgtable-nopud.h
b/include/asm-generic/pgtable-nopud.h
index e950b9c50f34..d300dbcddaf3 100644
--- a/include/asm-generic/pgtable-nopud.h
+++ b/include/asm-generic/pgtable-nopud.h
@@ -9,7 +9,7 @@
 #else
 #include <asm-generic/pgtable-nop4d.h>

-#define __PAGETABLE_PUD_FOLDED
+#define __PAGETABLE_PUD_FOLDED 1

 /*
  * Having the pud type consist of a p4d gets the size right, and allows
-----------------------%<-----------------------


(and, while digging through this, I spotted:
| #define set_pmd_at(mm, addr, pmdp, pmd)	\
|	set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd))
which may trip up too)


Thanks,

James

^ permalink raw reply related	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2018-10-03 13:34 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-09-17  4:43 [PATCH v5 0/6] Move swapper_pg_dir to rodata section Jun Yao
2018-09-17  4:43 ` [PATCH v5 1/6] arm64/mm: Introduce the init_pg_dir Jun Yao
2018-09-24 13:01   ` Mark Rutland
2018-09-24 14:03     ` Mark Rutland
2018-09-17  4:43 ` [PATCH v5 2/6] arm64/mm: Pass ttbr1 as a parameter to __enable_mmu() Jun Yao
2018-09-24 13:26   ` Mark Rutland
2018-09-17  4:43 ` [PATCH v5 3/6] arm64/mm: Create the initial page table in the init_pg_dir Jun Yao
2018-09-24 13:34   ` Mark Rutland
2018-10-01 13:49     ` James Morse
2018-09-17  4:43 ` [PATCH v5 4/6] arm64/mm: Create the final page table directly in swapper_pg_dir Jun Yao
2018-09-17  4:43 ` [PATCH v5 5/6] arm64/mm: Populate the swapper_pg_dir by fixmap Jun Yao
2018-09-24 16:36   ` Mark Rutland
2018-10-01 10:41     ` James Morse
2018-10-01 13:49       ` James Morse
2018-09-24 16:54   ` Mark Rutland
2018-09-17  4:43 ` [PATCH v5 6/6] arm64/mm: Move {idmap_pg_dir .. swapper_pg_dir} to rodata section Jun Yao
2018-09-21 22:26 ` [PATCH v5 0/6] Move swapper_pg_dir " James Morse
2018-09-25  8:56   ` Jun Yao
2018-09-24 17:19 ` Mark Rutland
2018-09-25  9:53   ` Jun Yao
2018-09-25 14:06     ` Mark Rutland, catalin.marinas
2018-09-25 14:38       ` Catalin Marinas
2018-10-03 13:33       ` James Morse

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).