linux-kernel.vger.kernel.org archive mirror
* [PATCH v3 0/5] Move {idmap_pg_dir,swapper_pg_dir} to .rodata
@ 2018-07-02 11:16 Jun Yao
  2018-07-02 11:16 ` [PATCH v3 1/5] arm64/mm: Introduce init_pg_dir Jun Yao
                   ` (4 more replies)
  0 siblings, 5 replies; 13+ messages in thread
From: Jun Yao @ 2018-07-02 11:16 UTC (permalink / raw)
  To: linux-arm-kernel
  Cc: catalin.marinas, will.deacon, james.morse, suzuki.poulose, linux-kernel

Version 3 changes:
	As James Morse suggested[1], we split the old patch into 5
	patches:

	1. Introduce init_pg_dir.
	2. Make __enable_mmu() take the ttbr1 page as an argument.
	3. Create initial page tables in init_pg_dir and then create final
	   page tables in swapper_pg_dir directly.
	4. Make swapper_pg_dir smaller.
	5. Move {idmap_pg_dir, swapper_pg_dir} to .rodata section.

	At the same time, fix bugs mentioned in [1] and [2].

Special thanks to James Morse and Suzuki K Poulose. Without their help, I
could not have written these patches.

v2: https://patchwork.kernel.org/patch/10485641/
v1: https://patchwork.kernel.org/patch/10476595/

[1] https://patchwork.kernel.org/patch/10485641/
[2] https://patchwork.kernel.org/patch/10485643/

Jun Yao (5):
  arm64/mm: Introduce init_pg_dir
  arm64/mm: Make __enable_mmu() take the ttbr1 page as an argument
  arm64/mm: Create initial page tables in init_pg_dir
  arm64/mm: Make swapper_pg_dir smaller
  arm64/mm: Move {idmap_pg_dir, swapper_pg_dir} to .rodata section

 arch/arm64/include/asm/assembler.h | 23 ++++++++++++++
 arch/arm64/include/asm/pgalloc.h   | 48 ++++++++++++++++++++++++++++++
 arch/arm64/include/asm/pgtable.h   | 17 ++++++-----
 arch/arm64/kernel/head.S           | 31 ++++++++++++++-----
 arch/arm64/kernel/setup.c          |  1 +
 arch/arm64/kernel/sleep.S          |  1 +
 arch/arm64/kernel/vmlinux.lds.S    | 29 ++++++++++++------
 arch/arm64/mm/mmu.c                | 32 +++-----------------
 8 files changed, 129 insertions(+), 53 deletions(-)

-- 
2.17.1



* [PATCH v3 1/5] arm64/mm: Introduce init_pg_dir
  2018-07-02 11:16 [PATCH v3 0/5] Move {idmap_pg_dir,swapper_pg_dir} to .rodata Jun Yao
@ 2018-07-02 11:16 ` Jun Yao
  2018-07-06  8:56   ` James Morse
  2018-07-02 11:16 ` [PATCH v3 2/5] arm64/mm: Make __enable_mmu() take the ttbr1 page as an argument Jun Yao
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 13+ messages in thread
From: Jun Yao @ 2018-07-02 11:16 UTC (permalink / raw)
  To: linux-arm-kernel
  Cc: catalin.marinas, will.deacon, james.morse, suzuki.poulose, linux-kernel

Add init_pg_dir to vmlinux.lds.S and boiler-plate
clearing/cleaning/invalidating it in head.S.

Signed-off-by: Jun Yao <yaojun8558363@gmail.com>
---
 arch/arm64/include/asm/assembler.h | 23 +++++++++++++++++++++++
 arch/arm64/kernel/head.S           | 24 ++++++++++++++++++------
 arch/arm64/kernel/vmlinux.lds.S    |  7 +++++++
 3 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 0bcc98dbba56..414fb167e3e7 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -456,6 +456,29 @@ USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
 	b.ne	9998b
 	.endm
 
+/*
+ * clear_page - clear one page
+ */
+	.macro clear_page, start:req
+9996:	stp     xzr, xzr, [\start], #16
+	stp     xzr, xzr, [\start], #16
+	stp     xzr, xzr, [\start], #16
+	stp     xzr, xzr, [\start], #16
+	tst	\start, #(PAGE_SIZE - 1)
+	b.ne    9996b
+	.endm
+
+/*
+ * clear_pages - clear contiguous pages
+ */
+	.macro clear_pages, start:req, count:req
+9997:	cbz	\count, 9998f
+	clear_page \start
+	sub	\count, \count, #1
+	b	9997b
+9998:
+	.endm
+
 /*
  * Annotate a function as position independent, i.e., safe to be called before
  * the kernel virtual mapping is activated.
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index b0853069702f..3f99c59ba193 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -295,18 +295,25 @@ __create_page_tables:
 	sub	x1, x1, x0
 	bl	__inval_dcache_area
 
+	adrp	x0, init_pg_dir
+	adrp	x1, init_pg_end
+	sub	x1, x1, x0
+	bl	__inval_dcache_area
+
 	/*
 	 * Clear the idmap and swapper page tables.
 	 */
 	adrp	x0, idmap_pg_dir
 	adrp	x1, swapper_pg_end
 	sub	x1, x1, x0
-1:	stp	xzr, xzr, [x0], #16
-	stp	xzr, xzr, [x0], #16
-	stp	xzr, xzr, [x0], #16
-	stp	xzr, xzr, [x0], #16
-	subs	x1, x1, #64
-	b.ne	1b
+	lsr	x1, x1, #(PAGE_SHIFT)
+	clear_pages x0, x1
+
+	adrp	x0, init_pg_dir
+	adrp	x1, init_pg_end
+	sub	x1, x1, x0
+	lsr	x1, x1, #(PAGE_SHIFT)
+	clear_pages x0, x1
 
 	mov	x7, SWAPPER_MM_MMUFLAGS
 
@@ -395,6 +402,11 @@ __create_page_tables:
 	dmb	sy
 	bl	__inval_dcache_area
 
+	adrp	x0, init_pg_dir
+	adrp	x1, init_pg_end
+	sub	x1, x1, x0
+	bl	__inval_dcache_area
+
 	ret	x28
 ENDPROC(__create_page_tables)
 	.ltorg
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 605d1b60469c..d4fc68286a49 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -68,6 +68,12 @@ jiffies = jiffies_64;
 #define TRAMP_TEXT
 #endif
 
+#define INIT_DIR					\
+	. = ALIGN(PAGE_SIZE);                           \
+	init_pg_dir = .;                                \
+	. += SWAPPER_DIR_SIZE;                          \
+	init_pg_end = .;
+
 /*
  * The size of the PE/COFF section that covers the kernel image, which
  * runs from stext to _edata, must be a round multiple of the PE/COFF
@@ -168,6 +174,7 @@ SECTIONS
 		CON_INITCALL
 		SECURITY_INITCALL
 		INIT_RAM_FS
+		INIT_DIR
 		*(.init.rodata.* .init.bss)	/* from the EFI stub */
 	}
 	.exit.data : {
-- 
2.17.1



* [PATCH v3 2/5] arm64/mm: Make __enable_mmu() take the ttbr1 page as an argument
  2018-07-02 11:16 [PATCH v3 0/5] Move {idmap_pg_dir,swapper_pg_dir} to .rodata Jun Yao
  2018-07-02 11:16 ` [PATCH v3 1/5] arm64/mm: Introduce init_pg_dir Jun Yao
@ 2018-07-02 11:16 ` Jun Yao
  2018-07-06  8:57   ` James Morse
  2018-07-02 11:16 ` [PATCH v3 3/5] arm64/mm: Create initial page tables in init_pg_dir Jun Yao
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 13+ messages in thread
From: Jun Yao @ 2018-07-02 11:16 UTC (permalink / raw)
  To: linux-arm-kernel
  Cc: catalin.marinas, will.deacon, james.morse, suzuki.poulose, linux-kernel

Make __enable_mmu() take the physical address of the ttbr1 page as
an argument.

Signed-off-by: Jun Yao <yaojun8558363@gmail.com>
---
 arch/arm64/kernel/head.S  | 5 ++++-
 arch/arm64/kernel/sleep.S | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 3f99c59ba193..a1c7a4d3b9f3 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -718,6 +718,7 @@ secondary_startup:
 	 * Common entry point for secondary CPUs.
 	 */
 	bl	__cpu_setup			// initialise processor
+	adrp	x26, swapper_pg_dir
 	bl	__enable_mmu
 	ldr	x8, =__secondary_switched
 	br	x8
@@ -760,6 +761,7 @@ ENDPROC(__secondary_switched)
  * Enable the MMU.
  *
  *  x0  = SCTLR_EL1 value for turning on the MMU.
+ *  x26 = TTBR1_EL1 value for turning on the MMU.
  *
  * Returns to the caller via x30/lr. This requires the caller to be covered
  * by the .idmap.text section.
@@ -774,7 +776,7 @@ ENTRY(__enable_mmu)
 	b.ne	__no_granule_support
 	update_early_cpu_boot_status 0, x1, x2
 	adrp	x1, idmap_pg_dir
-	adrp	x2, swapper_pg_dir
+	mov	x2, x26
 	phys_to_ttbr x3, x1
 	phys_to_ttbr x4, x2
 	msr	ttbr0_el1, x3			// load TTBR0
@@ -835,6 +837,7 @@ __primary_switch:
 	mrs	x20, sctlr_el1			// preserve old SCTLR_EL1 value
 #endif
 
+	adrp	x26, swapper_pg_dir
 	bl	__enable_mmu
 #ifdef CONFIG_RELOCATABLE
 	bl	__relocate_kernel
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index bebec8ef9372..3f2c7bf67a2c 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -101,6 +101,7 @@ ENTRY(cpu_resume)
 	bl	el2_setup		// if in EL2 drop to EL1 cleanly
 	bl	__cpu_setup
 	/* enable the MMU early - so we can access sleep_save_stash by va */
+	adrp	x26, swapper_pg_dir
 	bl	__enable_mmu
 	ldr	x8, =_cpu_resume
 	br	x8
-- 
2.17.1



* [PATCH v3 3/5] arm64/mm: Create initial page tables in init_pg_dir
  2018-07-02 11:16 [PATCH v3 0/5] Move {idmap_pg_dir,swapper_pg_dir} to .rodata Jun Yao
  2018-07-02 11:16 ` [PATCH v3 1/5] arm64/mm: Introduce init_pg_dir Jun Yao
  2018-07-02 11:16 ` [PATCH v3 2/5] arm64/mm: Make __enable_mmu() take the ttbr1 page as an argument Jun Yao
@ 2018-07-02 11:16 ` Jun Yao
  2018-07-06  8:58   ` James Morse
  2018-07-02 11:16 ` [PATCH v3 4/5] arm64/mm: Make swapper_pg_dir smaller Jun Yao
  2018-07-02 11:16 ` [PATCH v3 5/5] arm64/mm: Move {idmap_pg_dir, swapper_pg_dir} to .rodata section Jun Yao
  4 siblings, 1 reply; 13+ messages in thread
From: Jun Yao @ 2018-07-02 11:16 UTC (permalink / raw)
  To: linux-arm-kernel
  Cc: catalin.marinas, will.deacon, james.morse, suzuki.poulose, linux-kernel

Create initial page tables in init_pg_dir and then create final
page tables in swapper_pg_dir directly.

Signed-off-by: Jun Yao <yaojun8558363@gmail.com>
---
 arch/arm64/include/asm/pgtable.h |  2 ++
 arch/arm64/kernel/head.S         |  4 ++--
 arch/arm64/kernel/setup.c        |  1 +
 arch/arm64/mm/mmu.c              | 24 ++++--------------------
 4 files changed, 9 insertions(+), 22 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 7c4c8f318ba9..3b408f21fe2e 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -718,6 +718,8 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
 }
 #endif
 
+extern pgd_t init_pg_dir[PTRS_PER_PGD] __section(.init.data);
+extern pgd_t init_pg_end[] __section(.init.data);
 extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
 extern pgd_t swapper_pg_end[];
 extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index a1c7a4d3b9f3..a8e9432ffa8a 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -380,7 +380,7 @@ __create_page_tables:
 	/*
 	 * Map the kernel image (starting with PHYS_OFFSET).
 	 */
-	adrp	x0, swapper_pg_dir
+	adrp	x0, init_pg_dir
 	mov_q	x5, KIMAGE_VADDR + TEXT_OFFSET	// compile time __va(_text)
 	add	x5, x5, x23			// add KASLR displacement
 	mov	x4, PTRS_PER_PGD
@@ -837,7 +837,7 @@ __primary_switch:
 	mrs	x20, sctlr_el1			// preserve old SCTLR_EL1 value
 #endif
 
-	adrp	x26, swapper_pg_dir
+	adrp	x26, init_pg_dir
 	bl	__enable_mmu
 #ifdef CONFIG_RELOCATABLE
 	bl	__relocate_kernel
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 30ad2f085d1f..b065c08d4008 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -249,6 +249,7 @@ void __init setup_arch(char **cmdline_p)
 	init_mm.end_code   = (unsigned long) _etext;
 	init_mm.end_data   = (unsigned long) _edata;
 	init_mm.brk	   = (unsigned long) _end;
+	init_mm.pgd	   = init_pg_dir;
 
 	*cmdline_p = boot_command_line;
 
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 2dbb2c9f1ec1..a7ab0010ff80 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -628,26 +628,10 @@ static void __init map_kernel(pgd_t *pgdp)
  */
 void __init paging_init(void)
 {
-	phys_addr_t pgd_phys = early_pgtable_alloc();
-	pgd_t *pgdp = pgd_set_fixmap(pgd_phys);
-
-	map_kernel(pgdp);
-	map_mem(pgdp);
-
-	/*
-	 * We want to reuse the original swapper_pg_dir so we don't have to
-	 * communicate the new address to non-coherent secondaries in
-	 * secondary_entry, and so cpu_switch_mm can generate the address with
-	 * adrp+add rather than a load from some global variable.
-	 *
-	 * To do this we need to go via a temporary pgd.
-	 */
-	cpu_replace_ttbr1(__va(pgd_phys));
-	memcpy(swapper_pg_dir, pgdp, PGD_SIZE);
-	cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
-
-	pgd_clear_fixmap();
-	memblock_free(pgd_phys, PAGE_SIZE);
+	map_kernel(swapper_pg_dir);
+	map_mem(swapper_pg_dir);
+	cpu_replace_ttbr1(swapper_pg_dir);
+	init_mm.pgd = swapper_pg_dir;
 
 	/*
 	 * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd
-- 
2.17.1



* [PATCH v3 4/5] arm64/mm: Make swapper_pg_dir smaller
  2018-07-02 11:16 [PATCH v3 0/5] Move {idmap_pg_dir,swapper_pg_dir} to .rodata Jun Yao
                   ` (2 preceding siblings ...)
  2018-07-02 11:16 ` [PATCH v3 3/5] arm64/mm: Create initial page tables in init_pg_dir Jun Yao
@ 2018-07-02 11:16 ` Jun Yao
  2018-07-06  8:58   ` James Morse
  2018-07-02 11:16 ` [PATCH v3 5/5] arm64/mm: Move {idmap_pg_dir, swapper_pg_dir} to .rodata section Jun Yao
  4 siblings, 1 reply; 13+ messages in thread
From: Jun Yao @ 2018-07-02 11:16 UTC (permalink / raw)
  To: linux-arm-kernel
  Cc: catalin.marinas, will.deacon, james.morse, suzuki.poulose, linux-kernel

Make swapper_pg_dir smaller so we don't need to memblock_free() it.

Signed-off-by: Jun Yao <yaojun8558363@gmail.com>
---
 arch/arm64/kernel/vmlinux.lds.S | 2 +-
 arch/arm64/mm/mmu.c             | 8 --------
 2 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index d4fc68286a49..d69e11ad92e3 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -236,7 +236,7 @@ SECTIONS
 	. += RESERVED_TTBR0_SIZE;
 #endif
 	swapper_pg_dir = .;
-	. += SWAPPER_DIR_SIZE;
+	. += PAGE_SIZE;
 	swapper_pg_end = .;
 
 	__pecoff_data_size = ABSOLUTE(. - __initdata_begin);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index a7ab0010ff80..a1e8fc624324 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -632,14 +632,6 @@ void __init paging_init(void)
 	map_mem(swapper_pg_dir);
 	cpu_replace_ttbr1(swapper_pg_dir);
 	init_mm.pgd = swapper_pg_dir;
-
-	/*
-	 * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd
-	 * allocated with it.
-	 */
-	memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE,
-		      __pa_symbol(swapper_pg_end) - __pa_symbol(swapper_pg_dir)
-		      - PAGE_SIZE);
 }
 
 /*
-- 
2.17.1



* [PATCH v3 5/5] arm64/mm: Move {idmap_pg_dir, swapper_pg_dir} to .rodata section
  2018-07-02 11:16 [PATCH v3 0/5] Move {idmap_pg_dir,swapper_pg_dir} to .rodata Jun Yao
                   ` (3 preceding siblings ...)
  2018-07-02 11:16 ` [PATCH v3 4/5] arm64/mm: Make swapper_pg_dir smaller Jun Yao
@ 2018-07-02 11:16 ` Jun Yao
  2018-07-11 16:15   ` James Morse
  4 siblings, 1 reply; 13+ messages in thread
From: Jun Yao @ 2018-07-02 11:16 UTC (permalink / raw)
  To: linux-arm-kernel
  Cc: catalin.marinas, will.deacon, james.morse, suzuki.poulose, linux-kernel

Move {idmap_pg_dir, swapper_pg_dir} to .rodata section and
populate swapper_pg_dir by fixmap.

Signed-off-by: Jun Yao <yaojun8558363@gmail.com>
---
 arch/arm64/include/asm/pgalloc.h | 48 ++++++++++++++++++++++++++++++++
 arch/arm64/include/asm/pgtable.h | 15 +++++-----
 arch/arm64/kernel/vmlinux.lds.S  | 22 +++++++++------
 3 files changed, 68 insertions(+), 17 deletions(-)

diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 2e05bcd944c8..a0ce7d0f81c5 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -29,6 +29,23 @@
 #define PGALLOC_GFP	(GFP_KERNEL | __GFP_ZERO)
 #define PGD_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
 
+static inline int in_swapper_dir(void *addr)
+{
+	if ((unsigned long)addr >= (unsigned long)swapper_pg_dir &&
+		(unsigned long)addr < (unsigned long)swapper_pg_end) {
+		return 1;
+	}
+	return 0;
+}
+
+static inline void *swapper_mirror_addr(void *start, void *addr)
+{
+	unsigned long offset;
+
+	offset = (unsigned long)addr - (unsigned long)swapper_pg_dir;
+	return start + offset;
+}
+
 #if CONFIG_PGTABLE_LEVELS > 2
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
@@ -49,6 +66,17 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
 
 static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
 {
+#ifdef __PAGETABLE_PUD_FOLDED
+	if ((mm == &init_mm) && in_swapper_dir(pudp)) {
+		pud_t *pud;
+
+		pud = pud_set_fixmap(__pa_symbol(swapper_pg_dir));
+		pud = (pud_t *)swapper_mirror_addr(pud, pudp);
+		__pud_populate(pud, __pa(pmdp), PMD_TYPE_TABLE);
+		pud_clear_fixmap();
+		return;
+	}
+#endif
 	__pud_populate(pudp, __pa(pmdp), PMD_TYPE_TABLE);
 }
 #else
@@ -78,6 +106,15 @@ static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot)
 
 static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgdp, pud_t *pudp)
 {
+	if ((mm == &init_mm) && in_swapper_dir(pgdp)) {
+		pgd_t *pgd;
+
+		pgd = pgd_set_fixmap(__pa_symbol(swapper_pg_dir));
+		pgd = (pgd_t *)swapper_mirror_addr(pgd, pgdp);
+		__pgd_populate(pgd, __pa(pudp), PUD_TYPE_TABLE);
+		pgd_clear_fixmap();
+		return;
+	}
 	__pgd_populate(pgdp, __pa(pudp), PUD_TYPE_TABLE);
 }
 #else
@@ -139,6 +176,17 @@ static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
 static inline void
 pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
 {
+#ifdef __PAGETABLE_PMD_FOLDED
+	if (in_swapper_dir(pmdp)) {
+		pmd_t *pmd;
+
+		pmd = pmd_set_fixmap(__pa_symbol(swapper_pg_dir));
+		pmd = (pmd_t *)swapper_mirror_addr(pmd, pmdp);
+		__pmd_populate(pmd, __pa(ptep), PMD_TYPE_TABLE);
+		pmd_clear_fixmap();
+		return;
+	}
+#endif
 	/*
 	 * The pmd must be loaded with the physical address of the PTE table
 	 */
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 3b408f21fe2e..b479d1b997c2 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -475,6 +475,9 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
  */
 #define mk_pte(page,prot)	pfn_pte(page_to_pfn(page),prot)
 
+#define pmd_set_fixmap(addr)		((pmd_t *)set_fixmap_offset(FIX_PMD, addr))
+#define pmd_clear_fixmap()		clear_fixmap(FIX_PMD)
+
 #if CONFIG_PGTABLE_LEVELS > 2
 
 #define pmd_ERROR(pmd)		__pmd_error(__FILE__, __LINE__, pmd_val(pmd))
@@ -506,11 +509,11 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
 #define pmd_offset_phys(dir, addr)	(pud_page_paddr(READ_ONCE(*(dir))) + pmd_index(addr) * sizeof(pmd_t))
 #define pmd_offset(dir, addr)		((pmd_t *)__va(pmd_offset_phys((dir), (addr))))
 
-#define pmd_set_fixmap(addr)		((pmd_t *)set_fixmap_offset(FIX_PMD, addr))
 #define pmd_set_fixmap_offset(pud, addr)	pmd_set_fixmap(pmd_offset_phys(pud, addr))
-#define pmd_clear_fixmap()		clear_fixmap(FIX_PMD)
 
 #define pud_page(pud)		pfn_to_page(__phys_to_pfn(__pud_to_phys(pud)))
+#define pud_set_fixmap(addr)		((pud_t *)set_fixmap_offset(FIX_PUD, addr))
+#define pud_clear_fixmap()		clear_fixmap(FIX_PUD)
 
 /* use ONLY for statically allocated translation tables */
 #define pmd_offset_kimg(dir,addr)	((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr))))
@@ -518,11 +521,11 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
 #else
 
 #define pud_page_paddr(pud)	({ BUILD_BUG(); 0; })
+#define pud_set_fixmap(addr)		NULL
+#define pud_clear_fixmap()
 
 /* Match pmd_offset folding in <asm/generic/pgtable-nopmd.h> */
-#define pmd_set_fixmap(addr)		NULL
 #define pmd_set_fixmap_offset(pudp, addr)	((pmd_t *)pudp)
-#define pmd_clear_fixmap()
 
 #define pmd_offset_kimg(dir,addr)	((pmd_t *)dir)
 
@@ -558,9 +561,7 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
 #define pud_offset_phys(dir, addr)	(pgd_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t))
 #define pud_offset(dir, addr)		((pud_t *)__va(pud_offset_phys((dir), (addr))))
 
-#define pud_set_fixmap(addr)		((pud_t *)set_fixmap_offset(FIX_PUD, addr))
 #define pud_set_fixmap_offset(pgd, addr)	pud_set_fixmap(pud_offset_phys(pgd, addr))
-#define pud_clear_fixmap()		clear_fixmap(FIX_PUD)
 
 #define pgd_page(pgd)		pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd)))
 
@@ -572,9 +573,7 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
 #define pgd_page_paddr(pgd)	({ BUILD_BUG(); 0;})
 
 /* Match pud_offset folding in <asm/generic/pgtable-nopud.h> */
-#define pud_set_fixmap(addr)		NULL
 #define pud_set_fixmap_offset(pgdp, addr)	((pud_t *)pgdp)
-#define pud_clear_fixmap()
 
 #define pud_offset_kimg(dir,addr)	((pud_t *)dir)
 
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index d69e11ad92e3..beff018bf0f9 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -223,21 +223,25 @@ SECTIONS
 	BSS_SECTION(0, 0, 0)
 
 	. = ALIGN(PAGE_SIZE);
-	idmap_pg_dir = .;
-	. += IDMAP_DIR_SIZE;
+
+	.rodata : {
+		. = ALIGN(PAGE_SIZE);
+		idmap_pg_dir = .;
+		. += IDMAP_DIR_SIZE;
 
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-	tramp_pg_dir = .;
-	. += PAGE_SIZE;
+		tramp_pg_dir = .;
+		. += PAGE_SIZE;
 #endif
 
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
-	reserved_ttbr0 = .;
-	. += RESERVED_TTBR0_SIZE;
+		reserved_ttbr0 = .;
+		. += RESERVED_TTBR0_SIZE;
 #endif
-	swapper_pg_dir = .;
-	. += PAGE_SIZE;
-	swapper_pg_end = .;
+		swapper_pg_dir = .;
+		. += PAGE_SIZE;
+		swapper_pg_end = .;
+	}
 
 	__pecoff_data_size = ABSOLUTE(. - __initdata_begin);
 	_end = .;
-- 
2.17.1



* Re: [PATCH v3 1/5] arm64/mm: Introduce init_pg_dir
  2018-07-02 11:16 ` [PATCH v3 1/5] arm64/mm: Introduce init_pg_dir Jun Yao
@ 2018-07-06  8:56   ` James Morse
  0 siblings, 0 replies; 13+ messages in thread
From: James Morse @ 2018-07-06  8:56 UTC (permalink / raw)
  To: Jun Yao, linux-arm-kernel
  Cc: catalin.marinas, will.deacon, suzuki.poulose, linux-kernel

Hi Jun,

On 02/07/18 12:16, Jun Yao wrote:
> Add init_pg_dir to vmlinux.lds.S and boiler-plate
> clearing/cleaning/invalidating it in head.S.

With just this patch I get a weird link error from ld[0]:
aarch64-linux-gnu-ld:./arch/arm64/kernel/vmlinux.lds:90 cannot move location
counter backwards (from ffff000008fa8000 to fffefffff8ead000)
make[2]: *** [/home/morse/linux/Makefile:1015: vmlinux] Error 1
make[1]: *** [Makefile:146: sub-make] Error 2
make: *** [Makefile:24: __sub-make] Error 2

Where line 90 is your INIT_DIR macro.

I'm going to guess that this is because SWAPPER_DIR_SIZE is defined in terms of
'_end', and that this can't be used inside '.init.data'.

Moving it outside the '.init.data' section fixes this [1]. We only need this to
be within the __init_begin/__init_end markers that free_initmem() uses, which it
still is after this change.
A side effect of this is we shouldn't label the C externs '__initdata' as they
are no longer in the '.init.data' section. (looks like I was wrong about sparse
being able to pick that up...)

(Could we make it clearer INIT_DIR is related to the page tables, e.g.
INIT_PG_TABLES? INIT_DIR makes me think of the rootfs for some reason)


Otherwise some boring nits:

> diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
> index 0bcc98dbba56..414fb167e3e7 100644
> --- a/arch/arm64/include/asm/assembler.h
> +++ b/arch/arm64/include/asm/assembler.h
> @@ -456,6 +456,29 @@ USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
>  	b.ne	9998b
>  	.endm
>  
> +/*
> + * clear_page - clear one page
> + */
> +	.macro clear_page, start:req
> +9996:	stp     xzr, xzr, [\start], #16
> +	stp     xzr, xzr, [\start], #16
> +	stp     xzr, xzr, [\start], #16
> +	stp     xzr, xzr, [\start], #16
> +	tst	\start, #(PAGE_SIZE - 1)

(This will match any page-aligned end address, so this macro only clears one
page if you give it a page aligned start address. Just something for us to
remember.)

> +	b.ne    9996b
> +	.endm

The rest of this file uses the whitespace convention $instruction[tab]$arg, $arg; here you
mix and match.


> +/*
> + * clear_pages - clear contiguous pages
> + */
> +	.macro clear_pages, start:req, count:req
> +9997:	cbz	\count, 9998f
> +	clear_page \start
> +	sub	\count, \count, #1
> +	b	9997b
> +9998:
> +	.endm

Both callers of this have a start/end address, so you may as well move the 'count'
calculation in here to save duplicating it.



With the link-error fixed:
Reviewed-by: James Morse <james.morse@arm.com>


Thanks,

James


> diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
> index 605d1b60469c..d4fc68286a49 100644
> --- a/arch/arm64/kernel/vmlinux.lds.S
> +++ b/arch/arm64/kernel/vmlinux.lds.S
> @@ -68,6 +68,12 @@ jiffies = jiffies_64;
>  #define TRAMP_TEXT
>  #endif
>  
> +#define INIT_DIR					\
> +	. = ALIGN(PAGE_SIZE);                           \
> +	init_pg_dir = .;                                \
> +	. += SWAPPER_DIR_SIZE;                          \
> +	init_pg_end = .;
> +
>  /*
>   * The size of the PE/COFF section that covers the kernel image, which
>   * runs from stext to _edata, must be a round multiple of the PE/COFF
> @@ -168,6 +174,7 @@ SECTIONS
>  		CON_INITCALL
>  		SECURITY_INITCALL
>  		INIT_RAM_FS
> +		INIT_DIR
>  		*(.init.rodata.* .init.bss)	/* from the EFI stub */
>  	}
>  	.exit.data : {
> 

[0] aarch64-linux-gnu-ld --version
GNU ld (GNU Binutils for Debian) 2.30.90.20180627
Copyright (C) 2018 Free Software Foundation, Inc.
This program is free software; you may redistribute it under the terms of
the GNU General Public License version 3 or (at your option) a later version.
This program has absolutely no warranty.


[1] Move INIT_DIR outside the .init.data section
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index d4fc68286a49..169abc3d01c8 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -167,6 +167,8 @@ SECTIONS
        __inittext_end = .;
        __initdata_begin = .;

+       INIT_DIR
+
        .init.data : {
                INIT_DATA
                INIT_SETUP(16)
@@ -174,7 +176,6 @@ SECTIONS
                CON_INITCALL
                SECURITY_INITCALL
                INIT_RAM_FS
-               INIT_DIR
                *(.init.rodata.* .init.bss)     /* from the EFI stub */
        }
        .exit.data : {




* Re: [PATCH v3 2/5] arm64/mm: Make __enable_mmu() take the ttbr1 page as an argument
  2018-07-02 11:16 ` [PATCH v3 2/5] arm64/mm: Make __enable_mmu() take the ttbr1 page as an argument Jun Yao
@ 2018-07-06  8:57   ` James Morse
  0 siblings, 0 replies; 13+ messages in thread
From: James Morse @ 2018-07-06  8:57 UTC (permalink / raw)
  To: Jun Yao, linux-arm-kernel
  Cc: catalin.marinas, will.deacon, suzuki.poulose, linux-kernel

Hi Jun,

On 02/07/18 12:16, Jun Yao wrote:
> Make __enable_mmu() take the physical address of the ttbr1 page as
> an argument.

> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 3f99c59ba193..a1c7a4d3b9f3 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -760,6 +761,7 @@ ENDPROC(__secondary_switched)
>   * Enable the MMU.
>   *
>   *  x0  = SCTLR_EL1 value for turning on the MMU.
> + *  x26 = TTBR1_EL1 value for turning on the MMU.

This works, but I'd really like as many assembly functions as possible to follow
the PCS [0]. We don't want the assembly code to become a maze of register-usage,
sticking to the existing conventions makes it predictable.

Passing arguments in callee-saved registers looks weird, and it's just to avoid
shuffling the existing users of x1/x2 up.

e.g. something like moving the existing use of {x1, x2} -> {x5, x6}:
----------%<----------
-       mrs     x1, ID_AA64MMFR0_EL1
-       ubfx    x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
-       cmp     x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
+       mrs     x5, ID_AA64MMFR0_EL1
+       ubfx    x6, x5, #ID_AA64MMFR0_TGRAN_SHIFT, 4
+       cmp     x6, #ID_AA64MMFR0_TGRAN_SUPPORTED
        b.ne    __no_granule_support
-       update_early_cpu_boot_status 0, x1, x2
-       adrp    x1, idmap_pg_dir
-       adrp    x2, swapper_pg_dir
-       phys_to_ttbr x3, x1
-       phys_to_ttbr x4, x2
+       update_early_cpu_boot_status 0, x5, x6
+
+       adrp    x5, idmap_pg_dir
+       phys_to_ttbr x3, x5
+       phys_to_ttbr x4, x1
----------%<----------


Thanks,

James

[0] http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf


* Re: [PATCH v3 3/5] arm64/mm: Create initial page tables in init_pg_dir
  2018-07-02 11:16 ` [PATCH v3 3/5] arm64/mm: Create initial page tables in init_pg_dir Jun Yao
@ 2018-07-06  8:58   ` James Morse
  2018-07-06 14:41     ` James Morse
  0 siblings, 1 reply; 13+ messages in thread
From: James Morse @ 2018-07-06  8:58 UTC (permalink / raw)
  To: Jun Yao, linux-arm-kernel
  Cc: catalin.marinas, will.deacon, suzuki.poulose, linux-kernel

Hi Jun,

On 02/07/18 12:16, Jun Yao wrote:
> Create initial page tables in init_pg_dir and then create final
> page tables in swapper_pg_dir directly.

This is what the patch does, but it doesn't preserve the why for the git-log. Could
you expand it to describe why we're doing this?


The series so far fails to boot for me. This is because the kaslr code tries to
read the kaslr-seed from the DT, via the fixmap. To do this, it needs the fixmap
tables installed, which early_fixmap_init() does.

early_fixmap_init() calls pgd_offset_k(), which assumes init_mm.pgd is in use,
so we hit a BUG_ON() when trying to setup the fixmap because we added the tables
to the wrong page tables.

If you enable 'CONFIG_RANDOMIZE_BASE', even without EFI you should see the same
thing happen.


I think we should move this hunk:
> diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
> index 30ad2f085d1f..b065c08d4008 100644
> --- a/arch/arm64/kernel/setup.c
> +++ b/arch/arm64/kernel/setup.c
> @@ -249,6 +249,7 @@ void __init setup_arch(char **cmdline_p)
>  	init_mm.end_code   = (unsigned long) _etext;
>  	init_mm.end_data   = (unsigned long) _edata;
>  	init_mm.brk	   = (unsigned long) _end;
> +	init_mm.pgd	   = init_pg_dir;
>
>  	*cmdline_p = boot_command_line;
>

into early_fixmap_init(), which is the first thing setup_arch() calls.
Something like [0] fixes it.

This looks to be the only thing that goes near init_mm.pgd before paging_init().


> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index 7c4c8f318ba9..3b408f21fe2e 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -718,6 +718,8 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
>  }
>  #endif
>  
> +extern pgd_t init_pg_dir[PTRS_PER_PGD] __section(.init.data);
> +extern pgd_t init_pg_end[] __section(.init.data);

normally we'd spell this '__initdata', but if we move them out of the
'.init.data' section we shouldn't, otherwise the extern definition doesn't match
where the symbol appears. (Looks like I was wrong to think that tools like
sparse pick this up, that's just the __iomem/__user stuff.)
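
i.e. the externs would then just be the plain declarations, something like:
|	extern pgd_t init_pg_dir[PTRS_PER_PGD];
|	extern pgd_t init_pg_end[];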


> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index 2dbb2c9f1ec1..a7ab0010ff80 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -628,26 +628,10 @@ static void __init map_kernel(pgd_t *pgdp)
>   */
>  void __init paging_init(void)
>  {
> -	phys_addr_t pgd_phys = early_pgtable_alloc();
> -	pgd_t *pgdp = pgd_set_fixmap(pgd_phys);
> -
> -	map_kernel(pgdp);
> -	map_mem(pgdp);
> -
> -	/*
> -	 * We want to reuse the original swapper_pg_dir so we don't have to
> -	 * communicate the new address to non-coherent secondaries in
> -	 * secondary_entry, and so cpu_switch_mm can generate the address with
> -	 * adrp+add rather than a load from some global variable.
> -	 *
> -	 * To do this we need to go via a temporary pgd.
> -	 */
> -	cpu_replace_ttbr1(__va(pgd_phys));
> -	memcpy(swapper_pg_dir, pgdp, PGD_SIZE);
> -	cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
> -
> -	pgd_clear_fixmap();
> -	memblock_free(pgd_phys, PAGE_SIZE);
> +	map_kernel(swapper_pg_dir);
> +	map_mem(swapper_pg_dir);

> +	cpu_replace_ttbr1(swapper_pg_dir);

The lm_alias() here is important: cpu_replace_ttbr1() calls virt_to_phys() on
its argument, virt_to_phys() is only intended for addresses in the linear-map
region of the VA space, as it works by doing some arithmetic with the address.

swapper_pg_dir is the name of the kernel symbol, so its address will be in the
kernel-text region of the VA space.

Today virt_to_phys() catches this happening and fixes it up, CONFIG_DEBUG_VIRTUAL
will give you a warning, and at some point virt_to_phys()'s safety net will go away.

The original call that did this was wrapped in lm_alias(), which gives you the
linear-map address of a symbol in the kernel-text.
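
i.e. the call here wants to be something like:
|	cpu_replace_ttbr1(lm_alias(swapper_pg_dir));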


Thanks,

James


[0] make sure fixmap tables go in the init page tables
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 117d080639b3..e097c78a66f8 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -770,6 +771,13 @@ void __init early_fixmap_init(void)
        pmd_t *pmdp;
        unsigned long addr = FIXADDR_START;

+       /*
+        * During early setup we use init_pg_dir, update init_mm so its
+        * implicit use by pgd_offset_k() gets the live page tables.
+        * swapper_pg_dir is restored by paging_init().
+        */
+       init_mm.pgd = init_pg_dir;
+
        pgdp = pgd_offset_k(addr);
        pgd = READ_ONCE(*pgdp);
        if (CONFIG_PGTABLE_LEVELS > 3 &&





* Re: [PATCH v3 4/5] arm64/mm: Make swapper_pg_dir smaller
  2018-07-02 11:16 ` [PATCH v3 4/5] arm64/mm: Make swapper_pg_dir smaller Jun Yao
@ 2018-07-06  8:58   ` James Morse
  0 siblings, 0 replies; 13+ messages in thread
From: James Morse @ 2018-07-06  8:58 UTC (permalink / raw)
  To: Jun Yao, linux-arm-kernel
  Cc: catalin.marinas, will.deacon, suzuki.poulose, linux-kernel

Hi Jun,

On 02/07/18 12:16, Jun Yao wrote:
> Make swapper_pg_dir smaller so we don't need to memblock_free() it.

Patch looks good, but could the commit message explain why it's now safe to do this?

Thanks,

James


> diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
> index d4fc68286a49..d69e11ad92e3 100644
> --- a/arch/arm64/kernel/vmlinux.lds.S
> +++ b/arch/arm64/kernel/vmlinux.lds.S
> @@ -236,7 +236,7 @@ SECTIONS
>  	. += RESERVED_TTBR0_SIZE;
>  #endif
>  	swapper_pg_dir = .;
> -	. += SWAPPER_DIR_SIZE;
> +	. += PAGE_SIZE;
>  	swapper_pg_end = .;
>  
>  	__pecoff_data_size = ABSOLUTE(. - __initdata_begin);
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index a7ab0010ff80..a1e8fc624324 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -632,14 +632,6 @@ void __init paging_init(void)
>  	map_mem(swapper_pg_dir);
>  	cpu_replace_ttbr1(swapper_pg_dir);
>  	init_mm.pgd = swapper_pg_dir;
> -
> -	/*
> -	 * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd
> -	 * allocated with it.
> -	 */
> -	memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE,
> -		      __pa_symbol(swapper_pg_end) - __pa_symbol(swapper_pg_dir)
> -		      - PAGE_SIZE);
>  }
>  
>  /*
> 



* Re: [PATCH v3 3/5] arm64/mm: Create initial page tables in init_pg_dir
  2018-07-06  8:58   ` James Morse
@ 2018-07-06 14:41     ` James Morse
  2018-08-15 10:26       ` Jun Yao
  0 siblings, 1 reply; 13+ messages in thread
From: James Morse @ 2018-07-06 14:41 UTC (permalink / raw)
  To: Jun Yao
  Cc: linux-arm-kernel, catalin.marinas, will.deacon, suzuki.poulose,
	linux-kernel

Hi Jun,

On 06/07/18 09:58, James Morse wrote:
> The series so far fails to boot from me. This is because the kaslr code tries to
> read the kaslr-seed from the DT, via the fixmap. To do this, it needs the fixmap
> tables installed, which early_fixmap_init() does.
> 
> early_fixmap_init() calls pgd_offset_k(), which assumes init_mm.pgd is in use,
> so we hit a BUG_ON() when trying to setup the fixmap because we added the tables
> to the wrong page tables.
> 
> If you enable 'CONFIG_RANDOMIZE_BASE', even without EFI you should see the same
> thing happen.
> 
> 
> I think we should move this hunk:
>> diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
>> index 30ad2f085d1f..b065c08d4008 100644
>> --- a/arch/arm64/kernel/setup.c
>> +++ b/arch/arm64/kernel/setup.c
>> @@ -249,6 +249,7 @@ void __init setup_arch(char **cmdline_p)
>>  	init_mm.end_code   = (unsigned long) _etext;
>>  	init_mm.end_data   = (unsigned long) _edata;
>>  	init_mm.brk	   = (unsigned long) _end;
>> +	init_mm.pgd	   = init_pg_dir;
>>
>>  	*cmdline_p = boot_command_line;
>>
> 
> into early_fixmap_init(), which is the first thing setup_arch() calls.
> Something like [0] fixes it.
> 
> This looks to be the only thing that goes near init_mm.pgd before paging_init().

I missed one: head.S has a call to kasan_early_init() before start_kernel();
this goes messing with the page tables, and calls pgd_offset_k(), which pulls in
swapper_pg_dir. This one is enabled by CONFIG_KASAN.

Something like that same hunk [0] in kasan_early_init() fixes it. This is still
within arch/arm64, so I still think we can get away without some #ifdeffery
to override the core-code's initial setup of swapper_pg_dir...
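
Spelled out, that would be something like this at the start of kasan_early_init()
(untested sketch; assuming kasan_init.c is built without KASAN instrumentation, so
a plain C assignment is safe this early):

----------%<----------
asmlinkage void __init kasan_early_init(void)
{
	/*
	 * The early shadow mapping below uses pgd_offset_k(), which reads
	 * init_mm.pgd: point it at the live init_pg_dir tables first.
	 * swapper_pg_dir is restored by paging_init().
	 */
	init_mm.pgd = init_pg_dir;

	/* ... existing early shadow setup ... */
}
----------%<----------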

Thanks,

James


> [0] make sure fixmap tables go in the init page tables
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index 117d080639b3..e097c78a66f8 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -770,6 +771,13 @@ void __init early_fixmap_init(void)
>         pmd_t *pmdp;
>         unsigned long addr = FIXADDR_START;
> 
> +       /*
> +        * During early setup we use init_pg_dir, update init_mm so its
> +        * implicit use by pgd_offset_k() gets the live page tables.
> +        * swapper_pg_dir is restored by paging_init().
> +        */
> +       init_mm.pgd = init_pg_dir;
> +
>         pgdp = pgd_offset_k(addr);
>         pgd = READ_ONCE(*pgdp);
>         if (CONFIG_PGTABLE_LEVELS > 3 &&
> 
> 
> 



* Re: [PATCH v3 5/5] arm64/mm: Move {idmap_pg_dir, swapper_pg_dir} to .rodata section
  2018-07-02 11:16 ` [PATCH v3 5/5] arm64/mm: Move {idmap_pg_dir, swapper_pg_dir} to .rodata section Jun Yao
@ 2018-07-11 16:15   ` James Morse
  0 siblings, 0 replies; 13+ messages in thread
From: James Morse @ 2018-07-11 16:15 UTC (permalink / raw)
  To: Jun Yao
  Cc: linux-arm-kernel, catalin.marinas, will.deacon, suzuki.poulose,
	linux-kernel

Hi Jun,

On 02/07/18 12:16, Jun Yao wrote:
> Move {idmap_pg_dir, swapper_pg_dir} to .rodata section and

> populate swapper_pg_dir by fixmap.

(any chance you could split the fixmap bits into a separate patch so that the
rodata move comes last? This will make review and bisecting any problems easier.)


> diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
> index 2e05bcd944c8..a0ce7d0f81c5 100644
> --- a/arch/arm64/include/asm/pgalloc.h
> +++ b/arch/arm64/include/asm/pgalloc.h
> @@ -29,6 +29,23 @@
>  #define PGALLOC_GFP	(GFP_KERNEL | __GFP_ZERO)
>  #define PGD_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
>  
> +static inline int in_swapper_dir(void *addr)
> +{
> +	if ((unsigned long)addr >= (unsigned long)swapper_pg_dir &&
> +		(unsigned long)addr < (unsigned long)swapper_pg_end) {
> +		return 1;
> +	}
> +	return 0;
> +}

Making this a bool and returning the condition feels more natural.

We know swapper_pg_dir and swapper_pg_end are both page aligned; if we can tell
the compiler, it can generate better code. Something like:
| return ((long)addr & PAGE_MASK) == ((long)swapper_pg_dir & PAGE_MASK);

(if you agree it's the same)
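
Pulling both suggestions together, a sketch (it relies on swapper_pg_dir being
exactly one page, which patch 4 of this series guarantees):

----------%<----------
static inline bool in_swapper_dir(void *addr)
{
	/* swapper_pg_dir is page aligned and a single page after patch 4 */
	return ((unsigned long)addr & PAGE_MASK) ==
	       ((unsigned long)swapper_pg_dir & PAGE_MASK);
}
----------%<----------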


> +static inline void *swapper_mirror_addr(void *start, void *addr)
> +{
> +	unsigned long offset;
> +
> +	offset = (unsigned long)addr - (unsigned long)swapper_pg_dir;
> +	return start + offset;
> +}

I think you've re-invented __set_fixmap_offset, ...


>  #if CONFIG_PGTABLE_LEVELS > 2
>  
>  static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
> @@ -49,6 +66,17 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
>  
>  static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
>  {
> +#ifdef __PAGETABLE_PUD_FOLDED
> +	if ((mm == &init_mm) && in_swapper_dir(pudp)) {
> +		pud_t *pud;

> +		pud = pud_set_fixmap(__pa_symbol(swapper_pg_dir));
> +		pud = (pud_t *)swapper_mirror_addr(pud, pudp);

This is calculating the corresponding pudp in the fixmap mapping of swapper.
If you give pud_set_fixmap() the physical address of the original pudp it will
calculate this for you.

I think this could fold down to something like:
|	pud_t *fixmap_pudp = pud_set_fixmap(__kimg_to_phys((long)pudp));

(please check I've dug through all that properly!)
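
Folding that in, the whole helper could end up looking something like this
(untested sketch; it assumes pud_set_fixmap() keeps the offset within the page,
as set_fixmap_offset() does):

----------%<----------
static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
{
#ifdef __PAGETABLE_PUD_FOLDED
	if (mm == &init_mm && in_swapper_dir(pudp)) {
		/* swapper is read-only: write through its fixmap alias instead */
		pud_t *fixmap_pudp = pud_set_fixmap(__kimg_to_phys((long)pudp));

		__pud_populate(fixmap_pudp, __pa(pmdp), PMD_TYPE_TABLE);
		pud_clear_fixmap();
		return;
	}
#endif
	__pud_populate(pudp, __pa(pmdp), PMD_TYPE_TABLE);
}
----------%<----------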


On a wider issue: these p?d_populate() helpers only put table entries down,
these are used by vmalloc(). Unfortunately ioremap() will try to add block
mappings if it can, which don't use this helper. (bits and pieces for testing
this in [0]). Systems with nvdimms probably do this all the time...

We could add the same in_swapper_dir() tests and fixmap operations to
p?d_set_huge(), but it may be worth pushing them down to set_p?d(), which is
where all these paths join up. This would also cover pgd_clear(), which is
called via p?d_none_or_clear_bad().


I think we should do something about concurrent calls. Today that ~should work,
provided they aren't modifying the same locations, but with these changes both
CPUs would try to clear_fixmap()/tlbi the range from underneath each other.

The p?d_alloc() helpers take the mm lock around the p?d_populate() calls, but it
doesn't look like there is any other lock for the ioremap work calling
p?d_set_huge(), so we will need one of our own, especially for
p?d_none_or_clear_bad().
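
As a very rough sketch of that direction (hypothetical helper and lock names;
the idea being that set_pgd()/set_pud()/set_pmd() on an entry inside swapper
would funnel through something like this):

----------%<----------
static DEFINE_SPINLOCK(swapper_pgdir_lock);

static void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
{
	pgd_t *fixmap_pgdp;

	spin_lock(&swapper_pgdir_lock);
	/* map swapper_pg_dir's page via the fixmap and write the entry there */
	fixmap_pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir));
	WRITE_ONCE(fixmap_pgdp[pgdp - swapper_pg_dir], pgd);
	/* clear_fixmap() invalidates the TLB entry for the fixmap slot */
	pgd_clear_fixmap();
	spin_unlock(&swapper_pgdir_lock);
}
----------%<----------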


> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index 3b408f21fe2e..b479d1b997c2 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -475,6 +475,9 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
>   */
>  #define mk_pte(page,prot)	pfn_pte(page_to_pfn(page),prot)
>  
> +#define pmd_set_fixmap(addr)		((pmd_t *)set_fixmap_offset(FIX_PMD, addr))
> +#define pmd_clear_fixmap()		clear_fixmap(FIX_PMD)
> +

Ooer, we shouldn't need to move these around. If they're wrong, it should be the
subject of a separate patch, but its more likely we're using the wrong ones...


> diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
> index d69e11ad92e3..beff018bf0f9 100644
> --- a/arch/arm64/kernel/vmlinux.lds.S
> +++ b/arch/arm64/kernel/vmlinux.lds.S
> @@ -223,21 +223,25 @@ SECTIONS
>  	BSS_SECTION(0, 0, 0)
>  
>  	. = ALIGN(PAGE_SIZE);
> -	idmap_pg_dir = .;
> -	. += IDMAP_DIR_SIZE;
> +
> +	.rodata : {

Now I'm confused. I thought this named section was output by RO_DATA() further
up, just before __init_begin, which mark_rodata_ro() uses as its end marker.
I thought named sections had to be unique.

Are we expecting the linker to realise we want it to move this stuff up there,
and not move the read-only contents down here, outside the markers? (Maybe the
linker always takes the first definition?)

Can we move these to sit near INIT_DIR/INIT_PG_TABLES as a macro, which we then
put after NOTES? (something like KERNEL_PG_TABLES?)


Thanks!

James



[0] abuse kdump to give us a 1G naturally aligned block of memory that is
missing from the linear map. We can memremap() this, and get a top-level block
entry on !4-level systems, provided we do it late enough. Add 'crashkernel=1G' to
your cmdline.
----------------------%<----------------------
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 325cfb3b858a..081d4e216067 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -20,6 +20,7 @@
 #include <linux/kernel.h>
 #include <linux/export.h>
 #include <linux/errno.h>
+#include <linux/io.h>
 #include <linux/swap.h>
 #include <linux/init.h>
 #include <linux/bootmem.h>
@@ -104,7 +105,7 @@ static void __init reserve_crashkernel(void)
        if (crash_base == 0) {
                /* Current arm64 boot protocol requires 2MB alignment */
                crash_base = memblock_find_in_range(0, ARCH_LOW_ADDRESS_LIMIT,
-                               crash_size, SZ_2M);
+                               crash_size, SZ_1G);
                if (crash_base == 0) {
                        pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
                                crash_size);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 955b40efcc0c..f8cca5adb0c8 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -485,12 +485,22 @@ static void __init map_mem(pgd_t *pgdp)
                __map_memblock(pgdp, crashk_res.start, crashk_res.end + 1,
                               PAGE_KERNEL,
                               NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
-               memblock_clear_nomap(crashk_res.start,
-                                    resource_size(&crashk_res));
        }
 #endif
 }

+#ifdef CONFIG_KEXEC_CORE
+static void hack__remap_kdump_region(void)
+{
+       size_t size = crashk_res.end - crashk_res.start;
+
+       if (!crashk_res.end)
+               return;
+
+       memremap((unsigned long)crashk_res.start, size, MEMREMAP_WB);
+}
+#endif
+
 void mark_rodata_ro(void)
 {
        unsigned long section_size;
@@ -504,6 +514,10 @@ void mark_rodata_ro(void)
                            section_size, PAGE_KERNEL_RO);

        debug_checkwx();
+
+#ifdef CONFIG_KEXEC_CORE
+       hack__remap_kdump_region();
+#endif
 }

 static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
----------------------%<----------------------


* Re: [PATCH v3 3/5] arm64/mm: Create initial page tables in init_pg_dir
  2018-07-06 14:41     ` James Morse
@ 2018-08-15 10:26       ` Jun Yao
  0 siblings, 0 replies; 13+ messages in thread
From: Jun Yao @ 2018-08-15 10:26 UTC (permalink / raw)
  To: James Morse
  Cc: linux-arm-kernel, catalin.marinas, will.deacon, suzuki.poulose,
	linux-kernel

Hi James,

On Fri, Jul 06, 2018 at 03:41:07PM +0100, James Morse wrote:
> I missed one: head.S has a call to kasan_early_init() before start_kernel(),
> this goes messing with the page tables, and calls pgd_offset_k(), which pulls in
> swapper_pg_dir. This one is enabled by CONFIG_KASAN.
> 
> Something like that same hunk [0] in kasan_early_init() fixes it. This is still
> within arch/arm64, so I still think we should get away without some #ifdeffery
> to override the core-code's initial setup of swapper_pg_dir...

I'm sorry to reply to you so late; I missed this email earlier.

To ensure that pgd_offset_k() works properly, I update init_mm.pgd by
introducing set_init_mm_pgd(). Its implementation looks like this:

>diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>index 65f86271f02b..e4f0868b4cfd 100644
>--- a/arch/arm64/mm/mmu.c
>+++ b/arch/arm64/mm/mmu.c
>@@ -623,6 +623,19 @@ static void __init map_kernel(pgd_t *pgdp)
>        kasan_copy_shadow(pgdp);
> }
>
>+void __init set_init_mm_pgd(pgd_t *pgd)
>+{
>+       pgd_t **addr = &(init_mm.pgd);
>+
>+       asm volatile("str %x0, [%1]\n"
>+                   : : "r" (pgd), "r" (addr) : "memory");
>+}
> /*
>  * paging_init() sets up the page tables, initialises the zone memory
>  * maps and sets up the zero page.

The purpose of using assembly is to prevent KASAN instrumentation, as
KASAN has not been initialized when this function is called:

>diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
>index c3e4b1886cde..ede2e964592b 100644
>--- a/arch/arm64/kernel/head.S
>+++ b/arch/arm64/kernel/head.S
>@@ -439,6 +438,9 @@ __primary_switched:
>        bl      __pi_memset
>        dsb     ishst                           // Make zero page visible to PTW
>
>+       adrp    x0, init_pg_dir
>+       bl      set_init_mm_pgd
>+
> #ifdef CONFIG_KASAN
>        bl      kasan_early_init
> #endif

What do you think?



Thread overview: 13+ messages
2018-07-02 11:16 [PATCH v3 0/5] Move {idmap_pg_dir,swapper_pg_dir} to .rodata Jun Yao
2018-07-02 11:16 ` [PATCH v3 1/5] arm64/mm: Introduce init_pg_dir Jun Yao
2018-07-06  8:56   ` James Morse
2018-07-02 11:16 ` [PATCH v3 2/5] arm64/mm: Make __enable_mmu() take the ttbr1 page as an argument Jun Yao
2018-07-06  8:57   ` James Morse
2018-07-02 11:16 ` [PATCH v3 3/5] arm64/mm: Create initial page tables in init_pg_dir Jun Yao
2018-07-06  8:58   ` James Morse
2018-07-06 14:41     ` James Morse
2018-08-15 10:26       ` Jun Yao
2018-07-02 11:16 ` [PATCH v3 4/5] arm64/mm: Make swapper_pg_dir smaller Jun Yao
2018-07-06  8:58   ` James Morse
2018-07-02 11:16 ` [PATCH v3 5/5] arm64/mm: Move {idmap_pg_dir, swapper_pg_dir} to .rodata section Jun Yao
2018-07-11 16:15   ` James Morse
