* [PATCH v2] arm64: mm: increase VA range of identity map
@ 2015-02-24 17:08 Ard Biesheuvel
  2015-02-25  1:30 ` Laura Abbott
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Ard Biesheuvel @ 2015-02-24 17:08 UTC (permalink / raw)
  To: linux-arm-kernel

The page size and the number of translation levels, and hence the supported
virtual address range, are build-time configurables on arm64 whose optimal
values are use case dependent. However, in the current implementation, if
the system's RAM is located at a very high offset, the virtual address range
needs to reflect that merely because the identity mapping, which is only used
to enable or disable the MMU, requires the extended virtual range to map the
physical memory at an equal virtual offset.

This patch relaxes that requirement, by increasing the number of translation
levels for the identity mapping only, and only when actually needed, i.e.,
when system RAM's offset is found to be out of reach at runtime.
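
To make the numbers concrete (they are only illustrative, for the default
4k-page/39-bit configuration): with 4k pages and 3 levels of translation,
VA_BITS is 39, so an identity mapping can only cover physical addresses
below 1 << 39 (512 GB). If system RAM sits at or above that offset, as it
does on the Seattle platform mentioned below, the only option today is to
build the kernel with a larger VA range; with this change, only the ID map
gains an extra translation level, and only if that turns out to be needed
at boot.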

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
v2:
- Dropped hunk regarding KVM, this will be addressed separately. Note that the
  build is still broken on Seattle if you have KVM enabled and 4k pages with
  3 levels of translation configured, but at least you have something to watch
  on your console now
- Fix ordering wrt TLB flushing
- Set T0SZ based on actual leading zero count in __pa(KERNEL_END), the net
  result is the same (one additional level of translation, if needed)

 arch/arm64/include/asm/mmu_context.h   | 38 ++++++++++++++++++++++++++++++++++
 arch/arm64/include/asm/page.h          |  6 ++++--
 arch/arm64/include/asm/pgtable-hwdef.h |  7 ++++++-
 arch/arm64/kernel/head.S               | 22 ++++++++++++++++++++
 arch/arm64/kernel/smp.c                |  1 +
 arch/arm64/mm/mmu.c                    |  7 ++++++-
 arch/arm64/mm/proc-macros.S            | 11 ++++++++++
 arch/arm64/mm/proc.S                   |  3 +++
 8 files changed, 91 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index a9eee33dfa62..641ce0574999 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -64,6 +64,44 @@ static inline void cpu_set_reserved_ttbr0(void)
 	: "r" (ttbr));
 }
 
+/*
+ * TCR.T0SZ value to use when the ID map is active. Usually equals
+ * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
+ * physical memory, in which case it will be smaller.
+ */
+extern u64 idmap_t0sz;
+
+static inline void __cpu_set_tcr_t0sz(u64 t0sz)
+{
+	unsigned long tcr;
+
+	if (!IS_ENABLED(CONFIG_ARM64_VA_BITS_48)
+	    && unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)))
+		asm volatile(
+		"	mrs	%0, tcr_el1		;"
+		"	bfi	%0, %1, #%2, #%3	;"
+		"	msr	tcr_el1, %0		;"
+		"	isb"
+		: "=&r" (tcr)
+		: "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
+}
+
+/*
+ * Set TCR.T0SZ to the value appropriate for activating the identity map.
+ */
+static inline void cpu_set_idmap_tcr_t0sz(void)
+{
+	__cpu_set_tcr_t0sz(idmap_t0sz);
+}
+
+/*
+ * Set TCR.T0SZ to its default value (based on VA_BITS)
+ */
+static inline void cpu_set_default_tcr_t0sz(void)
+{
+	__cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS));
+}
+
 static inline void switch_new_context(struct mm_struct *mm)
 {
 	unsigned long flags;
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 22b16232bd60..3d02b1869eb8 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -33,7 +33,9 @@
  * image. Both require pgd, pud (4 levels only) and pmd tables to (section)
  * map the kernel. With the 64K page configuration, swapper and idmap need to
  * map to pte level. The swapper also maps the FDT (see __create_page_tables
- * for more information).
+ * for more information). Note that the number of ID map translation levels
+ * could be increased on the fly if system RAM is out of reach for the default
+ * VA range, so 3 pages are reserved in all cases.
  */
 #ifdef CONFIG_ARM64_64K_PAGES
 #define SWAPPER_PGTABLE_LEVELS	(CONFIG_ARM64_PGTABLE_LEVELS)
@@ -42,7 +44,7 @@
 #endif
 
 #define SWAPPER_DIR_SIZE	(SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
-#define IDMAP_DIR_SIZE		(SWAPPER_DIR_SIZE)
+#define IDMAP_DIR_SIZE		(3 * PAGE_SIZE)
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 5f930cc9ea83..847e864202cc 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -143,7 +143,12 @@
 /*
  * TCR flags.
  */
-#define TCR_TxSZ(x)		(((UL(64) - (x)) << 16) | ((UL(64) - (x)) << 0))
+#define TCR_T0SZ_OFFSET		0
+#define TCR_T1SZ_OFFSET		16
+#define TCR_T0SZ(x)		((UL(64) - (x)) << TCR_T0SZ_OFFSET)
+#define TCR_T1SZ(x)		((UL(64) - (x)) << TCR_T1SZ_OFFSET)
+#define TCR_TxSZ(x)		(TCR_T0SZ(x) | TCR_T1SZ(x))
+#define TCR_TxSZ_WIDTH		6
 #define TCR_IRGN_NC		((UL(0) << 8) | (UL(0) << 24))
 #define TCR_IRGN_WBWA		((UL(1) << 8) | (UL(1) << 24))
 #define TCR_IRGN_WT		((UL(2) << 8) | (UL(2) << 24))
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 8ce88e08c030..88f592f47643 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -387,6 +387,28 @@ __create_page_tables:
 	mov	x0, x25				// idmap_pg_dir
 	ldr	x3, =KERNEL_START
 	add	x3, x3, x28			// __pa(KERNEL_START)
+
+#ifndef CONFIG_ARM64_VA_BITS_48
+#define EXTRA_SHIFT	(PGDIR_SHIFT + PAGE_SHIFT - 3)
+	/*
+	 * If VA_BITS < 48, it may be too small to allow for an ID mapping to be
+	 * created that covers system RAM if that is located sufficiently high
+	 * in the physical address space. So for the ID map, use an extended
+	 * virtual range in that case, by configuring an additional translation
+	 * level.
+	 */
+	adrp	x5, KERNEL_END
+	clz	x5, x5			// # of leading 0's in __pa(KERNEL_END)
+	cmp	x5, #TCR_T0SZ(VA_BITS)	// VA_BITS sufficiently large?
+	b.ge	1f			// .. then skip additional level
+
+	adrp	x6, idmap_t0sz
+	str	x5, [x6, #:lo12:idmap_t0sz]
+
+	create_table_entry x0, x3, EXTRA_SHIFT, PTRS_PER_PGD, x5, x6
+1:
+#endif
+
 	create_pgd_entry x0, x3, x5, x6
 	ldr	x6, =KERNEL_END
 	mov	x5, x3				// __pa(KERNEL_START)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 328b8ce4b007..74554dfcce73 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -151,6 +151,7 @@ asmlinkage void secondary_start_kernel(void)
 	 */
 	cpu_set_reserved_ttbr0();
 	flush_tlb_all();
+	cpu_set_default_tcr_t0sz();
 
 	preempt_disable();
 	trace_hardirqs_off();
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index c6daaf6c6f97..c4f60393383e 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -40,6 +40,8 @@
 
 #include "mm.h"
 
+u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
+
 /*
  * Empty_zero_page is a special page that is used for zero-initialized data
  * and COW.
@@ -454,6 +456,7 @@ void __init paging_init(void)
 	 */
 	cpu_set_reserved_ttbr0();
 	flush_tlb_all();
+	cpu_set_default_tcr_t0sz();
 }
 
 /*
@@ -461,8 +464,10 @@ void __init paging_init(void)
  */
 void setup_mm_for_reboot(void)
 {
-	cpu_switch_mm(idmap_pg_dir, &init_mm);
+	cpu_set_reserved_ttbr0();
 	flush_tlb_all();
+	cpu_set_idmap_tcr_t0sz();
+	cpu_switch_mm(idmap_pg_dir, &init_mm);
 }
 
 /*
diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S
index 005d29e2977d..c17fdd6a19bc 100644
--- a/arch/arm64/mm/proc-macros.S
+++ b/arch/arm64/mm/proc-macros.S
@@ -52,3 +52,14 @@
 	mov	\reg, #4			// bytes per word
 	lsl	\reg, \reg, \tmp		// actual cache line size
 	.endm
+
+/*
+ * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
+ */
+	.macro	tcr_set_idmap_t0sz, valreg, tmpreg
+#ifndef CONFIG_ARM64_VA_BITS_48
+	adrp	\tmpreg, idmap_t0sz
+	ldr	\tmpreg, [\tmpreg, #:lo12:idmap_t0sz]
+	bfi	\valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
+#endif
+	.endm
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 28eebfb6af76..cdd754e19b9b 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -156,6 +156,7 @@ ENTRY(cpu_do_resume)
 	msr	cpacr_el1, x6
 	msr	ttbr0_el1, x1
 	msr	ttbr1_el1, x7
+	tcr_set_idmap_t0sz x8, x7
 	msr	tcr_el1, x8
 	msr	vbar_el1, x9
 	msr	mdscr_el1, x10
@@ -233,6 +234,8 @@ ENTRY(__cpu_setup)
 	 */
 	ldr	x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
 			TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0
+	tcr_set_idmap_t0sz	x10, x9
+
 	/*
 	 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in
 	 * TCR_EL1.
-- 
1.8.3.2

* [PATCH v2] arm64: mm: increase VA range of identity map
  2015-02-24 17:08 [PATCH v2] arm64: mm: increase VA range of identity map Ard Biesheuvel
@ 2015-02-25  1:30 ` Laura Abbott
  2015-02-25 12:38 ` Catalin Marinas
  2015-02-25 14:01 ` Will Deacon
  2 siblings, 0 replies; 11+ messages in thread
From: Laura Abbott @ 2015-02-25  1:30 UTC (permalink / raw)
  To: linux-arm-kernel

On 2/24/2015 9:08 AM, Ard Biesheuvel wrote:
> The page size and the number of translation levels, and hence the supported
> virtual address range, are build-time configurables on arm64 whose optimal
> values are use case dependent. However, in the current implementation, if
> the system's RAM is located at a very high offset, the virtual address range
> needs to reflect that merely because the identity mapping, which is only used
> to enable or disable the MMU, requires the extended virtual range to map the
> physical memory at an equal virtual offset.
>
> This patch relaxes that requirement, by increasing the number of translation
> levels for the identity mapping only, and only when actually needed, i.e.,
> when system RAM's offset is found to be out of reach at runtime.
>
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---
> v2:
> - Dropped hunk regarding KVM, this will be addressed separately. Note that the
>    build is still broken on Seattle if you have KVM enabled and 4k pages with
>    3 levels of translation configured, but at least you have something to watch
>    on your console now
> - Fix ordering wrt TLB flushing
> - Set T0SZ based on actual leading zero count in __pa(KERNEL_END), the net
>    result is the same (one additional level of translation, if needed)
>
>   arch/arm64/include/asm/mmu_context.h   | 38 ++++++++++++++++++++++++++++++++++
>   arch/arm64/include/asm/page.h          |  6 ++++--
>   arch/arm64/include/asm/pgtable-hwdef.h |  7 ++++++-
>   arch/arm64/kernel/head.S               | 22 ++++++++++++++++++++
>   arch/arm64/kernel/smp.c                |  1 +
>   arch/arm64/mm/mmu.c                    |  7 ++++++-
>   arch/arm64/mm/proc-macros.S            | 11 ++++++++++
>   arch/arm64/mm/proc.S                   |  3 +++
>   8 files changed, 91 insertions(+), 4 deletions(-)
>

Tested-by: Laura Abbott <lauraa@codeaurora.org>

For both 4K and 64K pages.


-- 
Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project

* [PATCH v2] arm64: mm: increase VA range of identity map
  2015-02-24 17:08 [PATCH v2] arm64: mm: increase VA range of identity map Ard Biesheuvel
  2015-02-25  1:30 ` Laura Abbott
@ 2015-02-25 12:38 ` Catalin Marinas
  2015-02-25 13:10   ` Ard Biesheuvel
  2015-02-25 14:01 ` Will Deacon
  2 siblings, 1 reply; 11+ messages in thread
From: Catalin Marinas @ 2015-02-25 12:38 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Feb 24, 2015 at 05:08:23PM +0000, Ard Biesheuvel wrote:
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -387,6 +387,28 @@ __create_page_tables:
>  	mov	x0, x25				// idmap_pg_dir
>  	ldr	x3, =KERNEL_START
>  	add	x3, x3, x28			// __pa(KERNEL_START)
> +
> +#ifndef CONFIG_ARM64_VA_BITS_48
> +#define EXTRA_SHIFT	(PGDIR_SHIFT + PAGE_SHIFT - 3)
> +	/*
> +	 * If VA_BITS < 48, it may be too small to allow for an ID mapping to be
> +	 * created that covers system RAM if that is located sufficiently high
> +	 * in the physical address space. So for the ID map, use an extended
> +	 * virtual range in that case, by configuring an additional translation
> +	 * level.
> +	 */
> +	adrp	x5, KERNEL_END
> +	clz	x5, x5			// # of leading 0's in __pa(KERNEL_END)
> +	cmp	x5, #TCR_T0SZ(VA_BITS)	// VA_BITS sufficiently large?

I think we need some better comment here for people looking at this code
again in the future (including myself). So the VA bits needed to cover
KERNEL_END are calculated as (64 - clz(__pa(KERNEL_END))). T0SZ is
calculated as (64 - VA_BITS) which means that T0SZ is
clz(__pa(KERNEL_END)).
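For example (the address is made up purely for illustration): if
__pa(KERNEL_END) were 0x8001000000, the top set bit is bit 39, so
clz(__pa(KERNEL_END)) is 24 and 64 - 24 = 40 VA bits are needed. With
VA_BITS = 39, TCR_T0SZ(VA_BITS) is 25, so the b.ge is not taken,
idmap_t0sz becomes 24 and the extra level gets built.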

> +	b.ge	1f			// .. then skip additional level
> +
> +	adrp	x6, idmap_t0sz
> +	str	x5, [x6, #:lo12:idmap_t0sz]
> +
> +	create_table_entry x0, x3, EXTRA_SHIFT, PTRS_PER_PGD, x5, x6

EXTRA_SHIFT is correctly calculated (one level more than PGDIR_SHIFT)
but I don't think PTRS_PER_PGD is appropriate here. For example, we had
in the past 39-bit VA with 64KB pages which made PTRS_PER_PGD pretty
small (well, it may cover the 40-bit case you are trying to fix but not
up to 48). So maybe define something like:

#define EXTRA_PTRS	(1 << (48 - EXTRA_SHIFT))
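
(Plugging in the usual configurations, purely as an illustration: with 4k
pages and VA_BITS=39, PGDIR_SHIFT is 30, so EXTRA_SHIFT is 39 and
EXTRA_PTRS is 1 << 9 = 512, i.e. a full page worth of entries anyway;
with 64k pages and VA_BITS=42, PGDIR_SHIFT is 29, so EXTRA_SHIFT is 42
and EXTRA_PTRS is only 1 << 6 = 64.)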

Otherwise the patch looks fine to me. We have to look into fixing KVM as
well, ideally with sharing the same idmap with the kernel.

-- 
Catalin

* [PATCH v2] arm64: mm: increase VA range of identity map
  2015-02-25 12:38 ` Catalin Marinas
@ 2015-02-25 13:10   ` Ard Biesheuvel
  2015-02-25 13:55     ` Ard Biesheuvel
  0 siblings, 1 reply; 11+ messages in thread
From: Ard Biesheuvel @ 2015-02-25 13:10 UTC (permalink / raw)
  To: linux-arm-kernel

On 25 February 2015 at 12:38, Catalin Marinas <catalin.marinas@arm.com> wrote:
> On Tue, Feb 24, 2015 at 05:08:23PM +0000, Ard Biesheuvel wrote:
>> --- a/arch/arm64/kernel/head.S
>> +++ b/arch/arm64/kernel/head.S
>> @@ -387,6 +387,28 @@ __create_page_tables:
>>       mov     x0, x25                         // idmap_pg_dir
>>       ldr     x3, =KERNEL_START
>>       add     x3, x3, x28                     // __pa(KERNEL_START)
>> +
>> +#ifndef CONFIG_ARM64_VA_BITS_48
>> +#define EXTRA_SHIFT  (PGDIR_SHIFT + PAGE_SHIFT - 3)
>> +     /*
>> +      * If VA_BITS < 48, it may be too small to allow for an ID mapping to be
>> +      * created that covers system RAM if that is located sufficiently high
>> +      * in the physical address space. So for the ID map, use an extended
>> +      * virtual range in that case, by configuring an additional translation
>> +      * level.
>> +      */
>> +     adrp    x5, KERNEL_END
>> +     clz     x5, x5                  // # of leading 0's in __pa(KERNEL_END)
>> +     cmp     x5, #TCR_T0SZ(VA_BITS)  // VA_BITS sufficiently large?
>
> I think we need some better comment here for people looking at this code
> again in the future (including myself). So the VA bits needed to cover
> KERNEL_END are calculated as (64 - clz(__pa(KERNEL_END))). T0SZ is
> calculated as (64 - VA_BITS) which means that T0SZ is
> clz(__pa(KERNEL_END)).
>

OK.

>> +     b.ge    1f                      // .. then skip additional level
>> +
>> +     adrp    x6, idmap_t0sz
>> +     str     x5, [x6, #:lo12:idmap_t0sz]
>> +
>> +     create_table_entry x0, x3, EXTRA_SHIFT, PTRS_PER_PGD, x5, x6
>
> EXTRA_SHIFT is correctly calculated (one level more than PGDIR_SHIFT)
> but I don't think PTRS_PER_PGD is appropriate here. For example, we had
> in the past 39-bit VA with 64KB pages which made PTRS_PER_PGD pretty
> small (well, it may cover the 40-bit case you are trying to fix but not
> up to 48). So maybe define something like:
>
> #define EXTRA_PTRS      (1 << (48 - EXTRA_SHIFT))
>

My assumption was that, if you are running with fewer translation
levels than required to map all 48 bits, it makes little sense to use
only half a page or less at the top level, and PTRS_PER_PGD will
always cover a full page (which is what we reserve at the root level
anyway). But perhaps this is not universally true.

I will change it.

> Otherwise the patch looks fine to me. We have to look into fixing KVM as
> well, ideally with sharing the same idmap with the kernel.
>

Yes, but much of that code is shared with ARM as well, so it is not as
straight-forward, unfortunately.

Thanks,
Ard.

* [PATCH v2] arm64: mm: increase VA range of identity map
  2015-02-25 13:10   ` Ard Biesheuvel
@ 2015-02-25 13:55     ` Ard Biesheuvel
  2015-02-25 15:22       ` Catalin Marinas
  0 siblings, 1 reply; 11+ messages in thread
From: Ard Biesheuvel @ 2015-02-25 13:55 UTC (permalink / raw)
  To: linux-arm-kernel

On 25 February 2015 at 13:10, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> On 25 February 2015 at 12:38, Catalin Marinas <catalin.marinas@arm.com> wrote:
>> On Tue, Feb 24, 2015 at 05:08:23PM +0000, Ard Biesheuvel wrote:
>>> --- a/arch/arm64/kernel/head.S
>>> +++ b/arch/arm64/kernel/head.S
>>> @@ -387,6 +387,28 @@ __create_page_tables:
>>>       mov     x0, x25                         // idmap_pg_dir
>>>       ldr     x3, =KERNEL_START
>>>       add     x3, x3, x28                     // __pa(KERNEL_START)
>>> +
>>> +#ifndef CONFIG_ARM64_VA_BITS_48
>>> +#define EXTRA_SHIFT  (PGDIR_SHIFT + PAGE_SHIFT - 3)
>>> +     /*
>>> +      * If VA_BITS < 48, it may be too small to allow for an ID mapping to be
>>> +      * created that covers system RAM if that is located sufficiently high
>>> +      * in the physical address space. So for the ID map, use an extended
>>> +      * virtual range in that case, by configuring an additional translation
>>> +      * level.
>>> +      */
>>> +     adrp    x5, KERNEL_END
>>> +     clz     x5, x5                  // # of leading 0's in __pa(KERNEL_END)
>>> +     cmp     x5, #TCR_T0SZ(VA_BITS)  // VA_BITS sufficiently large?
>>
>> I think we need some better comment here for people looking at this code
>> again in the future (including myself). So the VA bits needed to cover
>> KERNEL_END are calculated as (64 - clz(__pa(KERNEL_END))). T0SZ is
>> calculated as (64 - VA_BITS) which means that T0SZ is
>> clz(__pa(KERNEL_END)).
>>
>
> OK.
>
>>> +     b.ge    1f                      // .. then skip additional level
>>> +
>>> +     adrp    x6, idmap_t0sz
>>> +     str     x5, [x6, #:lo12:idmap_t0sz]
>>> +
>>> +     create_table_entry x0, x3, EXTRA_SHIFT, PTRS_PER_PGD, x5, x6
>>
>> EXTRA_SHIFT is correctly calculated (one level more than PGDIR_SHIFT)
>> but I don't think PTRS_PER_PGD is appropriate here. For example, we had
>> in the past 39-bit VA with 64KB pages which made PTRS_PER_PGD pretty
>> small (well, it may cover the 40-bit case you are trying to fix but not
>> up to 48). So maybe define something like:
>>
>> #define EXTRA_PTRS      (1 << (48 - EXTRA_SHIFT))
>>
>
> My assumption was that, if you are running with fewer translation
> levels than required to map all 48 bits, it makes little sense to use
> only half a page or less at the top level, and PTRS_PER_PGD will
> always cover a full page (which is what we reserve at the root level
> anyway). But perhaps this is not universally true.
>

Actually, this assumption is encoded into the patch in another way as
well: decreasing T0SZ is always assumed to result in an additional
level of translation being configured. However, your VA_BITS==39 on
64k pages case would break this assumption too. So perhaps using
MAX_VA_BITS instead of the actual size of the PA is the right thing to
do here after all.
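(For illustration: with 64k pages, two translation levels already cover
16 + 13 + 13 = 42 bits, so dropping T0SZ from 25 to 24 would not change
the number of levels the hardware expects to walk, even though the code
above would have built an extra table level.)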

>> Otherwise the patch looks fine to me. We have to look into fixing KVM as
>> well, ideally with sharing the same idmap with the kernel.
>>
>
> Yes, but much of that code is shared with ARM as well, so it is not as
> straight-forward, unfortunately.
>
> Thanks,
> Ard.

* [PATCH v2] arm64: mm: increase VA range of identity map
  2015-02-24 17:08 [PATCH v2] arm64: mm: increase VA range of identity map Ard Biesheuvel
  2015-02-25  1:30 ` Laura Abbott
  2015-02-25 12:38 ` Catalin Marinas
@ 2015-02-25 14:01 ` Will Deacon
  2015-02-25 14:15   ` Ard Biesheuvel
  2 siblings, 1 reply; 11+ messages in thread
From: Will Deacon @ 2015-02-25 14:01 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Feb 24, 2015 at 05:08:23PM +0000, Ard Biesheuvel wrote:
> The page size and the number of translation levels, and hence the supported
> virtual address range, are build-time configurables on arm64 whose optimal
> values are use case dependent. However, in the current implementation, if
> the system's RAM is located at a very high offset, the virtual address range
> needs to reflect that merely because the identity mapping, which is only used
> to enable or disable the MMU, requires the extended virtual range to map the
> physical memory at an equal virtual offset.
> 
> This patch relaxes that requirement, by increasing the number of translation
> levels for the identity mapping only, and only when actually needed, i.e.,
> when system RAM's offset is found to be out of reach at runtime.
> 
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---
> v2:
> - Dropped hunk regarding KVM, this will be addressed separately. Note that the
>   build is still broken on Seattle if you have KVM enabled and 4k pages with
>   3 levels of translation configured, but at least you have something to watch
>   on your console now
> - Fix ordering wrt TLB flushing
> - Set T0SZ based on actual leading zero count in __pa(KERNEL_END), the net
>   result is the same (one additional level of translation, if needed)
> 
>  arch/arm64/include/asm/mmu_context.h   | 38 ++++++++++++++++++++++++++++++++++
>  arch/arm64/include/asm/page.h          |  6 ++++--
>  arch/arm64/include/asm/pgtable-hwdef.h |  7 ++++++-
>  arch/arm64/kernel/head.S               | 22 ++++++++++++++++++++
>  arch/arm64/kernel/smp.c                |  1 +
>  arch/arm64/mm/mmu.c                    |  7 ++++++-
>  arch/arm64/mm/proc-macros.S            | 11 ++++++++++
>  arch/arm64/mm/proc.S                   |  3 +++
>  8 files changed, 91 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
> index a9eee33dfa62..641ce0574999 100644
> --- a/arch/arm64/include/asm/mmu_context.h
> +++ b/arch/arm64/include/asm/mmu_context.h
> @@ -64,6 +64,44 @@ static inline void cpu_set_reserved_ttbr0(void)
>  	: "r" (ttbr));
>  }
>  
> +/*
> + * TCR.T0SZ value to use when the ID map is active. Usually equals
> + * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
> + * physical memory, in which case it will be smaller.
> + */
> +extern u64 idmap_t0sz;
> +
> +static inline void __cpu_set_tcr_t0sz(u64 t0sz)
> +{
> +	unsigned long tcr;
> +
> +	if (!IS_ENABLED(CONFIG_ARM64_VA_BITS_48)
> +	    && unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)))
> +		asm volatile(
> +		"	mrs	%0, tcr_el1		;"
> +		"	bfi	%0, %1, #%2, #%3	;"

It's odd that you need these '#'s. Do you see issues without them?

> +		"	msr	tcr_el1, %0		;"
> +		"	isb"
> +		: "=&r" (tcr)
> +		: "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
> +}

Hmm, do we need a memory clobber here, or can we rely on the caller
having the appropriate compiler barriers?

Will

* [PATCH v2] arm64: mm: increase VA range of identity map
  2015-02-25 14:01 ` Will Deacon
@ 2015-02-25 14:15   ` Ard Biesheuvel
  2015-02-25 14:24     ` Will Deacon
  0 siblings, 1 reply; 11+ messages in thread
From: Ard Biesheuvel @ 2015-02-25 14:15 UTC (permalink / raw)
  To: linux-arm-kernel

On 25 February 2015 at 14:01, Will Deacon <will.deacon@arm.com> wrote:
> On Tue, Feb 24, 2015 at 05:08:23PM +0000, Ard Biesheuvel wrote:
>> The page size and the number of translation levels, and hence the supported
>> virtual address range, are build-time configurables on arm64 whose optimal
>> values are use case dependent. However, in the current implementation, if
>> the system's RAM is located at a very high offset, the virtual address range
>> needs to reflect that merely because the identity mapping, which is only used
>> to enable or disable the MMU, requires the extended virtual range to map the
>> physical memory at an equal virtual offset.
>>
>> This patch relaxes that requirement, by increasing the number of translation
>> levels for the identity mapping only, and only when actually needed, i.e.,
>> when system RAM's offset is found to be out of reach at runtime.
>>
>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>> ---
>> v2:
>> - Dropped hunk regarding KVM, this will be addressed separately. Note that the
>>   build is still broken on Seattle if you have KVM enabled and 4k pages with
>>   3 levels of translation configured, but at least you have something to watch
>>   on your console now
>> - Fix ordering wrt TLB flushing
>> - Set T0SZ based on actual leading zero count in __pa(KERNEL_END), the net
>>   result is the same (one additional level of translation, if needed)
>>
>>  arch/arm64/include/asm/mmu_context.h   | 38 ++++++++++++++++++++++++++++++++++
>>  arch/arm64/include/asm/page.h          |  6 ++++--
>>  arch/arm64/include/asm/pgtable-hwdef.h |  7 ++++++-
>>  arch/arm64/kernel/head.S               | 22 ++++++++++++++++++++
>>  arch/arm64/kernel/smp.c                |  1 +
>>  arch/arm64/mm/mmu.c                    |  7 ++++++-
>>  arch/arm64/mm/proc-macros.S            | 11 ++++++++++
>>  arch/arm64/mm/proc.S                   |  3 +++
>>  8 files changed, 91 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
>> index a9eee33dfa62..641ce0574999 100644
>> --- a/arch/arm64/include/asm/mmu_context.h
>> +++ b/arch/arm64/include/asm/mmu_context.h
>> @@ -64,6 +64,44 @@ static inline void cpu_set_reserved_ttbr0(void)
>>       : "r" (ttbr));
>>  }
>>
>> +/*
>> + * TCR.T0SZ value to use when the ID map is active. Usually equals
>> + * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
>> + * physical memory, in which case it will be smaller.
>> + */
>> +extern u64 idmap_t0sz;
>> +
>> +static inline void __cpu_set_tcr_t0sz(u64 t0sz)
>> +{
>> +     unsigned long tcr;
>> +
>> +     if (!IS_ENABLED(CONFIG_ARM64_VA_BITS_48)
>> +         && unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)))
>> +             asm volatile(
>> +             "       mrs     %0, tcr_el1             ;"
>> +             "       bfi     %0, %1, #%2, #%3        ;"
>
> It's odd that you need these '#'s. Do you see issues without them?
>

I actually haven't tried without them. I can remove them if you prefer

>> +             "       msr     tcr_el1, %0             ;"
>> +             "       isb"
>> +             : "=&r" (tcr)
>> +             : "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
>> +}
>
> Hmm, do we need a memory clobber here, or can we rely on the caller
> having the appropriate compiler barriers?
>

The TCR_EL1 update only affects the lower TTBR0 mapping, so I don't
think it would matter in this particular case if any memory accesses
are reordered across it, would it?

* [PATCH v2] arm64: mm: increase VA range of identity map
  2015-02-25 14:15   ` Ard Biesheuvel
@ 2015-02-25 14:24     ` Will Deacon
  2015-02-25 14:47       ` Ard Biesheuvel
  0 siblings, 1 reply; 11+ messages in thread
From: Will Deacon @ 2015-02-25 14:24 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Feb 25, 2015 at 02:15:52PM +0000, Ard Biesheuvel wrote:
> On 25 February 2015 at 14:01, Will Deacon <will.deacon@arm.com> wrote:
> > On Tue, Feb 24, 2015 at 05:08:23PM +0000, Ard Biesheuvel wrote:
> >> +static inline void __cpu_set_tcr_t0sz(u64 t0sz)
> >> +{
> >> +     unsigned long tcr;
> >> +
> >> +     if (!IS_ENABLED(CONFIG_ARM64_VA_BITS_48)
> >> +         && unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)))
> >> +             asm volatile(
> >> +             "       mrs     %0, tcr_el1             ;"
> >> +             "       bfi     %0, %1, #%2, #%3        ;"
> >
> > It's odd that you need these '#'s. Do you see issues without them?
> >
> 
> I actually haven't tried without them. I can remove them if you prefer

Yes, please.

> >> +             "       msr     tcr_el1, %0             ;"
> >> +             "       isb"
> >> +             : "=&r" (tcr)
> >> +             : "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
> >> +}
> >
> > Hmm, do we need a memory clobber here, or can we rely on the caller
> > having the appropriate compiler barriers?
> >
> 
> The TCR_EL1 update only affects the lower TTBR0 mapping, so I don't
> think it would matter in this particular case if any memory accesses
> are reordered across it, would it?

What if those accesses were intended for the identity mapping and ended
up being translated with stale user mappings? It could be that the
preempt_disable() is enough, but if so, a comment would be helpful.

Will

* [PATCH v2] arm64: mm: increase VA range of identity map
  2015-02-25 14:24     ` Will Deacon
@ 2015-02-25 14:47       ` Ard Biesheuvel
  2015-02-25 14:58         ` Will Deacon
  0 siblings, 1 reply; 11+ messages in thread
From: Ard Biesheuvel @ 2015-02-25 14:47 UTC (permalink / raw)
  To: linux-arm-kernel

On 25 February 2015 at 14:24, Will Deacon <will.deacon@arm.com> wrote:
> On Wed, Feb 25, 2015 at 02:15:52PM +0000, Ard Biesheuvel wrote:
>> On 25 February 2015 at 14:01, Will Deacon <will.deacon@arm.com> wrote:
>> > On Tue, Feb 24, 2015 at 05:08:23PM +0000, Ard Biesheuvel wrote:
>> >> +static inline void __cpu_set_tcr_t0sz(u64 t0sz)
>> >> +{
>> >> +     unsigned long tcr;
>> >> +
>> >> +     if (!IS_ENABLED(CONFIG_ARM64_VA_BITS_48)
>> >> +         && unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)))
>> >> +             asm volatile(
>> >> +             "       mrs     %0, tcr_el1             ;"
>> >> +             "       bfi     %0, %1, #%2, #%3        ;"
>> >
>> > It's odd that you need these '#'s. Do you see issues without them?
>> >
>>
>> I actually haven't tried without them. I can remove them if you prefer
>
> Yes, please.
>
>> >> +             "       msr     tcr_el1, %0             ;"
>> >> +             "       isb"
>> >> +             : "=&r" (tcr)
>> >> +             : "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
>> >> +}
>> >
>> > Hmm, do we need a memory clobber here, or can we rely on the caller
>> > having the appropriate compiler barriers?
>> >
>>
>> The TCR_EL1 update only affects the lower TTBR0 mapping, so I don't
>> think it would matter in this particular case if any memory accesses
>> are reordered across it, would it?
>
> What if those accesses were intended for the identity mapping and ended
> up being translated with stale user mappings? It could be that the
> preempt_disable() is enough, but if so, a comment would be helpful.
>

Sorry, I don't quite follow. First of all, if that concern is valid,
then it is equally valid without this patch. This just updates TCR_EL1
in places where we were already assigning TTBR0_EL1 a new value.
(Although I am not saying that means it is not my problem if there is
an issue here.)

The function cpu_set_default_tcr_t0sz() is called from paging_init()
and secondary_start_kernel() to set the default T0SZ value after
having deactivated the ID map. Catalin made a point about how to order
those operations wrt the TLB flush, and I am pretty sure the compiler
emits the asm() blocks in program order. In either case, there is no
user mapping, just the old mapping (the ID map) and the new invalid
mapping (the zero page). IIUC, we would only need compiler barriers
here to prevent it from deferring a read via the ID map until it has
already been deactivated, if such a read was present in the code.

Then there is setup_mm_for_reboot() [which may be dead code, I think?
It is only called from soft_restart() which doesn't seem to have any
callers itself], which just reads some global kernel vars.

Apologies if I am being thick here

* [PATCH v2] arm64: mm: increase VA range of identity map
  2015-02-25 14:47       ` Ard Biesheuvel
@ 2015-02-25 14:58         ` Will Deacon
  0 siblings, 0 replies; 11+ messages in thread
From: Will Deacon @ 2015-02-25 14:58 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Feb 25, 2015 at 02:47:02PM +0000, Ard Biesheuvel wrote:
> On 25 February 2015 at 14:24, Will Deacon <will.deacon@arm.com> wrote:
> > On Wed, Feb 25, 2015 at 02:15:52PM +0000, Ard Biesheuvel wrote:
> >> >> +             "       msr     tcr_el1, %0             ;"
> >> >> +             "       isb"
> >> >> +             : "=&r" (tcr)
> >> >> +             : "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
> >> >> +}
> >> >
> >> > Hmm, do we need a memory clobber here, or can we rely on the caller
> >> > having the appropriate compiler barriers?
> >> >
> >>
> >> The TCR_EL1 update only affects the lower TTBR0 mapping, so I don't
> >> think it would matter in this particular case if any memory accesses
> >> are reordered across it, would it?
> >
> > What if those accesses were intended for the identity mapping and ended
> > up being translated with stale user mappings? It could be that the
> > preempt_disable() is enough, but if so, a comment would be helpful.
> >
> 
> Sorry, I don't quite follow. First of all, if that concern is valid,
> then it is equally valid without this patch. This just updates TCR_EL1
> in places where we were already assigning TTBR0_EL1 a new value.
> (Although I am not saying that means it is not my problem if there is
> an issue here.)
> 
> The function cpu_set_default_tcr_t0sz() is called from paging_init()
> and secondary_start_kernel() to set the default T0SZ value after
> having deactivated the ID map. Catalin made a point about how to order
> those operations wrt the TLB flush, and I am pretty sure the compiler
> emits the asm() blocks in program order. In either case, there is no
> user mapping, just the old mapping (the ID map) and the new invalid
> mapping (the zero page). IIUC, we would only need compiler barriers
> here to prevent it from deferring a read via the ID map until it has
> already been deactivated, if such a read was present in the code.
> 
> Then there is setup_mm_for_reboot() [which may be dead code, I think?
> It is only called from soft_restart() which doesn't seem to have any
> callers itself], which just reads some global kernel vars.
> 
> Apologies if I am being thick here

You're not being thick at all, I just want to make sure we've got this
right. Actually, the TLB flush will give us the compiler barriers we need
and you're right to point out that the volatile asm blocks will be emitted
in program order.
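
For the record, the sequence this patch ends up with in paging_init() and
secondary_start_kernel() (copied from the diff above; the comments are
added here only to spell out the ordering we are relying on) is:

	cpu_set_reserved_ttbr0();	/* TTBR0 := empty_zero_page       */
	flush_tlb_all();		/* dsb/isb with "memory" clobbers,
					 * so also a compiler barrier     */
	cpu_set_default_tcr_t0sz();	/* back to T0SZ == 64 - VA_BITS   */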

Will

* [PATCH v2] arm64: mm: increase VA range of identity map
  2015-02-25 13:55     ` Ard Biesheuvel
@ 2015-02-25 15:22       ` Catalin Marinas
  0 siblings, 0 replies; 11+ messages in thread
From: Catalin Marinas @ 2015-02-25 15:22 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Feb 25, 2015 at 01:55:04PM +0000, Ard Biesheuvel wrote:
> On 25 February 2015 at 13:10, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> > On 25 February 2015 at 12:38, Catalin Marinas <catalin.marinas@arm.com> wrote:
> >> On Tue, Feb 24, 2015 at 05:08:23PM +0000, Ard Biesheuvel wrote:
> >>> --- a/arch/arm64/kernel/head.S
> >>> +++ b/arch/arm64/kernel/head.S
> >>> @@ -387,6 +387,28 @@ __create_page_tables:
> >>>       mov     x0, x25                         // idmap_pg_dir
> >>>       ldr     x3, =KERNEL_START
> >>>       add     x3, x3, x28                     // __pa(KERNEL_START)
> >>> +
> >>> +#ifndef CONFIG_ARM64_VA_BITS_48
> >>> +#define EXTRA_SHIFT  (PGDIR_SHIFT + PAGE_SHIFT - 3)
> >>> +     /*
> >>> +      * If VA_BITS < 48, it may be too small to allow for an ID mapping to be
> >>> +      * created that covers system RAM if that is located sufficiently high
> >>> +      * in the physical address space. So for the ID map, use an extended
> >>> +      * virtual range in that case, by configuring an additional translation
> >>> +      * level.
> >>> +      */
> >>> +     adrp    x5, KERNEL_END
> >>> +     clz     x5, x5                  // # of leading 0's in __pa(KERNEL_END)
> >>> +     cmp     x5, #TCR_T0SZ(VA_BITS)  // VA_BITS sufficiently large?
> >>
> >> I think we need some better comment here for people looking at this code
> >> again in the future (including myself). So the VA bits needed to cover
> >> KERNEL_END are calculated as (64 - clz(__pa(KERNEL_END))). T0SZ is
> >> calculated as (64 - VA_BITS) which means that T0SZ is
> >> clz(__pa(KERNEL_END)).
> >>
> >
> > OK.
> >
> >>> +     b.ge    1f                      // .. then skip additional level
> >>> +
> >>> +     adrp    x6, idmap_t0sz
> >>> +     str     x5, [x6, #:lo12:idmap_t0sz]
> >>> +
> >>> +     create_table_entry x0, x3, EXTRA_SHIFT, PTRS_PER_PGD, x5, x6
> >>
> >> EXTRA_SHIFT is correctly calculated (one level more than PGDIR_SHIFT)
> >> but I don't think PTRS_PER_PGD is appropriate here. For example, we had
> >> in the past 39-bit VA with 64KB pages which made PTRS_PER_PGD pretty
> >> small (well, it may cover the 40-bit case you are trying to fix but not
> >> up to 48). So maybe define something like:
> >>
> >> #define EXTRA_PTRS      (1 << (48 - EXTRA_SHIFT))
> >>
> >
> > My assumption was that, if you are running with fewer translation
> > levels than required to map all 48 bits, it makes little sense to use
> > only half a page or less at the top level, and PTRS_PER_PGD will
> > always cover a full page (which is what we reserve at the root level
> > anyway). But perhaps this is not universally true.
> 
> Actually, this assumption is encoded into the patch in another way as
> well: decreasing T0SZ is always assumed to result in an additional
> level of translation to be configured. However, your VA_BITS==39 on
> 64k pages case would break this assumption too. So perhaps using
> MAX_VA_BITS instead of the actual size of the PA is the right thing to
> do here after all.

We have two options:

a) enforce that for VA_BITS != 48, the PTRS_PER_PGD is always (1 <<
   (PAGE_SHIFT - 3)). This way we know that anything bigger than VA_BITS
   requires more levels (currently only one more)

b) decouple the idmap_t0sz calculation from the additional page table
   level so we can have different idmap_t0sz but not necessarily an
   extra level

I think to keep things simple, we should go for (a) with your original
MAX_VA_BITS when KERNEL_END cannot be covered by VA_BITS:

	64 - clz(__pa(KERNEL_END)) > VA_BITS

You can still use PTRS_PER_PGD since we know it covers a full page but
we need an #error somewhere in case we forget about this restriction,
something like:

#if !defined(CONFIG_ARM64_VA_BITS_48) && \
	(8 * PTRS_PER_PGD != PAGE_SIZE)
#error "PGD not a full page size"
#endif

-- 
Catalin
