linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH RFC 1/3] ARM: LPAE: rework TTBR0/TTBR1 split
@ 2014-11-18 15:53 Konstantin Khlebnikov
  2014-11-18 15:53 ` [PATCH RFC 2/3] ARM: enable TTBR0/TTBR1 split in short mode Konstantin Khlebnikov
  2014-11-18 15:53 ` [PATCH RFC 3/3] ARM: reduce size of page table directory for " Konstantin Khlebnikov
  0 siblings, 2 replies; 7+ messages in thread
From: Konstantin Khlebnikov @ 2014-11-18 15:53 UTC (permalink / raw)
  To: Russell King, linux-kernel, linux-arm-kernel

This patch moves enabling TTBRx split from __v7_setup (v7_ttb_setup)
into cpu_init() which is called in init_mm context after leaving idmap.
Also it disables split in setup_mm_for_reboot() before switching mm to idmap.
After that idmap and VM split never meet, thus they no longer conflict.

Callback keystone_smp_secondary_initmem isn't required, all setup is
done in cpu_init() which is called right before smp_ops.smp_secondary_init.

Also this patch prepares code for enabling split in non-LPAE mode.

Signed-off-by: Konstantin Khlebnikov <k.khlebnikov@samsung.com>
---
 arch/arm/include/asm/pgtable-2level-hwdef.h |    2 ++
 arch/arm/include/asm/pgtable-3level-hwdef.h |   12 +++++-------
 arch/arm/include/asm/proc-fns.h             |   13 +++++++++++++
 arch/arm/kernel/setup.c                     |   11 +++++++++++
 arch/arm/mach-keystone/platsmp.c            |   13 -------------
 arch/arm/mm/idmap.c                         |    6 ++++++
 arch/arm/mm/proc-v7-3level.S                |   13 -------------
 7 files changed, 37 insertions(+), 33 deletions(-)

diff --git a/arch/arm/include/asm/pgtable-2level-hwdef.h b/arch/arm/include/asm/pgtable-2level-hwdef.h
index 5cfba15..c2ed1fa 100644
--- a/arch/arm/include/asm/pgtable-2level-hwdef.h
+++ b/arch/arm/include/asm/pgtable-2level-hwdef.h
@@ -90,4 +90,6 @@
 
 #define PHYS_MASK		(~0UL)
 
+#define TTBR1_SIZE	0
+
 #endif
diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h
index 9fd61c7..8bb32a0 100644
--- a/arch/arm/include/asm/pgtable-3level-hwdef.h
+++ b/arch/arm/include/asm/pgtable-3level-hwdef.h
@@ -89,19 +89,17 @@
  *   0x40000000: T0SZ = 2, T1SZ = 0 (not used)
  *   0x80000000: T0SZ = 0, T1SZ = 1
  *   0xc0000000: T0SZ = 0, T1SZ = 2
- *
- * Only use this feature if PHYS_OFFSET <= PAGE_OFFSET, otherwise
- * booting secondary CPUs would end up using TTBR1 for the identity
- * mapping set up in TTBR0.
  */
 #if defined CONFIG_VMSPLIT_2G
 #define TTBR1_OFFSET	16			/* skip two L1 entries */
+#define TTBR1_SIZE	(1<<16)
 #elif defined CONFIG_VMSPLIT_3G
 #define TTBR1_OFFSET	(4096 * (1 + 3))	/* only L2, skip pgd + 3*pmd */
+#define TTBR1_SIZE	(2<<16)
 #else
-#define TTBR1_OFFSET	0
+#define TTBR1_OFFSET	8			/* skip one L1 entry */
+/* Not implemented. In this mode TTBR0 points to first pmd instead of pgd. */
+#define TTBR1_SIZE	0
 #endif
 
-#define TTBR1_SIZE	(((PAGE_OFFSET >> 30) - 1) << 16)
-
 #endif
diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h
index 5324c11..8353eb4 100644
--- a/arch/arm/include/asm/proc-fns.h
+++ b/arch/arm/include/asm/proc-fns.h
@@ -149,6 +149,19 @@ extern void cpu_resume(void);
 	})
 #endif
 
+static inline u32 cpu_get_ttbcr(void)	/* returns the value read from TTBCR */
+{
+	u32 val;
+
+	__asm__("mrc p15, 0, %0, c2, c0, 2" : "=r" (val));	/* CP15 c2, c0, 2: TTB control register */
+	return val;
+}
+
+static inline void cpu_set_ttbcr(u32 val)	/* writes val to TTBCR */
+{
+	__asm__("mcr p15, 0, %0, c2, c0, 2" : : "r" (val));	/* CP15 c2, c0, 2: TTB control register */
+}
+
 #else	/*!CONFIG_MMU */
 
 #define cpu_switch_mm(pgd,mm)	{ }
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index c031063..6a54a82 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -488,6 +488,17 @@ void notrace cpu_init(void)
 	      PLC (PSR_F_BIT | PSR_I_BIT | SVC_MODE)
 	    : "r14");
 #endif
+
+#ifdef CONFIG_ARM_LPAE
+	/* For short mode TTBR1 already loaded by macro v7_ttb_setup */
+	cpu_set_ttbr(1, __pa(init_mm.pgd) + TTBR1_OFFSET);
+#endif
+
+#if defined(CONFIG_MMU) && TTBR1_SIZE
+	/* Enable TTBR0/TTBR1 split */
+	cpu_set_ttbcr(cpu_get_ttbcr() | TTBR1_SIZE);
+	local_flush_tlb_all();
+#endif
 }
 
 u32 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = MPIDR_INVALID };
diff --git a/arch/arm/mach-keystone/platsmp.c b/arch/arm/mach-keystone/platsmp.c
index 5f46a7c..4bbb184 100644
--- a/arch/arm/mach-keystone/platsmp.c
+++ b/arch/arm/mach-keystone/platsmp.c
@@ -39,19 +39,6 @@ static int keystone_smp_boot_secondary(unsigned int cpu,
 	return error;
 }
 
-#ifdef CONFIG_ARM_LPAE
-static void __cpuinit keystone_smp_secondary_initmem(unsigned int cpu)
-{
-	pgd_t *pgd0 = pgd_offset_k(0);
-	cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET);
-	local_flush_tlb_all();
-}
-#else
-static inline void __cpuinit keystone_smp_secondary_initmem(unsigned int cpu)
-{}
-#endif
-
 struct smp_operations keystone_smp_ops __initdata = {
 	.smp_boot_secondary	= keystone_smp_boot_secondary,
-	.smp_secondary_init     = keystone_smp_secondary_initmem,
 };
diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c
index e7a81ceb..cc51b40 100644
--- a/arch/arm/mm/idmap.c
+++ b/arch/arm/mm/idmap.c
@@ -123,6 +123,12 @@ void setup_mm_for_reboot(void)
 {
 	/* Switch to the identity mapping. */
 	cpu_switch_mm(idmap_pgd, &init_mm);
+
+#if TTBR1_SIZE
+	/* Clear split bits in TTBCR: idmap might collide with TTBR1 range */
+	cpu_set_ttbcr(cpu_get_ttbcr() & ~TTBR1_SIZE);
+#endif
+
 	local_flush_bp_all();
 
 #ifdef CONFIG_CPU_HAS_ASID
diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S
index d3daed0..b8173cf 100644
--- a/arch/arm/mm/proc-v7-3level.S
+++ b/arch/arm/mm/proc-v7-3level.S
@@ -127,26 +127,13 @@ ENDPROC(cpu_v7_set_pte_ext)
 	 * - \ttbr1 updated.
 	 */
 	.macro	v7_ttb_setup, zero, ttbr0, ttbr1, tmp
-	ldr	\tmp, =swapper_pg_dir		@ swapper_pg_dir virtual address
-	mov	\tmp, \tmp, lsr #ARCH_PGD_SHIFT
-	cmp	\ttbr1, \tmp			@ PHYS_OFFSET > PAGE_OFFSET?
 	mrc	p15, 0, \tmp, c2, c0, 2		@ TTB control register
 	orr	\tmp, \tmp, #TTB_EAE
 	ALT_SMP(orr	\tmp, \tmp, #TTB_FLAGS_SMP)
 	ALT_UP(orr	\tmp, \tmp, #TTB_FLAGS_UP)
 	ALT_SMP(orr	\tmp, \tmp, #TTB_FLAGS_SMP << 16)
 	ALT_UP(orr	\tmp, \tmp, #TTB_FLAGS_UP << 16)
-	/*
-	 * Only use split TTBRs if PHYS_OFFSET <= PAGE_OFFSET (cmp above),
-	 * otherwise booting secondary CPUs would end up using TTBR1 for the
-	 * identity mapping set up in TTBR0.
-	 */
-	orrls	\tmp, \tmp, #TTBR1_SIZE				@ TTBCR.T1SZ
 	mcr	p15, 0, \tmp, c2, c0, 2				@ TTBCR
-	mov	\tmp, \ttbr1, lsr #(32 - ARCH_PGD_SHIFT)	@ upper bits
-	mov	\ttbr1, \ttbr1, lsl #ARCH_PGD_SHIFT		@ lower bits
-	addls	\ttbr1, \ttbr1, #TTBR1_OFFSET
-	mcrr	p15, 1, \ttbr1, \tmp, c2			@ load TTBR1
 	mov	\tmp, \ttbr0, lsr #(32 - ARCH_PGD_SHIFT)	@ upper bits
 	mov	\ttbr0, \ttbr0, lsl #ARCH_PGD_SHIFT		@ lower bits
 	mcrr	p15, 0, \ttbr0, \tmp, c2			@ load TTBR0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH RFC 2/3] ARM: enable TTBR0/TTBR1 split in short mode
  2014-11-18 15:53 [PATCH RFC 1/3] ARM: LPAE: rework TTBR0/TTBR1 split Konstantin Khlebnikov
@ 2014-11-18 15:53 ` Konstantin Khlebnikov
  2014-11-18 18:04   ` Catalin Marinas
  2014-11-18 15:53 ` [PATCH RFC 3/3] ARM: reduce size of page table directory for " Konstantin Khlebnikov
  1 sibling, 1 reply; 7+ messages in thread
From: Konstantin Khlebnikov @ 2014-11-18 15:53 UTC (permalink / raw)
  To: Russell King, linux-kernel, linux-arm-kernel

This feature was partially implemented a long time ago. All cpus load
swapper_pg_dir into TTBR1 since commit d427958a46af24f75d0017c45eadd172273bbf33
("ARM: 6942/1: mm: make TTBR1 always point to swapper_pg_dir on ARMv6/7").
But the TTBRx split has never been enabled in TTBCR.

This patch enables TTBRx split for 2Gb/2Gb and 1Gb/3Gb combinations.
Area covered by TTBR0 must be power of two, thus 3Gb/1Gb mode is unsupported.

Signed-off-by: Konstantin Khlebnikov <k.khlebnikov@samsung.com>
---
 arch/arm/include/asm/pgtable-2level-hwdef.h |   13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/arm/include/asm/pgtable-2level-hwdef.h b/arch/arm/include/asm/pgtable-2level-hwdef.h
index c2ed1fa..605652c 100644
--- a/arch/arm/include/asm/pgtable-2level-hwdef.h
+++ b/arch/arm/include/asm/pgtable-2level-hwdef.h
@@ -90,6 +90,19 @@
 
 #define PHYS_MASK		(~0UL)
 
+/*
+ * TTBR0/TTBR1 split (PAGE_OFFSET):
+ *   0x40000000: TTBCR.N = 2
+ *   0x80000000: TTBCR.N = 1
+ *   0xc0000000: TTBCR.N = 0 (not used)
+ */
+
+#ifdef CONFIG_VMSPLIT_1G
+#define TTBR1_SIZE	2
+#elif defined CONFIG_VMSPLIT_2G
+#define TTBR1_SIZE	1
+#else	/* CONFIG_VMSPLIT_3G or any other layout: split not used */
 #define TTBR1_SIZE	0
+#endif
 
 #endif


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH RFC 3/3] ARM: reduce size of page table directory for short mode
  2014-11-18 15:53 [PATCH RFC 1/3] ARM: LPAE: rework TTBR0/TTBR1 split Konstantin Khlebnikov
  2014-11-18 15:53 ` [PATCH RFC 2/3] ARM: enable TTBR0/TTBR1 split in short mode Konstantin Khlebnikov
@ 2014-11-18 15:53 ` Konstantin Khlebnikov
  2014-11-18 18:14   ` Catalin Marinas
  1 sibling, 1 reply; 7+ messages in thread
From: Konstantin Khlebnikov @ 2014-11-18 15:53 UTC (permalink / raw)
  To: Russell King, linux-kernel, linux-arm-kernel

Virtual memory above PAGE_OFFSET is covered by TTBR1 which always points
to swapper_pg_dir, thus size of the rest pgd tables might be reduced.
Of course idmap_pgd must be full 16k because it's used without split.
It seems nobody accesses pgd entries above PAGE_OFFSET via pgd_offset(),
all access is done via pgd_offset_k() which refers directly to init_mm.

This patch saves 8k per process for 2G/2G split (12k for 1G/3G split)
and kills source of frequent order-2 allocations. Unfortunately most
commonly used 3G/1G mode isn't supported, because first part must be
power of two. LPAE supports this mode, but its pgd is already tiny.

Signed-off-by: Konstantin Khlebnikov <k.khlebnikov@samsung.com>
---
 arch/arm/mm/idmap.c |    6 ++++++
 arch/arm/mm/pgd.c   |   14 +++++++++-----
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c
index cc51b40..cea7ee1 100644
--- a/arch/arm/mm/idmap.c
+++ b/arch/arm/mm/idmap.c
@@ -100,7 +100,13 @@ extern char  __idmap_text_start[], __idmap_text_end[];
 
 static int __init init_static_idmap(void)
 {
+#ifdef CONFIG_ARM_LPAE
 	idmap_pgd = pgd_alloc(&init_mm);
+#else
+	idmap_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, 2);
+	if (idmap_pgd)
+		memcpy(idmap_pgd, swapper_pg_dir, PTRS_PER_PGD * sizeof(pgd_t));
+#endif
 	if (!idmap_pgd)
 		return -ENOMEM;
 
diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c
index 2493795..3fbcb5a 100644
--- a/arch/arm/mm/pgd.c
+++ b/arch/arm/mm/pgd.c
@@ -20,15 +20,19 @@
 #include "mm.h"
 
 #ifdef CONFIG_ARM_LPAE
+#define TTBR0_PTRS_PER_PGD	PTRS_PER_PGD
 #define __pgd_alloc()	kmalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL)
 #define __pgd_free(pgd)	kfree(pgd)
 #else
-#define __pgd_alloc()	(pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, 2)
-#define __pgd_free(pgd)	free_pages((unsigned long)pgd, 2)
+#define TTBR0_PTRS_PER_PGD	(PTRS_PER_PGD >> TTBR1_SIZE)
+#define __pgd_alloc()	(pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, \
+							2 - TTBR1_SIZE)
+#define __pgd_free(pgd)	free_pages((unsigned long)pgd, 2 - TTBR1_SIZE)
 #endif
 
 /*
- * need to get a 16k page for level 1
+ * We need 4k/8k/16k for pgd in short mode for CONFIG_VMSPLIT_1G/2G/3G
+ * or only 64 bytes if LPAE is enabled.
  */
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
@@ -48,9 +52,9 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	 */
 	init_pgd = pgd_offset_k(0);
 	memcpy(new_pgd + USER_PTRS_PER_PGD, init_pgd + USER_PTRS_PER_PGD,
-		       (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+		(TTBR0_PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
 
-	clean_dcache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t));
+	clean_dcache_area(new_pgd, TTBR0_PTRS_PER_PGD * sizeof(pgd_t));
 
 #ifdef CONFIG_ARM_LPAE
 	/*


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH RFC 2/3] ARM: enable TTBR0/TTBR1 split in short mode
  2014-11-18 15:53 ` [PATCH RFC 2/3] ARM: enable TTBR0/TTBR1 split in short mode Konstantin Khlebnikov
@ 2014-11-18 18:04   ` Catalin Marinas
  0 siblings, 0 replies; 7+ messages in thread
From: Catalin Marinas @ 2014-11-18 18:04 UTC (permalink / raw)
  To: Konstantin Khlebnikov; +Cc: Russell King, linux-kernel, linux-arm-kernel

On Tue, Nov 18, 2014 at 03:53:19PM +0000, Konstantin Khlebnikov wrote:
> This feature was partially implemented long time ago. All cpus load
> swapper_pg_dir into TTBR1 since commit d427958a46af24f75d0017c45eadd172273bbf33
> ("ARM: 6942/1: mm: make TTBR1 always point to swapper_pg_dir on ARMv6/7").
> But TTBRx split is never been enabled in TTBCR.

I recall there is an erratum on Cortex-A9 around the use of TTBR1 and at
the time that stopped me from pushing such change.

-- 
Catalin

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH RFC 3/3] ARM: reduce size of page table directory for short mode
  2014-11-18 15:53 ` [PATCH RFC 3/3] ARM: reduce size of page table directory for " Konstantin Khlebnikov
@ 2014-11-18 18:14   ` Catalin Marinas
  2014-11-18 19:11     ` Russell King - ARM Linux
  0 siblings, 1 reply; 7+ messages in thread
From: Catalin Marinas @ 2014-11-18 18:14 UTC (permalink / raw)
  To: Konstantin Khlebnikov; +Cc: Russell King, linux-kernel, linux-arm-kernel

On Tue, Nov 18, 2014 at 03:53:25PM +0000, Konstantin Khlebnikov wrote:
> Virtual memory above PAGE_OFFSET is covered by TTBR1 which always points
> to swapper_pg_dir, thus size of the rest pgd tables might be reduced.
> Of course idmap_pgd must be full 16k because it's used without split.
> It seems nobody accesses pgd entries above PAGE_OFFSET via pgd_offset(),
> all access is done via pgd_offset_k() which refers directly to init_mm.

We did a similar trick on arm64 with swapper_pg_dir only containing the
kernel mappings while pgd_alloc() only allocates enough for the user
memory map. We just need to sort out the potential A9 issue (I'll do
some digging tomorrow).

> This patch saves 8k per process for 2G/2G split (12k for 1G/3G split)
> and kills source of frequent order-2 allocations. Unfortunately most
> commonly used 3G/1G mode isn't supported, because first part must be
> power of two. LPAE supports this mode, but it's pgd is already tiny.

Actually, with LPAE you can save a pmd and pte allocated for the vectors
page at 0xffff0000. So you can save 8K per task here. Similarly with the
classic MMU, you can save another 4K for the vectors page pte.

-- 
Catalin

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH RFC 3/3] ARM: reduce size of page table directory for short mode
  2014-11-18 18:14   ` Catalin Marinas
@ 2014-11-18 19:11     ` Russell King - ARM Linux
  2014-11-18 23:02       ` Catalin Marinas
  0 siblings, 1 reply; 7+ messages in thread
From: Russell King - ARM Linux @ 2014-11-18 19:11 UTC (permalink / raw)
  To: Catalin Marinas; +Cc: Konstantin Khlebnikov, linux-kernel, linux-arm-kernel

On Tue, Nov 18, 2014 at 06:14:12PM +0000, Catalin Marinas wrote:
> Actually, with LPAE you can save a pmd and pte allocated for the vectors
> page at 0xffff0000. So you can save 8K per task here. Similarly with the
> classic MMU, you can save another 4K for the vectors page pte.

No.  For any CPU which has high vectors (approximately ARMv5 and up),
there is no "vectors page pte".  The vectors page PTE is only allocated
when we need low vectors.

-- 
FTTC broadband for 0.8mile line: currently at 9.5Mbps down 400kbps up
according to speedtest.net.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH RFC 3/3] ARM: reduce size of page table directory for short mode
  2014-11-18 19:11     ` Russell King - ARM Linux
@ 2014-11-18 23:02       ` Catalin Marinas
  0 siblings, 0 replies; 7+ messages in thread
From: Catalin Marinas @ 2014-11-18 23:02 UTC (permalink / raw)
  To: Russell King - ARM Linux
  Cc: Konstantin Khlebnikov, linux-kernel, linux-arm-kernel

On Tue, Nov 18, 2014 at 07:11:04PM +0000, Russell King - ARM Linux wrote:
> On Tue, Nov 18, 2014 at 06:14:12PM +0000, Catalin Marinas wrote:
> > Actually, with LPAE you can save a pmd and pte allocated for the vectors
> > page at 0xffff0000. So you can save 8K per task here. Similarly with the
> > classic MMU, you can save another 4K for the vectors page pte.
> 
> No.  For any CPU which has high vectors (approximately ARMv5 and up),
> there is no "vectors page pte".  The vectors page PTE is only allocated
> when we need low vectors.

You are right. The kernel pgd entries are copied by pgd_alloc, so no
need for additional pmd/pte allocations (they would be shared).

-- 
Catalin

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2014-11-18 23:03 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-11-18 15:53 [PATCH RFC 1/3] ARM: LPAE: rework TTBR0/TTBR1 split Konstantin Khlebnikov
2014-11-18 15:53 ` [PATCH RFC 2/3] ARM: enable TTBR0/TTBR1 split in short mode Konstantin Khlebnikov
2014-11-18 18:04   ` Catalin Marinas
2014-11-18 15:53 ` [PATCH RFC 3/3] ARM: reduce size of page table directory for " Konstantin Khlebnikov
2014-11-18 18:14   ` Catalin Marinas
2014-11-18 19:11     ` Russell King - ARM Linux
2014-11-18 23:02       ` Catalin Marinas

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).