linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] arm64: Get the number of bits in ASID from the CPU
@ 2014-07-08 19:14 Allen Martin
  2014-07-08 21:42 ` Catalin Marinas
  0 siblings, 1 reply; 4+ messages in thread
From: Allen Martin @ 2014-07-08 19:14 UTC (permalink / raw)
  To: linux-arm-kernel

From: Alex Van Brunt <avanbrunt@nvidia.com>

Instead of hard coding the number of ASID bits to 16, read the value
from the CPU through the register ID_AA64MMFR0_EL1 at boot time.  This
is required on Tegra132 Denver CPU which implements 8 bits.

Signed-off-by: Alex Van Brunt <avanbrunt@nvidia.com>
Signed-off-by: Allen Martin <amartin@nvidia.com>
Reviewed-by: Peng Du <pdu@nvidia.com>
Reviewed-by: Bo Yan <byan@nvidia.com>
---
 arch/arm64/include/asm/mmu_context.h |  4 ++--
 arch/arm64/kernel/setup.c            | 13 ++++++++++++-
 arch/arm64/mm/context.c              | 13 ++++++-------
 3 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index a9eee33dfa62..ddd78cad98a5 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -28,7 +28,7 @@
 #include <asm/cputype.h>
 #include <asm/pgtable.h>
 
-#define MAX_ASID_BITS	16
+extern unsigned int max_asid_bits;
 
 extern unsigned int cpu_last_asid;
 
@@ -84,7 +84,7 @@ static inline void check_and_switch_context(struct mm_struct *mm,
 	 */
 	cpu_set_reserved_ttbr0();
 
-	if (!((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS))
+	if (!((mm->context.id ^ cpu_last_asid) >> max_asid_bits))
 		/*
 		 * The ASID is from the current generation, just switch to the
 		 * new pgd. This condition is only true for calls from
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 46d1125571f6..420fb6662309 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -58,6 +58,7 @@
 #include <asm/memblock.h>
 #include <asm/psci.h>
 #include <asm/efi.h>
+#include <asm/mmu_context.h>
 
 unsigned int processor_id;
 EXPORT_SYMBOL(processor_id);
@@ -200,7 +201,7 @@ static void __init smp_build_mpidr_hash(void)
 static void __init setup_processor(void)
 {
 	struct cpu_info *cpu_info;
-	u64 features, block;
+	u64 features, block, reg_value;
 	u32 cwg;
 	int cls;
 
@@ -219,6 +220,16 @@ static void __init setup_processor(void)
 	sprintf(init_utsname()->machine, ELF_PLATFORM);
 	elf_hwcap = 0;
 
+	/* Read the number of ASID bits */
+	reg_value = read_cpuid(ID_AA64MMFR0_EL1) & 0xf0;
+	if (reg_value == 0x00)
+		max_asid_bits = 8;
+	else if (reg_value == 0x20)
+		max_asid_bits = 16;
+	else
+		BUG_ON(1);
+	cpu_last_asid = 1 << max_asid_bits;
+
 	/*
 	 * Check for sane CTR_EL0.CWG value.
 	 */
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index baa758d37021..fbc4256d66ac 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -30,10 +30,9 @@
 #define asid_bits(reg) \
 	(((read_cpuid(ID_AA64MMFR0_EL1) & 0xf0) >> 2) + 8)
 
-#define ASID_FIRST_VERSION	(1 << MAX_ASID_BITS)
-
 static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
-unsigned int cpu_last_asid = ASID_FIRST_VERSION;
+unsigned int max_asid_bits;
+unsigned int cpu_last_asid;
 
 /*
  * We fork()ed a process, and we need a new context for the child to run in.
@@ -66,7 +65,7 @@ static void set_mm_context(struct mm_struct *mm, unsigned int asid)
 	 * mm->context.id_lock has to be IRQ-safe.
 	 */
 	raw_spin_lock_irqsave(&mm->context.id_lock, flags);
-	if (likely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) {
+	if (likely((mm->context.id ^ cpu_last_asid) >> max_asid_bits)) {
 		/*
 		 * Old version of ASID found. Set the new one and reset
 		 * mm_cpumask(mm).
@@ -123,7 +122,7 @@ void __new_context(struct mm_struct *mm)
 	 * Check the ASID again, in case the change was broadcast from another
 	 * CPU before we acquired the lock.
 	 */
-	if (!unlikely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) {
+	if (!unlikely((mm->context.id ^ cpu_last_asid) >> max_asid_bits)) {
 		cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
 		raw_spin_unlock(&cpu_asid_lock);
 		return;
@@ -142,9 +141,9 @@ void __new_context(struct mm_struct *mm)
 	 */
 	if (unlikely((asid & ((1 << bits) - 1)) == 0)) {
 		/* increment the ASID version */
-		cpu_last_asid += (1 << MAX_ASID_BITS) - (1 << bits);
+		cpu_last_asid += (1 << max_asid_bits) - (1 << bits);
 		if (cpu_last_asid == 0)
-			cpu_last_asid = ASID_FIRST_VERSION;
+			cpu_last_asid = 1 << max_asid_bits;
 		asid = cpu_last_asid + smp_processor_id();
 		flush_context();
 #ifdef CONFIG_SMP
-- 
1.8.1.5

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH] arm64: Get the number of bits in ASID from the CPU
  2014-07-08 19:14 [PATCH] arm64: Get the number of bits in ASID from the CPU Allen Martin
@ 2014-07-08 21:42 ` Catalin Marinas
  2014-07-10 20:50   ` Allen Martin
  0 siblings, 1 reply; 4+ messages in thread
From: Catalin Marinas @ 2014-07-08 21:42 UTC (permalink / raw)
  To: linux-arm-kernel

On 8 Jul 2014, at 20:14, Allen Martin <amartin@nvidia.com> wrote:
> From: Alex Van Brunt <avanbrunt@nvidia.com>
> 
> Instead of hard coding the number of ASID bits to 16, read the value
> from the CPU through the register ID_AA64MMFR0_EL1 at boot time.  This
> is required on Tegra132 Denver CPU which implements 8 bits.

Did you actually find a bug in the existing code?

> --- a/arch/arm64/kernel/setup.c
> +++ b/arch/arm64/kernel/setup.c
[?]
> @@ -219,6 +220,16 @@ static void __init setup_processor(void)
> 	sprintf(init_utsname()->machine, ELF_PLATFORM);
> 	elf_hwcap = 0;
> 
> +	/* Read the number of ASID bits */
> +	reg_value = read_cpuid(ID_AA64MMFR0_EL1) & 0xf0;
> +	if (reg_value == 0x00)
> +		max_asid_bits = 8;
> +	else if (reg_value == 0x20)
> +		max_asid_bits = 16;
> +	else
> +		BUG_ON(1);
> +	cpu_last_asid = 1 << max_asid_bits;

There is even a macro called asid_bits() in arch/arm64/mm/context.c.

> --- a/arch/arm64/mm/context.c
> +++ b/arch/arm64/mm/context.c
[?]
> 
> @@ -142,9 +141,9 @@ void __new_context(struct mm_struct *mm)
> 	 */
> 	if (unlikely((asid & ((1 << bits) - 1)) == 0)) {

That's the part where it uses the actual number of bits supported by
the hardware (bits is computed higher up in this function based on
ID_AA64MMFR0_EL1).

> 		/* increment the ASID version */
> -		cpu_last_asid += (1 << MAX_ASID_BITS) - (1 << bits);

And here it bumps the generation at bit 16 onwards.

So unless you find some bug in the existing logic, I don't think your
patch is needed.

Catalin

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH] arm64: Get the number of bits in ASID from the CPU
  2014-07-08 21:42 ` Catalin Marinas
@ 2014-07-10 20:50   ` Allen Martin
  2014-07-11 12:34     ` Catalin Marinas
  0 siblings, 1 reply; 4+ messages in thread
From: Allen Martin @ 2014-07-10 20:50 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Jul 08, 2014 at 02:42:17PM -0700, Catalin Marinas wrote:
> On 8 Jul 2014, at 20:14, Allen Martin <amartin@nvidia.com> wrote:
> > From: Alex Van Brunt <avanbrunt@nvidia.com>
> > 
> > Instead of hard coding the number of ASID bits to 16, read the value
> > from the CPU through the register ID_AA64MMFR0_EL1 at boot time.  This
> > is required on Tegra132 Denver CPU which implements 8 bits.
> 
> Did you actually find a bug in the existing code?

There was an internal bug associated with this patch, but I didn't
author the patch, and it's not clear what if anything fails without
this patch.  I'll dig further.

> > --- a/arch/arm64/mm/context.c
> > +++ b/arch/arm64/mm/context.c
> [...]
> > 
> > @@ -142,9 +141,9 @@ void __new_context(struct mm_struct *mm)
> > 	 */
> > 	if (unlikely((asid & ((1 << bits) - 1)) == 0)) {
> 
> That's the part where it uses the actual number of bits supported by
> the hardware (bits is computed higher up in this function based on
> ID_AA64MMFR0_EL1).
> 
> > 		/* increment the ASID version */
> > -		cpu_last_asid += (1 << MAX_ASID_BITS) - (1 << bits);
> 
> And here it bumps the generation at bit 16 onwards.

So based on your feedback and my read of the code, it looks like on
CPUs that implement 8 bits of ASID (ID_AA64MMFR0_EL1 & 0xf0 == 0x00),
bits 7:0 of asid are the real hw ASID, bits 31:16 are the ASID
"version" which is how hw ASID wrap is counted, and bits 15:8 are
unused.  A few questions come up: 

in set_mm_context():
>        if (likely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS))

why is this likely() ?  Shouldn't this only happen on an ASID wrap?


in __new_context():
>                /* increment the ASID version */
>                cpu_last_asid += (1 << MAX_ASID_BITS) - (1 << bits);
>                if (cpu_last_asid == 0)
>                        cpu_last_asid = ASID_FIRST_VERSION;

How do you prevent two processes from having the same context.id in
the case of ASID version wrap?  Since context.id is 64 bits, should
asid and cpu_last_asid be 64 bits as well to make this less likely?

> 
> So unless you find some bug in the existing logic, I don't think your
> patch is needed.

After reading your comments and looking at the code some more I'm not
sure it's needed either.  I guess the only advantage is that it uses
bits 15:8 for ASID version instead of having them be unused.  I'll dig
more on why this patch was added to begin with.

-Allen

nvpublic

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH] arm64: Get the number of bits in ASID from the CPU
  2014-07-10 20:50   ` Allen Martin
@ 2014-07-11 12:34     ` Catalin Marinas
  0 siblings, 0 replies; 4+ messages in thread
From: Catalin Marinas @ 2014-07-11 12:34 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Jul 10, 2014 at 09:50:12PM +0100, Allen Martin wrote:
> On Tue, Jul 08, 2014 at 02:42:17PM -0700, Catalin Marinas wrote:
> > On 8 Jul 2014, at 20:14, Allen Martin <amartin@nvidia.com> wrote:
> > > --- a/arch/arm64/mm/context.c
> > > +++ b/arch/arm64/mm/context.c
> > [...]
> > > 
> > > @@ -142,9 +141,9 @@ void __new_context(struct mm_struct *mm)
> > > 	 */
> > > 	if (unlikely((asid & ((1 << bits) - 1)) == 0)) {
> > 
> > That's the part where it uses the actual number of bits supported by
> > the hardware (bits is computed higher up in this function based on
> > ID_AA64MMFR0_EL1).
> > 
> > > 		/* increment the ASID version */
> > > -		cpu_last_asid += (1 << MAX_ASID_BITS) - (1 << bits);
> > 
> > And here it bumps the generation at bit 16 onwards.
> 
> So based on your feedback and my read of the code, it looks like on
> CPUs that implement 8 bits of ASID (ID_AA64MMFR0_EL1 & 0xf0 == 0x20),
> bits 7:0 of asid are the real hw ASID, bits 31:16 are the ASID
> "version" which is how hw ASID wrap is counted, and bits 15:8 are
> unused.  

That's correct.

> A few quesions come up: 
> 
> in set_mm_context():
> >        if (likely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS))
> 
> why is this likely() ?  Shouldn't this only happen on an ASID wrap?

set_mm_context() is called when we want to change the ASID because the
old one was from a previous generation. So the check above is the likely
case where generations differ. The unlikely case would be that the
context was already updated by IPI from a roll-over on another CPU.

> in __new_context():
> >                /* increment the ASID version */
> >                cpu_last_asid += (1 << MAX_ASID_BITS) - (1 << bits);
> >                if (cpu_last_asid == 0)
> >                        cpu_last_asid = ASID_FIRST_VERSION;
> 
> How do you prevent two processes from having the same context.id in
> the case of ASID version wrap?  Since context.id is 64 bits, should
> asid and cpu_last_asid be 64 bits as well to make this less likely?

context.id is 32-bit. There is indeed a risk of an application not being
scheduled for a long time and ASID version rolling-over. We could make
both variables 64-bit.

> > So unless you find some bug in the existing logic, I don't think your
> > patch is needed.
> 
> After reading your comments and looking at the code some more I'm not
> sure it's needed either.  I guess the only advantage is that it uses
> bits 15:8 for ASID version instead of having them be unused.  I'll dig
> more on why this patch was added to begin with.

Note that we plan to re-write this algorithm anyway because the IPI
during roll-over doesn't scale well. The 32-bit arm port already has a
new algorithm, the problem on 64-bit would be that the bitmap would be
bigger (64K bits = 8KB) and it needs benchmarking.

-- 
Catalin

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2014-07-11 12:34 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-07-08 19:14 [PATCH] arm64: Get the number of bits in ASID from the CPU Allen Martin
2014-07-08 21:42 ` Catalin Marinas
2014-07-10 20:50   ` Allen Martin
2014-07-11 12:34     ` Catalin Marinas

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).