linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* [PATCHv2] ARM64: Add AT_ARM64_MIDR to the aux vector
@ 2015-08-29 18:46 Andrew Pinski
  2015-09-01 16:33 ` Mark Rutland
  0 siblings, 1 reply; 16+ messages in thread
From: Andrew Pinski @ 2015-08-29 18:46 UTC (permalink / raw)
  To: linux-arm-kernel

It is useful to pass down MIDR register down to userland if all of
the online cores are all the same type.  This adds AT_ARM64_MIDR
aux vector type and passes down the midr system register.

This is alternative to MIDR_EL1 part of
http://lists.infradead.org/pipermail/linux-arm-kernel/2015-July/358995.html.
It allows for faster access to midr_el1 than going through a trap and
does not exist if the set of cores are not the same.

Changes from v1:
Forgot to include the auxvec.h part.

Signed-off-by: Andrew Pinski <apinski@cavium.com>
---
 arch/arm64/include/asm/cpu.h         |    1 +
 arch/arm64/include/asm/elf.h         |    6 ++++++
 arch/arm64/include/uapi/asm/auxvec.h |    3 +++
 arch/arm64/kernel/cpuinfo.c          |   22 ++++++++++++++++++++++
 4 files changed, 32 insertions(+)

diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index 8e797b2..fab0aa1 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -62,5 +62,6 @@ DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);
 
 void cpuinfo_store_cpu(void);
 void __init cpuinfo_store_boot_cpu(void);
+u32 get_arm64_midr(void);
 
 #endif /* __ASM_CPU_H */
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index faad6df..d3549de 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -17,6 +17,7 @@
 #define __ASM_ELF_H
 
 #include <asm/hwcap.h>
+#include <asm/cpu.h>
 
 /*
  * ELF register definitions..
@@ -138,8 +139,13 @@ typedef struct user_fpsimd_state elf_fpregset_t;
 
 #define ARCH_DLINFO							\
 do {									\
+	u32 midr;							\
+									\
 	NEW_AUX_ENT(AT_SYSINFO_EHDR,					\
 		    (elf_addr_t)current->mm->context.vdso);		\
+	midr = get_arm64_midr();					\
+	if (midr != 0)							\
+		NEW_AUX_ENT(AT_ARM64_MIDR, (elf_addr_t)midr);		\
 } while (0)
 
 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES
diff --git a/arch/arm64/include/uapi/asm/auxvec.h b/arch/arm64/include/uapi/asm/auxvec.h
index 22d6d88..dc55c56 100644
--- a/arch/arm64/include/uapi/asm/auxvec.h
+++ b/arch/arm64/include/uapi/asm/auxvec.h
@@ -19,4 +19,7 @@
 /* vDSO location */
 #define AT_SYSINFO_EHDR	33
 
+/* Machine IDenfier Register (MDIR). */
+#define AT_ARM64_MIDR 38
+
 #endif
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 75d5a86..b14c87d 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -254,3 +254,25 @@ void __init cpuinfo_store_boot_cpu(void)
 
 	boot_cpu_data = *info;
 }
+
+u32 get_arm64_midr(void)
+{
+	int i;
+	u32 midr = 0;
+
+	for_each_online_cpu(i) {
+		struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
+		u32 oldmidr = midr;
+
+		midr = cpuinfo->reg_midr;
+		/*
+		 * If there are cpus which have a different
+		 * midr just return 0.
+		 */
+		if (oldmidr && oldmidr != midr)
+			return 0;
+	}
+
+	return midr;
+}
+
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCHv2] ARM64: Add AT_ARM64_MIDR to the aux vector
  2015-08-29 18:46 [PATCHv2] ARM64: Add AT_ARM64_MIDR to the aux vector Andrew Pinski
@ 2015-09-01 16:33 ` Mark Rutland
  2015-09-01 16:51   ` pinskia at gmail.com
  0 siblings, 1 reply; 16+ messages in thread
From: Mark Rutland @ 2015-09-01 16:33 UTC (permalink / raw)
  To: linux-arm-kernel

Hi,

On Sat, Aug 29, 2015 at 07:46:22PM +0100, Andrew Pinski wrote:
> It is useful to pass down MIDR register down to userland if all of
> the online cores are all the same type.  This adds AT_ARM64_MIDR
> aux vector type and passes down the midr system register.
> 
> This is alternative to MIDR_EL1 part of
> http://lists.infradead.org/pipermail/linux-arm-kernel/2015-July/358995.html.
> It allows for faster access to midr_el1 than going through a trap and
> does not exist if the set of cores are not the same.

I'm not sure I follow the rationale. If speed is important the
application can cache the value the first time it reads it with a trap.

This also means that the behaviour is different across homogeneous and
heterogeneous systems.

> Changes from v1:
> Forgot to include the auxvec.h part.
> 
> Signed-off-by: Andrew Pinski <apinski@cavium.com>
> ---
>  arch/arm64/include/asm/cpu.h         |    1 +
>  arch/arm64/include/asm/elf.h         |    6 ++++++
>  arch/arm64/include/uapi/asm/auxvec.h |    3 +++
>  arch/arm64/kernel/cpuinfo.c          |   22 ++++++++++++++++++++++
>  4 files changed, 32 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
> index 8e797b2..fab0aa1 100644
> --- a/arch/arm64/include/asm/cpu.h
> +++ b/arch/arm64/include/asm/cpu.h
> @@ -62,5 +62,6 @@ DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);
>  
>  void cpuinfo_store_cpu(void);
>  void __init cpuinfo_store_boot_cpu(void);
> +u32 get_arm64_midr(void);
>  
>  #endif /* __ASM_CPU_H */
> diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
> index faad6df..d3549de 100644
> --- a/arch/arm64/include/asm/elf.h
> +++ b/arch/arm64/include/asm/elf.h
> @@ -17,6 +17,7 @@
>  #define __ASM_ELF_H
>  
>  #include <asm/hwcap.h>
> +#include <asm/cpu.h>
>  
>  /*
>   * ELF register definitions..
> @@ -138,8 +139,13 @@ typedef struct user_fpsimd_state elf_fpregset_t;
>  
>  #define ARCH_DLINFO							\
>  do {									\
> +	u32 midr;							\
> +									\
>  	NEW_AUX_ENT(AT_SYSINFO_EHDR,					\
>  		    (elf_addr_t)current->mm->context.vdso);		\
> +	midr = get_arm64_midr();					\
> +	if (midr != 0)							\
> +		NEW_AUX_ENT(AT_ARM64_MIDR, (elf_addr_t)midr);		\
>  } while (0)
>  
>  #define ARCH_HAS_SETUP_ADDITIONAL_PAGES
> diff --git a/arch/arm64/include/uapi/asm/auxvec.h b/arch/arm64/include/uapi/asm/auxvec.h
> index 22d6d88..dc55c56 100644
> --- a/arch/arm64/include/uapi/asm/auxvec.h
> +++ b/arch/arm64/include/uapi/asm/auxvec.h
> @@ -19,4 +19,7 @@
>  /* vDSO location */
>  #define AT_SYSINFO_EHDR	33
>  
> +/* Machine IDenfier Register (MDIR). */
> +#define AT_ARM64_MIDR 38
> +
>  #endif
> diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
> index 75d5a86..b14c87d 100644
> --- a/arch/arm64/kernel/cpuinfo.c
> +++ b/arch/arm64/kernel/cpuinfo.c
> @@ -254,3 +254,25 @@ void __init cpuinfo_store_boot_cpu(void)
>  
>  	boot_cpu_data = *info;
>  }
> +
> +u32 get_arm64_midr(void)
> +{
> +	int i;
> +	u32 midr = 0;
> +
> +	for_each_online_cpu(i) {
> +		struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
> +		u32 oldmidr = midr;
> +
> +		midr = cpuinfo->reg_midr;
> +		/*
> +		 * If there are cpus which have a different
> +		 * midr just return 0.
> +		 */
> +		if (oldmidr && oldmidr != midr)
> +			return 0;
> +	}
> +
> +	return midr;
> +}

If I have a big.LITTLE system where all the big CPUs are currently
offline, this will leave the MIDR the little CPUs in the auxvec.
However, at any point after this has run, I could hotplug the big CPUs
on and the little CPUs off, leaving this reporting a MIDR that
represents none of the online CPUs.

Given big.LITTLE and the potential for physical/dynamic hotplug (where
we won't know all the MIDRs in advance), I don't think that we can
generally expose a common MIDR in this fashion, and I don't think that
we should give the impression that we can.

I think that the only things we can do are expose the MIDR for CPU the
code is currently executing on (as Suzuki's patches do), and/or expose
all the MIDRs for currently online CPUs (as Steve's [1] patch does).
Anything else leaves us trying to provide semantics that we cannot
guarantee.

Thanks,
Mark.

[1] http://lists.infradead.org/pipermail/linux-arm-kernel/2015-July/359127.html

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCHv2] ARM64: Add AT_ARM64_MIDR to the aux vector
  2015-09-01 16:33 ` Mark Rutland
@ 2015-09-01 16:51   ` pinskia at gmail.com
  2015-09-01 17:06     ` Pinski, Andrew
  2015-09-01 17:19     ` Mark Rutland
  0 siblings, 2 replies; 16+ messages in thread
From: pinskia at gmail.com @ 2015-09-01 16:51 UTC (permalink / raw)
  To: linux-arm-kernel





> On Sep 2, 2015, at 12:33 AM, Mark Rutland <mark.rutland@arm.com> wrote:
> 
> Hi,
> 
>> On Sat, Aug 29, 2015 at 07:46:22PM +0100, Andrew Pinski wrote:
>> It is useful to pass down MIDR register down to userland if all of
>> the online cores are all the same type.  This adds AT_ARM64_MIDR
>> aux vector type and passes down the midr system register.
>> 
>> This is alternative to MIDR_EL1 part of
>> http://lists.infradead.org/pipermail/linux-arm-kernel/2015-July/358995.html.
>> It allows for faster access to midr_el1 than going through a trap and
>> does not exist if the set of cores are not the same.
> 
> I'm not sure I follow the rationale. If speed is important the
> application can cache the value the first time it reads it with a trap.

It is also about compatibility also. Exposing the register is not backwards compatible but using the aux vector is. 

> 
> This also means that the behaviour is different across homogeneous and
> heterogeneous systems.
> 
>> Changes from v1:
>> Forgot to include the auxvec.h part.
>> 
>> Signed-off-by: Andrew Pinski <apinski@cavium.com>
>> ---
>> arch/arm64/include/asm/cpu.h         |    1 +
>> arch/arm64/include/asm/elf.h         |    6 ++++++
>> arch/arm64/include/uapi/asm/auxvec.h |    3 +++
>> arch/arm64/kernel/cpuinfo.c          |   22 ++++++++++++++++++++++
>> 4 files changed, 32 insertions(+)
>> 
>> diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
>> index 8e797b2..fab0aa1 100644
>> --- a/arch/arm64/include/asm/cpu.h
>> +++ b/arch/arm64/include/asm/cpu.h
>> @@ -62,5 +62,6 @@ DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);
>> 
>> void cpuinfo_store_cpu(void);
>> void __init cpuinfo_store_boot_cpu(void);
>> +u32 get_arm64_midr(void);
>> 
>> #endif /* __ASM_CPU_H */
>> diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
>> index faad6df..d3549de 100644
>> --- a/arch/arm64/include/asm/elf.h
>> +++ b/arch/arm64/include/asm/elf.h
>> @@ -17,6 +17,7 @@
>> #define __ASM_ELF_H
>> 
>> #include <asm/hwcap.h>
>> +#include <asm/cpu.h>
>> 
>> /*
>>  * ELF register definitions..
>> @@ -138,8 +139,13 @@ typedef struct user_fpsimd_state elf_fpregset_t;
>> 
>> #define ARCH_DLINFO                            \
>> do {                                    \
>> +    u32 midr;                            \
>> +                                    \
>>    NEW_AUX_ENT(AT_SYSINFO_EHDR,                    \
>>            (elf_addr_t)current->mm->context.vdso);        \
>> +    midr = get_arm64_midr();                    \
>> +    if (midr != 0)                            \
>> +        NEW_AUX_ENT(AT_ARM64_MIDR, (elf_addr_t)midr);        \
>> } while (0)
>> 
>> #define ARCH_HAS_SETUP_ADDITIONAL_PAGES
>> diff --git a/arch/arm64/include/uapi/asm/auxvec.h b/arch/arm64/include/uapi/asm/auxvec.h
>> index 22d6d88..dc55c56 100644
>> --- a/arch/arm64/include/uapi/asm/auxvec.h
>> +++ b/arch/arm64/include/uapi/asm/auxvec.h
>> @@ -19,4 +19,7 @@
>> /* vDSO location */
>> #define AT_SYSINFO_EHDR    33
>> 
>> +/* Machine IDenfier Register (MDIR). */
>> +#define AT_ARM64_MIDR 38
>> +
>> #endif
>> diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
>> index 75d5a86..b14c87d 100644
>> --- a/arch/arm64/kernel/cpuinfo.c
>> +++ b/arch/arm64/kernel/cpuinfo.c
>> @@ -254,3 +254,25 @@ void __init cpuinfo_store_boot_cpu(void)
>> 
>>    boot_cpu_data = *info;
>> }
>> +
>> +u32 get_arm64_midr(void)
>> +{
>> +    int i;
>> +    u32 midr = 0;
>> +
>> +    for_each_online_cpu(i) {
>> +        struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
>> +        u32 oldmidr = midr;
>> +
>> +        midr = cpuinfo->reg_midr;
>> +        /*
>> +         * If there are cpus which have a different
>> +         * midr just return 0.
>> +         */
>> +        if (oldmidr && oldmidr != midr)
>> +            return 0;
>> +    }
>> +
>> +    return midr;
>> +}
> 
> If I have a big.LITTLE system where all the big CPUs are currently
> offline, this will leave the MIDR the little CPUs in the auxvec.
> However, at any point after this has run, I could hotplug the big CPUs
> on and the little CPUs off, leaving this reporting a MIDR that
> represents none of the online CPUs.
> 
> Given big.LITTLE and the potential for physical/dynamic hotplug (where
> we won't know all the MIDRs in advance), I don't think that we can
> generally expose a common MIDR in this fashion, and I don't think that
> we should give the impression that we can.

This is standard issue with hot plug and big.little. Really big.little is a design flaw but I am not going into that here. 


> 
> I think that the only things we can do are expose the MIDR for CPU the
> code is currently executing on (as Suzuki's patches do), and/or expose
> all the MIDRs for currently online CPUs (as Steve's [1] patch does).
> Anything else leaves us trying to provide semantics that we cannot
> guarantee.

Except they are not backwards compatible which means nobody in their right mind would use the register to get the midr that way. I am sorry but having a newer version of glibc working on a year old kernel is not going to fly. 

Thanks,
Andrew


> 
> Thanks,
> Mark.
> 
> [1] http://lists.infradead.org/pipermail/linux-arm-kernel/2015-July/359127.html

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCHv2] ARM64: Add AT_ARM64_MIDR to the aux vector
  2015-09-01 16:51   ` pinskia at gmail.com
@ 2015-09-01 17:06     ` Pinski, Andrew
  2015-09-01 17:30       ` Mark Rutland
  2015-09-01 17:19     ` Mark Rutland
  1 sibling, 1 reply; 16+ messages in thread
From: Pinski, Andrew @ 2015-09-01 17:06 UTC (permalink / raw)
  To: linux-arm-kernel





> On Sep 2, 2015, at 12:51 AM, "pinskia at gmail.com" <pinskia@gmail.com> wrote:
> 
> 
> 
> 
> 
>> On Sep 2, 2015, at 12:33 AM, Mark Rutland <mark.rutland@arm.com> wrote:
>> 
>> Hi,
>> 
>>> On Sat, Aug 29, 2015 at 07:46:22PM +0100, Andrew Pinski wrote:
>>> It is useful to pass down MIDR register down to userland if all of
>>> the online cores are all the same type.  This adds AT_ARM64_MIDR
>>> aux vector type and passes down the midr system register.
>>> 
>>> This is alternative to MIDR_EL1 part of
>>> http://lists.infradead.org/pipermail/linux-arm-kernel/2015-July/358995.html.
>>> It allows for faster access to midr_el1 than going through a trap and
>>> does not exist if the set of cores are not the same.
>> 
>> I'm not sure I follow the rationale. If speed is important the
>> application can cache the value the first time it reads it with a trap.
> 
> It is also about compatibility also. Exposing the register is not backwards compatible but using the aux vector is. 

That would also break big.little too. So either break it with hot plug or break it in userland, your choice. 


> 
>> 
>> This also means that the behaviour is different across homogeneous and
>> heterogeneous systems.

That should be ok because it is still backwards compatible with what was done before.  My goal here is just to allow quick easy access to midr in the case of a homogeneous system which I care about, thunderx and to allow glibc to select a memcpy/memset that is better for thunderx. 

Thanks,
Andrew

>> 
>>> Changes from v1:
>>> Forgot to include the auxvec.h part.
>>> 
>>> Signed-off-by: Andrew Pinski <apinski@cavium.com>
>>> ---
>>> arch/arm64/include/asm/cpu.h         |    1 +
>>> arch/arm64/include/asm/elf.h         |    6 ++++++
>>> arch/arm64/include/uapi/asm/auxvec.h |    3 +++
>>> arch/arm64/kernel/cpuinfo.c          |   22 ++++++++++++++++++++++
>>> 4 files changed, 32 insertions(+)
>>> 
>>> diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
>>> index 8e797b2..fab0aa1 100644
>>> --- a/arch/arm64/include/asm/cpu.h
>>> +++ b/arch/arm64/include/asm/cpu.h
>>> @@ -62,5 +62,6 @@ DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);
>>> 
>>> void cpuinfo_store_cpu(void);
>>> void __init cpuinfo_store_boot_cpu(void);
>>> +u32 get_arm64_midr(void);
>>> 
>>> #endif /* __ASM_CPU_H */
>>> diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
>>> index faad6df..d3549de 100644
>>> --- a/arch/arm64/include/asm/elf.h
>>> +++ b/arch/arm64/include/asm/elf.h
>>> @@ -17,6 +17,7 @@
>>> #define __ASM_ELF_H
>>> 
>>> #include <asm/hwcap.h>
>>> +#include <asm/cpu.h>
>>> 
>>> /*
>>> * ELF register definitions..
>>> @@ -138,8 +139,13 @@ typedef struct user_fpsimd_state elf_fpregset_t;
>>> 
>>> #define ARCH_DLINFO                            \
>>> do {                                    \
>>> +    u32 midr;                            \
>>> +                                    \
>>>   NEW_AUX_ENT(AT_SYSINFO_EHDR,                    \
>>>           (elf_addr_t)current->mm->context.vdso);        \
>>> +    midr = get_arm64_midr();                    \
>>> +    if (midr != 0)                            \
>>> +        NEW_AUX_ENT(AT_ARM64_MIDR, (elf_addr_t)midr);        \
>>> } while (0)
>>> 
>>> #define ARCH_HAS_SETUP_ADDITIONAL_PAGES
>>> diff --git a/arch/arm64/include/uapi/asm/auxvec.h b/arch/arm64/include/uapi/asm/auxvec.h
>>> index 22d6d88..dc55c56 100644
>>> --- a/arch/arm64/include/uapi/asm/auxvec.h
>>> +++ b/arch/arm64/include/uapi/asm/auxvec.h
>>> @@ -19,4 +19,7 @@
>>> /* vDSO location */
>>> #define AT_SYSINFO_EHDR    33
>>> 
>>> +/* Machine IDenfier Register (MDIR). */
>>> +#define AT_ARM64_MIDR 38
>>> +
>>> #endif
>>> diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
>>> index 75d5a86..b14c87d 100644
>>> --- a/arch/arm64/kernel/cpuinfo.c
>>> +++ b/arch/arm64/kernel/cpuinfo.c
>>> @@ -254,3 +254,25 @@ void __init cpuinfo_store_boot_cpu(void)
>>> 
>>>   boot_cpu_data = *info;
>>> }
>>> +
>>> +u32 get_arm64_midr(void)
>>> +{
>>> +    int i;
>>> +    u32 midr = 0;
>>> +
>>> +    for_each_online_cpu(i) {
>>> +        struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
>>> +        u32 oldmidr = midr;
>>> +
>>> +        midr = cpuinfo->reg_midr;
>>> +        /*
>>> +         * If there are cpus which have a different
>>> +         * midr just return 0.
>>> +         */
>>> +        if (oldmidr && oldmidr != midr)
>>> +            return 0;
>>> +    }
>>> +
>>> +    return midr;
>>> +}
>> 
>> If I have a big.LITTLE system where all the big CPUs are currently
>> offline, this will leave the MIDR the little CPUs in the auxvec.
>> However, at any point after this has run, I could hotplug the big CPUs
>> on and the little CPUs off, leaving this reporting a MIDR that
>> represents none of the online CPUs.
>> 
>> Given big.LITTLE and the potential for physical/dynamic hotplug (where
>> we won't know all the MIDRs in advance), I don't think that we can
>> generally expose a common MIDR in this fashion, and I don't think that
>> we should give the impression that we can.
> 
> This is standard issue with hot plug and big.little. Really big.little is a design flaw but I am not going into that here. 
> 
> 
>> 
>> I think that the only things we can do are expose the MIDR for CPU the
>> code is currently executing on (as Suzuki's patches do), and/or expose
>> all the MIDRs for currently online CPUs (as Steve's [1] patch does).
>> Anything else leaves us trying to provide semantics that we cannot
>> guarantee.
> 
> Except they are not backwards compatible which means nobody in their right mind would use the register to get the midr that way. I am sorry but having a newer version of glibc working on a year old kernel is not going to fly. 
> 
> Thanks,
> Andrew
> 
> 
>> 
>> Thanks,
>> Mark.
>> 
>> [1] http://lists.infradead.org/pipermail/linux-arm-kernel/2015-July/359127.html

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCHv2] ARM64: Add AT_ARM64_MIDR to the aux vector
  2015-09-01 16:51   ` pinskia at gmail.com
  2015-09-01 17:06     ` Pinski, Andrew
@ 2015-09-01 17:19     ` Mark Rutland
  2015-09-01 17:29       ` Pinski, Andrew
  1 sibling, 1 reply; 16+ messages in thread
From: Mark Rutland @ 2015-09-01 17:19 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Sep 01, 2015 at 05:51:44PM +0100, pinskia at gmail.com wrote:
> 
> 
> 
> 
> > On Sep 2, 2015, at 12:33 AM, Mark Rutland <mark.rutland@arm.com> wrote:
> > 
> > Hi,
> > 
> >> On Sat, Aug 29, 2015 at 07:46:22PM +0100, Andrew Pinski wrote:
> >> It is useful to pass down MIDR register down to userland if all of
> >> the online cores are all the same type.  This adds AT_ARM64_MIDR
> >> aux vector type and passes down the midr system register.
> >> 
> >> This is alternative to MIDR_EL1 part of
> >> http://lists.infradead.org/pipermail/linux-arm-kernel/2015-July/358995.html.
> >> It allows for faster access to midr_el1 than going through a trap and
> >> does not exist if the set of cores are not the same.
> > 
> > I'm not sure I follow the rationale. If speed is important the
> > application can cache the value the first time it reads it with a trap.
> 
> It is also about compatibility also. Exposing the register is not
> backwards compatible but using the aux vector is. 

So long as we have HWCAP_CPUID describing the availability of register
access [2], then userspace can test for that before attempting to access
the MIDR.

Other than that, I don't see a backwards or forwards compatibility
issue.

> >> +u32 get_arm64_midr(void)
> >> +{
> >> +    int i;
> >> +    u32 midr = 0;
> >> +
> >> +    for_each_online_cpu(i) {
> >> +        struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
> >> +        u32 oldmidr = midr;
> >> +
> >> +        midr = cpuinfo->reg_midr;
> >> +        /*
> >> +         * If there are cpus which have a different
> >> +         * midr just return 0.
> >> +         */
> >> +        if (oldmidr && oldmidr != midr)
> >> +            return 0;
> >> +    }
> >> +
> >> +    return midr;
> >> +}
> > 
> > If I have a big.LITTLE system where all the big CPUs are currently
> > offline, this will leave the MIDR the little CPUs in the auxvec.
> > However, at any point after this has run, I could hotplug the big CPUs
> > on and the little CPUs off, leaving this reporting a MIDR that
> > represents none of the online CPUs.
> > 
> > Given big.LITTLE and the potential for physical/dynamic hotplug (where
> > we won't know all the MIDRs in advance), I don't think that we can
> > generally expose a common MIDR in this fashion, and I don't think that
> > we should give the impression that we can.
> 
> This is standard issue with hot plug and big.little. Really big.little
> is a design flaw but I am not going into that here. 

Regardless of our personal feelings on big.LITTLE, it's something we
have to deal with.

Hopefully it's a non-issue anyway; a MIDR provided by this interface can
really only be used to derive optimisation criteria rather than
non-architected properties required for correctness.

> > I think that the only things we can do are expose the MIDR for CPU the
> > code is currently executing on (as Suzuki's patches do), and/or expose
> > all the MIDRs for currently online CPUs (as Steve's [1] patch does).
> > Anything else leaves us trying to provide semantics that we cannot
> > guarantee.
> 
> Except they are not backwards compatible which means nobody in their
> right mind would use the register to get the midr that way.

I assume you missed the discussion of HWCAP_CPUID, which prevents the
compatibility issue I believe you're considering here.

> I am sorry but having a newer version of glibc working on a year old
> kernel is not going to fly. 

I'm not sure I follow this, unless you meant _not_ working.

Thanks,
Mark.

[1] http://lists.infradead.org/pipermail/linux-arm-kernel/2015-July/359127.html
[2] http://lists.infradead.org/pipermail/linux-arm-kernel/2015-August/363559.html

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCHv2] ARM64: Add AT_ARM64_MIDR to the aux vector
  2015-09-01 17:19     ` Mark Rutland
@ 2015-09-01 17:29       ` Pinski, Andrew
  0 siblings, 0 replies; 16+ messages in thread
From: Pinski, Andrew @ 2015-09-01 17:29 UTC (permalink / raw)
  To: linux-arm-kernel





> On Sep 2, 2015, at 1:19 AM, Mark Rutland <mark.rutland@arm.com> wrote:
> 
>> On Tue, Sep 01, 2015 at 05:51:44PM +0100, pinskia at gmail.com wrote:
>> 
>> 
>> 
>> 
>>> On Sep 2, 2015, at 12:33 AM, Mark Rutland <mark.rutland@arm.com> wrote:
>>> 
>>> Hi,
>>> 
>>>> On Sat, Aug 29, 2015 at 07:46:22PM +0100, Andrew Pinski wrote:
>>>> It is useful to pass down MIDR register down to userland if all of
>>>> the online cores are all the same type.  This adds AT_ARM64_MIDR
>>>> aux vector type and passes down the midr system register.
>>>> 
>>>> This is alternative to MIDR_EL1 part of
>>>> http://lists.infradead.org/pipermail/linux-arm-kernel/2015-July/358995.html.
>>>> It allows for faster access to midr_el1 than going through a trap and
>>>> does not exist if the set of cores are not the same.
>>> 
>>> I'm not sure I follow the rationale. If speed is important the
>>> application can cache the value the first time it reads it with a trap.
>> 
>> It is also about compatibility also. Exposing the register is not
>> backwards compatible but using the aux vector is. 
> 
> So long as we have HWCAP_CPUID describing the availability of register
> access [2], then userspace can test for that before attempting to access
> the MIDR.

So two checks always. Bad choice. 

> 
> Other than that, I don't see a backwards or forwards compatibility
> issue.
> 
>>>> +u32 get_arm64_midr(void)
>>>> +{
>>>> +    int i;
>>>> +    u32 midr = 0;
>>>> +
>>>> +    for_each_online_cpu(i) {
>>>> +        struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
>>>> +        u32 oldmidr = midr;
>>>> +
>>>> +        midr = cpuinfo->reg_midr;
>>>> +        /*
>>>> +         * If there are cpus which have a different
>>>> +         * midr just return 0.
>>>> +         */
>>>> +        if (oldmidr && oldmidr != midr)
>>>> +            return 0;
>>>> +    }
>>>> +
>>>> +    return midr;
>>>> +}
>>> 
>>> If I have a big.LITTLE system where all the big CPUs are currently
>>> offline, this will leave the MIDR the little CPUs in the auxvec.
>>> However, at any point after this has run, I could hotplug the big CPUs
>>> on and the little CPUs off, leaving this reporting a MIDR that
>>> represents none of the online CPUs.
>>> 
>>> Given big.LITTLE and the potential for physical/dynamic hotplug (where
>>> we won't know all the MIDRs in advance), I don't think that we can
>>> generally expose a common MIDR in this fashion, and I don't think that
>>> we should give the impression that we can.
>> 
>> This is standard issue with hot plug and big.little. Really big.little
>> is a design flaw but I am not going into that here. 
> 
> Regardless of our personal feelings on big.LITTLE, it's something we
> have to deal with.

You did not respond to the caching in userspace issue. You raised that as a speed optimization but then considered my patch as a non starter. 

> 
> Hopefully it's a non-issue anyway; a MIDR provided by this interface can
> really only be used to derive optimisation criteria rather than
> non-architected properties required for correctness.

Like hardware workarounds? Yes that is going to be used for that and is already right now by reading /proc/cpuinfo . Also it would useful to use what ever interface for gcc's -mcpu=native option. 


> 
>>> I think that the only things we can do are expose the MIDR for CPU the
>>> code is currently executing on (as Suzuki's patches do), and/or expose
>>> all the MIDRs for currently online CPUs (as Steve's [1] patch does).
>>> Anything else leaves us trying to provide semantics that we cannot
>>> guarantee.
>> 
>> Except they are not backwards compatible which means nobody in their
>> right mind would use the register to get the midr that way.
> 
> I assume you missed the discussion of HWCAP_CPUID, which prevents the
> compatibility issue I believe you're considering here.

And you suggested to cache midr while not considering big.little. You can't have it both ways. I read those and I still think they were wrong in rejecting this.  Also there are two markets arm is in and things like this is causing one of those markets to suffer. Big.little is not going into servers. 

> 
>> I am sorry but having a newer version of glibc working on a year old
>> kernel is not going to fly. 
> 
> I'm not sure I follow this, unless you meant _not_ working.

Because of the double checks, it will be slower and the trap. And gives a bad interface to userland really.  Aux vector is a much cleaner interface to userland than a trapped instruction. 

Thanks,
Andrew

> 
> Thanks,
> Mark.
> 
> [1] http://lists.infradead.org/pipermail/linux-arm-kernel/2015-July/359127.html
> [2] http://lists.infradead.org/pipermail/linux-arm-kernel/2015-August/363559.html

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCHv2] ARM64: Add AT_ARM64_MIDR to the aux vector
  2015-09-01 17:06     ` Pinski, Andrew
@ 2015-09-01 17:30       ` Mark Rutland
  2015-09-01 17:58         ` pinskia at gmail.com
  0 siblings, 1 reply; 16+ messages in thread
From: Mark Rutland @ 2015-09-01 17:30 UTC (permalink / raw)
  To: linux-arm-kernel

[...]

> >>> On Sat, Aug 29, 2015 at 07:46:22PM +0100, Andrew Pinski wrote:
> >>> It is useful to pass down MIDR register down to userland if all of
> >>> the online cores are all the same type.  This adds AT_ARM64_MIDR
> >>> aux vector type and passes down the midr system register.
> >>> 
> >>> This is alternative to MIDR_EL1 part of
> >>> http://lists.infradead.org/pipermail/linux-arm-kernel/2015-July/358995.html.
> >>> It allows for faster access to midr_el1 than going through a trap and
> >>> does not exist if the set of cores are not the same.
> >> 
> >> I'm not sure I follow the rationale. If speed is important the
> >> application can cache the value the first time it reads it with a trap.
> > 
> > It is also about compatibility also. Exposing the register is not backwards compatible but using the aux vector is. 
> 
> That would also break big.little too. So either break it with hot plug or break it in userland, your choice. 

The value wouldn't be representative of the system as a whole; that is
true. However, we never guaranteed that it was, while the aux vector
code implied that we did.

For optimisation that may be good enough; code optimized for a different
uArch should still function on another, even if it is slower.

> >> This also means that the behaviour is different across homogeneous and
> >> heterogeneous systems.
> 
> That should be ok because it is still backwards compatible with what
> was done before.  My goal here is just to allow quick easy access to
> midr in the case of a homogeneous system which I care about, thunderx
> and to allow glibc to select a memcpy/memset that is better for
> thunderx. 

As I mentioned in the other thread, I think that HWCAP_CPUID is
sufficient to enable forwards and backwards compatibility. If it is
present then you can use the current CPU's MIDR to select a better
memcpy/memset if required.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCHv2] ARM64: Add AT_ARM64_MIDR to the aux vector
  2015-09-01 17:30       ` Mark Rutland
@ 2015-09-01 17:58         ` pinskia at gmail.com
  2015-09-01 19:12           ` Siarhei Siamashka
  0 siblings, 1 reply; 16+ messages in thread
From: pinskia at gmail.com @ 2015-09-01 17:58 UTC (permalink / raw)
  To: linux-arm-kernel





> On Sep 2, 2015, at 1:30 AM, Mark Rutland <mark.rutland@arm.com> wrote:
> 
> [...]
> 
>>>>> On Sat, Aug 29, 2015 at 07:46:22PM +0100, Andrew Pinski wrote:
>>>>> It is useful to pass down MIDR register down to userland if all of
>>>>> the online cores are all the same type.  This adds AT_ARM64_MIDR
>>>>> aux vector type and passes down the midr system register.
>>>>> 
>>>>> This is alternative to MIDR_EL1 part of
>>>>> http://lists.infradead.org/pipermail/linux-arm-kernel/2015-July/358995.html.
>>>>> It allows for faster access to midr_el1 than going through a trap and
>>>>> does not exist if the set of cores are not the same.
>>>> 
>>>> I'm not sure I follow the rationale. If speed is important the
>>>> application can cache the value the first time it reads it with a trap.
>>> 
>>> It is also about compatibility also. Exposing the register is not backwards compatible but using the aux vector is.
>> 
>> That would also break big.little too. So either break it with hot plug or break it in userland, your choice.
> 
> The value wouldn't be representative of the system as a whole; that is
> true. However, we never guaranteed that it was, while the aux vector
> code implied that we did.

Yes but I guess you talk about caching the value in userspace but doing it via the aux vector is the same as your suggestion. Just one difference is you don't get the aux vector entry if there is a CPU that is online which is different. No difference from your suggestion of caching it. Without considering hot pug for a second (that is a huge different issue all together), if userland wants to know if all up CPUs have the same midr, they would either read /sys entries (lots of syscalls) or bind to each CPU and do the trap. That means at least three or two syscalls/traps for each CPU. My way is none and gets a value of midr if they are all the Same for free. 

Again what is the difference between the aux vector and caching the value in userspace? Because it seems like you suggesting I do that but then avoiding big.little also. 

Thanks,
Andrew

> 
> For optimisation that may be good enough; code optimized for a different
> uArch should still function on another, even if it is slower.
> 
>>>> This also means that the behaviour is different across homogeneous and
>>>> heterogeneous systems.
>> 
>> That should be ok because it is still backwards compatible with what
>> was done before.  My goal here is just to allow quick easy access to
>> midr in the case of a homogeneous system which I care about, thunderx
>> and to allow glibc to select a memcpy/memset that is better for
>> thunderx.
> 
> As I mentioned in the other thread, I think that HWCAP_CPUID is
> sufficient to enable forwards and backwards compatibility. If it is
> present then you can use the current CPU's MIDR to select a better
> memcpy/memset if required.
> 
> Thanks,
> Mark.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCHv2] ARM64: Add AT_ARM64_MIDR to the aux vector
  2015-09-01 17:58         ` pinskia at gmail.com
@ 2015-09-01 19:12           ` Siarhei Siamashka
  2015-09-02  0:28             ` Pinski, Andrew
  0 siblings, 1 reply; 16+ messages in thread
From: Siarhei Siamashka @ 2015-09-01 19:12 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, 2 Sep 2015 01:58:56 +0800
pinskia at gmail.com wrote:

> > On Sep 2, 2015, at 1:30 AM, Mark Rutland <mark.rutland@arm.com> wrote:
> > 
> > [...]
> > 
> >>>>> On Sat, Aug 29, 2015 at 07:46:22PM +0100, Andrew Pinski wrote:
> >>>>> It is useful to pass down MIDR register down to userland if all of
> >>>>> the online cores are all the same type.  This adds AT_ARM64_MIDR
> >>>>> aux vector type and passes down the midr system register.
> >>>>> 
> >>>>> This is alternative to MIDR_EL1 part of
> >>>>> http://lists.infradead.org/pipermail/linux-arm-kernel/2015-July/358995.html.
> >>>>> It allows for faster access to midr_el1 than going through a trap and
> >>>>> does not exist if the set of cores are not the same.
> >>>> 
> >>>> I'm not sure I follow the rationale. If speed is important the
> >>>> application can cache the value the first time it reads it with a trap.
> >>> 
> >>> It is also about compatibility also. Exposing the register is not backwards compatible but using the aux vector is.
> >> 
> >> That would also break big.little too. So either break it with hot plug or break it in userland, your choice.
> > 
> > The value wouldn't be representative of the system as a whole; that is
> > true. However, we never guaranteed that it was, while the aux vector
> > code implied that we did.
> 
> Yes but I guess you talk about caching the value in userspace but doing
> it via the aux vector is the same as your suggestion. Just one
> difference is you don't get the aux vector entry if there is a CPU
> that is online which is different. No difference from your suggestion
> of caching it. Without considering hot pug for a second (that is a
> huge different issue all together), if userland wants to know if all
> up CPUs have the same midr, they would either read /sys entries (lots
> of syscalls) or bind to each CPU and do the trap. That means at least
> three or two syscalls/traps for each CPU. My way is none and gets a
> value of midr if they are all the Same for free. 

Andrew, how do you propose to get the value of MIDR? Open the
"/proc/self/auxv", read it, do a linear search in the buffer to find
the required entry and then read the value? Or use the glibc specific
getauxval() function (https://lwn.net/Articles/519085) ?

Regarding the caching implementation, one can open and parse the
"/proc/cpuinfo" file (with older kernels) or read the new sysfs
entries to get the MIDR value for each core. Then create a lookup
table. As an additional bonus, this lookup table can contain not
just the MIDR values, but any arbitrary data in any format (for
example, a function pointer to the memcpy function or anything else).

After the lookup table is available, one can use the getcpu() syscall
for getting the CPU number and do the table lookup. And for getting
reasonable performance, implement the vdso variant of the getcpu()
syscall.

All of this internal ugliness would be best abstracted inside
of the GCC __builtin_cpu_init(), __builtin_cpu_is() and
__builtin_cpu_supports() builtins:
    http://gcc.gnu.org/gcc-4.8/changes.html

One big.LITTLE systems, the __builtin_cpu_is() could be implemented
via a single getcpu() syscall and the table lookup, like explained
above. The __builtin_cpu_init() could prepare the lookup table.
And on normal systems with identical cores, the use of the syscall
is not required.

It might be interesting to also optionally allow something like this:
    __builtin_cpu_is("cortex-a7 || cortex-a15")
Which would mean that we are interested in checking for the
Cortex-A7+Cortex-A15 pair in a big.LITTLE system, but are not
interested in knowing whether we are running on A7 or A15 in this
particular moment (and avoid the syscall overhead).

We had an old discussion on a similar CPU type identification topic
in the past:
    http://lists.infradead.org/pipermail/linux-arm-kernel/2013-December/220542.html
I have been told that it had been forwarded to the Linaro toolchain
people, but did not track if this resulted in anything useful or not.

I think that it would be best to prefer something that is easily usable
for all applications and libraries, and not just something for a private
use by glibc. To sum everything up:

One the kernel side it means:
  1. Maybe implement vdso for getcpu(), this will make things faster
     on big.LITTLE systems.
  2. Maybe implement sysfs entries for per-core MIDR values from
       http://lists.infradead.org/pipermail/linux-arm-kernel/2015-July/359127.html
     This will make things faster and allow to avoid potentially
     messy and cumbersome /proc/cpuinfo text parsing.

On the GCC side it means:
  1. Implement __builtin_cpu_init(), __builtin_cpu_is() and
     __builtin_cpu_supports() builtins, which rely on reading sysfs
     entries (with a fallback to /proc/cpuinfo parsing on old kernels)
     and the getcpu() syscall for the reasonably accurate core type
     runtime identification on big.LITTLE systems.

On the applications/libraries side (including, but not limited to glibc)
it means:
  1. Rely on the GCC __builtin_cpu_init(), __builtin_cpu_is() and
     __builtin_cpu_supports() builtins.
  2. Maybe implement the replacement of these builtins to get all the
     same functionality even with the old versions of GCC.

-- 
Best regards,
Siarhei Siamashka

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCHv2] ARM64: Add AT_ARM64_MIDR to the aux vector
  2015-09-01 19:12           ` Siarhei Siamashka
@ 2015-09-02  0:28             ` Pinski, Andrew
  2015-09-02 13:57               ` Siarhei Siamashka
  0 siblings, 1 reply; 16+ messages in thread
From: Pinski, Andrew @ 2015-09-02  0:28 UTC (permalink / raw)
  To: linux-arm-kernel





> On Sep 2, 2015, at 3:13 AM, Siarhei Siamashka <siarhei.siamashka@gmail.com> wrote:
> 
> On Wed, 2 Sep 2015 01:58:56 +0800
> pinskia at gmail.com wrote:
> 
>>> On Sep 2, 2015, at 1:30 AM, Mark Rutland <mark.rutland@arm.com> wrote:
>>> 
>>> [...]
>>> 
>>>>>>> On Sat, Aug 29, 2015 at 07:46:22PM +0100, Andrew Pinski wrote:
>>>>>>> It is useful to pass down MIDR register down to userland if all of
>>>>>>> the online cores are all the same type.  This adds AT_ARM64_MIDR
>>>>>>> aux vector type and passes down the midr system register.
>>>>>>> 
>>>>>>> This is alternative to MIDR_EL1 part of
>>>>>>> http://lists.infradead.org/pipermail/linux-arm-kernel/2015-July/358995.html.
>>>>>>> It allows for faster access to midr_el1 than going through a trap and
>>>>>>> does not exist if the set of cores are not the same.
>>>>>> 
>>>>>> I'm not sure I follow the rationale. If speed is important the
>>>>>> application can cache the value the first time it reads it with a trap.
>>>>> 
>>>>> It is also about compatibility also. Exposing the register is not backwards compatible but using the aux vector is.
>>>> 
>>>> That would also break big.little too. So either break it with hot plug or break it in userland, your choice.
>>> 
>>> The value wouldn't be representative of the system as a whole; that is
>>> true. However, we never guaranteed that it was, while the aux vector
>>> code implied that we did.
>> 
>> Yes but I guess you talk about caching the value in userspace but doing
>> it via the aux vector is the same as your suggestion. Just one
>> difference is you don't get the aux vector entry if there is a CPU
>> that is online which is different. No difference from your suggestion
>> of caching it. Without considering hot pug for a second (that is a
>> huge different issue all together), if userland wants to know if all
>> up CPUs have the same midr, they would either read /sys entries (lots
>> of syscalls) or bind to each CPU and do the trap. That means at least
>> three or two syscalls/traps for each CPU. My way is none and gets a
>> value of midr if they are all the Same for free. 
> 
> Andrew, how do you propose to get the value of MIDR? Open the
> "/proc/self/auxv", read it, do a linear search in the buffer to find
> the required entry and then read the value? Or use the glibc specific
> getauxval() function (https://lwn.net/Articles/519085) ?

This is inside glibc I am talking about so getauxval. 

> 
> Regarding the caching implementation, one can open and parse the
> "/proc/cpuinfo" file (with older kernels) or read the new sysfs
> entries to get the MIDR value for each core. Then create a lookup
> table. As an additional bonus, this lookup table can contain not
> just the MIDR values, but any arbitrary data in any format (for
> example, a function pointer to the memcpy function or anything else).

You don't want to do that early on in ld.so each time a program starts up. Too much overhead. 

> 
> After the lookup table is available, one can use the getcpu() syscall
> for getting the CPU number and do the table lookup. And for getting
> reasonable performance, implement the vdso variant of the getcpu()
> syscall.
> 
> All of this internal ugliness would be best abstracted inside
> of the GCC __builtin_cpu_init(), __builtin_cpu_is() and
> __builtin_cpu_supports() builtins:
>    http://gcc.gnu.org/gcc-4.8/changes.html

Yes but this is about glibc support and not other userland support. Having glibc depend on that is even worse. 

Thanks,
Andrew


> 
> One big.LITTLE systems, the __builtin_cpu_is() could be implemented
> via a single getcpu() syscall and the table lookup, like explained
> above. The __builtin_cpu_init() could prepare the lookup table.
> And on normal systems with identical cores, the use of the syscall
> is not required.
> 
> It might be interesting to also optionally allow something like this:
>    __builtin_cpu_is("cortex-a7 || cortex-a15")
> Which would mean that we are interested in checking for the
> Cortex-A7+Cortex-A15 pair in a big.LITTLE system, but are not
> interested in knowing whether we are running on A7 or A15 in this
> particular moment (and avoid the syscall overhead).
> 
> We had an old discussion on a similar CPU type identification topic
> in the past:
>    http://lists.infradead.org/pipermail/linux-arm-kernel/2013-December/220542.html
> I have been told that it had been forwarded to the Linaro toolchain
> people, but did not track if this resulted in anything useful or not.
> 
> I think that it would be best to prefer something that is easily usable
> for all applications and libraries, and not just something for a private
> use by glibc. To sum everything up:
> 
> One the kernel side it means:
>  1. Maybe implement vdso for getcpu(), this will make things faster
>     on big.LITTLE systems.
>  2. Maybe implement sysfs entries for per-core MIDR values from
>       http://lists.infradead.org/pipermail/linux-arm-kernel/2015-July/359127.html
>     This will make things faster and allow to avoid potentially
>     messy and cumbersome /proc/cpuinfo text parsing.
> 
> On the GCC side it means:
>  1. Implement __builtin_cpu_init(), __builtin_cpu_is() and
>     __builtin_cpu_supports() builtins, which rely on reading sysfs
>     entries (with a fallback to /proc/cpuinfo parsing on old kernels)
>     and the getcpu() syscall for the reasonably accurate core type
>     runtime identification on big.LITTLE systems.
> 
> On the applications/libraries side (including, but not limited to glibc)
> it means:
>  1. Rely on the GCC __builtin_cpu_init(), __builtin_cpu_is() and
>     __builtin_cpu_supports() builtins.
>  2. Maybe implement the replacement of these builtins to get all the
>     same functionality even with the old versions of GCC.
> 
> -- 
> Best regards,
> Siarhei Siamashka

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCHv2] ARM64: Add AT_ARM64_MIDR to the aux vector
  2015-09-02  0:28             ` Pinski, Andrew
@ 2015-09-02 13:57               ` Siarhei Siamashka
  2015-09-02 14:52                 ` Andrew Pinski
  0 siblings, 1 reply; 16+ messages in thread
From: Siarhei Siamashka @ 2015-09-02 13:57 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, 2 Sep 2015 00:28:28 +0000
"Pinski, Andrew" <Andrew.Pinski@caviumnetworks.com> wrote:

> > On Sep 2, 2015, at 3:13 AM, Siarhei Siamashka <siarhei.siamashka@gmail.com> wrote:
> > 
> > On Wed, 2 Sep 2015 01:58:56 +0800
> > pinskia at gmail.com wrote:
> > 
> >>> On Sep 2, 2015, at 1:30 AM, Mark Rutland <mark.rutland@arm.com> wrote:
> >>> 
> >>> [...]
> >>> 
> >>>>>>> On Sat, Aug 29, 2015 at 07:46:22PM +0100, Andrew Pinski wrote:
> >>>>>>> It is useful to pass down MIDR register down to userland if all of
> >>>>>>> the online cores are all the same type.  This adds AT_ARM64_MIDR
> >>>>>>> aux vector type and passes down the midr system register.
> >>>>>>> 
> >>>>>>> This is alternative to MIDR_EL1 part of
> >>>>>>> http://lists.infradead.org/pipermail/linux-arm-kernel/2015-July/358995.html.
> >>>>>>> It allows for faster access to midr_el1 than going through a trap and
> >>>>>>> does not exist if the set of cores are not the same.
> >>>>>> 
> >>>>>> I'm not sure I follow the rationale. If speed is important the
> >>>>>> application can cache the value the first time it reads it with a trap.
> >>>>> 
> >>>>> It is also about compatibility also. Exposing the register is not backwards compatible but using the aux vector is.
> >>>> 
> >>>> That would also break big.little too. So either break it with hot plug or break it in userland, your choice.
> >>> 
> >>> The value wouldn't be representative of the system as a whole; that is
> >>> true. However, we never guaranteed that it was, while the aux vector
> >>> code implied that we did.
> >> 
> >> Yes but I guess you talk about caching the value in userspace but doing
> >> it via the aux vector is the same as your suggestion. Just one
> >> difference is you don't get the aux vector entry if there is a CPU
> >> that is online which is different. No difference from your suggestion
> >> of caching it. Without considering hot pug for a second (that is a
> >> huge different issue all together), if userland wants to know if all
> >> up CPUs have the same midr, they would either read /sys entries (lots
> >> of syscalls) or bind to each CPU and do the trap. That means at least
> >> three or two syscalls/traps for each CPU. My way is none and gets a
> >> value of midr if they are all the Same for free. 
> > 
> > Andrew, how do you propose to get the value of MIDR? Open the
> > "/proc/self/auxv", read it, do a linear search in the buffer to find
> > the required entry and then read the value? Or use the glibc specific
> > getauxval() function (https://lwn.net/Articles/519085) ?
> 
> This is inside glibc I am talking about so getauxval. 

OK. Point taken.

> > Regarding the caching implementation, one can open and parse the
> > "/proc/cpuinfo" file (with older kernels) or read the new sysfs
> > entries to get the MIDR value for each core. Then create a lookup
> > table. As an additional bonus, this lookup table can contain not
> > just the MIDR values, but any arbitrary data in any format (for
> > example, a function pointer to the memcpy function or anything else).
> 
> You don't want to do that early on in ld.so each time a program
> starts up. Too much overhead. 

Right.

> > After the lookup table is available, one can use the getcpu() syscall
> > for getting the CPU number and do the table lookup. And for getting
> > reasonable performance, implement the vdso variant of the getcpu()
> > syscall.
> > 
> > All of this internal ugliness would be best abstracted inside
> > of the GCC __builtin_cpu_init(), __builtin_cpu_is() and
> > __builtin_cpu_supports() builtins:
> >    http://gcc.gnu.org/gcc-4.8/changes.html
> 
> Yes but this is about glibc support and not other userland
> support. Having glibc depend on that is even worse. 

Well, basically you are saying that you only care about your
individual use case (glibc only, 64-bit only, no support for
big.LITTLE) and just don't give rats about anything else. This
is not necessarily wrong, but we are not getting the problem
solved on a wider scale with such approach.

Regarding the reliance on the getauxval() function. Appears that
at least it seems to be also supported in recent versions of Android
and in musl too. So it already has a wide support in the Linux
different flavours and just needs a configure check in regular
applications/libraries. This is not perfect, but not bad either
and will only get better in the future.

Regarding the performance. Avoiding to open and read any files
surely helps to make the setup cost lower. In this sense, the
    http://lists.infradead.org/pipermail/linux-arm-kernel/2015-July/359127.html
patch is not very good because we would have to deal with multiple
files, increasing the overhead even more.

Regarding the big.LITTLE hardware. We can't just ignore it and
pretend that it does not exist. This would be rather short-sighted.
Just like not thinking about allowing userspace access to the MIDR
register was a short-sighted decision in the past for ARMv8 and
earlier architectures.

So let's review everything. You are suggesting that we should try
to read AT_ARM64_MIDR via getauxval (btw, why ARM64 in the name?) and
if it is found, then all the CPU cores are identical and we have the
MIDR value. If it is not found, then we should check AT_HWCAP for the
HWCAP_CPUID bit and read the MIDR register directly (which will be
trapped, but emulated in the kernel). 

The HWCAP_CPUID approach and emulation looks interesting, because it
might probably motivate ARM to actually implement direct userspace
access to the MIDR register in the future revisions of hardware :-)

Now I wonder about the performance difference between the kernel
emulated MIDR read and the performance of doing a table lookup, using
sched_getcpu() result as an index. And I have done some preliminary
tests. On my Cortex-A7 processor, running "sched_getcpu()" in a loop
takes ~200 cycles per iteration. Running "getauxval(AT_PLATFORM)" in
the same loop takes ~100 cycles per iteration. The emulated SWP and
unaligned LDM instructions take around ~500 and ~400 cycles. But the
emulation of the MIDR register read should be much more lightweight
than SWP/LDM, so it might be more competitive with sched_getcpu()
and I will benchmark this later.

I wonder if we still can try to make "sched_getcpu()" use vDSO
instead of the syscall to make it faster? Now that there exists a
vDSO implementation for gettimeofday(), everything should be easy if
we can find an unused userspace readable coprocessor register.
In the past, Catalin Marinas mentioned that "We have a user read-only
thread register unused on arm64":
    http://lists.infradead.org/pipermail/linux-arm-kernel/2013-December/220664.html
And if I understand it correctly, also one of the "scratch registers"
should exist in 32-bit ARM, which "isn't present in ARMv8/AArch64":
    http://lists.infradead.org/pipermail/linux-arm-kernel/2013-December/221056.html
What kind of registers are these exactly?

In principle, the aux vector can be extended to also contain a pointer
to an array of MIDR values for all the CPU cores if reducing the setup
overhead is critical.

The GCC __builtin_cpu_is() and __builtin_cpu_supports() builtins are
unrelated to the kernel and not really needed for glibc, but it's
important to ensure that they can be implemented efficiently on ARM.
It would be great if we could delegate all this ugly and hairy
runtime CPU features detection task to GCC. Instead of implementing
it in every application and library, which makes use of assembly
optimizations.

-- 
Best regards,
Siarhei Siamashka

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCHv2] ARM64: Add AT_ARM64_MIDR to the aux vector
  2015-09-02 13:57               ` Siarhei Siamashka
@ 2015-09-02 14:52                 ` Andrew Pinski
  2015-09-02 17:11                   ` Catalin Marinas
  0 siblings, 1 reply; 16+ messages in thread
From: Andrew Pinski @ 2015-09-02 14:52 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Sep 2, 2015 at 9:57 PM, Siarhei Siamashka
<siarhei.siamashka@gmail.com> wrote:
> On Wed, 2 Sep 2015 00:28:28 +0000
> "Pinski, Andrew" <Andrew.Pinski@caviumnetworks.com> wrote:
>
>> > On Sep 2, 2015, at 3:13 AM, Siarhei Siamashka <siarhei.siamashka@gmail.com> wrote:
>> >
>> > On Wed, 2 Sep 2015 01:58:56 +0800
>> > pinskia at gmail.com wrote:
>> >
>> >>> On Sep 2, 2015, at 1:30 AM, Mark Rutland <mark.rutland@arm.com> wrote:
>> >>>
>> >>> [...]
>> >>>
>> >>>>>>> On Sat, Aug 29, 2015 at 07:46:22PM +0100, Andrew Pinski wrote:
>> >>>>>>> It is useful to pass down MIDR register down to userland if all of
>> >>>>>>> the online cores are all the same type.  This adds AT_ARM64_MIDR
>> >>>>>>> aux vector type and passes down the midr system register.
>> >>>>>>>
>> >>>>>>> This is alternative to MIDR_EL1 part of
>> >>>>>>> http://lists.infradead.org/pipermail/linux-arm-kernel/2015-July/358995.html.
>> >>>>>>> It allows for faster access to midr_el1 than going through a trap and
>> >>>>>>> does not exist if the set of cores are not the same.
>> >>>>>>
>> >>>>>> I'm not sure I follow the rationale. If speed is important the
>> >>>>>> application can cache the value the first time it reads it with a trap.
>> >>>>>
>> >>>>> It is also about compatibility also. Exposing the register is not backwards compatible but using the aux vector is.
>> >>>>
>> >>>> That would also break big.little too. So either break it with hot plug or break it in userland, your choice.
>> >>>
>> >>> The value wouldn't be representative of the system as a whole; that is
>> >>> true. However, we never guaranteed that it was, while the aux vector
>> >>> code implied that we did.
>> >>
>> >> Yes but I guess you talk about caching the value in userspace but doing
>> >> it via the aux vector is the same as your suggestion. Just one
>> >> difference is you don't get the aux vector entry if there is a CPU
>> >> that is online which is different. No difference from your suggestion
>> >> of caching it. Without considering hot pug for a second (that is a
>> >> huge different issue all together), if userland wants to know if all
>> >> up CPUs have the same midr, they would either read /sys entries (lots
>> >> of syscalls) or bind to each CPU and do the trap. That means at least
>> >> three or two syscalls/traps for each CPU. My way is none and gets a
>> >> value of midr if they are all the Same for free.
>> >
>> > Andrew, how do you propose to get the value of MIDR? Open the
>> > "/proc/self/auxv", read it, do a linear search in the buffer to find
>> > the required entry and then read the value? Or use the glibc specific
>> > getauxval() function (https://lwn.net/Articles/519085) ?
>>
>> This is inside glibc I am talking about so getauxval.
>
> OK. Point taken.
>
>> > Regarding the caching implementation, one can open and parse the
>> > "/proc/cpuinfo" file (with older kernels) or read the new sysfs
>> > entries to get the MIDR value for each core. Then create a lookup
>> > table. As an additional bonus, this lookup table can contain not
>> > just the MIDR values, but any arbitrary data in any format (for
>> > example, a function pointer to the memcpy function or anything else).
>>
>> You don't want to do that early on in ld.so each time a program
>> starts up. Too much overhead.
>
> Right.
>
>> > After the lookup table is available, one can use the getcpu() syscall
>> > for getting the CPU number and do the table lookup. And for getting
>> > reasonable performance, implement the vdso variant of the getcpu()
>> > syscall.
>> >
>> > All of this internal ugliness would be best abstracted inside
>> > of the GCC __builtin_cpu_init(), __builtin_cpu_is() and
>> > __builtin_cpu_supports() builtins:
>> >    http://gcc.gnu.org/gcc-4.8/changes.html
>>
>> Yes but this is about glibc support and not other userland
>> support. Having glibc depend on that is even worse.
>
> Well, basically you are saying that you only care about your
> individual use case (glibc only, 64-bit only, no support for
> big.LITTLE) and just don't give rats about anything else. This
> is not necessarily wrong, but we are not getting the problem
> solved on a wider scale with such approach.
>
> Regarding the reliance on the getauxval() function. Appears that
> at least it seems to be also supported in recent versions of Android
> and in musl too. So it already has a wide support in the Linux
> different flavours and just needs a configure check in regular
> applications/libraries. This is not perfect, but not bad either
> and will only get better in the future.
>
> Regarding the performance. Avoiding to open and read any files
> surely helps to make the setup cost lower. In this sense, the
>     http://lists.infradead.org/pipermail/linux-arm-kernel/2015-July/359127.html
> patch is not very good because we would have to deal with multiple
> files, increasing the overhead even more.
>
> Regarding the big.LITTLE hardware. We can't just ignore it and
> pretend that it does not exist. This would be rather short-sighted.
> Just like not thinking about allowing userspace access to the MIDR
> register was a short-sighted decision in the past for ARMv8 and
> earlier architectures.
>
> So let's review everything. You are suggesting that we should try
> to read AT_ARM64_MIDR via getauxval (btw, why ARM64 in the name?) and
> if it is found, then all the CPU cores are identical and we have the
> MIDR value. If it is not found, then we should check AT_HWCAP for the
> HWCAP_CPUID bit and read the MIDR register directly (which will be
> trapped, but emulated in the kernel).
>
> The HWCAP_CPUID approach and emulation looks interesting, because it
> might probably motivate ARM to actually implement direct userspace
> access to the MIDR register in the future revisions of hardware :-)
>
> Now I wonder about the performance difference between the kernel
> emulated MIDR read and the performance of doing a table lookup, using
> sched_getcpu() result as an index. And I have done some preliminary
> tests. On my Cortex-A7 processor, running "sched_getcpu()" in a loop
> takes ~200 cycles per iteration. Running "getauxval(AT_PLATFORM)" in
> the same loop takes ~100 cycles per iteration. The emulated SWP and
> unaligned LDM instructions take around ~500 and ~400 cycles. But the
> emulation of the MIDR register read should be much more lightweight
> than SWP/LDM, so it might be more competitive with sched_getcpu()
> and I will benchmark this later.
>
> I wonder if we still can try to make "sched_getcpu()" use vDSO
> instead of the syscall to make it faster? Now that there exists a
> vDSO implementation for gettimeofday(), everything should be easy if
> we can find an unused userspace readable coprocessor register.
> In the past, Catalin Marinas mentioned that "We have a user read-only
> thread register unused on arm64":
>     http://lists.infradead.org/pipermail/linux-arm-kernel/2013-December/220664.html
> And if I understand it correctly, also one of the "scratch registers"
> should exist in 32-bit ARM, which "isn't present in ARMv8/AArch64":
>     http://lists.infradead.org/pipermail/linux-arm-kernel/2013-December/221056.html
> What kind of registers are these exactly?
>
> In principle, the aux vector can be extended to also contain a pointer
> to an array of MIDR values for all the CPU cores if reducing the setup
> overhead is critical.

That is not a bad idea.  Put this array in the data section of the
VDSO too.  It should be small enough though on systems with 96 or more
cores (dual socket ThunderX has 96 cores total), it is slightly
getting big.
The struct would be something like:
struct
{
  int32 numcores;
  int32 midr[];
};

Which you place right after the current data.  This would allow for
shared read only data between the processes and not needed to waste
other space.
I will try to  program this up in a few days and that will solve my
issue really.  I can cache in userspace if they are the same.  Also it
avoids a trap which can be more expensive than a cache misses.

Thanks,
Andrew Pinski

>
> The GCC __builtin_cpu_is() and __builtin_cpu_supports() builtins are
> unrelated to the kernel and not really needed for glibc, but it's
> important to ensure that they can be implemented efficiently on ARM.
> It would be great if we could delegate all this ugly and hairy
> runtime CPU features detection task to GCC. Instead of implementing
> it in every application and library, which makes use of assembly
> optimizations.
>
> --
> Best regards,
> Siarhei Siamashka

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCHv2] ARM64: Add AT_ARM64_MIDR to the aux vector
  2015-09-02 14:52                 ` Andrew Pinski
@ 2015-09-02 17:11                   ` Catalin Marinas
  2015-09-02 17:21                     ` Pinski, Andrew
  0 siblings, 1 reply; 16+ messages in thread
From: Catalin Marinas @ 2015-09-02 17:11 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Sep 02, 2015 at 10:52:05PM +0800, Andrew Pinski wrote:
> On Wed, Sep 2, 2015 at 9:57 PM, Siarhei Siamashka <siarhei.siamashka@gmail.com> wrote:
[...]
> >> > On Wed, 2 Sep 2015 01:58:56 +0800 pinskia at gmail.com wrote:
> >> >> Yes but I guess you talk about caching the value in userspace but doing
> >> >> it via the aux vector is the same as your suggestion. Just one
> >> >> difference is you don't get the aux vector entry if there is a CPU
> >> >> that is online which is different. No difference from your suggestion
> >> >> of caching it. Without considering hot pug for a second (that is a
> >> >> huge different issue all together), if userland wants to know if all
> >> >> up CPUs have the same midr, they would either read /sys entries (lots
> >> >> of syscalls) or bind to each CPU and do the trap. That means at least
> >> >> three or two syscalls/traps for each CPU. My way is none and gets a
> >> >> value of midr if they are all the Same for free.

Whether we like it or not, big.LITTLE systems are present in the ARM
ecosystem. You are looking to add a specific AT_ to solve a particular
problem on fully symmetric systems, ignoring the rest. I want this fixed
for all systems rather than trying to invent something else for
big.LITTLE which won't help user space at all.

If you want to avoid parsing all /sys entries, I would rather have a
HWCAP_ASYMMETRIC bit for big.LITTLE systems and let user space decide
whether to read all entries or not.

> > I wonder if we still can try to make "sched_getcpu()" use vDSO
> > instead of the syscall to make it faster? Now that there exists a
> > vDSO implementation for gettimeofday(), everything should be easy if
> > we can find an unused userspace readable coprocessor register.
> > In the past, Catalin Marinas mentioned that "We have a user read-only
> > thread register unused on arm64":
> >     http://lists.infradead.org/pipermail/linux-arm-kernel/2013-December/220664.html

We have a patch under development internally, it will appear on the list
at some point.

> > And if I understand it correctly, also one of the "scratch registers"
> > should exist in 32-bit ARM, which "isn't present in ARMv8/AArch64":
> >     http://lists.infradead.org/pipermail/linux-arm-kernel/2013-December/221056.html
> > What kind of registers are these exactly?
> >
> > In principle, the aux vector can be extended to also contain a pointer
> > to an array of MIDR values for all the CPU cores if reducing the setup
> > overhead is critical.
> 
> That is not a bad idea.  Put this array in the data section of the
> VDSO too.  It should be small enough though on systems with 96 or more
> cores (dual socket ThunderX has 96 cores total), it is slightly
> getting big.
> The struct would be something like:
> struct
> {
>   int32 numcores;
>   int32 midr[];
> };

First of all, I'm against hard-coding (VDSO) data as ABI. So far we used
VDSO to override some weak glibc functions but the VDSO-specific data is
parsed by the VDSO function implementation and not directly by glibc (or
user space). I prefer helper functions that read the VDSO-internal data
structures.

Secondly, you seem to be only interested in MIDR_EL1 but we also have
REVIDR_EL1 and AIDR_EL1 which may be relevant. Once we realise that more
information is needed, it's not always clear where the boundaries are
so I would rather have this exposed via /sys and/or MRS emulation (there
are patches for both).

Anyway, you need to involve the toolchain people in such discussions,
they may have different needs (like ifunc).

-- 
Catalin

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCHv2] ARM64: Add AT_ARM64_MIDR to the aux vector
  2015-09-02 17:11                   ` Catalin Marinas
@ 2015-09-02 17:21                     ` Pinski, Andrew
  2015-09-02 20:11                       ` Thomas Gleixner
  2015-09-03 17:14                       ` Catalin Marinas
  0 siblings, 2 replies; 16+ messages in thread
From: Pinski, Andrew @ 2015-09-02 17:21 UTC (permalink / raw)
  To: linux-arm-kernel





> On Sep 3, 2015, at 1:12 AM, Catalin Marinas <catalin.marinas@arm.com> wrote:
> 
>> On Wed, Sep 02, 2015 at 10:52:05PM +0800, Andrew Pinski wrote:
>> On Wed, Sep 2, 2015 at 9:57 PM, Siarhei Siamashka <siarhei.siamashka@gmail.com> wrote:
> [...]
>>>>> On Wed, 2 Sep 2015 01:58:56 +0800 pinskia at gmail.com wrote:
>>>>>> Yes but I guess you talk about caching the value in userspace but doing
>>>>>> it via the aux vector is the same as your suggestion. Just one
>>>>>> difference is you don't get the aux vector entry if there is a CPU
>>>>>> that is online which is different. No difference from your suggestion
>>>>>> of caching it. Without considering hot pug for a second (that is a
>>>>>> huge different issue all together), if userland wants to know if all
>>>>>> up CPUs have the same midr, they would either read /sys entries (lots
>>>>>> of syscalls) or bind to each CPU and do the trap. That means at least
>>>>>> three or two syscalls/traps for each CPU. My way is none and gets a
>>>>>> value of midr if they are all the Same for free.
> 
> Whether we like it or not, big.LITTLE systems are present in the ARM
> ecosystem. You are looking to add a specific AT_ to solve a particular
> problem on fully symmetric systems, ignoring the rest. I want this fixed
> for all systems rather than trying to invent something else for
> big.LITTLE which won't help user space at all.
> 
> If you want to avoid parsing all /sys entries, I would rather have a
> HWCAP_ASYMMETRIC bit for big.LITTLE systems and let user space decide
> whether to read all entries or not.
> 
>>> I wonder if we still can try to make "sched_getcpu()" use vDSO
>>> instead of the syscall to make it faster? Now that there exists a
>>> vDSO implementation for gettimeofday(), everything should be easy if
>>> we can find an unused userspace readable coprocessor register.
>>> In the past, Catalin Marinas mentioned that "We have a user read-only
>>> thread register unused on arm64":
>>>    http://lists.infradead.org/pipermail/linux-arm-kernel/2013-December/220664.html
> 
> We have a patch under development internally, it will appear on the list
> at some point.
> 
>>> And if I understand it correctly, also one of the "scratch registers"
>>> should exist in 32-bit ARM, which "isn't present in ARMv8/AArch64":
>>>    http://lists.infradead.org/pipermail/linux-arm-kernel/2013-December/221056.html
>>> What kind of registers are these exactly?
>>> 
>>> In principle, the aux vector can be extended to also contain a pointer
>>> to an array of MIDR values for all the CPU cores if reducing the setup
>>> overhead is critical.
>> 
>> That is not a bad idea.  Put this array in the data section of the
>> VDSO too.  It should be small enough though on systems with 96 or more
>> cores (dual socket ThunderX has 96 cores total), it is slightly
>> getting big.
>> The struct would be something like:
>> struct
>> {
>>  int32 numcores;
>>  int32 midr[];
>> };
> 
> First of all, I'm against hard-coding (VDSO) data as ABI. So far we used
> VDSO to override some weak glibc functions but the VDSO-specific data is
> parsed by the VDSO function implementation and not directly by glibc (or
> user space). I prefer helper functions that read the VDSO-internal data
> structures.

You don't like the idea of a fixed structure ABI that resides inside vdso data? Having a fixed struct ABI should be ok.  The location inside the data part was going to be passed via an aux vector entry.  Userland does even need to know it is really located in the vdso at all. It just happens to reside in there. The data structure would be well defined for the aux vector. 

> 
> Secondly, you seem to be only interested in MIDR_EL1 but we also have
> REVIDR_EL1 and AIDR_EL1 which may be relevant. Once we realise that more
> information is needed, it's not always clear where the boundaries are
> so I would rather have this exposed via /sys and/or MRS emulation (there
> are patches for both).
> 
> Anyway, you need to involve the toolchain people in such discussions,
> they may have different needs (like ifunc).

I am a toolchain person first and needed this in the first place for memset and memcpy on thunderx. I had asked our kernel engineers here first before even going forward with my original approach. I touch the kernel only because I need to really. I even had asked on #linaro-kernel and #linaro-toolchain around two-three months ago about this approach and I only had time to post it this week. 

Thanks,
Andrew


> 
> -- 
> Catalin

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCHv2] ARM64: Add AT_ARM64_MIDR to the aux vector
  2015-09-02 17:21                     ` Pinski, Andrew
@ 2015-09-02 20:11                       ` Thomas Gleixner
  2015-09-03 17:14                       ` Catalin Marinas
  1 sibling, 0 replies; 16+ messages in thread
From: Thomas Gleixner @ 2015-09-02 20:11 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, 2 Sep 2015, Pinski, Andrew wrote:
> > On Sep 3, 2015, at 1:12 AM, Catalin Marinas <catalin.marinas@arm.com> wrote:
> >> On Wed, Sep 02, 2015 at 10:52:05PM +0800, Andrew Pinski wrote:
> >> That is not a bad idea.  Put this array in the data section of the
> >> VDSO too.  It should be small enough though on systems with 96 or more
> >> cores (dual socket ThunderX has 96 cores total), it is slightly
> >> getting big.
> >> The struct would be something like:
> >> struct
> >> {
> >>  int32 numcores;
> >>  int32 midr[];
> >> };
> > 
> > First of all, I'm against hard-coding (VDSO) data as ABI. So far we used
> > VDSO to override some weak glibc functions but the VDSO-specific data is
> > parsed by the VDSO function implementation and not directly by glibc (or
> > user space). I prefer helper functions that read the VDSO-internal data
> > structures.
> 
> You don't like the idea of a fixed structure ABI that resides inside
> vdso data? Having a fixed struct ABI should be ok.  The location
> inside the data part was going to be passed via an aux vector entry.
> Userland does even need to know it is really located in the vdso at
> all. It just happens to reside in there. The data structure would be
> well defined for the aux vector.

Restrict the VDSO ABI to well defined single purpose functions. It's
way harder to define data struct ABIs right from the beginning.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCHv2] ARM64: Add AT_ARM64_MIDR to the aux vector
  2015-09-02 17:21                     ` Pinski, Andrew
  2015-09-02 20:11                       ` Thomas Gleixner
@ 2015-09-03 17:14                       ` Catalin Marinas
  1 sibling, 0 replies; 16+ messages in thread
From: Catalin Marinas @ 2015-09-03 17:14 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Sep 02, 2015 at 05:21:24PM +0000, Pinski, Andrew wrote:
> On Sep 3, 2015, at 1:12 AM, Catalin Marinas <catalin.marinas@arm.com> wrote:
> > On Wed, Sep 02, 2015 at 10:52:05PM +0800, Andrew Pinski wrote:
> >> That is not a bad idea.  Put this array in the data section of the
> >> VDSO too.  It should be small enough though on systems with 96 or more
> >> cores (dual socket ThunderX has 96 cores total), it is slightly
> >> getting big.
> >> The struct would be something like:
> >> struct
> >> {
> >>  int32 numcores;
> >>  int32 midr[];
> >> };
> > 
> > First of all, I'm against hard-coding (VDSO) data as ABI. So far we used
> > VDSO to override some weak glibc functions but the VDSO-specific data is
> > parsed by the VDSO function implementation and not directly by glibc (or
> > user space). I prefer helper functions that read the VDSO-internal data
> > structures.
> 
> You don't like the idea of a fixed structure ABI that resides inside
> vdso data? Having a fixed struct ABI should be ok.  The location
> inside the data part was going to be passed via an aux vector entry.
> Userland does even need to know it is really located in the vdso at
> all. It just happens to reside in there. The data structure would be
> well defined for the aux vector. 

The problem is not necessarily the location of the data structure but
whether you are confident it has all the information you need and won't
require changing.

> > Secondly, you seem to be only interested in MIDR_EL1 but we also have
> > REVIDR_EL1 and AIDR_EL1 which may be relevant. Once we realise that more
> > information is needed, it's not always clear where the boundaries are
> > so I would rather have this exposed via /sys and/or MRS emulation (there
> > are patches for both).
> > 
> > Anyway, you need to involve the toolchain people in such discussions,
> > they may have different needs (like ifunc).
> 
> I am a toolchain person first and needed this in the first place for
> memset and memcpy on thunderx.

That's exactly my point. You only care about a micro-optimisation on
thunderx and a quick solution for this specific case. I care about the
wider ecosystem. I'm sure it won't be long before someone asks about
exposing AIDR_EL1 (REVIDR_EL1 was already requested), cache information,
big.LITTLE topology and so on.

We currently have patches to provide all the CPUID information we
_think_ is relevant to user space, though in different forms (/sys and
MRS emulation). As long as you cache such information in user space, the
overhead of a trapped MRS or /sys access is minimal.

-- 
Catalin

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2015-09-03 17:14 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-08-29 18:46 [PATCHv2] ARM64: Add AT_ARM64_MIDR to the aux vector Andrew Pinski
2015-09-01 16:33 ` Mark Rutland
2015-09-01 16:51   ` pinskia at gmail.com
2015-09-01 17:06     ` Pinski, Andrew
2015-09-01 17:30       ` Mark Rutland
2015-09-01 17:58         ` pinskia at gmail.com
2015-09-01 19:12           ` Siarhei Siamashka
2015-09-02  0:28             ` Pinski, Andrew
2015-09-02 13:57               ` Siarhei Siamashka
2015-09-02 14:52                 ` Andrew Pinski
2015-09-02 17:11                   ` Catalin Marinas
2015-09-02 17:21                     ` Pinski, Andrew
2015-09-02 20:11                       ` Thomas Gleixner
2015-09-03 17:14                       ` Catalin Marinas
2015-09-01 17:19     ` Mark Rutland
2015-09-01 17:29       ` Pinski, Andrew

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).