All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] ARM: test for PMU feature on v7.
       [not found] <4F590411.9060201@arm.com>
@ 2012-03-23  0:32 ` Rusty Russell
  2012-03-23  0:34   ` [PATCH] ARM: test for PMU feature on v7 (v2 with typo fix) Rusty Russell
  2012-03-23  0:38   ` [PATCH] ARM: KVM: Emulate ID_DFR0 to say we don't support anything Rusty Russell
  0 siblings, 2 replies; 14+ messages in thread
From: Rusty Russell @ 2012-03-23  0:32 UTC (permalink / raw)
  To: linux-arm-kernel

The v7 ARM (B4.1.82) specifies that bits 27-24 of the ID_DFR0 reg show
what performance monitoring (if any) is available.  We should test this
before assuming (useful for inside virtualized environments, for example).

Signed-off-by: Rusty Russell <rusty.russell@linaro.org>

diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 6933244..303d1e1 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -1108,6 +1108,12 @@ static struct arm_pmu armv7pmu = {
 	.max_period		= (1LLU << 32) - 1,
 };
 
+static bool armv7_pmu_avail(void)
+{
+	/* 0 means unknown (maybe v1), 1 means v2, 2 means v2, 15 means none */
+	return ((read_cpuid_ext(CPUID_EXT_DFR0) >> 24) & 0xF) != 0xF;
+}
+
 static u32 __init armv7_read_num_pmnc_events(void)
 {
 	u32 nb_cnt;
@@ -1121,6 +1127,8 @@ static u32 __init armv7_read_num_pmnc_events(void)
 
 static struct arm_pmu *__init armv7_a8_pmu_init(void)
 {
+	if (!armv7_pmu_avail())
+		return NULL;
 	armv7pmu.id		= ARM_PERF_PMU_ID_CA8;
 	armv7pmu.name		= "ARMv7 Cortex-A8";
 	armv7pmu.map_event	= armv7_a8_map_event;
@@ -1130,6 +1138,8 @@ static struct arm_pmu *__init armv7_a8_pmu_init(void)
 
 static struct arm_pmu *__init armv7_a9_pmu_init(void)
 {
+	if (!armv7_pmu_avail())
+		return NULL;
 	armv7pmu.id		= ARM_PERF_PMU_ID_CA9;
 	armv7pmu.name		= "ARMv7 Cortex-A9";
 	armv7pmu.map_event	= armv7_a9_map_event;
@@ -1139,6 +1149,8 @@ static struct arm_pmu *__init armv7_a9_pmu_init(void)
 
 static struct arm_pmu *__init armv7_a5_pmu_init(void)
 {
+	if (!armv7_pmu_avail())
+		return NULL;
 	armv7pmu.id		= ARM_PERF_PMU_ID_CA5;
 	armv7pmu.name		= "ARMv7 Cortex-A5";
 	armv7pmu.map_event	= armv7_a5_map_event;
@@ -1148,6 +1160,8 @@ static struct arm_pmu *__init armv7_a5_pmu_init(void)
 
 static struct arm_pmu *__init armv7_a15_pmu_init(void)
 {
+	if (!armv7_pmu_avail())
+		return NULL;
 	armv7pmu.id		= ARM_PERF_PMU_ID_CA15;
 	armv7pmu.name		= "ARMv7 Cortex-A15";
 	armv7pmu.map_event	= armv7_a15_map_event;

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH] ARM: test for PMU feature on v7 (v2 with typo fix)
  2012-03-23  0:32 ` [PATCH] ARM: test for PMU feature on v7 Rusty Russell
@ 2012-03-23  0:34   ` Rusty Russell
  2012-03-23  9:53     ` Will Deacon
  2012-03-23  0:38   ` [PATCH] ARM: KVM: Emulate ID_DFR0 to say we don't support anything Rusty Russell
  1 sibling, 1 reply; 14+ messages in thread
From: Rusty Russell @ 2012-03-23  0:34 UTC (permalink / raw)
  To: linux-arm-kernel

The v7 ARM (B4.1.82) specifies that bits 27-24 of the ID_DFR0 reg show
what performance monitoring (if any) is available.  We should test this
before assuming (useful for inside virtualized environments, for example).

Signed-off-by: Rusty Russell <rusty.russell@linaro.org>

diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 6933244..303d1e1 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -1108,6 +1108,12 @@ static struct arm_pmu armv7pmu = {
 	.max_period		= (1LLU << 32) - 1,
 };
 
+static bool armv7_pmu_avail(void)
+{
+	/* 0 means unknown (maybe v1), 1 means v1, 2 means v2, 15 means none */
+	return ((read_cpuid_ext(CPUID_EXT_DFR0) >> 24) & 0xF) != 0xF;
+}
+
 static u32 __init armv7_read_num_pmnc_events(void)
 {
 	u32 nb_cnt;
@@ -1121,6 +1127,8 @@ static u32 __init armv7_read_num_pmnc_events(void)
 
 static struct arm_pmu *__init armv7_a8_pmu_init(void)
 {
+	if (!armv7_pmu_avail())
+		return NULL;
 	armv7pmu.id		= ARM_PERF_PMU_ID_CA8;
 	armv7pmu.name		= "ARMv7 Cortex-A8";
 	armv7pmu.map_event	= armv7_a8_map_event;
@@ -1130,6 +1138,8 @@ static struct arm_pmu *__init armv7_a8_pmu_init(void)
 
 static struct arm_pmu *__init armv7_a9_pmu_init(void)
 {
+	if (!armv7_pmu_avail())
+		return NULL;
 	armv7pmu.id		= ARM_PERF_PMU_ID_CA9;
 	armv7pmu.name		= "ARMv7 Cortex-A9";
 	armv7pmu.map_event	= armv7_a9_map_event;
@@ -1139,6 +1149,8 @@ static struct arm_pmu *__init armv7_a9_pmu_init(void)
 
 static struct arm_pmu *__init armv7_a5_pmu_init(void)
 {
+	if (!armv7_pmu_avail())
+		return NULL;
 	armv7pmu.id		= ARM_PERF_PMU_ID_CA5;
 	armv7pmu.name		= "ARMv7 Cortex-A5";
 	armv7pmu.map_event	= armv7_a5_map_event;
@@ -1148,6 +1160,8 @@ static struct arm_pmu *__init armv7_a5_pmu_init(void)
 
 static struct arm_pmu *__init armv7_a15_pmu_init(void)
 {
+	if (!armv7_pmu_avail())
+		return NULL;
 	armv7pmu.id		= ARM_PERF_PMU_ID_CA15;
 	armv7pmu.name		= "ARMv7 Cortex-A15";
 	armv7pmu.map_event	= armv7_a15_map_event;

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH] ARM: KVM: Emulate ID_DFR0 to say we don't support anything.
  2012-03-23  0:32 ` [PATCH] ARM: test for PMU feature on v7 Rusty Russell
  2012-03-23  0:34   ` [PATCH] ARM: test for PMU feature on v7 (v2 with typo fix) Rusty Russell
@ 2012-03-23  0:38   ` Rusty Russell
  2012-03-23  9:04     ` [Android-virt] " Peter Maydell
  1 sibling, 1 reply; 14+ messages in thread
From: Rusty Russell @ 2012-03-23  0:38 UTC (permalink / raw)
  To: Marc Zyngier, Christoffer Dall, Peter Maydell; +Cc: android-virt, kvm

The guest should be checking this before trying to use performance monitors,
for example.

Signed-off-by: Rusty Russell <rusty.russell@linaro.org>

diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index e356d1c..c07eb2b 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -238,6 +238,15 @@ static bool read_l2ctlr(struct kvm_vcpu *vcpu,
 	return true;
 }
 
+static bool read_id_dfr0(struct kvm_vcpu *vcpu,
+			 const struct coproc_params *p,
+			 unsigned long arg)
+{
+	/* We support no performance monitors (27-24 = b1111) and no debug */
+	*vcpu_reg(vcpu, p->Rt1) = 0x0F000000;
+	return true;
+}
+
 static bool access_cp15_reg(struct kvm_vcpu *vcpu,
 			    const struct coproc_params *p,
 			    unsigned long cp15_reg)
@@ -277,6 +286,9 @@ struct coproc_emulate {
 #define RW		.is_w  = DF
 
 static const struct coproc_emulate coproc_emulate[] = {
+	/* ID_DFR0 (guest asks what PMU/debug we support) */
+	{ CRn( 0), CRm( 1), Op1( 0), Op2( 2), is32,  READ,  read_id_dfr0},
+
 	/*
 	 * L2CTLR access:
 	 *

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [Android-virt] [PATCH] ARM: KVM: Emulate ID_DFR0 to say we don't support anything.
  2012-03-23  0:38   ` [PATCH] ARM: KVM: Emulate ID_DFR0 to say we don't support anything Rusty Russell
@ 2012-03-23  9:04     ` Peter Maydell
  2012-03-23 23:23       ` Rusty Russell
  0 siblings, 1 reply; 14+ messages in thread
From: Peter Maydell @ 2012-03-23  9:04 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Marc Zyngier, Christoffer Dall, Peter Maydell, kvm, android-virt

On 23 March 2012 00:38, Rusty Russell <rusty.russell@linaro.org> wrote:
> The guest should be checking this before trying to use performance monitors,
> for example.
>
> Signed-off-by: Rusty Russell <rusty.russell@linaro.org>

Does this code get used currently? I thought we hadn't enabled
trapping for feature register reads because of that use of one
of the MMFR registers in a hot path for page allocation...

-- PMM

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH] ARM: test for PMU feature on v7 (v2 with typo fix)
  2012-03-23  0:34   ` [PATCH] ARM: test for PMU feature on v7 (v2 with typo fix) Rusty Russell
@ 2012-03-23  9:53     ` Will Deacon
  2012-03-23 23:17       ` Rusty Russell
  0 siblings, 1 reply; 14+ messages in thread
From: Will Deacon @ 2012-03-23  9:53 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Rusty,

On Fri, Mar 23, 2012 at 12:34:57AM +0000, Rusty Russell wrote:
> The v7 ARM (B4.1.82) specifies that bits 27-24 of the ID_DFR0 reg show
> what performance monitoring (if any) is available.  We should test this
> before assuming (useful for inside virtualized environments, for example).

Ok, I can see why you don't want to emulate a PMU but I don't agree with the
patch :)

> diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
> index 6933244..303d1e1 100644
> --- a/arch/arm/kernel/perf_event_v7.c
> +++ b/arch/arm/kernel/perf_event_v7.c
> @@ -1108,6 +1108,12 @@ static struct arm_pmu armv7pmu = {
>  	.max_period		= (1LLU << 32) - 1,
>  };
>  
> +static bool armv7_pmu_avail(void)
> +{
> +	/* 0 means unknown (maybe v1), 1 means v1, 2 means v2, 15 means none */
> +	return ((read_cpuid_ext(CPUID_EXT_DFR0) >> 24) & 0xF) != 0xF;
> +}
> +
>  static u32 __init armv7_read_num_pmnc_events(void)
>  {
>  	u32 nb_cnt;
> @@ -1121,6 +1127,8 @@ static u32 __init armv7_read_num_pmnc_events(void)
>  
>  static struct arm_pmu *__init armv7_a8_pmu_init(void)
>  {
> +	if (!armv7_pmu_avail())
> +		return NULL;

This code is only executed if we know for sure that we are running on a
Cortex-A8. Cortex-A8 implementations have a PMU, so I don't like having the
probe here in case we are running on a virtualised CPU that doesn't quite
match the hardware.

Would you be able to advertise 0 event counters instead and treat the PMU
largely as RAZ/WI? The only tricky bit I can see is the cycle counter, which
is mandated by the presence of a PMU, however this could also just RAZ
initially.

>  	armv7pmu.id		= ARM_PERF_PMU_ID_CA8;
>  	armv7pmu.name		= "ARMv7 Cortex-A8";
>  	armv7pmu.map_event	= armv7_a8_map_event;
> @@ -1130,6 +1138,8 @@ static struct arm_pmu *__init armv7_a8_pmu_init(void)
>  
>  static struct arm_pmu *__init armv7_a9_pmu_init(void)
>  {
> +	if (!armv7_pmu_avail())
> +		return NULL;

Similarly for A5, A9, A7 and A15...

Cheers,

Will

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH] ARM: test for PMU feature on v7 (v2 with typo fix)
  2012-03-23  9:53     ` Will Deacon
@ 2012-03-23 23:17       ` Rusty Russell
  2012-03-26  9:40         ` Will Deacon
  0 siblings, 1 reply; 14+ messages in thread
From: Rusty Russell @ 2012-03-23 23:17 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, 23 Mar 2012 09:53:14 +0000, Will Deacon <will.deacon@arm.com> wrote:
> Hi Rusty,
> 
> On Fri, Mar 23, 2012 at 12:34:57AM +0000, Rusty Russell wrote:
> > The v7 ARM (B4.1.82) specifies that bits 27-24 of the ID_DFR0 reg show
> > what performance monitoring (if any) is available.  We should test this
> > before assuming (useful for inside virtualized environments, for example).
> 
> Ok, I can see why you don't want to emulate a PMU but I don't agree with the
> patch :)
...
> >  static struct arm_pmu *__init armv7_a8_pmu_init(void)
> >  {
> > +	if (!armv7_pmu_avail())
> > +		return NULL;
> 
> This code is only executed if we know for sure that we are running on a
> Cortex-A8. Cortex-A8 implementations have a PMU, so I don't like having the
> probe here in case we are running on a virtualised CPU that doesn't quite
> match the hardware.

OK, I see that argument, but I don't like it.  eg. the Cortex A-15 is
defined to have a PMUv2 with 6 counters, and below you're arguing we
should implement zero of them.

I had assumed that as we trend towards a common cross-platform kernel,
we should be relying on feature registers where available, not the
model.

> Would you be able to advertise 0 event counters instead and treat the PMU
> largely as RAZ/WI? The only tricky bit I can see is the cycle counter, which
> is mandated by the presence of a PMU, however this could also just RAZ
> initially.

Yes, that's the effect of the current emulation hack.  Far better not to
lie to the guest, however, than get weird results from profiling.

Cheers,
Rusty.
-- 
  How could I marry someone with more hair than me?  http://baldalex.org

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Android-virt] [PATCH] ARM: KVM: Emulate ID_DFR0 to say we don't support anything.
  2012-03-23  9:04     ` [Android-virt] " Peter Maydell
@ 2012-03-23 23:23       ` Rusty Russell
  0 siblings, 0 replies; 14+ messages in thread
From: Rusty Russell @ 2012-03-23 23:23 UTC (permalink / raw)
  To: Peter Maydell, Rusty Russell
  Cc: Marc Zyngier, Christoffer Dall, Peter Maydell, kvm, android-virt

On Fri, 23 Mar 2012 09:04:27 +0000, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 23 March 2012 00:38, Rusty Russell <rusty.russell@linaro.org> wrote:
> > The guest should be checking this before trying to use performance monitors,
> > for example.
> >
> > Signed-off-by: Rusty Russell <rusty.russell@linaro.org>
> 
> Does this code get used currently? I thought we hadn't enabled
> trapping for feature register reads because of that use of one
> of the MMFR registers in a hot path for page allocation...

True (I mis-labelled the patches, they should have been RFC, since this
is tied to the host-side probing patch).

So maybe not today, but we're going to end up doing this eventually.
Everyone expects their virtual machines to work fine with the next model
of ARM CPUs, which means we need to hide more and more host features
over time.

Cheers,
Rusty.
-- 
  How could I marry someone with more hair than me?  http://baldalex.org

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH] ARM: test for PMU feature on v7 (v2 with typo fix)
  2012-03-23 23:17       ` Rusty Russell
@ 2012-03-26  9:40         ` Will Deacon
  2012-03-26  9:50           ` [Android-virt] " Peter Maydell
                             ` (2 more replies)
  0 siblings, 3 replies; 14+ messages in thread
From: Will Deacon @ 2012-03-26  9:40 UTC (permalink / raw)
  To: linux-arm-kernel

[removed some dead email addresses from CC]

On Fri, Mar 23, 2012 at 11:17:07PM +0000, Rusty Russell wrote:
> On Fri, 23 Mar 2012 09:53:14 +0000, Will Deacon <will.deacon@arm.com> wrote:
> > On Fri, Mar 23, 2012 at 12:34:57AM +0000, Rusty Russell wrote:
> ...
> > >  static struct arm_pmu *__init armv7_a8_pmu_init(void)
> > >  {
> > > +	if (!armv7_pmu_avail())
> > > +		return NULL;
> > 
> > This code is only executed if we know for sure that we are running on a
> > Cortex-A8. Cortex-A8 implementations have a PMU, so I don't like having the
> > probe here in case we are running on a virtualised CPU that doesn't quite
> > match the hardware.
> 
> OK, I see that argument, but I don't like it.  eg. the Cortex A-15 is
> defined to have a PMUv2 with 6 counters, and below you're arguing we
> should implement zero of them.

Well, I think you should actually implement all 6 of them if that's the
target which you're emulating. However, there are bigger peripherals to fry
than the PMU so if you want to get something working quickly then a dummy
PMU makes sense as a stopgap.

> I had assumed that as we trend towards a common cross-platform kernel,
> we should be relying on feature registers where available, not the
> model.

The PMU is very tightly coupled with the CPU and it is necessary to know
exactly which CPU you have so that you can map microarchitectural events to
their identifiers. This is not discoverable in the architecture [PMUv2
provides some support for discovering the status of architected events, but
that is still not enough and is only present on A7 and A15 so far] so that's
why we probe the CPU instead.

Now, if everything was device-tree based then we could simply use a
different binding for each CPU but since we support perf on non-DT
platforms, probing the CPU type is the best solution. I would like to avoid
the probing code if we are initialised from DT, but I've not got round to it
yet (this would be useful for big.LITTLE).

> > Would you be able to advertise 0 event counters instead and treat the PMU
> > largely as RAZ/WI? The only tricky bit I can see is the cycle counter, which
> > is mandated by the presence of a PMU, however this could also just RAZ
> > initially.
> 
> Yes, that's the effect of the current emulation hack.  Far better not to
> lie to the guest, however, than get weird results from profiling.

I'd rather lie to the guest than modify it just to avoid emulating the
device correctly. Even then, the profiling results wouldn't be especially
weird -- everything would be 0, which is a lot better than random numbers.

Will

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Android-virt] [PATCH] ARM: test for PMU feature on v7 (v2 with typo fix)
  2012-03-26  9:40         ` Will Deacon
@ 2012-03-26  9:50           ` Peter Maydell
  2012-03-26 15:15             ` Will Deacon
  2012-03-28  6:09           ` Rusty Russell
  2012-03-28 14:17           ` Nicolas Pitre
  2 siblings, 1 reply; 14+ messages in thread
From: Peter Maydell @ 2012-03-26  9:50 UTC (permalink / raw)
  To: linux-arm-kernel

On 26 March 2012 10:40, Will Deacon <will.deacon@arm.com> wrote:
> [removed some dead email addresses from CC]
>
> On Fri, Mar 23, 2012 at 11:17:07PM +0000, Rusty Russell wrote:
>> On Fri, 23 Mar 2012 09:53:14 +0000, Will Deacon <will.deacon@arm.com> wrote:
>> > This code is only executed if we know for sure that we are running on a
>> > Cortex-A8. Cortex-A8 implementations have a PMU, so I don't like having the
>> > probe here in case we are running on a virtualised CPU that doesn't quite
>> > match the hardware.
>>
>> OK, I see that argument, but I don't like it. ?eg. the Cortex A-15 is
>> defined to have a PMUv2 with 6 counters, and below you're arguing we
>> should implement zero of them.
>
> Well, I think you should actually implement all 6 of them if that's the
> target which you're emulating.

The trouble with this line of thought is that it leads to the requirement
to support nested virtualization and virtualization of TrustZone, neither
of which are actually particularly useful or simple to do. At some point
we're going to end up saying that the "A15" we expose to the guest is
not an exact match to the real hardware, that it's missing certain
features (and hopefully that the lack of those features is advertised
via the feature registers). So the question is where you draw the line...

-- PMM

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Android-virt] [PATCH] ARM: test for PMU feature on v7 (v2 with typo fix)
  2012-03-26  9:50           ` [Android-virt] " Peter Maydell
@ 2012-03-26 15:15             ` Will Deacon
  0 siblings, 0 replies; 14+ messages in thread
From: Will Deacon @ 2012-03-26 15:15 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Mar 26, 2012 at 10:50:35AM +0100, Peter Maydell wrote:
> On 26 March 2012 10:40, Will Deacon <will.deacon@arm.com> wrote:
> > On Fri, Mar 23, 2012 at 11:17:07PM +0000, Rusty Russell wrote:
> >>
> >> OK, I see that argument, but I don't like it. ?eg. the Cortex A-15 is
> >> defined to have a PMUv2 with 6 counters, and below you're arguing we
> >> should implement zero of them.
> >
> > Well, I think you should actually implement all 6 of them if that's the
> > target which you're emulating.
> 
> The trouble with this line of thought is that it leads to the requirement
> to support nested virtualization and virtualization of TrustZone, neither
> of which are actually particularly useful or simple to do. At some point
> we're going to end up saying that the "A15" we expose to the guest is
> not an exact match to the real hardware, that it's missing certain
> features (and hopefully that the lack of those features is advertised
> via the feature registers). So the question is where you draw the line...

I'd be inclined to say that, at least for kvm, you should draw the line at
the point where the guest doesn't need modifications to run in the virtual
environment. That is, everything used by Linux is emulated enough for the
kernel to run happily. Clearly you can improve on that in the areas which
are considered to be most critical.

Aside: given that guests have to run non-secure, how would you emulate a
secure platform without emulating the corresponding TrustZone API? Or is
that one of the reasons why you're currently targetting Versatile Express?

Will

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH] ARM: test for PMU feature on v7 (v2 with typo fix)
  2012-03-26  9:40         ` Will Deacon
  2012-03-26  9:50           ` [Android-virt] " Peter Maydell
@ 2012-03-28  6:09           ` Rusty Russell
  2012-03-28 14:17           ` Nicolas Pitre
  2 siblings, 0 replies; 14+ messages in thread
From: Rusty Russell @ 2012-03-28  6:09 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, 26 Mar 2012 10:40:54 +0100, Will Deacon <will.deacon@arm.com> wrote:
> [removed some dead email addresses from CC]

Thanks!

> On Fri, Mar 23, 2012 at 11:17:07PM +0000, Rusty Russell wrote:
> > I had assumed that as we trend towards a common cross-platform kernel,
> > we should be relying on feature registers where available, not the
> > model.
> 
> The PMU is very tightly coupled with the CPU and it is necessary to know
> exactly which CPU you have so that you can map microarchitectural events to
> their identifiers. This is not discoverable in the architecture [PMUv2
> provides some support for discovering the status of architected events, but
> that is still not enough and is only present on A7 and A15 so far] so that's
> why we probe the CPU instead.

OK.

> Now, if everything was device-tree based then we could simply use a
> different binding for each CPU but since we support perf on non-DT
> platforms, probing the CPU type is the best solution. I would like to avoid
> the probing code if we are initialised from DT, but I've not got round to it
> yet (this would be useful for big.LITTLE).

I'll be interested to see how you encode the mapping of
microarchitectural events to their identifiers here.

> > > Would you be able to advertise 0 event counters instead and treat the PMU
> > > largely as RAZ/WI? The only tricky bit I can see is the cycle counter, which
> > > is mandated by the presence of a PMU, however this could also just RAZ
> > > initially.
> > 
> > Yes, that's the effect of the current emulation hack.  Far better not to
> > lie to the guest, however, than get weird results from profiling.
> 
> I'd rather lie to the guest than modify it just to avoid emulating the
> device correctly. Even then, the profiling results wouldn't be especially
> weird -- everything would be 0, which is a lot better than random
> numbers.

Sure, but nowhere as neat an experience as having "no hardware support
available" printed, and nicely defined failure mode :(

When I get back in May, I'll look at how hard it will to be implement
this properly, if noone else does.

Thanks,
Rusty.
-- 
  How could I marry someone with more hair than me?  http://baldalex.org

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH] ARM: test for PMU feature on v7 (v2 with typo fix)
  2012-03-26  9:40         ` Will Deacon
  2012-03-26  9:50           ` [Android-virt] " Peter Maydell
  2012-03-28  6:09           ` Rusty Russell
@ 2012-03-28 14:17           ` Nicolas Pitre
  2012-03-30 17:04             ` Will Deacon
  2 siblings, 1 reply; 14+ messages in thread
From: Nicolas Pitre @ 2012-03-28 14:17 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, 26 Mar 2012, Will Deacon wrote:

> Now, if everything was device-tree based then we could simply use a
> different binding for each CPU but since we support perf on non-DT
> platforms, probing the CPU type is the best solution. I would like to avoid
> the probing code if we are initialised from DT, but I've not got round to it
> yet (this would be useful for big.LITTLE).

Still... my opinion is that we should try to autodetect as much as 
possible and avoid overstuffing the DT with content that can otherwise 
be run-time probed.  OK to use DT to override the probe for corner 
cases, but IMHO the probe should be the default method of 
initialization.  The rational is that we want to spread knowledge about 
part of the system and have it confined into respective drivers and 
subsystems for easier maintenance.  If the guy who has to maintain the 
dts has to know all the details for everything then that won't scale and 
the risk for discrepancies is increased.


Nicolas

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH] ARM: test for PMU feature on v7 (v2 with typo fix)
  2012-03-28 14:17           ` Nicolas Pitre
@ 2012-03-30 17:04             ` Will Deacon
  2012-03-30 17:40               ` Nicolas Pitre
  0 siblings, 1 reply; 14+ messages in thread
From: Will Deacon @ 2012-03-30 17:04 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Nicolas,

On Wed, Mar 28, 2012 at 03:17:29PM +0100, Nicolas Pitre wrote:
> On Mon, 26 Mar 2012, Will Deacon wrote:
> 
> > Now, if everything was device-tree based then we could simply use a
> > different binding for each CPU but since we support perf on non-DT
> > platforms, probing the CPU type is the best solution. I would like to avoid
> > the probing code if we are initialised from DT, but I've not got round to it
> > yet (this would be useful for big.LITTLE).
> 
> Still... my opinion is that we should try to autodetect as much as 
> possible and avoid overstuffing the DT with content that can otherwise 
> be run-time probed.  OK to use DT to override the probe for corner 
> cases, but IMHO the probe should be the default method of 
> initialization.  The rational is that we want to spread knowledge about 
> part of the system and have it confined into respective drivers and 
> subsystems for easier maintenance.  If the guy who has to maintain the 
> dts has to know all the details for everything then that won't scale and 
> the risk for discrepancies is increased.

I agree that probing is preferable where possible but, since the PMUs are
banked, we cannot reliably probe them on big.LITTLE platforms. I guess if
there were some infrastructure for probing on the remote cluster, I could
use that, but it seems like DT would be easier.

Will

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH] ARM: test for PMU feature on v7 (v2 with typo fix)
  2012-03-30 17:04             ` Will Deacon
@ 2012-03-30 17:40               ` Nicolas Pitre
  0 siblings, 0 replies; 14+ messages in thread
From: Nicolas Pitre @ 2012-03-30 17:40 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, 30 Mar 2012, Will Deacon wrote:

> Hi Nicolas,
> 
> On Wed, Mar 28, 2012 at 03:17:29PM +0100, Nicolas Pitre wrote:
> > On Mon, 26 Mar 2012, Will Deacon wrote:
> > 
> > > Now, if everything was device-tree based then we could simply use a
> > > different binding for each CPU but since we support perf on non-DT
> > > platforms, probing the CPU type is the best solution. I would like to avoid
> > > the probing code if we are initialised from DT, but I've not got round to it
> > > yet (this would be useful for big.LITTLE).
> > 
> > Still... my opinion is that we should try to autodetect as much as 
> > possible and avoid overstuffing the DT with content that can otherwise 
> > be run-time probed.  OK to use DT to override the probe for corner 
> > cases, but IMHO the probe should be the default method of 
> > initialization.  The rational is that we want to spread knowledge about 
> > part of the system and have it confined into respective drivers and 
> > subsystems for easier maintenance.  If the guy who has to maintain the 
> > dts has to know all the details for everything then that won't scale and 
> > the risk for discrepancies is increased.
> 
> I agree that probing is preferable where possible but, since the PMUs are
> banked, we cannot reliably probe them on big.LITTLE platforms.

Are they tied to each cluster power domains?  If so you'll have to 
preserve/restore their state when clusters are suspended even if the 
interface tries to present a consistent view to users.  The probe can be 
done there, and differences in capabilities adjusted dynamically.


Nicolas

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2012-03-30 17:40 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <4F590411.9060201@arm.com>
2012-03-23  0:32 ` [PATCH] ARM: test for PMU feature on v7 Rusty Russell
2012-03-23  0:34   ` [PATCH] ARM: test for PMU feature on v7 (v2 with typo fix) Rusty Russell
2012-03-23  9:53     ` Will Deacon
2012-03-23 23:17       ` Rusty Russell
2012-03-26  9:40         ` Will Deacon
2012-03-26  9:50           ` [Android-virt] " Peter Maydell
2012-03-26 15:15             ` Will Deacon
2012-03-28  6:09           ` Rusty Russell
2012-03-28 14:17           ` Nicolas Pitre
2012-03-30 17:04             ` Will Deacon
2012-03-30 17:40               ` Nicolas Pitre
2012-03-23  0:38   ` [PATCH] ARM: KVM: Emulate ID_DFR0 to say we don't support anything Rusty Russell
2012-03-23  9:04     ` [Android-virt] " Peter Maydell
2012-03-23 23:23       ` Rusty Russell

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.