linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/3] perf/x86/rapl: Enable Apollo Lake RAPL support
@ 2016-09-11  5:18 Harry Pan
  2016-09-11  5:18 ` [PATCH 2/3] x86/perf/rapl: Make quirk a function pointer Harry Pan
  2016-09-11  5:18 ` [PATCH 3/3] perf/x86/rapl: Enable Baytrail/Braswell RAPL support Harry Pan
  0 siblings, 2 replies; 7+ messages in thread
From: Harry Pan @ 2016-09-11  5:18 UTC (permalink / raw)
  To: LKML
  Cc: gs0622, Harry Pan, tglx, mingo, hpa, x86, peterz, bp,
	srinivas.pandruvada

This patch enables RAPL counters (energy consumption counters)
support for Intel Apollo Lake (Goldmont) processors (Model 92):

Unlike the ESU increment of Silvermont/Airmont, RAPL on Goldmont
behaves like the Haswell microarchitecture: it counts in units of
1/2^ESU joules and supports the PP0/PP1/PKG/RAM power domains.

ESU and power domains refer to Intel Software Developers' Manual,
Vol. 3C, Order No. 325384, Table 35-12.

Usage example:

$ perf list
$ perf stat -a -e power/energy-cores/,power/energy-pkg/ sleep 10

Signed-off-by: Harry Pan <harry.pan@intel.com>
---
 arch/x86/events/intel/rapl.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 2886593..f7924640 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -765,6 +765,8 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE,  skl_rapl_init),
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init),
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X,	 hsx_rapl_init),
+
+	X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init),
 	{},
 };
 
-- 
2.6.6

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/3] x86/perf/rapl: Make quirk a function pointer
  2016-09-11  5:18 [PATCH 1/3] perf/x86/rapl: Enable Apollo Lake RAPL support Harry Pan
@ 2016-09-11  5:18 ` Harry Pan
  2016-09-11  5:18 ` [PATCH 3/3] perf/x86/rapl: Enable Baytrail/Braswell RAPL support Harry Pan
  1 sibling, 0 replies; 7+ messages in thread
From: Harry Pan @ 2016-09-11  5:18 UTC (permalink / raw)
  To: LKML
  Cc: gs0622, Thomas Gleixner, Harry Pan, mingo, hpa, x86, peterz, bp,
	dave.hansen, srinivas.pandruvada

From: Thomas Gleixner <tglx@linutronix.de>

More model-specific quirks are required, so we need to change the
single-purpose boolean quirk flag into an easily extensible mechanism.

Make the quirk a function pointer and move the existing quirk into its own
function.

While at it, make the init struct initializers readable and rename the
misnamed intel_rapl_init_fun struct to intel_rapl_model_desc, because
that's what it is: a CPU model descriptor for the RAPL features specific
to a particular model.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Harry Pan <harry.pan@intel.com>
---
 arch/x86/events/intel/rapl.c | 92 ++++++++++++++++++++++----------------------
 1 file changed, 46 insertions(+), 46 deletions(-)

diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index f7924640..94abfdb 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -152,6 +152,12 @@ struct rapl_pmus {
 	struct rapl_pmu		*pmus[];
 };
 
+struct intel_rapl_model_desc {
+	void			(*quirk)(void);
+	int			cntr_mask;
+	struct attribute	**attrs;
+};
+
  /* 1/2^hw_unit Joule */
 static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;
 static struct rapl_pmus *rapl_pmus;
@@ -617,7 +623,18 @@ static int rapl_cpu_prepare(unsigned int cpu)
 	return 0;
 }
 
-static int rapl_check_hw_unit(bool apply_quirk)
+static void rapl_hsx_quirk(void)
+{
+	/*
+	 * DRAM domain on HSW server and KNL has fixed energy unit which can be
+	 * different than the unit from power unit MSR. See
+	 * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
+	 * of 2. Datasheet, September 2014, Reference Number: 330784-001 "
+	 */
+	rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
+}
+
+static int rapl_check_hw_unit(const struct intel_rapl_model_desc *model)
 {
 	u64 msr_rapl_power_unit_bits;
 	int i;
@@ -628,14 +645,9 @@ static int rapl_check_hw_unit(bool apply_quirk)
 	for (i = 0; i < NR_RAPL_DOMAINS; i++)
 		rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
 
-	/*
-	 * DRAM domain on HSW server and KNL has fixed energy unit which can be
-	 * different than the unit from power unit MSR. See
-	 * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
-	 * of 2. Datasheet, September 2014, Reference Number: 330784-001 "
-	 */
-	if (apply_quirk)
-		rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
+	/* Apply quirk before initializing the timer rate */
+	if (model->quirk)
+		model->quirk();
 
 	/*
 	 * Calculate the timer rate:
@@ -701,46 +713,36 @@ static int __init init_rapl_pmus(void)
 #define X86_RAPL_MODEL_MATCH(model, init)	\
 	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }
 
-struct intel_rapl_init_fun {
-	bool apply_quirk;
-	int cntr_mask;
-	struct attribute **attrs;
-};
-
-static const struct intel_rapl_init_fun snb_rapl_init __initconst = {
-	.apply_quirk = false,
-	.cntr_mask = RAPL_IDX_CLN,
-	.attrs = rapl_events_cln_attr,
+static const struct intel_rapl_model_desc snb_rapl_init __initconst = {
+	.cntr_mask	= RAPL_IDX_CLN,
+	.attrs		= rapl_events_cln_attr,
 };
 
-static const struct intel_rapl_init_fun hsx_rapl_init __initconst = {
-	.apply_quirk = true,
-	.cntr_mask = RAPL_IDX_SRV,
-	.attrs = rapl_events_srv_attr,
+static const struct intel_rapl_model_desc hsx_rapl_init __initconst = {
+	.quirk		= rapl_hsx_quirk,
+	.cntr_mask	= RAPL_IDX_SRV,
+	.attrs		= rapl_events_srv_attr,
 };
 
-static const struct intel_rapl_init_fun hsw_rapl_init __initconst = {
-	.apply_quirk = false,
-	.cntr_mask = RAPL_IDX_HSW,
-	.attrs = rapl_events_hsw_attr,
+static const struct intel_rapl_model_desc hsw_rapl_init __initconst = {
+	.cntr_mask	= RAPL_IDX_HSW,
+	.attrs		= rapl_events_hsw_attr,
 };
 
-static const struct intel_rapl_init_fun snbep_rapl_init __initconst = {
-	.apply_quirk = false,
-	.cntr_mask = RAPL_IDX_SRV,
-	.attrs = rapl_events_srv_attr,
+static const struct intel_rapl_model_desc snbep_rapl_init __initconst = {
+	.cntr_mask	= RAPL_IDX_SRV,
+	.attrs		= rapl_events_srv_attr,
 };
 
-static const struct intel_rapl_init_fun knl_rapl_init __initconst = {
-	.apply_quirk = true,
-	.cntr_mask = RAPL_IDX_KNL,
-	.attrs = rapl_events_knl_attr,
+static const struct intel_rapl_model_desc knl_rapl_init __initconst = {
+	.quirk		= rapl_hsx_quirk,
+	.cntr_mask	= RAPL_IDX_KNL,
+	.attrs		= rapl_events_knl_attr,
 };
 
-static const struct intel_rapl_init_fun skl_rapl_init __initconst = {
-	.apply_quirk = false,
-	.cntr_mask = RAPL_IDX_SKL_CLN,
-	.attrs = rapl_events_skl_attr,
+static const struct intel_rapl_model_desc skl_rapl_init __initconst = {
+	.cntr_mask	= RAPL_IDX_SKL_CLN,
+	.attrs		= rapl_events_skl_attr,
 };
 
 static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
@@ -774,21 +776,19 @@ MODULE_DEVICE_TABLE(x86cpu, rapl_cpu_match);
 
 static int __init rapl_pmu_init(void)
 {
+	const struct intel_rapl_model_desc *model;
 	const struct x86_cpu_id *id;
-	struct intel_rapl_init_fun *rapl_init;
-	bool apply_quirk;
 	int ret;
 
 	id = x86_match_cpu(rapl_cpu_match);
 	if (!id)
 		return -ENODEV;
 
-	rapl_init = (struct intel_rapl_init_fun *)id->driver_data;
-	apply_quirk = rapl_init->apply_quirk;
-	rapl_cntr_mask = rapl_init->cntr_mask;
-	rapl_pmu_events_group.attrs = rapl_init->attrs;
+	model = (struct intel_rapl_model_desc *)id->driver_data;
+	rapl_cntr_mask = model->cntr_mask;
+	rapl_pmu_events_group.attrs = model->attrs;
 
-	ret = rapl_check_hw_unit(apply_quirk);
+	ret = rapl_check_hw_unit(model);
 	if (ret)
 		return ret;
 
-- 
2.6.6

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/3] perf/x86/rapl: Enable Baytrail/Braswell RAPL support
  2016-09-11  5:18 [PATCH 1/3] perf/x86/rapl: Enable Apollo Lake RAPL support Harry Pan
  2016-09-11  5:18 ` [PATCH 2/3] x86/perf/rapl: Make quirk a function pointer Harry Pan
@ 2016-09-11  5:18 ` Harry Pan
  2016-09-13 13:41   ` Thomas Gleixner
  1 sibling, 1 reply; 7+ messages in thread
From: Harry Pan @ 2016-09-11  5:18 UTC (permalink / raw)
  To: LKML
  Cc: gs0622, Harry Pan, tglx, mingo, hpa, x86, peterz, bp,
	srinivas.pandruvada

This patch enables RAPL counters (energy consumption counters)
support for Intel Baytrail and Braswell processors (Model 55 and 76):

The Silvermont/Airmont microarchitecture uses a fixed energy status
unit (ESU) whose smallest unit is one microjoule. This patch adds a
quirk for these Atom processors (BYT/BSW) so that the energy increment
is calculated in 2^ESU microjoules.

ESU and power domains refer to Intel Software Developers' Manual,
Vol. 3C, Order No. 325384, Table 35-8.

v2: simplify setting rapl_hw_unit[] to reduce runtime overhead.

Usage example:

$ perf list
$ perf stat -a -e power/energy-cores/,power/energy-pkg/ sleep 10

This patch also enables multiple quirks.

Signed-off-by: Harry Pan <harry.pan@intel.com>
---
 arch/x86/events/intel/rapl.c | 49 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 94abfdb..2af6c18 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -110,6 +110,10 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
 #define RAPL_IDX_KNL	(1<<RAPL_IDX_PKG_NRG_STAT|\
 			 1<<RAPL_IDX_RAM_NRG_STAT)
 
+/* Baytrail/Braswell clients have PP0, PKG */
+#define RAPL_IDX_BYT	(1<<RAPL_IDX_PP0_NRG_STAT|\
+			 1<<RAPL_IDX_PKG_NRG_STAT)
+
 /*
  * event code: LSB 8 bits, passed in attr->config
  * any other bit is reserved
@@ -458,6 +462,14 @@ RAPL_EVENT_ATTR_STR(energy-ram.scale,     rapl_ram_scale, "2.3283064365386962890
 RAPL_EVENT_ATTR_STR(energy-gpu.scale,     rapl_gpu_scale, "2.3283064365386962890625e-10");
 RAPL_EVENT_ATTR_STR(energy-psys.scale,   rapl_psys_scale, "2.3283064365386962890625e-10");
 
+/*
+ * Some Atom series processors (BYT/BSW) have fixed
+ * energy status unit (ESU) in smallest unit of microjoule,
+ * and its increment is in 2^ESU microjoules.
+ */
+RAPL_EVENT_ATTR_STR(energy-cores.scale, rapl_byt_cores_scale, "1.0e-6");
+RAPL_EVENT_ATTR_STR(energy-pkg.scale, rapl_byt_pkg_scale, "1.0e-6");
+
 static struct attribute *rapl_events_srv_attr[] = {
 	EVENT_PTR(rapl_cores),
 	EVENT_PTR(rapl_pkg),
@@ -539,6 +551,18 @@ static struct attribute *rapl_events_knl_attr[] = {
 	NULL,
 };
 
+static struct attribute *rapl_events_byt_attr[] = {
+	EVENT_PTR(rapl_cores),
+	EVENT_PTR(rapl_pkg),
+
+	EVENT_PTR(rapl_cores_unit),
+	EVENT_PTR(rapl_pkg_unit),
+
+	EVENT_PTR(rapl_byt_cores_scale),
+	EVENT_PTR(rapl_byt_pkg_scale),
+	NULL,
+};
+
 static struct attribute_group rapl_pmu_events_group = {
 	.name = "events",
 	.attrs = NULL, /* patched at runtime */
@@ -634,6 +658,23 @@ static void rapl_hsx_quirk(void)
 	rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
 }
 
+static void rapl_byt_quirk(void)
+{
+	int i;
+
+	/*
+	 * Some Atom processors (BYT/BSW) have 2^ESU microjoules
+	 * increment, refer to Software Developers' Manual, Vol. 3C,
+	 * Order No. 325384, Table 35-8 of MSR_RAPL_POWER_UNIT.
+	 *
+	 * TODO: In order to fit BYT/BSW quirk model, here remind
+	 *	 this generates timer rate in 80ms; by default
+	 *	 ESU of BYT/BSW is 5, so it leads (1000/200)*2^4.
+	 */
+	for (i = 0; i < NR_RAPL_DOMAINS; i++)
+		rapl_hw_unit[i] = 32 - rapl_hw_unit[i];
+}
+
 static int rapl_check_hw_unit(const struct intel_rapl_model_desc *model)
 {
 	u64 msr_rapl_power_unit_bits;
@@ -745,6 +786,12 @@ static const struct intel_rapl_model_desc skl_rapl_init __initconst = {
 	.attrs		= rapl_events_skl_attr,
 };
 
+static const struct intel_rapl_model_desc byt_rapl_init __initconst = {
+	.quirk		= rapl_byt_quirk,
+	.cntr_mask	= RAPL_IDX_BYT,
+	.attrs		= rapl_events_byt_attr,
+};
+
 static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE,   snb_rapl_init),
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_rapl_init),
@@ -768,6 +815,8 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init),
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X,	 hsx_rapl_init),
 
+	X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_SILVERMONT1, byt_rapl_init),
+	X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_AIRMONT, byt_rapl_init),
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init),
 	{},
 };
-- 
2.6.6

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 3/3] perf/x86/rapl: Enable Baytrail/Braswell RAPL support
  2016-09-11  5:18 ` [PATCH 3/3] perf/x86/rapl: Enable Baytrail/Braswell RAPL support Harry Pan
@ 2016-09-13 13:41   ` Thomas Gleixner
  2016-09-13 17:30     ` Pan, Harry
  0 siblings, 1 reply; 7+ messages in thread
From: Thomas Gleixner @ 2016-09-13 13:41 UTC (permalink / raw)
  To: Harry Pan; +Cc: LKML, gs0622, mingo, hpa, x86, peterz, bp, srinivas.pandruvada

On Sun, 11 Sep 2016, Harry Pan wrote:
> This patch also enables multiple quirks.

This patch adds a single quirk for Baytrail. 

Please stop sending out patches 5 seconds after a review. Take your time
and fixup stuff proper.

> +static void rapl_byt_quirk(void)
> +{
> +	int i;
> +
> +	/*
> +	 * Some Atom processors (BYT/BSW) have 2^ESU microjoules
> +	 * increment, refer to Software Developers' Manual, Vol. 3C,
> +	 * Order No. 325384, Table 35-8 of MSR_RAPL_POWER_UNIT.
> +	 *
> +	 * TODO: In order to fit BYT/BSW quirk model, here remind
> +	 *	 this generates timer rate in 80ms; by default
> +	 *	 ESU of BYT/BSW is 5, so it leads (1000/200)*2^4.

This sentence is not a sentence and I can't make any sense of it at
all.

What's the TODO here? And why is that TODO not addressed in this patch?

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 3/3] perf/x86/rapl: Enable Baytrail/Braswell RAPL support
  2016-09-13 13:41   ` Thomas Gleixner
@ 2016-09-13 17:30     ` Pan, Harry
  2016-09-14 16:16       ` Thomas Gleixner
  0 siblings, 1 reply; 7+ messages in thread
From: Pan, Harry @ 2016-09-13 17:30 UTC (permalink / raw)
  To: tglx; +Cc: hpa, linux-kernel, peterz, srinivas.pandruvada, mingo, x86, bp

On Tue, 2016-09-13 at 15:41 +0200, Thomas Gleixner wrote:
> On Sun, 11 Sep 2016, Harry Pan wrote:
> > This patch also enables multiple quirks.
> 
> This patch adds a single quirk for Baytrail. 
> 
> Please stop sending out patches 5 seconds after a review. Take your time
Definitely I take this seriously because I felt awkward as well.

> > +	/*
> > +	 * Some Atom processors (BYT/BSW) have 2^ESU microjoules
> > +	 * increment, refer to Software Developers' Manual, Vol. 3C,
> > +	 * Order No. 325384, Table 35-8 of MSR_RAPL_POWER_UNIT.
> > +	 *
> > +	 * TODO: In order to fit BYT/BSW quirk model, here remind
> > +	 *	 this generates timer rate in 80ms; by default
> > +	 *	 ESU of BYT/BSW is 5, so it leads (1000/200)*2^4.
> 
> This sentence is not a sentence and I can't make any sense of it at
> all.
> 
> What's the TODO here? And why is that TODO not addressed in this patch?
> 
I reviewed my sentence and agree with your comment; yes, the "TODO" tag
is incorrect, since I have no decent suggestion/option to offer.

The issue is that the Baytrail/Braswell quirk breaks the original
assumption behind the perf RAPL polling timer rate calculation, which is
sized for the counter overflow case at 200W; in short, it makes the
system trigger an event to read the counters every 80ms. This is the
concern I wanted to note in the comment (wrong tag?) because I could not
assess any side effect. Perhaps I should relabel it as a "remark" or
"caveat", because I do not have a decent suggestion (which a "TODO" tag
would require) so far.

On the other hand, it should not affect functionality, since I compared
the results with the powercap driver through its sysfs nodes during my
experiments; I will humbly take any advice to make this patch better.

Sincerely,
Harry

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 3/3] perf/x86/rapl: Enable Baytrail/Braswell RAPL support
  2016-09-13 17:30     ` Pan, Harry
@ 2016-09-14 16:16       ` Thomas Gleixner
  0 siblings, 0 replies; 7+ messages in thread
From: Thomas Gleixner @ 2016-09-14 16:16 UTC (permalink / raw)
  To: Pan, Harry; +Cc: hpa, linux-kernel, peterz, srinivas.pandruvada, mingo, x86, bp

On Tue, 13 Sep 2016, Pan, Harry wrote:
> This things is because of the Baytrail/Braswell quirk breaks original
> assumption of perf RAPL polling timer rate calculation regarding of
> counter overflow case based on 200W; 

ESU are the 'Energy Status Units' bits in the MSR_RAPL_POWER_UNIT msr.

ESU = (rdmsr(MSR_RAPL_POWER_UNIT) >> 8) & 0x1f;

So we have 5 bits of information and therefore:

0 <= ESU <= 31

The standard readout is:

    joules = counter_value * mult;

    mult = 1 / (2 ^ ESU)

    The resulting multiplier is:

    31		 >= ESU  >= 0
    4.65661e-10J <= mult <= 1J

The scale function does:

    val =  counter << (32 - ESU);

which is converting the readout in to units of

      4.65661e-10J / 2 == 2.32830e-10J

because the shift is actually: (1 + (31 - ESU)).


The math for Baytrail/Braswell is:

    microjoules = counter_value * mult

    mult = 2 ^ ESU
   
    The resulting multiplier is:

    0		 <= ESU  <= 31
    1 uJ         <= mult <= 2.14748e+09 uJ
    1e-6J	 <= mult <= 2147J

So now your baytrail/braswell quirk does:

    ESU = 32 - ESU

so the scale function becomes:

    val = counter << (32 - (32 - ESU))
==> val = counter << ESU

which is converting the readout to units of

     1e-6J

So now you are concerned about the rapl_timer interval which is calculated
so that the counter does not overflow for a total dissipation of 200W,
which is equivalent to 200J/s. The maximum counter width is 32 bit.

So depending on ESU the code scales the timeout to:

   t[ESU] = 1 << (31 - ESU) / 200

So for the normal case we get:

   t[0] = 10.737e6 s
   ...
   t[30] = 0.010 s
   t[31] = 0.005 s

The counter capacity for ESU=31 is 

    cap = (1 << 32) * 4.65661e-10J = 2J

So:    

    toverfl = 2J / 200W = 0.01s

which we cut in half to avoid running the timer and the counter in lockstep
which can cause overflows to go undetected. So this looks correct.
   
 
But for your Baytrail/Braswell that results in:

   t[ESU] = 1 << (31 - (32 - ESU)) / 200

   t[0] = TOTAL CRAP because the shift value becomes -1

But what saves you here is the check for

    if (hwunit < 32)

which catches the hwunit = 32 - ESU[0] case and sets the timer to 2ms. So
for the remaining ones we have:

   t[1]  = 0.005s
   ...
   t[31] = 5.3687e+06s 

So lets look at the counter capacity for ESU=1:

   cap = (1 << 32) * 2 uJ == 8589.92J

The resulting overflow is:

    toverfl = 8589.92J / 200W = 42.9496 s

So if we divide this by two then we result in: 21.4748 s

So your timeout is actually off by factor ~4k, which is not surprising due
to the fact that the capacity has a ratio of 1 : 2147.48 and you have an
additional off by one due to the (32 - ESU) quirk.....

So the overflow prevention timer fires 4k times for no good reason. Indeed
a very power friendly design.

The timer calculation magically works for the original standard conversion,
but in this case it is utter crap. You really want to have a proper scale
factor for the timer calculation so we end up with:

       toverfl = capacity / 200

i.e. you need a way to calculate capacity from the hw_units[] mess and some
factor which is dependent on the base unit. That all can be done with plain
integer math.

> in short, it leads every 80ms system triggers an event to read counters,

I have no idea where these 80ms come from and I can't make any sense from
the rest of your response either.

Fact is that you did not do the math and just tinkered the
Baytrail/Braswell support into the existing code and declared it done when
it did not blow up in your face.

Really excellent engineering work - NOT!

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 3/3] perf/x86/rapl: Enable Baytrail/Braswell RAPL support
  2016-09-11  3:02 [PATCH 1/3] perf/x86/rapl: Enable Apollo Lake " Harry Pan
@ 2016-09-11  3:02 ` Harry Pan
  0 siblings, 0 replies; 7+ messages in thread
From: Harry Pan @ 2016-09-11  3:02 UTC (permalink / raw)
  To: LKML
  Cc: gs0622, Harry Pan, tglx, mingo, hpa, x86, peterz, bp,
	dave.hansen, srinivas.pandruvada, ray.huang

This patch enables RAPL counters (energy consumption counters)
support for Intel Baytrail and Braswell processors (Model 55 and 76):

The Silvermont/Airmont microarchitecture uses a fixed energy status
unit (ESU) whose smallest unit is one microjoule. This patch adds a
quirk for these Atom processors (BYT/BSW) so that the energy increment
is calculated in 2^ESU microjoules.

ESU and power domains refer to Intel Software Developers' Manual,
Vol. 3C, Order No. 325384, Table 35-8.

v2: simplify setting rapl_hw_unit[] to reduce runtime overhead.

Usage example:

$ perf list
$ perf stat -a -e power/energy-cores/,power/energy-pkg/ sleep 10

This patch also enables multiple quirks.

Signed-off-by: Harry Pan <harry.pan@intel.com>
---
 arch/x86/events/intel/rapl.c | 55 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 94abfdb..a434087 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -110,6 +110,10 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
 #define RAPL_IDX_KNL	(1<<RAPL_IDX_PKG_NRG_STAT|\
 			 1<<RAPL_IDX_RAM_NRG_STAT)
 
+/* Baytrail/Braswell clients have PP0, PKG */
+#define RAPL_IDX_BYT	(1<<RAPL_IDX_PP0_NRG_STAT|\
+			 1<<RAPL_IDX_PKG_NRG_STAT)
+
 /*
  * event code: LSB 8 bits, passed in attr->config
  * any other bit is reserved
@@ -136,6 +140,12 @@ static struct perf_pmu_events_attr event_attr_##v = {				\
 	.event_str	= str,							\
 };
 
+enum rapl_quirk {
+	RAPL_NO_QUIRK = 0,
+	RAPL_HSX_QUIRK,
+	RAPL_BYT_QUIRK,
+};
+
 struct rapl_pmu {
 	raw_spinlock_t		lock;
 	int			n_active;
@@ -458,6 +468,14 @@ RAPL_EVENT_ATTR_STR(energy-ram.scale,     rapl_ram_scale, "2.3283064365386962890
 RAPL_EVENT_ATTR_STR(energy-gpu.scale,     rapl_gpu_scale, "2.3283064365386962890625e-10");
 RAPL_EVENT_ATTR_STR(energy-psys.scale,   rapl_psys_scale, "2.3283064365386962890625e-10");
 
+/*
+ * Some Atom series processors (BYT/BSW) have fixed
+ * energy status unit (ESU) in smallest unit of microjoule,
+ * and its increment is in 2^ESU microjoules.
+ */
+RAPL_EVENT_ATTR_STR(energy-cores.scale, rapl_byt_cores_scale, "1.0e-6");
+RAPL_EVENT_ATTR_STR(energy-pkg.scale, rapl_byt_pkg_scale, "1.0e-6");
+
 static struct attribute *rapl_events_srv_attr[] = {
 	EVENT_PTR(rapl_cores),
 	EVENT_PTR(rapl_pkg),
@@ -539,6 +557,18 @@ static struct attribute *rapl_events_knl_attr[] = {
 	NULL,
 };
 
+static struct attribute *rapl_events_byt_attr[] = {
+	EVENT_PTR(rapl_cores),
+	EVENT_PTR(rapl_pkg),
+
+	EVENT_PTR(rapl_cores_unit),
+	EVENT_PTR(rapl_pkg_unit),
+
+	EVENT_PTR(rapl_byt_cores_scale),
+	EVENT_PTR(rapl_byt_pkg_scale),
+	NULL,
+};
+
 static struct attribute_group rapl_pmu_events_group = {
 	.name = "events",
 	.attrs = NULL, /* patched at runtime */
@@ -634,6 +664,23 @@ static void rapl_hsx_quirk(void)
 	rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
 }
 
+static void rapl_byt_quirk(void)
+{
+	int i;
+
+	/*
+	 * Some Atom processors (BYT/BSW) have 2^ESU microjoules
+	 * increment, refer to Software Developers' Manual, Vol. 3C,
+	 * Order No. 325384, Table 35-8 of MSR_RAPL_POWER_UNIT.
+	 *
+	 * TODO: In order to fit BYT/BSW quirk model, here remind
+	 *	 this generates timer rate in 80ms; by default
+	 *	 ESU of BYT/BSW is 5, so it leads (1000/200)*2^4.
+	 */
+	for (i = 0; i < NR_RAPL_DOMAINS; i++)
+		rapl_hw_unit[i] = 32 - rapl_hw_unit[i];
+}
+
 static int rapl_check_hw_unit(const struct intel_rapl_model_desc *model)
 {
 	u64 msr_rapl_power_unit_bits;
@@ -745,6 +792,12 @@ static const struct intel_rapl_model_desc skl_rapl_init __initconst = {
 	.attrs		= rapl_events_skl_attr,
 };
 
+static const struct intel_rapl_model_desc byt_rapl_init __initconst = {
+	.quirk		= rapl_byt_quirk,
+	.cntr_mask	= RAPL_IDX_BYT,
+	.attrs		= rapl_events_byt_attr,
+};
+
 static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE,   snb_rapl_init),
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_rapl_init),
@@ -768,6 +821,8 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init),
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X,	 hsx_rapl_init),
 
+	X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_SILVERMONT1, byt_rapl_init),
+	X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_AIRMONT, byt_rapl_init),
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init),
 	{},
 };
-- 
2.6.6

^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2016-09-14 16:19 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-11  5:18 [PATCH 1/3] perf/x86/rapl: Enable Apollo Lake RAPL support Harry Pan
2016-09-11  5:18 ` [PATCH 2/3] x86/perf/rapl: Make quirk a function pointer Harry Pan
2016-09-11  5:18 ` [PATCH 3/3] perf/x86/rapl: Enable Baytrail/Braswell RAPL support Harry Pan
2016-09-13 13:41   ` Thomas Gleixner
2016-09-13 17:30     ` Pan, Harry
2016-09-14 16:16       ` Thomas Gleixner
  -- strict thread matches above, loose matches on Subject: below --
2016-09-11  3:02 [PATCH 1/3] perf/x86/rapl: Enable Apollo Lake " Harry Pan
2016-09-11  3:02 ` [PATCH 3/3] perf/x86/rapl: Enable Baytrail/Braswell " Harry Pan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).