All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3] perf/rapl: support per domain energy unit
@ 2015-03-26 21:28 Jacob Pan
  2015-04-06 15:21 ` Jacob Pan
  2015-04-18 10:14 ` [tip:perf/urgent] perf/x86/intel/rapl: Fix energy counter measurements but supporing per domain energy units tip-bot for Jacob Pan
  0 siblings, 2 replies; 10+ messages in thread
From: Jacob Pan @ 2015-03-26 21:28 UTC (permalink / raw)
  To: LKML, Peter Zijlstra
  Cc: H. Peter Anvin, Stephane Eranian, Paul Mackerras, Andi Kleen,
	Thomas Gleixner, Arnaldo Carvalho de Melo, Vince Weaver,
	Jacob Pan

RAPL energy hardware unit can vary within a single CPU package, e.g.
HSW server DRAM has a fixed energy unit of 15.3 uJ (2^-16) whereas
the unit on other domains can be enumerated from power unit MSR.
There might be other variations in the future, this patch adds
per cpu model quirk to allow special handling of certain cpus.

hw_unit is also removed from per cpu data since it is not per cpu
and the sampling rate for energy counter is typically not high.

Without this patch, DRAM domain on HSW servers will be counted
4x higher than the real energy counter.

Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
---
 arch/x86/kernel/cpu/perf_event_intel_rapl.c | 94 ++++++++++++++++++++++-------
 1 file changed, 73 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index c4bb8b8..999289b9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -62,6 +62,14 @@
 #define RAPL_IDX_PP1_NRG_STAT	3	/* gpu */
 #define INTEL_RAPL_PP1		0x4	/* pseudo-encoding */
 
+#define NR_RAPL_DOMAINS         0x4
+static const char *rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
+	"pp0-core",
+	"package",
+	"dram",
+	"pp1-gpu",
+};
+
 /* Clients have PP0, PKG */
 #define RAPL_IDX_CLN	(1<<RAPL_IDX_PP0_NRG_STAT|\
 			 1<<RAPL_IDX_PKG_NRG_STAT|\
@@ -112,7 +120,6 @@ static struct perf_pmu_events_attr event_attr_##v = {			\
 
 struct rapl_pmu {
 	spinlock_t	 lock;
-	int		 hw_unit;  /* 1/2^hw_unit Joule */
 	int		 n_active; /* number of active events */
 	struct list_head active_list;
 	struct pmu	 *pmu; /* pointer to rapl_pmu_class */
@@ -120,6 +127,7 @@ struct rapl_pmu {
 	struct hrtimer   hrtimer;
 };
 
+static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;  /* 1/2^hw_unit Joule */
 static struct pmu rapl_pmu_class;
 static cpumask_t rapl_cpu_mask;
 static int rapl_cntr_mask;
@@ -127,6 +135,7 @@ static int rapl_cntr_mask;
 static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu);
 static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free);
 
+static struct x86_pmu_quirk *rapl_quirks;
 static inline u64 rapl_read_counter(struct perf_event *event)
 {
 	u64 raw;
@@ -134,15 +143,28 @@ static inline u64 rapl_read_counter(struct perf_event *event)
 	return raw;
 }
 
-static inline u64 rapl_scale(u64 v)
+#define rapl_add_quirk(func_)						\
+do {									\
+	static struct x86_pmu_quirk __quirk __initdata = {		\
+		.func = func_,						\
+	};								\
+	__quirk.next = rapl_quirks;					\
+	rapl_quirks = &__quirk;						\
+} while (0)
+
+static inline u64 rapl_scale(u64 v, int cfg)
 {
+	if (cfg > NR_RAPL_DOMAINS) {
+		pr_warn("invalid domain %d, failed to scale data\n", cfg);
+		return v;
+	}
 	/*
 	 * scale delta to smallest unit (1/2^32)
 	 * users must then scale back: count * 1/(1e9*2^32) to get Joules
 	 * or use ldexp(count, -32).
 	 * Watts = Joules/Time delta
 	 */
-	return v << (32 - __this_cpu_read(rapl_pmu)->hw_unit);
+	return v << (32 - rapl_hw_unit[cfg - 1]);
 }
 
 static u64 rapl_event_update(struct perf_event *event)
@@ -173,7 +195,7 @@ again:
 	delta = (new_raw_count << shift) - (prev_raw_count << shift);
 	delta >>= shift;
 
-	sdelta = rapl_scale(delta);
+	sdelta = rapl_scale(delta, event->hw.config);
 
 	local64_add(sdelta, &event->count);
 
@@ -546,12 +568,22 @@ static void rapl_cpu_init(int cpu)
 	cpumask_set_cpu(cpu, &rapl_cpu_mask);
 }
 
+static __init void rapl_hsw_server_quirk(void)
+{
+	/*
+	 * DRAM domain on HSW server has fixed energy unit which can be
+	 * different than the unit from power unit MSR.
+	 * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
+	 * of 2. Datasheet, September 2014, Reference Number: 330784-001 "
+	 */
+	rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
+}
+
 static int rapl_cpu_prepare(int cpu)
 {
 	struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
 	int phys_id = topology_physical_package_id(cpu);
 	u64 ms;
-	u64 msr_rapl_power_unit_bits;
 
 	if (pmu)
 		return 0;
@@ -559,24 +591,13 @@ static int rapl_cpu_prepare(int cpu)
 	if (phys_id < 0)
 		return -1;
 
-	/* protect rdmsrl() to handle virtualization */
-	if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
-		return -1;
-
 	pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
 	if (!pmu)
 		return -1;
-
 	spin_lock_init(&pmu->lock);
 
 	INIT_LIST_HEAD(&pmu->active_list);
 
-	/*
-	 * grab power unit as: 1/2^unit Joules
-	 *
-	 * we cache in local PMU instance
-	 */
-	pmu->hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
 	pmu->pmu = &rapl_pmu_class;
 
 	/*
@@ -586,8 +607,8 @@ static int rapl_cpu_prepare(int cpu)
 	 * divide interval by 2 to avoid lockstep (2 * 100)
 	 * if hw unit is 32, then we use 2 ms 1/200/2
 	 */
-	if (pmu->hw_unit < 32)
-		ms = (1000 / (2 * 100)) * (1ULL << (32 - pmu->hw_unit - 1));
+	if (rapl_hw_unit[0] < 32)
+		ms = (1000 / (2 * 100)) * (1ULL << (32 - rapl_hw_unit[0] - 1));
 	else
 		ms = 2;
 
@@ -655,6 +676,20 @@ static int rapl_cpu_notifier(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
+static int rapl_check_hw_unit(void)
+{
+	u64 msr_rapl_power_unit_bits;
+	int i;
+
+	/* protect rdmsrl() to handle virtualization */
+	if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
+		return -1;
+	for (i = 0; i < NR_RAPL_DOMAINS; i++)
+		rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
+
+	return 0;
+}
+
 static const struct x86_cpu_id rapl_cpu_match[] = {
 	[0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
 	[1] = {},
@@ -664,6 +699,8 @@ static int __init rapl_pmu_init(void)
 {
 	struct rapl_pmu *pmu;
 	int cpu, ret;
+	struct x86_pmu_quirk *quirk;
+	int i;
 
 	/*
 	 * check for Intel processor family 6
@@ -678,6 +715,11 @@ static int __init rapl_pmu_init(void)
 		rapl_cntr_mask = RAPL_IDX_CLN;
 		rapl_pmu_events_group.attrs = rapl_events_cln_attr;
 		break;
+	case 63: /* Haswell-Server */
+		rapl_add_quirk(rapl_hsw_server_quirk);
+		rapl_cntr_mask = RAPL_IDX_SRV;
+		rapl_pmu_events_group.attrs = rapl_events_srv_attr;
+		break;
 	case 60: /* Haswell */
 	case 69: /* Haswell-Celeron */
 		rapl_cntr_mask = RAPL_IDX_HSW;
@@ -693,7 +735,13 @@ static int __init rapl_pmu_init(void)
 		/* unsupported */
 		return 0;
 	}
+	ret = rapl_check_hw_unit();
+	if (ret)
+		return ret;
 
+	/* run cpu model quirks */
+	for (quirk = rapl_quirks; quirk; quirk = quirk->next)
+		quirk->func();
 	cpu_notifier_register_begin();
 
 	for_each_online_cpu(cpu) {
@@ -714,14 +762,18 @@ static int __init rapl_pmu_init(void)
 
 	pmu = __this_cpu_read(rapl_pmu);
 
-	pr_info("RAPL PMU detected, hw unit 2^-%d Joules,"
+	pr_info("RAPL PMU detected,"
 		" API unit is 2^-32 Joules,"
 		" %d fixed counters"
 		" %llu ms ovfl timer\n",
-		pmu->hw_unit,
 		hweight32(rapl_cntr_mask),
 		ktime_to_ms(pmu->timer_interval));
-
+	for (i = 0; i < NR_RAPL_DOMAINS; i++) {
+		if (rapl_cntr_mask & (1 << i)) {
+			pr_info("hw unit of domain %s 2^-%d Joules\n",
+				rapl_domain_names[i], rapl_hw_unit[i]);
+		}
+	}
 out:
 	cpu_notifier_register_done();
 
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v3] perf/rapl: support per domain energy unit
  2015-03-26 21:28 [PATCH v3] perf/rapl: support per domain energy unit Jacob Pan
@ 2015-04-06 15:21 ` Jacob Pan
  2015-04-06 17:30   ` Stephane Eranian
  2015-04-18 10:14 ` [tip:perf/urgent] perf/x86/intel/rapl: Fix energy counter measurements but supporing per domain energy units tip-bot for Jacob Pan
  1 sibling, 1 reply; 10+ messages in thread
From: Jacob Pan @ 2015-04-06 15:21 UTC (permalink / raw)
  To: Jacob Pan
  Cc: LKML, Peter Zijlstra, H. Peter Anvin, Stephane Eranian,
	Paul Mackerras, Andi Kleen, Thomas Gleixner,
	Arnaldo Carvalho de Melo, Vince Weaver

On Thu, 26 Mar 2015 14:28:45 -0700
Jacob Pan <jacob.jun.pan@linux.intel.com> wrote:

> RAPL energy hardware unit can vary within a single CPU package, e.g.
> HSW server DRAM has a fixed energy unit of 15.3 uJ (2^-16) whereas
> the unit on other domains can be enumerated from power unit MSR.
> There might be other variations in the future, this patch adds
> per cpu model quirk to allow special handling of certain cpus.
> 
> hw_unit is also removed from per cpu data since it is not per cpu
> and the sampling rate for energy counter is typically not high.
> 
> Without this patch, DRAM domain on HSW servers will be counted
> 4x higher than the real energy counter.
> 

are there any more comments about this patch?

Thanks,

Jacob
> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> ---
>  arch/x86/kernel/cpu/perf_event_intel_rapl.c | 94
> ++++++++++++++++++++++------- 1 file changed, 73 insertions(+), 21
> deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
> b/arch/x86/kernel/cpu/perf_event_intel_rapl.c index c4bb8b8..999289b9
> 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
> @@ -62,6 +62,14 @@
>  #define RAPL_IDX_PP1_NRG_STAT	3	/* gpu */
>  #define INTEL_RAPL_PP1		0x4	/* pseudo-encoding
> */ 
> +#define NR_RAPL_DOMAINS         0x4
> +static const char *rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
> +	"pp0-core",
> +	"package",
> +	"dram",
> +	"pp1-gpu",
> +};
> +
>  /* Clients have PP0, PKG */
>  #define RAPL_IDX_CLN	(1<<RAPL_IDX_PP0_NRG_STAT|\
>  			 1<<RAPL_IDX_PKG_NRG_STAT|\
> @@ -112,7 +120,6 @@ static struct perf_pmu_events_attr event_attr_##v
> = {			\ 
>  struct rapl_pmu {
>  	spinlock_t	 lock;
> -	int		 hw_unit;  /* 1/2^hw_unit Joule */
>  	int		 n_active; /* number of active events */
>  	struct list_head active_list;
>  	struct pmu	 *pmu; /* pointer to rapl_pmu_class */
> @@ -120,6 +127,7 @@ struct rapl_pmu {
>  	struct hrtimer   hrtimer;
>  };
>  
> +static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;  /*
> 1/2^hw_unit Joule */ static struct pmu rapl_pmu_class;
>  static cpumask_t rapl_cpu_mask;
>  static int rapl_cntr_mask;
> @@ -127,6 +135,7 @@ static int rapl_cntr_mask;
>  static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu);
>  static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free);
>  
> +static struct x86_pmu_quirk *rapl_quirks;
>  static inline u64 rapl_read_counter(struct perf_event *event)
>  {
>  	u64 raw;
> @@ -134,15 +143,28 @@ static inline u64 rapl_read_counter(struct
> perf_event *event) return raw;
>  }
>  
> -static inline u64 rapl_scale(u64 v)
> +#define
> rapl_add_quirk(func_)
> \ +do
> {
> \
> +	static struct x86_pmu_quirk __quirk __initdata =
> {		\
> +		.func =
> func_,						\
> +	};
> \
> +	__quirk.next =
> rapl_quirks;					\
> +	rapl_quirks =
> &__quirk;						\ +} while
> (0) +
> +static inline u64 rapl_scale(u64 v, int cfg)
>  {
> +	if (cfg > NR_RAPL_DOMAINS) {
> +		pr_warn("invalid domain %d, failed to scale data\n",
> cfg);
> +		return v;
> +	}
>  	/*
>  	 * scale delta to smallest unit (1/2^32)
>  	 * users must then scale back: count * 1/(1e9*2^32) to get
> Joules
>  	 * or use ldexp(count, -32).
>  	 * Watts = Joules/Time delta
>  	 */
> -	return v << (32 - __this_cpu_read(rapl_pmu)->hw_unit);
> +	return v << (32 - rapl_hw_unit[cfg - 1]);
>  }
>  
>  static u64 rapl_event_update(struct perf_event *event)
> @@ -173,7 +195,7 @@ again:
>  	delta = (new_raw_count << shift) - (prev_raw_count << shift);
>  	delta >>= shift;
>  
> -	sdelta = rapl_scale(delta);
> +	sdelta = rapl_scale(delta, event->hw.config);
>  
>  	local64_add(sdelta, &event->count);
>  
> @@ -546,12 +568,22 @@ static void rapl_cpu_init(int cpu)
>  	cpumask_set_cpu(cpu, &rapl_cpu_mask);
>  }
>  
> +static __init void rapl_hsw_server_quirk(void)
> +{
> +	/*
> +	 * DRAM domain on HSW server has fixed energy unit which can
> be
> +	 * different than the unit from power unit MSR.
> +	 * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product
> Families, V2
> +	 * of 2. Datasheet, September 2014, Reference Number:
> 330784-001 "
> +	 */
> +	rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
> +}
> +
>  static int rapl_cpu_prepare(int cpu)
>  {
>  	struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
>  	int phys_id = topology_physical_package_id(cpu);
>  	u64 ms;
> -	u64 msr_rapl_power_unit_bits;
>  
>  	if (pmu)
>  		return 0;
> @@ -559,24 +591,13 @@ static int rapl_cpu_prepare(int cpu)
>  	if (phys_id < 0)
>  		return -1;
>  
> -	/* protect rdmsrl() to handle virtualization */
> -	if (rdmsrl_safe(MSR_RAPL_POWER_UNIT,
> &msr_rapl_power_unit_bits))
> -		return -1;
> -
>  	pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL,
> cpu_to_node(cpu)); if (!pmu)
>  		return -1;
> -
>  	spin_lock_init(&pmu->lock);
>  
>  	INIT_LIST_HEAD(&pmu->active_list);
>  
> -	/*
> -	 * grab power unit as: 1/2^unit Joules
> -	 *
> -	 * we cache in local PMU instance
> -	 */
> -	pmu->hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
>  	pmu->pmu = &rapl_pmu_class;
>  
>  	/*
> @@ -586,8 +607,8 @@ static int rapl_cpu_prepare(int cpu)
>  	 * divide interval by 2 to avoid lockstep (2 * 100)
>  	 * if hw unit is 32, then we use 2 ms 1/200/2
>  	 */
> -	if (pmu->hw_unit < 32)
> -		ms = (1000 / (2 * 100)) * (1ULL << (32 -
> pmu->hw_unit - 1));
> +	if (rapl_hw_unit[0] < 32)
> +		ms = (1000 / (2 * 100)) * (1ULL << (32 -
> rapl_hw_unit[0] - 1)); else
>  		ms = 2;
>  
> @@ -655,6 +676,20 @@ static int rapl_cpu_notifier(struct
> notifier_block *self, return NOTIFY_OK;
>  }
>  
> +static int rapl_check_hw_unit(void)
> +{
> +	u64 msr_rapl_power_unit_bits;
> +	int i;
> +
> +	/* protect rdmsrl() to handle virtualization */
> +	if (rdmsrl_safe(MSR_RAPL_POWER_UNIT,
> &msr_rapl_power_unit_bits))
> +		return -1;
> +	for (i = 0; i < NR_RAPL_DOMAINS; i++)
> +		rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) &
> 0x1FULL; +
> +	return 0;
> +}
> +
>  static const struct x86_cpu_id rapl_cpu_match[] = {
>  	[0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
>  	[1] = {},
> @@ -664,6 +699,8 @@ static int __init rapl_pmu_init(void)
>  {
>  	struct rapl_pmu *pmu;
>  	int cpu, ret;
> +	struct x86_pmu_quirk *quirk;
> +	int i;
>  
>  	/*
>  	 * check for Intel processor family 6
> @@ -678,6 +715,11 @@ static int __init rapl_pmu_init(void)
>  		rapl_cntr_mask = RAPL_IDX_CLN;
>  		rapl_pmu_events_group.attrs = rapl_events_cln_attr;
>  		break;
> +	case 63: /* Haswell-Server */
> +		rapl_add_quirk(rapl_hsw_server_quirk);
> +		rapl_cntr_mask = RAPL_IDX_SRV;
> +		rapl_pmu_events_group.attrs = rapl_events_srv_attr;
> +		break;
>  	case 60: /* Haswell */
>  	case 69: /* Haswell-Celeron */
>  		rapl_cntr_mask = RAPL_IDX_HSW;
> @@ -693,7 +735,13 @@ static int __init rapl_pmu_init(void)
>  		/* unsupported */
>  		return 0;
>  	}
> +	ret = rapl_check_hw_unit();
> +	if (ret)
> +		return ret;
>  
> +	/* run cpu model quirks */
> +	for (quirk = rapl_quirks; quirk; quirk = quirk->next)
> +		quirk->func();
>  	cpu_notifier_register_begin();
>  
>  	for_each_online_cpu(cpu) {
> @@ -714,14 +762,18 @@ static int __init rapl_pmu_init(void)
>  
>  	pmu = __this_cpu_read(rapl_pmu);
>  
> -	pr_info("RAPL PMU detected, hw unit 2^-%d Joules,"
> +	pr_info("RAPL PMU detected,"
>  		" API unit is 2^-32 Joules,"
>  		" %d fixed counters"
>  		" %llu ms ovfl timer\n",
> -		pmu->hw_unit,
>  		hweight32(rapl_cntr_mask),
>  		ktime_to_ms(pmu->timer_interval));
> -
> +	for (i = 0; i < NR_RAPL_DOMAINS; i++) {
> +		if (rapl_cntr_mask & (1 << i)) {
> +			pr_info("hw unit of domain %s 2^-%d
> Joules\n",
> +				rapl_domain_names[i],
> rapl_hw_unit[i]);
> +		}
> +	}
>  out:
>  	cpu_notifier_register_done();
>  

[Jacob Pan]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3] perf/rapl: support per domain energy unit
  2015-04-06 15:21 ` Jacob Pan
@ 2015-04-06 17:30   ` Stephane Eranian
  2015-04-15 15:08     ` Jacob Pan
  0 siblings, 1 reply; 10+ messages in thread
From: Stephane Eranian @ 2015-04-06 17:30 UTC (permalink / raw)
  To: Jacob Pan
  Cc: LKML, Peter Zijlstra, H. Peter Anvin, Paul Mackerras, Andi Kleen,
	Thomas Gleixner, Arnaldo Carvalho de Melo, Vince Weaver

On Mon, Apr 6, 2015 at 8:21 AM, Jacob Pan <jacob.jun.pan@linux.intel.com> wrote:
>
> On Thu, 26 Mar 2015 14:28:45 -0700
> Jacob Pan <jacob.jun.pan@linux.intel.com> wrote:
>
> > RAPL energy hardware unit can vary within a single CPU package, e.g.
> > HSW server DRAM has a fixed energy unit of 15.3 uJ (2^-16) whereas
> > the unit on other domains can be enumerated from power unit MSR.
> > There might be other variations in the future, this patch adds
> > per cpu model quirk to allow special handling of certain cpus.
> >
> > hw_unit is also removed from per cpu data since it is not per cpu
> > and the sampling rate for energy counter is typically not high.
> >
> > Without this patch, DRAM domain on HSW servers will be counted
> > 4x higher than the real energy counter.
> >
>
> are there any more comments about this patch?
>
I think the patch is fine.
Reviewed-by: Stephane Eranian <eranian@google.com>

>
> Thanks,
>
> Jacob
> > Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > ---
> >  arch/x86/kernel/cpu/perf_event_intel_rapl.c | 94
> > ++++++++++++++++++++++------- 1 file changed, 73 insertions(+), 21
> > deletions(-)
> >
> > diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
> > b/arch/x86/kernel/cpu/perf_event_intel_rapl.c index c4bb8b8..999289b9
> > 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
> > +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
> > @@ -62,6 +62,14 @@
> >  #define RAPL_IDX_PP1_NRG_STAT        3       /* gpu */
> >  #define INTEL_RAPL_PP1               0x4     /* pseudo-encoding
> > */
> > +#define NR_RAPL_DOMAINS         0x4
> > +static const char *rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
> > +     "pp0-core",
> > +     "package",
> > +     "dram",
> > +     "pp1-gpu",
> > +};
> > +
> >  /* Clients have PP0, PKG */
> >  #define RAPL_IDX_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\
> >                        1<<RAPL_IDX_PKG_NRG_STAT|\
> > @@ -112,7 +120,6 @@ static struct perf_pmu_events_attr event_attr_##v
> > = {                   \
> >  struct rapl_pmu {
> >       spinlock_t       lock;
> > -     int              hw_unit;  /* 1/2^hw_unit Joule */
> >       int              n_active; /* number of active events */
> >       struct list_head active_list;
> >       struct pmu       *pmu; /* pointer to rapl_pmu_class */
> > @@ -120,6 +127,7 @@ struct rapl_pmu {
> >       struct hrtimer   hrtimer;
> >  };
> >
> > +static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;  /*
> > 1/2^hw_unit Joule */ static struct pmu rapl_pmu_class;
> >  static cpumask_t rapl_cpu_mask;
> >  static int rapl_cntr_mask;
> > @@ -127,6 +135,7 @@ static int rapl_cntr_mask;
> >  static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu);
> >  static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free);
> >
> > +static struct x86_pmu_quirk *rapl_quirks;
> >  static inline u64 rapl_read_counter(struct perf_event *event)
> >  {
> >       u64 raw;
> > @@ -134,15 +143,28 @@ static inline u64 rapl_read_counter(struct
> > perf_event *event) return raw;
> >  }
> >
> > -static inline u64 rapl_scale(u64 v)
> > +#define
> > rapl_add_quirk(func_)
> > \ +do
> > {
> > \
> > +     static struct x86_pmu_quirk __quirk __initdata =
> > {             \
> > +             .func =
> > func_,                                                \
> > +     };
> > \
> > +     __quirk.next =
> > rapl_quirks;                                  \
> > +     rapl_quirks =
> > &__quirk;                                             \ +} while
> > (0) +
> > +static inline u64 rapl_scale(u64 v, int cfg)
> >  {
> > +     if (cfg > NR_RAPL_DOMAINS) {
> > +             pr_warn("invalid domain %d, failed to scale data\n",
> > cfg);
> > +             return v;
> > +     }
> >       /*
> >        * scale delta to smallest unit (1/2^32)
> >        * users must then scale back: count * 1/(1e9*2^32) to get
> > Joules
> >        * or use ldexp(count, -32).
> >        * Watts = Joules/Time delta
> >        */
> > -     return v << (32 - __this_cpu_read(rapl_pmu)->hw_unit);
> > +     return v << (32 - rapl_hw_unit[cfg - 1]);
> >  }
> >
> >  static u64 rapl_event_update(struct perf_event *event)
> > @@ -173,7 +195,7 @@ again:
> >       delta = (new_raw_count << shift) - (prev_raw_count << shift);
> >       delta >>= shift;
> >
> > -     sdelta = rapl_scale(delta);
> > +     sdelta = rapl_scale(delta, event->hw.config);
> >
> >       local64_add(sdelta, &event->count);
> >
> > @@ -546,12 +568,22 @@ static void rapl_cpu_init(int cpu)
> >       cpumask_set_cpu(cpu, &rapl_cpu_mask);
> >  }
> >
> > +static __init void rapl_hsw_server_quirk(void)
> > +{
> > +     /*
> > +      * DRAM domain on HSW server has fixed energy unit which can
> > be
> > +      * different than the unit from power unit MSR.
> > +      * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product
> > Families, V2
> > +      * of 2. Datasheet, September 2014, Reference Number:
> > 330784-001 "
> > +      */
> > +     rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
> > +}
> > +
> >  static int rapl_cpu_prepare(int cpu)
> >  {
> >       struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
> >       int phys_id = topology_physical_package_id(cpu);
> >       u64 ms;
> > -     u64 msr_rapl_power_unit_bits;
> >
> >       if (pmu)
> >               return 0;
> > @@ -559,24 +591,13 @@ static int rapl_cpu_prepare(int cpu)
> >       if (phys_id < 0)
> >               return -1;
> >
> > -     /* protect rdmsrl() to handle virtualization */
> > -     if (rdmsrl_safe(MSR_RAPL_POWER_UNIT,
> > &msr_rapl_power_unit_bits))
> > -             return -1;
> > -
> >       pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL,
> > cpu_to_node(cpu)); if (!pmu)
> >               return -1;
> > -
> >       spin_lock_init(&pmu->lock);
> >
> >       INIT_LIST_HEAD(&pmu->active_list);
> >
> > -     /*
> > -      * grab power unit as: 1/2^unit Joules
> > -      *
> > -      * we cache in local PMU instance
> > -      */
> > -     pmu->hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
> >       pmu->pmu = &rapl_pmu_class;
> >
> >       /*
> > @@ -586,8 +607,8 @@ static int rapl_cpu_prepare(int cpu)
> >        * divide interval by 2 to avoid lockstep (2 * 100)
> >        * if hw unit is 32, then we use 2 ms 1/200/2
> >        */
> > -     if (pmu->hw_unit < 32)
> > -             ms = (1000 / (2 * 100)) * (1ULL << (32 -
> > pmu->hw_unit - 1));
> > +     if (rapl_hw_unit[0] < 32)
> > +             ms = (1000 / (2 * 100)) * (1ULL << (32 -
> > rapl_hw_unit[0] - 1)); else
> >               ms = 2;
> >
> > @@ -655,6 +676,20 @@ static int rapl_cpu_notifier(struct
> > notifier_block *self, return NOTIFY_OK;
> >  }
> >
> > +static int rapl_check_hw_unit(void)
> > +{
> > +     u64 msr_rapl_power_unit_bits;
> > +     int i;
> > +
> > +     /* protect rdmsrl() to handle virtualization */
> > +     if (rdmsrl_safe(MSR_RAPL_POWER_UNIT,
> > &msr_rapl_power_unit_bits))
> > +             return -1;
> > +     for (i = 0; i < NR_RAPL_DOMAINS; i++)
> > +             rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) &
> > 0x1FULL; +
> > +     return 0;
> > +}
> > +
> >  static const struct x86_cpu_id rapl_cpu_match[] = {
> >       [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
> >       [1] = {},
> > @@ -664,6 +699,8 @@ static int __init rapl_pmu_init(void)
> >  {
> >       struct rapl_pmu *pmu;
> >       int cpu, ret;
> > +     struct x86_pmu_quirk *quirk;
> > +     int i;
> >
> >       /*
> >        * check for Intel processor family 6
> > @@ -678,6 +715,11 @@ static int __init rapl_pmu_init(void)
> >               rapl_cntr_mask = RAPL_IDX_CLN;
> >               rapl_pmu_events_group.attrs = rapl_events_cln_attr;
> >               break;
> > +     case 63: /* Haswell-Server */
> > +             rapl_add_quirk(rapl_hsw_server_quirk);
> > +             rapl_cntr_mask = RAPL_IDX_SRV;
> > +             rapl_pmu_events_group.attrs = rapl_events_srv_attr;
> > +             break;
> >       case 60: /* Haswell */
> >       case 69: /* Haswell-Celeron */
> >               rapl_cntr_mask = RAPL_IDX_HSW;
> > @@ -693,7 +735,13 @@ static int __init rapl_pmu_init(void)
> >               /* unsupported */
> >               return 0;
> >       }
> > +     ret = rapl_check_hw_unit();
> > +     if (ret)
> > +             return ret;
> >
> > +     /* run cpu model quirks */
> > +     for (quirk = rapl_quirks; quirk; quirk = quirk->next)
> > +             quirk->func();
> >       cpu_notifier_register_begin();
> >
> >       for_each_online_cpu(cpu) {
> > @@ -714,14 +762,18 @@ static int __init rapl_pmu_init(void)
> >
> >       pmu = __this_cpu_read(rapl_pmu);
> >
> > -     pr_info("RAPL PMU detected, hw unit 2^-%d Joules,"
> > +     pr_info("RAPL PMU detected,"
> >               " API unit is 2^-32 Joules,"
> >               " %d fixed counters"
> >               " %llu ms ovfl timer\n",
> > -             pmu->hw_unit,
> >               hweight32(rapl_cntr_mask),
> >               ktime_to_ms(pmu->timer_interval));
> > -
> > +     for (i = 0; i < NR_RAPL_DOMAINS; i++) {
> > +             if (rapl_cntr_mask & (1 << i)) {
> > +                     pr_info("hw unit of domain %s 2^-%d
> > Joules\n",
> > +                             rapl_domain_names[i],
> > rapl_hw_unit[i]);
> > +             }
> > +     }
> >  out:
> >       cpu_notifier_register_done();
> >
>
> [Jacob Pan]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3] perf/rapl: support per domain energy unit
  2015-04-06 17:30   ` Stephane Eranian
@ 2015-04-15 15:08     ` Jacob Pan
  2015-04-15 15:40       ` Peter Zijlstra
  0 siblings, 1 reply; 10+ messages in thread
From: Jacob Pan @ 2015-04-15 15:08 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: LKML, Peter Zijlstra, H. Peter Anvin, Paul Mackerras, Andi Kleen,
	Thomas Gleixner, Arnaldo Carvalho de Melo, Vince Weaver

On Mon, 6 Apr 2015 10:30:33 -0700
Stephane Eranian <eranian@google.com> wrote:

> On Mon, Apr 6, 2015 at 8:21 AM, Jacob Pan
> <jacob.jun.pan@linux.intel.com> wrote:
> >
> > On Thu, 26 Mar 2015 14:28:45 -0700
> > Jacob Pan <jacob.jun.pan@linux.intel.com> wrote:
> >
> > > RAPL energy hardware unit can vary within a single CPU package,
> > > e.g. HSW server DRAM has a fixed energy unit of 15.3 uJ (2^-16)
> > > whereas the unit on other domains can be enumerated from power
> > > unit MSR. There might be other variations in the future, this
> > > patch adds per cpu model quirk to allow special handling of
> > > certain cpus.
> > >
> > > hw_unit is also removed from per cpu data since it is not per cpu
> > > and the sampling rate for energy counter is typically not high.
> > >
> > > Without this patch, DRAM domain on HSW servers will be counted
> > > 4x higher than the real energy counter.
> > >
> >
> > are there any more comments about this patch?
> >
> I think the patch is fine.
> Reviewed-by: Stephane Eranian <eranian@google.com>
> 
Hi Peter,

Any more comments? This is really a bug fix.

Thanks,


Jacob

> >
> > Thanks,
> >
> > Jacob
> > > Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > > ---
> > >  arch/x86/kernel/cpu/perf_event_intel_rapl.c | 94
> > > ++++++++++++++++++++++------- 1 file changed, 73 insertions(+), 21
> > > deletions(-)
> > >
> > > diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
> > > b/arch/x86/kernel/cpu/perf_event_intel_rapl.c index
> > > c4bb8b8..999289b9 100644 ---
> > > a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++
> > > b/arch/x86/kernel/cpu/perf_event_intel_rapl.c @@ -62,6 +62,14 @@
> > >  #define RAPL_IDX_PP1_NRG_STAT        3       /* gpu */
> > >  #define INTEL_RAPL_PP1               0x4     /* pseudo-encoding
> > > */
> > > +#define NR_RAPL_DOMAINS         0x4
> > > +static const char *rapl_domain_names[NR_RAPL_DOMAINS]
> > > __initconst = {
> > > +     "pp0-core",
> > > +     "package",
> > > +     "dram",
> > > +     "pp1-gpu",
> > > +};
> > > +
> > >  /* Clients have PP0, PKG */
> > >  #define RAPL_IDX_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\
> > >                        1<<RAPL_IDX_PKG_NRG_STAT|\
> > > @@ -112,7 +120,6 @@ static struct perf_pmu_events_attr
> > > event_attr_##v = {                   \
> > >  struct rapl_pmu {
> > >       spinlock_t       lock;
> > > -     int              hw_unit;  /* 1/2^hw_unit Joule */
> > >       int              n_active; /* number of active events */
> > >       struct list_head active_list;
> > >       struct pmu       *pmu; /* pointer to rapl_pmu_class */
> > > @@ -120,6 +127,7 @@ struct rapl_pmu {
> > >       struct hrtimer   hrtimer;
> > >  };
> > >
> > > +static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;  /*
> > > 1/2^hw_unit Joule */ static struct pmu rapl_pmu_class;
> > >  static cpumask_t rapl_cpu_mask;
> > >  static int rapl_cntr_mask;
> > > @@ -127,6 +135,7 @@ static int rapl_cntr_mask;
> > >  static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu);
> > >  static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free);
> > >
> > > +static struct x86_pmu_quirk *rapl_quirks;
> > >  static inline u64 rapl_read_counter(struct perf_event *event)
> > >  {
> > >       u64 raw;
> > > @@ -134,15 +143,28 @@ static inline u64 rapl_read_counter(struct
> > > perf_event *event) return raw;
> > >  }
> > >
> > > -static inline u64 rapl_scale(u64 v)
> > > +#define
> > > rapl_add_quirk(func_)
> > > \ +do
> > > {
> > > \
> > > +     static struct x86_pmu_quirk __quirk __initdata =
> > > {             \
> > > +             .func =
> > > func_,                                                \
> > > +     };
> > > \
> > > +     __quirk.next =
> > > rapl_quirks;                                  \
> > > +     rapl_quirks =
> > > &__quirk;                                             \ +} while
> > > (0) +
> > > +static inline u64 rapl_scale(u64 v, int cfg)
> > >  {
> > > +     if (cfg > NR_RAPL_DOMAINS) {
> > > +             pr_warn("invalid domain %d, failed to scale data\n",
> > > cfg);
> > > +             return v;
> > > +     }
> > >       /*
> > >        * scale delta to smallest unit (1/2^32)
> > >        * users must then scale back: count * 1/(1e9*2^32) to get
> > > Joules
> > >        * or use ldexp(count, -32).
> > >        * Watts = Joules/Time delta
> > >        */
> > > -     return v << (32 - __this_cpu_read(rapl_pmu)->hw_unit);
> > > +     return v << (32 - rapl_hw_unit[cfg - 1]);
> > >  }
> > >
> > >  static u64 rapl_event_update(struct perf_event *event)
> > > @@ -173,7 +195,7 @@ again:
> > >       delta = (new_raw_count << shift) - (prev_raw_count <<
> > > shift); delta >>= shift;
> > >
> > > -     sdelta = rapl_scale(delta);
> > > +     sdelta = rapl_scale(delta, event->hw.config);
> > >
> > >       local64_add(sdelta, &event->count);
> > >
> > > @@ -546,12 +568,22 @@ static void rapl_cpu_init(int cpu)
> > >       cpumask_set_cpu(cpu, &rapl_cpu_mask);
> > >  }
> > >
> > > +static __init void rapl_hsw_server_quirk(void)
> > > +{
> > > +     /*
> > > +      * DRAM domain on HSW server has fixed energy unit which can
> > > be
> > > +      * different than the unit from power unit MSR.
> > > +      * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product
> > > Families, V2
> > > +      * of 2. Datasheet, September 2014, Reference Number:
> > > 330784-001 "
> > > +      */
> > > +     rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
> > > +}
> > > +
> > >  static int rapl_cpu_prepare(int cpu)
> > >  {
> > >       struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
> > >       int phys_id = topology_physical_package_id(cpu);
> > >       u64 ms;
> > > -     u64 msr_rapl_power_unit_bits;
> > >
> > >       if (pmu)
> > >               return 0;
> > > @@ -559,24 +591,13 @@ static int rapl_cpu_prepare(int cpu)
> > >       if (phys_id < 0)
> > >               return -1;
> > >
> > > -     /* protect rdmsrl() to handle virtualization */
> > > -     if (rdmsrl_safe(MSR_RAPL_POWER_UNIT,
> > > &msr_rapl_power_unit_bits))
> > > -             return -1;
> > > -
> > >       pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL,
> > > cpu_to_node(cpu)); if (!pmu)
> > >               return -1;
> > > -
> > >       spin_lock_init(&pmu->lock);
> > >
> > >       INIT_LIST_HEAD(&pmu->active_list);
> > >
> > > -     /*
> > > -      * grab power unit as: 1/2^unit Joules
> > > -      *
> > > -      * we cache in local PMU instance
> > > -      */
> > > -     pmu->hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
> > >       pmu->pmu = &rapl_pmu_class;
> > >
> > >       /*
> > > @@ -586,8 +607,8 @@ static int rapl_cpu_prepare(int cpu)
> > >        * divide interval by 2 to avoid lockstep (2 * 100)
> > >        * if hw unit is 32, then we use 2 ms 1/200/2
> > >        */
> > > -     if (pmu->hw_unit < 32)
> > > -             ms = (1000 / (2 * 100)) * (1ULL << (32 -
> > > pmu->hw_unit - 1));
> > > +     if (rapl_hw_unit[0] < 32)
> > > +             ms = (1000 / (2 * 100)) * (1ULL << (32 -
> > > rapl_hw_unit[0] - 1)); else
> > >               ms = 2;
> > >
> > > @@ -655,6 +676,20 @@ static int rapl_cpu_notifier(struct
> > > notifier_block *self, return NOTIFY_OK;
> > >  }
> > >
> > > +static int rapl_check_hw_unit(void)
> > > +{
> > > +     u64 msr_rapl_power_unit_bits;
> > > +     int i;
> > > +
> > > +     /* protect rdmsrl() to handle virtualization */
> > > +     if (rdmsrl_safe(MSR_RAPL_POWER_UNIT,
> > > &msr_rapl_power_unit_bits))
> > > +             return -1;
> > > +     for (i = 0; i < NR_RAPL_DOMAINS; i++)
> > > +             rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) &
> > > 0x1FULL; +
> > > +     return 0;
> > > +}
> > > +
> > >  static const struct x86_cpu_id rapl_cpu_match[] = {
> > >       [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
> > >       [1] = {},
> > > @@ -664,6 +699,8 @@ static int __init rapl_pmu_init(void)
> > >  {
> > >       struct rapl_pmu *pmu;
> > >       int cpu, ret;
> > > +     struct x86_pmu_quirk *quirk;
> > > +     int i;
> > >
> > >       /*
> > >        * check for Intel processor family 6
> > > @@ -678,6 +715,11 @@ static int __init rapl_pmu_init(void)
> > >               rapl_cntr_mask = RAPL_IDX_CLN;
> > >               rapl_pmu_events_group.attrs = rapl_events_cln_attr;
> > >               break;
> > > +     case 63: /* Haswell-Server */
> > > +             rapl_add_quirk(rapl_hsw_server_quirk);
> > > +             rapl_cntr_mask = RAPL_IDX_SRV;
> > > +             rapl_pmu_events_group.attrs = rapl_events_srv_attr;
> > > +             break;
> > >       case 60: /* Haswell */
> > >       case 69: /* Haswell-Celeron */
> > >               rapl_cntr_mask = RAPL_IDX_HSW;
> > > @@ -693,7 +735,13 @@ static int __init rapl_pmu_init(void)
> > >               /* unsupported */
> > >               return 0;
> > >       }
> > > +     ret = rapl_check_hw_unit();
> > > +     if (ret)
> > > +             return ret;
> > >
> > > +     /* run cpu model quirks */
> > > +     for (quirk = rapl_quirks; quirk; quirk = quirk->next)
> > > +             quirk->func();
> > >       cpu_notifier_register_begin();
> > >
> > >       for_each_online_cpu(cpu) {
> > > @@ -714,14 +762,18 @@ static int __init rapl_pmu_init(void)
> > >
> > >       pmu = __this_cpu_read(rapl_pmu);
> > >
> > > -     pr_info("RAPL PMU detected, hw unit 2^-%d Joules,"
> > > +     pr_info("RAPL PMU detected,"
> > >               " API unit is 2^-32 Joules,"
> > >               " %d fixed counters"
> > >               " %llu ms ovfl timer\n",
> > > -             pmu->hw_unit,
> > >               hweight32(rapl_cntr_mask),
> > >               ktime_to_ms(pmu->timer_interval));
> > > -
> > > +     for (i = 0; i < NR_RAPL_DOMAINS; i++) {
> > > +             if (rapl_cntr_mask & (1 << i)) {
> > > +                     pr_info("hw unit of domain %s 2^-%d
> > > Joules\n",
> > > +                             rapl_domain_names[i],
> > > rapl_hw_unit[i]);
> > > +             }
> > > +     }
> > >  out:
> > >       cpu_notifier_register_done();
> > >
> >
> > [Jacob Pan]

[Jacob Pan]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3] perf/rapl: support per domain energy unit
  2015-04-15 15:08     ` Jacob Pan
@ 2015-04-15 15:40       ` Peter Zijlstra
  2015-04-15 16:19         ` Jacob Pan
  0 siblings, 1 reply; 10+ messages in thread
From: Peter Zijlstra @ 2015-04-15 15:40 UTC (permalink / raw)
  To: Jacob Pan
  Cc: Stephane Eranian, LKML, H. Peter Anvin, Paul Mackerras,
	Andi Kleen, Thomas Gleixner, Arnaldo Carvalho de Melo,
	Vince Weaver

On Wed, Apr 15, 2015 at 08:08:07AM -0700, Jacob Pan wrote:
> Any more comments? This is really a bug fix.

I had it queued but it missed the first pull req, I'll put it in
/urgent.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3] perf/rapl: support per domain energy unit
  2015-04-15 15:40       ` Peter Zijlstra
@ 2015-04-15 16:19         ` Jacob Pan
  2015-04-15 16:36           ` Peter Zijlstra
  0 siblings, 1 reply; 10+ messages in thread
From: Jacob Pan @ 2015-04-15 16:19 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Stephane Eranian, LKML, H. Peter Anvin, Paul Mackerras,
	Andi Kleen, Thomas Gleixner, Arnaldo Carvalho de Melo,
	Vince Weaver

On Wed, 15 Apr 2015 17:40:59 +0200
Peter Zijlstra <peterz@infradead.org> wrote:

> On Wed, Apr 15, 2015 at 08:08:07AM -0700, Jacob Pan wrote:
> > Any more comments? This is really a bug fix.  
> 
> I had it queued but it missed the first pull req, I'll put it in
> /urgent.
Thanks, just for my learning the next time, which tree should i check
whether these patches are taken or not. Is it in one of the tip
branches?

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3] perf/rapl: support per domain energy unit
  2015-04-15 16:19         ` Jacob Pan
@ 2015-04-15 16:36           ` Peter Zijlstra
  0 siblings, 0 replies; 10+ messages in thread
From: Peter Zijlstra @ 2015-04-15 16:36 UTC (permalink / raw)
  To: Jacob Pan
  Cc: Stephane Eranian, LKML, H. Peter Anvin, Paul Mackerras,
	Andi Kleen, Thomas Gleixner, Arnaldo Carvalho de Melo,
	Vince Weaver

On Wed, Apr 15, 2015 at 09:19:23AM -0700, Jacob Pan wrote:
> On Wed, 15 Apr 2015 17:40:59 +0200
> Peter Zijlstra <peterz@infradead.org> wrote:
> 
> > On Wed, Apr 15, 2015 at 08:08:07AM -0700, Jacob Pan wrote:
> > > Any more comments? This is really a bug fix.  
> > 
> > I had it queued but it missed the first pull req, I'll put it in
> > /urgent.
> Thanks, just for my learning the next time, which tree should i check
> whether these patches are taken or not. Is it in one of the tip
> branches?

You'll get a tip-commit email if it lands in tip, until that time it
sits in my quilt queue, which I might or might not push out to a git
tree -- I'm not very consistent or timely with that :/

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [tip:perf/urgent] perf/x86/intel/rapl: Fix energy counter measurements but supporing per domain energy units
  2015-03-26 21:28 [PATCH v3] perf/rapl: support per domain energy unit Jacob Pan
  2015-04-06 15:21 ` Jacob Pan
@ 2015-04-18 10:14 ` tip-bot for Jacob Pan
  1 sibling, 0 replies; 10+ messages in thread
From: tip-bot for Jacob Pan @ 2015-04-18 10:14 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: hpa, jacob.jun.pan, paulus, mingo, eranian, vincent.weaver,
	andi.kleen, tglx, linux-kernel, peterz, acme

Commit-ID:  645523960102fa0ac0578d070630e49ab05f06d1
Gitweb:     http://git.kernel.org/tip/645523960102fa0ac0578d070630e49ab05f06d1
Author:     Jacob Pan <jacob.jun.pan@linux.intel.com>
AuthorDate: Thu, 26 Mar 2015 14:28:45 -0700
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Fri, 17 Apr 2015 09:58:56 +0200

perf/x86/intel/rapl: Fix energy counter measurements but supporing per domain energy units

RAPL energy hardware unit can vary within a single CPU package, e.g.
HSW server DRAM has a fixed energy unit of 15.3 uJ (2^-16) whereas
the unit on other domains can be enumerated from power unit MSR.

There might be other variations in the future, this patch adds
per cpu model quirk to allow special handling of certain cpus.

hw_unit is also removed from per cpu data since it is not per cpu
and the sampling rate for energy counter is typically not high.

Without this patch, DRAM domain on HSW servers will be counted
4x higher than the real energy counter.

Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Stephane Eranian <eranian@google.com>
Cc: Andi Kleen <andi.kleen@intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/1427405325-780-1-git-send-email-jacob.jun.pan@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_rapl.c | 94 ++++++++++++++++++++++-------
 1 file changed, 73 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index c4bb8b8..999289b9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -62,6 +62,14 @@
 #define RAPL_IDX_PP1_NRG_STAT	3	/* gpu */
 #define INTEL_RAPL_PP1		0x4	/* pseudo-encoding */
 
+#define NR_RAPL_DOMAINS         0x4
+static const char *rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
+	"pp0-core",
+	"package",
+	"dram",
+	"pp1-gpu",
+};
+
 /* Clients have PP0, PKG */
 #define RAPL_IDX_CLN	(1<<RAPL_IDX_PP0_NRG_STAT|\
 			 1<<RAPL_IDX_PKG_NRG_STAT|\
@@ -112,7 +120,6 @@ static struct perf_pmu_events_attr event_attr_##v = {			\
 
 struct rapl_pmu {
 	spinlock_t	 lock;
-	int		 hw_unit;  /* 1/2^hw_unit Joule */
 	int		 n_active; /* number of active events */
 	struct list_head active_list;
 	struct pmu	 *pmu; /* pointer to rapl_pmu_class */
@@ -120,6 +127,7 @@ struct rapl_pmu {
 	struct hrtimer   hrtimer;
 };
 
+static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;  /* 1/2^hw_unit Joule */
 static struct pmu rapl_pmu_class;
 static cpumask_t rapl_cpu_mask;
 static int rapl_cntr_mask;
@@ -127,6 +135,7 @@ static int rapl_cntr_mask;
 static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu);
 static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free);
 
+static struct x86_pmu_quirk *rapl_quirks;
 static inline u64 rapl_read_counter(struct perf_event *event)
 {
 	u64 raw;
@@ -134,15 +143,28 @@ static inline u64 rapl_read_counter(struct perf_event *event)
 	return raw;
 }
 
-static inline u64 rapl_scale(u64 v)
+#define rapl_add_quirk(func_)						\
+do {									\
+	static struct x86_pmu_quirk __quirk __initdata = {		\
+		.func = func_,						\
+	};								\
+	__quirk.next = rapl_quirks;					\
+	rapl_quirks = &__quirk;						\
+} while (0)
+
+static inline u64 rapl_scale(u64 v, int cfg)
 {
+	if (cfg > NR_RAPL_DOMAINS) {
+		pr_warn("invalid domain %d, failed to scale data\n", cfg);
+		return v;
+	}
 	/*
 	 * scale delta to smallest unit (1/2^32)
 	 * users must then scale back: count * 1/(1e9*2^32) to get Joules
 	 * or use ldexp(count, -32).
 	 * Watts = Joules/Time delta
 	 */
-	return v << (32 - __this_cpu_read(rapl_pmu)->hw_unit);
+	return v << (32 - rapl_hw_unit[cfg - 1]);
 }
 
 static u64 rapl_event_update(struct perf_event *event)
@@ -173,7 +195,7 @@ again:
 	delta = (new_raw_count << shift) - (prev_raw_count << shift);
 	delta >>= shift;
 
-	sdelta = rapl_scale(delta);
+	sdelta = rapl_scale(delta, event->hw.config);
 
 	local64_add(sdelta, &event->count);
 
@@ -546,12 +568,22 @@ static void rapl_cpu_init(int cpu)
 	cpumask_set_cpu(cpu, &rapl_cpu_mask);
 }
 
+static __init void rapl_hsw_server_quirk(void)
+{
+	/*
+	 * DRAM domain on HSW server has fixed energy unit which can be
+	 * different than the unit from power unit MSR.
+	 * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
+	 * of 2. Datasheet, September 2014, Reference Number: 330784-001 "
+	 */
+	rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
+}
+
 static int rapl_cpu_prepare(int cpu)
 {
 	struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
 	int phys_id = topology_physical_package_id(cpu);
 	u64 ms;
-	u64 msr_rapl_power_unit_bits;
 
 	if (pmu)
 		return 0;
@@ -559,24 +591,13 @@ static int rapl_cpu_prepare(int cpu)
 	if (phys_id < 0)
 		return -1;
 
-	/* protect rdmsrl() to handle virtualization */
-	if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
-		return -1;
-
 	pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
 	if (!pmu)
 		return -1;
-
 	spin_lock_init(&pmu->lock);
 
 	INIT_LIST_HEAD(&pmu->active_list);
 
-	/*
-	 * grab power unit as: 1/2^unit Joules
-	 *
-	 * we cache in local PMU instance
-	 */
-	pmu->hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
 	pmu->pmu = &rapl_pmu_class;
 
 	/*
@@ -586,8 +607,8 @@ static int rapl_cpu_prepare(int cpu)
 	 * divide interval by 2 to avoid lockstep (2 * 100)
 	 * if hw unit is 32, then we use 2 ms 1/200/2
 	 */
-	if (pmu->hw_unit < 32)
-		ms = (1000 / (2 * 100)) * (1ULL << (32 - pmu->hw_unit - 1));
+	if (rapl_hw_unit[0] < 32)
+		ms = (1000 / (2 * 100)) * (1ULL << (32 - rapl_hw_unit[0] - 1));
 	else
 		ms = 2;
 
@@ -655,6 +676,20 @@ static int rapl_cpu_notifier(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
+static int rapl_check_hw_unit(void)
+{
+	u64 msr_rapl_power_unit_bits;
+	int i;
+
+	/* protect rdmsrl() to handle virtualization */
+	if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
+		return -1;
+	for (i = 0; i < NR_RAPL_DOMAINS; i++)
+		rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
+
+	return 0;
+}
+
 static const struct x86_cpu_id rapl_cpu_match[] = {
 	[0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
 	[1] = {},
@@ -664,6 +699,8 @@ static int __init rapl_pmu_init(void)
 {
 	struct rapl_pmu *pmu;
 	int cpu, ret;
+	struct x86_pmu_quirk *quirk;
+	int i;
 
 	/*
 	 * check for Intel processor family 6
@@ -678,6 +715,11 @@ static int __init rapl_pmu_init(void)
 		rapl_cntr_mask = RAPL_IDX_CLN;
 		rapl_pmu_events_group.attrs = rapl_events_cln_attr;
 		break;
+	case 63: /* Haswell-Server */
+		rapl_add_quirk(rapl_hsw_server_quirk);
+		rapl_cntr_mask = RAPL_IDX_SRV;
+		rapl_pmu_events_group.attrs = rapl_events_srv_attr;
+		break;
 	case 60: /* Haswell */
 	case 69: /* Haswell-Celeron */
 		rapl_cntr_mask = RAPL_IDX_HSW;
@@ -693,7 +735,13 @@ static int __init rapl_pmu_init(void)
 		/* unsupported */
 		return 0;
 	}
+	ret = rapl_check_hw_unit();
+	if (ret)
+		return ret;
 
+	/* run cpu model quirks */
+	for (quirk = rapl_quirks; quirk; quirk = quirk->next)
+		quirk->func();
 	cpu_notifier_register_begin();
 
 	for_each_online_cpu(cpu) {
@@ -714,14 +762,18 @@ static int __init rapl_pmu_init(void)
 
 	pmu = __this_cpu_read(rapl_pmu);
 
-	pr_info("RAPL PMU detected, hw unit 2^-%d Joules,"
+	pr_info("RAPL PMU detected,"
 		" API unit is 2^-32 Joules,"
 		" %d fixed counters"
 		" %llu ms ovfl timer\n",
-		pmu->hw_unit,
 		hweight32(rapl_cntr_mask),
 		ktime_to_ms(pmu->timer_interval));
-
+	for (i = 0; i < NR_RAPL_DOMAINS; i++) {
+		if (rapl_cntr_mask & (1 << i)) {
+			pr_info("hw unit of domain %s 2^-%d Joules\n",
+				rapl_domain_names[i], rapl_hw_unit[i]);
+		}
+	}
 out:
 	cpu_notifier_register_done();
 

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v3] perf/rapl: support per domain energy unit
  2015-06-15 17:32 [PATCH v3] perf/rapl: support per domain energy unit Vince Weaver
@ 2015-06-15 20:07 ` Jacob Pan
  0 siblings, 0 replies; 10+ messages in thread
From: Jacob Pan @ 2015-06-15 20:07 UTC (permalink / raw)
  To: Vince Weaver
  Cc: LKML, Peter Zijlstra, H. Peter Anvin, Stephane Eranian,
	Paul Mackerras, Andi Kleen, Thomas Gleixner,
	Arnaldo Carvalho de Melo

On Mon, 15 Jun 2015 13:32:01 -0400 (EDT)
Vince Weaver <vincent.weaver@maine.edu> wrote:

> On Thu, 26 Mar 2015, Jacob Pan wrote:
> 
> > RAPL energy hardware unit can vary within a single CPU package, e.g.
> > HSW server DRAM has a fixed energy unit of 15.3 uJ (2^-16) whereas
> > the unit on other domains can be enumerated from power unit MSR.
> > There might be other variations in the future, this patch adds
> > per cpu model quirk to allow special handling of certain cpus.
> 
> So I have a Desktop Haswell machine (model 60) that is instrumented
> to measure actual DRAM power at the DIMM (with a sense resistor).
> 
> We are consistently getting RAPL results roughly a factor of 2
> smaller than the actual, measured results.
> 
> Does desktop Haswell have a similar units issue to Haswell-EP?
> 
Not that I know of. perhaps has to do with the difference of what
and where we are measuring. Let me find a instrumented system to verify
and get back to you.
> I wasted a bunch of time trying to decode the 
> DRAM_ENERGY_SCALEFACTOR_MCHBAR values described in the
> Desktop 4th Generation Intel Core Processor Family
> datasheet but possibly that value in the MCHBAR is unrelated to the
> one exported by the RAPL interface.
> 
This scale factor is not directly related to the energy counter. The
MMIO DRAM RAPL energy status uses the same unit as the MSR interface,
the resolution is ~61uJ.

> Vince

[Jacob Pan]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3] perf/rapl: support per domain energy unit
@ 2015-06-15 17:32 Vince Weaver
  2015-06-15 20:07 ` Jacob Pan
  0 siblings, 1 reply; 10+ messages in thread
From: Vince Weaver @ 2015-06-15 17:32 UTC (permalink / raw)
  To: Jacob Pan
  Cc: LKML, Peter Zijlstra, H. Peter Anvin, Stephane Eranian,
	Paul Mackerras, Andi Kleen, Thomas Gleixner,
	Arnaldo Carvalho de Melo

On Thu, 26 Mar 2015, Jacob Pan wrote:

> RAPL energy hardware unit can vary within a single CPU package, e.g.
> HSW server DRAM has a fixed energy unit of 15.3 uJ (2^-16) whereas
> the unit on other domains can be enumerated from power unit MSR.
> There might be other variations in the future, this patch adds
> per cpu model quirk to allow special handling of certain cpus.

So I have a Desktop Haswell machine (model 60) that is instrumented to 
measure actual DRAM power at the DIMM (with a sense resistor).

We are consistently getting RAPL results roughly a factor of 2 smaller 
than the actual, measured results.

Does desktop Haswell have a similar units issue to Haswell-EP?

I wasted a bunch of time trying to decode the 
DRAM_ENERGY_SCALEFACTOR_MCHBAR values described in the
Desktop 4th Generation Intel Core Processor Family
datasheet but possibly that value in the MCHBAR is unrelated to the one 
exported by the RAPL interface.

Vince

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2015-06-15 20:08 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-26 21:28 [PATCH v3] perf/rapl: support per domain energy unit Jacob Pan
2015-04-06 15:21 ` Jacob Pan
2015-04-06 17:30   ` Stephane Eranian
2015-04-15 15:08     ` Jacob Pan
2015-04-15 15:40       ` Peter Zijlstra
2015-04-15 16:19         ` Jacob Pan
2015-04-15 16:36           ` Peter Zijlstra
2015-04-18 10:14 ` [tip:perf/urgent] perf/x86/intel/rapl: Fix energy counter measurements but supporing per domain energy units tip-bot for Jacob Pan
2015-06-15 17:32 [PATCH v3] perf/rapl: support per domain energy unit Vince Weaver
2015-06-15 20:07 ` Jacob Pan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.