All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC v2 0/3] Support perf stat with i915 PMU
@ 2017-08-23 15:26 Dmitry Rogozhkin
  2017-08-23 15:26 ` [RFC v2 1/3] drm/i915/pmu: reorder function to suite next patch Dmitry Rogozhkin
                   ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: Dmitry Rogozhkin @ 2017-08-23 15:26 UTC (permalink / raw)
  To: intel-gfx; +Cc: Peter Zijlstra

These patches depend on the RFC patches enabling i915 PMU from Tvrtko:
  https://patchwork.freedesktop.org/series/27488/
Thus, CI failure to build them is expected. I think that my patches should
be squeashed in Tvrtko's one actually.

The first patch simply reorders functions and does nothing comparing to
Tvrtko's patches. Next patches add fixes according to PMU API comments
and clarifications from PMU aware engineers.

v1: Make busy_stats refcounted instead of the whole pmu.

v2: Expose cpumask for the i915 pmu to prevent creation of multiple events
    of the same type. Remove perf-driver level sampling.

Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Peter Zijlstra <peterz@infradead.org>

Dmitry Rogozhkin (3):
  drm/i915/pmu: reorder function to suite next patch
  drm/i915/pmu: serve global events and support perf stat
  drm/i915/pmu: deny perf driver level sampling of i915 PMU

 drivers/gpu/drm/i915/i915_pmu.c         | 324 ++++++++++++++------------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |   2 +-
 2 files changed, 141 insertions(+), 185 deletions(-)

-- 
1.8.3.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [RFC v2 1/3] drm/i915/pmu: reorder function to suite next patch
  2017-08-23 15:26 [RFC v2 0/3] Support perf stat with i915 PMU Dmitry Rogozhkin
@ 2017-08-23 15:26 ` Dmitry Rogozhkin
  2017-08-23 15:26 ` [RFC v2 2/3] drm/i915/pmu: serve global events and support perf stat Dmitry Rogozhkin
  2017-08-23 15:26 ` [RFC v2 3/3] drm/i915/pmu: deny perf driver level sampling of i915 PMU Dmitry Rogozhkin
  2 siblings, 0 replies; 13+ messages in thread
From: Dmitry Rogozhkin @ 2017-08-23 15:26 UTC (permalink / raw)
  To: intel-gfx; +Cc: Peter Zijlstra

This patch is doing nover except reordering functions to highlight
changes in the next patch.

Change-Id: I0cd298780503ae8f6f8035b86c59fc8b5191356b
Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
---
 drivers/gpu/drm/i915/i915_pmu.c | 180 ++++++++++++++++++++--------------------
 1 file changed, 90 insertions(+), 90 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 3272ec0..bcdf2bc 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -363,6 +363,88 @@ static bool engine_needs_busy_stats(struct intel_engine_cs *engine)
 	       (engine->pmu.enable & BIT(I915_SAMPLE_BUSY));
 }
 
+static u64 count_interrupts(struct drm_i915_private *i915)
+{
+	/* open-coded kstat_irqs() */
+	struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
+	u64 sum = 0;
+	int cpu;
+
+	if (!desc || !desc->kstat_irqs)
+		return 0;
+
+	for_each_possible_cpu(cpu)
+		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
+
+	return sum;
+}
+
+static void i915_pmu_event_read(struct perf_event *event)
+{
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), pmu.base);
+	u64 val = 0;
+
+	if (is_engine_event(event)) {
+		u8 sample = engine_event_sample(event);
+		struct intel_engine_cs *engine;
+
+		engine = intel_engine_lookup_user(i915,
+						  engine_event_class(event),
+						  engine_event_instance(event));
+
+		if (WARN_ON_ONCE(!engine)) {
+			/* Do nothing */
+		} else if (sample == I915_SAMPLE_BUSY &&
+			   engine->pmu.busy_stats) {
+			val = ktime_to_ns(intel_engine_get_busy_time(engine));
+		} else {
+			val = engine->pmu.sample[sample];
+		}
+	} else switch (event->attr.config) {
+	case I915_PMU_ACTUAL_FREQUENCY:
+		val = i915->pmu.sample[__I915_SAMPLE_FREQ_ACT];
+		break;
+	case I915_PMU_REQUESTED_FREQUENCY:
+		val = i915->pmu.sample[__I915_SAMPLE_FREQ_REQ];
+		break;
+	case I915_PMU_ENERGY:
+		val = intel_energy_uJ(i915);
+		break;
+	case I915_PMU_INTERRUPTS:
+		val = count_interrupts(i915);
+		break;
+
+	case I915_PMU_RC6_RESIDENCY:
+		if (!i915->gt.awake)
+			return;
+
+		val = intel_rc6_residency_ns(i915,
+					     IS_VALLEYVIEW(i915) ?
+					     VLV_GT_RENDER_RC6 :
+					     GEN6_GT_GFX_RC6);
+		break;
+
+	case I915_PMU_RC6p_RESIDENCY:
+		if (!i915->gt.awake)
+			return;
+
+		if (!IS_VALLEYVIEW(i915))
+			val = intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
+		break;
+
+	case I915_PMU_RC6pp_RESIDENCY:
+		if (!i915->gt.awake)
+			return;
+
+		if (!IS_VALLEYVIEW(i915))
+			val = intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
+		break;
+	}
+
+	local64_set(&event->count, val);
+}
+
 static void i915_pmu_enable(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
@@ -440,23 +522,6 @@ static void i915_pmu_disable(struct perf_event *event)
 	i915_pmu_timer_cancel(event);
 }
 
-static int i915_pmu_event_add(struct perf_event *event, int flags)
-{
-	struct hw_perf_event *hwc = &event->hw;
-
-	if (flags & PERF_EF_START)
-		i915_pmu_enable(event);
-
-	hwc->state = !(flags & PERF_EF_START);
-
-	return 0;
-}
-
-static void i915_pmu_event_del(struct perf_event *event, int flags)
-{
-	i915_pmu_disable(event);
-}
-
 static void i915_pmu_event_start(struct perf_event *event, int flags)
 {
 	i915_pmu_enable(event);
@@ -467,86 +532,21 @@ static void i915_pmu_event_stop(struct perf_event *event, int flags)
 	i915_pmu_disable(event);
 }
 
-static u64 count_interrupts(struct drm_i915_private *i915)
+static int i915_pmu_event_add(struct perf_event *event, int flags)
 {
-	/* open-coded kstat_irqs() */
-	struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
-	u64 sum = 0;
-	int cpu;
+	struct hw_perf_event *hwc = &event->hw;
 
-	if (!desc || !desc->kstat_irqs)
-		return 0;
+	if (flags & PERF_EF_START)
+		i915_pmu_enable(event);
 
-	for_each_possible_cpu(cpu)
-		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
+	hwc->state = !(flags & PERF_EF_START);
 
-	return sum;
+	return 0;
 }
 
-static void i915_pmu_event_read(struct perf_event *event)
+static void i915_pmu_event_del(struct perf_event *event, int flags)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
-	u64 val = 0;
-
-	if (is_engine_event(event)) {
-		u8 sample = engine_event_sample(event);
-		struct intel_engine_cs *engine;
-
-		engine = intel_engine_lookup_user(i915,
-						  engine_event_class(event),
-						  engine_event_instance(event));
-
-		if (WARN_ON_ONCE(!engine)) {
-			/* Do nothing */
-		} else if (sample == I915_SAMPLE_BUSY &&
-			   engine->pmu.busy_stats) {
-			val = ktime_to_ns(intel_engine_get_busy_time(engine));
-		} else {
-			val = engine->pmu.sample[sample];
-		}
-	} else switch (event->attr.config) {
-	case I915_PMU_ACTUAL_FREQUENCY:
-		val = i915->pmu.sample[__I915_SAMPLE_FREQ_ACT];
-		break;
-	case I915_PMU_REQUESTED_FREQUENCY:
-		val = i915->pmu.sample[__I915_SAMPLE_FREQ_REQ];
-		break;
-	case I915_PMU_ENERGY:
-		val = intel_energy_uJ(i915);
-		break;
-	case I915_PMU_INTERRUPTS:
-		val = count_interrupts(i915);
-		break;
-
-	case I915_PMU_RC6_RESIDENCY:
-		if (!i915->gt.awake)
-			return;
-
-		val = intel_rc6_residency_ns(i915,
-					     IS_VALLEYVIEW(i915) ?
-					     VLV_GT_RENDER_RC6 :
-					     GEN6_GT_GFX_RC6);
-		break;
-
-	case I915_PMU_RC6p_RESIDENCY:
-		if (!i915->gt.awake)
-			return;
-
-		if (!IS_VALLEYVIEW(i915))
-			val = intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
-		break;
-
-	case I915_PMU_RC6pp_RESIDENCY:
-		if (!i915->gt.awake)
-			return;
-
-		if (!IS_VALLEYVIEW(i915))
-			val = intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
-		break;
-	}
-
-	local64_set(&event->count, val);
+	i915_pmu_disable(event);
 }
 
 static int i915_pmu_event_event_idx(struct perf_event *event)
-- 
1.8.3.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [RFC v2 2/3] drm/i915/pmu: serve global events and support perf stat
  2017-08-23 15:26 [RFC v2 0/3] Support perf stat with i915 PMU Dmitry Rogozhkin
  2017-08-23 15:26 ` [RFC v2 1/3] drm/i915/pmu: reorder function to suite next patch Dmitry Rogozhkin
@ 2017-08-23 15:26 ` Dmitry Rogozhkin
  2017-08-23 23:38   ` Rogozhkin, Dmitry V
  2017-08-23 15:26 ` [RFC v2 3/3] drm/i915/pmu: deny perf driver level sampling of i915 PMU Dmitry Rogozhkin
  2 siblings, 1 reply; 13+ messages in thread
From: Dmitry Rogozhkin @ 2017-08-23 15:26 UTC (permalink / raw)
  To: intel-gfx; +Cc: Peter Zijlstra

This patch should probably be squashed with Tvrtko's PMU enabling
patch...

Making perf-stat workable with i915 PMU. The major point is that
current implementation of i915 PMU exposes global counter rather
thatn per-task counters. Thus, required changes are:
* Register PMU with .task_ctx_nr=perf_invalid_context
* Expose cpumask for the PMU with the single CPU in the mask
* Properly support pmu->stop(): it should call pmu->read()
* Properly support pmu->del(): it should call stop(event, PERF_EF_UPDATE)

Examples:
perf stat -e i915/rcs0-busy/ workload.sh
   This should error out with "failed to read counter" because the
requested counter can't be queried in per-task mode (which is the case).

perf stat -e i915/rcs0-busy/ -a workload.sh
perf stat -e i915/rcs0-busy/ -a -C0 workload.sh
   These 2 commands should give the same result with the correct counter
values. Counter value will be queried once in the end of the wrokload.
The example output would be:

 Performance counter stats for 'system wide':
       649,547,987      i915/rcs0-busy/

       1.895530680 seconds time elapsed

perf stat -e i915/rcs0-busy/ -a -I 100 workload.sh
   This will query counter perdiodically (each 100ms) and dump output:

     0.100108369          4,137,438      i915/rcs0-busy/
i915/rcs0-busy/: 37037414 100149071 100149071
     0.200249024         37,037,414      i915/rcs0-busy/
i915/rcs0-busy/: 36935429 100145077 100145077
     0.300391916         36,935,429      i915/rcs0-busy/
i915/rcs0-busy/: 34262017 100126136 100126136
     0.400518037         34,262,017      i915/rcs0-busy/
i915/rcs0-busy/: 34539960 100126217 100126217

v1: Make pmu.busy_stats a refcounter to avoid busy stats going away
    with some deleted event.

v2: Expose cpumask for i915 PMU to avoid multiple events creation of
    the same type followed by counter aggregation by perf-stat.

Change-Id: I7d1abe747a4399196e72253f7b66441a6528dbee
Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
---
 drivers/gpu/drm/i915/i915_pmu.c         | 71 +++++++++++++++++++++++++--------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 +-
 2 files changed, 55 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index bcdf2bc..c551d64 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -15,6 +15,8 @@
 
 #define ENGINE_SAMPLE_BITS (16)
 
+static cpumask_t i915_pmu_cpumask = CPU_MASK_CPU0;
+
 static u8 engine_config_sample(u64 config)
 {
 	return config & I915_PMU_SAMPLE_MASK;
@@ -285,16 +287,24 @@ static int i915_pmu_event_init(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
 		container_of(event->pmu, typeof(*i915), pmu.base);
+	int cpu;
 	int ret;
 
-	/* XXX ideally only want pid == -1 && cpu == -1 */
-
 	if (event->attr.type != event->pmu->type)
 		return -ENOENT;
 
 	if (has_branch_stack(event))
 		return -EOPNOTSUPP;
 
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	cpu = cpumask_any_and(&i915_pmu_cpumask,
+		topology_sibling_cpumask(event->cpu));
+
+	if (cpu >= nr_cpu_ids)
+		return -ENODEV;
+
 	ret = 0;
 	if (is_engine_event(event)) {
 		ret = engine_event_init(event);
@@ -314,6 +324,7 @@ static int i915_pmu_event_init(struct perf_event *event)
 	if (ret)
 		return ret;
 
+	event->cpu = cpu;
 	if (!event->parent)
 		event->destroy = i915_pmu_event_destroy;
 
@@ -469,11 +480,16 @@ static void i915_pmu_enable(struct perf_event *event)
 				       ns_to_ktime(PERIOD), 0,
 				       HRTIMER_MODE_REL_PINNED);
 		i915->pmu.timer_enabled = true;
-	} else if (is_engine_event(event) && engine_needs_busy_stats(engine) &&
-		   !engine->pmu.busy_stats) {
-		engine->pmu.busy_stats = true;
-		if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
-			queue_work(i915->wq, &engine->pmu.enable_busy_stats);
+	} else if (is_engine_event(event) && engine_needs_busy_stats(engine)) {
+		/* Enable busy stats for the first event demanding it, next
+		 * events just reference counter. So, if some events will go
+		 * away, we will still have busy stats enabled till remaining
+		 * events still use them.
+		 */
+		if (engine->pmu.busy_stats++ == 0) {
+			if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
+				queue_work(i915->wq, &engine->pmu.enable_busy_stats);
+		}
 	}
 
 	spin_unlock_irqrestore(&i915->pmu.lock, flags);
@@ -495,16 +511,16 @@ static void i915_pmu_disable(struct perf_event *event)
 		enum intel_engine_id id;
 
 		engine = intel_engine_lookup_user(i915,
-						  engine_event_class(event),
-						  engine_event_instance(event));
+						engine_event_class(event),
+						engine_event_instance(event));
 		GEM_BUG_ON(!engine);
 		engine->pmu.enable &= ~BIT(engine_event_sample(event));
-		if (engine->pmu.busy_stats &&
-		    !engine_needs_busy_stats(engine)) {
-			engine->pmu.busy_stats = false;
-			queue_delayed_work(i915->wq,
-					   &engine->pmu.disable_busy_stats,
-					   round_jiffies_up_relative(2 * HZ));
+		if (!engine_needs_busy_stats(engine)) {
+			if (engine->pmu.busy_stats && --engine->pmu.busy_stats == 0) {
+				queue_delayed_work(i915->wq,
+						&engine->pmu.disable_busy_stats,
+						round_jiffies_up_relative(2 * HZ));
+			}
 		}
 		mask = 0;
 		for_each_engine(engine, i915, id)
@@ -529,6 +545,8 @@ static void i915_pmu_event_start(struct perf_event *event, int flags)
 
 static void i915_pmu_event_stop(struct perf_event *event, int flags)
 {
+	if (flags & PERF_EF_UPDATE)
+		i915_pmu_event_read(event);
 	i915_pmu_disable(event);
 }
 
@@ -546,7 +564,7 @@ static int i915_pmu_event_add(struct perf_event *event, int flags)
 
 static void i915_pmu_event_del(struct perf_event *event, int flags)
 {
-	i915_pmu_disable(event);
+	i915_pmu_event_stop(event, PERF_EF_UPDATE);
 }
 
 static int i915_pmu_event_event_idx(struct perf_event *event)
@@ -656,9 +674,28 @@ static ssize_t i915_pmu_event_show(struct device *dev,
         .attrs = i915_pmu_events_attrs,
 };
 
+static ssize_t i915_pmu_get_attr_cpumask(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, i915_pmu_get_attr_cpumask, NULL);
+
+static struct attribute *i915_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static struct attribute_group i915_pmu_cpumask_attr_group = {
+	.attrs = i915_cpumask_attrs,
+};
+
 static const struct attribute_group *i915_pmu_attr_groups[] = {
         &i915_pmu_format_attr_group,
         &i915_pmu_events_attr_group,
+        &i915_pmu_cpumask_attr_group,
         NULL
 };
 
@@ -687,7 +724,7 @@ void i915_pmu_register(struct drm_i915_private *i915)
 		return;
 
 	i915->pmu.base.attr_groups	= i915_pmu_attr_groups;
-	i915->pmu.base.task_ctx_nr	= perf_sw_context;
+	i915->pmu.base.task_ctx_nr	= perf_invalid_context;
 	i915->pmu.base.event_init	= i915_pmu_event_init;
 	i915->pmu.base.add		= i915_pmu_event_add;
 	i915->pmu.base.del		= i915_pmu_event_del;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index fd5d838..0530e4e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -248,7 +248,7 @@ struct intel_engine_cs {
 	struct {
 		u32 enable;
 		u64 sample[4];
-		bool busy_stats;
+		unsigned int busy_stats;
 		struct work_struct enable_busy_stats;
 		struct delayed_work disable_busy_stats;
 	} pmu;
-- 
1.8.3.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [RFC v2 3/3] drm/i915/pmu: deny perf driver level sampling of i915 PMU
  2017-08-23 15:26 [RFC v2 0/3] Support perf stat with i915 PMU Dmitry Rogozhkin
  2017-08-23 15:26 ` [RFC v2 1/3] drm/i915/pmu: reorder function to suite next patch Dmitry Rogozhkin
  2017-08-23 15:26 ` [RFC v2 2/3] drm/i915/pmu: serve global events and support perf stat Dmitry Rogozhkin
@ 2017-08-23 15:26 ` Dmitry Rogozhkin
  2017-08-23 23:43   ` Rogozhkin, Dmitry V
  2 siblings, 1 reply; 13+ messages in thread
From: Dmitry Rogozhkin @ 2017-08-23 15:26 UTC (permalink / raw)
  To: intel-gfx; +Cc: Peter Zijlstra

This patch should probably be squashed with Tvrtko's PMU enabling patch...

As per discussion with Peter, i915 PMU is an example of uncore PMU which
are prohibited to support perf driver level sampling. This patch removes
hrtimer which we expose to perf core and denies events creation with
non-zero event->attr.sampling_period.

Mind that this patch does _not_ remove i915 PMU _internal_ sampling timer.
So, sampling metrics are still gathered, but can be accessed only by
explicit request to get metric counter, i.e. by sys_read().

Change-Id: I33f345f679f0a5a8ecc9867f9e7c1bfb357e708d
Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
---
 drivers/gpu/drm/i915/i915_pmu.c | 89 ++---------------------------------------
 1 file changed, 4 insertions(+), 85 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index c551d64..311aeeb 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -239,50 +239,6 @@ static int engine_event_init(struct perf_event *event)
 	return 0;
 }
 
-static DEFINE_PER_CPU(struct pt_regs, i915_pmu_pt_regs);
-
-static enum hrtimer_restart hrtimer_sample(struct hrtimer *hrtimer)
-{
-	struct pt_regs *regs = this_cpu_ptr(&i915_pmu_pt_regs);
-	struct perf_sample_data data;
-	struct perf_event *event;
-	u64 period;
-
-	event = container_of(hrtimer, struct perf_event, hw.hrtimer);
-	if (event->state != PERF_EVENT_STATE_ACTIVE)
-		return HRTIMER_NORESTART;
-
-	event->pmu->read(event);
-
-	perf_sample_data_init(&data, 0, event->hw.last_period);
-	perf_event_overflow(event, &data, regs);
-
-	period = max_t(u64, 10000, event->hw.sample_period);
-	hrtimer_forward_now(hrtimer, ns_to_ktime(period));
-	return HRTIMER_RESTART;
-}
-
-static void init_hrtimer(struct perf_event *event)
-{
-	struct hw_perf_event *hwc = &event->hw;
-
-	if (!is_sampling_event(event))
-		return;
-
-	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	hwc->hrtimer.function = hrtimer_sample;
-
-	if (event->attr.freq) {
-		long freq = event->attr.sample_freq;
-
-		event->attr.sample_period = NSEC_PER_SEC / freq;
-		hwc->sample_period = event->attr.sample_period;
-		local64_set(&hwc->period_left, hwc->sample_period);
-		hwc->last_period = hwc->sample_period;
-		event->attr.freq = 0;
-	}
-}
-
 static int i915_pmu_event_init(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
@@ -293,6 +249,10 @@ static int i915_pmu_event_init(struct perf_event *event)
 	if (event->attr.type != event->pmu->type)
 		return -ENOENT;
 
+	/* unsupported modes and filters */
+	if (event->attr.sample_period) /* no sampling */
+		return -EINVAL;
+
 	if (has_branch_stack(event))
 		return -EOPNOTSUPP;
 
@@ -328,46 +288,9 @@ static int i915_pmu_event_init(struct perf_event *event)
 	if (!event->parent)
 		event->destroy = i915_pmu_event_destroy;
 
-	init_hrtimer(event);
-
 	return 0;
 }
 
-static void i915_pmu_timer_start(struct perf_event *event)
-{
-	struct hw_perf_event *hwc = &event->hw;
-	s64 period;
-
-	if (!is_sampling_event(event))
-		return;
-
-	period = local64_read(&hwc->period_left);
-	if (period) {
-		if (period < 0)
-			period = 10000;
-
-		local64_set(&hwc->period_left, 0);
-	} else {
-		period = max_t(u64, 10000, hwc->sample_period);
-	}
-
-	hrtimer_start_range_ns(&hwc->hrtimer,
-			       ns_to_ktime(period), 0,
-			       HRTIMER_MODE_REL_PINNED);
-}
-
-static void i915_pmu_timer_cancel(struct perf_event *event)
-{
-	struct hw_perf_event *hwc = &event->hw;
-
-	if (!is_sampling_event(event))
-		return;
-
-	local64_set(&hwc->period_left,
-		    ktime_to_ns(hrtimer_get_remaining(&hwc->hrtimer)));
-	hrtimer_cancel(&hwc->hrtimer);
-}
-
 static bool engine_needs_busy_stats(struct intel_engine_cs *engine)
 {
 	return supports_busy_stats() &&
@@ -493,8 +416,6 @@ static void i915_pmu_enable(struct perf_event *event)
 	}
 
 	spin_unlock_irqrestore(&i915->pmu.lock, flags);
-
-	i915_pmu_timer_start(event);
 }
 
 static void i915_pmu_disable(struct perf_event *event)
@@ -534,8 +455,6 @@ static void i915_pmu_disable(struct perf_event *event)
 	i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
 
 	spin_unlock_irqrestore(&i915->pmu.lock, flags);
-
-	i915_pmu_timer_cancel(event);
 }
 
 static void i915_pmu_event_start(struct perf_event *event, int flags)
-- 
1.8.3.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [RFC v2 2/3] drm/i915/pmu: serve global events and support perf stat
  2017-08-23 15:26 ` [RFC v2 2/3] drm/i915/pmu: serve global events and support perf stat Dmitry Rogozhkin
@ 2017-08-23 23:38   ` Rogozhkin, Dmitry V
  2017-08-28 22:45     ` Rogozhkin, Dmitry V
  2017-08-29  9:28     ` Peter Zijlstra
  0 siblings, 2 replies; 13+ messages in thread
From: Rogozhkin, Dmitry V @ 2017-08-23 23:38 UTC (permalink / raw)
  To: intel-gfx, peterz

On Wed, 2017-08-23 at 08:26 -0700, Dmitry Rogozhkin wrote:
> +static cpumask_t i915_pmu_cpumask = CPU_MASK_CPU0;

Peter, this hardcoding of cpumask to use CPU0 works, but should I
implement something smarter or this will be sufficient? I see that
cstate.c you have pointed me to tries to track CPUs going online/offline
and migrates PMU context to another CPU if selected one went offline.
Should I follow this way?

If I should track CPUs going online/offline, then I have questions:
1. How I should register tracking callbacks? I see that cstate.c
registers CPUHP_AP_PERF_X86_CSTATE_STARTING and
CPUHP_AP_PERF_X86_CSTATE_ONLINE, uncore.c registers
CPUHP_AP_PERF_X86_UNCORE_ONLINE. What I should use? I incline to UNCORE.
2. If I will register for, say UNCORE, then how double registrations
will be handled if both uncore.c and i915.c will register callbacks? Any
conflict here?
3. What I should pass as 2nd argument? Will "perf/x86/intel/i915:online"
be ok?

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC v2 3/3] drm/i915/pmu: deny perf driver level sampling of i915 PMU
  2017-08-23 15:26 ` [RFC v2 3/3] drm/i915/pmu: deny perf driver level sampling of i915 PMU Dmitry Rogozhkin
@ 2017-08-23 23:43   ` Rogozhkin, Dmitry V
       [not found]     ` <3EDB40B547243546A1017B798F8C1AA8847FC002@ORSMSX114.amr.corp.intel.com>
  2017-08-31  7:41     ` Peter Zijlstra
  0 siblings, 2 replies; 13+ messages in thread
From: Rogozhkin, Dmitry V @ 2017-08-23 23:43 UTC (permalink / raw)
  To: intel-gfx, chris; +Cc: peterz

Hi Chris,

Why we had event->hw->hrtimer in i915 PMU? Was there any particular
reason? You had some use case which did not work?

According to Peter we should not expose the timer out of our pmu, and I
do not see the reason why we need it at the first place. So, I went
forward and wiped it out and prohibited events to be intialized with the
sampling_period. I don't see what will be broken. From my perspective
nothing because internal sampling timer still remains.

Could you, please, comment?

Dmitry.

On Wed, 2017-08-23 at 08:26 -0700, Dmitry Rogozhkin wrote:
> This patch should probably be squashed with Tvrtko's PMU enabling patch...
> 
> As per discussion with Peter, i915 PMU is an example of uncore PMU which
> are prohibited to support perf driver level sampling. This patch removes
> hrtimer which we expose to perf core and denies events creation with
> non-zero event->attr.sampling_period.
> 
> Mind that this patch does _not_ remove i915 PMU _internal_ sampling timer.
> So, sampling metrics are still gathered, but can be accessed only by
> explicit request to get metric counter, i.e. by sys_read().
> 
> Change-Id: I33f345f679f0a5a8ecc9867f9e7c1bfb357e708d
> Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Peter Zijlstra <peterz@infradead.org>
> ---
>  drivers/gpu/drm/i915/i915_pmu.c | 89 ++---------------------------------------
>  1 file changed, 4 insertions(+), 85 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index c551d64..311aeeb 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -239,50 +239,6 @@ static int engine_event_init(struct perf_event *event)
>  	return 0;
>  }
>  
> -static DEFINE_PER_CPU(struct pt_regs, i915_pmu_pt_regs);
> -
> -static enum hrtimer_restart hrtimer_sample(struct hrtimer *hrtimer)
> -{
> -	struct pt_regs *regs = this_cpu_ptr(&i915_pmu_pt_regs);
> -	struct perf_sample_data data;
> -	struct perf_event *event;
> -	u64 period;
> -
> -	event = container_of(hrtimer, struct perf_event, hw.hrtimer);
> -	if (event->state != PERF_EVENT_STATE_ACTIVE)
> -		return HRTIMER_NORESTART;
> -
> -	event->pmu->read(event);
> -
> -	perf_sample_data_init(&data, 0, event->hw.last_period);
> -	perf_event_overflow(event, &data, regs);
> -
> -	period = max_t(u64, 10000, event->hw.sample_period);
> -	hrtimer_forward_now(hrtimer, ns_to_ktime(period));
> -	return HRTIMER_RESTART;
> -}
> -
> -static void init_hrtimer(struct perf_event *event)
> -{
> -	struct hw_perf_event *hwc = &event->hw;
> -
> -	if (!is_sampling_event(event))
> -		return;
> -
> -	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> -	hwc->hrtimer.function = hrtimer_sample;
> -
> -	if (event->attr.freq) {
> -		long freq = event->attr.sample_freq;
> -
> -		event->attr.sample_period = NSEC_PER_SEC / freq;
> -		hwc->sample_period = event->attr.sample_period;
> -		local64_set(&hwc->period_left, hwc->sample_period);
> -		hwc->last_period = hwc->sample_period;
> -		event->attr.freq = 0;
> -	}
> -}
> -
>  static int i915_pmu_event_init(struct perf_event *event)
>  {
>  	struct drm_i915_private *i915 =
> @@ -293,6 +249,10 @@ static int i915_pmu_event_init(struct perf_event *event)
>  	if (event->attr.type != event->pmu->type)
>  		return -ENOENT;
>  
> +	/* unsupported modes and filters */
> +	if (event->attr.sample_period) /* no sampling */
> +		return -EINVAL;
> +
>  	if (has_branch_stack(event))
>  		return -EOPNOTSUPP;
>  
> @@ -328,46 +288,9 @@ static int i915_pmu_event_init(struct perf_event *event)
>  	if (!event->parent)
>  		event->destroy = i915_pmu_event_destroy;
>  
> -	init_hrtimer(event);
> -
>  	return 0;
>  }
>  
> -static void i915_pmu_timer_start(struct perf_event *event)
> -{
> -	struct hw_perf_event *hwc = &event->hw;
> -	s64 period;
> -
> -	if (!is_sampling_event(event))
> -		return;
> -
> -	period = local64_read(&hwc->period_left);
> -	if (period) {
> -		if (period < 0)
> -			period = 10000;
> -
> -		local64_set(&hwc->period_left, 0);
> -	} else {
> -		period = max_t(u64, 10000, hwc->sample_period);
> -	}
> -
> -	hrtimer_start_range_ns(&hwc->hrtimer,
> -			       ns_to_ktime(period), 0,
> -			       HRTIMER_MODE_REL_PINNED);
> -}
> -
> -static void i915_pmu_timer_cancel(struct perf_event *event)
> -{
> -	struct hw_perf_event *hwc = &event->hw;
> -
> -	if (!is_sampling_event(event))
> -		return;
> -
> -	local64_set(&hwc->period_left,
> -		    ktime_to_ns(hrtimer_get_remaining(&hwc->hrtimer)));
> -	hrtimer_cancel(&hwc->hrtimer);
> -}
> -
>  static bool engine_needs_busy_stats(struct intel_engine_cs *engine)
>  {
>  	return supports_busy_stats() &&
> @@ -493,8 +416,6 @@ static void i915_pmu_enable(struct perf_event *event)
>  	}
>  
>  	spin_unlock_irqrestore(&i915->pmu.lock, flags);
> -
> -	i915_pmu_timer_start(event);
>  }
>  
>  static void i915_pmu_disable(struct perf_event *event)
> @@ -534,8 +455,6 @@ static void i915_pmu_disable(struct perf_event *event)
>  	i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
>  
>  	spin_unlock_irqrestore(&i915->pmu.lock, flags);
> -
> -	i915_pmu_timer_cancel(event);
>  }
>  
>  static void i915_pmu_event_start(struct perf_event *event, int flags)

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC v2 3/3] drm/i915/pmu: deny perf driver level sampling of i915 PMU
       [not found]       ` <150368515309.27971.17522435118048475155@mail.alporthouse.com>
@ 2017-08-25 19:01         ` Rogozhkin, Dmitry V
  0 siblings, 0 replies; 13+ messages in thread
From: Rogozhkin, Dmitry V @ 2017-08-25 19:01 UTC (permalink / raw)
  To: Chris Wilson, Wilson, Chris, intel-gfx; +Cc: peterz

Returning mailing list back.

Just tried ./intel-gpu-overlay. From what I see it wonderfully works even when I wiped out event->sampling_period from i915 RFC PMU and prohibited it. Mind that I did not remove sampling inside i915 RFC PMU: I removed only timer staff which is exposed to the perf core. Sampling itself is still there and works as far as I see.

Thus, the question: why event->hw->hrtimer staff was introduced in i915 RFC PMU? Right now it does not make sense to me.

Dmitry.

-----Original Message-----
From: Chris Wilson [mailto:chris@chris-wilson.co.uk] 
Sent: Friday, August 25, 2017 11:19 AM
To: Rogozhkin, Dmitry V <dmitry.v.rogozhkin@intel.com>; Wilson, Chris <chris.wilson@intel.com>
Subject: RE: [RFC v2 3/3] drm/i915/pmu: deny perf driver level sampling of i915 PMU

Quoting Rogozhkin, Dmitry V (2017-08-25 19:06:13)
> Hi Chris, not sure you saw my mail. So, I try to remind.

It's integral to all the sampling counters we use; which are the majority of the events exposed and used by intel-gpu-overlay.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC v2 2/3] drm/i915/pmu: serve global events and support perf stat
  2017-08-23 23:38   ` Rogozhkin, Dmitry V
@ 2017-08-28 22:45     ` Rogozhkin, Dmitry V
  2017-08-29  9:28     ` Peter Zijlstra
  1 sibling, 0 replies; 13+ messages in thread
From: Rogozhkin, Dmitry V @ 2017-08-28 22:45 UTC (permalink / raw)
  To: intel-gfx, peterz

Peter, any comments?

On Wed, 2017-08-23 at 23:38 +0000, Rogozhkin, Dmitry V wrote:
> On Wed, 2017-08-23 at 08:26 -0700, Dmitry Rogozhkin wrote:
> > +static cpumask_t i915_pmu_cpumask = CPU_MASK_CPU0;
> 
> 
> Peter, this hardcoding of cpumask to use CPU0 works, but should I
> implement something smarter or this will be sufficient? I see that
> cstate.c you have pointed me to tries to track CPUs going online/offline
> and migrates PMU context to another CPU if selected one went offline.
> Should I follow this way?
> 
> If I should track CPUs going online/offline, then I have questions:
> 1. How I should register tracking callbacks? I see that cstate.c
> registers CPUHP_AP_PERF_X86_CSTATE_STARTING and
> CPUHP_AP_PERF_X86_CSTATE_ONLINE, uncore.c registers
> CPUHP_AP_PERF_X86_UNCORE_ONLINE. What I should use? I incline to UNCORE.
> 2. If I will register for, say UNCORE, then how double registrations
> will be handled if both uncore.c and i915.c will register callbacks? Any
> conflict here?
> 3. What I should pass as 2nd argument? Will "perf/x86/intel/i915:online"
> be ok?
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC v2 2/3] drm/i915/pmu: serve global events and support perf stat
  2017-08-23 23:38   ` Rogozhkin, Dmitry V
  2017-08-28 22:45     ` Rogozhkin, Dmitry V
@ 2017-08-29  9:28     ` Peter Zijlstra
  2017-08-30 17:24       ` Rogozhkin, Dmitry V
  1 sibling, 1 reply; 13+ messages in thread
From: Peter Zijlstra @ 2017-08-29  9:28 UTC (permalink / raw)
  To: Rogozhkin, Dmitry V; +Cc: intel-gfx

On Wed, Aug 23, 2017 at 11:38:43PM +0000, Rogozhkin, Dmitry V wrote:
> On Wed, 2017-08-23 at 08:26 -0700, Dmitry Rogozhkin wrote:
> > +static cpumask_t i915_pmu_cpumask = CPU_MASK_CPU0;
> 
> Peter, this hardcoding of cpumask to use CPU0 works, but should I
> implement something smarter or this will be sufficient? I see that
> cstate.c you have pointed me to tries to track CPUs going online/offline
> and migrates PMU context to another CPU if selected one went offline.
> Should I follow this way?

Yes.. x86 used to not allow hotplug of CPU0, but they 'fixed' that :/

And the perf core needs _a_ valid CPU to run things from, which leaves
you having to track online/offline things.

Now, I suppose its all fairly similar for a lot of uncore PMUs, so maybe
you can pull some of this into a library and avoid the endless
duplication between all (most?) uncore driveres.

> If I should track CPUs going online/offline, then I have questions:
> 1. How I should register tracking callbacks? I see that cstate.c
> registers CPUHP_AP_PERF_X86_CSTATE_STARTING and
> CPUHP_AP_PERF_X86_CSTATE_ONLINE, uncore.c registers
> CPUHP_AP_PERF_X86_UNCORE_ONLINE. What I should use? I incline to UNCORE.

Egads, what a mess :/ Clearly I didn't pay too much attention there.

So ideally we'd not hate a state per PMU, and
__cpuhp_setup_state_cpuslocked() has a .multi_instance argument that
allows reuse of a state.

So yes, please use the PERF_X86_UNCORE ones if possible.

> 2. If I will register for, say UNCORE, then how double registrations
> will be handled if both uncore.c and i915.c will register callbacks? Any
> conflict here?

Should work with .multi_instance I _think_, I've not had the pleasure of
using the new and improved CPU hotplug infrastructure much.

> 3. What I should pass as 2nd argument? Will "perf/x86/intel/i915:online"
> be ok?

Yeah, whatever I think.. something unique. Someone or something will
eventually yell if its no good I suppose ;-)
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC v2 2/3] drm/i915/pmu: serve global events and support perf stat
  2017-08-29  9:28     ` Peter Zijlstra
@ 2017-08-30 17:24       ` Rogozhkin, Dmitry V
  2017-08-30 20:03         ` Peter Zijlstra
  2017-08-30 20:05         ` Peter Zijlstra
  0 siblings, 2 replies; 13+ messages in thread
From: Rogozhkin, Dmitry V @ 2017-08-30 17:24 UTC (permalink / raw)
  To: peterz; +Cc: intel-gfx

On Tue, 2017-08-29 at 11:28 +0200, Peter Zijlstra wrote:
> On Wed, Aug 23, 2017 at 11:38:43PM +0000, Rogozhkin, Dmitry V wrote:
> > On Wed, 2017-08-23 at 08:26 -0700, Dmitry Rogozhkin wrote:
> > > +static cpumask_t i915_pmu_cpumask = CPU_MASK_CPU0;
> > 
> > Peter, this hardcoding of cpumask to use CPU0 works, but should I
> > implement something smarter or this will be sufficient? I see that
> > cstate.c you have pointed me to tries to track CPUs going online/offline
> > and migrates PMU context to another CPU if selected one went offline.
> > Should I follow this way?
> 
> Yes.. x86 used to not allow hotplug of CPU0, but they 'fixed' that :/
> 
> And the perf core needs _a_ valid CPU to run things from, which leaves
> you having to track online/offline things.
> 
> Now, I suppose its all fairly similar for a lot of uncore PMUs, so maybe
> you can pull some of this into a library and avoid the endless
> duplication between all (most?) uncore driveres.
> 
> > If I should track CPUs going online/offline, then I have questions:
> > 1. How I should register tracking callbacks? I see that cstate.c
> > registers CPUHP_AP_PERF_X86_CSTATE_STARTING and
> > CPUHP_AP_PERF_X86_CSTATE_ONLINE, uncore.c registers
> > CPUHP_AP_PERF_X86_UNCORE_ONLINE. What I should use? I incline to UNCORE.
> 
> Egads, what a mess :/ Clearly I didn't pay too much attention there.
> 
> So ideally we'd not hate a state per PMU, and
> __cpuhp_setup_state_cpuslocked() has a .multi_instance argument that
> allows reuse of a state.
> 
> So yes, please use the PERF_X86_UNCORE ones if possible.
> 
> > 2. If I will register for, say UNCORE, then how double registrations
> > will be handled if both uncore.c and i915.c will register callbacks? Any
> > conflict here?
> 
> Should work with .multi_instance I _think_, I've not had the pleasure of
> using the new and improved CPU hotplug infrastructure much.
> 
> > 3. What I should pass as 2nd argument? Will "perf/x86/intel/i915:online"
> > be ok?
> 
> Yeah, whatever I think.. something unique. Someone or something will
> eventually yell if its no good I suppose ;-)


I figured out how to track cpus online/offline status in PMU. Here is a
question however. What is the reason for uncore PMUs (cstate.c for
example) to register for cpus other than cpu0? I see it registers for
first thread of each cpu, on my 8 logical-core systems it registers for
cpu0-3 it seems. If they register for few cpus then perf-stat will
aggregate counters which can be disabled with '-A, --no-aggr' option.
Ok... but they could register for just cpu0. Besides, it looks like on
Linux cpu0 can't go offline at all at least of x86 architecture. Peter,
could you, please, clarify the reasoning to register designated readers
of uncore PMU for few CPUs?

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC v2 2/3] drm/i915/pmu: serve global events and support perf stat
  2017-08-30 17:24       ` Rogozhkin, Dmitry V
@ 2017-08-30 20:03         ` Peter Zijlstra
  2017-08-30 20:05         ` Peter Zijlstra
  1 sibling, 0 replies; 13+ messages in thread
From: Peter Zijlstra @ 2017-08-30 20:03 UTC (permalink / raw)
  To: Rogozhkin, Dmitry V; +Cc: intel-gfx

On Wed, Aug 30, 2017 at 05:24:30PM +0000, Rogozhkin, Dmitry V wrote:
> Ok... but they could register for just cpu0. Besides, it looks like on
> Linux cpu0 can't go offline at all at least of x86 architecture. Peter,
> could you, please, clarify the reasoning to register designated readers
> of uncore PMU for few CPUs?

arch/x86/Kconfig:config BOOTPARAM_HOTPLUG_CPU0

means we cannot rely on CPU0 being present, even on x86.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC v2 2/3] drm/i915/pmu: serve global events and support perf stat
  2017-08-30 17:24       ` Rogozhkin, Dmitry V
  2017-08-30 20:03         ` Peter Zijlstra
@ 2017-08-30 20:05         ` Peter Zijlstra
  1 sibling, 0 replies; 13+ messages in thread
From: Peter Zijlstra @ 2017-08-30 20:05 UTC (permalink / raw)
  To: Rogozhkin, Dmitry V; +Cc: intel-gfx

On Wed, Aug 30, 2017 at 05:24:30PM +0000, Rogozhkin, Dmitry V wrote:
> I figured out how to track cpus online/offline status in PMU. Here is a
> question however. What is the reason for uncore PMUs (cstate.c for
> example) to register for cpus other than cpu0? I see it registers for
> first thread of each cpu, on my 8 logical-core systems it registers for
> cpu0-3 it seems.

The other answer is that on multi-socket, C-state needs more than 1 CPU.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC v2 3/3] drm/i915/pmu: deny perf driver level sampling of i915 PMU
  2017-08-23 23:43   ` Rogozhkin, Dmitry V
       [not found]     ` <3EDB40B547243546A1017B798F8C1AA8847FC002@ORSMSX114.amr.corp.intel.com>
@ 2017-08-31  7:41     ` Peter Zijlstra
  1 sibling, 0 replies; 13+ messages in thread
From: Peter Zijlstra @ 2017-08-31  7:41 UTC (permalink / raw)
  To: Rogozhkin, Dmitry V; +Cc: intel-gfx

On Wed, Aug 23, 2017 at 11:43:13PM +0000, Rogozhkin, Dmitry V wrote:
> Hi Chris,
> 
> Why we had event->hw->hrtimer in i915 PMU? Was there any particular
> reason? You had some use case which did not work?

Some uncore drivers have a hrtimer to poll the counter to deal with
counter overflow. This is needed when the hardware has 'short' counters.

For instance, the regular uncore stuff has 48 bit counters, so we need a
timer to occasionally read them to fold the delta into our 64bit counter
value.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2017-08-31 12:09 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-23 15:26 [RFC v2 0/3] Support perf stat with i915 PMU Dmitry Rogozhkin
2017-08-23 15:26 ` [RFC v2 1/3] drm/i915/pmu: reorder function to suite next patch Dmitry Rogozhkin
2017-08-23 15:26 ` [RFC v2 2/3] drm/i915/pmu: serve global events and support perf stat Dmitry Rogozhkin
2017-08-23 23:38   ` Rogozhkin, Dmitry V
2017-08-28 22:45     ` Rogozhkin, Dmitry V
2017-08-29  9:28     ` Peter Zijlstra
2017-08-30 17:24       ` Rogozhkin, Dmitry V
2017-08-30 20:03         ` Peter Zijlstra
2017-08-30 20:05         ` Peter Zijlstra
2017-08-23 15:26 ` [RFC v2 3/3] drm/i915/pmu: deny perf driver level sampling of i915 PMU Dmitry Rogozhkin
2017-08-23 23:43   ` Rogozhkin, Dmitry V
     [not found]     ` <3EDB40B547243546A1017B798F8C1AA8847FC002@ORSMSX114.amr.corp.intel.com>
     [not found]       ` <150368515309.27971.17522435118048475155@mail.alporthouse.com>
2017-08-25 19:01         ` Rogozhkin, Dmitry V
2017-08-31  7:41     ` Peter Zijlstra

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.