All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tvrtko Ursulin <tursulin@ursulin.net>
To: Intel-gfx@lists.freedesktop.org
Subject: [CI 6/9] drm/i915/pmu: Wire up engine busy stats to PMU
Date: Tue, 21 Nov 2017 18:18:49 +0000	[thread overview]
Message-ID: <20171121181852.16128-6-tvrtko.ursulin@linux.intel.com> (raw)
In-Reply-To: <20171121181852.16128-1-tvrtko.ursulin@linux.intel.com>

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

We can use engine busy stats instead of the sampling timer for
better accuracy.

By doing this we replace the stohastic sampling with busyness
metric derived directly from engine activity. This is context
switch interrupt driven, so as accurate as we can get from
software tracking.

As a secondary benefit, we can also not run the sampling timer
in cases only busyness metric is enabled.

v2: Rebase.
v3:
 * Rebase, comments.
 * Leave engine busyness controls out of workers.
v4: Checkpatch cleanup.
v5: Added comment to pmu_needs_timer change.
v6:
 * Rebase.
 * Fix style of some comments. (Chris Wilson)
v7: Rebase and commit message update. (Chris Wilson)
v8: Add delayed stats disabling to improve accuracy in face of
    CPU hotplug events.
v9: Rebase.
v10: Rebase - i915_modparams.enable_execlists removal.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_pmu.c         | 78 ++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/intel_ringbuffer.h | 14 ++++++
 2 files changed, 90 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index fb95f0ac30ea..6a428e7218d2 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -90,6 +90,11 @@ static unsigned int event_enabled_bit(struct perf_event *event)
 	return config_enabled_bit(event->attr.config);
 }
 
+static bool supports_busy_stats(struct drm_i915_private *i915)
+{
+	return INTEL_GEN(i915) >= 8;
+}
+
 static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
 {
 	u64 enable;
@@ -115,6 +120,12 @@ static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
 	 */
 	if (!gpu_active)
 		enable &= ~ENGINE_SAMPLE_MASK;
+	/*
+	 * Also there is software busyness tracking available we do not
+	 * need the timer for I915_SAMPLE_BUSY counter.
+	 */
+	else if (supports_busy_stats(i915))
+		enable &= ~BIT(I915_SAMPLE_BUSY);
 
 	/*
 	 * If some bits remain it means we need the sampling timer running.
@@ -363,6 +374,9 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
 
 		if (WARN_ON_ONCE(!engine)) {
 			/* Do nothing */
+		} else if (sample == I915_SAMPLE_BUSY &&
+			   engine->pmu.busy_stats) {
+			val = ktime_to_ns(intel_engine_get_busy_time(engine));
 		} else {
 			val = engine->pmu.sample[sample].cur;
 		}
@@ -399,6 +413,12 @@ static void i915_pmu_event_read(struct perf_event *event)
 	local64_add(new - prev, &event->count);
 }
 
+static bool engine_needs_busy_stats(struct intel_engine_cs *engine)
+{
+	return supports_busy_stats(engine->i915) &&
+	       (engine->pmu.enable & BIT(I915_SAMPLE_BUSY));
+}
+
 static void i915_pmu_enable(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
@@ -438,7 +458,21 @@ static void i915_pmu_enable(struct perf_event *event)
 
 		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
 		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
-		engine->pmu.enable_count[sample]++;
+		if (engine->pmu.enable_count[sample]++ == 0) {
+			/*
+			 * Enable engine busy stats tracking if needed or
+			 * alternatively cancel the scheduled disable.
+			 *
+			 * If the delayed disable was pending, cancel it and
+			 * in this case do not enable since it already is.
+			 */
+			if (engine_needs_busy_stats(engine) &&
+			    !engine->pmu.busy_stats) {
+				engine->pmu.busy_stats = true;
+				if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
+					intel_enable_engine_stats(engine);
+			}
+		}
 	}
 
 	/*
@@ -451,6 +485,14 @@ static void i915_pmu_enable(struct perf_event *event)
 	spin_unlock_irqrestore(&i915->pmu.lock, flags);
 }
 
+static void __disable_busy_stats(struct work_struct *work)
+{
+	struct intel_engine_cs *engine =
+	       container_of(work, typeof(*engine), pmu.disable_busy_stats.work);
+
+	intel_disable_engine_stats(engine);
+}
+
 static void i915_pmu_disable(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
@@ -474,8 +516,26 @@ static void i915_pmu_disable(struct perf_event *event)
 		 * Decrement the reference count and clear the enabled
 		 * bitmask when the last listener on an event goes away.
 		 */
-		if (--engine->pmu.enable_count[sample] == 0)
+		if (--engine->pmu.enable_count[sample] == 0) {
 			engine->pmu.enable &= ~BIT(sample);
+			if (!engine_needs_busy_stats(engine) &&
+			    engine->pmu.busy_stats) {
+				engine->pmu.busy_stats = false;
+				/*
+				 * We request a delayed disable to handle the
+				 * rapid on/off cycles on events, which can
+				 * happen when tools like perf stat start, in a
+				 * nicer way.
+				 *
+				 * In addition, this also helps with busy stats
+				 * accuracy with background CPU offline/online
+				 * migration events.
+				 */
+				queue_delayed_work(system_wq,
+						   &engine->pmu.disable_busy_stats,
+						   round_jiffies_up_relative(HZ));
+			}
+		}
 	}
 
 	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
@@ -702,6 +762,8 @@ static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
 
 void i915_pmu_register(struct drm_i915_private *i915)
 {
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 	int ret;
 
 	if (INTEL_GEN(i915) <= 2) {
@@ -723,6 +785,10 @@ void i915_pmu_register(struct drm_i915_private *i915)
 	hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	i915->pmu.timer.function = i915_sample;
 
+	for_each_engine(engine, i915, id)
+		INIT_DELAYED_WORK(&engine->pmu.disable_busy_stats,
+				  __disable_busy_stats);
+
 	ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
 	if (ret)
 		goto err;
@@ -742,6 +808,9 @@ void i915_pmu_register(struct drm_i915_private *i915)
 
 void i915_pmu_unregister(struct drm_i915_private *i915)
 {
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
 	if (!i915->pmu.base.event_init)
 		return;
 
@@ -749,6 +818,11 @@ void i915_pmu_unregister(struct drm_i915_private *i915)
 
 	hrtimer_cancel(&i915->pmu.timer);
 
+	for_each_engine(engine, i915, id) {
+		GEM_BUG_ON(engine->pmu.busy_stats);
+		flush_delayed_work(&engine->pmu.disable_busy_stats);
+	}
+
 	i915_pmu_unregister_cpuhp_state(i915);
 
 	perf_pmu_unregister(&i915->pmu.base);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 42ee815d8f28..3bd30d011866 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -360,6 +360,20 @@ struct intel_engine_cs {
 		 * Our internal timer stores the current counters in this field.
 		 */
 		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
+		/**
+		 * @busy_stats: Has enablement of engine stats tracking been
+		 * 		requested.
+		 */
+		bool busy_stats;
+		/**
+		 * @disable_busy_stats: Work item for busy stats disabling.
+		 *
+		 * Same as with @enable_busy_stats action, with the difference
+		 * that we delay it in case there are rapid enable-disable
+		 * actions, which can happen during tool startup (like perf
+		 * stat).
+		 */
+		struct delayed_work disable_busy_stats;
 	} pmu;
 
 	/*
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2017-11-21 18:19 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-11-21 18:18 [CI 1/9] drm/i915: Extract intel_get_cagf Tvrtko Ursulin
2017-11-21 18:18 ` [CI 2/9] drm/i915/pmu: Expose a PMU interface for perf queries Tvrtko Ursulin
2017-12-01  1:01   ` Rodrigo Vivi
2017-12-01  9:51     ` Tvrtko Ursulin
2017-11-21 18:18 ` [CI 3/9] drm/i915/pmu: Suspend sampling when GPU is idle Tvrtko Ursulin
2017-11-21 18:18 ` [CI 4/9] drm/i915: Wrap context schedule notification Tvrtko Ursulin
2017-11-21 18:18 ` [CI 5/9] drm/i915: Engine busy time tracking Tvrtko Ursulin
2017-11-21 18:18 ` Tvrtko Ursulin [this message]
2017-11-21 18:18 ` [CI 7/9] drm/i915/pmu: Add interrupt count metric Tvrtko Ursulin
2017-11-21 18:18 ` [CI 8/9] drm/i915: Convert intel_rc6_residency_us to ns Tvrtko Ursulin
2017-11-21 18:18 ` [CI 9/9] drm/i915/pmu: Add RC6 residency metrics Tvrtko Ursulin
2017-11-21 18:40 ` ✓ Fi.CI.BAT: success for series starting with [CI,1/9] drm/i915: Extract intel_get_cagf Patchwork
2017-11-22 11:31   ` Tvrtko Ursulin
2017-11-22 11:34     ` Tomi Sarvela
2017-11-21 19:37 ` ✗ Fi.CI.IGT: warning " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20171121181852.16128-6-tvrtko.ursulin@linux.intel.com \
    --to=tursulin@ursulin.net \
    --cc=Intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.