All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tvrtko Ursulin <tursulin@ursulin.net>
To: Intel-gfx@lists.freedesktop.org
Subject: [PATCH 2/4] drm/i915/pmu: Fix PMU enable vs execlists tasklet race
Date: Tue, 13 Feb 2018 09:29:06 +0000	[thread overview]
Message-ID: <20180213092908.1600-2-tvrtko.ursulin@linux.intel.com> (raw)
In-Reply-To: <20180213092908.1600-1-tvrtko.ursulin@linux.intel.com>

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Commit 99e48bf98dd0 ("drm/i915: Lock out execlist tasklet while peeking
inside for busy-stats") added a tasklet_disable call in busy stats
enabling, but we failed to understand that the PMU enable callback runs
as an hard IRQ (IPI).

Consequence of this is that the PMU enable callback can interrupt the
execlists tasklet, and will then deadlock when it calls
intel_engine_stats_enable->tasklet_disable.

To fix this, I realized it is possible to move the engine stats enablement
and disablement to PMU event init and destroy hooks. This allows for much
simpler implementation since those hooks run in normal context (can
sleep).

v2: Extract engine_event_destroy. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Fixes: 99e48bf98dd0 ("drm/i915: Lock out execlist tasklet while peeking inside for busy-stats")
Testcase: igt/perf_pmu/enable-race-*
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: intel-gfx@lists.freedesktop.org
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20180205093448.13877-1-tvrtko.ursulin@linux.intel.com
---
 drivers/gpu/drm/i915/i915_pmu.c         | 125 +++++++++++++-------------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  14 ----
 2 files changed, 52 insertions(+), 87 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 55a8a1e29424..337eaa6ede52 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -285,26 +285,41 @@ static u64 count_interrupts(struct drm_i915_private *i915)
 	return sum;
 }
 
-static void i915_pmu_event_destroy(struct perf_event *event)
+static void engine_event_destroy(struct perf_event *event)
 {
-	WARN_ON(event->parent);
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct intel_engine_cs *engine;
+
+	engine = intel_engine_lookup_user(i915,
+					  engine_event_class(event),
+					  engine_event_instance(event));
+	if (WARN_ON_ONCE(!engine))
+		return;
+
+	if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
+	    intel_engine_supports_stats(engine))
+		intel_disable_engine_stats(engine);
 }
 
-static int engine_event_init(struct perf_event *event)
+static void i915_pmu_event_destroy(struct perf_event *event)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
+	WARN_ON(event->parent);
 
-	if (!intel_engine_lookup_user(i915, engine_event_class(event),
-				      engine_event_instance(event)))
-		return -ENODEV;
+	if (is_engine_event(event))
+		engine_event_destroy(event);
+}
 
-	switch (engine_event_sample(event)) {
+static int
+engine_event_status(struct intel_engine_cs *engine,
+		    enum drm_i915_pmu_engine_sample sample)
+{
+	switch (sample) {
 	case I915_SAMPLE_BUSY:
 	case I915_SAMPLE_WAIT:
 		break;
 	case I915_SAMPLE_SEMA:
-		if (INTEL_GEN(i915) < 6)
+		if (INTEL_GEN(engine->i915) < 6)
 			return -ENODEV;
 		break;
 	default:
@@ -314,6 +329,30 @@ static int engine_event_init(struct perf_event *event)
 	return 0;
 }
 
+static int engine_event_init(struct perf_event *event)
+{
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct intel_engine_cs *engine;
+	u8 sample;
+	int ret;
+
+	engine = intel_engine_lookup_user(i915, engine_event_class(event),
+					  engine_event_instance(event));
+	if (!engine)
+		return -ENODEV;
+
+	sample = engine_event_sample(event);
+	ret = engine_event_status(engine, sample);
+	if (ret)
+		return ret;
+
+	if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
+		ret = intel_enable_engine_stats(engine);
+
+	return ret;
+}
+
 static int i915_pmu_event_init(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
@@ -387,7 +426,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
 		if (WARN_ON_ONCE(!engine)) {
 			/* Do nothing */
 		} else if (sample == I915_SAMPLE_BUSY &&
-			   engine->pmu.busy_stats) {
+			   intel_engine_supports_stats(engine)) {
 			val = ktime_to_ns(intel_engine_get_busy_time(engine));
 		} else {
 			val = engine->pmu.sample[sample].cur;
@@ -442,12 +481,6 @@ static void i915_pmu_event_read(struct perf_event *event)
 	local64_add(new - prev, &event->count);
 }
 
-static bool engine_needs_busy_stats(struct intel_engine_cs *engine)
-{
-	return intel_engine_supports_stats(engine) &&
-	       (engine->pmu.enable & BIT(I915_SAMPLE_BUSY));
-}
-
 static void i915_pmu_enable(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
@@ -487,21 +520,7 @@ static void i915_pmu_enable(struct perf_event *event)
 
 		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
 		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
-		if (engine->pmu.enable_count[sample]++ == 0) {
-			/*
-			 * Enable engine busy stats tracking if needed or
-			 * alternatively cancel the scheduled disable.
-			 *
-			 * If the delayed disable was pending, cancel it and
-			 * in this case do not enable since it already is.
-			 */
-			if (engine_needs_busy_stats(engine) &&
-			    !engine->pmu.busy_stats) {
-				engine->pmu.busy_stats = true;
-				if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
-					intel_enable_engine_stats(engine);
-			}
-		}
+		engine->pmu.enable_count[sample]++;
 	}
 
 	/*
@@ -514,14 +533,6 @@ static void i915_pmu_enable(struct perf_event *event)
 	spin_unlock_irqrestore(&i915->pmu.lock, flags);
 }
 
-static void __disable_busy_stats(struct work_struct *work)
-{
-	struct intel_engine_cs *engine =
-	       container_of(work, typeof(*engine), pmu.disable_busy_stats.work);
-
-	intel_disable_engine_stats(engine);
-}
-
 static void i915_pmu_disable(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
@@ -545,26 +556,8 @@ static void i915_pmu_disable(struct perf_event *event)
 		 * Decrement the reference count and clear the enabled
 		 * bitmask when the last listener on an event goes away.
 		 */
-		if (--engine->pmu.enable_count[sample] == 0) {
+		if (--engine->pmu.enable_count[sample] == 0)
 			engine->pmu.enable &= ~BIT(sample);
-			if (!engine_needs_busy_stats(engine) &&
-			    engine->pmu.busy_stats) {
-				engine->pmu.busy_stats = false;
-				/*
-				 * We request a delayed disable to handle the
-				 * rapid on/off cycles on events, which can
-				 * happen when tools like perf stat start, in a
-				 * nicer way.
-				 *
-				 * In addition, this also helps with busy stats
-				 * accuracy with background CPU offline/online
-				 * migration events.
-				 */
-				queue_delayed_work(system_wq,
-						   &engine->pmu.disable_busy_stats,
-						   round_jiffies_up_relative(HZ));
-			}
-		}
 	}
 
 	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
@@ -797,8 +790,6 @@ static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
 
 void i915_pmu_register(struct drm_i915_private *i915)
 {
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
 	int ret;
 
 	if (INTEL_GEN(i915) <= 2) {
@@ -820,10 +811,6 @@ void i915_pmu_register(struct drm_i915_private *i915)
 	hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	i915->pmu.timer.function = i915_sample;
 
-	for_each_engine(engine, i915, id)
-		INIT_DELAYED_WORK(&engine->pmu.disable_busy_stats,
-				  __disable_busy_stats);
-
 	ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
 	if (ret)
 		goto err;
@@ -843,9 +830,6 @@ void i915_pmu_register(struct drm_i915_private *i915)
 
 void i915_pmu_unregister(struct drm_i915_private *i915)
 {
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
 	if (!i915->pmu.base.event_init)
 		return;
 
@@ -853,11 +837,6 @@ void i915_pmu_unregister(struct drm_i915_private *i915)
 
 	hrtimer_cancel(&i915->pmu.timer);
 
-	for_each_engine(engine, i915, id) {
-		GEM_BUG_ON(engine->pmu.busy_stats);
-		flush_delayed_work(&engine->pmu.disable_busy_stats);
-	}
-
 	i915_pmu_unregister_cpuhp_state(i915);
 
 	perf_pmu_unregister(&i915->pmu.base);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c5ff203e42d6..a0e7a6c2a57c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -366,20 +366,6 @@ struct intel_engine_cs {
 		 */
 #define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1)
 		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
-		/**
-		 * @busy_stats: Has enablement of engine stats tracking been
-		 * 		requested.
-		 */
-		bool busy_stats;
-		/**
-		 * @disable_busy_stats: Work item for busy stats disabling.
-		 *
-		 * Same as with @enable_busy_stats action, with the difference
-		 * that we delay it in case there are rapid enable-disable
-		 * actions, which can happen during tool startup (like perf
-		 * stat).
-		 */
-		struct delayed_work disable_busy_stats;
 	} pmu;
 
 	/*
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2018-02-13  9:29 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-02-13  7:38 patches that failed to cherry-pick on drm-intel-fixes for 4.16-rc1 Rodrigo Vivi
2018-02-13  9:01 ` [PATCH] drm/i915/breadcrumbs: Ignore unsubmitted signalers Chris Wilson
2018-02-14  1:00   ` Rodrigo Vivi
2018-02-14  1:00     ` Rodrigo Vivi
2018-02-13  9:20 ` patches that failed to cherry-pick on drm-intel-fixes for 4.16-rc1 Tvrtko Ursulin
2018-02-13  9:38   ` Jani Nikula
2018-02-13  9:57     ` [PATCH 1/4] drm/i915: Lock out execlist tasklet while peeking inside for busy-stats Tvrtko Ursulin
2018-02-13  9:57       ` [PATCH 2/4] drm/i915/pmu: Fix PMU enable vs execlists tasklet race Tvrtko Ursulin
2018-02-13  9:57       ` [PATCH 3/4] drm/i915/pmu: Fix sleep under atomic in RC6 readout Tvrtko Ursulin
2018-02-13  9:57       ` [PATCH 4/4] drm/i915/pmu: Fix building without CONFIG_PM Tvrtko Ursulin
2018-02-14  0:58         ` Rodrigo Vivi
2018-02-13  9:29 ` [PATCH 1/4] drm/i915: Lock out execlist tasklet while peeking inside for busy-stats Tvrtko Ursulin
2018-02-13  9:29   ` Tvrtko Ursulin [this message]
2018-02-13  9:29   ` [PATCH 3/4] drm/i915/pmu: Fix sleep under atomic in RC6 readout Tvrtko Ursulin
2018-02-13  9:29   ` [PATCH 4/4] drm/i915/pmu: Fix building without CONFIG_PM Tvrtko Ursulin
2018-02-13  9:43 ` ✗ Fi.CI.BAT: failure for drm/i915/breadcrumbs: Ignore unsubmitted signalers (rev2) Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180213092908.1600-2-tvrtko.ursulin@linux.intel.com \
    --to=tursulin@ursulin.net \
    --cc=Intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.