All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915/pmu: Fix PMU enable vs execlists tasklet race
@ 2018-02-02 18:38 Tvrtko Ursulin
  2018-02-02 19:01 ` ✓ Fi.CI.BAT: success for " Patchwork
                   ` (4 more replies)
  0 siblings, 5 replies; 8+ messages in thread
From: Tvrtko Ursulin @ 2018-02-02 18:38 UTC (permalink / raw)
  To: Intel-gfx; +Cc: intel-gfx, Rodrigo Vivi

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Commit 99e48bf98dd0 ("drm/i915: Lock out execlist tasklet while peeking
inside for busy-stats") added a tasklet_disable call in busy stats
enabling, but we failed to understand that the PMU enable callback runs
as an hard IRQ (IPI).

Consequence of this is that the PMU enable callback can interrupt the
execlists tasklet, and will then deadlock when it calls
intel_engine_stats_enable->tasklet_disable.

To fix this, I realized it is possible to move the engine stats enablement
and disablement to PMU event init and destroy hooks. This allows for much
simpler implementation since those hooks run in normal context (can
sleep).

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Fixes: 99e48bf98dd0 ("drm/i915: Lock out execlist tasklet while peeking inside for busy-stats")
Testcase: igt/perf_pmu/enable-race-*
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: intel-gfx@lists.freedesktop.org
---
 drivers/gpu/drm/i915/i915_pmu.c         | 95 +++++++++++----------------------
 drivers/gpu/drm/i915/intel_ringbuffer.h | 14 -----
 2 files changed, 31 insertions(+), 78 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index ecb0198bfb7a..e97860b44004 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -287,7 +287,24 @@ static u64 count_interrupts(struct drm_i915_private *i915)
 
 static void i915_pmu_event_destroy(struct perf_event *event)
 {
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct intel_engine_cs *engine;
+
 	WARN_ON(event->parent);
+
+	if (!is_engine_event(event))
+		return;
+
+	engine = intel_engine_lookup_user(i915,
+					  engine_event_class(event),
+					  engine_event_instance(event));
+	if (WARN_ON_ONCE(!engine))
+		return;
+
+	if ((engine_event_sample(event) == I915_SAMPLE_BUSY) &&
+	    intel_engine_supports_stats(engine))
+		intel_disable_engine_stats(engine);
 }
 
 static int
@@ -340,13 +357,23 @@ static int engine_event_init(struct perf_event *event)
 	struct drm_i915_private *i915 =
 		container_of(event->pmu, typeof(*i915), pmu.base);
 	struct intel_engine_cs *engine;
+	u8 sample;
+	int ret;
 
 	engine = intel_engine_lookup_user(i915, engine_event_class(event),
 					  engine_event_instance(event));
 	if (!engine)
 		return -ENODEV;
 
-	return engine_event_status(engine, engine_event_sample(event));
+	sample = engine_event_sample(event);
+	ret = engine_event_status(engine, sample);
+	if (ret)
+		return ret;
+
+	if ((sample == I915_SAMPLE_BUSY) && intel_engine_supports_stats(engine))
+		ret = intel_enable_engine_stats(engine);
+
+	return ret;
 }
 
 static int i915_pmu_event_init(struct perf_event *event)
@@ -402,7 +429,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
 		if (WARN_ON_ONCE(!engine)) {
 			/* Do nothing */
 		} else if (sample == I915_SAMPLE_BUSY &&
-			   engine->pmu.busy_stats) {
+			   intel_engine_supports_stats(engine)) {
 			val = ktime_to_ns(intel_engine_get_busy_time(engine));
 		} else {
 			val = engine->pmu.sample[sample].cur;
@@ -457,12 +484,6 @@ static void i915_pmu_event_read(struct perf_event *event)
 	local64_add(new - prev, &event->count);
 }
 
-static bool engine_needs_busy_stats(struct intel_engine_cs *engine)
-{
-	return intel_engine_supports_stats(engine) &&
-	       (engine->pmu.enable & BIT(I915_SAMPLE_BUSY));
-}
-
 static void i915_pmu_enable(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
@@ -502,21 +523,7 @@ static void i915_pmu_enable(struct perf_event *event)
 
 		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
 		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
-		if (engine->pmu.enable_count[sample]++ == 0) {
-			/*
-			 * Enable engine busy stats tracking if needed or
-			 * alternatively cancel the scheduled disable.
-			 *
-			 * If the delayed disable was pending, cancel it and
-			 * in this case do not enable since it already is.
-			 */
-			if (engine_needs_busy_stats(engine) &&
-			    !engine->pmu.busy_stats) {
-				engine->pmu.busy_stats = true;
-				if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
-					intel_enable_engine_stats(engine);
-			}
-		}
+		engine->pmu.enable_count[sample]++;
 	}
 
 	/*
@@ -529,14 +536,6 @@ static void i915_pmu_enable(struct perf_event *event)
 	spin_unlock_irqrestore(&i915->pmu.lock, flags);
 }
 
-static void __disable_busy_stats(struct work_struct *work)
-{
-	struct intel_engine_cs *engine =
-	       container_of(work, typeof(*engine), pmu.disable_busy_stats.work);
-
-	intel_disable_engine_stats(engine);
-}
-
 static void i915_pmu_disable(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
@@ -560,26 +559,8 @@ static void i915_pmu_disable(struct perf_event *event)
 		 * Decrement the reference count and clear the enabled
 		 * bitmask when the last listener on an event goes away.
 		 */
-		if (--engine->pmu.enable_count[sample] == 0) {
+		if (--engine->pmu.enable_count[sample] == 0)
 			engine->pmu.enable &= ~BIT(sample);
-			if (!engine_needs_busy_stats(engine) &&
-			    engine->pmu.busy_stats) {
-				engine->pmu.busy_stats = false;
-				/*
-				 * We request a delayed disable to handle the
-				 * rapid on/off cycles on events, which can
-				 * happen when tools like perf stat start, in a
-				 * nicer way.
-				 *
-				 * In addition, this also helps with busy stats
-				 * accuracy with background CPU offline/online
-				 * migration events.
-				 */
-				queue_delayed_work(system_wq,
-						   &engine->pmu.disable_busy_stats,
-						   round_jiffies_up_relative(HZ));
-			}
-		}
 	}
 
 	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
@@ -956,8 +937,6 @@ static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
 
 void i915_pmu_register(struct drm_i915_private *i915)
 {
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
 	int ret;
 
 	if (INTEL_GEN(i915) <= 2) {
@@ -985,10 +964,6 @@ void i915_pmu_register(struct drm_i915_private *i915)
 	hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	i915->pmu.timer.function = i915_sample;
 
-	for_each_engine(engine, i915, id)
-		INIT_DELAYED_WORK(&engine->pmu.disable_busy_stats,
-				  __disable_busy_stats);
-
 	ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
 	if (ret)
 		goto err;
@@ -1009,9 +984,6 @@ void i915_pmu_register(struct drm_i915_private *i915)
 
 void i915_pmu_unregister(struct drm_i915_private *i915)
 {
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
 	if (!i915->pmu.base.event_init)
 		return;
 
@@ -1019,11 +991,6 @@ void i915_pmu_unregister(struct drm_i915_private *i915)
 
 	hrtimer_cancel(&i915->pmu.timer);
 
-	for_each_engine(engine, i915, id) {
-		GEM_BUG_ON(engine->pmu.busy_stats);
-		flush_delayed_work(&engine->pmu.disable_busy_stats);
-	}
-
 	i915_pmu_unregister_cpuhp_state(i915);
 
 	perf_pmu_unregister(&i915->pmu.base);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c5ff203e42d6..a0e7a6c2a57c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -366,20 +366,6 @@ struct intel_engine_cs {
 		 */
 #define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1)
 		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
-		/**
-		 * @busy_stats: Has enablement of engine stats tracking been
-		 * 		requested.
-		 */
-		bool busy_stats;
-		/**
-		 * @disable_busy_stats: Work item for busy stats disabling.
-		 *
-		 * Same as with @enable_busy_stats action, with the difference
-		 * that we delay it in case there are rapid enable-disable
-		 * actions, which can happen during tool startup (like perf
-		 * stat).
-		 */
-		struct delayed_work disable_busy_stats;
 	} pmu;
 
 	/*
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915/pmu: Fix PMU enable vs execlists tasklet race
  2018-02-02 18:38 [PATCH] drm/i915/pmu: Fix PMU enable vs execlists tasklet race Tvrtko Ursulin
@ 2018-02-02 19:01 ` Patchwork
  2018-02-02 21:41 ` ✓ Fi.CI.IGT: " Patchwork
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 8+ messages in thread
From: Patchwork @ 2018-02-02 19:01 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/pmu: Fix PMU enable vs execlists tasklet race
URL   : https://patchwork.freedesktop.org/series/37575/
State : success

== Summary ==

Series 37575v1 drm/i915/pmu: Fix PMU enable vs execlists tasklet race
https://patchwork.freedesktop.org/api/1.0/series/37575/revisions/1/mbox/

Test gem_exec_suspend:
        Subgroup basic-s4-devices:
                incomplete -> PASS       (fi-bdw-5557u) fdo#104162

fdo#104162 https://bugs.freedesktop.org/show_bug.cgi?id=104162

fi-bdw-5557u     total:288  pass:267  dwarn:0   dfail:0   fail:0   skip:21  time:419s
fi-bdw-gvtdvm    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:426s
fi-blb-e6850     total:288  pass:223  dwarn:1   dfail:0   fail:0   skip:64  time:373s
fi-bsw-n3050     total:288  pass:242  dwarn:0   dfail:0   fail:0   skip:46  time:489s
fi-bwr-2160      total:288  pass:183  dwarn:0   dfail:0   fail:0   skip:105 time:284s
fi-bxt-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:485s
fi-bxt-j4205     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:481s
fi-byt-j1900     total:288  pass:253  dwarn:0   dfail:0   fail:0   skip:35  time:468s
fi-byt-n2820     total:288  pass:249  dwarn:0   dfail:0   fail:0   skip:39  time:458s
fi-cfl-s2        total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:571s
fi-elk-e7500     total:288  pass:229  dwarn:0   dfail:0   fail:0   skip:59  time:415s
fi-gdg-551       total:288  pass:179  dwarn:0   dfail:0   fail:1   skip:108 time:287s
fi-glk-1         total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:511s
fi-hsw-4770      total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:391s
fi-hsw-4770r     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:403s
fi-ilk-650       total:288  pass:228  dwarn:0   dfail:0   fail:0   skip:60  time:413s
fi-ivb-3520m     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:458s
fi-ivb-3770      total:288  pass:255  dwarn:0   dfail:0   fail:0   skip:33  time:415s
fi-kbl-7500u     total:288  pass:263  dwarn:1   dfail:0   fail:0   skip:24  time:455s
fi-kbl-7560u     total:288  pass:269  dwarn:0   dfail:0   fail:0   skip:19  time:500s
fi-kbl-7567u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:455s
fi-kbl-r         total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:500s
fi-pnv-d510      total:288  pass:222  dwarn:1   dfail:0   fail:0   skip:65  time:575s
fi-skl-6260u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:427s
fi-skl-6600u     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:516s
fi-skl-6700hq    total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:529s
fi-skl-6700k2    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:492s
fi-skl-6770hq    total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:477s
fi-skl-guc       total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:415s
fi-skl-gvtdvm    total:288  pass:265  dwarn:0   dfail:0   fail:0   skip:23  time:431s
fi-snb-2520m     total:3    pass:2    dwarn:0   dfail:0   fail:0   skip:0  
fi-snb-2600      total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:396s
Blacklisted hosts:
fi-glk-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:467s

bde4e003ce549b8225142843e0d5aee19dd37d13 drm-tip: 2018y-02m-02d-15h-03m-15s UTC integration manifest
0056845d822a drm/i915/pmu: Fix PMU enable vs execlists tasklet race

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7867/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* ✓ Fi.CI.IGT: success for drm/i915/pmu: Fix PMU enable vs execlists tasklet race
  2018-02-02 18:38 [PATCH] drm/i915/pmu: Fix PMU enable vs execlists tasklet race Tvrtko Ursulin
  2018-02-02 19:01 ` ✓ Fi.CI.BAT: success for " Patchwork
@ 2018-02-02 21:41 ` Patchwork
  2018-02-02 21:48 ` [PATCH] " Chris Wilson
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 8+ messages in thread
From: Patchwork @ 2018-02-02 21:41 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/pmu: Fix PMU enable vs execlists tasklet race
URL   : https://patchwork.freedesktop.org/series/37575/
State : success

== Summary ==

Test perf:
        Subgroup enable-disable:
                fail       -> PASS       (shard-apl) fdo#103715
        Subgroup blocking:
                fail       -> PASS       (shard-hsw) fdo#102252
Test kms_cursor_crc:
        Subgroup cursor-128x128-suspend:
                pass       -> INCOMPLETE (shard-snb) fdo#103880
                incomplete -> PASS       (shard-hsw) fdo#103540
Test perf_pmu:
        Subgroup busy-double-start-vcs0:
                incomplete -> PASS       (shard-apl)
Test kms_frontbuffer_tracking:
        Subgroup fbc-1p-shrfb-fliptrack:
                pass       -> FAIL       (shard-apl) fdo#103167
Test kms_flip:
        Subgroup plain-flip-fb-recreate-interruptible:
                pass       -> FAIL       (shard-hsw) fdo#100368 +1
        Subgroup 2x-plain-flip-ts-check:
                fail       -> PASS       (shard-hsw)
Test kms_sysfs_edid_timing:
                pass       -> WARN       (shard-apl) fdo#100047

fdo#103715 https://bugs.freedesktop.org/show_bug.cgi?id=103715
fdo#102252 https://bugs.freedesktop.org/show_bug.cgi?id=102252
fdo#103880 https://bugs.freedesktop.org/show_bug.cgi?id=103880
fdo#103540 https://bugs.freedesktop.org/show_bug.cgi?id=103540
fdo#103167 https://bugs.freedesktop.org/show_bug.cgi?id=103167
fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#100047 https://bugs.freedesktop.org/show_bug.cgi?id=100047

shard-apl        total:2836 pass:1749 dwarn:1   dfail:0   fail:21  skip:1064 time:12388s
shard-hsw        total:2836 pass:1732 dwarn:1   dfail:0   fail:12  skip:1090 time:11553s
shard-snb        total:2800 pass:1309 dwarn:1   dfail:0   fail:10  skip:1479 time:6086s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7867/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] drm/i915/pmu: Fix PMU enable vs execlists tasklet race
  2018-02-02 18:38 [PATCH] drm/i915/pmu: Fix PMU enable vs execlists tasklet race Tvrtko Ursulin
  2018-02-02 19:01 ` ✓ Fi.CI.BAT: success for " Patchwork
  2018-02-02 21:41 ` ✓ Fi.CI.IGT: " Patchwork
@ 2018-02-02 21:48 ` Chris Wilson
  2018-02-05  9:34   ` [PATCH v2] " Tvrtko Ursulin
  2018-02-05 10:16 ` ✓ Fi.CI.BAT: success for drm/i915/pmu: Fix PMU enable vs execlists tasklet race (rev2) Patchwork
  2018-02-06  9:10 ` ✓ Fi.CI.IGT: " Patchwork
  4 siblings, 1 reply; 8+ messages in thread
From: Chris Wilson @ 2018-02-02 21:48 UTC (permalink / raw)
  To: Tvrtko Ursulin, Intel-gfx; +Cc: intel-gfx, Rodrigo Vivi

Quoting Tvrtko Ursulin (2018-02-02 18:38:16)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Commit 99e48bf98dd0 ("drm/i915: Lock out execlist tasklet while peeking
> inside for busy-stats") added a tasklet_disable call in busy stats
> enabling, but we failed to understand that the PMU enable callback runs
> as an hard IRQ (IPI).
> 
> Consequence of this is that the PMU enable callback can interrupt the
> execlists tasklet, and will then deadlock when it calls
> intel_engine_stats_enable->tasklet_disable.
> 
> To fix this, I realized it is possible to move the engine stats enablement
> and disablement to PMU event init and destroy hooks. This allows for much
> simpler implementation since those hooks run in normal context (can
> sleep).
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Fixes: 99e48bf98dd0 ("drm/i915: Lock out execlist tasklet while peeking inside for busy-stats")
> Testcase: igt/perf_pmu/enable-race-*
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Jani Nikula <jani.nikula@linux.intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: intel-gfx@lists.freedesktop.org
> ---
>  drivers/gpu/drm/i915/i915_pmu.c         | 95 +++++++++++----------------------
>  drivers/gpu/drm/i915/intel_ringbuffer.h | 14 -----
>  2 files changed, 31 insertions(+), 78 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index ecb0198bfb7a..e97860b44004 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -287,7 +287,24 @@ static u64 count_interrupts(struct drm_i915_private *i915)
>  
>  static void i915_pmu_event_destroy(struct perf_event *event)
>  {
> +       struct drm_i915_private *i915 =
> +               container_of(event->pmu, typeof(*i915), pmu.base);
> +       struct intel_engine_cs *engine;
> +
>         WARN_ON(event->parent);
> +
> +       if (!is_engine_event(event))
> +               return;

I think engine_event_destroy() for symmetry with engine_event_init() ?

> +
> +       engine = intel_engine_lookup_user(i915,
> +                                         engine_event_class(event),
> +                                         engine_event_instance(event));
> +       if (WARN_ON_ONCE(!engine))
> +               return;
> +
> +       if ((engine_event_sample(event) == I915_SAMPLE_BUSY) &&

Rogue ()

> +           intel_engine_supports_stats(engine))
> +               intel_disable_engine_stats(engine);
>  }
>  
>  static int
> @@ -340,13 +357,23 @@ static int engine_event_init(struct perf_event *event)
>         struct drm_i915_private *i915 =
>                 container_of(event->pmu, typeof(*i915), pmu.base);
>         struct intel_engine_cs *engine;
> +       u8 sample;
> +       int ret;
>  
>         engine = intel_engine_lookup_user(i915, engine_event_class(event),
>                                           engine_event_instance(event));
>         if (!engine)
>                 return -ENODEV;
>  
> -       return engine_event_status(engine, engine_event_sample(event));
> +       sample = engine_event_sample(event);
> +       ret = engine_event_status(engine, sample);
> +       if (ret)
> +               return ret;
> +
> +       if ((sample == I915_SAMPLE_BUSY) && intel_engine_supports_stats(engine))
> +               ret = intel_enable_engine_stats(engine);
> +
> +       return ret;
>  }

Yeah, that is disgustingly simpler. On a second look nothing springs to
mind why this shouldn't work. Will sleep on it.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v2] drm/i915/pmu: Fix PMU enable vs execlists tasklet race
  2018-02-02 21:48 ` [PATCH] " Chris Wilson
@ 2018-02-05  9:34   ` Tvrtko Ursulin
  2018-02-05 10:20     ` Chris Wilson
  0 siblings, 1 reply; 8+ messages in thread
From: Tvrtko Ursulin @ 2018-02-05  9:34 UTC (permalink / raw)
  To: Intel-gfx; +Cc: intel-gfx, Rodrigo Vivi

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Commit 99e48bf98dd0 ("drm/i915: Lock out execlist tasklet while peeking
inside for busy-stats") added a tasklet_disable call in busy stats
enabling, but we failed to understand that the PMU enable callback runs
as an hard IRQ (IPI).

Consequence of this is that the PMU enable callback can interrupt the
execlists tasklet, and will then deadlock when it calls
intel_engine_stats_enable->tasklet_disable.

To fix this, I realized it is possible to move the engine stats enablement
and disablement to PMU event init and destroy hooks. This allows for much
simpler implementation since those hooks run in normal context (can
sleep).

v2: Extract engine_event_destroy. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Fixes: 99e48bf98dd0 ("drm/i915: Lock out execlist tasklet while peeking inside for busy-stats")
Testcase: igt/perf_pmu/enable-race-*
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: intel-gfx@lists.freedesktop.org
---
 drivers/gpu/drm/i915/i915_pmu.c         | 98 ++++++++++++---------------------
 drivers/gpu/drm/i915/intel_ringbuffer.h | 14 -----
 2 files changed, 34 insertions(+), 78 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index ecb0198bfb7a..1c440460255d 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -285,9 +285,29 @@ static u64 count_interrupts(struct drm_i915_private *i915)
 	return sum;
 }
 
+static void engine_event_destroy(struct perf_event *event)
+{
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct intel_engine_cs *engine;
+
+	engine = intel_engine_lookup_user(i915,
+					  engine_event_class(event),
+					  engine_event_instance(event));
+	if (WARN_ON_ONCE(!engine))
+		return;
+
+	if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
+	    intel_engine_supports_stats(engine))
+		intel_disable_engine_stats(engine);
+}
+
 static void i915_pmu_event_destroy(struct perf_event *event)
 {
 	WARN_ON(event->parent);
+
+	if (is_engine_event(event))
+		engine_event_destroy(event);
 }
 
 static int
@@ -340,13 +360,23 @@ static int engine_event_init(struct perf_event *event)
 	struct drm_i915_private *i915 =
 		container_of(event->pmu, typeof(*i915), pmu.base);
 	struct intel_engine_cs *engine;
+	u8 sample;
+	int ret;
 
 	engine = intel_engine_lookup_user(i915, engine_event_class(event),
 					  engine_event_instance(event));
 	if (!engine)
 		return -ENODEV;
 
-	return engine_event_status(engine, engine_event_sample(event));
+	sample = engine_event_sample(event);
+	ret = engine_event_status(engine, sample);
+	if (ret)
+		return ret;
+
+	if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
+		ret = intel_enable_engine_stats(engine);
+
+	return ret;
 }
 
 static int i915_pmu_event_init(struct perf_event *event)
@@ -402,7 +432,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
 		if (WARN_ON_ONCE(!engine)) {
 			/* Do nothing */
 		} else if (sample == I915_SAMPLE_BUSY &&
-			   engine->pmu.busy_stats) {
+			   intel_engine_supports_stats(engine)) {
 			val = ktime_to_ns(intel_engine_get_busy_time(engine));
 		} else {
 			val = engine->pmu.sample[sample].cur;
@@ -457,12 +487,6 @@ static void i915_pmu_event_read(struct perf_event *event)
 	local64_add(new - prev, &event->count);
 }
 
-static bool engine_needs_busy_stats(struct intel_engine_cs *engine)
-{
-	return intel_engine_supports_stats(engine) &&
-	       (engine->pmu.enable & BIT(I915_SAMPLE_BUSY));
-}
-
 static void i915_pmu_enable(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
@@ -502,21 +526,7 @@ static void i915_pmu_enable(struct perf_event *event)
 
 		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
 		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
-		if (engine->pmu.enable_count[sample]++ == 0) {
-			/*
-			 * Enable engine busy stats tracking if needed or
-			 * alternatively cancel the scheduled disable.
-			 *
-			 * If the delayed disable was pending, cancel it and
-			 * in this case do not enable since it already is.
-			 */
-			if (engine_needs_busy_stats(engine) &&
-			    !engine->pmu.busy_stats) {
-				engine->pmu.busy_stats = true;
-				if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
-					intel_enable_engine_stats(engine);
-			}
-		}
+		engine->pmu.enable_count[sample]++;
 	}
 
 	/*
@@ -529,14 +539,6 @@ static void i915_pmu_enable(struct perf_event *event)
 	spin_unlock_irqrestore(&i915->pmu.lock, flags);
 }
 
-static void __disable_busy_stats(struct work_struct *work)
-{
-	struct intel_engine_cs *engine =
-	       container_of(work, typeof(*engine), pmu.disable_busy_stats.work);
-
-	intel_disable_engine_stats(engine);
-}
-
 static void i915_pmu_disable(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
@@ -560,26 +562,8 @@ static void i915_pmu_disable(struct perf_event *event)
 		 * Decrement the reference count and clear the enabled
 		 * bitmask when the last listener on an event goes away.
 		 */
-		if (--engine->pmu.enable_count[sample] == 0) {
+		if (--engine->pmu.enable_count[sample] == 0)
 			engine->pmu.enable &= ~BIT(sample);
-			if (!engine_needs_busy_stats(engine) &&
-			    engine->pmu.busy_stats) {
-				engine->pmu.busy_stats = false;
-				/*
-				 * We request a delayed disable to handle the
-				 * rapid on/off cycles on events, which can
-				 * happen when tools like perf stat start, in a
-				 * nicer way.
-				 *
-				 * In addition, this also helps with busy stats
-				 * accuracy with background CPU offline/online
-				 * migration events.
-				 */
-				queue_delayed_work(system_wq,
-						   &engine->pmu.disable_busy_stats,
-						   round_jiffies_up_relative(HZ));
-			}
-		}
 	}
 
 	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
@@ -956,8 +940,6 @@ static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
 
 void i915_pmu_register(struct drm_i915_private *i915)
 {
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
 	int ret;
 
 	if (INTEL_GEN(i915) <= 2) {
@@ -985,10 +967,6 @@ void i915_pmu_register(struct drm_i915_private *i915)
 	hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	i915->pmu.timer.function = i915_sample;
 
-	for_each_engine(engine, i915, id)
-		INIT_DELAYED_WORK(&engine->pmu.disable_busy_stats,
-				  __disable_busy_stats);
-
 	ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
 	if (ret)
 		goto err;
@@ -1009,9 +987,6 @@ void i915_pmu_register(struct drm_i915_private *i915)
 
 void i915_pmu_unregister(struct drm_i915_private *i915)
 {
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
 	if (!i915->pmu.base.event_init)
 		return;
 
@@ -1019,11 +994,6 @@ void i915_pmu_unregister(struct drm_i915_private *i915)
 
 	hrtimer_cancel(&i915->pmu.timer);
 
-	for_each_engine(engine, i915, id) {
-		GEM_BUG_ON(engine->pmu.busy_stats);
-		flush_delayed_work(&engine->pmu.disable_busy_stats);
-	}
-
 	i915_pmu_unregister_cpuhp_state(i915);
 
 	perf_pmu_unregister(&i915->pmu.base);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c5ff203e42d6..a0e7a6c2a57c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -366,20 +366,6 @@ struct intel_engine_cs {
 		 */
 #define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1)
 		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
-		/**
-		 * @busy_stats: Has enablement of engine stats tracking been
-		 * 		requested.
-		 */
-		bool busy_stats;
-		/**
-		 * @disable_busy_stats: Work item for busy stats disabling.
-		 *
-		 * Same as with @enable_busy_stats action, with the difference
-		 * that we delay it in case there are rapid enable-disable
-		 * actions, which can happen during tool startup (like perf
-		 * stat).
-		 */
-		struct delayed_work disable_busy_stats;
 	} pmu;
 
 	/*
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915/pmu: Fix PMU enable vs execlists tasklet race (rev2)
  2018-02-02 18:38 [PATCH] drm/i915/pmu: Fix PMU enable vs execlists tasklet race Tvrtko Ursulin
                   ` (2 preceding siblings ...)
  2018-02-02 21:48 ` [PATCH] " Chris Wilson
@ 2018-02-05 10:16 ` Patchwork
  2018-02-06  9:10 ` ✓ Fi.CI.IGT: " Patchwork
  4 siblings, 0 replies; 8+ messages in thread
From: Patchwork @ 2018-02-05 10:16 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/pmu: Fix PMU enable vs execlists tasklet race (rev2)
URL   : https://patchwork.freedesktop.org/series/37575/
State : success

== Summary ==

Series 37575v2 drm/i915/pmu: Fix PMU enable vs execlists tasklet race
https://patchwork.freedesktop.org/api/1.0/series/37575/revisions/2/mbox/

Test gem_mmap_gtt:
        Subgroup basic-small-bo-tiledx:
                pass       -> FAIL       (fi-gdg-551) fdo#102575
Test kms_chamelium:
        Subgroup dp-crc-fast:
                pass       -> FAIL       (fi-kbl-7500u) fdo#103841

fdo#102575 https://bugs.freedesktop.org/show_bug.cgi?id=102575
fdo#103841 https://bugs.freedesktop.org/show_bug.cgi?id=103841

fi-bdw-5557u     total:288  pass:267  dwarn:0   dfail:0   fail:0   skip:21  time:416s
fi-bdw-gvtdvm    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:420s
fi-blb-e6850     total:288  pass:223  dwarn:1   dfail:0   fail:0   skip:64  time:369s
fi-bsw-n3050     total:288  pass:242  dwarn:0   dfail:0   fail:0   skip:46  time:483s
fi-bwr-2160      total:288  pass:183  dwarn:0   dfail:0   fail:0   skip:105 time:280s
fi-bxt-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:480s
fi-bxt-j4205     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:483s
fi-byt-j1900     total:288  pass:253  dwarn:0   dfail:0   fail:0   skip:35  time:464s
fi-byt-n2820     total:288  pass:249  dwarn:0   dfail:0   fail:0   skip:39  time:451s
fi-cfl-s2        total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:563s
fi-elk-e7500     total:288  pass:229  dwarn:0   dfail:0   fail:0   skip:59  time:402s
fi-gdg-551       total:288  pass:179  dwarn:0   dfail:0   fail:1   skip:108 time:279s
fi-glk-1         total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:509s
fi-hsw-4770      total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:390s
fi-hsw-4770r     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:397s
fi-ilk-650       total:288  pass:228  dwarn:0   dfail:0   fail:0   skip:60  time:407s
fi-ivb-3520m     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:458s
fi-ivb-3770      total:288  pass:255  dwarn:0   dfail:0   fail:0   skip:33  time:413s
fi-kbl-7500u     total:288  pass:262  dwarn:1   dfail:0   fail:1   skip:24  time:456s
fi-kbl-7560u     total:288  pass:269  dwarn:0   dfail:0   fail:0   skip:19  time:495s
fi-kbl-7567u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:450s
fi-kbl-r         total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:497s
fi-pnv-d510      total:288  pass:222  dwarn:1   dfail:0   fail:0   skip:65  time:575s
fi-skl-6260u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:429s
fi-skl-6600u     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:503s
fi-skl-6700hq    total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:525s
fi-skl-6700k2    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:482s
fi-skl-6770hq    total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:490s
fi-skl-guc       total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:414s
fi-skl-gvtdvm    total:288  pass:265  dwarn:0   dfail:0   fail:0   skip:23  time:430s
fi-snb-2520m     total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:517s
fi-snb-2600      total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:395s
Blacklisted hosts:
fi-glk-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:464s

2e76a2952923eba64c4f9baf461613bc42ee997a drm-tip: 2018y-02m-02d-20h-33m-12s UTC integration manifest
c0ee935c8efd drm/i915/pmu: Fix PMU enable vs execlists tasklet race

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7883/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] drm/i915/pmu: Fix PMU enable vs execlists tasklet race
  2018-02-05  9:34   ` [PATCH v2] " Tvrtko Ursulin
@ 2018-02-05 10:20     ` Chris Wilson
  0 siblings, 0 replies; 8+ messages in thread
From: Chris Wilson @ 2018-02-05 10:20 UTC (permalink / raw)
  To: Tvrtko Ursulin, Intel-gfx; +Cc: intel-gfx, Rodrigo Vivi

Quoting Tvrtko Ursulin (2018-02-05 09:34:48)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Commit 99e48bf98dd0 ("drm/i915: Lock out execlist tasklet while peeking
> inside for busy-stats") added a tasklet_disable call in busy stats
> enabling, but we failed to understand that the PMU enable callback runs
> as an hard IRQ (IPI).
> 
> Consequence of this is that the PMU enable callback can interrupt the
> execlists tasklet, and will then deadlock when it calls
> intel_engine_stats_enable->tasklet_disable.
> 
> To fix this, I realized it is possible to move the engine stats enablement
> and disablement to PMU event init and destroy hooks. This allows for much
> simpler implementation since those hooks run in normal context (can
> sleep).
> 
> v2: Extract engine_event_destroy. (Chris Wilson)
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Fixes: 99e48bf98dd0 ("drm/i915: Lock out execlist tasklet while peeking inside for busy-stats")
> Testcase: igt/perf_pmu/enable-race-*
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Jani Nikula <jani.nikula@linux.intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: intel-gfx@lists.freedesktop.org

I didn't have any nightmares about this,
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* ✓ Fi.CI.IGT: success for drm/i915/pmu: Fix PMU enable vs execlists tasklet race (rev2)
  2018-02-02 18:38 [PATCH] drm/i915/pmu: Fix PMU enable vs execlists tasklet race Tvrtko Ursulin
                   ` (3 preceding siblings ...)
  2018-02-05 10:16 ` ✓ Fi.CI.BAT: success for drm/i915/pmu: Fix PMU enable vs execlists tasklet race (rev2) Patchwork
@ 2018-02-06  9:10 ` Patchwork
  4 siblings, 0 replies; 8+ messages in thread
From: Patchwork @ 2018-02-06  9:10 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/pmu: Fix PMU enable vs execlists tasklet race (rev2)
URL   : https://patchwork.freedesktop.org/series/37575/
State : success

== Summary ==

Test kms_cursor_legacy:
        Subgroup flip-vs-cursor-atomic:
                fail       -> PASS       (shard-hsw) fdo#102670 +1
Test gem_eio:
        Subgroup in-flight-contexts:
                dmesg-warn -> PASS       (shard-snb) fdo#104058 +1
Test perf:
        Subgroup oa-exponents:
                pass       -> FAIL       (shard-apl) fdo#102254
Test kms_flip:
        Subgroup 2x-plain-flip-ts-check:
                fail       -> PASS       (shard-hsw) fdo#100368
        Subgroup 2x-flip-vs-expired-vblank-interruptible:
                fail       -> PASS       (shard-hsw) fdo#102887

fdo#102670 https://bugs.freedesktop.org/show_bug.cgi?id=102670
fdo#104058 https://bugs.freedesktop.org/show_bug.cgi?id=104058
fdo#102254 https://bugs.freedesktop.org/show_bug.cgi?id=102254
fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887

shard-apl        total:2836 pass:1750 dwarn:1   dfail:0   fail:21  skip:1064 time:12561s
shard-hsw        total:2836 pass:1733 dwarn:1   dfail:0   fail:11  skip:1090 time:11703s
shard-snb        total:2836 pass:1327 dwarn:2   dfail:0   fail:10  skip:1497 time:6407s
Blacklisted hosts:
shard-kbl        total:2836 pass:1867 dwarn:5   dfail:0   fail:22  skip:942 time:9368s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7883/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2018-02-06  9:10 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-02-02 18:38 [PATCH] drm/i915/pmu: Fix PMU enable vs execlists tasklet race Tvrtko Ursulin
2018-02-02 19:01 ` ✓ Fi.CI.BAT: success for " Patchwork
2018-02-02 21:41 ` ✓ Fi.CI.IGT: " Patchwork
2018-02-02 21:48 ` [PATCH] " Chris Wilson
2018-02-05  9:34   ` [PATCH v2] " Tvrtko Ursulin
2018-02-05 10:20     ` Chris Wilson
2018-02-05 10:16 ` ✓ Fi.CI.BAT: success for drm/i915/pmu: Fix PMU enable vs execlists tasklet race (rev2) Patchwork
2018-02-06  9:10 ` ✓ Fi.CI.IGT: " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.