All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/i915: Don't call synchronize_rcu_expedited under struct_mutex
@ 2017-04-07 10:49 Joonas Lahtinen
  2017-04-07 10:49 ` [PATCH 2/2] drm/i915: Simplify shrinker locking Joonas Lahtinen
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Joonas Lahtinen @ 2017-04-07 10:49 UTC (permalink / raw)
  To: Intel graphics driver community testing & development
  Cc: Andrea Arcangeli, Daniel Vetter, Jani Nikula

Only call synchronize_rcu_expedited after unlocking struct_mutex to
avoid deadlock because the workqueues depend on struct_mutex.

From original patch by Andrea:

synchronize_rcu/synchronize_sched/synchronize_rcu_expedited() will
hang until its own workqueues are run. The i915 gem workqueues will
wait on the struct_mutex to be released. So we cannot wait for a
quiescent state using those rcu primitives while holding the
struct_mutex or it creates a circular lock dependency resulting in
kernel hangs (which is reproducible but goes undetected by lockdep).

kswapd0         D    0   700      2 0x00000000
Call Trace:
? __schedule+0x1a5/0x660
? schedule+0x36/0x80
? _synchronize_rcu_expedited.constprop.65+0x2ef/0x300
? wake_up_bit+0x20/0x20
? rcu_stall_kick_kthreads.part.54+0xc0/0xc0
? rcu_exp_wait_wake+0x530/0x530
? i915_gem_shrink+0x34b/0x4b0
? i915_gem_shrinker_scan+0x7c/0x90
? i915_gem_shrinker_scan+0x7c/0x90
? shrink_slab.part.61.constprop.72+0x1c1/0x3a0
? shrink_zone+0x154/0x160
? kswapd+0x40a/0x720
? kthread+0xf4/0x130
? try_to_free_pages+0x450/0x450
? kthread_create_on_node+0x40/0x40
? ret_from_fork+0x23/0x30
plasmashell     D    0  4657   4614 0x00000000
Call Trace:
? __schedule+0x1a5/0x660
? schedule+0x36/0x80
? schedule_preempt_disabled+0xe/0x10
? __mutex_lock.isra.4+0x1c9/0x790
? i915_gem_close_object+0x26/0xc0
? i915_gem_close_object+0x26/0xc0
? drm_gem_object_release_handle+0x48/0x90
? drm_gem_handle_delete+0x50/0x80
? drm_ioctl+0x1fa/0x420
? drm_gem_handle_create+0x40/0x40
? pipe_write+0x391/0x410
? __vfs_write+0xc6/0x120
? do_vfs_ioctl+0x8b/0x5d0
? SyS_ioctl+0x3b/0x70
? entry_SYSCALL_64_fastpath+0x13/0x94
kworker/0:0     D    0 29186      2 0x00000000
Workqueue: events __i915_gem_free_work
Call Trace:
? __schedule+0x1a5/0x660
? schedule+0x36/0x80
? schedule_preempt_disabled+0xe/0x10
? __mutex_lock.isra.4+0x1c9/0x790
? del_timer_sync+0x44/0x50
? update_curr+0x57/0x110
? __i915_gem_free_objects+0x31/0x300
? __i915_gem_free_objects+0x31/0x300
? __i915_gem_free_work+0x2d/0x40
? process_one_work+0x13a/0x3b0
? worker_thread+0x4a/0x460
? kthread+0xf4/0x130
? process_one_work+0x3b0/0x3b0
? kthread_create_on_node+0x40/0x40
? ret_from_fork+0x23/0x30

Fixes: 3d3d18f086cd ("drm/i915: Avoid rcu_barrier() from reclaim paths (shrinker)")
Reported-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 2978acd..129ed30 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -53,6 +53,17 @@ static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
 	BUG();
 }
 
+static void i915_gem_shrinker_unlock(struct drm_device *dev, bool unlock)
+{
+	if (!unlock)
+		return;
+
+	mutex_unlock(&dev->struct_mutex);
+
+	/* expedite the RCU grace period to free some request slabs */
+	synchronize_rcu_expedited();
+}
+
 static bool any_vma_pinned(struct drm_i915_gem_object *obj)
 {
 	struct i915_vma *vma;
@@ -232,11 +243,8 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 		intel_runtime_pm_put(dev_priv);
 
 	i915_gem_retire_requests(dev_priv);
-	if (unlock)
-		mutex_unlock(&dev_priv->drm.struct_mutex);
 
-	/* expedite the RCU grace period to free some request slabs */
-	synchronize_rcu_expedited();
+	i915_gem_shrinker_unlock(&dev_priv->drm, unlock);
 
 	return count;
 }
@@ -296,8 +304,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
 			count += obj->base.size >> PAGE_SHIFT;
 	}
 
-	if (unlock)
-		mutex_unlock(&dev->struct_mutex);
+	i915_gem_shrinker_unlock(dev, unlock);
 
 	return count;
 }
@@ -324,8 +331,8 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 					 sc->nr_to_scan - freed,
 					 I915_SHRINK_BOUND |
 					 I915_SHRINK_UNBOUND);
-	if (unlock)
-		mutex_unlock(&dev->struct_mutex);
+
+	i915_gem_shrinker_unlock(dev, unlock);
 
 	return freed;
 }
@@ -367,8 +374,7 @@ i915_gem_shrinker_unlock_uninterruptible(struct drm_i915_private *dev_priv,
 					 struct shrinker_lock_uninterruptible *slu)
 {
 	dev_priv->mm.interruptible = slu->was_interruptible;
-	if (slu->unlock)
-		mutex_unlock(&dev_priv->drm.struct_mutex);
+	i915_gem_shrinker_unlock(&dev_priv->drm, slu->unlock);
 }
 
 static int
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/2] drm/i915: Simplify shrinker locking
  2017-04-07 10:49 [PATCH 1/2] drm/i915: Don't call synchronize_rcu_expedited under struct_mutex Joonas Lahtinen
@ 2017-04-07 10:49 ` Joonas Lahtinen
  2017-04-07 11:30   ` Chris Wilson
  2017-04-07 11:25 ` ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Don't call synchronize_rcu_expedited under struct_mutex Patchwork
  2017-04-07 13:32 ` [PATCH 1/2] " Andrea Arcangeli
  2 siblings, 1 reply; 7+ messages in thread
From: Joonas Lahtinen @ 2017-04-07 10:49 UTC (permalink / raw)
  To: Intel graphics driver community testing & development

By using the same structure for both interruptible and
uninterruptible locking in shrinker code, combined with the
information that mm.interruptible is only being written to, the
code can be greatly simplified.

Also removing the i915_gem_ prefix from the locking functions so
that nobody in their wildest dreams considers exporting them.

Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h          |  6 ----
 drivers/gpu/drm/i915/i915_gem.c          |  2 --
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 56 +++++++++++---------------------
 drivers/gpu/drm/i915/intel_display.c     |  3 --
 4 files changed, 19 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c9b0949..f990f0e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1511,12 +1511,6 @@ struct i915_gem_mm {
 	/** LRU list of objects with fence regs on them. */
 	struct list_head fence_list;
 
-	/**
-	 * Are we in a non-interruptible section of code like
-	 * modesetting?
-	 */
-	bool interruptible;
-
 	/* the indicator for dispatch video commands on two BSD rings */
 	atomic_t bsd_engine_dispatch_index;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 391aa69..c8139be 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4822,8 +4822,6 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
 
 	init_waitqueue_head(&dev_priv->pending_flip_queue);
 
-	dev_priv->mm.interruptible = true;
-
 	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
 
 	spin_lock_init(&dev_priv->fb_tracking.lock);
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 129ed30..0e7352d 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -35,9 +35,9 @@
 #include "i915_drv.h"
 #include "i915_trace.h"
 
-static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
+static bool shrinker_lock(struct drm_i915_private *dev_priv, bool *unlock)
 {
-	switch (mutex_trylock_recursive(&dev->struct_mutex)) {
+	switch (mutex_trylock_recursive(&dev_priv->drm.struct_mutex)) {
 	case MUTEX_TRYLOCK_FAILED:
 		return false;
 
@@ -53,12 +53,12 @@ static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
 	BUG();
 }
 
-static void i915_gem_shrinker_unlock(struct drm_device *dev, bool unlock)
+static void shrinker_unlock(struct drm_i915_private *dev_priv, bool unlock)
 {
 	if (!unlock)
 		return;
 
-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	/* expedite the RCU grace period to free some request slabs */
 	synchronize_rcu_expedited();
@@ -156,7 +156,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 	unsigned long count = 0;
 	bool unlock;
 
-	if (!i915_gem_shrinker_lock(&dev_priv->drm, &unlock))
+	if (!shrinker_lock(dev_priv, &unlock))
 		return 0;
 
 	trace_i915_gem_shrink(dev_priv, target, flags);
@@ -244,7 +244,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 
 	i915_gem_retire_requests(dev_priv);
 
-	i915_gem_shrinker_unlock(&dev_priv->drm, unlock);
+	shrinker_unlock(dev_priv, unlock);
 
 	return count;
 }
@@ -284,12 +284,11 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
 {
 	struct drm_i915_private *dev_priv =
 		container_of(shrinker, struct drm_i915_private, mm.shrinker);
-	struct drm_device *dev = &dev_priv->drm;
 	struct drm_i915_gem_object *obj;
 	unsigned long count;
 	bool unlock;
 
-	if (!i915_gem_shrinker_lock(dev, &unlock))
+	if (!shrinker_lock(dev_priv, &unlock))
 		return 0;
 
 	i915_gem_retire_requests(dev_priv);
@@ -304,7 +303,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
 			count += obj->base.size >> PAGE_SHIFT;
 	}
 
-	i915_gem_shrinker_unlock(dev, unlock);
+	shrinker_unlock(dev_priv, unlock);
 
 	return count;
 }
@@ -314,11 +313,10 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 {
 	struct drm_i915_private *dev_priv =
 		container_of(shrinker, struct drm_i915_private, mm.shrinker);
-	struct drm_device *dev = &dev_priv->drm;
 	unsigned long freed;
 	bool unlock;
 
-	if (!i915_gem_shrinker_lock(dev, &unlock))
+	if (!shrinker_lock(dev_priv, &unlock))
 		return SHRINK_STOP;
 
 	freed = i915_gem_shrink(dev_priv,
@@ -332,26 +330,20 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 					 I915_SHRINK_BOUND |
 					 I915_SHRINK_UNBOUND);
 
-	i915_gem_shrinker_unlock(dev, unlock);
+	shrinker_unlock(dev_priv, unlock);
 
 	return freed;
 }
 
-struct shrinker_lock_uninterruptible {
-	bool was_interruptible;
-	bool unlock;
-};
-
 static bool
-i915_gem_shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv,
-				       struct shrinker_lock_uninterruptible *slu,
-				       int timeout_ms)
+shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv, bool *unlock,
+			      int timeout_ms)
 {
 	unsigned long timeout = jiffies + msecs_to_jiffies_timeout(timeout_ms);
 
 	do {
 		if (i915_gem_wait_for_idle(dev_priv, 0) == 0 &&
-		    i915_gem_shrinker_lock(&dev_priv->drm, &slu->unlock))
+		    shrinker_lock(dev_priv, unlock))
 			break;
 
 		schedule_timeout_killable(1);
@@ -364,29 +356,19 @@ i915_gem_shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv,
 		}
 	} while (1);
 
-	slu->was_interruptible = dev_priv->mm.interruptible;
-	dev_priv->mm.interruptible = false;
 	return true;
 }
 
-static void
-i915_gem_shrinker_unlock_uninterruptible(struct drm_i915_private *dev_priv,
-					 struct shrinker_lock_uninterruptible *slu)
-{
-	dev_priv->mm.interruptible = slu->was_interruptible;
-	i915_gem_shrinker_unlock(&dev_priv->drm, slu->unlock);
-}
-
 static int
 i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
 {
 	struct drm_i915_private *dev_priv =
 		container_of(nb, struct drm_i915_private, mm.oom_notifier);
-	struct shrinker_lock_uninterruptible slu;
 	struct drm_i915_gem_object *obj;
 	unsigned long unevictable, bound, unbound, freed_pages;
+	bool unlock;
 
-	if (!i915_gem_shrinker_lock_uninterruptible(dev_priv, &slu, 5000))
+	if (!shrinker_lock_uninterruptible(dev_priv, &unlock, 5000))
 		return NOTIFY_DONE;
 
 	freed_pages = i915_gem_shrink_all(dev_priv);
@@ -415,7 +397,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
 			bound += obj->base.size >> PAGE_SHIFT;
 	}
 
-	i915_gem_shrinker_unlock_uninterruptible(dev_priv, &slu);
+	shrinker_unlock(dev_priv, unlock);
 
 	if (freed_pages || unbound || bound)
 		pr_info("Purging GPU memory, %lu pages freed, "
@@ -435,12 +417,12 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 {
 	struct drm_i915_private *dev_priv =
 		container_of(nb, struct drm_i915_private, mm.vmap_notifier);
-	struct shrinker_lock_uninterruptible slu;
 	struct i915_vma *vma, *next;
 	unsigned long freed_pages = 0;
+	bool unlock;
 	int ret;
 
-	if (!i915_gem_shrinker_lock_uninterruptible(dev_priv, &slu, 5000))
+	if (!shrinker_lock_uninterruptible(dev_priv, &unlock, 5000))
 		return NOTIFY_DONE;
 
 	/* Force everything onto the inactive lists */
@@ -465,7 +447,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 	}
 
 out:
-	i915_gem_shrinker_unlock_uninterruptible(dev_priv, &slu);
+	shrinker_unlock(dev_priv, unlock);
 
 	*(unsigned long *)ptr += freed_pages;
 	return NOTIFY_DONE;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 3617927..c683508 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4861,12 +4861,9 @@ static void intel_crtc_dpms_overlay_disable(struct intel_crtc *intel_crtc)
 {
 	if (intel_crtc->overlay) {
 		struct drm_device *dev = intel_crtc->base.dev;
-		struct drm_i915_private *dev_priv = to_i915(dev);
 
 		mutex_lock(&dev->struct_mutex);
-		dev_priv->mm.interruptible = false;
 		(void) intel_overlay_switch_off(intel_crtc->overlay);
-		dev_priv->mm.interruptible = true;
 		mutex_unlock(&dev->struct_mutex);
 	}
 
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Don't call synchronize_rcu_expedited under struct_mutex
  2017-04-07 10:49 [PATCH 1/2] drm/i915: Don't call synchronize_rcu_expedited under struct_mutex Joonas Lahtinen
  2017-04-07 10:49 ` [PATCH 2/2] drm/i915: Simplify shrinker locking Joonas Lahtinen
@ 2017-04-07 11:25 ` Patchwork
  2017-04-07 11:44   ` Joonas Lahtinen
  2017-04-07 13:32 ` [PATCH 1/2] " Andrea Arcangeli
  2 siblings, 1 reply; 7+ messages in thread
From: Patchwork @ 2017-04-07 11:25 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/2] drm/i915: Don't call synchronize_rcu_expedited under struct_mutex
URL   : https://patchwork.freedesktop.org/series/22652/
State : success

== Summary ==

Series 22652v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/22652/revisions/1/mbox/

Test gem_exec_flush:
        Subgroup basic-batch-kernel-default-uc:
                fail       -> PASS       (fi-snb-2600) fdo#100007

fdo#100007 https://bugs.freedesktop.org/show_bug.cgi?id=100007

fi-bdw-5557u     total:278  pass:266  dwarn:0   dfail:0   fail:1   skip:11  time:430s
fi-bdw-gvtdvm    total:278  pass:255  dwarn:8   dfail:0   fail:1   skip:14  time:424s
fi-bsw-n3050     total:278  pass:241  dwarn:0   dfail:0   fail:1   skip:36  time:577s
fi-bxt-j4205     total:278  pass:258  dwarn:0   dfail:0   fail:1   skip:19  time:506s
fi-byt-j1900     total:278  pass:253  dwarn:0   dfail:0   fail:1   skip:24  time:479s
fi-byt-n2820     total:278  pass:249  dwarn:0   dfail:0   fail:1   skip:28  time:488s
fi-hsw-4770      total:278  pass:261  dwarn:0   dfail:0   fail:1   skip:16  time:412s
fi-hsw-4770r     total:278  pass:261  dwarn:0   dfail:0   fail:1   skip:16  time:410s
fi-ilk-650       total:278  pass:228  dwarn:0   dfail:0   fail:1   skip:49  time:428s
fi-ivb-3520m     total:278  pass:259  dwarn:0   dfail:0   fail:1   skip:18  time:475s
fi-ivb-3770      total:278  pass:259  dwarn:0   dfail:0   fail:1   skip:18  time:487s
fi-kbl-7500u     total:278  pass:259  dwarn:0   dfail:0   fail:1   skip:18  time:456s
fi-kbl-7560u     total:278  pass:267  dwarn:0   dfail:0   fail:1   skip:10  time:565s
fi-skl-6260u     total:278  pass:267  dwarn:0   dfail:0   fail:1   skip:10  time:451s
fi-skl-6700hq    total:278  pass:260  dwarn:0   dfail:0   fail:1   skip:17  time:567s
fi-skl-6700k     total:278  pass:255  dwarn:4   dfail:0   fail:1   skip:18  time:467s
fi-skl-6770hq    total:278  pass:267  dwarn:0   dfail:0   fail:1   skip:10  time:486s
fi-skl-gvtdvm    total:278  pass:264  dwarn:0   dfail:0   fail:1   skip:13  time:436s
fi-snb-2520m     total:278  pass:249  dwarn:0   dfail:0   fail:1   skip:28  time:527s
fi-snb-2600      total:278  pass:248  dwarn:0   dfail:0   fail:1   skip:29  time:412s

fdc0fa0d6347cadec14396a172a98636e9dfebc0 drm-tip: 2017y-04m-07d-10h-32m-49s UTC integration manifest
08e143d drm/i915: Simplify shrinker locking
60dc19c drm/i915: Don't call synchronize_rcu_expedited under struct_mutex

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_4439/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] drm/i915: Simplify shrinker locking
  2017-04-07 10:49 ` [PATCH 2/2] drm/i915: Simplify shrinker locking Joonas Lahtinen
@ 2017-04-07 11:30   ` Chris Wilson
  0 siblings, 0 replies; 7+ messages in thread
From: Chris Wilson @ 2017-04-07 11:30 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: Intel graphics driver community testing & development

On Fri, Apr 07, 2017 at 01:49:35PM +0300, Joonas Lahtinen wrote:
> By using the same structure for both interruptible and
> uninterruptible locking in shrinker code, combined with the
> information that mm.interruptible is only being written to, the
> code can be greatly simplified.
> 
> Also removing the i915_gem_ prefix from the locking functions so
> that nobody in their wildest dreams considers exporting them.
> 
> Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>

I still feel uneasy that I'm not using mm.uninterruptible in some parts of
modesetting, nevertheless the change is correct, simplies the current
code and means I have to fix gen2 properly if anyone ever notices.

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Don't call synchronize_rcu_expedited under struct_mutex
  2017-04-07 11:25 ` ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Don't call synchronize_rcu_expedited under struct_mutex Patchwork
@ 2017-04-07 11:44   ` Joonas Lahtinen
  0 siblings, 0 replies; 7+ messages in thread
From: Joonas Lahtinen @ 2017-04-07 11:44 UTC (permalink / raw)
  To: intel-gfx

Thanks for the review, pushing series.

On pe, 2017-04-07 at 11:25 +0000, Patchwork wrote:
> == Series Details ==
> 
> Series: series starting with [1/2] drm/i915: Don't call synchronize_rcu_expedited under struct_mutex
> URL   : https://patchwork.freedesktop.org/series/22652/
> State : success
> 
> == Summary ==
> 
> Series 22652v1 Series without cover letter
> https://patchwork.freedesktop.org/api/1.0/series/22652/revisions/1/mbox/
> 
> Test gem_exec_flush:
>         Subgroup basic-batch-kernel-default-uc:
>                 fail       -> PASS       (fi-snb-2600) fdo#100007
> 
> fdo#100007 https://bugs.freedesktop.org/show_bug.cgi?id=100007
> 
> fi-bdw-5557u     total:278  pass:266  dwarn:0   dfail:0   fail:1   skip:11  time:430s
> fi-bdw-gvtdvm    total:278  pass:255  dwarn:8   dfail:0   fail:1   skip:14  time:424s
> fi-bsw-n3050     total:278  pass:241  dwarn:0   dfail:0   fail:1   skip:36  time:577s
> fi-bxt-j4205     total:278  pass:258  dwarn:0   dfail:0   fail:1   skip:19  time:506s
> fi-byt-j1900     total:278  pass:253  dwarn:0   dfail:0   fail:1   skip:24  time:479s
> fi-byt-n2820     total:278  pass:249  dwarn:0   dfail:0   fail:1   skip:28  time:488s
> fi-hsw-4770      total:278  pass:261  dwarn:0   dfail:0   fail:1   skip:16  time:412s
> fi-hsw-4770r     total:278  pass:261  dwarn:0   dfail:0   fail:1   skip:16  time:410s
> fi-ilk-650       total:278  pass:228  dwarn:0   dfail:0   fail:1   skip:49  time:428s
> fi-ivb-3520m     total:278  pass:259  dwarn:0   dfail:0   fail:1   skip:18  time:475s
> fi-ivb-3770      total:278  pass:259  dwarn:0   dfail:0   fail:1   skip:18  time:487s
> fi-kbl-7500u     total:278  pass:259  dwarn:0   dfail:0   fail:1   skip:18  time:456s
> fi-kbl-7560u     total:278  pass:267  dwarn:0   dfail:0   fail:1   skip:10  time:565s
> fi-skl-6260u     total:278  pass:267  dwarn:0   dfail:0   fail:1   skip:10  time:451s
> fi-skl-6700hq    total:278  pass:260  dwarn:0   dfail:0   fail:1   skip:17  time:567s
> fi-skl-6700k     total:278  pass:255  dwarn:4   dfail:0   fail:1   skip:18  time:467s
> fi-skl-6770hq    total:278  pass:267  dwarn:0   dfail:0   fail:1   skip:10  time:486s
> fi-skl-gvtdvm    total:278  pass:264  dwarn:0   dfail:0   fail:1   skip:13  time:436s
> fi-snb-2520m     total:278  pass:249  dwarn:0   dfail:0   fail:1   skip:28  time:527s
> fi-snb-2600      total:278  pass:248  dwarn:0   dfail:0   fail:1   skip:29  time:412s
> 
> fdc0fa0d6347cadec14396a172a98636e9dfebc0 drm-tip: 2017y-04m-07d-10h-32m-49s UTC integration manifest
> 08e143d drm/i915: Simplify shrinker locking
> 60dc19c drm/i915: Don't call synchronize_rcu_expedited under struct_mutex
> 
> == Logs ==
> 
> For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_4439/
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/2] drm/i915: Don't call synchronize_rcu_expedited under struct_mutex
  2017-04-07 10:49 [PATCH 1/2] drm/i915: Don't call synchronize_rcu_expedited under struct_mutex Joonas Lahtinen
  2017-04-07 10:49 ` [PATCH 2/2] drm/i915: Simplify shrinker locking Joonas Lahtinen
  2017-04-07 11:25 ` ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Don't call synchronize_rcu_expedited under struct_mutex Patchwork
@ 2017-04-07 13:32 ` Andrea Arcangeli
  2017-04-07 14:20   ` Andrea Arcangeli
  2 siblings, 1 reply; 7+ messages in thread
From: Andrea Arcangeli @ 2017-04-07 13:32 UTC (permalink / raw)
  To: Joonas Lahtinen
  Cc: Jani Nikula, Daniel Vetter,
	Intel graphics driver community testing & development

Hello Joonas,

On Fri, Apr 07, 2017 at 01:49:34PM +0300, Joonas Lahtinen wrote:
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index 2978acd..129ed30 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -53,6 +53,17 @@ static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
>  	BUG();
>  }
>  
> +static void i915_gem_shrinker_unlock(struct drm_device *dev, bool unlock)
> +{
> +	if (!unlock)
> +		return;
> +
> +	mutex_unlock(&dev->struct_mutex);
> +
> +	/* expedite the RCU grace period to free some request slabs */
> +	synchronize_rcu_expedited();
> +}
> +
>  static bool any_vma_pinned(struct drm_i915_gem_object *obj)
>  {
>  	struct i915_vma *vma;
> @@ -232,11 +243,8 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
>  		intel_runtime_pm_put(dev_priv);
>  
>  	i915_gem_retire_requests(dev_priv);
> -	if (unlock)
> -		mutex_unlock(&dev_priv->drm.struct_mutex);
>  
> -	/* expedite the RCU grace period to free some request slabs */
> -	synchronize_rcu_expedited();
> +	i915_gem_shrinker_unlock(&dev_priv->drm, unlock);
>  }
> @@ -296,8 +304,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
>  			count += obj->base.size >> PAGE_SHIFT;
>  	}
>  
> -	if (unlock)
> -		mutex_unlock(&dev->struct_mutex);
> +	i915_gem_shrinker_unlock(dev, unlock);

Why here? This doesn't make sense, all it matters is the throttling to
happen when scan_objects is run, if count_objects is run it's not
worth it to wait a quiescent point and to call
synchronize_rcu_expedited(), it is *very* expensive. I recommend to
reverse this hunk, I think it's worsening the runtime with zero
benefits to increase the reliability of reclaim.

>  	return count;
>  }
> @@ -324,8 +331,8 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
>  					 sc->nr_to_scan - freed,
>  					 I915_SHRINK_BOUND |
>  					 I915_SHRINK_UNBOUND);
> -	if (unlock)
> -		mutex_unlock(&dev->struct_mutex);
> +
> +	i915_gem_shrinker_unlock(dev, unlock);
>  
>  	return freed;
>  }

Perfect here, faster than re-reading the mutex too, already thought of
checking unlock instead. Although if that's the only place it can as
well be explicit.

> @@ -367,8 +374,7 @@ i915_gem_shrinker_unlock_uninterruptible(struct drm_i915_private *dev_priv,
>  					 struct shrinker_lock_uninterruptible *slu)
>  {
>  	dev_priv->mm.interruptible = slu->was_interruptible;
> -	if (slu->unlock)
> -		mutex_unlock(&dev_priv->drm.struct_mutex);
> +	i915_gem_shrinker_unlock(&dev_priv->drm, slu->unlock);
>  }

i915_gem_shrinker_unlock_uninterruptible is more of a helper function
but it doesn't always make sense to wait a rcu grace period. I think
it'd be better to be selective in when to explicitly run
synchronize_rcu_expedited(); in fact in some case synchronize_sched()
may be prefereable and it will cause less wasted CPU cycles at the
only downside of higher latency, it's all about tradeoffs which one
should be called as they're equivalent as far as i915 is concerned. I
don't think calling synchronize_rcu_expedited(); unconditionally in a
unlock helper is ok and it'd be better to decided if to call (and
which one) on a case by case basis.

I kind I prefer my patch, just cleaned up with the
synchronize_rcu_expedited under if (unlock) { mutex_unlock;
synchronize_rcu... }.

I'd also like to see all those mutex_trylock_recursive dropped, the
only place where it makes sense to use it really is
i915_gem_shrinker_scan and i915_gem_shrinker_count, unless in fact we
want to replace it there too with a mutex_trylock and stop the
recursive behavior of reclaim into DRM code. The other cases where the
code uses lock recursion don't make much sense to me, I think the code
would be simpler if the information on the struct_mutex being hold
would be passed as parameter in all other cases. It'd be likely faster
as well for the same reason why checking "unlock" is better above than
checking the mutex.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/2] drm/i915: Don't call synchronize_rcu_expedited under struct_mutex
  2017-04-07 13:32 ` [PATCH 1/2] " Andrea Arcangeli
@ 2017-04-07 14:20   ` Andrea Arcangeli
  0 siblings, 0 replies; 7+ messages in thread
From: Andrea Arcangeli @ 2017-04-07 14:20 UTC (permalink / raw)
  To: Joonas Lahtinen
  Cc: Jani Nikula, Daniel Vetter,
	Intel graphics driver community testing & development

On Fri, Apr 07, 2017 at 03:32:30PM +0200, Andrea Arcangeli wrote:
> Hello Joonas,
> I kind I prefer my patch, just cleaned up with the
> synchronize_rcu_expedited under if (unlock) { mutex_unlock;
> synchronize_rcu... }.

Here a cleaned up version below using "unlock" instead of checking the
mutex again (otherwise equivalent).

From 930f1779c1430062852f7796e9c555db8e795319 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Fri, 7 Apr 2017 16:15:55 +0200
Subject: [PATCH 1/1] drm/i915: Don't call synchronize_rcu_expedited under
 struct_mutex

synchronize_rcu/synchronize_sched/synchronize_rcu_expedited() will
hang until its own workqueues are run. The i915 gem workqueues will
wait on the struct_mutex to be released. So we cannot wait for a
quiescent state using those rcu primitives while holding the
struct_mutex or it creates a circular lock dependency resulting in
kernel hangs (which is reproducible but goes undetected by lockdep).

This started in commit 3d3d18f086cdda72ee18a454db70ca72c6e3246c and
lockdep didn't detect it apparently.

kswapd0         D    0   700      2 0x00000000
Call Trace:
? __schedule+0x1a5/0x660
? schedule+0x36/0x80
? _synchronize_rcu_expedited.constprop.65+0x2ef/0x300
? wake_up_bit+0x20/0x20
? rcu_stall_kick_kthreads.part.54+0xc0/0xc0
? rcu_exp_wait_wake+0x530/0x530
? i915_gem_shrink+0x34b/0x4b0
? i915_gem_shrinker_scan+0x7c/0x90
? i915_gem_shrinker_scan+0x7c/0x90
? shrink_slab.part.61.constprop.72+0x1c1/0x3a0
? shrink_zone+0x154/0x160
? kswapd+0x40a/0x720
? kthread+0xf4/0x130
? try_to_free_pages+0x450/0x450
? kthread_create_on_node+0x40/0x40
? ret_from_fork+0x23/0x30
plasmashell     D    0  4657   4614 0x00000000
Call Trace:
? __schedule+0x1a5/0x660
? schedule+0x36/0x80
? schedule_preempt_disabled+0xe/0x10
? __mutex_lock.isra.4+0x1c9/0x790
? i915_gem_close_object+0x26/0xc0
? i915_gem_close_object+0x26/0xc0
? drm_gem_object_release_handle+0x48/0x90
? drm_gem_handle_delete+0x50/0x80
? drm_ioctl+0x1fa/0x420
? drm_gem_handle_create+0x40/0x40
? pipe_write+0x391/0x410
? __vfs_write+0xc6/0x120
? do_vfs_ioctl+0x8b/0x5d0
? SyS_ioctl+0x3b/0x70
? entry_SYSCALL_64_fastpath+0x13/0x94
kworker/0:0     D    0 29186      2 0x00000000
Workqueue: events __i915_gem_free_work
Call Trace:
? __schedule+0x1a5/0x660
? schedule+0x36/0x80
? schedule_preempt_disabled+0xe/0x10
? __mutex_lock.isra.4+0x1c9/0x790
? del_timer_sync+0x44/0x50
? update_curr+0x57/0x110
? __i915_gem_free_objects+0x31/0x300
? __i915_gem_free_objects+0x31/0x300
? __i915_gem_free_work+0x2d/0x40
? process_one_work+0x13a/0x3b0
? worker_thread+0x4a/0x460
? kthread+0xf4/0x130
? process_one_work+0x3b0/0x3b0
? kthread_create_on_node+0x40/0x40
? ret_from_fork+0x23/0x30

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
---
 drivers/gpu/drm/i915/i915_gem.c          |  9 +++++++++
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 15 ++++++++++-----
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 67b1fc5..3982489 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4742,6 +4742,13 @@ int i915_gem_freeze(struct drm_i915_private *dev_priv)
 	i915_gem_shrink_all(dev_priv);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
+	/*
+	 * Cannot call synchronize_rcu() inside the struct_mutex
+	 * because it may block until workqueues complete, and the
+	 * running workqueue may wait on the struct_mutex.
+	 */
+	synchronize_rcu(); /* wait for our earlier RCU delayed slab frees */
+
 	intel_runtime_pm_put(dev_priv);
 
 	return 0;
@@ -4781,6 +4788,8 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
 	}
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
+	synchronize_rcu_expedited();
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index d5d2b4c..4636e52 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -235,9 +235,6 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 	if (unlock)
 		mutex_unlock(&dev_priv->drm.struct_mutex);
 
-	/* expedite the RCU grace period to free some request slabs */
-	synchronize_rcu_expedited();
-
 	return count;
 }
 
@@ -263,7 +260,6 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv)
 				I915_SHRINK_BOUND |
 				I915_SHRINK_UNBOUND |
 				I915_SHRINK_ACTIVE);
-	synchronize_rcu(); /* wait for our earlier RCU delayed slab frees */
 
 	return freed;
 }
@@ -321,8 +317,17 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 					 sc->nr_to_scan - freed,
 					 I915_SHRINK_BOUND |
 					 I915_SHRINK_UNBOUND);
-	if (unlock)
+	if (unlock) {
 		mutex_unlock(&dev->struct_mutex);
+		/*
+		 * If reclaim was invoked by an allocation done while
+		 * holding the struct mutex, we cannot call
+		 * synchronize_rcu_expedited() as it depends on
+		 * workqueues to run but the running workqueue may be
+		 * blocked waiting on us to release struct_mutex.
+		 */
+		synchronize_rcu_expedited();
+	}
 
 	return freed;
 }
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2017-04-07 14:20 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-07 10:49 [PATCH 1/2] drm/i915: Don't call synchronize_rcu_expedited under struct_mutex Joonas Lahtinen
2017-04-07 10:49 ` [PATCH 2/2] drm/i915: Simplify shrinker locking Joonas Lahtinen
2017-04-07 11:30   ` Chris Wilson
2017-04-07 11:25 ` ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Don't call synchronize_rcu_expedited under struct_mutex Patchwork
2017-04-07 11:44   ` Joonas Lahtinen
2017-04-07 13:32 ` [PATCH 1/2] " Andrea Arcangeli
2017-04-07 14:20   ` Andrea Arcangeli

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.