All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim
Date: Mon,  7 Jan 2019 11:54:24 +0000	[thread overview]
Message-ID: <20190107115509.12523-1-chris@chris-wilson.co.uk> (raw)

Ignore trying to shrink from i915 if we fail to acquire the struct_mutex
in the shrinker while performing direct-reclaim. The trade-off being
(much) lower latency for non-i915 clients at an increased risk of being
unable to obtain a page from direct-reclaim without hitting the
oom-notifier. The proviso being that we still keep trying hard to
obtain the lock for kswapd so that we can reap under heavy memory
pressure.

v2: Taint all mutexes taken within the shrinker with the struct_mutex
subclass as an early warning system, and drop I915_SHRINK_ACTIVE from
vmap to reduce the number of dangerous paths. We also have to drop
I915_SHRINK_ACTIVE from oom-notifier to be able to make the same claim
that ACTIVE is only used from outside context, which fits in with a
longer strategy of avoiding stalls due to scanning active during
shrinking.

The danger in using the subclass struct_mutex is that we declare
ourselves more knowledgeable than lockdep and deprive ourselves of
automatic coverage. Instead, we require ourselves to mark up any mutex
taken inside the shrinker in order to detect lock-inversion, and if we
miss any we are doomed to a deadlock at the worst possible moment.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          |  7 +--
 drivers/gpu/drm/i915/i915_gem_gtt.c      |  8 +--
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 68 ++++++++++++++++--------
 3 files changed, 54 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7fa2a405c5fe..17a017645c5d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2899,9 +2899,9 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 	__i915_gem_object_unpin_pages(obj);
 }
 
-enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock */
+enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
 	I915_MM_NORMAL = 0,
-	I915_MM_SHRINKER
+	I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */
 };
 
 void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
@@ -3187,7 +3187,8 @@ unsigned long i915_gem_shrink(struct drm_i915_private *i915,
 unsigned long i915_gem_shrink_all(struct drm_i915_private *i915);
 void i915_gem_shrinker_register(struct drm_i915_private *i915);
 void i915_gem_shrinker_unregister(struct drm_i915_private *i915);
-void i915_gem_shrinker_taints_mutex(struct mutex *mutex);
+void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
+				    struct mutex *mutex);
 
 /* i915_gem_tiling.c */
 static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index d4c5973ea33d..5cc8968eb3bf 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -483,7 +483,7 @@ static void i915_address_space_init(struct i915_address_space *vm,
 	 * attempt holding the lock is immediately reported by lockdep.
 	 */
 	mutex_init(&vm->mutex);
-	i915_gem_shrinker_taints_mutex(&vm->mutex);
+	i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
 
 	GEM_BUG_ON(!vm->total);
 	drm_mm_init(&vm->mm, 0, vm->total);
@@ -2245,7 +2245,8 @@ int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
 				     DMA_ATTR_NO_WARN))
 			return 0;
 
-		/* If the DMA remap fails, one cause can be that we have
+		/*
+		 * If the DMA remap fails, one cause can be that we have
 		 * too many objects pinned in a small remapping table,
 		 * such as swiotlb. Incrementally purge all other objects and
 		 * try again - if there are no more pages to remove from
@@ -2255,8 +2256,7 @@ int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
 	} while (i915_gem_shrink(to_i915(obj->base.dev),
 				 obj->base.size >> PAGE_SHIFT, NULL,
 				 I915_SHRINK_BOUND |
-				 I915_SHRINK_UNBOUND |
-				 I915_SHRINK_ACTIVE));
+				 I915_SHRINK_UNBOUND));
 
 	return -ENOSPC;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index ea90d3a0d511..72d6ea0cac7e 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -36,7 +36,9 @@
 #include "i915_drv.h"
 #include "i915_trace.h"
 
-static bool shrinker_lock(struct drm_i915_private *i915, bool *unlock)
+static bool shrinker_lock(struct drm_i915_private *i915,
+			  unsigned int flags,
+			  bool *unlock)
 {
 	switch (mutex_trylock_recursive(&i915->drm.struct_mutex)) {
 	case MUTEX_TRYLOCK_RECURSIVE:
@@ -45,15 +47,11 @@ static bool shrinker_lock(struct drm_i915_private *i915, bool *unlock)
 
 	case MUTEX_TRYLOCK_FAILED:
 		*unlock = false;
-		preempt_disable();
-		do {
-			cpu_relax();
-			if (mutex_trylock(&i915->drm.struct_mutex)) {
-				*unlock = true;
-				break;
-			}
-		} while (!need_resched());
-		preempt_enable();
+		if (flags & I915_SHRINK_ACTIVE) {
+			mutex_lock_nested(&i915->drm.struct_mutex,
+					  I915_MM_SHRINKER);
+			*unlock = true;
+		}
 		return *unlock;
 
 	case MUTEX_TRYLOCK_SUCCESS:
@@ -160,7 +158,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
 	unsigned long scanned = 0;
 	bool unlock;
 
-	if (!shrinker_lock(i915, &unlock))
+	if (!shrinker_lock(i915, flags, &unlock))
 		return 0;
 
 	/*
@@ -357,7 +355,7 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 
 	sc->nr_scanned = 0;
 
-	if (!shrinker_lock(i915, &unlock))
+	if (!shrinker_lock(i915, 0, &unlock))
 		return SHRINK_STOP;
 
 	freed = i915_gem_shrink(i915,
@@ -397,7 +395,7 @@ shrinker_lock_uninterruptible(struct drm_i915_private *i915, bool *unlock,
 	do {
 		if (i915_gem_wait_for_idle(i915,
 					   0, MAX_SCHEDULE_TIMEOUT) == 0 &&
-		    shrinker_lock(i915, unlock))
+		    shrinker_lock(i915, 0, unlock))
 			break;
 
 		schedule_timeout_killable(1);
@@ -421,7 +419,11 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
 	struct drm_i915_gem_object *obj;
 	unsigned long unevictable, bound, unbound, freed_pages;
 
-	freed_pages = i915_gem_shrink_all(i915);
+	intel_runtime_pm_get(i915);
+	freed_pages = i915_gem_shrink(i915, -1UL, NULL,
+				      I915_SHRINK_BOUND |
+				      I915_SHRINK_UNBOUND);
+	intel_runtime_pm_put(i915);
 
 	/* Because we may be allocating inside our own driver, we cannot
 	 * assert that there are no objects with pinned pages that are not
@@ -447,10 +449,6 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
 		pr_info("Purging GPU memory, %lu pages freed, "
 			"%lu pages still pinned.\n",
 			freed_pages, unevictable);
-	if (unbound || bound)
-		pr_err("%lu and %lu pages still available in the "
-		       "bound and unbound GPU page lists.\n",
-		       bound, unbound);
 
 	*(unsigned long *)ptr += freed_pages;
 	return NOTIFY_DONE;
@@ -480,7 +478,6 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 	freed_pages += i915_gem_shrink(i915, -1UL, NULL,
 				       I915_SHRINK_BOUND |
 				       I915_SHRINK_UNBOUND |
-				       I915_SHRINK_ACTIVE |
 				       I915_SHRINK_VMAPS);
 	intel_runtime_pm_put(i915);
 
@@ -533,13 +530,40 @@ void i915_gem_shrinker_unregister(struct drm_i915_private *i915)
 	unregister_shrinker(&i915->mm.shrinker);
 }
 
-void i915_gem_shrinker_taints_mutex(struct mutex *mutex)
+void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
+				    struct mutex *mutex)
 {
+	bool unlock = false;
+
 	if (!IS_ENABLED(CONFIG_LOCKDEP))
 		return;
 
+	if (!lockdep_is_held_type(&i915->drm.struct_mutex, -1)) {
+		mutex_acquire(&i915->drm.struct_mutex.dep_map,
+			      I915_MM_NORMAL, 0, _RET_IP_);
+		unlock = true;
+	}
+
 	fs_reclaim_acquire(GFP_KERNEL);
-	mutex_lock(mutex);
-	mutex_unlock(mutex);
+
+	/*
+	 * As we invariably rely on the struct_mutex within the shrinker,
+	 * but have a complicated recursion dance, taint all the mutexes used
+	 * within the shrinker with the struct_mutex. For completeness, we
+	 * taint with all subclass of struct_mutex, even though we should
+	 * only need tainting by I915_MM_NORMAL to catch possible ABBA
+	 * deadlocks from using struct_mutex inside @mutex.
+	 */
+	mutex_acquire(&i915->drm.struct_mutex.dep_map,
+		      I915_MM_SHRINKER, 0, _RET_IP_);
+
+	mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_);
+	mutex_release(&mutex->dep_map, 0, _RET_IP_);
+
+	mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
+
 	fs_reclaim_release(GFP_KERNEL);
+
+	if (unlock)
+		mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
 }
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

             reply	other threads:[~2019-01-07 11:57 UTC|newest]

Thread overview: 111+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-01-07 11:54 Chris Wilson [this message]
2019-01-07 11:54 ` [PATCH 02/46] drm/i915: Report the number of closed vma held by each context in debugfs Chris Wilson
2019-01-07 12:35   ` Mika Kuoppala
2019-01-07 11:54 ` [PATCH 03/46] drm/i915: Track all held rpm wakerefs Chris Wilson
2019-01-07 13:14   ` Mika Kuoppala
2019-01-07 13:22     ` Chris Wilson
2019-01-08 11:45   ` [PATCH v2] " Chris Wilson
2019-01-08 12:22   ` [PATCH v3] " Chris Wilson
2019-01-08 12:49     ` Mika Kuoppala
2019-01-08 20:05     ` kbuild test robot
2019-01-07 11:54 ` [PATCH 04/46] drm/i915: Markup paired operations on wakerefs Chris Wilson
2019-01-08 16:23   ` Mika Kuoppala
2019-01-08 16:41     ` Chris Wilson
2019-01-09  9:23       ` Mika Kuoppala
2019-01-09 11:51         ` Chris Wilson
2019-01-09 23:33           ` John Harrison
2019-01-07 11:54 ` [PATCH 05/46] drm/i915: Track GT wakeref Chris Wilson
2019-01-09  9:52   ` Mika Kuoppala
2019-01-07 11:54 ` [PATCH 06/46] drm/i915: Track the rpm wakerefs for error handling Chris Wilson
2019-01-09 10:12   ` Mika Kuoppala
2019-01-07 11:54 ` [PATCH 07/46] drm/i915: Mark up sysfs with rpm wakeref tracking Chris Wilson
2019-01-09 10:13   ` Mika Kuoppala
2019-01-07 11:54 ` [PATCH 08/46] drm/i915: Mark up debugfs " Chris Wilson
2019-01-09 10:20   ` Mika Kuoppala
2019-01-09 11:49     ` Chris Wilson
2019-01-07 11:54 ` [PATCH 09/46] drm/i915/perf: Track the rpm wakeref Chris Wilson
2019-01-09 10:30   ` Mika Kuoppala
2019-01-09 11:45     ` Chris Wilson
2019-01-07 11:54 ` [PATCH 10/46] drm/i915/pmu: Track " Chris Wilson
2019-01-09 10:37   ` Mika Kuoppala
2019-01-07 11:54 ` [PATCH 11/46] drm/i915/guc: Track the " Chris Wilson
2019-01-09 10:53   ` Mika Kuoppala
2019-01-07 11:54 ` [PATCH 12/46] drm/i915/gem: Track the rpm wakerefs Chris Wilson
2019-01-09 11:16   ` Mika Kuoppala
2019-01-09 23:45     ` John Harrison
2019-01-07 11:54 ` [PATCH 13/46] drm/i915/fb: Track " Chris Wilson
2019-01-09 11:39   ` Mika Kuoppala
2019-01-07 11:54 ` [PATCH 14/46] drm/i915/hotplug: Track temporary rpm wakeref Chris Wilson
2019-01-09 11:40   ` Mika Kuoppala
2019-01-07 11:54 ` [PATCH 15/46] drm/i915/panel: " Chris Wilson
2019-01-09 11:41   ` Mika Kuoppala
2019-01-07 11:54 ` [PATCH 16/46] drm/i915/selftests: Mark up rpm wakerefs Chris Wilson
2019-01-09 12:54   ` Mika Kuoppala
2019-01-07 11:54 ` [PATCH 17/46] drm/i915: Syntatic sugar for using intel_runtime_pm Chris Wilson
2019-01-09 14:30   ` Mika Kuoppala
2019-01-10  0:24   ` John Harrison
2019-01-10  1:10     ` John Harrison
2019-01-10  9:59       ` Chris Wilson
2019-01-07 11:54 ` [PATCH 18/46] drm/i915: Markup paired operations on display power domains Chris Wilson
2019-01-10  0:55   ` John Harrison
2019-01-10 10:00     ` Chris Wilson
2019-01-07 11:54 ` [PATCH 19/46] drm/i915: Track the wakeref used to initialise " Chris Wilson
2019-01-07 11:54 ` [PATCH 20/46] drm/i915: Combined gt.awake/gt.power wakerefs Chris Wilson
2019-01-07 11:54 ` [PATCH 21/46] drm/i915/dp: Markup pps lock power well Chris Wilson
2019-01-07 11:54 ` [PATCH 22/46] drm/i915: Complain if hsw_get_pipe_config acquires the same power well twice Chris Wilson
2019-01-07 11:54 ` [PATCH 23/46] drm/i915: Mark up Ironlake ips with rpm wakerefs Chris Wilson
2019-01-07 11:54 ` [PATCH 24/46] drm/i915: Serialise concurrent calls to i915_gem_set_wedged() Chris Wilson
2019-01-07 11:54 ` [PATCH 25/46] drm/i915: Differentiate between ggtt->mutex and ppgtt->mutex Chris Wilson
2019-01-08  9:00   ` Tvrtko Ursulin
2019-01-07 11:54 ` [PATCH 26/46] drm/i915: Pull all the reset functionality together into i915_reset.c Chris Wilson
2019-01-07 11:54 ` [PATCH 27/46] drm/i915: Make all GPU resets atomic Chris Wilson
2019-01-07 11:54 ` [PATCH 28/46] drm/i915/guc: Disable global reset Chris Wilson
2019-01-07 11:54 ` [PATCH 29/46] drm/i915: Remove GPU reset dependence on struct_mutex Chris Wilson
2019-01-07 11:54 ` [PATCH 30/46] drm/i915: Issue engine resets onto idle engines Chris Wilson
2019-01-07 11:54 ` [PATCH 31/46] drm/i915: Stop tracking MRU activity on VMA Chris Wilson
2019-01-16 16:27   ` Tvrtko Ursulin
2019-01-16 16:37     ` Chris Wilson
2019-01-07 11:54 ` [PATCH 32/46] drm/i915: Pull VM lists under the VM mutex Chris Wilson
2019-01-16 16:47   ` Tvrtko Ursulin
2019-01-16 17:01     ` Chris Wilson
2019-01-17 16:23       ` Tvrtko Ursulin
2019-01-17 23:20         ` Chris Wilson
2019-01-07 11:54 ` [PATCH 33/46] drm/i915: Move vma lookup to its own lock Chris Wilson
2019-01-07 11:54 ` [PATCH 34/46] drm/i915: Move intel_execlists_show_requests() aside Chris Wilson
2019-01-07 11:54 ` [PATCH 35/46] drm/i915: Use b->irq_enable() as predicate for mock engine Chris Wilson
2019-01-07 11:54 ` [PATCH 36/46] drm/i915/selftests: Allocate mock ring/timeline per context Chris Wilson
2019-01-07 11:55 ` [PATCH 37/46] drm/i915/selftests: Make evict tolerant of foreign objects Chris Wilson
2019-01-07 11:55 ` [PATCH 38/46] drm/i915: Remove the intel_engine_notify tracepoint Chris Wilson
2019-01-07 11:55 ` [PATCH 39/46] drm/i915: Always allocate an object/vma for the HWSP Chris Wilson
2019-01-10 10:52   ` Matthew Auld
2019-01-10 11:07     ` Chris Wilson
2019-01-10 11:24   ` Matthew Auld
2019-01-07 11:55 ` [PATCH 40/46] drm/i915: Move list of timelines under its own lock Chris Wilson
2019-01-07 11:55 ` [PATCH 41/46] drm/i915: Introduce concept of per-timeline (context) HWSP Chris Wilson
2019-01-15  0:55   ` John Harrison
2019-01-15  9:14     ` Chris Wilson
2019-01-15 15:40       ` Chris Wilson
2019-01-15 17:56         ` John Harrison
2019-01-07 11:55 ` [PATCH 42/46] drm/i915: Enlarge vma->pin_count Chris Wilson
2019-01-15 19:57   ` John Harrison
2019-01-15 20:17     ` Chris Wilson
2019-01-16  0:18       ` John Harrison
2019-01-07 11:55 ` [PATCH 43/46] drm/i915: Allocate a status page for each timeline Chris Wilson
2019-01-15  0:56   ` John Harrison
2019-01-15  9:50     ` Chris Wilson
2019-01-15 18:17       ` John Harrison
2019-01-15 18:43         ` Chris Wilson
2019-01-16 21:06           ` John Harrison
2019-01-16 21:15             ` Chris Wilson
2019-01-07 11:55 ` [PATCH 44/46] drm/i915: Track the context's seqno in its own timeline HWSP Chris Wilson
2019-01-07 11:55 ` [PATCH 45/46] drm/i915: Identify active requests Chris Wilson
2019-01-07 11:55 ` [PATCH 46/46] drm/i915: Replace global breadcrumbs with per-context interrupt tracking Chris Wilson
2019-01-07 12:45 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Patchwork
2019-01-07 13:02 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-01-07 13:05 ` ✓ Fi.CI.BAT: success " Patchwork
2019-01-07 14:00 ` [PATCH 01/46] " Tvrtko Ursulin
2019-01-07 14:07   ` Chris Wilson
2019-01-08  8:58     ` Tvrtko Ursulin
2019-01-07 17:10 ` ✗ Fi.CI.IGT: failure for series starting with [01/46] " Patchwork
2019-01-07 17:19   ` Chris Wilson
2019-01-08 13:50 ` ✗ Fi.CI.BAT: failure for series starting with [01/46] drm/i915: Return immediately if trylock fails for direct-reclaim (rev3) Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190107115509.12523-1-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.