* [Intel-gfx] [PATCH 1/9] drm/i915/gt: Stop holding onto the pinned_default_state
@ 2020-04-30 11:18 Chris Wilson
  2020-04-30 11:18 ` [Intel-gfx] [PATCH 2/9] drm/i915/gt: Move the batch buffer pool from the engine to the gt Chris Wilson
                   ` (10 more replies)
  0 siblings, 11 replies; 14+ messages in thread
From: Chris Wilson @ 2020-04-30 11:18 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

As we only restore the default context state upon banning a context, we
only need enough of the state to run the ring and nothing more. That is,
we only need our bare protocontext.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Andi Shyti <andi.shyti@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_pm.c    | 14 +-----
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  1 -
 drivers/gpu/drm/i915/gt/intel_lrc.c          | 14 ++----
 drivers/gpu/drm/i915/gt/selftest_context.c   | 11 ++--
 drivers/gpu/drm/i915/gt/selftest_lrc.c       | 53 +++++++++++++++-----
 5 files changed, 47 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 446e35ac0224..cf46076c59b2 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -22,18 +22,11 @@ static int __engine_unpark(struct intel_wakeref *wf)
 	struct intel_engine_cs *engine =
 		container_of(wf, typeof(*engine), wakeref);
 	struct intel_context *ce;
-	void *map;
 
 	ENGINE_TRACE(engine, "\n");
 
 	intel_gt_pm_get(engine->gt);
 
-	/* Pin the default state for fast resets from atomic context. */
-	map = NULL;
-	if (engine->default_state)
-		map = shmem_pin_map(engine->default_state);
-	engine->pinned_default_state = map;
-
 	/* Discard stale context state from across idling */
 	ce = engine->kernel_context;
 	if (ce) {
@@ -43,6 +36,7 @@ static int __engine_unpark(struct intel_wakeref *wf)
 		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && ce->state) {
 			struct drm_i915_gem_object *obj = ce->state->obj;
 			int type = i915_coherent_map_type(engine->i915);
+			void *map;
 
 			map = i915_gem_object_pin_map(obj, type);
 			if (!IS_ERR(map)) {
@@ -262,12 +256,6 @@ static int __engine_park(struct intel_wakeref *wf)
 	if (engine->park)
 		engine->park(engine);
 
-	if (engine->pinned_default_state) {
-		shmem_unpin_map(engine->default_state,
-				engine->pinned_default_state);
-		engine->pinned_default_state = NULL;
-	}
-
 	engine->execlists.no_priolist = false;
 
 	/* While gt calls i915_vma_parked(), we have to break the lock cycle */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index f760e2ef285b..68a382229b4a 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -340,7 +340,6 @@ struct intel_engine_cs {
 	unsigned long wakeref_serial;
 	struct intel_wakeref wakeref;
 	struct file *default_state;
-	void *pinned_default_state;
 
 	struct {
 		struct intel_ring *ring;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 4311b12542fb..dc5517c85df1 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1271,14 +1271,11 @@ execlists_check_context(const struct intel_context *ce,
 static void restore_default_state(struct intel_context *ce,
 				  struct intel_engine_cs *engine)
 {
-	u32 *regs = ce->lrc_reg_state;
+	u32 *regs;
 
-	if (engine->pinned_default_state)
-		memcpy(regs, /* skip restoring the vanilla PPHWSP */
-		       engine->pinned_default_state + LRC_STATE_OFFSET,
-		       engine->context_size - PAGE_SIZE);
+	regs = memset(ce->lrc_reg_state, 0, engine->context_size - PAGE_SIZE);
+	execlists_init_reg_state(regs, ce, engine, ce->ring, true);
 
-	execlists_init_reg_state(regs, ce, engine, ce->ring, false);
 	ce->runtime.last = intel_context_get_runtime(ce);
 }
 
@@ -4166,8 +4163,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	 * image back to the expected values to skip over the guilty request.
 	 */
 	__i915_request_reset(rq, stalled);
-	if (!stalled)
-		goto out_replay;
 
 	/*
 	 * We want a simple context + ring to execute the breadcrumb update.
@@ -4177,9 +4172,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	 * future request will be after userspace has had the opportunity
 	 * to recreate its own state.
 	 */
-	GEM_BUG_ON(!intel_context_is_pinned(ce));
-	restore_default_state(ce, engine);
-
 out_replay:
 	ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
 		     head, ce->ring->tail);
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index b8ed3cbe1277..a56dff3b157a 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -154,10 +154,7 @@ static int live_context_size(void *arg)
 	 */
 
 	for_each_engine(engine, gt, id) {
-		struct {
-			struct file *state;
-			void *pinned;
-		} saved;
+		struct file *saved;
 
 		if (!engine->context_size)
 			continue;
@@ -171,8 +168,7 @@ static int live_context_size(void *arg)
 		 * active state is sufficient, we are only checking that we
 		 * don't use more than we planned.
 		 */
-		saved.state = fetch_and_zero(&engine->default_state);
-		saved.pinned = fetch_and_zero(&engine->pinned_default_state);
+		saved = fetch_and_zero(&engine->default_state);
 
 		/* Overlaps with the execlists redzone */
 		engine->context_size += I915_GTT_PAGE_SIZE;
@@ -181,8 +177,7 @@ static int live_context_size(void *arg)
 
 		engine->context_size -= I915_GTT_PAGE_SIZE;
 
-		engine->pinned_default_state = saved.pinned;
-		engine->default_state = saved.state;
+		engine->default_state = saved;
 
 		intel_engine_pm_put(engine);
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 7529df92f6a2..d946c0521dbc 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -5206,6 +5206,7 @@ store_context(struct intel_context *ce, struct i915_vma *scratch)
 {
 	struct i915_vma *batch;
 	u32 dw, x, *cs, *hw;
+	u32 *defaults;
 
 	batch = create_user_vma(ce->vm, SZ_64K);
 	if (IS_ERR(batch))
@@ -5217,9 +5218,16 @@ store_context(struct intel_context *ce, struct i915_vma *scratch)
 		return ERR_CAST(cs);
 	}
 
+	defaults = shmem_pin_map(ce->engine->default_state);
+	if (!defaults) {
+		i915_gem_object_unpin_map(batch->obj);
+		i915_vma_put(batch);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	x = 0;
 	dw = 0;
-	hw = ce->engine->pinned_default_state;
+	hw = defaults;
 	hw += LRC_STATE_OFFSET / sizeof(*hw);
 	do {
 		u32 len = hw[dw] & 0x7f;
@@ -5250,6 +5258,8 @@ store_context(struct intel_context *ce, struct i915_vma *scratch)
 
 	*cs++ = MI_BATCH_BUFFER_END;
 
+	shmem_unpin_map(ce->engine->default_state, defaults);
+
 	i915_gem_object_flush_map(batch->obj);
 	i915_gem_object_unpin_map(batch->obj);
 
@@ -5360,6 +5370,7 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
 {
 	struct i915_vma *batch;
 	u32 dw, *cs, *hw;
+	u32 *defaults;
 
 	batch = create_user_vma(ce->vm, SZ_64K);
 	if (IS_ERR(batch))
@@ -5371,8 +5382,15 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
 		return ERR_CAST(cs);
 	}
 
+	defaults = shmem_pin_map(ce->engine->default_state);
+	if (!defaults) {
+		i915_gem_object_unpin_map(batch->obj);
+		i915_vma_put(batch);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	dw = 0;
-	hw = ce->engine->pinned_default_state;
+	hw = defaults;
 	hw += LRC_STATE_OFFSET / sizeof(*hw);
 	do {
 		u32 len = hw[dw] & 0x7f;
@@ -5400,6 +5418,8 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
 
 	*cs++ = MI_BATCH_BUFFER_END;
 
+	shmem_unpin_map(ce->engine->default_state, defaults);
+
 	i915_gem_object_flush_map(batch->obj);
 	i915_gem_object_unpin_map(batch->obj);
 
@@ -5467,6 +5487,7 @@ static int compare_isolation(struct intel_engine_cs *engine,
 {
 	u32 x, dw, *hw, *lrc;
 	u32 *A[2], *B[2];
+	u32 *defaults;
 	int err = 0;
 
 	A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC);
@@ -5499,9 +5520,15 @@ static int compare_isolation(struct intel_engine_cs *engine,
 	}
 	lrc += LRC_STATE_OFFSET / sizeof(*hw);
 
+	defaults = shmem_pin_map(ce->engine->default_state);
+	if (!defaults) {
+		err = -ENOMEM;
+		goto err_lrc;
+	}
+
 	x = 0;
 	dw = 0;
-	hw = engine->pinned_default_state;
+	hw = defaults;
 	hw += LRC_STATE_OFFSET / sizeof(*hw);
 	do {
 		u32 len = hw[dw] & 0x7f;
@@ -5541,6 +5568,8 @@ static int compare_isolation(struct intel_engine_cs *engine,
 	} while (dw < PAGE_SIZE / sizeof(u32) &&
 		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
 
+	shmem_unpin_map(ce->engine->default_state, defaults);
+err_lrc:
 	i915_gem_object_unpin_map(ce->state->obj);
 err_B1:
 	i915_gem_object_unpin_map(result[1]->obj);
@@ -5690,18 +5719,16 @@ static int live_lrc_isolation(void *arg)
 			continue;
 
 		intel_engine_pm_get(engine);
-		if (engine->pinned_default_state) {
-			for (i = 0; i < ARRAY_SIZE(poison); i++) {
-				int result;
+		for (i = 0; i < ARRAY_SIZE(poison); i++) {
+			int result;
 
-				result = __lrc_isolation(engine, poison[i]);
-				if (result && !err)
-					err = result;
+			result = __lrc_isolation(engine, poison[i]);
+			if (result && !err)
+				err = result;
 
-				result = __lrc_isolation(engine, ~poison[i]);
-				if (result && !err)
-					err = result;
-			}
+			result = __lrc_isolation(engine, ~poison[i]);
+			if (result && !err)
+				err = result;
 		}
 		intel_engine_pm_put(engine);
 		if (igt_flush_test(gt->i915)) {
-- 
2.20.1


* [Intel-gfx] [PATCH 2/9] drm/i915/gt: Move the batch buffer pool from the engine to the gt
  2020-04-30 11:18 [Intel-gfx] [PATCH 1/9] drm/i915/gt: Stop holding onto the pinned_default_state Chris Wilson
@ 2020-04-30 11:18 ` Chris Wilson
  2020-04-30 17:04   ` Tvrtko Ursulin
  2020-04-30 11:18 ` [Intel-gfx] [PATCH 3/9] drm/i915: Always defer fenced work to the worker Chris Wilson
                   ` (9 subsequent siblings)
  10 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2020-04-30 11:18 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Since the introduction of 'soft-rc6', we aim to park the device quickly,
which results in frequent idling of the whole device. Currently, upon
idling we free the batch buffer pool, which renders the cache ineffective
for many workloads. If we want an effective cache of recently allocated
buffers available for reuse, we need to decouple that cache from the
engine power management and make it timer-based. As there is then no
reason to keep it within the engine (where it once made retirement order
easier to track), we can move it up the hierarchy to the owner of the
memory allocations.
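
For reference, the consumer-side pattern after this change is roughly
the following (an illustrative sketch only; the wrapper function is
hypothetical, while the three pool calls are the ones introduced below):

	static int example_use_pool(struct intel_gt *gt,
				    struct i915_request *rq, size_t size)
	{
		struct intel_gt_buffer_pool_node *pool;
		int err;

		/* Grab (or create) a purgeable buffer of at least size bytes */
		pool = intel_gt_get_buffer_pool(gt, size);
		if (IS_ERR(pool))
			return PTR_ERR(pool);

		/* Keep the buffer alive until rq retires */
		err = intel_gt_buffer_pool_mark_active(pool, rq);

		/* On retirement the node sits in the cache for ~1s before reaping */
		intel_gt_buffer_pool_put(pool);
		return err;
	}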

v2: Hook up to debugfs/drop_caches to clear the cache on demand.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   2 +-
 .../gpu/drm/i915/gem/i915_gem_client_blt.c    |   1 -
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  20 +--
 .../gpu/drm/i915/gem/i915_gem_object_blt.c    |  18 +--
 .../gpu/drm/i915/gem/i915_gem_object_blt.h    |   1 -
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   4 -
 drivers/gpu/drm/i915/gt/intel_engine_pm.c     |   2 -
 drivers/gpu/drm/i915/gt/intel_engine_pool.h   |  34 ------
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |   8 --
 drivers/gpu/drm/i915/gt/intel_gt.c            |   3 +
 ...l_engine_pool.c => intel_gt_buffer_pool.c} | 114 ++++++++++++------
 .../gpu/drm/i915/gt/intel_gt_buffer_pool.h    |  37 ++++++
 ...l_types.h => intel_gt_buffer_pool_types.h} |  15 ++-
 drivers/gpu/drm/i915/gt/intel_gt_types.h      |  11 ++
 drivers/gpu/drm/i915/gt/mock_engine.c         |   2 -
 drivers/gpu/drm/i915/i915_debugfs.c           |   4 +
 16 files changed, 160 insertions(+), 116 deletions(-)
 delete mode 100644 drivers/gpu/drm/i915/gt/intel_engine_pool.h
 rename drivers/gpu/drm/i915/gt/{intel_engine_pool.c => intel_gt_buffer_pool.c} (53%)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h
 rename drivers/gpu/drm/i915/gt/{intel_engine_pool_types.h => intel_gt_buffer_pool_types.h} (54%)

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index caf00d92ea9d..5359c736c789 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -87,11 +87,11 @@ gt-y += \
 	gt/intel_engine_cs.o \
 	gt/intel_engine_heartbeat.o \
 	gt/intel_engine_pm.o \
-	gt/intel_engine_pool.o \
 	gt/intel_engine_user.o \
 	gt/intel_ggtt.o \
 	gt/intel_ggtt_fencing.o \
 	gt/intel_gt.o \
+	gt/intel_gt_buffer_pool.o \
 	gt/intel_gt_clock_utils.o \
 	gt/intel_gt_irq.o \
 	gt/intel_gt_pm.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
index 0598e5382a1d..3a146aa2593b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -6,7 +6,6 @@
 #include "i915_drv.h"
 #include "gt/intel_context.h"
 #include "gt/intel_engine_pm.h"
-#include "gt/intel_engine_pool.h"
 #include "i915_gem_client_blt.h"
 #include "i915_gem_object_blt.h"
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 964f73f062c1..414859fa2673 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -15,8 +15,8 @@
 
 #include "gem/i915_gem_ioctls.h"
 #include "gt/intel_context.h"
-#include "gt/intel_engine_pool.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_buffer_pool.h"
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_ring.h"
 
@@ -1194,13 +1194,13 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 			     unsigned int len)
 {
 	struct reloc_cache *cache = &eb->reloc_cache;
-	struct intel_engine_pool_node *pool;
+	struct intel_gt_buffer_pool_node *pool;
 	struct i915_request *rq;
 	struct i915_vma *batch;
 	u32 *cmd;
 	int err;
 
-	pool = intel_engine_get_pool(eb->engine, PAGE_SIZE);
+	pool = intel_gt_get_buffer_pool(eb->engine->gt, PAGE_SIZE);
 	if (IS_ERR(pool))
 		return PTR_ERR(pool);
 
@@ -1229,7 +1229,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 		goto err_unpin;
 	}
 
-	err = intel_engine_pool_mark_active(pool, rq);
+	err = intel_gt_buffer_pool_mark_active(pool, rq);
 	if (err)
 		goto err_request;
 
@@ -1270,7 +1270,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 err_unmap:
 	i915_gem_object_unpin_map(pool->obj);
 out_pool:
-	intel_engine_pool_put(pool);
+	intel_gt_buffer_pool_put(pool);
 	return err;
 }
 
@@ -1887,7 +1887,7 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
 static int eb_parse(struct i915_execbuffer *eb)
 {
 	struct drm_i915_private *i915 = eb->i915;
-	struct intel_engine_pool_node *pool;
+	struct intel_gt_buffer_pool_node *pool;
 	struct i915_vma *shadow, *trampoline;
 	unsigned int len;
 	int err;
@@ -1910,7 +1910,7 @@ static int eb_parse(struct i915_execbuffer *eb)
 		len += I915_CMD_PARSER_TRAMPOLINE_SIZE;
 	}
 
-	pool = intel_engine_get_pool(eb->engine, len);
+	pool = intel_gt_get_buffer_pool(eb->engine->gt, len);
 	if (IS_ERR(pool))
 		return PTR_ERR(pool);
 
@@ -1958,7 +1958,7 @@ static int eb_parse(struct i915_execbuffer *eb)
 err_shadow:
 	i915_vma_unpin(shadow);
 err:
-	intel_engine_pool_put(pool);
+	intel_gt_buffer_pool_put(pool);
 	return err;
 }
 
@@ -2643,7 +2643,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	 */
 	eb.request->batch = batch;
 	if (batch->private)
-		intel_engine_pool_mark_active(batch->private, eb.request);
+		intel_gt_buffer_pool_mark_active(batch->private, eb.request);
 
 	trace_i915_request_queue(eb.request, eb.batch_flags);
 	err = eb_submit(&eb, batch);
@@ -2672,7 +2672,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		i915_vma_unpin(batch);
 err_parse:
 	if (batch->private)
-		intel_engine_pool_put(batch->private);
+		intel_gt_buffer_pool_put(batch->private);
 err_vma:
 	if (eb.trampoline)
 		i915_vma_unpin(eb.trampoline);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
index e00792158f13..2fc7737ef5f4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
@@ -6,8 +6,8 @@
 #include "i915_drv.h"
 #include "gt/intel_context.h"
 #include "gt/intel_engine_pm.h"
-#include "gt/intel_engine_pool.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_buffer_pool.h"
 #include "gt/intel_ring.h"
 #include "i915_gem_clflush.h"
 #include "i915_gem_object_blt.h"
@@ -18,7 +18,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
 {
 	struct drm_i915_private *i915 = ce->vm->i915;
 	const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
-	struct intel_engine_pool_node *pool;
+	struct intel_gt_buffer_pool_node *pool;
 	struct i915_vma *batch;
 	u64 offset;
 	u64 count;
@@ -33,7 +33,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
 	count = div_u64(round_up(vma->size, block_size), block_size);
 	size = (1 + 8 * count) * sizeof(u32);
 	size = round_up(size, PAGE_SIZE);
-	pool = intel_engine_get_pool(ce->engine, size);
+	pool = intel_gt_get_buffer_pool(ce->engine->gt, size);
 	if (IS_ERR(pool)) {
 		err = PTR_ERR(pool);
 		goto out_pm;
@@ -96,7 +96,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
 	return batch;
 
 out_put:
-	intel_engine_pool_put(pool);
+	intel_gt_buffer_pool_put(pool);
 out_pm:
 	intel_engine_pm_put(ce->engine);
 	return ERR_PTR(err);
@@ -114,13 +114,13 @@ int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq)
 	if (unlikely(err))
 		return err;
 
-	return intel_engine_pool_mark_active(vma->private, rq);
+	return intel_gt_buffer_pool_mark_active(vma->private, rq);
 }
 
 void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma)
 {
 	i915_vma_unpin(vma);
-	intel_engine_pool_put(vma->private);
+	intel_gt_buffer_pool_put(vma->private);
 	intel_engine_pm_put(ce->engine);
 }
 
@@ -213,7 +213,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
 {
 	struct drm_i915_private *i915 = ce->vm->i915;
 	const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
-	struct intel_engine_pool_node *pool;
+	struct intel_gt_buffer_pool_node *pool;
 	struct i915_vma *batch;
 	u64 src_offset, dst_offset;
 	u64 count, rem;
@@ -228,7 +228,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
 	count = div_u64(round_up(dst->size, block_size), block_size);
 	size = (1 + 11 * count) * sizeof(u32);
 	size = round_up(size, PAGE_SIZE);
-	pool = intel_engine_get_pool(ce->engine, size);
+	pool = intel_gt_get_buffer_pool(ce->engine->gt, size);
 	if (IS_ERR(pool)) {
 		err = PTR_ERR(pool);
 		goto out_pm;
@@ -307,7 +307,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
 	return batch;
 
 out_put:
-	intel_engine_pool_put(pool);
+	intel_gt_buffer_pool_put(pool);
 out_pm:
 	intel_engine_pm_put(ce->engine);
 	return ERR_PTR(err);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
index 243a43a87824..8bcd336a90dc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
@@ -10,7 +10,6 @@
 
 #include "gt/intel_context.h"
 #include "gt/intel_engine_pm.h"
-#include "gt/intel_engine_pool.h"
 #include "i915_vma.h"
 
 struct drm_i915_gem_object;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index c9e46c5ced43..98b326a1568d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -31,7 +31,6 @@
 #include "intel_context.h"
 #include "intel_engine.h"
 #include "intel_engine_pm.h"
-#include "intel_engine_pool.h"
 #include "intel_engine_user.h"
 #include "intel_gt.h"
 #include "intel_gt_requests.h"
@@ -631,8 +630,6 @@ static int engine_setup_common(struct intel_engine_cs *engine)
 	intel_engine_init__pm(engine);
 	intel_engine_init_retire(engine);
 
-	intel_engine_pool_init(&engine->pool);
-
 	/* Use the whole device by default */
 	engine->sseu =
 		intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu);
@@ -829,7 +826,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 	cleanup_status_page(engine);
 
 	intel_engine_fini_retire(engine);
-	intel_engine_pool_fini(&engine->pool);
 	intel_engine_fini_breadcrumbs(engine);
 	intel_engine_cleanup_cmd_parser(engine);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index cf46076c59b2..d0a1078ef632 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -10,7 +10,6 @@
 #include "intel_engine.h"
 #include "intel_engine_heartbeat.h"
 #include "intel_engine_pm.h"
-#include "intel_engine_pool.h"
 #include "intel_gt.h"
 #include "intel_gt_pm.h"
 #include "intel_rc6.h"
@@ -248,7 +247,6 @@ static int __engine_park(struct intel_wakeref *wf)
 
 	intel_engine_park_heartbeat(engine);
 	intel_engine_disarm_breadcrumbs(engine);
-	intel_engine_pool_park(&engine->pool);
 
 	/* Must be reset upon idling, or we may miss the busy wakeup. */
 	GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.h b/drivers/gpu/drm/i915/gt/intel_engine_pool.h
deleted file mode 100644
index 1bd89cadc3b7..000000000000
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2014-2018 Intel Corporation
- */
-
-#ifndef INTEL_ENGINE_POOL_H
-#define INTEL_ENGINE_POOL_H
-
-#include "intel_engine_pool_types.h"
-#include "i915_active.h"
-#include "i915_request.h"
-
-struct intel_engine_pool_node *
-intel_engine_get_pool(struct intel_engine_cs *engine, size_t size);
-
-static inline int
-intel_engine_pool_mark_active(struct intel_engine_pool_node *node,
-			      struct i915_request *rq)
-{
-	return i915_active_add_request(&node->active, rq);
-}
-
-static inline void
-intel_engine_pool_put(struct intel_engine_pool_node *node)
-{
-	i915_active_release(&node->active);
-}
-
-void intel_engine_pool_init(struct intel_engine_pool *pool);
-void intel_engine_pool_park(struct intel_engine_pool *pool);
-void intel_engine_pool_fini(struct intel_engine_pool *pool);
-
-#endif /* INTEL_ENGINE_POOL_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 68a382229b4a..489deb3b8358 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -22,7 +22,6 @@
 #include "i915_pmu.h"
 #include "i915_priolist_types.h"
 #include "i915_selftest.h"
-#include "intel_engine_pool_types.h"
 #include "intel_sseu.h"
 #include "intel_timeline_types.h"
 #include "intel_wakeref.h"
@@ -404,13 +403,6 @@ struct intel_engine_cs {
 		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT];
 	} pmu;
 
-	/*
-	 * A pool of objects to use as shadow copies of client batch buffers
-	 * when the command parser is enabled. Prevents the client from
-	 * modifying the batch contents after software parsing.
-	 */
-	struct intel_engine_pool pool;
-
 	struct intel_hw_status_page status_page;
 	struct i915_ctx_workarounds wa_ctx;
 	struct i915_wa_list ctx_wa_list;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 52593edf8aa0..f069551e412f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -7,6 +7,7 @@
 #include "i915_drv.h"
 #include "intel_context.h"
 #include "intel_gt.h"
+#include "intel_gt_buffer_pool.h"
 #include "intel_gt_clock_utils.h"
 #include "intel_gt_pm.h"
 #include "intel_gt_requests.h"
@@ -28,6 +29,7 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
 	INIT_LIST_HEAD(&gt->closed_vma);
 	spin_lock_init(&gt->closed_lock);
 
+	intel_gt_init_buffer_pool(gt);
 	intel_gt_init_reset(gt);
 	intel_gt_init_requests(gt);
 	intel_gt_init_timelines(gt);
@@ -621,6 +623,7 @@ void intel_gt_driver_release(struct intel_gt *gt)
 
 	intel_gt_pm_fini(gt);
 	intel_gt_fini_scratch(gt);
+	intel_gt_fini_buffer_pool(gt);
 }
 
 void intel_gt_driver_late_release(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
similarity index 53%
rename from drivers/gpu/drm/i915/gt/intel_engine_pool.c
rename to drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
index 397186818305..1495054a4305 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
@@ -1,6 +1,5 @@
+// SPDX-License-Identifier: MIT
 /*
- * SPDX-License-Identifier: MIT
- *
  * Copyright © 2014-2018 Intel Corporation
  */
 
@@ -8,15 +7,15 @@
 
 #include "i915_drv.h"
 #include "intel_engine_pm.h"
-#include "intel_engine_pool.h"
+#include "intel_gt_buffer_pool.h"
 
-static struct intel_engine_cs *to_engine(struct intel_engine_pool *pool)
+static struct intel_gt *to_gt(struct intel_gt_buffer_pool *pool)
 {
-	return container_of(pool, struct intel_engine_cs, pool);
+	return container_of(pool, struct intel_gt, buffer_pool);
 }
 
 static struct list_head *
-bucket_for_size(struct intel_engine_pool *pool, size_t sz)
+bucket_for_size(struct intel_gt_buffer_pool *pool, size_t sz)
 {
 	int n;
 
@@ -32,16 +31,50 @@ bucket_for_size(struct intel_engine_pool *pool, size_t sz)
 	return &pool->cache_list[n];
 }
 
-static void node_free(struct intel_engine_pool_node *node)
+static void node_free(struct intel_gt_buffer_pool_node *node)
 {
 	i915_gem_object_put(node->obj);
 	i915_active_fini(&node->active);
 	kfree(node);
 }
 
+static void pool_free_work(struct work_struct *wrk)
+{
+	struct intel_gt_buffer_pool *pool =
+		container_of(wrk, typeof(*pool), work.work);
+	struct intel_gt_buffer_pool_node *node, *next;
+	unsigned long old = jiffies - HZ;
+	bool active = false;
+	LIST_HEAD(stale);
+	int n;
+
+	/* Free buffers that have not been used in the past second */
+	spin_lock_irq(&pool->lock);
+	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
+		struct list_head *list = &pool->cache_list[n];
+
+		/* Most recent at head; oldest at tail */
+		list_for_each_entry_safe_reverse(node, next, list, link) {
+			if (time_before(node->age, old))
+				break;
+
+			list_move(&node->link, &stale);
+		}
+		active |= !list_empty(list);
+	}
+	spin_unlock_irq(&pool->lock);
+
+	list_for_each_entry_safe(node, next, &stale, link)
+		node_free(node);
+
+	if (active)
+		schedule_delayed_work(&pool->work,
+				      round_jiffies_up_relative(HZ));
+}
+
 static int pool_active(struct i915_active *ref)
 {
-	struct intel_engine_pool_node *node =
+	struct intel_gt_buffer_pool_node *node =
 		container_of(ref, typeof(*node), active);
 	struct dma_resv *resv = node->obj->base.resv;
 	int err;
@@ -64,29 +97,31 @@ static int pool_active(struct i915_active *ref)
 __i915_active_call
 static void pool_retire(struct i915_active *ref)
 {
-	struct intel_engine_pool_node *node =
+	struct intel_gt_buffer_pool_node *node =
 		container_of(ref, typeof(*node), active);
-	struct intel_engine_pool *pool = node->pool;
+	struct intel_gt_buffer_pool *pool = node->pool;
 	struct list_head *list = bucket_for_size(pool, node->obj->base.size);
 	unsigned long flags;
 
-	GEM_BUG_ON(!intel_engine_pm_is_awake(to_engine(pool)));
-
 	i915_gem_object_unpin_pages(node->obj);
 
 	/* Return this object to the shrinker pool */
 	i915_gem_object_make_purgeable(node->obj);
 
 	spin_lock_irqsave(&pool->lock, flags);
+	node->age = jiffies;
 	list_add(&node->link, list);
 	spin_unlock_irqrestore(&pool->lock, flags);
+
+	schedule_delayed_work(&pool->work,
+			      round_jiffies_up_relative(HZ));
 }
 
-static struct intel_engine_pool_node *
-node_create(struct intel_engine_pool *pool, size_t sz)
+static struct intel_gt_buffer_pool_node *
+node_create(struct intel_gt_buffer_pool *pool, size_t sz)
 {
-	struct intel_engine_cs *engine = to_engine(pool);
-	struct intel_engine_pool_node *node;
+	struct intel_gt *gt = to_gt(pool);
+	struct intel_gt_buffer_pool_node *node;
 	struct drm_i915_gem_object *obj;
 
 	node = kmalloc(sizeof(*node),
@@ -97,7 +132,7 @@ node_create(struct intel_engine_pool *pool, size_t sz)
 	node->pool = pool;
 	i915_active_init(&node->active, pool_active, pool_retire);
 
-	obj = i915_gem_object_create_internal(engine->i915, sz);
+	obj = i915_gem_object_create_internal(gt->i915, sz);
 	if (IS_ERR(obj)) {
 		i915_active_fini(&node->active);
 		kfree(node);
@@ -110,26 +145,15 @@ node_create(struct intel_engine_pool *pool, size_t sz)
 	return node;
 }
 
-static struct intel_engine_pool *lookup_pool(struct intel_engine_cs *engine)
+struct intel_gt_buffer_pool_node *
+intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size)
 {
-	if (intel_engine_is_virtual(engine))
-		engine = intel_virtual_engine_get_sibling(engine, 0);
-
-	GEM_BUG_ON(!engine);
-	return &engine->pool;
-}
-
-struct intel_engine_pool_node *
-intel_engine_get_pool(struct intel_engine_cs *engine, size_t size)
-{
-	struct intel_engine_pool *pool = lookup_pool(engine);
-	struct intel_engine_pool_node *node;
+	struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
+	struct intel_gt_buffer_pool_node *node;
 	struct list_head *list;
 	unsigned long flags;
 	int ret;
 
-	GEM_BUG_ON(!intel_engine_pm_is_awake(to_engine(pool)));
-
 	size = PAGE_ALIGN(size);
 	list = bucket_for_size(pool, size);
 
@@ -157,34 +181,48 @@ intel_engine_get_pool(struct intel_engine_cs *engine, size_t size)
 	return node;
 }
 
-void intel_engine_pool_init(struct intel_engine_pool *pool)
+void intel_gt_init_buffer_pool(struct intel_gt *gt)
 {
+	struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
 	int n;
 
 	spin_lock_init(&pool->lock);
 	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
 		INIT_LIST_HEAD(&pool->cache_list[n]);
+	INIT_DELAYED_WORK(&pool->work, pool_free_work);
 }
 
-void intel_engine_pool_park(struct intel_engine_pool *pool)
+static void pool_free_imm(struct intel_gt_buffer_pool *pool)
 {
 	int n;
 
+	spin_lock_irq(&pool->lock);
 	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
+		struct intel_gt_buffer_pool_node *node, *next;
 		struct list_head *list = &pool->cache_list[n];
-		struct intel_engine_pool_node *node, *nn;
 
-		list_for_each_entry_safe(node, nn, list, link)
+		list_for_each_entry_safe(node, next, list, link)
 			node_free(node);
-
 		INIT_LIST_HEAD(list);
 	}
+	spin_unlock_irq(&pool->lock);
+}
+
+void intel_gt_flush_buffer_pool(struct intel_gt *gt)
+{
+	struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
+
+	if (cancel_delayed_work_sync(&pool->work))
+		pool_free_imm(pool);
 }
 
-void intel_engine_pool_fini(struct intel_engine_pool *pool)
+void intel_gt_fini_buffer_pool(struct intel_gt *gt)
 {
+	struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
 	int n;
 
+	intel_gt_flush_buffer_pool(gt);
+
 	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
 		GEM_BUG_ON(!list_empty(&pool->cache_list[n]));
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h
new file mode 100644
index 000000000000..42cbac003e8a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2018 Intel Corporation
+ */
+
+#ifndef INTEL_GT_BUFFER_POOL_H
+#define INTEL_GT_BUFFER_POOL_H
+
+#include <linux/types.h>
+
+#include "i915_active.h"
+#include "intel_gt_buffer_pool_types.h"
+
+struct intel_gt;
+struct i915_request;
+
+struct intel_gt_buffer_pool_node *
+intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size);
+
+static inline int
+intel_gt_buffer_pool_mark_active(struct intel_gt_buffer_pool_node *node,
+				 struct i915_request *rq)
+{
+	return i915_active_add_request(&node->active, rq);
+}
+
+static inline void
+intel_gt_buffer_pool_put(struct intel_gt_buffer_pool_node *node)
+{
+	i915_active_release(&node->active);
+}
+
+void intel_gt_init_buffer_pool(struct intel_gt *gt);
+void intel_gt_flush_buffer_pool(struct intel_gt *gt);
+void intel_gt_fini_buffer_pool(struct intel_gt *gt);
+
+#endif /* INTEL_GT_BUFFER_POOL_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
similarity index 54%
rename from drivers/gpu/drm/i915/gt/intel_engine_pool_types.h
rename to drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
index e31ee361b76f..e28bdda771ed 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
@@ -4,26 +4,29 @@
  * Copyright © 2014-2018 Intel Corporation
  */
 
-#ifndef INTEL_ENGINE_POOL_TYPES_H
-#define INTEL_ENGINE_POOL_TYPES_H
+#ifndef INTEL_GT_BUFFER_POOL_TYPES_H
+#define INTEL_GT_BUFFER_POOL_TYPES_H
 
 #include <linux/list.h>
 #include <linux/spinlock.h>
+#include <linux/workqueue.h>
 
 #include "i915_active_types.h"
 
 struct drm_i915_gem_object;
 
-struct intel_engine_pool {
+struct intel_gt_buffer_pool {
 	spinlock_t lock;
 	struct list_head cache_list[4];
+	struct delayed_work work;
 };
 
-struct intel_engine_pool_node {
+struct intel_gt_buffer_pool_node {
 	struct i915_active active;
 	struct drm_i915_gem_object *obj;
 	struct list_head link;
-	struct intel_engine_pool *pool;
+	struct intel_gt_buffer_pool *pool;
+	unsigned long age;
 };
 
-#endif /* INTEL_ENGINE_POOL_TYPES_H */
+#endif /* INTEL_GT_BUFFER_POOL_TYPES_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index d02ccb735e24..0cc1d6b185dc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -17,6 +17,7 @@
 
 #include "i915_vma.h"
 #include "intel_engine_types.h"
+#include "intel_gt_buffer_pool_types.h"
 #include "intel_llc_types.h"
 #include "intel_reset_types.h"
 #include "intel_rc6_types.h"
@@ -97,6 +98,16 @@ struct intel_gt {
 	 */
 	struct i915_address_space *vm;
 
+	/*
+	 * A pool of objects to use as shadow copies of client batch buffers
+	 * when the command parser is enabled. Prevents the client from
+	 * modifying the batch contents after software parsing.
+	 *
+	 * Buffers older than 1s are periodically reaped from the pool,
+	 * or may be reclaimed by the shrinker before then.
+	 */
+	struct intel_gt_buffer_pool buffer_pool;
+
 	struct i915_vma *scratch;
 };
 
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 4a53ded7c2dd..b8dd3cbc8696 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -28,7 +28,6 @@
 #include "i915_drv.h"
 #include "intel_context.h"
 #include "intel_engine_pm.h"
-#include "intel_engine_pool.h"
 
 #include "mock_engine.h"
 #include "selftests/mock_request.h"
@@ -328,7 +327,6 @@ int mock_engine_init(struct intel_engine_cs *engine)
 	intel_engine_init_execlists(engine);
 	intel_engine_init__pm(engine);
 	intel_engine_init_retire(engine);
-	intel_engine_pool_init(&engine->pool);
 
 	ce = create_kernel_context(engine);
 	if (IS_ERR(ce))
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index c09e1afb5f79..8e98df6a3045 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -32,6 +32,7 @@
 #include <drm/drm_debugfs.h>
 
 #include "gem/i915_gem_context.h"
+#include "gt/intel_gt_buffer_pool.h"
 #include "gt/intel_gt_clock_utils.h"
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_gt_requests.h"
@@ -1484,6 +1485,9 @@ gt_drop_caches(struct intel_gt *gt, u64 val)
 	if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(gt))
 		intel_gt_handle_error(gt, ALL_ENGINES, 0, NULL);
 
+	if (val & DROP_FREED)
+		intel_gt_flush_buffer_pool(gt);
+
 	return 0;
 }
 
-- 
2.20.1


* [Intel-gfx] [PATCH 3/9] drm/i915: Always defer fenced work to the worker
  2020-04-30 11:18 [Intel-gfx] [PATCH 1/9] drm/i915/gt: Stop holding onto the pinned_default_state Chris Wilson
  2020-04-30 11:18 ` [Intel-gfx] [PATCH 2/9] drm/i915/gt: Move the batch buffer pool from the engine to the gt Chris Wilson
@ 2020-04-30 11:18 ` Chris Wilson
  2020-04-30 11:18 ` [Intel-gfx] [PATCH 4/9] drm/i915/gem: Include PIN_GLOBAL prior to using I915_DISPATCH_SECURE Chris Wilson
                   ` (8 subsequent siblings)
  10 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2020-04-30 11:18 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Currently, if an error is raised, we always call the cleanup locally
[and skip the main work callback]. However, some future users may need
to take a mutex to clean up, and so we cannot execute the cleanup
immediately as we may still be in interrupt context.

With the execute-immediate flag, in most cases an error should still
result in immediate cleanup.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_sw_fence_work.c | 25 +++++++++++------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_sw_fence_work.c b/drivers/gpu/drm/i915/i915_sw_fence_work.c
index a3a81bb8f2c3..29f63ebc24e8 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence_work.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence_work.c
@@ -16,11 +16,14 @@ static void fence_complete(struct dma_fence_work *f)
 static void fence_work(struct work_struct *work)
 {
 	struct dma_fence_work *f = container_of(work, typeof(*f), work);
-	int err;
 
-	err = f->ops->work(f);
-	if (err)
-		dma_fence_set_error(&f->dma, err);
+	if (!f->dma.error) {
+		int err;
+
+		err = f->ops->work(f);
+		if (err)
+			dma_fence_set_error(&f->dma, err);
+	}
 
 	fence_complete(f);
 	dma_fence_put(&f->dma);
@@ -36,15 +39,11 @@ fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 		if (fence->error)
 			dma_fence_set_error(&f->dma, fence->error);
 
-		if (!f->dma.error) {
-			dma_fence_get(&f->dma);
-			if (test_bit(DMA_FENCE_WORK_IMM, &f->dma.flags))
-				fence_work(&f->work);
-			else
-				queue_work(system_unbound_wq, &f->work);
-		} else {
-			fence_complete(f);
-		}
+		dma_fence_get(&f->dma);
+		if (test_bit(DMA_FENCE_WORK_IMM, &f->dma.flags))
+			fence_work(&f->work);
+		else
+			queue_work(system_unbound_wq, &f->work);
 		break;
 
 	case FENCE_FREE:
-- 
2.20.1


* [Intel-gfx] [PATCH 4/9] drm/i915/gem: Include PIN_GLOBAL prior to using I915_DISPATCH_SECURE
  2020-04-30 11:18 [Intel-gfx] [PATCH 1/9] drm/i915/gt: Stop holding onto the pinned_default_state Chris Wilson
  2020-04-30 11:18 ` [Intel-gfx] [PATCH 2/9] drm/i915/gt: Move the batch buffer pool from the engine to the gt Chris Wilson
  2020-04-30 11:18 ` [Intel-gfx] [PATCH 3/9] drm/i915: Always defer fenced work to the worker Chris Wilson
@ 2020-04-30 11:18 ` Chris Wilson
  2020-04-30 11:18 ` [Intel-gfx] [PATCH 5/9] drm/i915/gem: Assign context id for async work Chris Wilson
                   ` (7 subsequent siblings)
  10 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2020-04-30 11:18 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

For our gpu relocs, on the older gen 4 and 5 devices, we must use a
privileged buffer for MI_STORE_DWORD_IMM. This also presumes that we
operate from the global GTT, so for consistency we should tell
i915_vma_pin() that the vma will be used with the global address space.
As there is only the single GTT available on these devices, this does
not change the actual GTT used for pinning.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 414859fa2673..cc5ccc37be8c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1197,6 +1197,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	struct intel_gt_buffer_pool_node *pool;
 	struct i915_request *rq;
 	struct i915_vma *batch;
+	u32 flags;
 	u32 *cmd;
 	int err;
 
@@ -1219,7 +1220,11 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 		goto err_unmap;
 	}
 
-	err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
+	flags = PIN_USER | PIN_NOEVICT | PIN_NONBLOCK;
+	if (cache->gen <= 5)
+		flags |= PIN_GLOBAL;
+
+	err = i915_vma_pin(batch, 0, 0, flags);
 	if (err)
 		goto err_unmap;
 
@@ -1239,7 +1244,8 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 
 	err = eb->engine->emit_bb_start(rq,
 					batch->node.start, PAGE_SIZE,
-					cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
+					flags & PIN_GLOBAL ?
+					I915_DISPATCH_SECURE : 0);
 	if (err)
 		goto skip_request;
 
-- 
2.20.1


* [Intel-gfx] [PATCH 5/9] drm/i915/gem: Assign context id for async work
  2020-04-30 11:18 [Intel-gfx] [PATCH 1/9] drm/i915/gt: Stop holding onto the pinned_default_state Chris Wilson
                   ` (2 preceding siblings ...)
  2020-04-30 11:18 ` [Intel-gfx] [PATCH 4/9] drm/i915/gem: Include PIN_GLOBAL prior to using I915_DISPATCH_SECURE Chris Wilson
@ 2020-04-30 11:18 ` Chris Wilson
  2020-04-30 11:18 ` [Intel-gfx] [PATCH 6/9] drm/i915: Export a preallocate variant of i915_active_acquire() Chris Wilson
                   ` (6 subsequent siblings)
  10 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2020-04-30 11:18 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Allocate a few dma fence context ids that we can use to associate async work
[for the CPU] launched on behalf of this context. For extra fun, we allow
a configurable concurrency width.

A current example would be that we spawn an unbound worker for every
userptr get_pages. In the future, we wish to charge this work to the
context that initiated the async work and to impose concurrency limits
based on the context.
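
As an illustration only (the fence-work consumer here is hypothetical and
not part of this patch), the id is meant to seed the fence context of CPU
work spawned on behalf of the gem context, so that at most
ctx->async.width + 1 timelines are handed out per context:

	/* Round-robins over the ids reserved at context creation */
	u64 context = i915_gem_context_async_id(ctx);

	dma_fence_init(&work->dma, &work_ops, &work->lock, context, 0);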

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c       | 4 ++++
 drivers/gpu/drm/i915/gem/i915_gem_context.h       | 6 ++++++
 drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 6 ++++++
 3 files changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 900ea8b7fc8f..fd7d064a0e46 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -715,6 +715,10 @@ __create_context(struct drm_i915_private *i915)
 	ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL);
 	mutex_init(&ctx->mutex);
 
+	ctx->async.width = rounddown_pow_of_two(num_online_cpus());
+	ctx->async.context = dma_fence_context_alloc(ctx->async.width);
+	ctx->async.width--;
+
 	spin_lock_init(&ctx->stale.lock);
 	INIT_LIST_HEAD(&ctx->stale.engines);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 3702b2fb27ab..e104ff0ae740 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -134,6 +134,12 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
 				       struct drm_file *file);
 
+static inline u64 i915_gem_context_async_id(struct i915_gem_context *ctx)
+{
+	return (ctx->async.context +
+		(atomic_fetch_inc(&ctx->async.cur) & ctx->async.width));
+}
+
 static inline struct i915_gem_context *
 i915_gem_context_get(struct i915_gem_context *ctx)
 {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 28760bd03265..5f5cfa3a3e9b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -85,6 +85,12 @@ struct i915_gem_context {
 
 	struct intel_timeline *timeline;
 
+	struct {
+		u64 context;
+		atomic_t cur;
+		unsigned int width;
+	} async;
+
 	/**
 	 * @vm: unique address space (GTT)
 	 *
-- 
2.20.1


* [Intel-gfx] [PATCH 6/9] drm/i915: Export a preallocate variant of i915_active_acquire()
  2020-04-30 11:18 [Intel-gfx] [PATCH 1/9] drm/i915/gt: Stop holding onto the pinned_default_state Chris Wilson
                   ` (3 preceding siblings ...)
  2020-04-30 11:18 ` [Intel-gfx] [PATCH 5/9] drm/i915/gem: Assign context id for async work Chris Wilson
@ 2020-04-30 11:18 ` Chris Wilson
  2020-04-30 11:18 ` [Intel-gfx] [PATCH 7/9] drm/i915/gem: Separate the ww_mutex walker into its own list Chris Wilson
                   ` (5 subsequent siblings)
  10 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2020-04-30 11:18 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Sometimes we have to be very careful not to allocate underneath a mutex
(or spinlock) and yet still want to track activity. Enter
i915_active_acquire_for_context(). This raises the activity counter on
i915_active prior to use and ensures that the fence-tree contains a slot
for the context.
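
A minimal sketch of the intended calling pattern (illustrative only; the
actual users arrive in later patches, and ref/idx/fence stand in for the
caller's i915_active, timeline index and fence):

	/* With allocations still allowed: reserve a slot for this index */
	err = i915_active_acquire_for_context(ref, idx);
	if (err)
		return err;

	/* Later, perhaps under a spinlock where we must not allocate */
	prev = __i915_active_ref(ref, idx, fence);
	if (!IS_ERR(prev))
		dma_fence_put(prev);

	i915_active_release(ref);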

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_active.c | 107 ++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_active.h |   5 ++
 2 files changed, 103 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index d960d0be5bd2..71ad0d452680 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -217,11 +217,10 @@ excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
 }
 
 static struct i915_active_fence *
-active_instance(struct i915_active *ref, struct intel_timeline *tl)
+active_instance(struct i915_active *ref, u64 idx)
 {
 	struct active_node *node, *prealloc;
 	struct rb_node **p, *parent;
-	u64 idx = tl->fence_context;
 
 	/*
 	 * We track the most recently used timeline to skip a rbtree search
@@ -367,7 +366,7 @@ int i915_active_ref(struct i915_active *ref,
 	if (err)
 		return err;
 
-	active = active_instance(ref, tl);
+	active = active_instance(ref, tl->fence_context);
 	if (!active) {
 		err = -ENOMEM;
 		goto out;
@@ -384,32 +383,104 @@ int i915_active_ref(struct i915_active *ref,
 		atomic_dec(&ref->count);
 	}
 	if (!__i915_active_fence_set(active, fence))
-		atomic_inc(&ref->count);
+		__i915_active_acquire(ref);
 
 out:
 	i915_active_release(ref);
 	return err;
 }
 
-struct dma_fence *
-i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
+static struct dma_fence *
+__i915_active_set_fence(struct i915_active *ref,
+			struct i915_active_fence *active,
+			struct dma_fence *fence)
 {
 	struct dma_fence *prev;
 
 	/* We expect the caller to manage the exclusive timeline ordering */
 	GEM_BUG_ON(i915_active_is_idle(ref));
 
+	if (is_barrier(active)) { /* proto-node used by our idle barrier */
+		/*
+		 * This request is on the kernel_context timeline, and so
+		 * we can use it to substitute for the pending idle-barrer
+		 * request that we want to emit on the kernel_context.
+		 */
+		__active_del_barrier(ref, node_from_active(active));
+		RCU_INIT_POINTER(active->fence, NULL);
+		atomic_dec(&ref->count);
+	}
+
 	rcu_read_lock();
-	prev = __i915_active_fence_set(&ref->excl, f);
+	prev = __i915_active_fence_set(active, fence);
 	if (prev)
 		prev = dma_fence_get_rcu(prev);
 	else
-		atomic_inc(&ref->count);
+		__i915_active_acquire(ref);
 	rcu_read_unlock();
 
 	return prev;
 }
 
+static struct i915_active_fence *
+__active_lookup(struct i915_active *ref, u64 idx)
+{
+	struct active_node *node;
+	struct rb_node *p;
+
+	/* Like active_instance() but with no malloc */
+
+	node = READ_ONCE(ref->cache);
+	if (node && node->timeline == idx)
+		return &node->base;
+
+	spin_lock_irq(&ref->tree_lock);
+	GEM_BUG_ON(i915_active_is_idle(ref));
+
+	p = ref->tree.rb_node;
+	while (p) {
+		node = rb_entry(p, struct active_node, node);
+		if (node->timeline == idx) {
+			ref->cache = node;
+			spin_unlock_irq(&ref->tree_lock);
+			return &node->base;
+		}
+
+		if (node->timeline < idx)
+			p = p->rb_right;
+		else
+			p = p->rb_left;
+	}
+
+	spin_unlock_irq(&ref->tree_lock);
+
+	return NULL;
+}
+
+struct dma_fence *
+__i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence)
+{
+	struct dma_fence *prev = ERR_PTR(-ENOENT);
+	struct i915_active_fence *active;
+
+	if (!i915_active_acquire_if_busy(ref))
+		return ERR_PTR(-EINVAL);
+
+	active = __active_lookup(ref, idx);
+	if (active)
+		prev = __i915_active_set_fence(ref, active, fence);
+
+	i915_active_release(ref);
+	return prev;
+}
+
+struct dma_fence *
+i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
+{
+	/* We expect the caller to manage the exclusive timeline ordering */
+	return __i915_active_set_fence(ref, &ref->excl, f);
+}
+
 bool i915_active_acquire_if_busy(struct i915_active *ref)
 {
 	debug_active_assert(ref);
@@ -443,6 +514,24 @@ int i915_active_acquire(struct i915_active *ref)
 	return err;
 }
 
+int i915_active_acquire_for_context(struct i915_active *ref, u64 idx)
+{
+	struct i915_active_fence *active;
+	int err;
+
+	err = i915_active_acquire(ref);
+	if (err)
+		return err;
+
+	active = active_instance(ref, idx);
+	if (!active) {
+		i915_active_release(ref);
+		return -ENOMEM;
+	}
+
+	return 0; /* return with active ref */
+}
+
 void i915_active_release(struct i915_active *ref)
 {
 	debug_active_assert(ref);
@@ -804,7 +893,7 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 			 */
 			RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
 			node->base.cb.node.prev = (void *)engine;
-			atomic_inc(&ref->count);
+			__i915_active_acquire(ref);
 		}
 		GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN));
 
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index cf4058150966..042502abefe5 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -163,6 +163,9 @@ void __i915_active_init(struct i915_active *ref,
 	__i915_active_init(ref, active, retire, &__mkey, &__wkey);	\
 } while (0)
 
+struct dma_fence *
+__i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence);
+
 int i915_active_ref(struct i915_active *ref,
 		    struct intel_timeline *tl,
 		    struct dma_fence *fence);
@@ -198,7 +201,9 @@ int i915_request_await_active(struct i915_request *rq,
 #define I915_ACTIVE_AWAIT_BARRIER BIT(2)
 
 int i915_active_acquire(struct i915_active *ref);
+int i915_active_acquire_for_context(struct i915_active *ref, u64 idx);
 bool i915_active_acquire_if_busy(struct i915_active *ref);
+
 void i915_active_release(struct i915_active *ref);
 
 static inline void __i915_active_acquire(struct i915_active *ref)
-- 
2.20.1


* [Intel-gfx] [PATCH 7/9] drm/i915/gem: Separate the ww_mutex walker into its own list
  2020-04-30 11:18 [Intel-gfx] [PATCH 1/9] drm/i915/gt: Stop holding onto the pinned_default_state Chris Wilson
                   ` (4 preceding siblings ...)
  2020-04-30 11:18 ` [Intel-gfx] [PATCH 6/9] drm/i915: Export a preallocate variant of i915_active_acquire() Chris Wilson
@ 2020-04-30 11:18 ` Chris Wilson
  2020-04-30 11:18 ` [Intel-gfx] [PATCH 8/9] drm/i915/gem: Asynchronous GTT unbinding Chris Wilson
                   ` (4 subsequent siblings)
  10 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2020-04-30 11:18 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

In preparation for making eb_vma bigger and heavier to run in parallel,
we need to stop applying an in-place swap() to reorder the array around
ww_mutex deadlocks. Keep the array intact and reorder the locks using a
dedicated list.
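
The deadlock unwind then becomes a reverse walk over that list from the
contended vma, using the reverse-continue iterator added to
i915_utils.h (sketched here; see the execbuffer hunk below):

	struct eb_vma *unlock = ev, *en;

	list_for_each_entry_safe_continue_reverse(unlock, en, &eb->lock, lock_link) {
		ww_mutex_unlock(&unlock->vma->resv->lock);
		list_move_tail(&unlock->lock_link, &eb->lock);
	}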

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 48 +++++++++++--------
 drivers/gpu/drm/i915/i915_utils.h             |  6 +++
 2 files changed, 34 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index cc5ccc37be8c..dcb79891dc94 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -35,6 +35,7 @@ struct eb_vma {
 	struct drm_i915_gem_exec_object2 *exec;
 	struct list_head bind_link;
 	struct list_head reloc_link;
+	struct list_head lock_link;
 
 	struct hlist_node node;
 	u32 handle;
@@ -253,6 +254,8 @@ struct i915_execbuffer {
 	/** list of vma that have execobj.relocation_count */
 	struct list_head relocs;
 
+	struct list_head lock;
+
 	/**
 	 * Track the most recently used object for relocations, as we
 	 * frequently have to perform multiple relocations within the same
@@ -396,6 +399,10 @@ static int eb_create(struct i915_execbuffer *eb)
 		eb->lut_size = -eb->buffer_count;
 	}
 
+	INIT_LIST_HEAD(&eb->relocs);
+	INIT_LIST_HEAD(&eb->unbound);
+	INIT_LIST_HEAD(&eb->lock);
+
 	return 0;
 }
 
@@ -601,6 +608,8 @@ eb_add_vma(struct i915_execbuffer *eb,
 		eb_unreserve_vma(ev);
 		list_add_tail(&ev->bind_link, &eb->unbound);
 	}
+
+	list_add_tail(&ev->lock_link, &eb->lock);
 }
 
 static inline int use_cpu_reloc(const struct reloc_cache *cache,
@@ -877,9 +886,6 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 	unsigned int i;
 	int err = 0;
 
-	INIT_LIST_HEAD(&eb->relocs);
-	INIT_LIST_HEAD(&eb->unbound);
-
 	for (i = 0; i < eb->buffer_count; i++) {
 		struct i915_vma *vma;
 
@@ -1615,38 +1621,39 @@ static int eb_relocate(struct i915_execbuffer *eb)
 
 static int eb_move_to_gpu(struct i915_execbuffer *eb)
 {
-	const unsigned int count = eb->buffer_count;
 	struct ww_acquire_ctx acquire;
-	unsigned int i;
+	struct eb_vma *ev;
 	int err = 0;
 
 	ww_acquire_init(&acquire, &reservation_ww_class);
 
-	for (i = 0; i < count; i++) {
-		struct eb_vma *ev = &eb->vma[i];
+	list_for_each_entry(ev, &eb->lock, lock_link) {
 		struct i915_vma *vma = ev->vma;
 
 		err = ww_mutex_lock_interruptible(&vma->resv->lock, &acquire);
 		if (err == -EDEADLK) {
-			GEM_BUG_ON(i == 0);
-			do {
-				int j = i - 1;
+			struct eb_vma *unlock = ev, *en;
 
-				ww_mutex_unlock(&eb->vma[j].vma->resv->lock);
-
-				swap(eb->vma[i],  eb->vma[j]);
-			} while (--i);
+			list_for_each_entry_safe_continue_reverse(unlock, en, &eb->lock, lock_link) {
+				ww_mutex_unlock(&unlock->vma->resv->lock);
+				list_move_tail(&unlock->lock_link, &eb->lock);
+			}
 
+			GEM_BUG_ON(!list_is_first(&ev->lock_link, &eb->lock));
 			err = ww_mutex_lock_slow_interruptible(&vma->resv->lock,
 							       &acquire);
 		}
-		if (err)
-			break;
+		if (err) {
+			list_for_each_entry_continue_reverse(ev, &eb->lock, lock_link)
+				ww_mutex_unlock(&ev->vma->resv->lock);
+
+			ww_acquire_fini(&acquire);
+			goto err_skip;
+		}
 	}
 	ww_acquire_done(&acquire);
 
-	while (i--) {
-		struct eb_vma *ev = &eb->vma[i];
+	list_for_each_entry(ev, &eb->lock, lock_link) {
 		struct i915_vma *vma = ev->vma;
 		unsigned int flags = ev->flags;
 		struct drm_i915_gem_object *obj = vma->obj;
@@ -1947,9 +1954,10 @@ static int eb_parse(struct i915_execbuffer *eb)
 	if (err)
 		goto err_trampoline;
 
-	eb->vma[eb->buffer_count].vma = i915_vma_get(shadow);
-	eb->vma[eb->buffer_count].flags = __EXEC_OBJECT_HAS_PIN;
 	eb->batch = &eb->vma[eb->buffer_count++];
+	eb->batch->vma = i915_vma_get(shadow);
+	eb->batch->flags = __EXEC_OBJECT_HAS_PIN;
+	list_add_tail(&eb->batch->lock_link, &eb->lock);
 	eb->vma[eb->buffer_count].vma = NULL;
 
 	eb->trampoline = trampoline;
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 03a73d2bd50d..28813806bc19 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -266,6 +266,12 @@ static inline int list_is_last_rcu(const struct list_head *list,
 	return READ_ONCE(list->next) == head;
 }
 
+#define list_for_each_entry_safe_continue_reverse(pos, n, head, member)	\
+	for (pos = list_prev_entry(pos, member),			\
+		n = list_prev_entry(pos, member);			\
+	     &pos->member != (head);					\
+	     pos = n, n = list_prev_entry(n, member))
+
 /*
  * Wait until the work is finally complete, even if it tries to postpone
  * by requeueing itself. Note, that if the worker never cancels itself,
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Intel-gfx] [PATCH 8/9] drm/i915/gem: Asynchronous GTT unbinding
  2020-04-30 11:18 [Intel-gfx] [PATCH 1/9] drm/i915/gt: Stop holding onto the pinned_default_state Chris Wilson
                   ` (5 preceding siblings ...)
  2020-04-30 11:18 ` [Intel-gfx] [PATCH 7/9] drm/i915/gem: Separate the ww_mutex walker into its own list Chris Wilson
@ 2020-04-30 11:18 ` Chris Wilson
  2020-04-30 11:28   ` Chris Wilson
  2020-04-30 11:18 ` [Intel-gfx] [PATCH 9/9] drm/i915/gem: Bind the fence async for execbuf Chris Wilson
                   ` (3 subsequent siblings)
  10 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2020-04-30 11:18 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson, Matthew Auld

It is reasonably common for userspace (even modern drivers like iris) to
reuse an active address for a new buffer. This would cause the
application to stall under its mutex (originally struct_mutex) until the
old batches were idle and it could synchronously remove the stale PTE.
However, we can queue up a job that waits for the old nodes to become
idle and, upon their signals, removes the old nodes and replaces them
with the new ones for the batch. This is still CPU driven, but in
theory we can do the GTT patching from the GPU. The job itself has a
completion signal allowing the execbuf to wait upon the rebinding, and
also other observers to coordinate with the common VM activity.

Letting userspace queue up more work lets it get more done without
blocking other clients. In turn, we take care not to let it queue up too
much concurrent work, creating a small number of queues for each context
to limit the number of concurrent tasks.

The implementation relies on only scheduling one unbind operation per
vma as we use the unbound vma->node location to track the stale PTE.
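
For reference, a condensed sketch of the resulting eb_reserve() flow
(identifiers as in the diff below; error propagation, the per-vma
i915_active acquire/release and the resort-and-retry passes are omitted
for brevity):

	struct i915_address_space *vm = eb->context->vm;
	struct eb_vm_work *work;
	struct eb_vma *ev;
	int err;

	work = eb_vm_work(eb);		/* takes over eb->unbound, carries a fence */
	if (!work)
		return -ENOMEM;

	if (mutex_lock_interruptible(&vm->mutex))
		return -EINTR;		/* (the real code first flushes the work) */

	err = eb_vm_throttle(work);	/* queue behind this context's pending binds */
	if (!err) {
		list_for_each_entry(ev, &work->unbound, bind_link) {
			/* Claim drm_mm space; stale ranges are only marked,
			 * their PTEs are cleared later in the worker. */
			err = eb_reserve_vma(work, ev);
			if (err)
				break;
		}
	}
	mutex_unlock(&vm->mutex);

	/* The worker waits for the old nodes to idle, removes them and
	 * rewrites the PTEs; other observers may wait on work->base.dma. */
	dma_fence_work_commit_imm(&work->base);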

Closes: https://gitlab.freedesktop.org/drm/intel/issues/1402
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Andi Shyti <andi.shyti@intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 799 ++++++++++++++++--
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c          |   1 +
 drivers/gpu/drm/i915/gt/intel_ggtt.c          |   3 +-
 drivers/gpu/drm/i915/gt/intel_gtt.c           |   4 +
 drivers/gpu/drm/i915/gt/intel_gtt.h           |   2 +
 drivers/gpu/drm/i915/gt/intel_ppgtt.c         |   3 +-
 drivers/gpu/drm/i915/i915_gem.c               |   7 +
 drivers/gpu/drm/i915/i915_gem_gtt.c           |   5 +
 drivers/gpu/drm/i915/i915_vma.c               | 130 +--
 drivers/gpu/drm/i915/i915_vma.h               |   5 +
 10 files changed, 837 insertions(+), 122 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index dcb79891dc94..1063db957f17 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -18,6 +18,7 @@
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_buffer_pool.h"
 #include "gt/intel_gt_pm.h"
+#include "gt/intel_gt_requests.h"
 #include "gt/intel_ring.h"
 
 #include "i915_drv.h"
@@ -31,6 +32,9 @@ struct eb_vma {
 	struct i915_vma *vma;
 	unsigned int flags;
 
+	struct drm_mm_node hole;
+	unsigned int bind_flags;
+
 	/** This vma's place in the execbuf reservation list */
 	struct drm_i915_gem_exec_object2 *exec;
 	struct list_head bind_link;
@@ -57,7 +61,8 @@ enum {
 #define __EXEC_OBJECT_HAS_FENCE		BIT(30)
 #define __EXEC_OBJECT_NEEDS_MAP		BIT(29)
 #define __EXEC_OBJECT_NEEDS_BIAS	BIT(28)
-#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 28) /* all of the above */
+#define __EXEC_OBJECT_HAS_PAGES		BIT(27)
+#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 27) /* all of the above */
 
 #define __EXEC_HAS_RELOC	BIT(31)
 #define __EXEC_INTERNAL_FLAGS	(~0u << 31)
@@ -71,11 +76,12 @@ enum {
 	 I915_EXEC_RESOURCE_STREAMER)
 
 /* Catch emission of unexpected errors for CI! */
+#define __EINVAL__ 22
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 #undef EINVAL
 #define EINVAL ({ \
 	DRM_DEBUG_DRIVER("EINVAL at %s:%d\n", __func__, __LINE__); \
-	22; \
+	__EINVAL__; \
 })
 #endif
 
@@ -314,6 +320,12 @@ static struct eb_vma_array *eb_vma_array_create(unsigned int count)
 	return arr;
 }
 
+static struct eb_vma_array *eb_vma_array_get(struct eb_vma_array *arr)
+{
+	kref_get(&arr->kref);
+	return arr;
+}
+
 static inline void eb_unreserve_vma(struct eb_vma *ev)
 {
 	struct i915_vma *vma = ev->vma;
@@ -324,8 +336,12 @@ static inline void eb_unreserve_vma(struct eb_vma *ev)
 	if (ev->flags & __EXEC_OBJECT_HAS_PIN)
 		__i915_vma_unpin(vma);
 
+	if (ev->flags & __EXEC_OBJECT_HAS_PAGES)
+		i915_vma_put_pages(vma);
+
 	ev->flags &= ~(__EXEC_OBJECT_HAS_PIN |
-		       __EXEC_OBJECT_HAS_FENCE);
+		       __EXEC_OBJECT_HAS_FENCE |
+		       __EXEC_OBJECT_HAS_PAGES);
 }
 
 static void eb_vma_array_destroy(struct kref *kref)
@@ -411,7 +427,7 @@ eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
 		 const struct i915_vma *vma,
 		 unsigned int flags)
 {
-	if (vma->node.size < entry->pad_to_size)
+	if (vma->node.size < max(vma->size, entry->pad_to_size))
 		return true;
 
 	if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
@@ -484,12 +500,18 @@ eb_pin_vma(struct i915_execbuffer *eb,
 		if (entry->flags & EXEC_OBJECT_PINNED)
 			return false;
 
+		/* Concurrent async binds in progress, get in the queue */
+		if (!i915_active_is_idle(&vma->vm->active))
+			return false;
+
 		/* Failing that pick any _free_ space if suitable */
 		if (unlikely(i915_vma_pin(vma,
 					  entry->pad_to_size,
 					  entry->alignment,
 					  eb_pin_flags(entry, ev->flags) |
-					  PIN_USER | PIN_NOEVICT)))
+					  PIN_USER |
+					  PIN_NOEVICT |
+					  PIN_NOSEARCH)))
 			return false;
 	}
 
@@ -629,85 +651,700 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache,
 		obj->cache_level != I915_CACHE_NONE);
 }
 
-static int eb_reserve_vma(const struct i915_execbuffer *eb,
-			  struct eb_vma *ev,
-			  u64 pin_flags)
+struct eb_vm_work {
+	struct dma_fence_work base;
+	struct list_head unbound;
+	struct eb_vma_array *array;
+	struct i915_address_space *vm;
+	struct list_head evict_list;
+	u64 *p_flags;
+	u64 active;
+};
+
+static inline u64 node_end(const struct drm_mm_node *node)
+{
+	return node->start + node->size;
+}
+
+static int set_bind_fence(struct i915_vma *vma, struct eb_vm_work *work)
+{
+	struct dma_fence *prev;
+	int err = 0;
+
+	lockdep_assert_held(&vma->vm->mutex);
+	prev = i915_active_set_exclusive(&vma->active, &work->base.dma);
+	if (unlikely(prev)) {
+		err = i915_sw_fence_await_dma_fence(&work->base.chain, prev, 0,
+						    GFP_NOWAIT | __GFP_NOWARN);
+		dma_fence_put(prev);
+	}
+
+	return err < 0 ? err : 0;
+}
+
+static int await_evict(struct eb_vm_work *work, struct i915_vma *vma)
+{
+	int err;
+
+	if (rcu_access_pointer(vma->active.excl.fence) == &work->base.dma)
+		return 0;
+
+	/* Wait for all other previous activity */
+	err = i915_sw_fence_await_active(&work->base.chain,
+					 &vma->active,
+					 I915_ACTIVE_AWAIT_ACTIVE);
+	/* Then insert along the exclusive vm->mutex timeline */
+	if (err == 0)
+		err = set_bind_fence(vma, work);
+
+	return err;
+}
+
+static int
+evict_for_node(struct eb_vm_work *work,
+	       struct eb_vma *const target,
+	       unsigned int flags)
+{
+	struct i915_address_space *vm = target->vma->vm;
+	const unsigned long color = target->vma->node.color;
+	const u64 start = target->vma->node.start;
+	const u64 end = start + target->vma->node.size;
+	u64 hole_start = start, hole_end = end;
+	struct i915_vma *vma, *next;
+	struct drm_mm_node *node;
+	LIST_HEAD(evict_list);
+	LIST_HEAD(steal_list);
+	int err = 0;
+
+	lockdep_assert_held(&vm->mutex);
+	GEM_BUG_ON(drm_mm_node_allocated(&target->vma->node));
+	GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
+	GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
+
+	if (i915_vm_has_cache_coloring(vm)) {
+		/* Expand search to cover neighbouring guard pages (or lack!) */
+		if (hole_start)
+			hole_start -= I915_GTT_PAGE_SIZE;
+
+		/* Always look at the page afterwards to avoid the end-of-GTT */
+		hole_end += I915_GTT_PAGE_SIZE;
+	}
+	GEM_BUG_ON(hole_start >= hole_end);
+
+	drm_mm_for_each_node_in_range(node, &vm->mm, hole_start, hole_end) {
+		GEM_BUG_ON(node == &target->vma->node);
+
+		/* If we find any non-objects (!vma), we cannot evict them */
+		if (node->color == I915_COLOR_UNEVICTABLE) {
+			err = -ENOSPC;
+			goto err;
+		}
+
+		/*
+		 * If we are using coloring to insert guard pages between
+		 * different cache domains within the address space, we have
+		 * to check whether the objects on either side of our range
+		 * abut and conflict. If they are in conflict, then we evict
+		 * those as well to make room for our guard pages.
+		 */
+		if (i915_vm_has_cache_coloring(vm)) {
+			if (node_end(node) == start && node->color == color)
+				continue;
+
+			if (node->start == end && node->color == color)
+				continue;
+		}
+
+		GEM_BUG_ON(!drm_mm_node_allocated(node));
+		vma = container_of(node, typeof(*vma), node);
+
+		if (i915_vma_is_pinned(vma)) {
+			err = -ENOSPC;
+			goto err;
+		}
+
+		/* If this VMA is already being freed, or idle, steal it! */
+		if (!i915_active_acquire_if_busy(&vma->active)) {
+			list_move(&vma->vm_link, &steal_list);
+			continue;
+		}
+
+		if (flags & PIN_NONBLOCK)
+			err = -EAGAIN;
+		else
+			err = await_evict(work, vma);
+		i915_active_release(&vma->active);
+		if (err)
+			goto err;
+
+		GEM_BUG_ON(!i915_vma_is_active(vma));
+		list_move(&vma->vm_link, &evict_list);
+	}
+
+	list_for_each_entry_safe(vma, next, &steal_list, vm_link) {
+		atomic_and(~I915_VMA_BIND_MASK, &vma->flags);
+		__i915_vma_evict(vma);
+		drm_mm_remove_node(&vma->node);
+		/* No ref held; vma may now be concurrently freed */
+	}
+
+	/* No overlapping nodes to evict, claim the slot for ourselves! */
+	if (list_empty(&evict_list))
+		return drm_mm_reserve_node(&vm->mm, &target->vma->node);
+
+	/*
+	 * Mark this range as reserved.
+	 *
+	 * We have not yet removed the PTEs for the old evicted nodes, so
+	 * must prevent this range from being reused for anything else. The
+	 * PTE will be cleared when the range is idle (during the rebind
+	 * phase in the worker).
+	 */
+	target->hole.color = I915_COLOR_UNEVICTABLE;
+	target->hole.start = start;
+	target->hole.size = end;
+
+	list_for_each_entry(vma, &evict_list, vm_link) {
+		target->hole.start =
+			min(target->hole.start, vma->node.start);
+		target->hole.size =
+			max(target->hole.size, node_end(&vma->node));
+
+		GEM_BUG_ON(vma->node.mm != &vm->mm);
+		drm_mm_remove_node(&vma->node);
+		atomic_and(~I915_VMA_BIND_MASK, &vma->flags);
+		GEM_BUG_ON(i915_vma_is_pinned(vma));
+	}
+	list_splice(&evict_list, &work->evict_list);
+
+	target->hole.size -= target->hole.start;
+
+	return drm_mm_reserve_node(&vm->mm, &target->hole);
+
+err:
+	list_splice(&evict_list, &vm->bound_list);
+	list_splice(&steal_list, &vm->bound_list);
+	return err;
+}
+
+static int
+evict_in_range(struct eb_vm_work *work,
+	       struct eb_vma * const target,
+	       u64 start, u64 end, u64 align)
+{
+	struct i915_address_space *vm = target->vma->vm;
+	struct i915_vma *vma, *next;
+	struct drm_mm_scan scan;
+	LIST_HEAD(evict_list);
+	bool found = false;
+
+	lockdep_assert_held(&vm->mutex);
+
+	drm_mm_scan_init_with_range(&scan, &vm->mm,
+				    target->vma->node.size,
+				    align,
+				    target->vma->node.color,
+				    start, end,
+				    DRM_MM_INSERT_BEST);
+
+	list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) {
+		if (i915_vma_is_pinned(vma))
+			continue;
+
+		list_move(&vma->vm_link, &evict_list);
+		if (drm_mm_scan_add_block(&scan, &vma->node)) {
+			target->vma->node.start =
+				round_up(scan.hit_start, align);
+			found = true;
+			break;
+		}
+	}
+
+	list_for_each_entry(vma, &evict_list, vm_link)
+		drm_mm_scan_remove_block(&scan, &vma->node);
+	list_splice(&evict_list, &vm->bound_list);
+	if (!found)
+		return -ENOSPC;
+
+	return evict_for_node(work, target, 0);
+}
+
+static u64 random_offset(u64 start, u64 end, u64 len, u64 align)
+{
+	u64 range, addr;
+
+	GEM_BUG_ON(range_overflows(start, len, end));
+	GEM_BUG_ON(round_up(start, align) > round_down(end - len, align));
+
+	range = round_down(end - len, align) - round_up(start, align);
+	if (range) {
+		if (sizeof(unsigned long) == sizeof(u64)) {
+			addr = get_random_long();
+		} else {
+			addr = get_random_int();
+			if (range > U32_MAX) {
+				addr <<= 32;
+				addr |= get_random_int();
+			}
+		}
+		div64_u64_rem(addr, range, &addr);
+		start += addr;
+	}
+
+	return round_up(start, align);
+}
+
+static u64 align0(u64 align)
+{
+	return align <= I915_GTT_MIN_ALIGNMENT ? 0 : align;
+}
+
+static struct drm_mm_node *__best_hole(struct drm_mm *mm, u64 size)
+{
+	struct rb_node *rb = mm->holes_size.rb_root.rb_node;
+	struct drm_mm_node *best = NULL;
+
+	do {
+		struct drm_mm_node *node =
+			rb_entry(rb, struct drm_mm_node, rb_hole_size);
+
+		if (size <= node->hole_size) {
+			best = node;
+			rb = rb->rb_right;
+		} else {
+			rb = rb->rb_left;
+		}
+	} while (rb);
+
+	return best;
+}
+
+static int best_hole(struct drm_mm *mm, struct drm_mm_node *node,
+		     u64 start, u64 end, u64 align)
+{
+	struct drm_mm_node *hole;
+	u64 size = node->size;
+
+	do {
+		hole = __best_hole(mm, size);
+		if (!hole)
+			return -ENOSPC;
+
+		node->start = round_up(max(start, drm_mm_hole_node_start(hole)),
+				       align);
+		if (min(drm_mm_hole_node_end(hole), end) >=
+		    node->start + node->size)
+			return drm_mm_reserve_node(mm, node);
+
+		/*
+		 * Too expensive to search for every single hole every time,
+		 * so just look for the next bigger hole, introducing enough
+		 * space for alignments. Finding the smallest hole with ideal
+		 * alignment scales very poorly, so we choose to waste space
+		 * if an alignment is forced. On the other hand, simply
+		 * randomly selecting an offset in 48b space will cause us
+		 * to use the majority of that space and exhaust all memory
+		 * in storing the page directories. Compromise is required.
+		 */
+		size = hole->hole_size + align;
+	} while (1);
+}
+
+static int eb_reserve_vma(struct eb_vm_work *work, struct eb_vma *ev)
 {
 	struct drm_i915_gem_exec_object2 *entry = ev->exec;
+	const unsigned int exec_flags = ev->flags;
 	struct i915_vma *vma = ev->vma;
+	struct i915_address_space *vm = vma->vm;
+	u64 start = 0, end = vm->total;
+	u64 align = entry->alignment ?: I915_GTT_MIN_ALIGNMENT;
+	unsigned int bind_flags;
 	int err;
 
-	if (drm_mm_node_allocated(&vma->node) &&
-	    eb_vma_misplaced(entry, vma, ev->flags)) {
-		err = i915_vma_unbind(vma);
-		if (err)
-			return err;
+	lockdep_assert_held(&vm->mutex);
+
+	bind_flags = PIN_USER;
+	if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
+		bind_flags |= PIN_GLOBAL;
+
+	if (drm_mm_node_allocated(&vma->node))
+		goto pin;
+
+	GEM_BUG_ON(i915_vma_is_pinned(vma));
+	GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_BIND_MASK));
+	GEM_BUG_ON(i915_active_fence_isset(&vma->active.excl));
+	GEM_BUG_ON(!vma->size);
+
+	/* Reuse old address (if it doesn't conflict with new requirements) */
+	if (eb_vma_misplaced(entry, vma, exec_flags)) {
+		vma->node.start = entry->offset & PIN_OFFSET_MASK;
+		vma->node.size = max(entry->pad_to_size, vma->size);
+		vma->node.color = 0;
+		if (i915_vm_has_cache_coloring(vm))
+			vma->node.color = vma->obj->cache_level;
 	}
 
-	err = i915_vma_pin(vma,
-			   entry->pad_to_size, entry->alignment,
-			   eb_pin_flags(entry, ev->flags) | pin_flags);
-	if (err)
-		return err;
+	/*
+	 * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
+	 * limit address to the first 4GBs for unflagged objects.
+	 */
+	if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
+		end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE);
 
-	if (entry->offset != vma->node.start) {
-		entry->offset = vma->node.start | UPDATE;
-		eb->args->flags |= __EXEC_HAS_RELOC;
+	align = max(align, vma->display_alignment);
+	if (exec_flags & __EXEC_OBJECT_NEEDS_MAP) {
+		vma->node.size = max_t(u64, vma->node.size, vma->fence_size);
+		end = min_t(u64, end, i915_vm_to_ggtt(vm)->mappable_end);
+		align = max_t(u64, align, vma->fence_alignment);
 	}
 
-	if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
-		err = i915_vma_pin_fence(vma);
-		if (unlikely(err)) {
-			i915_vma_unpin(vma);
+	if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS)
+		start = BATCH_OFFSET_BIAS;
+
+	GEM_BUG_ON(!vma->node.size);
+	if (vma->node.size > end - start)
+		return -E2BIG;
+
+	/* Try the user's preferred location first (mandatory if soft-pinned) */
+	err = -__EINVAL__;
+	if (vma->node.start >= start &&
+	    IS_ALIGNED(vma->node.start, align) &&
+	    !range_overflows(vma->node.start, vma->node.size, end)) {
+		unsigned int pin_flags;
+
+		if (drm_mm_reserve_node(&vm->mm, &vma->node) == 0)
+			goto pin;
+
+		pin_flags = 0;
+		if (!(exec_flags & EXEC_OBJECT_PINNED))
+			pin_flags = PIN_NONBLOCK;
+
+		err = evict_for_node(work, ev, pin_flags);
+		if (err == 0)
+			goto pin;
+	}
+	if (exec_flags & EXEC_OBJECT_PINNED)
+		return err;
+
+	/* Try the first available free space */
+	if (!best_hole(&vm->mm, &vma->node, start, end, align))
+		goto pin;
+
+	/* Pick a random slot and see if it's available [O(N) worst case] */
+	vma->node.start = random_offset(start, end, vma->node.size, align);
+	if (evict_for_node(work, ev, 0) == 0)
+		goto pin;
+
+	pr_err("nothing!: size:%llx, align:%llx\n", vma->node.size, align);
+
+	/* Otherwise search all free space [degrades to O(N^2)] */
+	if (drm_mm_insert_node_in_range(&vm->mm, &vma->node,
+					vma->node.size,
+					align0(align),
+					vma->node.color,
+					start, end,
+					DRM_MM_INSERT_BEST) == 0)
+		goto pin;
+
+	/* Pretty busy! Loop over "LRU" and evict oldest in our search range */
+	err = evict_in_range(work, ev, start, end, align);
+	if (unlikely(err))
+		return err;
+
+pin:
+	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
+		err = __i915_vma_pin_fence(vma); /* XXX no waiting */
+		if (unlikely(err))
 			return err;
-		}
 
 		if (vma->fence)
 			ev->flags |= __EXEC_OBJECT_HAS_FENCE;
 	}
 
+	bind_flags &= ~atomic_read(&vma->flags);
+	if (bind_flags) {
+		err = set_bind_fence(vma, work);
+		if (unlikely(err))
+			return err;
+
+		atomic_add(I915_VMA_PAGES_ACTIVE, &vma->pages_count);
+		atomic_or(bind_flags, &vma->flags);
+
+		if (i915_vma_is_ggtt(vma))
+			__i915_vma_set_map_and_fenceable(vma);
+
+		GEM_BUG_ON(!i915_vma_is_active(vma));
+		list_move_tail(&vma->vm_link, &vm->bound_list);
+		ev->bind_flags = bind_flags;
+	}
+	__i915_vma_pin(vma); /* and release */
+
+	GEM_BUG_ON(!bind_flags && !drm_mm_node_allocated(&vma->node));
+	GEM_BUG_ON(!(drm_mm_node_allocated(&vma->node) ^
+		     drm_mm_node_allocated(&ev->hole)));
+
+	if (entry->offset != vma->node.start) {
+		entry->offset = vma->node.start | UPDATE;
+		*work->p_flags |= __EXEC_HAS_RELOC;
+	}
+
 	ev->flags |= __EXEC_OBJECT_HAS_PIN;
 	GEM_BUG_ON(eb_vma_misplaced(entry, vma, ev->flags));
 
 	return 0;
 }
 
-static int eb_reserve(struct i915_execbuffer *eb)
+static int __eb_bind_vma(struct eb_vm_work *work, int err)
 {
-	const unsigned int count = eb->buffer_count;
-	unsigned int pin_flags = PIN_USER | PIN_NONBLOCK;
-	struct list_head last;
+	struct i915_address_space *vm = work->vm;
 	struct eb_vma *ev;
-	unsigned int i, pass;
-	int err = 0;
+
+	GEM_BUG_ON(!intel_gt_pm_is_awake(vm->gt));
 
 	/*
-	 * Attempt to pin all of the buffers into the GTT.
-	 * This is done in 3 phases:
-	 *
-	 * 1a. Unbind all objects that do not match the GTT constraints for
-	 *     the execbuffer (fenceable, mappable, alignment etc).
-	 * 1b. Increment pin count for already bound objects.
-	 * 2.  Bind new objects.
-	 * 3.  Decrement pin count.
-	 *
-	 * This avoid unnecessary unbinding of later objects in order to make
-	 * room for the earlier objects *unless* we need to defragment.
+	 * We have to wait until the stale nodes are completely idle before
+	 * we can remove their PTE and unbind their pages. Hence, after
+	 * claiming their slot in the drm_mm, we defer their removal to
+	 * after the fences are signaled.
 	 */
+	if (!list_empty(&work->evict_list)) {
+		struct i915_vma *vma, *vn;
+
+		mutex_lock(&vm->mutex);
+		list_for_each_entry_safe(vma, vn, &work->evict_list, vm_link) {
+			GEM_BUG_ON(vma->vm != vm);
+			__i915_vma_evict(vma);
+			GEM_BUG_ON(!i915_vma_is_active(vma));
+		}
+		mutex_unlock(&vm->mutex);
+	}
+
+	/*
+	 * Now we know the nodes we require in drm_mm are idle, we can
+	 * replace the PTE in those ranges with our own.
+	 */
+	list_for_each_entry(ev, &work->unbound, bind_link) {
+		struct i915_vma *vma = ev->vma;
+
+		if (!ev->bind_flags)
+			goto put;
 
-	if (mutex_lock_interruptible(&eb->i915->drm.struct_mutex))
-		return -EINTR;
+		GEM_BUG_ON(vma->vm != vm);
+		GEM_BUG_ON(!i915_vma_is_active(vma));
+
+		if (err == 0)
+			err = vma->ops->bind_vma(vma,
+						 vma->obj->cache_level,
+						 ev->bind_flags |
+						 I915_VMA_ALLOC);
+		if (err)
+			atomic_and(~ev->bind_flags, &vma->flags);
+
+		if (drm_mm_node_allocated(&ev->hole)) {
+			mutex_lock(&vm->mutex);
+			GEM_BUG_ON(ev->hole.mm != &vm->mm);
+			GEM_BUG_ON(ev->hole.color != I915_COLOR_UNEVICTABLE);
+			GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
+			drm_mm_remove_node(&ev->hole);
+			if (!err) {
+				drm_mm_reserve_node(&vm->mm, &vma->node);
+				GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+			} else {
+				list_del_init(&vma->vm_link);
+			}
+			mutex_unlock(&vm->mutex);
+		}
+		ev->bind_flags = 0;
+
+put:
+		GEM_BUG_ON(drm_mm_node_allocated(&ev->hole));
+	}
+	INIT_LIST_HEAD(&work->unbound);
+
+	return err;
+}
+
+static int eb_bind_vma(struct dma_fence_work *base)
+{
+	struct eb_vm_work *work = container_of(base, typeof(*work), base);
+
+	return __eb_bind_vma(work, 0);
+}
+
+static void eb_vma_work_release(struct dma_fence_work *base)
+{
+	struct eb_vm_work *work = container_of(base, typeof(*work), base);
+
+	if (work->active) {
+		if (!list_empty(&work->unbound)) {
+			GEM_BUG_ON(!work->base.dma.error);
+			__eb_bind_vma(work, work->base.dma.error);
+		}
+		i915_active_release(&work->vm->active);
+	}
+
+	eb_vma_array_put(work->array);
+}
+
+static const struct dma_fence_work_ops eb_bind_ops = {
+	.name = "eb_bind",
+	.work = eb_bind_vma,
+	.release = eb_vma_work_release,
+};
+
+static struct eb_vm_work *eb_vm_work(struct i915_execbuffer *eb)
+{
+	struct eb_vm_work *work;
+
+	work = kzalloc(sizeof(*work), GFP_KERNEL);
+	if (!work)
+		return NULL;
+
+	dma_fence_work_init(&work->base, &eb_bind_ops);
+	list_replace_init(&eb->unbound, &work->unbound);
+	work->array = eb_vma_array_get(eb->array);
+	work->p_flags = &eb->args->flags;
+	work->vm = eb->context->vm;
+
+	/* Preallocate our slot in vm->active, outside of vm->mutex */
+	work->active = i915_gem_context_async_id(eb->gem_context);
+	if (i915_active_acquire_for_context(&work->vm->active, work->active)) {
+		work->active = 0;
+		work->base.dma.error = -ENOMEM;
+		dma_fence_work_commit(&work->base);
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&work->evict_list);
+
+	GEM_BUG_ON(list_empty(&work->unbound));
+	GEM_BUG_ON(!list_empty(&eb->unbound));
+
+	return work;
+}
+
+static int eb_vm_throttle(struct eb_vm_work *work)
+{
+	struct dma_fence *p;
+	int err;
+
+	/* Keep async work queued per context */
+	p = __i915_active_ref(&work->vm->active, work->active, &work->base.dma);
+	if (IS_ERR_OR_NULL(p))
+		return PTR_ERR(p);
+
+	err = i915_sw_fence_await_dma_fence(&work->base.chain, p, 0,
+					    GFP_NOWAIT | __GFP_NOWARN);
+	dma_fence_put(p);
+
+	return err < 0 ? err : 0;
+}
+
+static int eb_prepare_vma(struct eb_vma *ev)
+{
+	struct i915_vma *vma = ev->vma;
+	int err;
+
+	ev->hole.flags = 0;
+	ev->bind_flags = 0;
+
+	if (!(ev->flags &  __EXEC_OBJECT_HAS_PAGES)) {
+		err = i915_vma_get_pages(vma);
+		if (err)
+			return err;
+
+		ev->flags |=  __EXEC_OBJECT_HAS_PAGES;
+	}
+
+	return 0;
+}
+
+static int eb_reserve(struct i915_execbuffer *eb)
+{
+	const unsigned int count = eb->buffer_count;
+	struct i915_address_space *vm = eb->context->vm;
+	struct list_head last;
+	unsigned int i, pass;
+	struct eb_vma *ev;
+	int err = 0;
 
 	pass = 0;
 	do {
+		struct eb_vm_work *work;
+
 		list_for_each_entry(ev, &eb->unbound, bind_link) {
-			err = eb_reserve_vma(eb, ev, pin_flags);
+			err = eb_prepare_vma(ev);
+			switch (err) {
+			case 0:
+				break;
+			case -EAGAIN:
+				goto retry;
+			default:
+				return err;
+			}
+		}
+
+		work = eb_vm_work(eb);
+		if (!work)
+			return -ENOMEM;
+
+		/* No allocations allowed beyond this point */
+		if (mutex_lock_interruptible(&vm->mutex)) {
+			work->base.dma.error = -EINTR;
+			dma_fence_work_commit(&work->base);
+			return -EINTR;
+		}
+
+		err = eb_vm_throttle(work);
+		if (err) {
+			mutex_unlock(&vm->mutex);
+			work->base.dma.error = err;
+			dma_fence_work_commit(&work->base);
+			return err;
+		}
+
+		list_for_each_entry(ev, &work->unbound, bind_link) {
+			struct i915_vma *vma = ev->vma;
+
+			/*
+			 * Check if this node is being evicted or must be.
+			 *
+			 * As we use the single node inside the vma to track
+			 * both the eviction and where to insert the new node,
+			 * we cannot handle migrating the vma inside the worker.
+			 */
+			if (drm_mm_node_allocated(&vma->node)) {
+				if (eb_vma_misplaced(ev->exec, vma, ev->flags)) {
+					err = -ENOSPC;
+					break;
+				}
+			} else {
+				if (i915_vma_is_active(vma)) {
+					err = -ENOSPC;
+					break;
+				}
+			}
+
+			err = i915_active_acquire(&vma->active);
+			if (!err) {
+				err = eb_reserve_vma(work, ev);
+				i915_active_release(&vma->active);
+			}
 			if (err)
 				break;
 		}
-		if (!(err == -ENOSPC || err == -EAGAIN))
-			break;
 
+		mutex_unlock(&vm->mutex);
+
+		dma_fence_get(&work->base.dma);
+		dma_fence_work_commit_imm(&work->base);
+		if (err == -ENOSPC && dma_fence_wait(&work->base.dma, true))
+			err = -EINTR;
+		dma_fence_put(&work->base.dma);
+		if (err != -ENOSPC)
+			return err;
+
+retry:
 		/* Resort *all* the objects into priority order */
 		INIT_LIST_HEAD(&eb->unbound);
 		INIT_LIST_HEAD(&last);
@@ -736,37 +1373,52 @@ static int eb_reserve(struct i915_execbuffer *eb)
 		}
 		list_splice_tail(&last, &eb->unbound);
 
+		if (signal_pending(current))
+			return -EINTR;
+
 		if (err == -EAGAIN) {
-			mutex_unlock(&eb->i915->drm.struct_mutex);
 			flush_workqueue(eb->i915->mm.userptr_wq);
-			mutex_lock(&eb->i915->drm.struct_mutex);
 			continue;
 		}
 
-		switch (pass++) {
-		case 0:
-			break;
+		/* Now safe to wait with no reservations held */
+		list_for_each_entry(ev, &eb->unbound, bind_link) {
+			struct i915_vma *vma = ev->vma;
 
-		case 1:
-			/* Too fragmented, unbind everything and retry */
-			mutex_lock(&eb->context->vm->mutex);
-			err = i915_gem_evict_vm(eb->context->vm);
-			mutex_unlock(&eb->context->vm->mutex);
+			GEM_BUG_ON(ev->flags & __EXEC_OBJECT_HAS_PIN);
+
+			if (drm_mm_node_allocated(&vma->node) &&
+			    eb_vma_misplaced(ev->exec, vma, ev->flags)) {
+				err = i915_vma_unbind(vma);
+				if (err)
+					return err;
+			}
+
+			/* Wait for previous to avoid reusing vma->node */
+			err = i915_vma_wait_for_unbind(vma);
 			if (err)
-				goto unlock;
-			break;
+				return err;
+		}
 
+		switch (pass++) {
 		default:
-			err = -ENOSPC;
-			goto unlock;
-		}
+			return -ENOSPC;
 
-		pin_flags = PIN_USER;
-	} while (1);
+		case 2:
+			if (intel_gt_wait_for_idle(vm->gt,
+						   MAX_SCHEDULE_TIMEOUT))
+				return -EINTR;
 
-unlock:
-	mutex_unlock(&eb->i915->drm.struct_mutex);
-	return err;
+			fallthrough;
+		case 1:
+			if (i915_active_wait(&vm->active))
+				return -EINTR;
+
+			fallthrough;
+		case 0:
+			break;
+		}
+	} while (1);
 }
 
 static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
@@ -1139,6 +1791,8 @@ static void *reloc_vaddr(struct drm_i915_gem_object *obj,
 {
 	void *vaddr;
 
+	GEM_BUG_ON(page >= obj->base.size >> PAGE_SHIFT);
+
 	if (cache->page == page) {
 		vaddr = unmask_page(cache->vaddr);
 	} else {
@@ -1148,6 +1802,7 @@ static void *reloc_vaddr(struct drm_i915_gem_object *obj,
 		if (!vaddr)
 			vaddr = reloc_kmap(obj, cache, page);
 	}
+	GEM_BUG_ON(!vaddr);
 
 	return vaddr;
 }
@@ -1397,6 +2052,7 @@ relocate_entry(struct i915_vma *vma,
 	if (IS_ERR(vaddr))
 		return PTR_ERR(vaddr);
 
+	GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32)));
 	clflush_write32(vaddr + offset_in_page(offset),
 			lower_32_bits(target_offset),
 			eb->reloc_cache.vaddr);
@@ -1426,6 +2082,8 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 	if (unlikely(!target))
 		return -ENOENT;
 
+	GEM_BUG_ON(!i915_vma_is_pinned(target->vma));
+
 	/* Validate that the target is in a valid r/w GPU domain */
 	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
 		drm_dbg(&i915->drm, "reloc with multiple write domains: "
@@ -1697,7 +2355,6 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 			err = i915_vma_move_to_active(vma, eb->request, flags);
 
 		i915_vma_unlock(vma);
-		eb_unreserve_vma(ev);
 	}
 	ww_acquire_fini(&acquire);
 
@@ -2588,7 +3245,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	 * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
 	 * batch" bit. Hence we need to pin secure batches into the global gtt.
 	 * hsw should have this fixed, but bdw mucks it up again. */
-	batch = eb.batch->vma;
+	batch = i915_vma_get(eb.batch->vma);
 	if (eb.batch_flags & I915_DISPATCH_SECURE) {
 		struct i915_vma *vma;
 
@@ -2608,6 +3265,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 			goto err_parse;
 		}
 
+		GEM_BUG_ON(vma->obj != batch->obj);
 		batch = vma;
 	}
 
@@ -2687,6 +3345,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 err_parse:
 	if (batch->private)
 		intel_gt_buffer_pool_put(batch->private);
+	i915_vma_put(batch);
 err_vma:
 	if (eb.trampoline)
 		i915_vma_unpin(eb.trampoline);
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index f4fec7eb4064..2c5ac598ade2 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -370,6 +370,7 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
 	atomic_set(&vma->flags, I915_VMA_GGTT);
 	vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
 
+	INIT_LIST_HEAD(&vma->vm_link);
 	INIT_LIST_HEAD(&vma->obj_link);
 	INIT_LIST_HEAD(&vma->closed_link);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 66165b10256e..dfc979a24450 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -568,7 +568,8 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 	if (flags & I915_VMA_LOCAL_BIND) {
 		struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias;
 
-		if (flags & I915_VMA_ALLOC) {
+		if (flags & I915_VMA_ALLOC &&
+		    !test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) {
 			ret = alias->vm.allocate_va_range(&alias->vm,
 							  vma->node.start,
 							  vma->size);
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 2a72cce63fd9..82d4f943c346 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -194,6 +194,8 @@ void __i915_vm_close(struct i915_address_space *vm)
 
 void i915_address_space_fini(struct i915_address_space *vm)
 {
+	i915_active_fini(&vm->active);
+
 	spin_lock(&vm->free_pages.lock);
 	if (pagevec_count(&vm->free_pages.pvec))
 		vm_free_pages_release(vm, true);
@@ -246,6 +248,8 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
 	drm_mm_init(&vm->mm, 0, vm->total);
 	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
 
+	i915_active_init(&vm->active, NULL, NULL);
+
 	stash_init(&vm->free_pages);
 
 	INIT_LIST_HEAD(&vm->bound_list);
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index d93ebdf3fa0e..773fc76dfa1b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -263,6 +263,8 @@ struct i915_address_space {
 	 */
 	struct list_head bound_list;
 
+	struct i915_active active;
+
 	struct pagestash free_pages;
 
 	/* Global GTT */
diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
index f86f7e68ce5e..ecdd58f4b993 100644
--- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
@@ -162,7 +162,8 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
 	u32 pte_flags;
 	int err;
 
-	if (flags & I915_VMA_ALLOC) {
+	if (flags & I915_VMA_ALLOC &&
+	    !test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) {
 		err = vma->vm->allocate_va_range(vma->vm,
 						 vma->node.start, vma->size);
 		if (err)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0cbcb9f54e7d..6effa85532c6 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -984,6 +984,9 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 		return vma;
 
 	if (i915_vma_misplaced(vma, size, alignment, flags)) {
+		if (flags & PIN_NOEVICT)
+			return ERR_PTR(-ENOSPC);
+
 		if (flags & PIN_NONBLOCK) {
 			if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
 				return ERR_PTR(-ENOSPC);
@@ -998,6 +1001,10 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			return ERR_PTR(ret);
 	}
 
+	if (flags & PIN_NONBLOCK &&
+	    i915_active_fence_isset(&vma->active.excl))
+		return ERR_PTR(-EAGAIN);
+
 	ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
 	if (ret)
 		return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index cb43381b0d37..da081401142e 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -219,6 +219,8 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 		mode = DRM_MM_INSERT_HIGHEST;
 	if (flags & PIN_MAPPABLE)
 		mode = DRM_MM_INSERT_LOW;
+	if (flags & PIN_NOSEARCH)
+		mode = DRM_MM_INSERT_ONCE;
 
 	/* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
 	 * so we know that we always have a minimum alignment of 4096.
@@ -236,6 +238,9 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 	if (err != -ENOSPC)
 		return err;
 
+	if (flags & PIN_NOSEARCH)
+		return -ENOSPC;
+
 	if (mode & DRM_MM_INSERT_ONCE) {
 		err = drm_mm_insert_node_in_range(&vm->mm, node,
 						  size, alignment, color,
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index fc14ebf9a0b7..9da9ef99a8bb 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -132,6 +132,7 @@ vma_create(struct drm_i915_gem_object *obj,
 		fs_reclaim_release(GFP_KERNEL);
 	}
 
+	INIT_LIST_HEAD(&vma->vm_link);
 	INIT_LIST_HEAD(&vma->closed_link);
 
 	if (view && view->type != I915_GGTT_VIEW_NORMAL) {
@@ -342,25 +343,37 @@ struct i915_vma_work *i915_vma_work(void)
 	return vw;
 }
 
-int i915_vma_wait_for_bind(struct i915_vma *vma)
+static int
+__i915_vma_wait_excl(struct i915_vma *vma, bool bound, unsigned int flags)
 {
+	struct dma_fence *fence;
 	int err = 0;
 
-	if (rcu_access_pointer(vma->active.excl.fence)) {
-		struct dma_fence *fence;
+	fence = i915_active_fence_get(&vma->active.excl);
+	if (!fence)
+		return 0;
 
-		rcu_read_lock();
-		fence = dma_fence_get_rcu_safe(&vma->active.excl.fence);
-		rcu_read_unlock();
-		if (fence) {
-			err = dma_fence_wait(fence, MAX_SCHEDULE_TIMEOUT);
-			dma_fence_put(fence);
-		}
+	if (drm_mm_node_allocated(&vma->node) == bound) {
+		if (flags & PIN_NOEVICT)
+			err = -EBUSY;
+		else
+			err = dma_fence_wait(fence, true);
 	}
 
+	dma_fence_put(fence);
 	return err;
 }
 
+int i915_vma_wait_for_bind(struct i915_vma *vma)
+{
+	return __i915_vma_wait_excl(vma, true, 0);
+}
+
+int i915_vma_wait_for_unbind(struct i915_vma *vma)
+{
+	return __i915_vma_wait_excl(vma, false, 0);
+}
+
 /**
  * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
  * @vma: VMA to map
@@ -630,8 +643,9 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	u64 start, end;
 	int ret;
 
-	GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
+	GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_BIND_MASK));
 	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
+	GEM_BUG_ON(i915_active_fence_isset(&vma->active.excl));
 
 	size = max(size, vma->size);
 	alignment = max(alignment, vma->display_alignment);
@@ -727,7 +741,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color));
 
-	list_add_tail(&vma->vm_link, &vma->vm->bound_list);
+	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
 
 	return 0;
 }
@@ -735,15 +749,12 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 static void
 i915_vma_detach(struct i915_vma *vma)
 {
-	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-	GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
-
 	/*
 	 * And finally now the object is completely decoupled from this
 	 * vma, we can drop its hold on the backing storage and allow
 	 * it to be reaped by the shrinker.
 	 */
-	list_del(&vma->vm_link);
+	list_del_init(&vma->vm_link);
 }
 
 static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
@@ -789,7 +800,7 @@ static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
 	return pinned;
 }
 
-static int vma_get_pages(struct i915_vma *vma)
+int i915_vma_get_pages(struct i915_vma *vma)
 {
 	int err = 0;
 
@@ -836,7 +847,7 @@ static void __vma_put_pages(struct i915_vma *vma, unsigned int count)
 	mutex_unlock(&vma->pages_mutex);
 }
 
-static void vma_put_pages(struct i915_vma *vma)
+void i915_vma_put_pages(struct i915_vma *vma)
 {
 	if (atomic_add_unless(&vma->pages_count, -1, 1))
 		return;
@@ -853,9 +864,13 @@ static void vma_unbind_pages(struct i915_vma *vma)
 	/* The upper portion of pages_count is the number of bindings */
 	count = atomic_read(&vma->pages_count);
 	count >>= I915_VMA_PAGES_BIAS;
-	GEM_BUG_ON(!count);
+	if (count)
+		__vma_put_pages(vma, count | count << I915_VMA_PAGES_BIAS);
+}
 
-	__vma_put_pages(vma, count | count << I915_VMA_PAGES_BIAS);
+static int __wait_for_unbind(struct i915_vma *vma, unsigned int flags)
+{
+	return __i915_vma_wait_excl(vma, false, flags);
 }
 
 int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
@@ -875,10 +890,14 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	if (try_qad_pin(vma, flags & I915_VMA_BIND_MASK))
 		return 0;
 
-	err = vma_get_pages(vma);
+	err = i915_vma_get_pages(vma);
 	if (err)
 		return err;
 
+	err = __wait_for_unbind(vma, flags);
+	if (err)
+		goto err_pages;
+
 	if (flags & vma->vm->bind_async_flags) {
 		work = i915_vma_work();
 		if (!work) {
@@ -940,6 +959,10 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 		goto err_unlock;
 
 	if (!(bound & I915_VMA_BIND_MASK)) {
+		err = __wait_for_unbind(vma, flags);
+		if (err)
+			goto err_active;
+
 		err = i915_vma_insert(vma, size, alignment, flags);
 		if (err)
 			goto err_active;
@@ -959,6 +982,7 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	GEM_BUG_ON(bound + I915_VMA_PAGES_ACTIVE < bound);
 	atomic_add(I915_VMA_PAGES_ACTIVE, &vma->pages_count);
 	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
+	GEM_BUG_ON(!i915_vma_is_active(vma));
 
 	__i915_vma_pin(vma);
 	GEM_BUG_ON(!i915_vma_is_pinned(vma));
@@ -980,7 +1004,7 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	if (wakeref)
 		intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
 err_pages:
-	vma_put_pages(vma);
+	i915_vma_put_pages(vma);
 	return err;
 }
 
@@ -1084,6 +1108,7 @@ void i915_vma_release(struct kref *ref)
 		GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
 	}
 	GEM_BUG_ON(i915_vma_is_active(vma));
+	GEM_BUG_ON(!list_empty(&vma->vm_link));
 
 	if (vma->obj) {
 		struct drm_i915_gem_object *obj = vma->obj;
@@ -1142,7 +1167,7 @@ static void __i915_vma_iounmap(struct i915_vma *vma)
 {
 	GEM_BUG_ON(i915_vma_is_pinned(vma));
 
-	if (vma->iomap == NULL)
+	if (!vma->iomap)
 		return;
 
 	io_mapping_unmap(vma->iomap);
@@ -1232,31 +1257,9 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 	return 0;
 }
 
-int __i915_vma_unbind(struct i915_vma *vma)
+void __i915_vma_evict(struct i915_vma *vma)
 {
-	int ret;
-
-	lockdep_assert_held(&vma->vm->mutex);
-
-	if (i915_vma_is_pinned(vma)) {
-		vma_print_allocator(vma, "is pinned");
-		return -EAGAIN;
-	}
-
-	/*
-	 * After confirming that no one else is pinning this vma, wait for
-	 * any laggards who may have crept in during the wait (through
-	 * a residual pin skipping the vm->mutex) to complete.
-	 */
-	ret = i915_vma_sync(vma);
-	if (ret)
-		return ret;
-
-	if (!drm_mm_node_allocated(&vma->node))
-		return 0;
-
 	GEM_BUG_ON(i915_vma_is_pinned(vma));
-	GEM_BUG_ON(i915_vma_is_active(vma));
 
 	if (i915_vma_is_map_and_fenceable(vma)) {
 		/* Force a pagefault for domain tracking on next user access */
@@ -1295,6 +1298,33 @@ int __i915_vma_unbind(struct i915_vma *vma)
 
 	i915_vma_detach(vma);
 	vma_unbind_pages(vma);
+}
+
+int __i915_vma_unbind(struct i915_vma *vma)
+{
+	int ret;
+
+	lockdep_assert_held(&vma->vm->mutex);
+
+	if (!drm_mm_node_allocated(&vma->node))
+		return 0;
+
+	if (i915_vma_is_pinned(vma)) {
+		vma_print_allocator(vma, "is pinned");
+		return -EAGAIN;
+	}
+
+	/*
+	 * After confirming that no one else is pinning this vma, wait for
+	 * any laggards who may have crept in during the wait (through
+	 * a residual pin skipping the vm->mutex) to complete.
+	 */
+	ret = i915_vma_sync(vma);
+	if (ret)
+		return ret;
+
+	GEM_BUG_ON(i915_vma_is_active(vma));
+	__i915_vma_evict(vma);
 
 	drm_mm_remove_node(&vma->node); /* pairs with i915_vma_release() */
 	return 0;
@@ -1306,13 +1336,13 @@ int i915_vma_unbind(struct i915_vma *vma)
 	intel_wakeref_t wakeref = 0;
 	int err;
 
-	if (!drm_mm_node_allocated(&vma->node))
-		return 0;
-
 	/* Optimistic wait before taking the mutex */
 	err = i915_vma_sync(vma);
 	if (err)
-		goto out_rpm;
+		return err;
+
+	if (!drm_mm_node_allocated(&vma->node))
+		return 0;
 
 	if (i915_vma_is_pinned(vma)) {
 		vma_print_allocator(vma, "is pinned");
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 8ad1daabcd58..478e8679f331 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -203,6 +203,7 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
 			u64 size, u64 alignment, u64 flags);
 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
 void i915_vma_revoke_mmap(struct i915_vma *vma);
+void __i915_vma_evict(struct i915_vma *vma);
 int __i915_vma_unbind(struct i915_vma *vma);
 int __must_check i915_vma_unbind(struct i915_vma *vma);
 void i915_vma_unlink_ctx(struct i915_vma *vma);
@@ -239,6 +240,9 @@ int __must_check
 i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags);
 int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags);
 
+int i915_vma_get_pages(struct i915_vma *vma);
+void i915_vma_put_pages(struct i915_vma *vma);
+
 static inline int i915_vma_pin_count(const struct i915_vma *vma)
 {
 	return atomic_read(&vma->flags) & I915_VMA_PIN_MASK;
@@ -376,6 +380,7 @@ void i915_vma_make_shrinkable(struct i915_vma *vma);
 void i915_vma_make_purgeable(struct i915_vma *vma);
 
 int i915_vma_wait_for_bind(struct i915_vma *vma);
+int i915_vma_wait_for_unbind(struct i915_vma *vma);
 
 static inline int i915_vma_sync(struct i915_vma *vma)
 {
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Intel-gfx] [PATCH 9/9] drm/i915/gem: Bind the fence async for execbuf
  2020-04-30 11:18 [Intel-gfx] [PATCH 1/9] drm/i915/gt: Stop holding onto the pinned_default_state Chris Wilson
                   ` (6 preceding siblings ...)
  2020-04-30 11:18 ` [Intel-gfx] [PATCH 8/9] drm/i915/gem: Asynchronous GTT unbinding Chris Wilson
@ 2020-04-30 11:18 ` Chris Wilson
  2020-04-30 11:58 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/9] drm/i915/gt: Stop holding onto the pinned_default_state Patchwork
                   ` (2 subsequent siblings)
  10 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2020-04-30 11:18 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

It is illegal to wait on another vma while holding the vm->mutex, as
that easily leads to ABBA deadlocks (we wait on a second vma that waits
on us to release the vm->mutex). So while the vm->mutex exists, move the
waiting outside of the lock into the async binding pipeline.
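
Roughly, the split this makes between eb_reserve_vma() and the bind
worker (identifiers as in the diff below; sketch only, not standalone
code):

	/* Under vm->mutex: claim the fence register and queue the update
	 * on the async bind work, but never wait here. */
	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
		err = __i915_vma_pin_fence_async(vma, &work->base);
		if (unlikely(err))
			return err;

		if (vma->fence)
			ev->flags |= __EXEC_OBJECT_HAS_FENCE;
	}

	/* Later, in the bind worker, once every prerequisite fence has
	 * signalled, the fence register is actually written. */
	if (ev->flags & __EXEC_OBJECT_HAS_FENCE)
		__i915_vma_apply_fence_async(vma);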

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  41 ++++--
 drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c  | 137 +++++++++++++++++-
 drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h  |   5 +
 3 files changed, 166 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 1063db957f17..78bac5debcf7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -516,13 +516,23 @@ eb_pin_vma(struct i915_execbuffer *eb,
 	}
 
 	if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
-		if (unlikely(i915_vma_pin_fence(vma))) {
-			i915_vma_unpin(vma);
-			return false;
-		}
+		struct i915_fence_reg *reg = vma->fence;
 
-		if (vma->fence)
+		/* Avoid waiting to change the fence; defer to async worker */
+		if (reg) {
+			if (READ_ONCE(reg->dirty)) {
+				__i915_vma_unpin(vma);
+				return false;
+			}
+
+			atomic_inc(&reg->pin_count);
 			ev->flags |= __EXEC_OBJECT_HAS_FENCE;
+		} else {
+			if (i915_gem_object_is_tiled(vma->obj)) {
+				__i915_vma_unpin(vma);
+				return false;
+			}
+		}
 	}
 
 	ev->flags |= __EXEC_OBJECT_HAS_PIN;
@@ -1052,15 +1062,6 @@ static int eb_reserve_vma(struct eb_vm_work *work, struct eb_vma *ev)
 		return err;
 
 pin:
-	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
-		err = __i915_vma_pin_fence(vma); /* XXX no waiting */
-		if (unlikely(err))
-			return err;
-
-		if (vma->fence)
-			ev->flags |= __EXEC_OBJECT_HAS_FENCE;
-	}
-
 	bind_flags &= ~atomic_read(&vma->flags);
 	if (bind_flags) {
 		err = set_bind_fence(vma, work);
@@ -1091,6 +1092,15 @@ static int eb_reserve_vma(struct eb_vm_work *work, struct eb_vma *ev)
 	ev->flags |= __EXEC_OBJECT_HAS_PIN;
 	GEM_BUG_ON(eb_vma_misplaced(entry, vma, ev->flags));
 
+	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
+		err = __i915_vma_pin_fence_async(vma, &work->base);
+		if (unlikely(err))
+			return err;
+
+		if (vma->fence)
+			ev->flags |= __EXEC_OBJECT_HAS_FENCE;
+	}
+
 	return 0;
 }
 
@@ -1126,6 +1136,9 @@ static int __eb_bind_vma(struct eb_vm_work *work, int err)
 	list_for_each_entry(ev, &work->unbound, bind_link) {
 		struct i915_vma *vma = ev->vma;
 
+		if (ev->flags & __EXEC_OBJECT_HAS_FENCE)
+			__i915_vma_apply_fence_async(vma);
+
 		if (!ev->bind_flags)
 			goto put;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
index 7fb36b12fe7a..734b6aa61809 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
@@ -21,10 +21,13 @@
  * IN THE SOFTWARE.
  */
 
+#include "i915_active.h"
 #include "i915_drv.h"
 #include "i915_scatterlist.h"
+#include "i915_sw_fence_work.h"
 #include "i915_pvinfo.h"
 #include "i915_vgpu.h"
+#include "i915_vma.h"
 
 /**
  * DOC: fence register handling
@@ -340,19 +343,37 @@ static struct i915_fence_reg *fence_find(struct i915_ggtt *ggtt)
 	return ERR_PTR(-EDEADLK);
 }
 
+static int fence_wait_bind(struct i915_fence_reg *reg)
+{
+	struct dma_fence *fence;
+	int err = 0;
+
+	fence = i915_active_fence_get(&reg->active.excl);
+	if (fence) {
+		err = dma_fence_wait(fence, true);
+		dma_fence_put(fence);
+	}
+
+	return err;
+}
+
 int __i915_vma_pin_fence(struct i915_vma *vma)
 {
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
-	struct i915_fence_reg *fence;
+	struct i915_fence_reg *fence = vma->fence;
 	struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
 	int err;
 
 	lockdep_assert_held(&vma->vm->mutex);
 
 	/* Just update our place in the LRU if our fence is getting reused. */
-	if (vma->fence) {
-		fence = vma->fence;
+	if (fence) {
 		GEM_BUG_ON(fence->vma != vma);
+
+		err = fence_wait_bind(fence);
+		if (err)
+			return err;
+
 		atomic_inc(&fence->pin_count);
 		if (!fence->dirty) {
 			list_move_tail(&fence->link, &ggtt->fence_list);
@@ -384,6 +405,116 @@ int __i915_vma_pin_fence(struct i915_vma *vma)
 	return err;
 }
 
+static int set_bind_fence(struct i915_fence_reg *fence,
+			  struct dma_fence_work *work)
+{
+	struct dma_fence *prev;
+	int err;
+
+	if (rcu_access_pointer(fence->active.excl.fence) == &work->dma)
+		return 0;
+
+	err = i915_sw_fence_await_active(&work->chain,
+					 &fence->active,
+					 I915_ACTIVE_AWAIT_ACTIVE);
+	if (err)
+		return err;
+
+	if (i915_active_acquire(&fence->active))
+		return -ENOENT;
+
+	prev = i915_active_set_exclusive(&fence->active, &work->dma);
+	if (unlikely(prev)) {
+		err = i915_sw_fence_await_dma_fence(&work->chain, prev, 0,
+						    GFP_NOWAIT | __GFP_NOWARN);
+		dma_fence_put(prev);
+	}
+
+	i915_active_release(&fence->active);
+	return err < 0 ? err : 0;
+}
+
+int __i915_vma_pin_fence_async(struct i915_vma *vma,
+			       struct dma_fence_work *work)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
+	struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
+	struct i915_fence_reg *fence = vma->fence;
+	int err;
+
+	lockdep_assert_held(&vma->vm->mutex);
+
+	/* Just update our place in the LRU if our fence is getting reused. */
+	if (fence) {
+		GEM_BUG_ON(fence->vma != vma);
+		GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
+	} else if (set) {
+		if (!i915_vma_is_map_and_fenceable(vma))
+			return -EINVAL;
+
+		fence = fence_find(ggtt);
+		if (IS_ERR(fence))
+			return -ENOSPC;
+
+		GEM_BUG_ON(atomic_read(&fence->pin_count));
+		fence->dirty = true;
+	} else {
+		return 0;
+	}
+
+	atomic_inc(&fence->pin_count);
+	list_move_tail(&fence->link, &ggtt->fence_list);
+	if (!fence->dirty)
+		return 0;
+
+	if (INTEL_GEN(fence_to_i915(fence)) < 4 &&
+	    rcu_access_pointer(vma->active.excl.fence) != &work->dma) {
+		/* implicit 'unfenced' GPU blits */
+		err = i915_sw_fence_await_active(&work->chain,
+						 &vma->active,
+						 I915_ACTIVE_AWAIT_ACTIVE);
+		if (err)
+			goto err_unpin;
+	}
+
+	err = set_bind_fence(fence, work);
+	if (err)
+		goto err_unpin;
+
+	if (set) {
+		fence->start = vma->node.start;
+		fence->size  = vma->fence_size;
+		fence->stride = i915_gem_object_get_stride(vma->obj);
+		fence->tiling = i915_gem_object_get_tiling(vma->obj);
+
+		vma->fence = fence;
+	} else {
+		fence->tiling = 0;
+		vma->fence = NULL;
+	}
+
+	set = xchg(&fence->vma, set);
+	if (set && set != vma) {
+		GEM_BUG_ON(set->fence != fence);
+		WRITE_ONCE(set->fence, NULL);
+		i915_vma_revoke_mmap(set);
+	}
+
+	return 0;
+
+err_unpin:
+	atomic_dec(&fence->pin_count);
+	return err;
+}
+
+void __i915_vma_apply_fence_async(struct i915_vma *vma)
+{
+	struct i915_fence_reg *fence = vma->fence;
+
+	if (fence->dirty)
+		fence_write(fence);
+}
+
 /**
  * i915_vma_pin_fence - set up fencing for a vma
  * @vma: vma to map through a fence reg
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h
index 9eef679e1311..d306ac14d47e 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h
@@ -30,6 +30,7 @@
 
 #include "i915_active.h"
 
+struct dma_fence_work;
 struct drm_i915_gem_object;
 struct i915_ggtt;
 struct i915_vma;
@@ -70,6 +71,10 @@ void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
 void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
 					 struct sg_table *pages);
 
+int __i915_vma_pin_fence_async(struct i915_vma *vma,
+			       struct dma_fence_work *work);
+void __i915_vma_apply_fence_async(struct i915_vma *vma);
+
 void intel_ggtt_init_fences(struct i915_ggtt *ggtt);
 void intel_ggtt_fini_fences(struct i915_ggtt *ggtt);
 
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [Intel-gfx] [PATCH 8/9] drm/i915/gem: Asynchronous GTT unbinding
  2020-04-30 11:18 ` [Intel-gfx] [PATCH 8/9] drm/i915/gem: Asynchronous GTT unbinding Chris Wilson
@ 2020-04-30 11:28   ` Chris Wilson
  0 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2020-04-30 11:28 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

Quoting Chris Wilson (2020-04-30 12:18:18)
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index cb43381b0d37..da081401142e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -219,6 +219,8 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
>                 mode = DRM_MM_INSERT_HIGHEST;
>         if (flags & PIN_MAPPABLE)
>                 mode = DRM_MM_INSERT_LOW;
> +       if (flags & PIN_NOSEARCH)
> +               mode = DRM_MM_INSERT_ONCE;

Fortuitously it is only used in this patch together with
DRM_MM_INSERT_BEST, so the mistake goes unnoticed, but it should be
mode |= DRM_MM_INSERT_ONCE;
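
That is, the corrected hunk would presumably read:

	if (flags & PIN_MAPPABLE)
		mode = DRM_MM_INSERT_LOW;
	if (flags & PIN_NOSEARCH)
		mode |= DRM_MM_INSERT_ONCE;	/* OR in, keep the placement mode */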

Now the question is does CI notice this mistake? Unlikely. Could CI even
notice? Unsure.
-Chris

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/9] drm/i915/gt: Stop holding onto the pinned_default_state
  2020-04-30 11:18 [Intel-gfx] [PATCH 1/9] drm/i915/gt: Stop holding onto the pinned_default_state Chris Wilson
                   ` (7 preceding siblings ...)
  2020-04-30 11:18 ` [Intel-gfx] [PATCH 9/9] drm/i915/gem: Bind the fence async for execbuf Chris Wilson
@ 2020-04-30 11:58 ` Patchwork
  2020-04-30 12:20 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
  2020-04-30 20:37 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
  10 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2020-04-30 11:58 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/9] drm/i915/gt: Stop holding onto the pinned_default_state
URL   : https://patchwork.freedesktop.org/series/76771/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
0e9eeaba9f3e drm/i915/gt: Stop holding onto the pinned_default_state
bdbc12ae68bf drm/i915/gt: Move the batch buffer pool from the engine to the gt
-:291: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#291: 
deleted file mode 100644

total: 0 errors, 1 warnings, 0 checks, 589 lines checked
fc352b043c47 drm/i915: Always defer fenced work to the worker
83f24f70e12c drm/i915/gem: Include PIN_GLOBAL prior to using I915_DISPATCH_SECURE
d5bcc12365db drm/i915/gem: Assign context id for async work
24191a6a4fde drm/i915: Export a preallocate variant of i915_active_acquire()
e96477842906 drm/i915/gem: Separate the ww_mutex walker into its own list
-:92: WARNING:LONG_LINE: line over 100 characters
#92: FILE: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:1637:
+			list_for_each_entry_safe_continue_reverse(unlock, en, &eb->lock, lock_link) {

-:140: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'pos' - possible side-effects?
#140: FILE: drivers/gpu/drm/i915/i915_utils.h:269:
+#define list_for_each_entry_safe_continue_reverse(pos, n, head, member)	\
+	for (pos = list_prev_entry(pos, member),			\
+		n = list_prev_entry(pos, member);			\
+	     &pos->member != (head);					\
+	     pos = n, n = list_prev_entry(n, member))

-:140: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'n' - possible side-effects?
#140: FILE: drivers/gpu/drm/i915/i915_utils.h:269:
+#define list_for_each_entry_safe_continue_reverse(pos, n, head, member)	\
+	for (pos = list_prev_entry(pos, member),			\
+		n = list_prev_entry(pos, member);			\
+	     &pos->member != (head);					\
+	     pos = n, n = list_prev_entry(n, member))

-:140: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'member' - possible side-effects?
#140: FILE: drivers/gpu/drm/i915/i915_utils.h:269:
+#define list_for_each_entry_safe_continue_reverse(pos, n, head, member)	\
+	for (pos = list_prev_entry(pos, member),			\
+		n = list_prev_entry(pos, member);			\
+	     &pos->member != (head);					\
+	     pos = n, n = list_prev_entry(n, member))

total: 0 errors, 1 warnings, 3 checks, 120 lines checked
0187f28a9b9b drm/i915/gem: Asynchronous GTT unbinding
16a3283586a7 drm/i915/gem: Bind the fence async for execbuf



* [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/9] drm/i915/gt: Stop holding onto the pinned_default_state
  2020-04-30 11:18 [Intel-gfx] [PATCH 1/9] drm/i915/gt: Stop holding onto the pinned_default_state Chris Wilson
                   ` (8 preceding siblings ...)
  2020-04-30 11:58 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/9] drm/i915/gt: Stop holding onto the pinned_default_state Patchwork
@ 2020-04-30 12:20 ` Patchwork
  2020-04-30 20:37 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
  10 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2020-04-30 12:20 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/9] drm/i915/gt: Stop holding onto the pinned_default_state
URL   : https://patchwork.freedesktop.org/series/76771/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_8401 -> Patchwork_17526
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17526/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_17526:

### IGT changes ###

#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@debugfs_test@read_all_entries:
    - fi-kbl-7500u:       NOTRUN -> [{ABORT}][1]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17526/fi-kbl-7500u/igt@debugfs_test@read_all_entries.html

  
Known issues
------------

  Here are the changes found in Patchwork_17526 that come from known issues:

### IGT changes ###

#### Possible fixes ####

  * igt@i915_selftest@live@gem_contexts:
    - fi-bwr-2160:        [INCOMPLETE][2] ([i915#489]) -> [PASS][3]
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8401/fi-bwr-2160/igt@i915_selftest@live@gem_contexts.html
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17526/fi-bwr-2160/igt@i915_selftest@live@gem_contexts.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [i915#489]: https://gitlab.freedesktop.org/drm/intel/issues/489
  [i915#541]: https://gitlab.freedesktop.org/drm/intel/issues/541


Participating hosts (50 -> 43)
------------------------------

  Missing    (7): fi-kbl-soraka fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-byt-clapper 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_8401 -> Patchwork_17526

  CI-20190529: 20190529
  CI_DRM_8401: 41fac0e3809be301af095c66e717eb9843b80212 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5617: 807b26292a3f21057ef7865a4028d22c512590df @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_17526: 16a3283586a72e2e921fdb2e8646a92f6af32078 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

16a3283586a7 drm/i915/gem: Bind the fence async for execbuf
0187f28a9b9b drm/i915/gem: Asynchronous GTT unbinding
e96477842906 drm/i915/gem: Separate the ww_mutex walker into its own list
24191a6a4fde drm/i915: Export a preallocate variant of i915_active_acquire()
d5bcc12365db drm/i915/gem: Assign context id for async work
83f24f70e12c drm/i915/gem: Include PIN_GLOBAL prior to using I915_DISPATCH_SECURE
fc352b043c47 drm/i915: Always defer fenced work to the worker
bdbc12ae68bf drm/i915/gt: Move the batch buffer pool from the engine to the gt
0e9eeaba9f3e drm/i915/gt: Stop holding onto the pinned_default_state

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17526/index.html


* Re: [Intel-gfx] [PATCH 2/9] drm/i915/gt: Move the batch buffer pool from the engine to the gt
  2020-04-30 11:18 ` [Intel-gfx] [PATCH 2/9] drm/i915/gt: Move the batch buffer pool from the engine to the gt Chris Wilson
@ 2020-04-30 17:04   ` Tvrtko Ursulin
  0 siblings, 0 replies; 14+ messages in thread
From: Tvrtko Ursulin @ 2020-04-30 17:04 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 30/04/2020 12:18, Chris Wilson wrote:
> Since the introduction of 'soft-rc6', we aim to park the device quickly
> and that results in frequent idling of the whole device. Currently upon
> idling we free the batch buffer pool, and so this renders the cache
> ineffective for many workloads. If we want to have an effective cache of
> recently allocated buffers available for reuse, we need to decouple that
> cache from the engine power management and make it timer-based. As there
> is no reason then to keep it within the engine (where it once made
> retirement order easier to track), we can move it up the hierarchy to the
> owner of the memory allocations.
> 
> v2: Hook up to debugfs/drop_caches to clear the cache on demand.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/Makefile                 |   2 +-
>   .../gpu/drm/i915/gem/i915_gem_client_blt.c    |   1 -
>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  20 +--
>   .../gpu/drm/i915/gem/i915_gem_object_blt.c    |  18 +--
>   .../gpu/drm/i915/gem/i915_gem_object_blt.h    |   1 -
>   drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   4 -
>   drivers/gpu/drm/i915/gt/intel_engine_pm.c     |   2 -
>   drivers/gpu/drm/i915/gt/intel_engine_pool.h   |  34 ------
>   drivers/gpu/drm/i915/gt/intel_engine_types.h  |   8 --
>   drivers/gpu/drm/i915/gt/intel_gt.c            |   3 +
>   ...l_engine_pool.c => intel_gt_buffer_pool.c} | 114 ++++++++++++------
>   .../gpu/drm/i915/gt/intel_gt_buffer_pool.h    |  37 ++++++
>   ...l_types.h => intel_gt_buffer_pool_types.h} |  15 ++-
>   drivers/gpu/drm/i915/gt/intel_gt_types.h      |  11 ++
>   drivers/gpu/drm/i915/gt/mock_engine.c         |   2 -
>   drivers/gpu/drm/i915/i915_debugfs.c           |   4 +
>   16 files changed, 160 insertions(+), 116 deletions(-)
>   delete mode 100644 drivers/gpu/drm/i915/gt/intel_engine_pool.h
>   rename drivers/gpu/drm/i915/gt/{intel_engine_pool.c => intel_gt_buffer_pool.c} (53%)
>   create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h
>   rename drivers/gpu/drm/i915/gt/{intel_engine_pool_types.h => intel_gt_buffer_pool_types.h} (54%)
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index caf00d92ea9d..5359c736c789 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -87,11 +87,11 @@ gt-y += \
>   	gt/intel_engine_cs.o \
>   	gt/intel_engine_heartbeat.o \
>   	gt/intel_engine_pm.o \
> -	gt/intel_engine_pool.o \
>   	gt/intel_engine_user.o \
>   	gt/intel_ggtt.o \
>   	gt/intel_ggtt_fencing.o \
>   	gt/intel_gt.o \
> +	gt/intel_gt_buffer_pool.o \
>   	gt/intel_gt_clock_utils.o \
>   	gt/intel_gt_irq.o \
>   	gt/intel_gt_pm.o \
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
> index 0598e5382a1d..3a146aa2593b 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
> @@ -6,7 +6,6 @@
>   #include "i915_drv.h"
>   #include "gt/intel_context.h"
>   #include "gt/intel_engine_pm.h"
> -#include "gt/intel_engine_pool.h"
>   #include "i915_gem_client_blt.h"
>   #include "i915_gem_object_blt.h"
>   
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 964f73f062c1..414859fa2673 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -15,8 +15,8 @@
>   
>   #include "gem/i915_gem_ioctls.h"
>   #include "gt/intel_context.h"
> -#include "gt/intel_engine_pool.h"
>   #include "gt/intel_gt.h"
> +#include "gt/intel_gt_buffer_pool.h"
>   #include "gt/intel_gt_pm.h"
>   #include "gt/intel_ring.h"
>   
> @@ -1194,13 +1194,13 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
>   			     unsigned int len)
>   {
>   	struct reloc_cache *cache = &eb->reloc_cache;
> -	struct intel_engine_pool_node *pool;
> +	struct intel_gt_buffer_pool_node *pool;
>   	struct i915_request *rq;
>   	struct i915_vma *batch;
>   	u32 *cmd;
>   	int err;
>   
> -	pool = intel_engine_get_pool(eb->engine, PAGE_SIZE);
> +	pool = intel_gt_get_buffer_pool(eb->engine->gt, PAGE_SIZE);
>   	if (IS_ERR(pool))
>   		return PTR_ERR(pool);
>   
> @@ -1229,7 +1229,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
>   		goto err_unpin;
>   	}
>   
> -	err = intel_engine_pool_mark_active(pool, rq);
> +	err = intel_gt_buffer_pool_mark_active(pool, rq);
>   	if (err)
>   		goto err_request;
>   
> @@ -1270,7 +1270,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
>   err_unmap:
>   	i915_gem_object_unpin_map(pool->obj);
>   out_pool:
> -	intel_engine_pool_put(pool);
> +	intel_gt_buffer_pool_put(pool);
>   	return err;
>   }
>   
> @@ -1887,7 +1887,7 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
>   static int eb_parse(struct i915_execbuffer *eb)
>   {
>   	struct drm_i915_private *i915 = eb->i915;
> -	struct intel_engine_pool_node *pool;
> +	struct intel_gt_buffer_pool_node *pool;
>   	struct i915_vma *shadow, *trampoline;
>   	unsigned int len;
>   	int err;
> @@ -1910,7 +1910,7 @@ static int eb_parse(struct i915_execbuffer *eb)
>   		len += I915_CMD_PARSER_TRAMPOLINE_SIZE;
>   	}
>   
> -	pool = intel_engine_get_pool(eb->engine, len);
> +	pool = intel_gt_get_buffer_pool(eb->engine->gt, len);
>   	if (IS_ERR(pool))
>   		return PTR_ERR(pool);
>   
> @@ -1958,7 +1958,7 @@ static int eb_parse(struct i915_execbuffer *eb)
>   err_shadow:
>   	i915_vma_unpin(shadow);
>   err:
> -	intel_engine_pool_put(pool);
> +	intel_gt_buffer_pool_put(pool);
>   	return err;
>   }
>   
> @@ -2643,7 +2643,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   	 */
>   	eb.request->batch = batch;
>   	if (batch->private)
> -		intel_engine_pool_mark_active(batch->private, eb.request);
> +		intel_gt_buffer_pool_mark_active(batch->private, eb.request);
>   
>   	trace_i915_request_queue(eb.request, eb.batch_flags);
>   	err = eb_submit(&eb, batch);
> @@ -2672,7 +2672,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   		i915_vma_unpin(batch);
>   err_parse:
>   	if (batch->private)
> -		intel_engine_pool_put(batch->private);
> +		intel_gt_buffer_pool_put(batch->private);
>   err_vma:
>   	if (eb.trampoline)
>   		i915_vma_unpin(eb.trampoline);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
> index e00792158f13..2fc7737ef5f4 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
> @@ -6,8 +6,8 @@
>   #include "i915_drv.h"
>   #include "gt/intel_context.h"
>   #include "gt/intel_engine_pm.h"
> -#include "gt/intel_engine_pool.h"
>   #include "gt/intel_gt.h"
> +#include "gt/intel_gt_buffer_pool.h"
>   #include "gt/intel_ring.h"
>   #include "i915_gem_clflush.h"
>   #include "i915_gem_object_blt.h"
> @@ -18,7 +18,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
>   {
>   	struct drm_i915_private *i915 = ce->vm->i915;
>   	const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
> -	struct intel_engine_pool_node *pool;
> +	struct intel_gt_buffer_pool_node *pool;
>   	struct i915_vma *batch;
>   	u64 offset;
>   	u64 count;
> @@ -33,7 +33,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
>   	count = div_u64(round_up(vma->size, block_size), block_size);
>   	size = (1 + 8 * count) * sizeof(u32);
>   	size = round_up(size, PAGE_SIZE);
> -	pool = intel_engine_get_pool(ce->engine, size);
> +	pool = intel_gt_get_buffer_pool(ce->engine->gt, size);
>   	if (IS_ERR(pool)) {
>   		err = PTR_ERR(pool);
>   		goto out_pm;
> @@ -96,7 +96,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
>   	return batch;
>   
>   out_put:
> -	intel_engine_pool_put(pool);
> +	intel_gt_buffer_pool_put(pool);
>   out_pm:
>   	intel_engine_pm_put(ce->engine);
>   	return ERR_PTR(err);
> @@ -114,13 +114,13 @@ int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq)
>   	if (unlikely(err))
>   		return err;
>   
> -	return intel_engine_pool_mark_active(vma->private, rq);
> +	return intel_gt_buffer_pool_mark_active(vma->private, rq);
>   }
>   
>   void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma)
>   {
>   	i915_vma_unpin(vma);
> -	intel_engine_pool_put(vma->private);
> +	intel_gt_buffer_pool_put(vma->private);
>   	intel_engine_pm_put(ce->engine);
>   }
>   
> @@ -213,7 +213,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
>   {
>   	struct drm_i915_private *i915 = ce->vm->i915;
>   	const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
> -	struct intel_engine_pool_node *pool;
> +	struct intel_gt_buffer_pool_node *pool;
>   	struct i915_vma *batch;
>   	u64 src_offset, dst_offset;
>   	u64 count, rem;
> @@ -228,7 +228,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
>   	count = div_u64(round_up(dst->size, block_size), block_size);
>   	size = (1 + 11 * count) * sizeof(u32);
>   	size = round_up(size, PAGE_SIZE);
> -	pool = intel_engine_get_pool(ce->engine, size);
> +	pool = intel_gt_get_buffer_pool(ce->engine->gt, size);
>   	if (IS_ERR(pool)) {
>   		err = PTR_ERR(pool);
>   		goto out_pm;
> @@ -307,7 +307,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
>   	return batch;
>   
>   out_put:
> -	intel_engine_pool_put(pool);
> +	intel_gt_buffer_pool_put(pool);
>   out_pm:
>   	intel_engine_pm_put(ce->engine);
>   	return ERR_PTR(err);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
> index 243a43a87824..8bcd336a90dc 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
> @@ -10,7 +10,6 @@
>   
>   #include "gt/intel_context.h"
>   #include "gt/intel_engine_pm.h"
> -#include "gt/intel_engine_pool.h"
>   #include "i915_vma.h"
>   
>   struct drm_i915_gem_object;
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index c9e46c5ced43..98b326a1568d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -31,7 +31,6 @@
>   #include "intel_context.h"
>   #include "intel_engine.h"
>   #include "intel_engine_pm.h"
> -#include "intel_engine_pool.h"
>   #include "intel_engine_user.h"
>   #include "intel_gt.h"
>   #include "intel_gt_requests.h"
> @@ -631,8 +630,6 @@ static int engine_setup_common(struct intel_engine_cs *engine)
>   	intel_engine_init__pm(engine);
>   	intel_engine_init_retire(engine);
>   
> -	intel_engine_pool_init(&engine->pool);
> -
>   	/* Use the whole device by default */
>   	engine->sseu =
>   		intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu);
> @@ -829,7 +826,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
>   	cleanup_status_page(engine);
>   
>   	intel_engine_fini_retire(engine);
> -	intel_engine_pool_fini(&engine->pool);
>   	intel_engine_fini_breadcrumbs(engine);
>   	intel_engine_cleanup_cmd_parser(engine);
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> index cf46076c59b2..d0a1078ef632 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> @@ -10,7 +10,6 @@
>   #include "intel_engine.h"
>   #include "intel_engine_heartbeat.h"
>   #include "intel_engine_pm.h"
> -#include "intel_engine_pool.h"
>   #include "intel_gt.h"
>   #include "intel_gt_pm.h"
>   #include "intel_rc6.h"
> @@ -248,7 +247,6 @@ static int __engine_park(struct intel_wakeref *wf)
>   
>   	intel_engine_park_heartbeat(engine);
>   	intel_engine_disarm_breadcrumbs(engine);
> -	intel_engine_pool_park(&engine->pool);
>   
>   	/* Must be reset upon idling, or we may miss the busy wakeup. */
>   	GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.h b/drivers/gpu/drm/i915/gt/intel_engine_pool.h
> deleted file mode 100644
> index 1bd89cadc3b7..000000000000
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pool.h
> +++ /dev/null
> @@ -1,34 +0,0 @@
> -/*
> - * SPDX-License-Identifier: MIT
> - *
> - * Copyright © 2014-2018 Intel Corporation
> - */
> -
> -#ifndef INTEL_ENGINE_POOL_H
> -#define INTEL_ENGINE_POOL_H
> -
> -#include "intel_engine_pool_types.h"
> -#include "i915_active.h"
> -#include "i915_request.h"
> -
> -struct intel_engine_pool_node *
> -intel_engine_get_pool(struct intel_engine_cs *engine, size_t size);
> -
> -static inline int
> -intel_engine_pool_mark_active(struct intel_engine_pool_node *node,
> -			      struct i915_request *rq)
> -{
> -	return i915_active_add_request(&node->active, rq);
> -}
> -
> -static inline void
> -intel_engine_pool_put(struct intel_engine_pool_node *node)
> -{
> -	i915_active_release(&node->active);
> -}
> -
> -void intel_engine_pool_init(struct intel_engine_pool *pool);
> -void intel_engine_pool_park(struct intel_engine_pool *pool);
> -void intel_engine_pool_fini(struct intel_engine_pool *pool);
> -
> -#endif /* INTEL_ENGINE_POOL_H */
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 68a382229b4a..489deb3b8358 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -22,7 +22,6 @@
>   #include "i915_pmu.h"
>   #include "i915_priolist_types.h"
>   #include "i915_selftest.h"
> -#include "intel_engine_pool_types.h"
>   #include "intel_sseu.h"
>   #include "intel_timeline_types.h"
>   #include "intel_wakeref.h"
> @@ -404,13 +403,6 @@ struct intel_engine_cs {
>   		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT];
>   	} pmu;
>   
> -	/*
> -	 * A pool of objects to use as shadow copies of client batch buffers
> -	 * when the command parser is enabled. Prevents the client from
> -	 * modifying the batch contents after software parsing.
> -	 */
> -	struct intel_engine_pool pool;
> -
>   	struct intel_hw_status_page status_page;
>   	struct i915_ctx_workarounds wa_ctx;
>   	struct i915_wa_list ctx_wa_list;
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
> index 52593edf8aa0..f069551e412f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> @@ -7,6 +7,7 @@
>   #include "i915_drv.h"
>   #include "intel_context.h"
>   #include "intel_gt.h"
> +#include "intel_gt_buffer_pool.h"
>   #include "intel_gt_clock_utils.h"
>   #include "intel_gt_pm.h"
>   #include "intel_gt_requests.h"
> @@ -28,6 +29,7 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
>   	INIT_LIST_HEAD(&gt->closed_vma);
>   	spin_lock_init(&gt->closed_lock);
>   
> +	intel_gt_init_buffer_pool(gt);
>   	intel_gt_init_reset(gt);
>   	intel_gt_init_requests(gt);
>   	intel_gt_init_timelines(gt);
> @@ -621,6 +623,7 @@ void intel_gt_driver_release(struct intel_gt *gt)
>   
>   	intel_gt_pm_fini(gt);
>   	intel_gt_fini_scratch(gt);
> +	intel_gt_fini_buffer_pool(gt);
>   }
>   
>   void intel_gt_driver_late_release(struct intel_gt *gt)
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
> similarity index 53%
> rename from drivers/gpu/drm/i915/gt/intel_engine_pool.c
> rename to drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
> index 397186818305..1495054a4305 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
> @@ -1,6 +1,5 @@
> +// SPDX-License-Identifier: MIT
>   /*
> - * SPDX-License-Identifier: MIT
> - *
>    * Copyright © 2014-2018 Intel Corporation
>    */
>   
> @@ -8,15 +7,15 @@
>   
>   #include "i915_drv.h"
>   #include "intel_engine_pm.h"
> -#include "intel_engine_pool.h"
> +#include "intel_gt_buffer_pool.h"
>   
> -static struct intel_engine_cs *to_engine(struct intel_engine_pool *pool)
> +static struct intel_gt *to_gt(struct intel_gt_buffer_pool *pool)
>   {
> -	return container_of(pool, struct intel_engine_cs, pool);
> +	return container_of(pool, struct intel_gt, buffer_pool);
>   }
>   
>   static struct list_head *
> -bucket_for_size(struct intel_engine_pool *pool, size_t sz)
> +bucket_for_size(struct intel_gt_buffer_pool *pool, size_t sz)
>   {
>   	int n;
>   
> @@ -32,16 +31,50 @@ bucket_for_size(struct intel_engine_pool *pool, size_t sz)
>   	return &pool->cache_list[n];
>   }
>   
> -static void node_free(struct intel_engine_pool_node *node)
> +static void node_free(struct intel_gt_buffer_pool_node *node)
>   {
>   	i915_gem_object_put(node->obj);
>   	i915_active_fini(&node->active);
>   	kfree(node);
>   }
>   
> +static void pool_free_work(struct work_struct *wrk)
> +{
> +	struct intel_gt_buffer_pool *pool =
> +		container_of(wrk, typeof(*pool), work.work);
> +	struct intel_gt_buffer_pool_node *node, *next;
> +	unsigned long old = jiffies - HZ;
> +	bool active = false;
> +	LIST_HEAD(stale);
> +	int n;
> +
> +	/* Free buffers that have not been used in the past second */
> +	spin_lock_irq(&pool->lock);
> +	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
> +		struct list_head *list = &pool->cache_list[n];
> +
> +		/* Most recent at head; oldest at tail */
> +		list_for_each_entry_safe_reverse(node, next, list, link) {
> +			if (time_before(node->age, old))
> +				break;
> +
> +			list_move(&node->link, &stale);
> +		}
> +		active |= !list_empty(list);
> +	}
> +	spin_unlock_irq(&pool->lock);
> +
> +	list_for_each_entry_safe(node, next, &stale, link)
> +		node_free(node);
> +
> +	if (active)
> +		schedule_delayed_work(&pool->work,
> +				      round_jiffies_up_relative(HZ));
> +}
> +
>   static int pool_active(struct i915_active *ref)
>   {
> -	struct intel_engine_pool_node *node =
> +	struct intel_gt_buffer_pool_node *node =
>   		container_of(ref, typeof(*node), active);
>   	struct dma_resv *resv = node->obj->base.resv;
>   	int err;
> @@ -64,29 +97,31 @@ static int pool_active(struct i915_active *ref)
>   __i915_active_call
>   static void pool_retire(struct i915_active *ref)
>   {
> -	struct intel_engine_pool_node *node =
> +	struct intel_gt_buffer_pool_node *node =
>   		container_of(ref, typeof(*node), active);
> -	struct intel_engine_pool *pool = node->pool;
> +	struct intel_gt_buffer_pool *pool = node->pool;
>   	struct list_head *list = bucket_for_size(pool, node->obj->base.size);
>   	unsigned long flags;
>   
> -	GEM_BUG_ON(!intel_engine_pm_is_awake(to_engine(pool)));
> -
>   	i915_gem_object_unpin_pages(node->obj);
>   
>   	/* Return this object to the shrinker pool */
>   	i915_gem_object_make_purgeable(node->obj);
>   
>   	spin_lock_irqsave(&pool->lock, flags);
> +	node->age = jiffies;
>   	list_add(&node->link, list);
>   	spin_unlock_irqrestore(&pool->lock, flags);
> +
> +	schedule_delayed_work(&pool->work,
> +			      round_jiffies_up_relative(HZ));
>   }
>   
> -static struct intel_engine_pool_node *
> -node_create(struct intel_engine_pool *pool, size_t sz)
> +static struct intel_gt_buffer_pool_node *
> +node_create(struct intel_gt_buffer_pool *pool, size_t sz)
>   {
> -	struct intel_engine_cs *engine = to_engine(pool);
> -	struct intel_engine_pool_node *node;
> +	struct intel_gt *gt = to_gt(pool);
> +	struct intel_gt_buffer_pool_node *node;
>   	struct drm_i915_gem_object *obj;
>   
>   	node = kmalloc(sizeof(*node),
> @@ -97,7 +132,7 @@ node_create(struct intel_engine_pool *pool, size_t sz)
>   	node->pool = pool;
>   	i915_active_init(&node->active, pool_active, pool_retire);
>   
> -	obj = i915_gem_object_create_internal(engine->i915, sz);
> +	obj = i915_gem_object_create_internal(gt->i915, sz);
>   	if (IS_ERR(obj)) {
>   		i915_active_fini(&node->active);
>   		kfree(node);
> @@ -110,26 +145,15 @@ node_create(struct intel_engine_pool *pool, size_t sz)
>   	return node;
>   }
>   
> -static struct intel_engine_pool *lookup_pool(struct intel_engine_cs *engine)
> +struct intel_gt_buffer_pool_node *
> +intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size)
>   {
> -	if (intel_engine_is_virtual(engine))
> -		engine = intel_virtual_engine_get_sibling(engine, 0);
> -
> -	GEM_BUG_ON(!engine);
> -	return &engine->pool;
> -}
> -
> -struct intel_engine_pool_node *
> -intel_engine_get_pool(struct intel_engine_cs *engine, size_t size)
> -{
> -	struct intel_engine_pool *pool = lookup_pool(engine);
> -	struct intel_engine_pool_node *node;
> +	struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
> +	struct intel_gt_buffer_pool_node *node;
>   	struct list_head *list;
>   	unsigned long flags;
>   	int ret;
>   
> -	GEM_BUG_ON(!intel_engine_pm_is_awake(to_engine(pool)));
> -
>   	size = PAGE_ALIGN(size);
>   	list = bucket_for_size(pool, size);
>   
> @@ -157,34 +181,48 @@ intel_engine_get_pool(struct intel_engine_cs *engine, size_t size)
>   	return node;
>   }
>   
> -void intel_engine_pool_init(struct intel_engine_pool *pool)
> +void intel_gt_init_buffer_pool(struct intel_gt *gt)
>   {
> +	struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
>   	int n;
>   
>   	spin_lock_init(&pool->lock);
>   	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
>   		INIT_LIST_HEAD(&pool->cache_list[n]);
> +	INIT_DELAYED_WORK(&pool->work, pool_free_work);
>   }
>   
> -void intel_engine_pool_park(struct intel_engine_pool *pool)
> +static void pool_free_imm(struct intel_gt_buffer_pool *pool)
>   {
>   	int n;
>   
> +	spin_lock_irq(&pool->lock);
>   	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
> +		struct intel_gt_buffer_pool_node *node, *next;
>   		struct list_head *list = &pool->cache_list[n];
> -		struct intel_engine_pool_node *node, *nn;
>   
> -		list_for_each_entry_safe(node, nn, list, link)
> +		list_for_each_entry_safe(node, next, list, link)
>   			node_free(node);
> -
>   		INIT_LIST_HEAD(list);
>   	}
> +	spin_unlock_irq(&pool->lock);
> +}
> +
> +void intel_gt_flush_buffer_pool(struct intel_gt *gt)
> +{
> +	struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
> +
> +	if (cancel_delayed_work_sync(&pool->work))
> +		pool_free_imm(pool);
>   }
>   
> -void intel_engine_pool_fini(struct intel_engine_pool *pool)
> +void intel_gt_fini_buffer_pool(struct intel_gt *gt)
>   {
> +	struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
>   	int n;
>   
> +	intel_gt_flush_buffer_pool(gt);
> +
>   	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
>   		GEM_BUG_ON(!list_empty(&pool->cache_list[n]));
>   }
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h
> new file mode 100644
> index 000000000000..42cbac003e8a
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h
> @@ -0,0 +1,37 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2014-2018 Intel Corporation
> + */
> +
> +#ifndef INTEL_GT_BUFFER_POOL_H
> +#define INTEL_GT_BUFFER_POOL_H
> +
> +#include <linux/types.h>
> +
> +#include "i915_active.h"
> +#include "intel_gt_buffer_pool_types.h"
> +
> +struct intel_gt;
> +struct i915_request;
> +
> +struct intel_gt_buffer_pool_node *
> +intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size);
> +
> +static inline int
> +intel_gt_buffer_pool_mark_active(struct intel_gt_buffer_pool_node *node,
> +				 struct i915_request *rq)
> +{
> +	return i915_active_add_request(&node->active, rq);
> +}
> +
> +static inline void
> +intel_gt_buffer_pool_put(struct intel_gt_buffer_pool_node *node)
> +{
> +	i915_active_release(&node->active);
> +}
> +
> +void intel_gt_init_buffer_pool(struct intel_gt *gt);
> +void intel_gt_flush_buffer_pool(struct intel_gt *gt);
> +void intel_gt_fini_buffer_pool(struct intel_gt *gt);
> +
> +#endif /* INTEL_GT_BUFFER_POOL_H */
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
> similarity index 54%
> rename from drivers/gpu/drm/i915/gt/intel_engine_pool_types.h
> rename to drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
> index e31ee361b76f..e28bdda771ed 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
> @@ -4,26 +4,29 @@
>    * Copyright © 2014-2018 Intel Corporation
>    */
>   
> -#ifndef INTEL_ENGINE_POOL_TYPES_H
> -#define INTEL_ENGINE_POOL_TYPES_H
> +#ifndef INTEL_GT_BUFFER_POOL_TYPES_H
> +#define INTEL_GT_BUFFER_POOL_TYPES_H
>   
>   #include <linux/list.h>
>   #include <linux/spinlock.h>
> +#include <linux/workqueue.h>
>   
>   #include "i915_active_types.h"
>   
>   struct drm_i915_gem_object;
>   
> -struct intel_engine_pool {
> +struct intel_gt_buffer_pool {
>   	spinlock_t lock;
>   	struct list_head cache_list[4];
> +	struct delayed_work work;
>   };
>   
> -struct intel_engine_pool_node {
> +struct intel_gt_buffer_pool_node {
>   	struct i915_active active;
>   	struct drm_i915_gem_object *obj;
>   	struct list_head link;
> -	struct intel_engine_pool *pool;
> +	struct intel_gt_buffer_pool *pool;
> +	unsigned long age;
>   };
>   
> -#endif /* INTEL_ENGINE_POOL_TYPES_H */
> +#endif /* INTEL_GT_BUFFER_POOL_TYPES_H */
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> index d02ccb735e24..0cc1d6b185dc 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> @@ -17,6 +17,7 @@
>   
>   #include "i915_vma.h"
>   #include "intel_engine_types.h"
> +#include "intel_gt_buffer_pool_types.h"
>   #include "intel_llc_types.h"
>   #include "intel_reset_types.h"
>   #include "intel_rc6_types.h"
> @@ -97,6 +98,16 @@ struct intel_gt {
>   	 */
>   	struct i915_address_space *vm;
>   
> +	/*
> +	 * A pool of objects to use as shadow copies of client batch buffers
> +	 * when the command parser is enabled. Prevents the client from
> +	 * modifying the batch contents after software parsing.
> +	 *
> +	 * Buffers older than 1s are periodically reaped from the pool,
> +	 * or may be reclaimed by the shrinker before then.
> +	 */
> +	struct intel_gt_buffer_pool buffer_pool;
> +
>   	struct i915_vma *scratch;
>   };
>   
> diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
> index 4a53ded7c2dd..b8dd3cbc8696 100644
> --- a/drivers/gpu/drm/i915/gt/mock_engine.c
> +++ b/drivers/gpu/drm/i915/gt/mock_engine.c
> @@ -28,7 +28,6 @@
>   #include "i915_drv.h"
>   #include "intel_context.h"
>   #include "intel_engine_pm.h"
> -#include "intel_engine_pool.h"
>   
>   #include "mock_engine.h"
>   #include "selftests/mock_request.h"
> @@ -328,7 +327,6 @@ int mock_engine_init(struct intel_engine_cs *engine)
>   	intel_engine_init_execlists(engine);
>   	intel_engine_init__pm(engine);
>   	intel_engine_init_retire(engine);
> -	intel_engine_pool_init(&engine->pool);
>   
>   	ce = create_kernel_context(engine);
>   	if (IS_ERR(ce))
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index c09e1afb5f79..8e98df6a3045 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -32,6 +32,7 @@
>   #include <drm/drm_debugfs.h>
>   
>   #include "gem/i915_gem_context.h"
> +#include "gt/intel_gt_buffer_pool.h"
>   #include "gt/intel_gt_clock_utils.h"
>   #include "gt/intel_gt_pm.h"
>   #include "gt/intel_gt_requests.h"
> @@ -1484,6 +1485,9 @@ gt_drop_caches(struct intel_gt *gt, u64 val)
>   	if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(gt))
>   		intel_gt_handle_error(gt, ALL_ENGINES, 0, NULL);
>   
> +	if (val & DROP_FREED)
> +		intel_gt_flush_buffer_pool(gt);
> +
>   	return 0;
>   }
>   
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
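
For reference, a minimal caller-side sketch of the renamed API, following the
eb_parse()/__reloc_gpu_alloc() hunks quoted above (local names abridged and
error unwinding elided for illustration):

	struct intel_gt_buffer_pool_node *pool;
	int err;

	/* Grab (or create) a pooled buffer of at least @len bytes from the gt-wide cache */
	pool = intel_gt_get_buffer_pool(eb->engine->gt, len);
	if (IS_ERR(pool))
		return PTR_ERR(pool);

	/* Tie the node to @rq so it is only recycled back into the cache on retirement */
	err = intel_gt_buffer_pool_mark_active(pool, rq);
	/* (error unwind elided for brevity) */

	/*
	 * Drop the caller's reference; once the request retires the object
	 * returns to its bucket list, where the delayed worker frees it
	 * after ~1s of disuse.
	 */
	intel_gt_buffer_pool_put(pool);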


* [Intel-gfx] ✓ Fi.CI.IGT: success for series starting with [1/9] drm/i915/gt: Stop holding onto the pinned_default_state
  2020-04-30 11:18 [Intel-gfx] [PATCH 1/9] drm/i915/gt: Stop holding onto the pinned_default_state Chris Wilson
                   ` (9 preceding siblings ...)
  2020-04-30 12:20 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
@ 2020-04-30 20:37 ` Patchwork
  10 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2020-04-30 20:37 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/9] drm/i915/gt: Stop holding onto the pinned_default_state
URL   : https://patchwork.freedesktop.org/series/76771/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_8401_full -> Patchwork_17526_full
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  

Known issues
------------

  Here are the changes found in Patchwork_17526_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_params@invalid-bsd-ring:
    - shard-iclb:         [PASS][1] -> [SKIP][2] ([fdo#109276])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8401/shard-iclb1/igt@gem_exec_params@invalid-bsd-ring.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17526/shard-iclb3/igt@gem_exec_params@invalid-bsd-ring.html

  * igt@kms_plane_alpha_blend@pipe-c-coverage-7efc:
    - shard-skl:          [PASS][3] -> [FAIL][4] ([fdo#108145] / [i915#265]) +1 similar issue
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8401/shard-skl5/igt@kms_plane_alpha_blend@pipe-c-coverage-7efc.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17526/shard-skl9/igt@kms_plane_alpha_blend@pipe-c-coverage-7efc.html

  * igt@kms_psr2_su@frontbuffer:
    - shard-iclb:         [PASS][5] -> [SKIP][6] ([fdo#109642] / [fdo#111068])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8401/shard-iclb2/igt@kms_psr2_su@frontbuffer.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17526/shard-iclb6/igt@kms_psr2_su@frontbuffer.html

  * igt@kms_psr@psr2_basic:
    - shard-iclb:         [PASS][7] -> [SKIP][8] ([fdo#109441]) +2 similar issues
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8401/shard-iclb2/igt@kms_psr@psr2_basic.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17526/shard-iclb6/igt@kms_psr@psr2_basic.html

  * igt@kms_vblank@pipe-c-ts-continuation-suspend:
    - shard-apl:          [PASS][9] -> [DMESG-WARN][10] ([i915#180]) +1 similar issue
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8401/shard-apl8/igt@kms_vblank@pipe-c-ts-continuation-suspend.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17526/shard-apl4/igt@kms_vblank@pipe-c-ts-continuation-suspend.html

  
#### Possible fixes ####

  * igt@i915_pm_dc@dc6-psr:
    - shard-iclb:         [FAIL][11] ([i915#454]) -> [PASS][12]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8401/shard-iclb4/igt@i915_pm_dc@dc6-psr.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17526/shard-iclb4/igt@i915_pm_dc@dc6-psr.html

  * igt@kms_cursor_crc@pipe-c-cursor-suspend:
    - shard-kbl:          [DMESG-WARN][13] ([i915#180]) -> [PASS][14]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8401/shard-kbl1/igt@kms_cursor_crc@pipe-c-cursor-suspend.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17526/shard-kbl4/igt@kms_cursor_crc@pipe-c-cursor-suspend.html

  * {igt@kms_flip@flip-vs-expired-vblank@c-edp1}:
    - shard-skl:          [FAIL][15] ([i915#79]) -> [PASS][16]
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8401/shard-skl7/igt@kms_flip@flip-vs-expired-vblank@c-edp1.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17526/shard-skl4/igt@kms_flip@flip-vs-expired-vblank@c-edp1.html

  * {igt@kms_flip@flip-vs-suspend-interruptible@a-dp1}:
    - shard-apl:          [DMESG-WARN][17] ([i915#180]) -> [PASS][18] +2 similar issues
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8401/shard-apl8/igt@kms_flip@flip-vs-suspend-interruptible@a-dp1.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17526/shard-apl4/igt@kms_flip@flip-vs-suspend-interruptible@a-dp1.html

  * igt@kms_hdr@bpc-switch-dpms:
    - shard-skl:          [FAIL][19] ([i915#1188]) -> [PASS][20]
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8401/shard-skl10/igt@kms_hdr@bpc-switch-dpms.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17526/shard-skl1/igt@kms_hdr@bpc-switch-dpms.html

  * igt@kms_psr@no_drrs:
    - shard-iclb:         [FAIL][21] ([i915#173]) -> [PASS][22]
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8401/shard-iclb1/igt@kms_psr@no_drrs.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17526/shard-iclb3/igt@kms_psr@no_drrs.html

  * igt@kms_psr@psr2_primary_page_flip:
    - shard-iclb:         [SKIP][23] ([fdo#109441]) -> [PASS][24] +2 similar issues
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8401/shard-iclb7/igt@kms_psr@psr2_primary_page_flip.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17526/shard-iclb2/igt@kms_psr@psr2_primary_page_flip.html

  * igt@kms_setmode@basic:
    - shard-hsw:          [FAIL][25] ([i915#31]) -> [PASS][26]
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8401/shard-hsw7/igt@kms_setmode@basic.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17526/shard-hsw7/igt@kms_setmode@basic.html

  
#### Warnings ####

  * igt@kms_psr2_su@page_flip:
    - shard-iclb:         [SKIP][27] ([fdo#109642] / [fdo#111068]) -> [FAIL][28] ([i915#608])
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8401/shard-iclb3/igt@kms_psr2_su@page_flip.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17526/shard-iclb2/igt@kms_psr2_su@page_flip.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#109276]: https://bugs.freedesktop.org/show_bug.cgi?id=109276
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#109642]: https://bugs.freedesktop.org/show_bug.cgi?id=109642
  [fdo#111068]: https://bugs.freedesktop.org/show_bug.cgi?id=111068
  [i915#1188]: https://gitlab.freedesktop.org/drm/intel/issues/1188
  [i915#1542]: https://gitlab.freedesktop.org/drm/intel/issues/1542
  [i915#155]: https://gitlab.freedesktop.org/drm/intel/issues/155
  [i915#173]: https://gitlab.freedesktop.org/drm/intel/issues/173
  [i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180
  [i915#265]: https://gitlab.freedesktop.org/drm/intel/issues/265
  [i915#31]: https://gitlab.freedesktop.org/drm/intel/issues/31
  [i915#454]: https://gitlab.freedesktop.org/drm/intel/issues/454
  [i915#608]: https://gitlab.freedesktop.org/drm/intel/issues/608
  [i915#79]: https://gitlab.freedesktop.org/drm/intel/issues/79
  [i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95


Participating hosts (10 -> 10)
------------------------------

  No changes in participating hosts


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_8401 -> Patchwork_17526

  CI-20190529: 20190529
  CI_DRM_8401: 41fac0e3809be301af095c66e717eb9843b80212 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5617: 807b26292a3f21057ef7865a4028d22c512590df @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_17526: 16a3283586a72e2e921fdb2e8646a92f6af32078 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17526/index.html


end of thread, other threads:[~2020-04-30 20:37 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
2020-04-30 11:18 [Intel-gfx] [PATCH 1/9] drm/i915/gt: Stop holding onto the pinned_default_state Chris Wilson
2020-04-30 11:18 ` [Intel-gfx] [PATCH 2/9] drm/i915/gt: Move the batch buffer pool from the engine to the gt Chris Wilson
2020-04-30 17:04   ` Tvrtko Ursulin
2020-04-30 11:18 ` [Intel-gfx] [PATCH 3/9] drm/i915: Always defer fenced work to the worker Chris Wilson
2020-04-30 11:18 ` [Intel-gfx] [PATCH 4/9] drm/i915/gem: Include PIN_GLOBAL prior to using I915_DISPATCH_SECURE Chris Wilson
2020-04-30 11:18 ` [Intel-gfx] [PATCH 5/9] drm/i915/gem: Assign context id for async work Chris Wilson
2020-04-30 11:18 ` [Intel-gfx] [PATCH 6/9] drm/i915: Export a preallocate variant of i915_active_acquire() Chris Wilson
2020-04-30 11:18 ` [Intel-gfx] [PATCH 7/9] drm/i915/gem: Separate the ww_mutex walker into its own list Chris Wilson
2020-04-30 11:18 ` [Intel-gfx] [PATCH 8/9] drm/i915/gem: Asynchronous GTT unbinding Chris Wilson
2020-04-30 11:28   ` Chris Wilson
2020-04-30 11:18 ` [Intel-gfx] [PATCH 9/9] drm/i915/gem: Bind the fence async for execbuf Chris Wilson
2020-04-30 11:58 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/9] drm/i915/gt: Stop holding onto the pinned_default_state Patchwork
2020-04-30 12:20 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2020-04-30 20:37 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
