* [PATCH 1/7] drm/i915: Allocate a common scratch page
From: Chris Wilson @ 2018-12-04 14:15 UTC (permalink / raw)
  To: intel-gfx

Currently we allocate a scratch page for each engine, but since we only
ever write into it for post-sync operations, it is not exposed to
userspace nor do we care about its coherency. As we also do not care
about its contents, we can use a single page for all engines, reducing
our allocations and avoiding the complications of assuming per-engine
isolation.

For later use, this simplifies engine initialisation (by removing the
allocation that required struct_mutex!) and means that we can always
rely on a scratch page being available.

v2: Check that we allocated a large enough scratch for I830 w/a
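
As an aside, a minimal sketch of what a caller looks like after the
change (illustrative only; example_scratch_addr() is a made-up name, the
real call sites are in the diff below):

static u32 example_scratch_addr(const struct i915_request *rq)
{
	/* was: i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES */
	return i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
}

All engines now scribble into the same GGTT-pinned page; since the
contents are never read back, overlap between engines is harmless. The
only sizing constraint is gen2, where the I830 workaround copies the
batch into the scratch and therefore needs the SZ_256K allocation that
v2 adds a GEM_BUG_ON for.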

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |  7 ++++
 drivers/gpu/drm/i915/i915_gem.c         | 50 ++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gpu_error.c   |  2 +-
 drivers/gpu/drm/i915/intel_engine_cs.c  | 42 ---------------------
 drivers/gpu/drm/i915/intel_lrc.c        | 17 +++------
 drivers/gpu/drm/i915/intel_ringbuffer.c | 37 ++++++------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  5 ---
 7 files changed, 74 insertions(+), 86 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 23a3dc6f3907..c5f01964f0fb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1983,6 +1983,8 @@ struct drm_i915_private {
 		struct delayed_work idle_work;
 
 		ktime_t last_init_time;
+
+		struct i915_vma *scratch;
 	} gt;
 
 	/* perform PHY state sanity checks? */
@@ -3713,4 +3715,9 @@ static inline int intel_hws_csb_write_index(struct drm_i915_private *i915)
 		return I915_HWS_CSB_WRITE_INDEX;
 }
 
+static inline u32 i915_scratch_offset(const struct drm_i915_private *i915)
+{
+	return i915_ggtt_offset(i915->gt.scratch);
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 35ecfea4e903..d36a9755ad91 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5498,6 +5498,44 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 	goto out_ctx;
 }
 
+static int
+i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size)
+{
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	int ret;
+
+	obj = i915_gem_object_create_stolen(i915, size);
+	if (!obj)
+		obj = i915_gem_object_create_internal(i915, size);
+	if (IS_ERR(obj)) {
+		DRM_ERROR("Failed to allocate scratch page\n");
+		return PTR_ERR(obj);
+	}
+
+	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err_unref;
+	}
+
+	ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+	if (ret)
+		goto err_unref;
+
+	i915->gt.scratch = vma;
+	return 0;
+
+err_unref:
+	i915_gem_object_put(obj);
+	return ret;
+}
+
+static void i915_gem_fini_scratch(struct drm_i915_private *i915)
+{
+	i915_vma_unpin_and_release(&i915->gt.scratch, 0);
+}
+
 int i915_gem_init(struct drm_i915_private *dev_priv)
 {
 	int ret;
@@ -5544,12 +5582,19 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 		goto err_unlock;
 	}
 
-	ret = i915_gem_contexts_init(dev_priv);
+	ret = i915_gem_init_scratch(dev_priv,
+				    IS_GEN2(dev_priv) ? SZ_256K : PAGE_SIZE);
 	if (ret) {
 		GEM_BUG_ON(ret == -EIO);
 		goto err_ggtt;
 	}
 
+	ret = i915_gem_contexts_init(dev_priv);
+	if (ret) {
+		GEM_BUG_ON(ret == -EIO);
+		goto err_scratch;
+	}
+
 	ret = intel_engines_init(dev_priv);
 	if (ret) {
 		GEM_BUG_ON(ret == -EIO);
@@ -5622,6 +5667,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 err_context:
 	if (ret != -EIO)
 		i915_gem_contexts_fini(dev_priv);
+err_scratch:
+	i915_gem_fini_scratch(dev_priv);
 err_ggtt:
 err_unlock:
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
@@ -5673,6 +5720,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv)
 	intel_uc_fini(dev_priv);
 	i915_gem_cleanup_engines(dev_priv);
 	i915_gem_contexts_fini(dev_priv);
+	i915_gem_fini_scratch(dev_priv);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	intel_wa_list_free(&dev_priv->gt_wa_list);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index a6885a59568b..07465123c166 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1571,7 +1571,7 @@ static void gem_record_rings(struct i915_gpu_state *error)
 			if (HAS_BROKEN_CS_TLB(i915))
 				ee->wa_batchbuffer =
 					i915_error_object_create(i915,
-								 engine->scratch);
+								 i915->gt.scratch);
 			request_record_user_bo(request, ee);
 
 			ee->ctx =
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 6b427bc52f78..af2873403009 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -493,46 +493,6 @@ void intel_engine_setup_common(struct intel_engine_cs *engine)
 	intel_engine_init_cmd_parser(engine);
 }
 
-int intel_engine_create_scratch(struct intel_engine_cs *engine,
-				unsigned int size)
-{
-	struct drm_i915_gem_object *obj;
-	struct i915_vma *vma;
-	int ret;
-
-	WARN_ON(engine->scratch);
-
-	obj = i915_gem_object_create_stolen(engine->i915, size);
-	if (!obj)
-		obj = i915_gem_object_create_internal(engine->i915, size);
-	if (IS_ERR(obj)) {
-		DRM_ERROR("Failed to allocate scratch page\n");
-		return PTR_ERR(obj);
-	}
-
-	vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
-	if (IS_ERR(vma)) {
-		ret = PTR_ERR(vma);
-		goto err_unref;
-	}
-
-	ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
-	if (ret)
-		goto err_unref;
-
-	engine->scratch = vma;
-	return 0;
-
-err_unref:
-	i915_gem_object_put(obj);
-	return ret;
-}
-
-void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
-{
-	i915_vma_unpin_and_release(&engine->scratch, 0);
-}
-
 static void cleanup_status_page(struct intel_engine_cs *engine)
 {
 	if (HWS_NEEDS_PHYSICAL(engine->i915)) {
@@ -707,8 +667,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *i915 = engine->i915;
 
-	intel_engine_cleanup_scratch(engine);
-
 	cleanup_status_page(engine);
 
 	intel_engine_fini_breadcrumbs(engine);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 87227fd9ae5f..d7fa301b5ec7 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1288,9 +1288,10 @@ static int execlists_request_alloc(struct i915_request *request)
 static u32 *
 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
 {
+	/* NB no one else is allowed to scribble over scratch + 256! */
 	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
-	*batch++ = i915_ggtt_offset(engine->scratch) + 256;
+	*batch++ = i915_scratch_offset(engine->i915) + 256;
 	*batch++ = 0;
 
 	*batch++ = MI_LOAD_REGISTER_IMM(1);
@@ -1304,7 +1305,7 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
 
 	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
-	*batch++ = i915_ggtt_offset(engine->scratch) + 256;
+	*batch++ = i915_scratch_offset(engine->i915) + 256;
 	*batch++ = 0;
 
 	return batch;
@@ -1341,7 +1342,7 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 				       PIPE_CONTROL_GLOBAL_GTT_IVB |
 				       PIPE_CONTROL_CS_STALL |
 				       PIPE_CONTROL_QW_WRITE,
-				       i915_ggtt_offset(engine->scratch) +
+				       i915_scratch_offset(engine->i915) +
 				       2 * CACHELINE_BYTES);
 
 	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
@@ -1973,7 +1974,7 @@ static int gen8_emit_flush_render(struct i915_request *request,
 {
 	struct intel_engine_cs *engine = request->engine;
 	u32 scratch_addr =
-		i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
+		i915_scratch_offset(engine->i915) + 2 * CACHELINE_BYTES;
 	bool vf_flush_wa = false, dc_flush_wa = false;
 	u32 *cs, flags = 0;
 	int len;
@@ -2292,10 +2293,6 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
 	if (ret)
 		return ret;
 
-	ret = intel_engine_create_scratch(engine, PAGE_SIZE);
-	if (ret)
-		goto err_cleanup_common;
-
 	ret = intel_init_workaround_bb(engine);
 	if (ret) {
 		/*
@@ -2311,10 +2308,6 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
 	intel_engine_init_workarounds(engine);
 
 	return 0;
-
-err_cleanup_common:
-	intel_engine_cleanup_common(engine);
-	return ret;
 }
 
 int logical_xcs_ring_init(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 7f88df5bff09..c5eb26a7ee79 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -150,8 +150,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
 	 */
 	if (mode & EMIT_INVALIDATE) {
 		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
-		*cs++ = i915_ggtt_offset(rq->engine->scratch) |
-			PIPE_CONTROL_GLOBAL_GTT;
+		*cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
 		*cs++ = 0;
 		*cs++ = 0;
 
@@ -159,8 +158,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
 			*cs++ = MI_FLUSH;
 
 		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
-		*cs++ = i915_ggtt_offset(rq->engine->scratch) |
-			PIPE_CONTROL_GLOBAL_GTT;
+		*cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
 		*cs++ = 0;
 		*cs++ = 0;
 	}
@@ -212,8 +210,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
 static int
 intel_emit_post_sync_nonzero_flush(struct i915_request *rq)
 {
-	u32 scratch_addr =
-		i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
+	u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
 	u32 *cs;
 
 	cs = intel_ring_begin(rq, 6);
@@ -246,8 +243,7 @@ intel_emit_post_sync_nonzero_flush(struct i915_request *rq)
 static int
 gen6_render_ring_flush(struct i915_request *rq, u32 mode)
 {
-	u32 scratch_addr =
-		i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
+	u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
 	u32 *cs, flags = 0;
 	int ret;
 
@@ -316,8 +312,7 @@ gen7_render_ring_cs_stall_wa(struct i915_request *rq)
 static int
 gen7_render_ring_flush(struct i915_request *rq, u32 mode)
 {
-	u32 scratch_addr =
-		i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
+	u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
 	u32 *cs, flags = 0;
 
 	/*
@@ -994,7 +989,7 @@ i965_emit_bb_start(struct i915_request *rq,
 }
 
 /* Just userspace ABI convention to limit the wa batch bo to a resonable size */
-#define I830_BATCH_LIMIT (256*1024)
+#define I830_BATCH_LIMIT SZ_256K
 #define I830_TLB_ENTRIES (2)
 #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
 static int
@@ -1002,7 +997,9 @@ i830_emit_bb_start(struct i915_request *rq,
 		   u64 offset, u32 len,
 		   unsigned int dispatch_flags)
 {
-	u32 *cs, cs_offset = i915_ggtt_offset(rq->engine->scratch);
+	u32 *cs, cs_offset = i915_scratch_offset(rq->i915);
+
+	GEM_BUG_ON(rq->i915->gt.scratch->size < I830_WA_SIZE);
 
 	cs = intel_ring_begin(rq, 6);
 	if (IS_ERR(cs))
@@ -1459,7 +1456,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 {
 	struct i915_timeline *timeline;
 	struct intel_ring *ring;
-	unsigned int size;
 	int err;
 
 	intel_engine_setup_common(engine);
@@ -1484,21 +1480,12 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 	GEM_BUG_ON(engine->buffer);
 	engine->buffer = ring;
 
-	size = PAGE_SIZE;
-	if (HAS_BROKEN_CS_TLB(engine->i915))
-		size = I830_WA_SIZE;
-	err = intel_engine_create_scratch(engine, size);
-	if (err)
-		goto err_unpin;
-
 	err = intel_engine_init_common(engine);
 	if (err)
-		goto err_scratch;
+		goto err_unpin;
 
 	return 0;
 
-err_scratch:
-	intel_engine_cleanup_scratch(engine);
 err_unpin:
 	intel_ring_unpin(ring);
 err_ring:
@@ -1572,7 +1559,7 @@ static int flush_pd_dir(struct i915_request *rq)
 	/* Stall until the page table load is complete */
 	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
-	*cs++ = i915_ggtt_offset(engine->scratch);
+	*cs++ = i915_scratch_offset(rq->i915);
 	*cs++ = MI_NOOP;
 
 	intel_ring_advance(rq, cs);
@@ -1681,7 +1668,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 			/* Insert a delay before the next switch! */
 			*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
 			*cs++ = i915_mmio_reg_offset(last_reg);
-			*cs++ = i915_ggtt_offset(engine->scratch);
+			*cs++ = i915_scratch_offset(rq->i915);
 			*cs++ = MI_NOOP;
 		}
 		*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 927bb21a2b0b..72edaa7ff411 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -439,7 +439,6 @@ struct intel_engine_cs {
 	struct i915_wa_list ctx_wa_list;
 	struct i915_wa_list wa_list;
 	struct i915_wa_list whitelist;
-	struct i915_vma *scratch;
 
 	u32             irq_keep_mask; /* always keep these interrupts */
 	u32		irq_enable_mask; /* bitmask to enable ring interrupt */
@@ -896,10 +895,6 @@ void intel_engine_setup_common(struct intel_engine_cs *engine);
 int intel_engine_init_common(struct intel_engine_cs *engine);
 void intel_engine_cleanup_common(struct intel_engine_cs *engine);
 
-int intel_engine_create_scratch(struct intel_engine_cs *engine,
-				unsigned int size);
-void intel_engine_cleanup_scratch(struct intel_engine_cs *engine);
-
 int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
 int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
 int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
-- 
2.20.0.rc2


* [PATCH 2/7] drm/i915: Flush GPU relocs harder for gen3
From: Chris Wilson @ 2018-12-04 14:15 UTC (permalink / raw)
  To: intel-gfx

Adding an extra MI_STORE_DWORD_IMM to the gpu relocation path for gen3
was good, but still not good enough. To survive 24+ hours under test we
needed to perform not one, not two, but three extra store-dw. Doing so
for each GPU relocation was a little unsightly, and since we need to
worry about userspace hitting the same issues, we should instead fold
the dummy store-dw writes into EMIT_FLUSH.
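
For illustration, a sketch of the resulting flush emission (mirroring the
diff below; emit_gen3_flush_wa() is a made-up helper and error handling
is elided). The ring allocation of 2 + 3 * num_store_dw dwords covers the
flush command, three dwords per dummy store into the shared scratch page,
and the trailing flush:

static u32 *emit_gen3_flush_wa(struct i915_request *rq, u32 *cs,
			       int num_store_dw)
{
	*cs++ = MI_FLUSH;
	while (num_store_dw--) {
		/* Dummy write; the scratch contents are never read back. */
		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*cs++ = i915_scratch_offset(rq->i915);
		*cs++ = 0;
	}
	*cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;

	return cs;
}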

Fixes: 7dd4f6729f92 ("drm/i915: Async GPU relocation processing")
References: 7fa28e146994 ("drm/i915: Write GPU relocs harder with gen3")
Testcase: igt/gem_tiled_fence_blits # blb/pnv
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  7 +------
 drivers/gpu/drm/i915/intel_ringbuffer.c    | 15 ++++++++++++---
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index d4fac09095f8..1aaccbe7e1de 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1268,7 +1268,7 @@ relocate_entry(struct i915_vma *vma,
 		else if (gen >= 4)
 			len = 4;
 		else
-			len = 6;
+			len = 3;
 
 		batch = reloc_gpu(eb, vma, len);
 		if (IS_ERR(batch))
@@ -1309,11 +1309,6 @@ relocate_entry(struct i915_vma *vma,
 			*batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
 			*batch++ = addr;
 			*batch++ = target_offset;
-
-			/* And again for good measure (blb/pnv) */
-			*batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
-			*batch++ = addr;
-			*batch++ = target_offset;
 		}
 
 		goto out;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index c5eb26a7ee79..fbeaec3994e7 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -69,19 +69,28 @@ unsigned int intel_ring_update_space(struct intel_ring *ring)
 static int
 gen2_render_ring_flush(struct i915_request *rq, u32 mode)
 {
+	unsigned int num_store_dw;
 	u32 cmd, *cs;
 
 	cmd = MI_FLUSH;
-
+	num_store_dw = 0;
 	if (mode & EMIT_INVALIDATE)
 		cmd |= MI_READ_FLUSH;
+	if (mode & EMIT_FLUSH)
+		num_store_dw = 4;
 
-	cs = intel_ring_begin(rq, 2);
+	cs = intel_ring_begin(rq, 2 + 3 * num_store_dw);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
 	*cs++ = cmd;
-	*cs++ = MI_NOOP;
+	while (num_store_dw--) {
+		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+		*cs++ = i915_scratch_offset(rq->i915);
+		*cs++ = 0;
+	}
+	*cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;
+
 	intel_ring_advance(rq, cs);
 
 	return 0;
-- 
2.20.0.rc2


* [PATCH 3/7] drm/i915/selftests: Reorder request allocation vs vma pinning
From: Chris Wilson @ 2018-12-04 14:15 UTC (permalink / raw)
  To: intel-gfx

Impose the restriction that all vma are pinned for a request prior to
its allocation. This simplifies request construction and should
facilitate unravelling the lock interdependencies later.
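
The new ordering used throughout the selftests boils down to the pattern
below (a sketch only; example_submit() is a made-up name and the batch
handling from the diff is elided):

static int example_submit(struct intel_engine_cs *engine,
			  struct i915_gem_context *ctx,
			  struct i915_vma *vma)
{
	struct i915_request *rq;
	int err;

	/* Pin every vma first, only then allocate the request. */
	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		return err;

	rq = i915_request_alloc(engine, ctx);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto unpin;
	}

	err = i915_vma_move_to_active(vma, rq, 0);
	if (err)
		i915_request_skip(rq, err);
	i915_request_add(rq);
unpin:
	i915_vma_unpin(vma);
	return err;
}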

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/selftests/huge_pages.c   |  31 +++--
 drivers/gpu/drm/i915/selftests/igt_spinner.c  |  86 ++++++------
 .../gpu/drm/i915/selftests/intel_hangcheck.c  | 123 +++++++++---------
 3 files changed, 119 insertions(+), 121 deletions(-)

diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index 26c065c8d2c0..a0c7cbc212ba 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -972,7 +972,6 @@ static int gpu_write(struct i915_vma *vma,
 {
 	struct i915_request *rq;
 	struct i915_vma *batch;
-	int flags = 0;
 	int err;
 
 	GEM_BUG_ON(!intel_engine_can_store_dword(engine));
@@ -981,14 +980,14 @@ static int gpu_write(struct i915_vma *vma,
 	if (err)
 		return err;
 
-	rq = i915_request_alloc(engine, ctx);
-	if (IS_ERR(rq))
-		return PTR_ERR(rq);
-
 	batch = gpu_write_dw(vma, dword * sizeof(u32), value);
-	if (IS_ERR(batch)) {
-		err = PTR_ERR(batch);
-		goto err_request;
+	if (IS_ERR(batch))
+		return PTR_ERR(batch);
+
+	rq = i915_request_alloc(engine, ctx);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_batch;
 	}
 
 	err = i915_vma_move_to_active(batch, rq, 0);
@@ -996,21 +995,21 @@ static int gpu_write(struct i915_vma *vma,
 		goto err_request;
 
 	i915_gem_object_set_active_reference(batch->obj);
-	i915_vma_unpin(batch);
-	i915_vma_close(batch);
 
-	err = engine->emit_bb_start(rq,
-				    batch->node.start, batch->node.size,
-				    flags);
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
 	if (err)
 		goto err_request;
 
-	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	err = engine->emit_bb_start(rq,
+				    batch->node.start, batch->node.size,
+				    0);
+err_request:
 	if (err)
 		i915_request_skip(rq, err);
-
-err_request:
 	i915_request_add(rq);
+err_batch:
+	i915_vma_unpin(batch);
+	i915_vma_close(batch);
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c
index 8cd34f6e6859..0e70df0230b8 100644
--- a/drivers/gpu/drm/i915/selftests/igt_spinner.c
+++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c
@@ -68,48 +68,65 @@ static u64 hws_address(const struct i915_vma *hws,
 	return hws->node.start + seqno_offset(rq->fence.context);
 }
 
-static int emit_recurse_batch(struct igt_spinner *spin,
-			      struct i915_request *rq,
-			      u32 arbitration_command)
+static int move_to_active(struct i915_vma *vma,
+			  struct i915_request *rq,
+			  unsigned int flags)
 {
-	struct i915_address_space *vm = &rq->gem_context->ppgtt->vm;
+	int err;
+
+	err = i915_vma_move_to_active(vma, rq, flags);
+	if (err)
+		return err;
+
+	if (!i915_gem_object_has_active_reference(vma->obj)) {
+		i915_gem_object_get(vma->obj);
+		i915_gem_object_set_active_reference(vma->obj);
+	}
+
+	return 0;
+}
+
+struct i915_request *
+igt_spinner_create_request(struct igt_spinner *spin,
+			   struct i915_gem_context *ctx,
+			   struct intel_engine_cs *engine,
+			   u32 arbitration_command)
+{
+	struct i915_address_space *vm = &ctx->ppgtt->vm;
+	struct i915_request *rq = NULL;
 	struct i915_vma *hws, *vma;
 	u32 *batch;
 	int err;
 
 	vma = i915_vma_instance(spin->obj, vm, NULL);
 	if (IS_ERR(vma))
-		return PTR_ERR(vma);
+		return ERR_CAST(vma);
 
 	hws = i915_vma_instance(spin->hws, vm, NULL);
 	if (IS_ERR(hws))
-		return PTR_ERR(hws);
+		return ERR_CAST(hws);
 
 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
 	if (err)
-		return err;
+		return ERR_PTR(err);
 
 	err = i915_vma_pin(hws, 0, 0, PIN_USER);
 	if (err)
 		goto unpin_vma;
 
-	err = i915_vma_move_to_active(vma, rq, 0);
-	if (err)
+	rq = i915_request_alloc(engine, ctx);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
 		goto unpin_hws;
-
-	if (!i915_gem_object_has_active_reference(vma->obj)) {
-		i915_gem_object_get(vma->obj);
-		i915_gem_object_set_active_reference(vma->obj);
 	}
 
-	err = i915_vma_move_to_active(hws, rq, 0);
+	err = move_to_active(vma, rq, 0);
 	if (err)
-		goto unpin_hws;
+		goto cancel_rq;
 
-	if (!i915_gem_object_has_active_reference(hws->obj)) {
-		i915_gem_object_get(hws->obj);
-		i915_gem_object_set_active_reference(hws->obj);
-	}
+	err = move_to_active(hws, rq, 0);
+	if (err)
+		goto cancel_rq;
 
 	batch = spin->batch;
 
@@ -127,35 +144,18 @@ static int emit_recurse_batch(struct igt_spinner *spin,
 
 	i915_gem_chipset_flush(spin->i915);
 
-	err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, 0);
+	err = engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, 0);
 
+cancel_rq:
+	if (err) {
+		i915_request_skip(rq, err);
+		i915_request_add(rq);
+	}
 unpin_hws:
 	i915_vma_unpin(hws);
 unpin_vma:
 	i915_vma_unpin(vma);
-	return err;
-}
-
-struct i915_request *
-igt_spinner_create_request(struct igt_spinner *spin,
-			   struct i915_gem_context *ctx,
-			   struct intel_engine_cs *engine,
-			   u32 arbitration_command)
-{
-	struct i915_request *rq;
-	int err;
-
-	rq = i915_request_alloc(engine, ctx);
-	if (IS_ERR(rq))
-		return rq;
-
-	err = emit_recurse_batch(spin, rq, arbitration_command);
-	if (err) {
-		i915_request_add(rq);
-		return ERR_PTR(err);
-	}
-
-	return rq;
+	return err ? ERR_PTR(err) : rq;
 }
 
 static u32
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index 40efbed611de..60a4bd9405be 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -103,52 +103,87 @@ static u64 hws_address(const struct i915_vma *hws,
 	return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context);
 }
 
-static int emit_recurse_batch(struct hang *h,
-			      struct i915_request *rq)
+static int move_to_active(struct i915_vma *vma,
+			  struct i915_request *rq,
+			  unsigned int flags)
+{
+	int err;
+
+	err = i915_vma_move_to_active(vma, rq, flags);
+	if (err)
+		return err;
+
+	if (!i915_gem_object_has_active_reference(vma->obj)) {
+		i915_gem_object_get(vma->obj);
+		i915_gem_object_set_active_reference(vma->obj);
+	}
+
+	return 0;
+}
+
+static struct i915_request *
+hang_create_request(struct hang *h, struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *i915 = h->i915;
 	struct i915_address_space *vm =
-		rq->gem_context->ppgtt ?
-		&rq->gem_context->ppgtt->vm :
-		&i915->ggtt.vm;
+		h->ctx->ppgtt ? &h->ctx->ppgtt->vm : &i915->ggtt.vm;
+	struct i915_request *rq = NULL;
 	struct i915_vma *hws, *vma;
 	unsigned int flags;
 	u32 *batch;
 	int err;
 
+	if (i915_gem_object_is_active(h->obj)) {
+		struct drm_i915_gem_object *obj;
+		void *vaddr;
+
+		obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE);
+		if (IS_ERR(obj))
+			return ERR_CAST(obj);
+
+		vaddr = i915_gem_object_pin_map(obj,
+						i915_coherent_map_type(h->i915));
+		if (IS_ERR(vaddr)) {
+			i915_gem_object_put(obj);
+			return ERR_CAST(vaddr);
+		}
+
+		i915_gem_object_unpin_map(h->obj);
+		i915_gem_object_put(h->obj);
+
+		h->obj = obj;
+		h->batch = vaddr;
+	}
+
 	vma = i915_vma_instance(h->obj, vm, NULL);
 	if (IS_ERR(vma))
-		return PTR_ERR(vma);
+		return ERR_CAST(vma);
 
 	hws = i915_vma_instance(h->hws, vm, NULL);
 	if (IS_ERR(hws))
-		return PTR_ERR(hws);
+		return ERR_CAST(hws);
 
 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
 	if (err)
-		return err;
+		return ERR_PTR(err);
 
 	err = i915_vma_pin(hws, 0, 0, PIN_USER);
 	if (err)
 		goto unpin_vma;
 
-	err = i915_vma_move_to_active(vma, rq, 0);
-	if (err)
+	rq = i915_request_alloc(engine, h->ctx);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
 		goto unpin_hws;
-
-	if (!i915_gem_object_has_active_reference(vma->obj)) {
-		i915_gem_object_get(vma->obj);
-		i915_gem_object_set_active_reference(vma->obj);
 	}
 
-	err = i915_vma_move_to_active(hws, rq, 0);
+	err = move_to_active(vma, rq, 0);
 	if (err)
-		goto unpin_hws;
+		goto cancel_rq;
 
-	if (!i915_gem_object_has_active_reference(hws->obj)) {
-		i915_gem_object_get(hws->obj);
-		i915_gem_object_set_active_reference(hws->obj);
-	}
+	err = move_to_active(hws, rq, 0);
+	if (err)
+		goto cancel_rq;
 
 	batch = h->batch;
 	if (INTEL_GEN(i915) >= 8) {
@@ -213,52 +248,16 @@ static int emit_recurse_batch(struct hang *h,
 
 	err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);
 
+cancel_rq:
+	if (err) {
+		i915_request_skip(rq, err);
+		i915_request_add(rq);
+	}
 unpin_hws:
 	i915_vma_unpin(hws);
 unpin_vma:
 	i915_vma_unpin(vma);
-	return err;
-}
-
-static struct i915_request *
-hang_create_request(struct hang *h, struct intel_engine_cs *engine)
-{
-	struct i915_request *rq;
-	int err;
-
-	if (i915_gem_object_is_active(h->obj)) {
-		struct drm_i915_gem_object *obj;
-		void *vaddr;
-
-		obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE);
-		if (IS_ERR(obj))
-			return ERR_CAST(obj);
-
-		vaddr = i915_gem_object_pin_map(obj,
-						i915_coherent_map_type(h->i915));
-		if (IS_ERR(vaddr)) {
-			i915_gem_object_put(obj);
-			return ERR_CAST(vaddr);
-		}
-
-		i915_gem_object_unpin_map(h->obj);
-		i915_gem_object_put(h->obj);
-
-		h->obj = obj;
-		h->batch = vaddr;
-	}
-
-	rq = i915_request_alloc(engine, h->ctx);
-	if (IS_ERR(rq))
-		return rq;
-
-	err = emit_recurse_batch(h, rq);
-	if (err) {
-		i915_request_add(rq);
-		return ERR_PTR(err);
-	}
-
-	return rq;
+	return err ? ERR_PTR(err) : rq;
 }
 
 static u32 hws_seqno(const struct hang *h, const struct i915_request *rq)
-- 
2.20.0.rc2


* [PATCH 4/7] drm/i915: Pipeline PDP updates for Braswell
From: Chris Wilson @ 2018-12-04 14:15 UTC (permalink / raw)
  To: intel-gfx

Currently we face a severe problem on Braswell that manifests as invalid
ppGTT accesses. The code tries to maintain the PDPs (page directory
pointers) inside the context in two ways: a direct write into the
context image and a pipelined LRI update. The direct write into the
context is fundamentally racy as it is unserialised with any access
(read or write) the GPU is doing. By asserting that Braswell is not used
with vGPU (currently an unsupported platform) we can eliminate the
dangerous direct write into the context image and rely solely on the
pipelined update.

However, the LRI of the PDPs fouls up the GPU, causing it to freeze and
take out the machine with "forcewake ack timeouts". This seems possible
to work around by preventing the GPU from sleeping (by means of
disabling the power-state management interface, i.e. forcing each ring
to remain awake) around the update.

v2: Include a bunch of EMIT_FLUSH after the LRI to prevent the magic
smoke from escaping.
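
Schematically, the pipelined update emitted from request_alloc for
non-48bit ppGTT looks like the sketch below (a simplification of
emit_pdps() in the diff; the LRI payload is elided):

static int example_emit_pdps(struct i915_request *rq)
{
	int i, err;

	/* Invalidate before touching the PDP registers. */
	err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
	if (err)
		return err;

	/* ... MI_LOAD_REGISTER_IMM of the GEN8_RING_PDP_{U,L}DW registers ... */

	/* Empirically required run of flushes to avoid stale TLB lookups. */
	for (i = 0; i < 8; i++) {
		err = rq->engine->emit_flush(rq, EMIT_FLUSH);
		if (err)
			return err;
	}

	/* And a final invalidate to avoid media forcewake errors. */
	return rq->engine->emit_flush(rq, EMIT_INVALIDATE);
}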

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108656
References: https://bugs.freedesktop.org/show_bug.cgi?id=108714
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c     |   2 -
 drivers/gpu/drm/i915/i915_request.c     |   5 -
 drivers/gpu/drm/i915/intel_lrc.c        | 156 +++++++++++++-----------
 drivers/gpu/drm/i915/intel_ringbuffer.c |   5 +-
 4 files changed, 85 insertions(+), 83 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index add1fe7aeb93..62bde517d383 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1423,8 +1423,6 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
 			gen8_initialize_pd(vm, pd);
 			gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
 			GEM_BUG_ON(pdp->used_pdpes > i915_pdpes_per_pdp(vm));
-
-			mark_tlbs_dirty(i915_vm_to_ppgtt(vm));
 		}
 
 		ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index ca95ab2f4cfa..8ab8e8e6a086 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -719,11 +719,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 */
 	rq->head = rq->ring->emit;
 
-	/* Unconditionally invalidate GPU caches and TLBs. */
-	ret = engine->emit_flush(rq, EMIT_INVALIDATE);
-	if (ret)
-		goto err_unwind;
-
 	ret = engine->request_alloc(rq);
 	if (ret)
 		goto err_unwind;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d7fa301b5ec7..49505dc9320f 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -363,31 +363,12 @@ execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
 	trace_i915_request_out(rq);
 }
 
-static void
-execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
-{
-	ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
-	ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
-	ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
-	ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
-}
-
 static u64 execlists_update_context(struct i915_request *rq)
 {
-	struct i915_hw_ppgtt *ppgtt = rq->gem_context->ppgtt;
 	struct intel_context *ce = rq->hw_context;
-	u32 *reg_state = ce->lrc_reg_state;
 
-	reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
-
-	/*
-	 * True 32b PPGTT with dynamic page allocation: update PDP
-	 * registers and point the unallocated PDPs to scratch page.
-	 * PML4 is allocated during ppgtt init, so this is not needed
-	 * in 48-bit mode.
-	 */
-	if (!i915_vm_is_48bit(&ppgtt->vm))
-		execlists_update_context_pdps(ppgtt, reg_state);
+	ce->lrc_reg_state[CTX_RING_TAIL + 1] =
+		intel_ring_set_tail(rq->ring, rq->tail);
 
 	/*
 	 * Make sure the context image is complete before we submit it to HW.
@@ -1242,29 +1223,95 @@ execlists_context_pin(struct intel_engine_cs *engine,
 	return __execlists_context_pin(engine, ctx, ce);
 }
 
+static int emit_pdps(struct i915_request *rq)
+{
+	const struct intel_engine_cs * const engine = rq->engine;
+	struct i915_hw_ppgtt * const ppgtt = rq->gem_context->ppgtt;
+	int err, i;
+	u32 *cs;
+
+	/*
+	 * Beware ye of the dragons, this sequence is magic!
+	 *
+	 * Small changes to this sequence can cause anything from
+	 * GPU hangs to forcewake errors and machine lockups!
+	 */
+
+	err = engine->emit_flush(rq, EMIT_INVALIDATE);
+	if (err)
+		return err;
+
+	cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
+	for (i = GEN8_3LVL_PDPES; i--; ) {
+		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
+
+		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i));
+		*cs++ = upper_32_bits(pd_daddr);
+		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i));
+		*cs++ = lower_32_bits(pd_daddr);
+	}
+	*cs++ = MI_NOOP;
+
+	intel_ring_advance(rq, cs);
+
+	/*
+	 * This is largely (entirely!) derived from empirical testing
+	 * with gem_concurrent_blit on Braswell. Under the stress of
+	 * updating the ppGTT PTEs we observe invalid TLB lookups and
+	 * execution of stale data (leading to GPU hangs with garbage
+	 * IPEHR).
+	 */
+	for (i = 0; i < 8; i++) {
+		err = engine->emit_flush(rq, EMIT_FLUSH);
+		if (err)
+			return err;
+	}
+
+	/* And yet another INVALIDATE to avoid media forcewake errors! */
+	err = engine->emit_flush(rq, EMIT_INVALIDATE);
+	if (err)
+		return err;
+
+	return 0;
+}
+
 static int execlists_request_alloc(struct i915_request *request)
 {
 	int ret;
 
 	GEM_BUG_ON(!request->hw_context->pin_count);
 
-	/* Flush enough space to reduce the likelihood of waiting after
+	/*
+	 * Flush enough space to reduce the likelihood of waiting after
 	 * we start building the request - in which case we will just
 	 * have to repeat work.
 	 */
 	request->reserved_space += EXECLISTS_REQUEST_SIZE;
 
-	ret = intel_ring_wait_for_space(request->ring, request->reserved_space);
-	if (ret)
-		return ret;
-
-	/* Note that after this point, we have committed to using
+	/*
+	 * Note that after this point, we have committed to using
 	 * this request as it is being used to both track the
 	 * state of engine initialisation and liveness of the
 	 * golden renderstate above. Think twice before you try
 	 * to cancel/unwind this request now.
 	 */
 
+	/* Unconditionally invalidate GPU caches and TLBs. */
+	if (i915_vm_is_48bit(&request->gem_context->ppgtt->vm)) {
+		ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
+		if (ret)
+			return ret;
+	} else {
+		GEM_BUG_ON(intel_vgpu_active(request->i915));
+		ret = emit_pdps(request);
+		if (ret)
+			return ret;
+	}
+
 	request->reserved_space -= EXECLISTS_REQUEST_SIZE;
 	return 0;
 }
@@ -1836,56 +1883,11 @@ static void execlists_reset_finish(struct intel_engine_cs *engine)
 		  atomic_read(&execlists->tasklet.count));
 }
 
-static int intel_logical_ring_emit_pdps(struct i915_request *rq)
-{
-	struct i915_hw_ppgtt *ppgtt = rq->gem_context->ppgtt;
-	struct intel_engine_cs *engine = rq->engine;
-	const int num_lri_cmds = GEN8_3LVL_PDPES * 2;
-	u32 *cs;
-	int i;
-
-	cs = intel_ring_begin(rq, num_lri_cmds * 2 + 2);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(num_lri_cmds);
-	for (i = GEN8_3LVL_PDPES - 1; i >= 0; i--) {
-		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
-
-		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i));
-		*cs++ = upper_32_bits(pd_daddr);
-		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i));
-		*cs++ = lower_32_bits(pd_daddr);
-	}
-
-	*cs++ = MI_NOOP;
-	intel_ring_advance(rq, cs);
-
-	return 0;
-}
-
 static int gen8_emit_bb_start(struct i915_request *rq,
 			      u64 offset, u32 len,
 			      const unsigned int flags)
 {
 	u32 *cs;
-	int ret;
-
-	/* Don't rely in hw updating PDPs, specially in lite-restore.
-	 * Ideally, we should set Force PD Restore in ctx descriptor,
-	 * but we can't. Force Restore would be a second option, but
-	 * it is unsafe in case of lite-restore (because the ctx is
-	 * not idle). PML4 is allocated during ppgtt init so this is
-	 * not needed in 48-bit.*/
-	if ((intel_engine_flag(rq->engine) & rq->gem_context->ppgtt->pd_dirty_rings) &&
-	    !i915_vm_is_48bit(&rq->gem_context->ppgtt->vm) &&
-	    !intel_vgpu_active(rq->i915)) {
-		ret = intel_logical_ring_emit_pdps(rq);
-		if (ret)
-			return ret;
-
-		rq->gem_context->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine);
-	}
 
 	cs = intel_ring_begin(rq, 6);
 	if (IS_ERR(cs))
@@ -1918,6 +1920,7 @@ static int gen8_emit_bb_start(struct i915_request *rq,
 
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
 	*cs++ = MI_NOOP;
+
 	intel_ring_advance(rq, cs);
 
 	return 0;
@@ -1980,6 +1983,8 @@ static int gen8_emit_flush_render(struct i915_request *request,
 	int len;
 
 	flags |= PIPE_CONTROL_CS_STALL;
+	flags |= PIPE_CONTROL_QW_WRITE;
+	flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
 
 	if (mode & EMIT_FLUSH) {
 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
@@ -1995,8 +2000,6 @@ static int gen8_emit_flush_render(struct i915_request *request,
 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_QW_WRITE;
-		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
 
 		/*
 		 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
@@ -2533,6 +2536,11 @@ static void execlists_init_reg_state(u32 *regs,
 		 * other PDP Descriptors are ignored.
 		 */
 		ASSIGN_CTX_PML4(ctx->ppgtt, regs);
+	} else {
+		ASSIGN_CTX_PDP(ctx->ppgtt, regs, 3);
+		ASSIGN_CTX_PDP(ctx->ppgtt, regs, 2);
+		ASSIGN_CTX_PDP(ctx->ppgtt, regs, 1);
+		ASSIGN_CTX_PDP(ctx->ppgtt, regs, 0);
 	}
 
 	if (rcs) {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index fbeaec3994e7..c730c4e938c5 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1835,11 +1835,12 @@ static int ring_request_alloc(struct i915_request *request)
 	 */
 	request->reserved_space += LEGACY_REQUEST_SIZE;
 
-	ret = intel_ring_wait_for_space(request->ring, request->reserved_space);
+	ret = switch_context(request);
 	if (ret)
 		return ret;
 
-	ret = switch_context(request);
+	/* Unconditionally invalidate GPU caches and TLBs. */
+	ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
 	if (ret)
 		return ret;
 
-- 
2.20.0.rc2


* [PATCH 5/7] drm/i915: Return immediately if trylock fails for direct-reclaim
From: Chris Wilson @ 2018-12-04 14:15 UTC (permalink / raw)
  To: intel-gfx

Skip shrinking from i915 if we fail to acquire the struct_mutex in the
shrinker while performing direct-reclaim. The trade-off is (much) lower
latency for non-i915 clients at an increased risk of being unable to
obtain a page from direct-reclaim without hitting the oom-notifier. The
proviso is that we still try hard to obtain the lock for oom so that we
can reap under heavy memory pressure.
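
In effect the locking policy becomes the sketch below (a simplified
restatement of shrinker_lock() from the diff; the recursive-trylock case
is omitted):

static bool example_shrinker_lock(struct drm_i915_private *i915,
				  unsigned int flags, bool *unlock)
{
	if (mutex_trylock(&i915->drm.struct_mutex)) {
		*unlock = true;
		return true;
	}

	/* Direct-reclaim gives up immediately; only oom waits for the lock. */
	*unlock = false;
	if (flags & I915_SHRINK_ACTIVE) {
		mutex_lock_nested(&i915->drm.struct_mutex, I915_MM_SHRINKER);
		*unlock = true;
	}

	return *unlock;
}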

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          |  4 ++--
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 24 +++++++++++-------------
 2 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c5f01964f0fb..1cad218b71d3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2916,9 +2916,9 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 	__i915_gem_object_unpin_pages(obj);
 }
 
-enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock */
+enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
 	I915_MM_NORMAL = 0,
-	I915_MM_SHRINKER
+	I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */
 };
 
 void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index ea90d3a0d511..d461f458f4af 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -36,7 +36,9 @@
 #include "i915_drv.h"
 #include "i915_trace.h"
 
-static bool shrinker_lock(struct drm_i915_private *i915, bool *unlock)
+static bool shrinker_lock(struct drm_i915_private *i915,
+			  unsigned int flags,
+			  bool *unlock)
 {
 	switch (mutex_trylock_recursive(&i915->drm.struct_mutex)) {
 	case MUTEX_TRYLOCK_RECURSIVE:
@@ -45,15 +47,11 @@ static bool shrinker_lock(struct drm_i915_private *i915, bool *unlock)
 
 	case MUTEX_TRYLOCK_FAILED:
 		*unlock = false;
-		preempt_disable();
-		do {
-			cpu_relax();
-			if (mutex_trylock(&i915->drm.struct_mutex)) {
-				*unlock = true;
-				break;
-			}
-		} while (!need_resched());
-		preempt_enable();
+		if (flags & I915_SHRINK_ACTIVE) {
+			mutex_lock_nested(&i915->drm.struct_mutex,
+					  I915_MM_SHRINKER);
+			*unlock = true;
+		}
 		return *unlock;
 
 	case MUTEX_TRYLOCK_SUCCESS:
@@ -160,7 +158,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
 	unsigned long scanned = 0;
 	bool unlock;
 
-	if (!shrinker_lock(i915, &unlock))
+	if (!shrinker_lock(i915, flags, &unlock))
 		return 0;
 
 	/*
@@ -357,7 +355,7 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 
 	sc->nr_scanned = 0;
 
-	if (!shrinker_lock(i915, &unlock))
+	if (!shrinker_lock(i915, 0, &unlock))
 		return SHRINK_STOP;
 
 	freed = i915_gem_shrink(i915,
@@ -397,7 +395,7 @@ shrinker_lock_uninterruptible(struct drm_i915_private *i915, bool *unlock,
 	do {
 		if (i915_gem_wait_for_idle(i915,
 					   0, MAX_SCHEDULE_TIMEOUT) == 0 &&
-		    shrinker_lock(i915, unlock))
+		    shrinker_lock(i915, 0, unlock))
 			break;
 
 		schedule_timeout_killable(1);
-- 
2.20.0.rc2


* [PATCH 6/7] drm/i915/userptr: Avoid struct_mutex recursion for mmu_invalidate_range_start
From: Chris Wilson @ 2018-12-04 14:15 UTC (permalink / raw)
  To: intel-gfx

Since commit 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu
notifiers") we have been able to report failure from
mmu_invalidate_range_start, which allows us to use a trylock on the
struct_mutex to avoid potential recursion and report -EBUSY instead.
Furthermore, this allows us to pull the work into the main callback and
avoid the sleight-of-hand of using a workqueue to dodge lockdep.

However, not all paths to mmu_invalidate_range_start are prepared to
handle failure, so instead of reporting the recursion, deal with it by
propagating the failure upwards to the callers, who can decide for
themselves whether to handle it or report it.

v2: Mark up the recursive lock behaviour and comment on the various weak
points.
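
The heart of the change is that the notifier now performs the
cancellation inline and reports any failure, roughly as sketched below
(example_cancel_userptr() is a made-up name; the locking and
interval-tree walk are elided, see the diff):

static int example_cancel_userptr(struct drm_i915_gem_object *obj)
{
	int err;

	err = i915_gem_object_unbind(obj);
	if (err == 0)
		err = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);

	/* Non-blockable invalidations translate failure into -EAGAIN. */
	return err;
}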

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108375
References: 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu notifiers")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |   4 +-
 drivers/gpu/drm/i915/i915_gem.c         |  30 +++-
 drivers/gpu/drm/i915/i915_gem_object.h  |   7 +
 drivers/gpu/drm/i915/i915_gem_userptr.c | 221 +++++++++++-------------
 4 files changed, 136 insertions(+), 126 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1cad218b71d3..144b7737c0a2 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2921,8 +2921,8 @@ enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
 	I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */
 };
 
-void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
-				 enum i915_mm_subclass subclass);
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
+				enum i915_mm_subclass subclass);
 void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj);
 
 enum i915_map_type {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d36a9755ad91..2ad8f94f5056 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2447,8 +2447,8 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
 	struct sg_table *pages;
 
 	pages = fetch_and_zero(&obj->mm.pages);
-	if (!pages)
-		return NULL;
+	if (IS_ERR_OR_NULL(pages))
+		return pages;
 
 	spin_lock(&i915->mm.obj_lock);
 	list_del(&obj->mm.link);
@@ -2472,22 +2472,23 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
 	return pages;
 }
 
-void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
-				 enum i915_mm_subclass subclass)
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
+				enum i915_mm_subclass subclass)
 {
 	struct sg_table *pages;
+	int ret;
 
 	if (i915_gem_object_has_pinned_pages(obj))
-		return;
+		return -EBUSY;
 
 	GEM_BUG_ON(obj->bind_count);
-	if (!i915_gem_object_has_pages(obj))
-		return;
 
 	/* May be called by shrinker from within get_pages() (on another bo) */
 	mutex_lock_nested(&obj->mm.lock, subclass);
-	if (unlikely(atomic_read(&obj->mm.pages_pin_count)))
+	if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
+		ret = -EBUSY;
 		goto unlock;
+	}
 
 	/*
 	 * ->put_pages might need to allocate memory for the bit17 swizzle
@@ -2495,11 +2496,24 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
 	 * lists early.
 	 */
 	pages = __i915_gem_object_unset_pages(obj);
+
+	/*
+	 * XXX Temporary hijinx to avoid updating all backends to handle
+	 * NULL pages. In the future, when we have more asynchronous
+	 * get_pages backends we should be better able to handle the
+	 * cancellation of the async task in a more uniform manner.
+	 */
+	if (!pages && !i915_gem_object_needs_async_cancel(obj))
+		pages = ERR_PTR(-EINVAL);
+
 	if (!IS_ERR(pages))
 		obj->ops->put_pages(obj, pages);
 
+	ret = 0;
 unlock:
 	mutex_unlock(&obj->mm.lock);
+
+	return ret;
 }
 
 bool i915_sg_trim(struct sg_table *orig_st)
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index a6dd7c46de0d..49ce797173b5 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -56,6 +56,7 @@ struct drm_i915_gem_object_ops {
 #define I915_GEM_OBJECT_HAS_STRUCT_PAGE	BIT(0)
 #define I915_GEM_OBJECT_IS_SHRINKABLE	BIT(1)
 #define I915_GEM_OBJECT_IS_PROXY	BIT(2)
+#define I915_GEM_OBJECT_ASYNC_CANCEL	BIT(3)
 
 	/* Interface between the GEM object and its backing storage.
 	 * get_pages() is called once prior to the use of the associated set
@@ -386,6 +387,12 @@ i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj)
 	return obj->ops->flags & I915_GEM_OBJECT_IS_PROXY;
 }
 
+static inline bool
+i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj)
+{
+	return obj->ops->flags & I915_GEM_OBJECT_ASYNC_CANCEL;
+}
+
 static inline bool
 i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 2c9b284036d1..8b07fd44731f 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -50,79 +50,84 @@ struct i915_mmu_notifier {
 	struct hlist_node node;
 	struct mmu_notifier mn;
 	struct rb_root_cached objects;
-	struct workqueue_struct *wq;
+	struct i915_mm_struct *mm;
 };
 
 struct i915_mmu_object {
 	struct i915_mmu_notifier *mn;
 	struct drm_i915_gem_object *obj;
 	struct interval_tree_node it;
-	struct list_head link;
-	struct work_struct work;
-	bool attached;
 };
 
-static void cancel_userptr(struct work_struct *work)
-{
-	struct i915_mmu_object *mo = container_of(work, typeof(*mo), work);
-	struct drm_i915_gem_object *obj = mo->obj;
-	struct work_struct *active;
-
-	/* Cancel any active worker and force us to re-evaluate gup */
-	mutex_lock(&obj->mm.lock);
-	active = fetch_and_zero(&obj->userptr.work);
-	mutex_unlock(&obj->mm.lock);
-	if (active)
-		goto out;
-
-	i915_gem_object_wait(obj, I915_WAIT_ALL, MAX_SCHEDULE_TIMEOUT, NULL);
-
-	mutex_lock(&obj->base.dev->struct_mutex);
-
-	/* We are inside a kthread context and can't be interrupted */
-	if (i915_gem_object_unbind(obj) == 0)
-		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
-	WARN_ONCE(i915_gem_object_has_pages(obj),
-		  "Failed to release pages: bind_count=%d, pages_pin_count=%d, pin_global=%d\n",
-		  obj->bind_count,
-		  atomic_read(&obj->mm.pages_pin_count),
-		  obj->pin_global);
-
-	mutex_unlock(&obj->base.dev->struct_mutex);
-
-out:
-	i915_gem_object_put(obj);
-}
-
 static void add_object(struct i915_mmu_object *mo)
 {
-	if (mo->attached)
+	if (!RB_EMPTY_NODE(&mo->it.rb))
 		return;
 
 	interval_tree_insert(&mo->it, &mo->mn->objects);
-	mo->attached = true;
 }
 
 static void del_object(struct i915_mmu_object *mo)
 {
-	if (!mo->attached)
+	if (RB_EMPTY_NODE(&mo->it.rb))
 		return;
 
 	interval_tree_remove(&mo->it, &mo->mn->objects);
-	mo->attached = false;
+	RB_CLEAR_NODE(&mo->it.rb);
+}
+
+static void
+__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
+{
+	struct i915_mmu_object *mo = obj->userptr.mmu_object;
+
+	/*
+	 * During mm_invalidate_range we need to cancel any userptr that
+	 * overlaps the range being invalidated. Doing so requires the
+	 * struct_mutex, and that risks recursion. In order to cause
+	 * recursion, the user must alias the userptr address space with
+	 * a GTT mmapping (possible with a MAP_FIXED) - then when we have
+	 * to invalidate that mmaping, mm_invalidate_range is called with
+	 * the userptr address *and* the struct_mutex held.  To prevent that
+	 * we set a flag under the i915_mmu_notifier spinlock to indicate
+	 * whether this object is valid.
+	 */
+	if (!mo)
+		return;
+
+	spin_lock(&mo->mn->lock);
+	if (value)
+		add_object(mo);
+	else
+		del_object(mo);
+	spin_unlock(&mo->mn->lock);
+}
+
+static struct mutex *__i915_mutex_lock_recursive(struct mutex *m)
+{
+	switch (mutex_trylock_recursive(m)) {
+	default:
+	case MUTEX_TRYLOCK_FAILED:
+		mutex_lock_nested(m, I915_MM_SHRINKER);
+	case MUTEX_TRYLOCK_SUCCESS:
+		return m;
+
+	case MUTEX_TRYLOCK_RECURSIVE:
+		return ERR_PTR(-EEXIST);
+	}
 }
 
 static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
-						       struct mm_struct *mm,
-						       unsigned long start,
-						       unsigned long end,
-						       bool blockable)
+						      struct mm_struct *mm,
+						      unsigned long start,
+						      unsigned long end,
+						      bool blockable)
 {
 	struct i915_mmu_notifier *mn =
 		container_of(_mn, struct i915_mmu_notifier, mn);
-	struct i915_mmu_object *mo;
 	struct interval_tree_node *it;
-	LIST_HEAD(cancelled);
+	struct mutex *unlock = NULL;
+	int ret = 0;
 
 	if (RB_EMPTY_ROOT(&mn->objects.rb_root))
 		return 0;
@@ -133,11 +138,15 @@ static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 	spin_lock(&mn->lock);
 	it = interval_tree_iter_first(&mn->objects, start, end);
 	while (it) {
+		struct drm_i915_gem_object *obj;
+
 		if (!blockable) {
-			spin_unlock(&mn->lock);
-			return -EAGAIN;
+			ret = -EAGAIN;
+			break;
 		}
-		/* The mmu_object is released late when destroying the
+
+		/*
+		 * The mmu_object is released late when destroying the
 		 * GEM object so it is entirely possible to gain a
 		 * reference on an object in the process of being freed
 		 * since our serialisation is via the spinlock and not
@@ -146,21 +155,39 @@ static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 		 * use-after-free we only acquire a reference on the
 		 * object if it is not in the process of being destroyed.
 		 */
-		mo = container_of(it, struct i915_mmu_object, it);
-		if (kref_get_unless_zero(&mo->obj->base.refcount))
-			queue_work(mn->wq, &mo->work);
+		obj = container_of(it, struct i915_mmu_object, it)->obj;
+		if (!kref_get_unless_zero(&obj->base.refcount)) {
+			it = interval_tree_iter_next(it, start, end);
+			continue;
+		}
+		spin_unlock(&mn->lock);
 
-		list_add(&mo->link, &cancelled);
-		it = interval_tree_iter_next(it, start, end);
+		if (!unlock)
+			unlock = __i915_mutex_lock_recursive(&mn->mm->i915->drm.struct_mutex);
+		ret = i915_gem_object_unbind(obj);
+		if (ret == 0)
+			ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
+		i915_gem_object_put(obj);
+		if (ret)
+			goto unlock;
+
+		spin_lock(&mn->lock);
+
+		/*
+		 * As we do not (yet) protect the mmu from concurrent insertion
+		 * over this range, there is no guarantee that this search will
+		 * terminate given a pathologic workload.
+		 */
+		it = interval_tree_iter_first(&mn->objects, start, end);
 	}
-	list_for_each_entry(mo, &cancelled, link)
-		del_object(mo);
 	spin_unlock(&mn->lock);
 
-	if (!list_empty(&cancelled))
-		flush_workqueue(mn->wq);
+unlock:
+	if (!IS_ERR_OR_NULL(unlock))
+		mutex_unlock(unlock);
+
+	return ret;
 
-	return 0;
 }
 
 static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
@@ -168,7 +195,7 @@ static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
 };
 
 static struct i915_mmu_notifier *
-i915_mmu_notifier_create(struct mm_struct *mm)
+i915_mmu_notifier_create(struct i915_mm_struct *mm)
 {
 	struct i915_mmu_notifier *mn;
 
@@ -179,13 +206,7 @@ i915_mmu_notifier_create(struct mm_struct *mm)
 	spin_lock_init(&mn->lock);
 	mn->mn.ops = &i915_gem_userptr_notifier;
 	mn->objects = RB_ROOT_CACHED;
-	mn->wq = alloc_workqueue("i915-userptr-release",
-				 WQ_UNBOUND | WQ_MEM_RECLAIM,
-				 0);
-	if (mn->wq == NULL) {
-		kfree(mn);
-		return ERR_PTR(-ENOMEM);
-	}
+	mn->mm = mm;
 
 	return mn;
 }
@@ -195,16 +216,14 @@ i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
 {
 	struct i915_mmu_object *mo;
 
-	mo = obj->userptr.mmu_object;
-	if (mo == NULL)
+	mo = fetch_and_zero(&obj->userptr.mmu_object);
+	if (!mo)
 		return;
 
 	spin_lock(&mo->mn->lock);
 	del_object(mo);
 	spin_unlock(&mo->mn->lock);
 	kfree(mo);
-
-	obj->userptr.mmu_object = NULL;
 }
 
 static struct i915_mmu_notifier *
@@ -217,7 +236,7 @@ i915_mmu_notifier_find(struct i915_mm_struct *mm)
 	if (mn)
 		return mn;
 
-	mn = i915_mmu_notifier_create(mm->mm);
+	mn = i915_mmu_notifier_create(mm);
 	if (IS_ERR(mn))
 		err = PTR_ERR(mn);
 
@@ -240,10 +259,8 @@ i915_mmu_notifier_find(struct i915_mm_struct *mm)
 	mutex_unlock(&mm->i915->mm_lock);
 	up_write(&mm->mm->mmap_sem);
 
-	if (mn && !IS_ERR(mn)) {
-		destroy_workqueue(mn->wq);
+	if (mn && !IS_ERR(mn))
 		kfree(mn);
-	}
 
 	return err ? ERR_PTR(err) : mm->mn;
 }
@@ -266,14 +283,14 @@ i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
 		return PTR_ERR(mn);
 
 	mo = kzalloc(sizeof(*mo), GFP_KERNEL);
-	if (mo == NULL)
+	if (!mo)
 		return -ENOMEM;
 
 	mo->mn = mn;
 	mo->obj = obj;
 	mo->it.start = obj->userptr.ptr;
 	mo->it.last = obj->userptr.ptr + obj->base.size - 1;
-	INIT_WORK(&mo->work, cancel_userptr);
+	RB_CLEAR_NODE(&mo->it.rb);
 
 	obj->userptr.mmu_object = mo;
 	return 0;
@@ -287,12 +304,16 @@ i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
 		return;
 
 	mmu_notifier_unregister(&mn->mn, mm);
-	destroy_workqueue(mn->wq);
 	kfree(mn);
 }
 
 #else
 
+static void
+__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
+{
+}
+
 static void
 i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
 {
@@ -461,42 +482,6 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
 	return st;
 }
 
-static int
-__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj,
-			      bool value)
-{
-	int ret = 0;
-
-	/* During mm_invalidate_range we need to cancel any userptr that
-	 * overlaps the range being invalidated. Doing so requires the
-	 * struct_mutex, and that risks recursion. In order to cause
-	 * recursion, the user must alias the userptr address space with
-	 * a GTT mmapping (possible with a MAP_FIXED) - then when we have
-	 * to invalidate that mmaping, mm_invalidate_range is called with
-	 * the userptr address *and* the struct_mutex held.  To prevent that
-	 * we set a flag under the i915_mmu_notifier spinlock to indicate
-	 * whether this object is valid.
-	 */
-#if defined(CONFIG_MMU_NOTIFIER)
-	if (obj->userptr.mmu_object == NULL)
-		return 0;
-
-	spin_lock(&obj->userptr.mmu_object->mn->lock);
-	/* In order to serialise get_pages with an outstanding
-	 * cancel_userptr, we must drop the struct_mutex and try again.
-	 */
-	if (!value)
-		del_object(obj->userptr.mmu_object);
-	else if (!work_pending(&obj->userptr.mmu_object->work))
-		add_object(obj->userptr.mmu_object);
-	else
-		ret = -EAGAIN;
-	spin_unlock(&obj->userptr.mmu_object->mn->lock);
-#endif
-
-	return ret;
-}
-
 static void
 __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 {
@@ -682,8 +667,11 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
 	struct sgt_iter sgt_iter;
 	struct page *page;
 
-	BUG_ON(obj->userptr.work != NULL);
+	/* Cancel any inflight work and force them to restart their gup */
+	obj->userptr.work = NULL;
 	__i915_gem_userptr_set_active(obj, false);
+	if (!pages)
+		return;
 
 	if (obj->mm.madv != I915_MADV_WILLNEED)
 		obj->mm.dirty = false;
@@ -721,7 +709,8 @@ i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
 
 static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
 	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
-		 I915_GEM_OBJECT_IS_SHRINKABLE,
+		 I915_GEM_OBJECT_IS_SHRINKABLE |
+		 I915_GEM_OBJECT_ASYNC_CANCEL,
 	.get_pages = i915_gem_userptr_get_pages,
 	.put_pages = i915_gem_userptr_put_pages,
 	.dmabuf_export = i915_gem_userptr_dmabuf_export,
-- 
2.20.0.rc2


* [PATCH 7/7] drm/i915/userptr: Probe vma range before gup
  2018-12-04 14:15 [PATCH 1/7] drm/i915: Allocate a common scratch page Chris Wilson
                   ` (4 preceding siblings ...)
  2018-12-04 14:15 ` [PATCH 6/7] drm/i915/userptr: Avoid struct_mutex recursion for mmu_invalidate_range_start Chris Wilson
@ 2018-12-04 14:15 ` Chris Wilson
  2018-12-04 14:28 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/7] drm/i915: Allocate a common scratch page Patchwork
                   ` (4 subsequent siblings)
  10 siblings, 0 replies; 19+ messages in thread
From: Chris Wilson @ 2018-12-04 14:15 UTC (permalink / raw)
  To: intel-gfx

We want to exclude any GGTT objects from being present on our internal
lists to avoid the deadlock we may run into with our requirement for
struct_mutex during invalidate. However, if the gup_fast fails, we put
the userptr onto the workqueue and mark it as active, so that we
remember to serialise the worker upon mmu_invalidate.

Note that despite the previous fix, it is still better to avoid the
struct_mutex recursion where possible, leaving the recursion only to
handle the shrinker-esque paths.

v2: Hold mmap_sem to prevent modifications to the mm while we probe and
add ourselves to the interval-tree for notification.
v3: Rely on mmap_sem for a simpler patch.
v4: Mark up the mmap_sem nesting
v5: Don't deactivate on -EAGAIN as that means the worker is queued
v6: Fight the indentation and chained if-else error handling
v7: Fight again.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104209
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Michał Winiarski <michal.winiarski@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_userptr.c | 158 ++++++++++++++++--------
 1 file changed, 106 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 8b07fd44731f..744aa538d5db 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -439,7 +439,7 @@ struct get_pages_work {
 	struct task_struct *task;
 };
 
-static struct sg_table *
+static int
 __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
 			       struct page **pvec, int num_pages)
 {
@@ -450,7 +450,7 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
 
 	st = kmalloc(sizeof(*st), GFP_KERNEL);
 	if (!st)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 
 alloc_table:
 	ret = __sg_alloc_table_from_pages(st, pvec, num_pages,
@@ -459,7 +459,7 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
 					  GFP_KERNEL);
 	if (ret) {
 		kfree(st);
-		return ERR_PTR(ret);
+		return ret;
 	}
 
 	ret = i915_gem_gtt_prepare_pages(obj, st);
@@ -472,14 +472,14 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
 		}
 
 		kfree(st);
-		return ERR_PTR(ret);
+		return ret;
 	}
 
 	sg_page_sizes = i915_sg_page_sizes(st->sgl);
 
 	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
 
-	return st;
+	return 0;
 }
 
 static void
@@ -524,19 +524,14 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 
 	mutex_lock(&obj->mm.lock);
 	if (obj->userptr.work == &work->work) {
-		struct sg_table *pages = ERR_PTR(ret);
-
 		if (pinned == npages) {
-			pages = __i915_gem_userptr_alloc_pages(obj, pvec,
-							       npages);
-			if (!IS_ERR(pages)) {
+			ret = __i915_gem_userptr_alloc_pages(obj, pvec, npages);
+			if (!ret)
 				pinned = 0;
-				pages = NULL;
-			}
 		}
 
-		obj->userptr.work = ERR_CAST(pages);
-		if (IS_ERR(pages))
+		obj->userptr.work = ERR_PTR(ret);
+		if (ret)
 			__i915_gem_userptr_set_active(obj, false);
 	}
 	mutex_unlock(&obj->mm.lock);
@@ -549,7 +544,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 	kfree(work);
 }
 
-static struct sg_table *
+static int
 __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
 {
 	struct get_pages_work *work;
@@ -575,7 +570,7 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
 	 */
 	work = kmalloc(sizeof(*work), GFP_KERNEL);
 	if (work == NULL)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 
 	obj->userptr.work = &work->work;
 
@@ -587,19 +582,89 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
 	INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
 	queue_work(to_i915(obj->base.dev)->mm.userptr_wq, &work->work);
 
-	return ERR_PTR(-EAGAIN);
+	return -EAGAIN;
 }
 
-static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
+static int
+probe_range(struct mm_struct *mm, unsigned long addr, unsigned long len)
+{
+	const unsigned long end = addr + len;
+	struct vm_area_struct *vma;
+	int ret = -EFAULT;
+
+	for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
+		if (vma->vm_start > addr)
+			break;
+
+		/*
+		 * Exclude any VMA that is not backed only by struct_page, i.e.
+		 * IO regions that include our own GGTT mmaps. We cannot handle
+		 * such ranges, as we may encounter deadlocks around our
+		 * struct_mutex on mmu_invalidate_range.
+		 */
+		if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
+			break;
+
+		if (vma->vm_end >= end) {
+			ret = 0;
+			break;
+		}
+
+		addr = vma->vm_end;
+	}
+
+	return ret;
+}
+
+static int try_fast_gup(struct drm_i915_gem_object *obj)
 {
 	const int num_pages = obj->base.size >> PAGE_SHIFT;
-	struct mm_struct *mm = obj->userptr.mm->mm;
 	struct page **pvec;
-	struct sg_table *pages;
-	bool active;
-	int pinned;
+	int pinned, err;
 
-	/* If userspace should engineer that these pages are replaced in
+	pvec = kvmalloc_array(num_pages, sizeof(struct page *),
+			      GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+	if (!pvec) /* defer to worker if malloc fails */
+		return -ENOMEM;
+
+	pinned = __get_user_pages_fast(obj->userptr.ptr,
+				       num_pages,
+				       !i915_gem_object_is_readonly(obj),
+				       pvec);
+	if (pinned < 0) {
+		err = pinned;
+		pinned = 0;
+		goto out_pvec;
+	}
+
+	if (pinned < num_pages) {
+		err = -EFAULT;
+		goto out_pinned;
+	}
+
+	__i915_gem_userptr_set_active(obj, true);
+
+	err = __i915_gem_userptr_alloc_pages(obj, pvec, num_pages);
+	if (err) {
+		__i915_gem_userptr_set_active(obj, false);
+		goto out_pinned;
+	}
+
+	pinned = 0;
+out_pinned:
+	release_pages(pvec, pinned);
+out_pvec:
+	kvfree(pvec);
+	return err;
+}
+
+static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
+{
+	struct mm_struct *mm = obj->userptr.mm->mm;
+	int err;
+
+	/*
+	 * If userspace should engineer that these pages are replaced in
 	 * the vma between us binding this page into the GTT and completion
 	 * of rendering... Their loss. If they change the mapping of their
 	 * pages they need to create a new bo to point to the new vma.
@@ -624,40 +689,29 @@ static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
 			return -EAGAIN;
 	}
 
-	pvec = NULL;
-	pinned = 0;
-
 	if (mm == current->mm) {
-		pvec = kvmalloc_array(num_pages, sizeof(struct page *),
-				      GFP_KERNEL |
-				      __GFP_NORETRY |
-				      __GFP_NOWARN);
-		if (pvec) /* defer to worker if malloc fails */
-			pinned = __get_user_pages_fast(obj->userptr.ptr,
-						       num_pages,
-						       !i915_gem_object_is_readonly(obj),
-						       pvec);
+		err = try_fast_gup(obj);
+		if (!err)
+			return 0;
 	}
 
-	active = false;
-	if (pinned < 0) {
-		pages = ERR_PTR(pinned);
-		pinned = 0;
-	} else if (pinned < num_pages) {
-		pages = __i915_gem_userptr_get_pages_schedule(obj);
-		active = pages == ERR_PTR(-EAGAIN);
-	} else {
-		pages = __i915_gem_userptr_alloc_pages(obj, pvec, num_pages);
-		active = !IS_ERR(pages);
-	}
-	if (active)
-		__i915_gem_userptr_set_active(obj, true);
+	/* lockdep doesn't yet automatically allow nesting of readers */
+	down_read_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
 
-	if (IS_ERR(pages))
-		release_pages(pvec, pinned);
-	kvfree(pvec);
+	err = probe_range(mm, obj->userptr.ptr, obj->base.size);
+	if (err)
+		goto err_unlock;
+
+	__i915_gem_userptr_set_active(obj, true);
+
+	err = __i915_gem_userptr_get_pages_schedule(obj);
+	if (err != -EAGAIN)
+		__i915_gem_userptr_set_active(obj, false);
+
+err_unlock:
+	up_read(&mm->mmap_sem);
 
-	return PTR_ERR_OR_ZERO(pages);
+	return err;
 }
 
 static void
-- 
2.20.0.rc2


* ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/7] drm/i915: Allocate a common scratch page
  2018-12-04 14:15 [PATCH 1/7] drm/i915: Allocate a common scratch page Chris Wilson
                   ` (5 preceding siblings ...)
  2018-12-04 14:15 ` [PATCH 7/7] drm/i915/userptr: Probe vma range before gup Chris Wilson
@ 2018-12-04 14:28 ` Patchwork
  2018-12-04 14:31 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (3 subsequent siblings)
  10 siblings, 0 replies; 19+ messages in thread
From: Patchwork @ 2018-12-04 14:28 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/7] drm/i915: Allocate a common scratch page
URL   : https://patchwork.freedesktop.org/series/53477/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
2fc4093e46e8 drm/i915: Allocate a common scratch page
90bafab654fd drm/i915: Flush GPU relocs harder for gen3
-:14: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 7fa28e146994 ("drm/i915: Write GPU relocs harder with gen3")'
#14: 
References: 7fa28e146994 ("drm/i915: Write GPU relocs harder with gen3")

total: 1 errors, 0 warnings, 0 checks, 50 lines checked
d0acfc4c3006 drm/i915/selftests: Reorder request allocation vs vma pinning
8d513703db38 drm/i915: Pipeline PDP updates for Braswell
b161e78c0b56 drm/i915: Return immediately if trylock fails for direct-reclaim
c13fea83071c drm/i915/userptr: Avoid struct_mutex recursion for mmu_invalidate_range_start
-:23: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#23: 
References: 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu notifiers")

-:23: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu notifiers")'
#23: 
References: 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu notifiers")

-:240: ERROR:LOCKING: recursive locking is bad, do not use this ever.
#240: FILE: drivers/gpu/drm/i915/i915_gem_userptr.c:108:
+	switch (mutex_trylock_recursive(m)) {

total: 2 errors, 1 warnings, 0 checks, 444 lines checked
0633dd1e91ac drm/i915/userptr: Probe vma range before gup


* ✗ Fi.CI.SPARSE: warning for series starting with [1/7] drm/i915: Allocate a common scratch page
  2018-12-04 14:15 [PATCH 1/7] drm/i915: Allocate a common scratch page Chris Wilson
                   ` (6 preceding siblings ...)
  2018-12-04 14:28 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/7] drm/i915: Allocate a common scratch page Patchwork
@ 2018-12-04 14:31 ` Patchwork
  2018-12-04 14:48 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (2 subsequent siblings)
  10 siblings, 0 replies; 19+ messages in thread
From: Patchwork @ 2018-12-04 14:31 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/7] drm/i915: Allocate a common scratch page
URL   : https://patchwork.freedesktop.org/series/53477/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915: Allocate a common scratch page
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3560:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3562:16: warning: expression using sizeof(void)

Commit: drm/i915: Flush GPU relocs harder for gen3
Okay!

Commit: drm/i915/selftests: Reorder request allocation vs vma pinning
Okay!

Commit: drm/i915: Pipeline PDP updates for Braswell
Okay!

Commit: drm/i915: Return immediately if trylock fails for direct-reclaim
Okay!

Commit: drm/i915/userptr: Avoid struct_mutex recursion for mmu_invalidate_range_start
Okay!

Commit: drm/i915/userptr: Probe vma range before gup
Okay!


* ✓ Fi.CI.BAT: success for series starting with [1/7] drm/i915: Allocate a common scratch page
  2018-12-04 14:15 [PATCH 1/7] drm/i915: Allocate a common scratch page Chris Wilson
                   ` (7 preceding siblings ...)
  2018-12-04 14:31 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2018-12-04 14:48 ` Patchwork
  2018-12-04 15:53   ` Chris Wilson
  2018-12-04 15:07 ` [PATCH 1/7] " Mika Kuoppala
  2018-12-04 22:58 ` ✓ Fi.CI.IGT: success for series starting with [1/7] " Patchwork
  10 siblings, 1 reply; 19+ messages in thread
From: Patchwork @ 2018-12-04 14:48 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/7] drm/i915: Allocate a common scratch page
URL   : https://patchwork.freedesktop.org/series/53477/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5256 -> Patchwork_11010
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/53477/revisions/1/mbox/

Known issues
------------

  Here are the changes found in Patchwork_11010 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_ctx_create@basic-files:
    - fi-bsw-kefka:       PASS -> INCOMPLETE [fdo#105876] / [fdo#108714]

  
#### Possible fixes ####

  * igt@gem_ctx_create@basic-files:
    - fi-bsw-n3050:       FAIL [fdo#108656] -> PASS

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c:
    - fi-cfl-8109u:       INCOMPLETE [fdo#106070] / [fdo#108126] -> PASS

  
  [fdo#105876]: https://bugs.freedesktop.org/show_bug.cgi?id=105876
  [fdo#106070]: https://bugs.freedesktop.org/show_bug.cgi?id=106070
  [fdo#108126]: https://bugs.freedesktop.org/show_bug.cgi?id=108126
  [fdo#108656]: https://bugs.freedesktop.org/show_bug.cgi?id=108656
  [fdo#108714]: https://bugs.freedesktop.org/show_bug.cgi?id=108714


Participating hosts (47 -> 43)
------------------------------

  Missing    (4): fi-ctg-p8600 fi-ilk-m540 fi-bsw-cyan fi-hsw-4200u 


Build changes
-------------

    * Linux: CI_DRM_5256 -> Patchwork_11010

  CI_DRM_5256: eb16a05c5410e4b0b95798b93bace2440507a4c7 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4740: dd8de0efa64e50bc06c2882a0028d98ad870e752 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_11010: 0633dd1e91acb4a83bed84fafa340cb3a6510151 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

0633dd1e91ac drm/i915/userptr: Probe vma range before gup
c13fea83071c drm/i915/userptr: Avoid struct_mutex recursion for mmu_invalidate_range_start
b161e78c0b56 drm/i915: Return immediately if trylock fails for direct-reclaim
8d513703db38 drm/i915: Pipeline PDP updates for Braswell
d0acfc4c3006 drm/i915/selftests: Reorder request allocation vs vma pinning
90bafab654fd drm/i915: Flush GPU relocs harder for gen3
2fc4093e46e8 drm/i915: Allocate a common scratch page

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_11010/

* Re: [PATCH 1/7] drm/i915: Allocate a common scratch page
  2018-12-04 14:15 [PATCH 1/7] drm/i915: Allocate a common scratch page Chris Wilson
                   ` (8 preceding siblings ...)
  2018-12-04 14:48 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-12-04 15:07 ` Mika Kuoppala
  2018-12-04 16:01   ` Chris Wilson
  2018-12-04 22:58 ` ✓ Fi.CI.IGT: success for series starting with [1/7] " Patchwork
  10 siblings, 1 reply; 19+ messages in thread
From: Mika Kuoppala @ 2018-12-04 15:07 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Currently we allocate a scratch page for each engine, but since we only
> ever write into it for post-sync operations, it is not exposed to
> userspace nor do we care for coherency. As we then do not care about its
> contents, we can use one page for all, reducing our allocations and
> avoid complications by not assuming per-engine isolation.
>
> For later use, it simplifies engine initialisation (by removing the
> allocation that required struct_mutex!) and means that we can always rely
> on there being a scratch page.
>
> v2: Check that we allocated a large enough scratch for I830 w/a
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/i915_drv.h         |  7 ++++
>  drivers/gpu/drm/i915/i915_gem.c         | 50 ++++++++++++++++++++++++-
>  drivers/gpu/drm/i915/i915_gpu_error.c   |  2 +-
>  drivers/gpu/drm/i915/intel_engine_cs.c  | 42 ---------------------
>  drivers/gpu/drm/i915/intel_lrc.c        | 17 +++------
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 37 ++++++------------
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  5 ---
>  7 files changed, 74 insertions(+), 86 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 23a3dc6f3907..c5f01964f0fb 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1983,6 +1983,8 @@ struct drm_i915_private {
>  		struct delayed_work idle_work;
>  
>  		ktime_t last_init_time;
> +
> +		struct i915_vma *scratch;
>  	} gt;
>  
>  	/* perform PHY state sanity checks? */
> @@ -3713,4 +3715,9 @@ static inline int intel_hws_csb_write_index(struct drm_i915_private *i915)
>  		return I915_HWS_CSB_WRITE_INDEX;
>  }
>  
> +static inline u32 i915_scratch_offset(const struct drm_i915_private *i915)
> +{
> +	return i915_ggtt_offset(i915->gt.scratch);
> +}
> +
>  #endif
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 35ecfea4e903..d36a9755ad91 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -5498,6 +5498,44 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
>  	goto out_ctx;
>  }
>  
> +static int
> +i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size)
> +{
> +	struct drm_i915_gem_object *obj;
> +	struct i915_vma *vma;
> +	int ret;
> +
> +	obj = i915_gem_object_create_stolen(i915, size);
> +	if (!obj)
> +		obj = i915_gem_object_create_internal(i915, size);
> +	if (IS_ERR(obj)) {
> +		DRM_ERROR("Failed to allocate scratch page\n");
> +		return PTR_ERR(obj);
> +	}
> +
> +	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
> +	if (IS_ERR(vma)) {
> +		ret = PTR_ERR(vma);
> +		goto err_unref;
> +	}
> +
> +	ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
> +	if (ret)
> +		goto err_unref;
> +
> +	i915->gt.scratch = vma;
> +	return 0;
> +
> +err_unref:
> +	i915_gem_object_put(obj);
> +	return ret;
> +}
> +
> +static void i915_gem_fini_scratch(struct drm_i915_private *i915)
> +{
> +	i915_vma_unpin_and_release(&i915->gt.scratch, 0);
> +}
> +
>  int i915_gem_init(struct drm_i915_private *dev_priv)
>  {
>  	int ret;
> @@ -5544,12 +5582,19 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>  		goto err_unlock;
>  	}
>  
> -	ret = i915_gem_contexts_init(dev_priv);
> +	ret = i915_gem_init_scratch(dev_priv,
> +				    IS_GEN2(dev_priv) ? SZ_256K : PAGE_SIZE);
>  	if (ret) {
>  		GEM_BUG_ON(ret == -EIO);
>  		goto err_ggtt;
>  	}
>  
> +	ret = i915_gem_contexts_init(dev_priv);
> +	if (ret) {
> +		GEM_BUG_ON(ret == -EIO);
> +		goto err_scratch;
> +	}
> +
>  	ret = intel_engines_init(dev_priv);
>  	if (ret) {
>  		GEM_BUG_ON(ret == -EIO);
> @@ -5622,6 +5667,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>  err_context:
>  	if (ret != -EIO)
>  		i915_gem_contexts_fini(dev_priv);
> +err_scratch:
> +	i915_gem_fini_scratch(dev_priv);
>  err_ggtt:
>  err_unlock:
>  	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> @@ -5673,6 +5720,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv)
>  	intel_uc_fini(dev_priv);
>  	i915_gem_cleanup_engines(dev_priv);
>  	i915_gem_contexts_fini(dev_priv);
> +	i915_gem_fini_scratch(dev_priv);
>  	mutex_unlock(&dev_priv->drm.struct_mutex);
>  
>  	intel_wa_list_free(&dev_priv->gt_wa_list);
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index a6885a59568b..07465123c166 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -1571,7 +1571,7 @@ static void gem_record_rings(struct i915_gpu_state *error)
>  			if (HAS_BROKEN_CS_TLB(i915))
>  				ee->wa_batchbuffer =
>  					i915_error_object_create(i915,
> -								 engine->scratch);
> +								 i915->gt.scratch);
>  			request_record_user_bo(request, ee);
>  
>  			ee->ctx =
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 6b427bc52f78..af2873403009 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -493,46 +493,6 @@ void intel_engine_setup_common(struct intel_engine_cs *engine)
>  	intel_engine_init_cmd_parser(engine);
>  }
>  
> -int intel_engine_create_scratch(struct intel_engine_cs *engine,
> -				unsigned int size)
> -{
> -	struct drm_i915_gem_object *obj;
> -	struct i915_vma *vma;
> -	int ret;
> -
> -	WARN_ON(engine->scratch);
> -
> -	obj = i915_gem_object_create_stolen(engine->i915, size);
> -	if (!obj)
> -		obj = i915_gem_object_create_internal(engine->i915, size);
> -	if (IS_ERR(obj)) {
> -		DRM_ERROR("Failed to allocate scratch page\n");
> -		return PTR_ERR(obj);
> -	}
> -
> -	vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
> -	if (IS_ERR(vma)) {
> -		ret = PTR_ERR(vma);
> -		goto err_unref;
> -	}
> -
> -	ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
> -	if (ret)
> -		goto err_unref;
> -
> -	engine->scratch = vma;
> -	return 0;
> -
> -err_unref:
> -	i915_gem_object_put(obj);
> -	return ret;
> -}
> -
> -void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
> -{
> -	i915_vma_unpin_and_release(&engine->scratch, 0);
> -}
> -
>  static void cleanup_status_page(struct intel_engine_cs *engine)
>  {
>  	if (HWS_NEEDS_PHYSICAL(engine->i915)) {
> @@ -707,8 +667,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
>  {
>  	struct drm_i915_private *i915 = engine->i915;
>  
> -	intel_engine_cleanup_scratch(engine);
> -
>  	cleanup_status_page(engine);
>  
>  	intel_engine_fini_breadcrumbs(engine);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 87227fd9ae5f..d7fa301b5ec7 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1288,9 +1288,10 @@ static int execlists_request_alloc(struct i915_request *request)
>  static u32 *
>  gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
>  {
> +	/* NB no one else is allowed to scribble over scratch + 256! */
>  	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
>  	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
> -	*batch++ = i915_ggtt_offset(engine->scratch) + 256;
> +	*batch++ = i915_scratch_offset(engine->i915) + 256;
>  	*batch++ = 0;
>  
>  	*batch++ = MI_LOAD_REGISTER_IMM(1);
> @@ -1304,7 +1305,7 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
>  
>  	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
>  	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
> -	*batch++ = i915_ggtt_offset(engine->scratch) + 256;
> +	*batch++ = i915_scratch_offset(engine->i915) + 256;
>  	*batch++ = 0;
>  
>  	return batch;
> @@ -1341,7 +1342,7 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
>  				       PIPE_CONTROL_GLOBAL_GTT_IVB |
>  				       PIPE_CONTROL_CS_STALL |
>  				       PIPE_CONTROL_QW_WRITE,
> -				       i915_ggtt_offset(engine->scratch) +
> +				       i915_scratch_offset(engine->i915) +
>  				       2 * CACHELINE_BYTES);
>  
>  	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> @@ -1973,7 +1974,7 @@ static int gen8_emit_flush_render(struct i915_request *request,
>  {
>  	struct intel_engine_cs *engine = request->engine;
>  	u32 scratch_addr =
> -		i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
> +		i915_scratch_offset(engine->i915) + 2 * CACHELINE_BYTES;
>  	bool vf_flush_wa = false, dc_flush_wa = false;
>  	u32 *cs, flags = 0;
>  	int len;
> @@ -2292,10 +2293,6 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
>  	if (ret)
>  		return ret;
>  
> -	ret = intel_engine_create_scratch(engine, PAGE_SIZE);
> -	if (ret)
> -		goto err_cleanup_common;
> -
>  	ret = intel_init_workaround_bb(engine);
>  	if (ret) {
>  		/*
> @@ -2311,10 +2308,6 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
>  	intel_engine_init_workarounds(engine);
>  
>  	return 0;
> -
> -err_cleanup_common:
> -	intel_engine_cleanup_common(engine);
> -	return ret;
>  }
>  
>  int logical_xcs_ring_init(struct intel_engine_cs *engine)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 7f88df5bff09..c5eb26a7ee79 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -150,8 +150,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
>  	 */
>  	if (mode & EMIT_INVALIDATE) {
>  		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
> -		*cs++ = i915_ggtt_offset(rq->engine->scratch) |
> -			PIPE_CONTROL_GLOBAL_GTT;
> +		*cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
>  		*cs++ = 0;
>  		*cs++ = 0;
>  
> @@ -159,8 +158,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
>  			*cs++ = MI_FLUSH;
>  
>  		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
> -		*cs++ = i915_ggtt_offset(rq->engine->scratch) |
> -			PIPE_CONTROL_GLOBAL_GTT;
> +		*cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
>  		*cs++ = 0;
>  		*cs++ = 0;
>  	}
> @@ -212,8 +210,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
>  static int
>  intel_emit_post_sync_nonzero_flush(struct i915_request *rq)
>  {
> -	u32 scratch_addr =
> -		i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
> +	u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
>  	u32 *cs;
>  
>  	cs = intel_ring_begin(rq, 6);
> @@ -246,8 +243,7 @@ intel_emit_post_sync_nonzero_flush(struct i915_request *rq)
>  static int
>  gen6_render_ring_flush(struct i915_request *rq, u32 mode)
>  {
> -	u32 scratch_addr =
> -		i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
> +	u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
>  	u32 *cs, flags = 0;
>  	int ret;
>  
> @@ -316,8 +312,7 @@ gen7_render_ring_cs_stall_wa(struct i915_request *rq)
>  static int
>  gen7_render_ring_flush(struct i915_request *rq, u32 mode)
>  {
> -	u32 scratch_addr =
> -		i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
> +	u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
>  	u32 *cs, flags = 0;
>  
>  	/*
> @@ -994,7 +989,7 @@ i965_emit_bb_start(struct i915_request *rq,
>  }
>  
>  /* Just userspace ABI convention to limit the wa batch bo to a resonable size */
> -#define I830_BATCH_LIMIT (256*1024)
> +#define I830_BATCH_LIMIT SZ_256K
>  #define I830_TLB_ENTRIES (2)
>  #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
>  static int
> @@ -1002,7 +997,9 @@ i830_emit_bb_start(struct i915_request *rq,
>  		   u64 offset, u32 len,
>  		   unsigned int dispatch_flags)
>  {
> -	u32 *cs, cs_offset = i915_ggtt_offset(rq->engine->scratch);
> +	u32 *cs, cs_offset = i915_scratch_offset(rq->i915);
> +
> +	GEM_BUG_ON(rq->i915->gt.scratch->size < I830_WA_SIZE);
>  
>  	cs = intel_ring_begin(rq, 6);
>  	if (IS_ERR(cs))
> @@ -1459,7 +1456,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
>  {
>  	struct i915_timeline *timeline;
>  	struct intel_ring *ring;
> -	unsigned int size;
>  	int err;
>  
>  	intel_engine_setup_common(engine);
> @@ -1484,21 +1480,12 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
>  	GEM_BUG_ON(engine->buffer);
>  	engine->buffer = ring;
>  
> -	size = PAGE_SIZE;
> -	if (HAS_BROKEN_CS_TLB(engine->i915))
> -		size = I830_WA_SIZE;
> -	err = intel_engine_create_scratch(engine, size);
> -	if (err)
> -		goto err_unpin;
> -
>  	err = intel_engine_init_common(engine);
>  	if (err)
> -		goto err_scratch;
> +		goto err_unpin;
>  
>  	return 0;
>  
> -err_scratch:
> -	intel_engine_cleanup_scratch(engine);
>  err_unpin:
>  	intel_ring_unpin(ring);
>  err_ring:
> @@ -1572,7 +1559,7 @@ static int flush_pd_dir(struct i915_request *rq)
>  	/* Stall until the page table load is complete */
>  	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
>  	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
> -	*cs++ = i915_ggtt_offset(engine->scratch);
> +	*cs++ = i915_scratch_offset(rq->i915);
>  	*cs++ = MI_NOOP;
>  
>  	intel_ring_advance(rq, cs);
> @@ -1681,7 +1668,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
>  			/* Insert a delay before the next switch! */
>  			*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
>  			*cs++ = i915_mmio_reg_offset(last_reg);
> -			*cs++ = i915_ggtt_offset(engine->scratch);
> +			*cs++ = i915_scratch_offset(rq->i915);
>  			*cs++ = MI_NOOP;
>  		}
>  		*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 927bb21a2b0b..72edaa7ff411 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -439,7 +439,6 @@ struct intel_engine_cs {
>  	struct i915_wa_list ctx_wa_list;
>  	struct i915_wa_list wa_list;
>  	struct i915_wa_list whitelist;
> -	struct i915_vma *scratch;
>  
>  	u32             irq_keep_mask; /* always keep these interrupts */
>  	u32		irq_enable_mask; /* bitmask to enable ring interrupt */
> @@ -896,10 +895,6 @@ void intel_engine_setup_common(struct intel_engine_cs *engine);
>  int intel_engine_init_common(struct intel_engine_cs *engine);
>  void intel_engine_cleanup_common(struct intel_engine_cs *engine);
>  
> -int intel_engine_create_scratch(struct intel_engine_cs *engine,
> -				unsigned int size);
> -void intel_engine_cleanup_scratch(struct intel_engine_cs *engine);
> -
>  int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
>  int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
>  int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
> -- 
> 2.20.0.rc2

* Re: ✓ Fi.CI.BAT: success for series starting with [1/7] drm/i915: Allocate a common scratch page
  2018-12-04 14:48 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-12-04 15:53   ` Chris Wilson
  0 siblings, 0 replies; 19+ messages in thread
From: Chris Wilson @ 2018-12-04 15:53 UTC (permalink / raw)
  To: Patchwork; +Cc: intel-gfx

Quoting Patchwork (2018-12-04 14:48:15)
> == Series Details ==
> 
> Series: series starting with [1/7] drm/i915: Allocate a common scratch page
> URL   : https://patchwork.freedesktop.org/series/53477/
> State : success
> 
> == Summary ==
> 
> CI Bug Log - changes from CI_DRM_5256 -> Patchwork_11010
> ====================================================
> 
> Summary
> -------
> 
>   **SUCCESS**
> 
>   No regressions found.
> 
>   External URL: https://patchwork.freedesktop.org/api/1.0/series/53477/revisions/1/mbox/
> 
> Known issues
> ------------
> 
>   Here are the changes found in Patchwork_11010 that come from known issues:
> 
> ### IGT changes ###
> 
> #### Issues hit ####
> 
>   * igt@gem_ctx_create@basic-files:
>     - fi-bsw-kefka:       PASS -> INCOMPLETE [fdo#105876] / [fdo#108714]

Ah flip,

[drm:fw_domains_get [i915]] *ERROR* render: timed out waiting for forcewake ack request.

Back to trying with the PMSI, I guess.
-Chris

* Re: [PATCH 1/7] drm/i915: Allocate a common scratch page
  2018-12-04 15:07 ` [PATCH 1/7] " Mika Kuoppala
@ 2018-12-04 16:01   ` Chris Wilson
  0 siblings, 0 replies; 19+ messages in thread
From: Chris Wilson @ 2018-12-04 16:01 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2018-12-04 15:07:01)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > Currently we allocate a scratch page for each engine, but since we only
> > ever write into it for post-sync operations, it is not exposed to
> > userspace nor do we care for coherency. As we then do not care about its
> > contents, we can use one page for all, reducing our allocations and
> > avoid complications by not assuming per-engine isolation.
> >
> > For later use, it simplifies engine initialisation (by removing the
> > allocation that required struct_mutex!) and means that we can always rely
> > on there being a scratch page.
> >
> > v2: Check that we allocated a large enough scratch for I830 w/a
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> 
> Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

Plonked it in since it can serve as the backport we need to fix
v4.18.20. Thanks,
-Chris

* ✓ Fi.CI.IGT: success for series starting with [1/7] drm/i915: Allocate a common scratch page
  2018-12-04 14:15 [PATCH 1/7] drm/i915: Allocate a common scratch page Chris Wilson
                   ` (9 preceding siblings ...)
  2018-12-04 15:07 ` [PATCH 1/7] " Mika Kuoppala
@ 2018-12-04 22:58 ` Patchwork
  10 siblings, 0 replies; 19+ messages in thread
From: Patchwork @ 2018-12-04 22:58 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/7] drm/i915: Allocate a common scratch page
URL   : https://patchwork.freedesktop.org/series/53477/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5256_full -> Patchwork_11010_full
====================================================

Summary
-------

  **WARNING**

  Minor unknown changes coming with Patchwork_11010_full need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_11010_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_11010_full:

### IGT changes ###

#### Possible regressions ####

  * {igt@kms_plane@pixel-format-pipe-a-planes-source-clamping}:
    - shard-skl:          NOTRUN -> DMESG-WARN

  * {igt@kms_plane@pixel-format-pipe-b-planes-source-clamping}:
    - shard-apl:          PASS -> FAIL +1

  
#### Warnings ####

  * igt@tools_test@tools_test:
    - shard-kbl:          SKIP -> PASS

  
Known issues
------------

  Here are the changes found in Patchwork_11010_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@debugfs_test@read_all_entries_display_off:
    - shard-skl:          PASS -> INCOMPLETE [fdo#104108]

  * igt@gem_exec_schedule@pi-ringfull-vebox:
    - shard-skl:          NOTRUN -> FAIL [fdo#103158]

  * igt@gem_ppgtt@blt-vs-render-ctxn:
    - shard-skl:          NOTRUN -> TIMEOUT [fdo#108039]

  * igt@kms_busy@extended-modeset-hang-newfb-with-reset-render-b:
    - shard-skl:          NOTRUN -> DMESG-WARN [fdo#107956]

  * igt@kms_busy@extended-pageflip-modeset-hang-oldfb-render-a:
    - shard-glk:          PASS -> DMESG-WARN [fdo#107956]

  * igt@kms_color@pipe-a-legacy-gamma:
    - shard-kbl:          PASS -> DMESG-WARN [fdo#103558] / [fdo#105602] +19

  * igt@kms_cursor_crc@cursor-128x42-sliding:
    - shard-apl:          PASS -> FAIL [fdo#103232] +1

  * igt@kms_cursor_crc@cursor-256x256-random:
    - shard-glk:          PASS -> FAIL [fdo#103232] +3

  * igt@kms_cursor_crc@cursor-256x256-sliding:
    - shard-skl:          PASS -> FAIL [fdo#103232]

  * igt@kms_cursor_crc@cursor-256x256-suspend:
    - shard-apl:          PASS -> FAIL [fdo#103191] / [fdo#103232]

  * igt@kms_fbcon_fbt@fbc-suspend:
    - shard-skl:          PASS -> FAIL [fdo#103833] / [fdo#105682]

  * igt@kms_flip@flip-vs-expired-vblank-interruptible:
    - shard-skl:          PASS -> FAIL [fdo#105363]

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-move:
    - shard-glk:          PASS -> FAIL [fdo#103167] +1

  * igt@kms_frontbuffer_tracking@fbc-stridechange:
    - shard-skl:          NOTRUN -> FAIL [fdo#105683]

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-indfb-draw-mmap-gtt:
    - shard-skl:          PASS -> FAIL [fdo#103167]

  * igt@kms_frontbuffer_tracking@fbcpsr-rgb565-draw-mmap-cpu:
    - shard-skl:          PASS -> FAIL [fdo#105682]

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c:
    - shard-skl:          NOTRUN -> INCOMPLETE [fdo#104108] / [fdo#107773]

  * igt@kms_plane@pixel-format-pipe-a-planes:
    - shard-skl:          NOTRUN -> DMESG-WARN [fdo#106885]

  * igt@kms_plane_alpha_blend@pipe-a-alpha-opaque-fb:
    - shard-skl:          NOTRUN -> FAIL [fdo#108145] +4

  * igt@kms_plane_alpha_blend@pipe-a-coverage-7efc:
    - shard-skl:          PASS -> FAIL [fdo#107815] / [fdo#108145]

  * igt@kms_plane_alpha_blend@pipe-b-coverage-7efc:
    - shard-skl:          PASS -> FAIL [fdo#107815]

  * igt@kms_plane_alpha_blend@pipe-c-alpha-7efc:
    - shard-skl:          NOTRUN -> FAIL [fdo#107815] / [fdo#108145]

  * igt@kms_plane_multiple@atomic-pipe-b-tiling-x:
    - shard-glk:          PASS -> FAIL [fdo#103166] +2

  * igt@kms_setmode@basic:
    - shard-skl:          NOTRUN -> FAIL [fdo#99912]
    - shard-hsw:          PASS -> FAIL [fdo#99912]

  * igt@kms_vblank@pipe-c-ts-continuation-idle-hang:
    - {shard-iclb}:       NOTRUN -> DMESG-WARN [fdo#108928] +1

  * igt@pm_rpm@fences:
    - shard-skl:          PASS -> INCOMPLETE [fdo#107807]

  * igt@pm_rpm@reg-read-ioctl:
    - shard-kbl:          PASS -> DMESG-WARN [fdo#103313] / [fdo#103558] / [fdo#105602] +7

  
#### Possible fixes ####

  * igt@debugfs_test@read_all_entries:
    - shard-skl:          INCOMPLETE [fdo#108901] -> PASS

  * igt@kms_content_protection@atomic:
    - shard-kbl:          FAIL [fdo#108597] -> SKIP

  * igt@kms_cursor_crc@cursor-256x256-sliding:
    - shard-glk:          FAIL [fdo#103232] -> PASS +2

  * igt@kms_flip@plain-flip-fb-recreate-interruptible:
    - shard-skl:          FAIL [fdo#100368] -> PASS

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-draw-pwrite:
    - shard-apl:          FAIL [fdo#103167] -> PASS +1

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-spr-indfb-move:
    - shard-glk:          FAIL [fdo#103167] -> PASS

  * igt@kms_plane_multiple@atomic-pipe-c-tiling-x:
    - shard-glk:          FAIL [fdo#103166] -> PASS +1

  * igt@kms_psr@suspend:
    - shard-skl:          INCOMPLETE [fdo#107773] -> PASS

  * {igt@kms_rotation_crc@multiplane-rotation-cropping-top}:
    - shard-kbl:          DMESG-FAIL -> PASS

  * igt@kms_universal_plane@universal-plane-pipe-c-functional:
    - shard-apl:          FAIL [fdo#103166] -> PASS +1

  * igt@pm_rpm@universal-planes-dpms:
    - shard-skl:          INCOMPLETE [fdo#107807] -> PASS

  
#### Warnings ####

  * igt@i915_selftest@live_contexts:
    - {shard-iclb}:       INCOMPLETE [fdo#108315] -> DMESG-FAIL [fdo#108569]

  * igt@kms_plane_alpha_blend@pipe-c-alpha-opaque-fb:
    - shard-kbl:          FAIL [fdo#108145] -> DMESG-FAIL [fdo#103558] / [fdo#105602] / [fdo#108145]

  * {igt@runner@aborted}:
    - {shard-iclb}:       ( 34 FAIL ) [fdo#108315] / [fdo#108924] / [fdo#108928] -> ( 35 FAIL ) [fdo#108924] / [fdo#108928]

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#100368]: https://bugs.freedesktop.org/show_bug.cgi?id=100368
  [fdo#103158]: https://bugs.freedesktop.org/show_bug.cgi?id=103158
  [fdo#103166]: https://bugs.freedesktop.org/show_bug.cgi?id=103166
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103191]: https://bugs.freedesktop.org/show_bug.cgi?id=103191
  [fdo#103232]: https://bugs.freedesktop.org/show_bug.cgi?id=103232
  [fdo#103313]: https://bugs.freedesktop.org/show_bug.cgi?id=103313
  [fdo#103558]: https://bugs.freedesktop.org/show_bug.cgi?id=103558
  [fdo#103833]: https://bugs.freedesktop.org/show_bug.cgi?id=103833
  [fdo#104108]: https://bugs.freedesktop.org/show_bug.cgi?id=104108
  [fdo#105363]: https://bugs.freedesktop.org/show_bug.cgi?id=105363
  [fdo#105602]: https://bugs.freedesktop.org/show_bug.cgi?id=105602
  [fdo#105682]: https://bugs.freedesktop.org/show_bug.cgi?id=105682
  [fdo#105683]: https://bugs.freedesktop.org/show_bug.cgi?id=105683
  [fdo#106885]: https://bugs.freedesktop.org/show_bug.cgi?id=106885
  [fdo#107773]: https://bugs.freedesktop.org/show_bug.cgi?id=107773
  [fdo#107807]: https://bugs.freedesktop.org/show_bug.cgi?id=107807
  [fdo#107815]: https://bugs.freedesktop.org/show_bug.cgi?id=107815
  [fdo#107956]: https://bugs.freedesktop.org/show_bug.cgi?id=107956
  [fdo#108039]: https://bugs.freedesktop.org/show_bug.cgi?id=108039
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#108315]: https://bugs.freedesktop.org/show_bug.cgi?id=108315
  [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
  [fdo#108597]: https://bugs.freedesktop.org/show_bug.cgi?id=108597
  [fdo#108901]: https://bugs.freedesktop.org/show_bug.cgi?id=108901
  [fdo#108924]: https://bugs.freedesktop.org/show_bug.cgi?id=108924
  [fdo#108928]: https://bugs.freedesktop.org/show_bug.cgi?id=108928
  [fdo#99912]: https://bugs.freedesktop.org/show_bug.cgi?id=99912


Participating hosts (7 -> 7)
------------------------------

  No changes in participating hosts


Build changes
-------------

    * Linux: CI_DRM_5256 -> Patchwork_11010

  CI_DRM_5256: eb16a05c5410e4b0b95798b93bace2440507a4c7 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4740: dd8de0efa64e50bc06c2882a0028d98ad870e752 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_11010: 0633dd1e91acb4a83bed84fafa340cb3a6510151 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_11010/

* Re: [PATCH 5/7] drm/i915: Return immediately if trylock fails for direct-reclaim
  2018-12-04 14:15 ` [PATCH 5/7] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
@ 2018-12-06 15:18   ` Tvrtko Ursulin
  2018-12-06 21:30     ` Chris Wilson
  2018-12-10 11:29   ` Tvrtko Ursulin
  1 sibling, 1 reply; 19+ messages in thread
From: Tvrtko Ursulin @ 2018-12-06 15:18 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 04/12/2018 14:15, Chris Wilson wrote:
> Ignore trying to shrink from i915 if we fail to acquire the struct_mutex
> in the shrinker while performing direct-reclaim. The trade-off being
> (much) lower latency for non-i915 clients at an increased risk of being
> unable to obtain a page from direct-reclaim without hitting the
> oom-notifier. The proviso being that we still keep trying to hard
> obtain the lock for oom so that we can reap under heavy memory pressure.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h          |  4 ++--
>   drivers/gpu/drm/i915/i915_gem_shrinker.c | 24 +++++++++++-------------
>   2 files changed, 13 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index c5f01964f0fb..1cad218b71d3 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2916,9 +2916,9 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
>   	__i915_gem_object_unpin_pages(obj);
>   }
>   
> -enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock */
> +enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
>   	I915_MM_NORMAL = 0,
> -	I915_MM_SHRINKER
> +	I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */
>   };
>   
>   void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index ea90d3a0d511..d461f458f4af 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -36,7 +36,9 @@
>   #include "i915_drv.h"
>   #include "i915_trace.h"
>   
> -static bool shrinker_lock(struct drm_i915_private *i915, bool *unlock)
> +static bool shrinker_lock(struct drm_i915_private *i915,
> +			  unsigned int flags,
> +			  bool *unlock)
>   {
>   	switch (mutex_trylock_recursive(&i915->drm.struct_mutex)) {
>   	case MUTEX_TRYLOCK_RECURSIVE:
> @@ -45,15 +47,11 @@ static bool shrinker_lock(struct drm_i915_private *i915, bool *unlock)
>   
>   	case MUTEX_TRYLOCK_FAILED:
>   		*unlock = false;
> -		preempt_disable();
> -		do {
> -			cpu_relax();
> -			if (mutex_trylock(&i915->drm.struct_mutex)) {
> -				*unlock = true;
> -				break;
> -			}
> -		} while (!need_resched());
> -		preempt_enable();
> +		if (flags & I915_SHRINK_ACTIVE) {
> +			mutex_lock_nested(&i915->drm.struct_mutex,
> +					  I915_MM_SHRINKER);
> +			*unlock = true;
> +		}

I just realized one oddity in the shrinker code which escaped me
before: the call paths will call shrinker_lock twice. For instance,
i915_gem_shrinker_vmap and i915_gem_shrinker_scan both first take the
lock with flags of zero, and then call i915_gem_shrink, which takes the
lock again, so the recursive path is always taken.

I think we need to clean this up so it is easier to understand the code 
before further tweaking, even if in this patch. For instance adding 
I915_SHRINK_LOCKED would solve it.
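
A rough and untested sketch of what I have in mind (the
I915_SHRINK_LOCKED name and its bit value are just placeholders,
everything else follows this patch):

#define I915_SHRINK_LOCKED BIT(31) /* caller already holds struct_mutex */

static bool shrinker_lock(struct drm_i915_private *i915,
                          unsigned int flags,
                          bool *unlock)
{
        /* New: the caller took struct_mutex for us, nothing to drop. */
        if (flags & I915_SHRINK_LOCKED) {
                *unlock = false;
                return true;
        }

        switch (mutex_trylock_recursive(&i915->drm.struct_mutex)) {
        case MUTEX_TRYLOCK_RECURSIVE:
                *unlock = false;
                return true;

        case MUTEX_TRYLOCK_FAILED:
                *unlock = false;
                if (flags & I915_SHRINK_ACTIVE) {
                        mutex_lock_nested(&i915->drm.struct_mutex,
                                          I915_MM_SHRINKER);
                        *unlock = true;
                }
                return *unlock;

        case MUTEX_TRYLOCK_SUCCESS:
                *unlock = true;
                return true;
        }

        BUG();
}

i915_gem_shrinker_scan() and i915_gem_shrinker_vmap() would then pass
I915_SHRINK_LOCKED into i915_gem_shrink() once they already hold the
lock, so only the outermost entry point ever goes through the trylock
dance.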

shrinker_lock_uninterruptible is also funky in that it doesn't respect 
the timeout in the waiting for idle phase.
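
And for the timeout, something like feeding the remaining budget into
the idle wait instead of MAX_SCHEDULE_TIMEOUT would at least bound it
(again just a sketch, assuming the ms-based timeout parameter, which I
am calling timeout_ms here):

        unsigned long end = jiffies + msecs_to_jiffies(timeout_ms);

        do {
                long remaining = end - jiffies;

                /* Give up once the caller's budget is exhausted. */
                if (remaining <= 0)
                        return false;

                if (i915_gem_wait_for_idle(i915, 0, remaining) == 0 &&
                    shrinker_lock(i915, 0, unlock))
                        break;

                schedule_timeout_killable(1);
        } while (1);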

Sounds reasonable?

Regards,

Tvrtko

>   		return *unlock;
>   
>   	case MUTEX_TRYLOCK_SUCCESS:
> @@ -160,7 +158,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
>   	unsigned long scanned = 0;
>   	bool unlock;
>   
> -	if (!shrinker_lock(i915, &unlock))
> +	if (!shrinker_lock(i915, flags, &unlock))
>   		return 0;
>   
>   	/*
> @@ -357,7 +355,7 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
>   
>   	sc->nr_scanned = 0;
>   
> -	if (!shrinker_lock(i915, &unlock))
> +	if (!shrinker_lock(i915, 0, &unlock))
>   		return SHRINK_STOP;
>   
>   	freed = i915_gem_shrink(i915,
> @@ -397,7 +395,7 @@ shrinker_lock_uninterruptible(struct drm_i915_private *i915, bool *unlock,
>   	do {
>   		if (i915_gem_wait_for_idle(i915,
>   					   0, MAX_SCHEDULE_TIMEOUT) == 0 &&
> -		    shrinker_lock(i915, unlock))
> +		    shrinker_lock(i915, 0, unlock))
>   			break;
>   
>   		schedule_timeout_killable(1);
> 

* Re: [PATCH 5/7] drm/i915: Return immediately if trylock fails for direct-reclaim
  2018-12-06 15:18   ` Tvrtko Ursulin
@ 2018-12-06 21:30     ` Chris Wilson
  2018-12-07  8:38       ` Chris Wilson
  0 siblings, 1 reply; 19+ messages in thread
From: Chris Wilson @ 2018-12-06 21:30 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-12-06 15:18:13)
> 
> On 04/12/2018 14:15, Chris Wilson wrote:
> > Ignore trying to shrink from i915 if we fail to acquire the struct_mutex
> > in the shrinker while performing direct-reclaim. The trade-off being
> > (much) lower latency for non-i915 clients at an increased risk of being
> > unable to obtain a page from direct-reclaim without hitting the
> > oom-notifier. The proviso being that we still keep trying to hard
> > obtain the lock for oom so that we can reap under heavy memory pressure.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_drv.h          |  4 ++--
> >   drivers/gpu/drm/i915/i915_gem_shrinker.c | 24 +++++++++++-------------
> >   2 files changed, 13 insertions(+), 15 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index c5f01964f0fb..1cad218b71d3 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -2916,9 +2916,9 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
> >       __i915_gem_object_unpin_pages(obj);
> >   }
> >   
> > -enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock */
> > +enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
> >       I915_MM_NORMAL = 0,
> > -     I915_MM_SHRINKER
> > +     I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */
> >   };
> >   
> >   void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> > diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > index ea90d3a0d511..d461f458f4af 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > @@ -36,7 +36,9 @@
> >   #include "i915_drv.h"
> >   #include "i915_trace.h"
> >   
> > -static bool shrinker_lock(struct drm_i915_private *i915, bool *unlock)
> > +static bool shrinker_lock(struct drm_i915_private *i915,
> > +                       unsigned int flags,
> > +                       bool *unlock)
> >   {
> >       switch (mutex_trylock_recursive(&i915->drm.struct_mutex)) {
> >       case MUTEX_TRYLOCK_RECURSIVE:
> > @@ -45,15 +47,11 @@ static bool shrinker_lock(struct drm_i915_private *i915, bool *unlock)
> >   
> >       case MUTEX_TRYLOCK_FAILED:
> >               *unlock = false;
> > -             preempt_disable();
> > -             do {
> > -                     cpu_relax();
> > -                     if (mutex_trylock(&i915->drm.struct_mutex)) {
> > -                             *unlock = true;
> > -                             break;
> > -                     }
> > -             } while (!need_resched());
> > -             preempt_enable();
> > +             if (flags & I915_SHRINK_ACTIVE) {
> > +                     mutex_lock_nested(&i915->drm.struct_mutex,
> > +                                       I915_MM_SHRINKER);
> > +                     *unlock = true;
> > +             }
> 
> I just realized one oddity in the shrinker code which escaped me
> before: the call paths will call shrinker_lock twice. For instance,
> i915_gem_shrinker_vmap and i915_gem_shrinker_scan both first take the
> lock with flags of zero, and then call i915_gem_shrink, which takes the
> lock again, so the recursive path is always taken.
> 
> I think we need to clean this up so it is easier to understand the code 
> before further tweaking, even if in this patch. For instance adding 
> I915_SHRINK_LOCKED would solve it.
> 
> shrinker_lock_uninterruptible is also funky in that it doesn't respect 
> the timeout in the waiting for idle phase.
> 
> Sounds reasonable?

My alternate code for this avoids struct_mutex here, but the compromise
is that we can't process active requests here, and can't reap pages from
zombie objects (objects that are still waiting for the RCU release).
-Chris

* Re: [PATCH 5/7] drm/i915: Return immediately if trylock fails for direct-reclaim
  2018-12-06 21:30     ` Chris Wilson
@ 2018-12-07  8:38       ` Chris Wilson
  0 siblings, 0 replies; 19+ messages in thread
From: Chris Wilson @ 2018-12-07  8:38 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Chris Wilson (2018-12-06 21:30:25)
> Quoting Tvrtko Ursulin (2018-12-06 15:18:13)
> > 
> > On 04/12/2018 14:15, Chris Wilson wrote:
> > > Ignore trying to shrink from i915 if we fail to acquire the struct_mutex
> > > in the shrinker while performing direct-reclaim. The trade-off being
> > > (much) lower latency for non-i915 clients at an increased risk of being
> > > unable to obtain a page from direct-reclaim without hitting the
> > > oom-notifier. The proviso being that we still keep trying hard to
> > > obtain the lock for oom so that we can reap under heavy memory pressure.
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > > ---
> > >   drivers/gpu/drm/i915/i915_drv.h          |  4 ++--
> > >   drivers/gpu/drm/i915/i915_gem_shrinker.c | 24 +++++++++++-------------
> > >   2 files changed, 13 insertions(+), 15 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > > index c5f01964f0fb..1cad218b71d3 100644
> > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > @@ -2916,9 +2916,9 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
> > >       __i915_gem_object_unpin_pages(obj);
> > >   }
> > >   
> > > -enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock */
> > > +enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
> > >       I915_MM_NORMAL = 0,
> > > -     I915_MM_SHRINKER
> > > +     I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */
> > >   };
> > >   
> > >   void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> > > diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > > index ea90d3a0d511..d461f458f4af 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > > @@ -36,7 +36,9 @@
> > >   #include "i915_drv.h"
> > >   #include "i915_trace.h"
> > >   
> > > -static bool shrinker_lock(struct drm_i915_private *i915, bool *unlock)
> > > +static bool shrinker_lock(struct drm_i915_private *i915,
> > > +                       unsigned int flags,
> > > +                       bool *unlock)
> > >   {
> > >       switch (mutex_trylock_recursive(&i915->drm.struct_mutex)) {
> > >       case MUTEX_TRYLOCK_RECURSIVE:
> > > @@ -45,15 +47,11 @@ static bool shrinker_lock(struct drm_i915_private *i915, bool *unlock)
> > >   
> > >       case MUTEX_TRYLOCK_FAILED:
> > >               *unlock = false;
> > > -             preempt_disable();
> > > -             do {
> > > -                     cpu_relax();
> > > -                     if (mutex_trylock(&i915->drm.struct_mutex)) {
> > > -                             *unlock = true;
> > > -                             break;
> > > -                     }
> > > -             } while (!need_resched());
> > > -             preempt_enable();
> > > +             if (flags & I915_SHRINK_ACTIVE) {
> > > +                     mutex_lock_nested(&i915->drm.struct_mutex,
> > > +                                       I915_MM_SHRINKER);
> > > +                     *unlock = true;
> > > +             }
> > 
> > I just realized one oddity in the shrinker code which escaped me
> > before: the call paths call shrinker_lock twice. For instance,
> > i915_gem_shrinker_vmap and i915_gem_shrinker_scan both first take the
> > lock with flags of zero, and then call i915_gem_shrink, which takes
> > the lock again, so the recursive path is always taken.
> > 
> > I think we need to clean this up so the code is easier to understand
> > before further tweaking, even if it is done in this patch. For
> > instance, adding I915_SHRINK_LOCKED would solve it.
> > 
> > shrinker_lock_uninterruptible is also funky in that it doesn't respect
> > the timeout in the wait-for-idle phase.
> > 
> > Sounds reasonable?
> 
> My alternate code for this avoids struct_mutex here, but the compromise
> is that we can't process active requests and can't reap pages from
> zombie objects (objects that are still waiting for their RCU release).

As for what the current patch describes, I still like it. It basically
says: if we get to this point and would need to wait and freeze the
batch queue, but haven't actually committed ourselves to that, then
don't.
-Chris
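
Putting the quoted hunks back together, the post-patch shrinker_lock()
reads roughly as below. The MUTEX_TRYLOCK_RECURSIVE and
MUTEX_TRYLOCK_SUCCESS branches fall outside the quoted diff context and
are reconstructed here, so treat them as an approximation:

static bool shrinker_lock(struct drm_i915_private *i915,
			  unsigned int flags,
			  bool *unlock)
{
	switch (mutex_trylock_recursive(&i915->drm.struct_mutex)) {
	case MUTEX_TRYLOCK_RECURSIVE:
		/* This task already holds struct_mutex: reuse it. */
		*unlock = false;
		return true;

	case MUTEX_TRYLOCK_FAILED:
		/*
		 * Held by someone else. For plain direct-reclaim (no
		 * I915_SHRINK_ACTIVE) bail out immediately instead of
		 * spinning for the lock; only when the caller has already
		 * committed to waiting do we block, annotated with the
		 * I915_MM_SHRINKER lockdep subclass.
		 */
		*unlock = false;
		if (flags & I915_SHRINK_ACTIVE) {
			mutex_lock_nested(&i915->drm.struct_mutex,
					  I915_MM_SHRINKER);
			*unlock = true;
		}
		return *unlock;

	case MUTEX_TRYLOCK_SUCCESS:
		*unlock = true;
		return true;
	}

	BUG();
}

With flags of zero (the direct-reclaim path via i915_gem_shrinker_scan()),
a contended mutex now means an immediate SHRINK_STOP; callers that pass
I915_SHRINK_ACTIVE, which per the commit message is meant to cover the
oom path, still block for the lock.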

* Re: [PATCH 5/7] drm/i915: Return immediately if trylock fails for direct-reclaim
  2018-12-04 14:15 ` [PATCH 5/7] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
  2018-12-06 15:18   ` Tvrtko Ursulin
@ 2018-12-10 11:29   ` Tvrtko Ursulin
  1 sibling, 0 replies; 19+ messages in thread
From: Tvrtko Ursulin @ 2018-12-10 11:29 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 04/12/2018 14:15, Chris Wilson wrote:
> Ignore trying to shrink from i915 if we fail to acquire the struct_mutex
> in the shrinker while performing direct-reclaim. The trade-off being
> (much) lower latency for non-i915 clients at an increased risk of being
> unable to obtain a page from direct-reclaim without hitting the
> oom-notifier. The proviso being that we still keep trying hard to
> obtain the lock for oom so that we can reap under heavy memory pressure.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h          |  4 ++--
>   drivers/gpu/drm/i915/i915_gem_shrinker.c | 24 +++++++++++-------------
>   2 files changed, 13 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index c5f01964f0fb..1cad218b71d3 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2916,9 +2916,9 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
>   	__i915_gem_object_unpin_pages(obj);
>   }
>   
> -enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock */
> +enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
>   	I915_MM_NORMAL = 0,
> -	I915_MM_SHRINKER
> +	I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */
>   };
>   
>   void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index ea90d3a0d511..d461f458f4af 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -36,7 +36,9 @@
>   #include "i915_drv.h"
>   #include "i915_trace.h"
>   
> -static bool shrinker_lock(struct drm_i915_private *i915, bool *unlock)
> +static bool shrinker_lock(struct drm_i915_private *i915,
> +			  unsigned int flags,
> +			  bool *unlock)
>   {
>   	switch (mutex_trylock_recursive(&i915->drm.struct_mutex)) {
>   	case MUTEX_TRYLOCK_RECURSIVE:
> @@ -45,15 +47,11 @@ static bool shrinker_lock(struct drm_i915_private *i915, bool *unlock)
>   
>   	case MUTEX_TRYLOCK_FAILED:
>   		*unlock = false;
> -		preempt_disable();
> -		do {
> -			cpu_relax();
> -			if (mutex_trylock(&i915->drm.struct_mutex)) {
> -				*unlock = true;
> -				break;
> -			}
> -		} while (!need_resched());
> -		preempt_enable();
> +		if (flags & I915_SHRINK_ACTIVE) {
> +			mutex_lock_nested(&i915->drm.struct_mutex,
> +					  I915_MM_SHRINKER);

The nested annotation still scares me, even though so far I have failed
to break it.

How about, and especially given the commit message talks about "still 
trying hard under oom", you actually split this patch into two:

1. Implement exactly what the commit says - so "if (flags & 
I915_SHRINK_ACTIVE)" the code keeps using the loopy trylock.

2. Add a patch which I suggested earlier, introducing I915_SHRINK_LOCKED,
so we avoid hitting the recursive path from the shrinker itself (sketched
below).

3. Clean up the wait timeout handling in the vmap notifier case.

4. And only finally add the nested trick.

Sounds like a reasonable shrinker cleanup and improvement mini-series?

Regards,

Tvrtko

> +			*unlock = true;
> +		}
>   		return *unlock;
>   
>   	case MUTEX_TRYLOCK_SUCCESS:
> @@ -160,7 +158,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
>   	unsigned long scanned = 0;
>   	bool unlock;
>   
> -	if (!shrinker_lock(i915, &unlock))
> +	if (!shrinker_lock(i915, flags, &unlock))
>   		return 0;
>   
>   	/*
> @@ -357,7 +355,7 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
>   
>   	sc->nr_scanned = 0;
>   
> -	if (!shrinker_lock(i915, &unlock))
> +	if (!shrinker_lock(i915, 0, &unlock))
>   		return SHRINK_STOP;
>   
>   	freed = i915_gem_shrink(i915,
> @@ -397,7 +395,7 @@ shrinker_lock_uninterruptible(struct drm_i915_private *i915, bool *unlock,
>   	do {
>   		if (i915_gem_wait_for_idle(i915,
>   					   0, MAX_SCHEDULE_TIMEOUT) == 0 &&
> -		    shrinker_lock(i915, unlock))
> +		    shrinker_lock(i915, 0, unlock))
>   			break;
>   
>   		schedule_timeout_killable(1);
> 
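
For illustration only, one possible shape of the I915_SHRINK_LOCKED idea
from point 2 above. The flag, its value and the plumbing are hypothetical
and not part of the posted patch:

/* Hypothetical flag: the caller already holds struct_mutex. */
#define I915_SHRINK_LOCKED	BIT(5)	/* value picked arbitrarily here */

unsigned long
i915_gem_shrink(struct drm_i915_private *i915,
		unsigned long target,
		unsigned long *nr_scanned,
		unsigned int flags)
{
	unsigned long count = 0;
	bool unlock = false;

	/*
	 * Callers such as i915_gem_shrinker_scan(), which have already
	 * taken struct_mutex via shrinker_lock(), would pass
	 * I915_SHRINK_LOCKED so that this function does not re-enter
	 * shrinker_lock() and the MUTEX_TRYLOCK_RECURSIVE branch stops
	 * being exercised on the regular shrinker paths.
	 */
	if (!(flags & I915_SHRINK_LOCKED) &&
	    !shrinker_lock(i915, flags, &unlock))
		return 0;

	/* ... existing reclaim loop unchanged, accumulating into count ... */

	shrinker_unlock(i915, unlock);
	return count;
}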

* Re: [PATCH 6/7] drm/i915/userptr: Avoid struct_mutex recursion for mmu_invalidate_range_start
  2018-12-04 14:15 ` [PATCH 6/7] drm/i915/userptr: Avoid struct_mutex recursion for mmu_invalidate_range_start Chris Wilson
@ 2018-12-10 12:00   ` Tvrtko Ursulin
  0 siblings, 0 replies; 19+ messages in thread
From: Tvrtko Ursulin @ 2018-12-10 12:00 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 04/12/2018 14:15, Chris Wilson wrote:
> Since commit 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu
> notifiers") we have been able to report failure from
> mmu_invalidate_range_start which allows us to use a trylock on the
> struct_mutex to avoid potential recursion and report -EBUSY instead.
> Furthermore, this allows us to pull the work into the main callback and
> avoid the sleight-of-hand in using a workqueue to avoid lockdep.
> 
> However, not all paths to mmu_invalidate_range_start are prepared to
> handle failure, so instead of reporting the recursion, deal with it by
> propagating the failure upwards to the callers, who can decide for
> themselves whether to handle or report it.
> 
> v2: Mark up the recursive lock behaviour and comment on the various weak
> points.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108375
> References: 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu notifiers")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h         |   4 +-
>   drivers/gpu/drm/i915/i915_gem.c         |  30 +++-
>   drivers/gpu/drm/i915/i915_gem_object.h  |   7 +
>   drivers/gpu/drm/i915/i915_gem_userptr.c | 221 +++++++++++-------------
>   4 files changed, 136 insertions(+), 126 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 1cad218b71d3..144b7737c0a2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2921,8 +2921,8 @@ enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
>   	I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */
>   };
>   
> -void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> -				 enum i915_mm_subclass subclass);
> +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> +				enum i915_mm_subclass subclass);
>   void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj);
>   
>   enum i915_map_type {
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index d36a9755ad91..2ad8f94f5056 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2447,8 +2447,8 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
>   	struct sg_table *pages;
>   
>   	pages = fetch_and_zero(&obj->mm.pages);
> -	if (!pages)
> -		return NULL;
> +	if (IS_ERR_OR_NULL(pages))
> +		return pages;
>   
>   	spin_lock(&i915->mm.obj_lock);
>   	list_del(&obj->mm.link);
> @@ -2472,22 +2472,23 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
>   	return pages;
>   }
>   
> -void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> -				 enum i915_mm_subclass subclass)
> +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> +				enum i915_mm_subclass subclass)
>   {
>   	struct sg_table *pages;
> +	int ret;
>   
>   	if (i915_gem_object_has_pinned_pages(obj))
> -		return;
> +		return -EBUSY;
>   
>   	GEM_BUG_ON(obj->bind_count);
> -	if (!i915_gem_object_has_pages(obj))
> -		return;
>   
>   	/* May be called by shrinker from within get_pages() (on another bo) */
>   	mutex_lock_nested(&obj->mm.lock, subclass);
> -	if (unlikely(atomic_read(&obj->mm.pages_pin_count)))
> +	if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
> +		ret = -EBUSY;
>   		goto unlock;
> +	}
>   
>   	/*
>   	 * ->put_pages might need to allocate memory for the bit17 swizzle
> @@ -2495,11 +2496,24 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
>   	 * lists early.
>   	 */
>   	pages = __i915_gem_object_unset_pages(obj);
> +
> +	/*
> +	 * XXX Temporary hijinx to avoid updating all backends to handle
> +	 * NULL pages. In the future, when we have more asynchronous
> +	 * get_pages backends we should be better able to handle the
> +	 * cancellation of the async task in a more uniform manner.
> +	 */
> +	if (!pages && !i915_gem_object_needs_async_cancel(obj))
> +		pages = ERR_PTR(-EINVAL);
> +
>   	if (!IS_ERR(pages))
>   		obj->ops->put_pages(obj, pages);
>   
> +	ret = 0;
>   unlock:
>   	mutex_unlock(&obj->mm.lock);
> +
> +	return ret;
>   }
>   
>   bool i915_sg_trim(struct sg_table *orig_st)
> diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
> index a6dd7c46de0d..49ce797173b5 100644
> --- a/drivers/gpu/drm/i915/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/i915_gem_object.h
> @@ -56,6 +56,7 @@ struct drm_i915_gem_object_ops {
>   #define I915_GEM_OBJECT_HAS_STRUCT_PAGE	BIT(0)
>   #define I915_GEM_OBJECT_IS_SHRINKABLE	BIT(1)
>   #define I915_GEM_OBJECT_IS_PROXY	BIT(2)
> +#define I915_GEM_OBJECT_ASYNC_CANCEL	BIT(3)
>   
>   	/* Interface between the GEM object and its backing storage.
>   	 * get_pages() is called once prior to the use of the associated set
> @@ -386,6 +387,12 @@ i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj)
>   	return obj->ops->flags & I915_GEM_OBJECT_IS_PROXY;
>   }
>   
> +static inline bool
> +i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj)
> +{
> +	return obj->ops->flags & I915_GEM_OBJECT_ASYNC_CANCEL;
> +}
> +
>   static inline bool
>   i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
>   {
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index 2c9b284036d1..8b07fd44731f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -50,79 +50,84 @@ struct i915_mmu_notifier {
>   	struct hlist_node node;
>   	struct mmu_notifier mn;
>   	struct rb_root_cached objects;
> -	struct workqueue_struct *wq;
> +	struct i915_mm_struct *mm;
>   };
>   
>   struct i915_mmu_object {
>   	struct i915_mmu_notifier *mn;
>   	struct drm_i915_gem_object *obj;
>   	struct interval_tree_node it;
> -	struct list_head link;
> -	struct work_struct work;
> -	bool attached;
>   };
>   
> -static void cancel_userptr(struct work_struct *work)
> -{
> -	struct i915_mmu_object *mo = container_of(work, typeof(*mo), work);
> -	struct drm_i915_gem_object *obj = mo->obj;
> -	struct work_struct *active;
> -
> -	/* Cancel any active worker and force us to re-evaluate gup */
> -	mutex_lock(&obj->mm.lock);
> -	active = fetch_and_zero(&obj->userptr.work);
> -	mutex_unlock(&obj->mm.lock);
> -	if (active)
> -		goto out;
> -
> -	i915_gem_object_wait(obj, I915_WAIT_ALL, MAX_SCHEDULE_TIMEOUT, NULL);
> -
> -	mutex_lock(&obj->base.dev->struct_mutex);
> -
> -	/* We are inside a kthread context and can't be interrupted */
> -	if (i915_gem_object_unbind(obj) == 0)
> -		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
> -	WARN_ONCE(i915_gem_object_has_pages(obj),
> -		  "Failed to release pages: bind_count=%d, pages_pin_count=%d, pin_global=%d\n",
> -		  obj->bind_count,
> -		  atomic_read(&obj->mm.pages_pin_count),
> -		  obj->pin_global);
> -
> -	mutex_unlock(&obj->base.dev->struct_mutex);
> -
> -out:
> -	i915_gem_object_put(obj);
> -}
> -
>   static void add_object(struct i915_mmu_object *mo)
>   {
> -	if (mo->attached)
> +	if (!RB_EMPTY_NODE(&mo->it.rb))
>   		return;
>   
>   	interval_tree_insert(&mo->it, &mo->mn->objects);
> -	mo->attached = true;
>   }
>   
>   static void del_object(struct i915_mmu_object *mo)
>   {
> -	if (!mo->attached)
> +	if (RB_EMPTY_NODE(&mo->it.rb))
>   		return;
>   
>   	interval_tree_remove(&mo->it, &mo->mn->objects);
> -	mo->attached = false;
> +	RB_CLEAR_NODE(&mo->it.rb);
> +}
> +
> +static void
> +__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
> +{
> +	struct i915_mmu_object *mo = obj->userptr.mmu_object;
> +
> +	/*
> +	 * During mm_invalidate_range we need to cancel any userptr that
> +	 * overlaps the range being invalidated. Doing so requires the
> +	 * struct_mutex, and that risks recursion. In order to cause
> +	 * recursion, the user must alias the userptr address space with
> +	 * a GTT mmapping (possible with a MAP_FIXED) - then when we have
> +	 * to invalidate that mmaping, mm_invalidate_range is called with
> +	 * the userptr address *and* the struct_mutex held.  To prevent that
> +	 * we set a flag under the i915_mmu_notifier spinlock to indicate
> +	 * whether this object is valid.
> +	 */
> +	if (!mo)
> +		return;
> +
> +	spin_lock(&mo->mn->lock);
> +	if (value)
> +		add_object(mo);
> +	else
> +		del_object(mo);
> +	spin_unlock(&mo->mn->lock);
> +}
> +
> +static struct mutex *__i915_mutex_lock_recursive(struct mutex *m)
> +{
> +	switch (mutex_trylock_recursive(m)) {
> +	default:
> +	case MUTEX_TRYLOCK_FAILED:
> +		mutex_lock_nested(m, I915_MM_SHRINKER);

I have no idea why the nested annotation is ok, or whether it fixes
anything.

The IGT I am testing manages to hit this path a lot, and with Daniel's 
MMU notifier and cross-release resurrection patches, with the normal 
mutex_lock flavour in there, lockdep is silent and there are no 
deadlocks. Likewise, I failed to trigger any failures with the existing
gem_userptr_blits subtests of interest.

So I don't know: does it actually fix anything, or does it only hide
things?

Regards,

Tvrtko

> +	case MUTEX_TRYLOCK_SUCCESS:
> +		return m;
> +
> +	case MUTEX_TRYLOCK_RECURSIVE:
> +		return ERR_PTR(-EEXIST);
> +	}
>   }
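
On the question above about the nested annotation: mutex_lock_nested()
does not change locking behaviour, it only files the acquisition under a
different lockdep subclass (with lock debugging disabled it is simply
mutex_lock()), so the annotation silences the apparent self-recursion
report rather than proving anything. The safety argument still rests on
mutex_trylock_recursive() distinguishing "held by this task" from "held
by another task". An annotated restatement of the helper quoted above:

static struct mutex *__i915_mutex_lock_recursive(struct mutex *m)
{
	switch (mutex_trylock_recursive(m)) {
	default:
	case MUTEX_TRYLOCK_FAILED:
		/*
		 * Held by another task: block for it, but tell lockdep to
		 * record this acquisition under the I915_MM_SHRINKER
		 * subclass so it is not reported as struct_mutex ->
		 * struct_mutex recursion.
		 */
		mutex_lock_nested(m, I915_MM_SHRINKER);
		/* fall through */
	case MUTEX_TRYLOCK_SUCCESS:
		return m;			/* caller must unlock */

	case MUTEX_TRYLOCK_RECURSIVE:
		return ERR_PTR(-EEXIST);	/* already ours: do not unlock */
	}
}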
>   
>   static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
> -						       struct mm_struct *mm,
> -						       unsigned long start,
> -						       unsigned long end,
> -						       bool blockable)
> +						      struct mm_struct *mm,
> +						      unsigned long start,
> +						      unsigned long end,
> +						      bool blockable)
>   {
>   	struct i915_mmu_notifier *mn =
>   		container_of(_mn, struct i915_mmu_notifier, mn);
> -	struct i915_mmu_object *mo;
>   	struct interval_tree_node *it;
> -	LIST_HEAD(cancelled);
> +	struct mutex *unlock = NULL;
> +	int ret = 0;
>   
>   	if (RB_EMPTY_ROOT(&mn->objects.rb_root))
>   		return 0;
> @@ -133,11 +138,15 @@ static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
>   	spin_lock(&mn->lock);
>   	it = interval_tree_iter_first(&mn->objects, start, end);
>   	while (it) {
> +		struct drm_i915_gem_object *obj;
> +
>   		if (!blockable) {
> -			spin_unlock(&mn->lock);
> -			return -EAGAIN;
> +			ret = -EAGAIN;
> +			break;
>   		}
> -		/* The mmu_object is released late when destroying the
> +
> +		/*
> +		 * The mmu_object is released late when destroying the
>   		 * GEM object so it is entirely possible to gain a
>   		 * reference on an object in the process of being freed
>   		 * since our serialisation is via the spinlock and not
> @@ -146,21 +155,39 @@ static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
>   		 * use-after-free we only acquire a reference on the
>   		 * object if it is not in the process of being destroyed.
>   		 */
> -		mo = container_of(it, struct i915_mmu_object, it);
> -		if (kref_get_unless_zero(&mo->obj->base.refcount))
> -			queue_work(mn->wq, &mo->work);
> +		obj = container_of(it, struct i915_mmu_object, it)->obj;
> +		if (!kref_get_unless_zero(&obj->base.refcount)) {
> +			it = interval_tree_iter_next(it, start, end);
> +			continue;
> +		}
> +		spin_unlock(&mn->lock);
>   
> -		list_add(&mo->link, &cancelled);
> -		it = interval_tree_iter_next(it, start, end);
> +		if (!unlock)
> +			unlock = __i915_mutex_lock_recursive(&mn->mm->i915->drm.struct_mutex);
> +		ret = i915_gem_object_unbind(obj);
> +		if (ret == 0)
> +			ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
> +		i915_gem_object_put(obj);
> +		if (ret)
> +			goto unlock;
> +
> +		spin_lock(&mn->lock);
> +
> +		/*
> +		 * As we do not (yet) protect the mmu from concurrent insertion
> +		 * over this range, there is no guarantee that this search will
> +		 * terminate given a pathologic workload.
> +		 */
> +		it = interval_tree_iter_first(&mn->objects, start, end);
>   	}
> -	list_for_each_entry(mo, &cancelled, link)
> -		del_object(mo);
>   	spin_unlock(&mn->lock);
>   
> -	if (!list_empty(&cancelled))
> -		flush_workqueue(mn->wq);
> +unlock:
> +	if (!IS_ERR_OR_NULL(unlock))
> +		mutex_unlock(unlock);
> +
> +	return ret;
>   
> -	return 0;
>   }
>   
>   static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
> @@ -168,7 +195,7 @@ static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
>   };
>   
>   static struct i915_mmu_notifier *
> -i915_mmu_notifier_create(struct mm_struct *mm)
> +i915_mmu_notifier_create(struct i915_mm_struct *mm)
>   {
>   	struct i915_mmu_notifier *mn;
>   
> @@ -179,13 +206,7 @@ i915_mmu_notifier_create(struct mm_struct *mm)
>   	spin_lock_init(&mn->lock);
>   	mn->mn.ops = &i915_gem_userptr_notifier;
>   	mn->objects = RB_ROOT_CACHED;
> -	mn->wq = alloc_workqueue("i915-userptr-release",
> -				 WQ_UNBOUND | WQ_MEM_RECLAIM,
> -				 0);
> -	if (mn->wq == NULL) {
> -		kfree(mn);
> -		return ERR_PTR(-ENOMEM);
> -	}
> +	mn->mm = mm;
>   
>   	return mn;
>   }
> @@ -195,16 +216,14 @@ i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
>   {
>   	struct i915_mmu_object *mo;
>   
> -	mo = obj->userptr.mmu_object;
> -	if (mo == NULL)
> +	mo = fetch_and_zero(&obj->userptr.mmu_object);
> +	if (!mo)
>   		return;
>   
>   	spin_lock(&mo->mn->lock);
>   	del_object(mo);
>   	spin_unlock(&mo->mn->lock);
>   	kfree(mo);
> -
> -	obj->userptr.mmu_object = NULL;
>   }
>   
>   static struct i915_mmu_notifier *
> @@ -217,7 +236,7 @@ i915_mmu_notifier_find(struct i915_mm_struct *mm)
>   	if (mn)
>   		return mn;
>   
> -	mn = i915_mmu_notifier_create(mm->mm);
> +	mn = i915_mmu_notifier_create(mm);
>   	if (IS_ERR(mn))
>   		err = PTR_ERR(mn);
>   
> @@ -240,10 +259,8 @@ i915_mmu_notifier_find(struct i915_mm_struct *mm)
>   	mutex_unlock(&mm->i915->mm_lock);
>   	up_write(&mm->mm->mmap_sem);
>   
> -	if (mn && !IS_ERR(mn)) {
> -		destroy_workqueue(mn->wq);
> +	if (mn && !IS_ERR(mn))
>   		kfree(mn);
> -	}
>   
>   	return err ? ERR_PTR(err) : mm->mn;
>   }
> @@ -266,14 +283,14 @@ i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
>   		return PTR_ERR(mn);
>   
>   	mo = kzalloc(sizeof(*mo), GFP_KERNEL);
> -	if (mo == NULL)
> +	if (!mo)
>   		return -ENOMEM;
>   
>   	mo->mn = mn;
>   	mo->obj = obj;
>   	mo->it.start = obj->userptr.ptr;
>   	mo->it.last = obj->userptr.ptr + obj->base.size - 1;
> -	INIT_WORK(&mo->work, cancel_userptr);
> +	RB_CLEAR_NODE(&mo->it.rb);
>   
>   	obj->userptr.mmu_object = mo;
>   	return 0;
> @@ -287,12 +304,16 @@ i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
>   		return;
>   
>   	mmu_notifier_unregister(&mn->mn, mm);
> -	destroy_workqueue(mn->wq);
>   	kfree(mn);
>   }
>   
>   #else
>   
> +static void
> +__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
> +{
> +}
> +
>   static void
>   i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
>   {
> @@ -461,42 +482,6 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
>   	return st;
>   }
>   
> -static int
> -__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj,
> -			      bool value)
> -{
> -	int ret = 0;
> -
> -	/* During mm_invalidate_range we need to cancel any userptr that
> -	 * overlaps the range being invalidated. Doing so requires the
> -	 * struct_mutex, and that risks recursion. In order to cause
> -	 * recursion, the user must alias the userptr address space with
> -	 * a GTT mmapping (possible with a MAP_FIXED) - then when we have
> -	 * to invalidate that mmaping, mm_invalidate_range is called with
> -	 * the userptr address *and* the struct_mutex held.  To prevent that
> -	 * we set a flag under the i915_mmu_notifier spinlock to indicate
> -	 * whether this object is valid.
> -	 */
> -#if defined(CONFIG_MMU_NOTIFIER)
> -	if (obj->userptr.mmu_object == NULL)
> -		return 0;
> -
> -	spin_lock(&obj->userptr.mmu_object->mn->lock);
> -	/* In order to serialise get_pages with an outstanding
> -	 * cancel_userptr, we must drop the struct_mutex and try again.
> -	 */
> -	if (!value)
> -		del_object(obj->userptr.mmu_object);
> -	else if (!work_pending(&obj->userptr.mmu_object->work))
> -		add_object(obj->userptr.mmu_object);
> -	else
> -		ret = -EAGAIN;
> -	spin_unlock(&obj->userptr.mmu_object->mn->lock);
> -#endif
> -
> -	return ret;
> -}
> -
>   static void
>   __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
>   {
> @@ -682,8 +667,11 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
>   	struct sgt_iter sgt_iter;
>   	struct page *page;
>   
> -	BUG_ON(obj->userptr.work != NULL);
> +	/* Cancel any inflight work and force them to restart their gup */
> +	obj->userptr.work = NULL;
>   	__i915_gem_userptr_set_active(obj, false);
> +	if (!pages)
> +		return;
>   
>   	if (obj->mm.madv != I915_MADV_WILLNEED)
>   		obj->mm.dirty = false;
> @@ -721,7 +709,8 @@ i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
>   
>   static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
>   	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
> -		 I915_GEM_OBJECT_IS_SHRINKABLE,
> +		 I915_GEM_OBJECT_IS_SHRINKABLE |
> +		 I915_GEM_OBJECT_ASYNC_CANCEL,
>   	.get_pages = i915_gem_userptr_get_pages,
>   	.put_pages = i915_gem_userptr_put_pages,
>   	.dmabuf_export = i915_gem_userptr_dmabuf_export,
> 
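
Condensing the hunks above, the reworked notifier callback follows
roughly this control flow (the empty-tree early exit and code between
the quoted hunks are omitted):

static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
						      struct mm_struct *mm,
						      unsigned long start,
						      unsigned long end,
						      bool blockable)
{
	struct i915_mmu_notifier *mn =
		container_of(_mn, struct i915_mmu_notifier, mn);
	struct interval_tree_node *it;
	struct mutex *unlock = NULL;
	int ret = 0;

	spin_lock(&mn->lock);
	it = interval_tree_iter_first(&mn->objects, start, end);
	while (it) {
		struct drm_i915_gem_object *obj;

		/* Atomic context: report failure instead of sleeping. */
		if (!blockable) {
			ret = -EAGAIN;
			break;
		}

		obj = container_of(it, struct i915_mmu_object, it)->obj;
		if (!kref_get_unless_zero(&obj->base.refcount)) {
			it = interval_tree_iter_next(it, start, end);
			continue;
		}
		spin_unlock(&mn->lock);

		/* Take struct_mutex at most once, tolerating recursion. */
		if (!unlock)
			unlock = __i915_mutex_lock_recursive(&mn->mm->i915->drm.struct_mutex);

		ret = i915_gem_object_unbind(obj);
		if (ret == 0)
			ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
		i915_gem_object_put(obj);
		if (ret)
			goto unlock;

		spin_lock(&mn->lock);
		it = interval_tree_iter_first(&mn->objects, start, end);
	}
	spin_unlock(&mn->lock);

unlock:
	if (!IS_ERR_OR_NULL(unlock))
		mutex_unlock(unlock);
	return ret;
}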

Thread overview: 19+ messages
2018-12-04 14:15 [PATCH 1/7] drm/i915: Allocate a common scratch page Chris Wilson
2018-12-04 14:15 ` [PATCH 2/7] drm/i915: Flush GPU relocs harder for gen3 Chris Wilson
2018-12-04 14:15 ` [PATCH 3/7] drm/i915/selftests: Reorder request allocation vs vma pinning Chris Wilson
2018-12-04 14:15 ` [PATCH 4/7] drm/i915: Pipeline PDP updates for Braswell Chris Wilson
2018-12-04 14:15 ` [PATCH 5/7] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
2018-12-06 15:18   ` Tvrtko Ursulin
2018-12-06 21:30     ` Chris Wilson
2018-12-07  8:38       ` Chris Wilson
2018-12-10 11:29   ` Tvrtko Ursulin
2018-12-04 14:15 ` [PATCH 6/7] drm/i915/userptr: Avoid struct_mutex recursion for mmu_invalidate_range_start Chris Wilson
2018-12-10 12:00   ` Tvrtko Ursulin
2018-12-04 14:15 ` [PATCH 7/7] drm/i915/userptr: Probe vma range before gup Chris Wilson
2018-12-04 14:28 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/7] drm/i915: Allocate a common scratch page Patchwork
2018-12-04 14:31 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-12-04 14:48 ` ✓ Fi.CI.BAT: success " Patchwork
2018-12-04 15:53   ` Chris Wilson
2018-12-04 15:07 ` [PATCH 1/7] " Mika Kuoppala
2018-12-04 16:01   ` Chris Wilson
2018-12-04 22:58 ` ✓ Fi.CI.IGT: success for series starting with [1/7] " Patchwork
