* [Intel-gfx] [CI] drm/i915/gt: Set vm again after MI_SET_CONTEXT
@ 2019-12-15 21:21 Chris Wilson
  2019-12-15 21:27 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gt: Set vm again after MI_SET_CONTEXT (rev12) Patchwork
  2019-12-15 21:53 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
  0 siblings, 2 replies; 16+ messages in thread
From: Chris Wilson @ 2019-12-15 21:21 UTC (permalink / raw)
  To: intel-gfx

Reloading the PD after MI_SET_CONTEXT, along with copious amounts of
flushes, so far is making Baytrail more content.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
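An editorial sketch for readers skimming the archive: "reloading the PD"
means re-emitting the ppGTT page-directory registers from the ring
itself. Reconstructed from the load_pd_dir() hunk below (kernel context
assumed; the comments are editorial, not part of the patch):

	*cs++ = MI_LOAD_REGISTER_IMM(3);
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
	*cs++ = valid;	/* which page-directory entries are usable */
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
	*cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;	/* the new PD */
	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);	/* drop stale TLBs */

	/* Post-sync: read PP_DIR_BASE back into scratch so the ring
	 * stalls until the page-table load has actually landed. */
	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
	*cs++ = intel_gt_scratch_offset(engine->gt,
					INTEL_GT_SCRATCH_FIELD_DEFAULT);
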
 .../gpu/drm/i915/gt/intel_ring_submission.c   | 101 +++++++-----------
 drivers/gpu/drm/i915/i915_gem_gtt.c           |   2 +
 2 files changed, 43 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 30ba67c9abe9..4b33d7e72b67 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -1371,51 +1371,27 @@ static int load_pd_dir(struct i915_request *rq,
 	const struct intel_engine_cs * const engine = rq->engine;
 	u32 *cs;
 
-	cs = intel_ring_begin(rq, 12);
+	cs = intel_ring_begin(rq, 10);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = MI_LOAD_REGISTER_IMM(3);
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
 	*cs++ = valid;
-
-	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
-	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
-	*cs++ = intel_gt_scratch_offset(rq->engine->gt,
-					INTEL_GT_SCRATCH_FIELD_DEFAULT);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
 	*cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;
+	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
+	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
 
 	/* Stall until the page table load is complete? */
 	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
-	*cs++ = intel_gt_scratch_offset(rq->engine->gt,
+	*cs++ = intel_gt_scratch_offset(engine->gt,
 					INTEL_GT_SCRATCH_FIELD_DEFAULT);
 
 	intel_ring_advance(rq, cs);
 
-	return rq->engine->emit_flush(rq, EMIT_FLUSH);
-}
-
-static int flush_tlb(struct i915_request *rq)
-{
-	const struct intel_engine_cs * const engine = rq->engine;
-	u32 *cs;
-
-	cs = intel_ring_begin(rq, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
-	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
-	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
-
-	*cs++ = MI_NOOP;
-	intel_ring_advance(rq, cs);
-
-	return 0;
+	return engine->emit_flush(rq, EMIT_FLUSH);
 }
 
 static inline int mi_set_context(struct i915_request *rq, u32 flags)
@@ -1590,52 +1566,57 @@ static int remap_l3(struct i915_request *rq)
 	return 0;
 }
 
-static int switch_context(struct i915_request *rq)
+static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
 {
-	struct intel_context *ce = rq->hw_context;
-	struct i915_address_space *vm = vm_alias(ce);
-	u32 hw_flags = 0;
-	int ret;
+	const struct intel_engine_cs * const engine = rq->engine;
+	int ret, i;
 
-	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
+	if (!vm)
+		return 0;
 
-	if (vm) {
-		/*
-		 * Not only do we need a full barrier (post-sync write) after
-		 * invalidating the TLBs, but we need to wait a little bit
-		 * longer. Whether this is merely delaying us, or the
-		 * subsequent flush is a key part of serialising with the
-		 * post-sync op, this extra pass appears vital before a
-		 * mm switch!
-		 */
-		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+	/*
+	 * Not only do we need a full barrier (post-sync write) after
+	 * invalidating the TLBs, but we need to wait a little bit
+	 * longer. Whether this is merely delaying us, or the
+	 * subsequent flush is a key part of serialising with the
+	 * post-sync op, this extra pass appears vital before a
+	 * mm switch!
+	 */
+	for (i = 0; i < 2; i++) {
+		ret = engine->emit_flush(rq, EMIT_INVALIDATE);
 		if (ret)
 			return ret;
 
-		ret = flush_tlb(rq);
+		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
 		if (ret)
 			return ret;
+	}
 
-		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), 0);
-		if (ret)
-			return ret;
+	ret = engine->emit_flush(rq, EMIT_INVALIDATE);
+	if (ret)
+		return ret;
 
-		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
-		if (ret)
-			return ret;
+	return engine->emit_flush(rq, EMIT_FLUSH);
+}
 
-		ret = flush_tlb(rq);
-		if (ret)
-			return ret;
+static int switch_context(struct i915_request *rq)
+{
+	struct intel_context *ce = rq->hw_context;
+	struct i915_address_space *vm = vm_alias(ce);
+	int ret;
 
-		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
-		if (ret)
-			return ret;
-	}
+	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
+
+	ret = switch_mm(rq, vm);
+	if (ret)
+		return ret;
 
 	if (ce->state) {
+		u32 hw_flags;
+
 		GEM_BUG_ON(rq->engine->id != RCS0);
 
+		hw_flags = 0;
 		if (!test_bit(CONTEXT_VALID_BIT, &ce->flags))
 			hw_flags = MI_RESTORE_INHIBIT;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index be36719e7987..d9a2f58a620a 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1709,8 +1709,10 @@ static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end)
 	gen6_for_each_pde(pt, pd, start, end, pde)
 		gen6_write_pde(ppgtt, pde, pt);
 
+	mb();
 	ioread32(ppgtt->pd_addr + pde - 1);
 	gen6_ggtt_invalidate(ppgtt->base.vm.gt->ggtt);
+	mb();
 
 	mutex_unlock(&ppgtt->flush);
 }
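
The two bare mb() calls above are what checkpatch flags in the report
that follows. The idiom being relied on: post the PDE writes, fence,
force them out of the write buffer with an uncached read-back,
invalidate the GGTT, fence again. A commented reconstruction of the
patched tail of gen6_flush_pd() (only what the hunk shows; comments are
editorial):

	gen6_for_each_pde(pt, pd, start, end, pde)
		gen6_write_pde(ppgtt, pde, pt);	/* post the PDE updates */

	mb();	/* order the PDE writes before the uncached read-back */
	ioread32(ppgtt->pd_addr + pde - 1);	/* flush posted writes */
	gen6_ggtt_invalidate(ppgtt->base.vm.gt->ggtt);
	mb();	/* invalidate complete before dropping the mutex */

	mutex_unlock(&ppgtt->flush);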
-- 
2.24.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gt: Set vm again after MI_SET_CONTEXT (rev12)
  2019-12-15 21:21 [Intel-gfx] [CI] drm/i915/gt: Set vm again after MI_SET_CONTEXT Chris Wilson
@ 2019-12-15 21:27 ` Patchwork
  2019-12-15 21:53 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
  1 sibling, 0 replies; 16+ messages in thread
From: Patchwork @ 2019-12-15 21:27 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gt: Set vm again after MI_SET_CONTEXT (rev12)
URL   : https://patchwork.freedesktop.org/series/70839/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
2ff7f1e21ea4 drm/i915/gt: Set vm again after MI_SET_CONTEXT
-:169: WARNING:MEMORY_BARRIER: memory barrier without comment
#169: FILE: drivers/gpu/drm/i915/i915_gem_gtt.c:1712:
+	mb();

-:172: WARNING:MEMORY_BARRIER: memory barrier without comment
#172: FILE: drivers/gpu/drm/i915/i915_gem_gtt.c:1715:
+	mb();

total: 0 errors, 2 warnings, 0 checks, 154 lines checked
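
checkpatch's MEMORY_BARRIER rule only asks that each barrier carry a
justifying comment. A hypothetical annotation that would silence both
warnings (the wording is illustrative, not taken from any revision of
the series):

	/* Serialise the PDE writes with the uncached read-back below. */
	mb();
	ioread32(ppgtt->pd_addr + pde - 1);
	gen6_ggtt_invalidate(ppgtt->base.vm.gt->ggtt);
	/* Ensure the invalidate has completed before the mutex is dropped. */
	mb();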


* [Intel-gfx] ✓ Fi.CI.BAT: success for drm/i915/gt: Set vm again after MI_SET_CONTEXT (rev12)
  2019-12-15 21:21 [Intel-gfx] [CI] drm/i915/gt: Set vm again after MI_SET_CONTEXT Chris Wilson
  2019-12-15 21:27 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gt: Set vm again after MI_SET_CONTEXT (rev12) Patchwork
@ 2019-12-15 21:53 ` Patchwork
  1 sibling, 0 replies; 16+ messages in thread
From: Patchwork @ 2019-12-15 21:53 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gt: Set vm again after MI_SET_CONTEXT (rev12)
URL   : https://patchwork.freedesktop.org/series/70839/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_7569 -> Patchwork_15777
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15777/index.html

Known issues
------------

  Here are the changes found in Patchwork_15777 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_selftest@live_blt:
    - fi-hsw-4770:        [PASS][1] -> [DMESG-FAIL][2] ([i915#563])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7569/fi-hsw-4770/igt@i915_selftest@live_blt.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15777/fi-hsw-4770/igt@i915_selftest@live_blt.html

  * igt@i915_selftest@live_requests:
    - fi-hsw-4770r:       [PASS][3] -> [INCOMPLETE][4] ([i915#773])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7569/fi-hsw-4770r/igt@i915_selftest@live_requests.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15777/fi-hsw-4770r/igt@i915_selftest@live_requests.html

  
#### Possible fixes ####

  * igt@kms_flip@basic-flip-vs-dpms:
    - fi-apl-guc:         [DMESG-WARN][5] ([i915#180]) -> [PASS][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7569/fi-apl-guc/igt@kms_flip@basic-flip-vs-dpms.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15777/fi-apl-guc/igt@kms_flip@basic-flip-vs-dpms.html

  
#### Warnings ####

  * igt@i915_selftest@live_blt:
    - fi-ivb-3770:        [DMESG-FAIL][7] ([i915#725]) -> [DMESG-FAIL][8] ([i915#770])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7569/fi-ivb-3770/igt@i915_selftest@live_blt.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15777/fi-ivb-3770/igt@i915_selftest@live_blt.html

  * igt@i915_selftest@live_gem_contexts:
    - fi-hsw-peppy:       [DMESG-FAIL][9] ([i915#722]) -> [INCOMPLETE][10] ([i915#694])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7569/fi-hsw-peppy/igt@i915_selftest@live_gem_contexts.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15777/fi-hsw-peppy/igt@i915_selftest@live_gem_contexts.html

  
  [i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180
  [i915#563]: https://gitlab.freedesktop.org/drm/intel/issues/563
  [i915#694]: https://gitlab.freedesktop.org/drm/intel/issues/694
  [i915#722]: https://gitlab.freedesktop.org/drm/intel/issues/722
  [i915#725]: https://gitlab.freedesktop.org/drm/intel/issues/725
  [i915#770]: https://gitlab.freedesktop.org/drm/intel/issues/770
  [i915#773]: https://gitlab.freedesktop.org/drm/intel/issues/773


Participating hosts (34 -> 22)
------------------------------

  Additional (1): fi-elk-e7500 
  Missing    (13): fi-icl-1065g7 fi-ilk-m540 fi-hsw-4200u fi-byt-j1900 fi-glk-dsi fi-byt-squawks fi-bsw-cyan fi-snb-2520m fi-ctg-p8600 fi-blb-e6850 fi-byt-clapper fi-bdw-samus fi-snb-2600 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_7569 -> Patchwork_15777

  CI-20190529: 20190529
  CI_DRM_7569: 62c2abc0df8983aba79ba093413683c44e9c4748 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5349: 048f58513d8b8ec6bb307a939f0ac959bc0f0e10 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_15777: 2ff7f1e21ea478164987c847b89f1b433d9dcec6 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

2ff7f1e21ea4 drm/i915/gt: Set vm again after MI_SET_CONTEXT

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15777/index.html

* [Intel-gfx] [CI] drm/i915/gt: Set vm again after MI_SET_CONTEXT
@ 2019-12-16  0:25 Chris Wilson
  0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2019-12-16  0:25 UTC (permalink / raw)
  To: intel-gfx

Reloading the PD after MI_SET_CONTEXT, along with copious amounts of
flushes, so far is making Baytrail more content.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gt/intel_ring_submission.c   | 91 ++++++-------------
 drivers/gpu/drm/i915/i915_gem_gtt.c           |  6 +-
 2 files changed, 32 insertions(+), 65 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 30ba67c9abe9..dede82d4d02b 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -1371,50 +1371,26 @@ static int load_pd_dir(struct i915_request *rq,
 	const struct intel_engine_cs * const engine = rq->engine;
 	u32 *cs;
 
-	cs = intel_ring_begin(rq, 12);
+	cs = intel_ring_begin(rq, 10);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = MI_LOAD_REGISTER_IMM(3);
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
 	*cs++ = valid;
-
-	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
-	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
-	*cs++ = intel_gt_scratch_offset(rq->engine->gt,
-					INTEL_GT_SCRATCH_FIELD_DEFAULT);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
 	*cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;
+	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
+	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
 
 	/* Stall until the page table load is complete? */
 	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
-	*cs++ = intel_gt_scratch_offset(rq->engine->gt,
+	*cs++ = intel_gt_scratch_offset(engine->gt,
 					INTEL_GT_SCRATCH_FIELD_DEFAULT);
 
 	intel_ring_advance(rq, cs);
 
-	return rq->engine->emit_flush(rq, EMIT_FLUSH);
-}
-
-static int flush_tlb(struct i915_request *rq)
-{
-	const struct intel_engine_cs * const engine = rq->engine;
-	u32 *cs;
-
-	cs = intel_ring_begin(rq, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
-	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
-	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
-
-	*cs++ = MI_NOOP;
-	intel_ring_advance(rq, cs);
-
 	return 0;
 }
 
@@ -1590,52 +1566,39 @@ static int remap_l3(struct i915_request *rq)
 	return 0;
 }
 
+static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
+{
+	if (!vm)
+		return 0;
+
+	/*
+	 * Not only do we need a full barrier (post-sync write) after
+	 * invalidating the TLBs, but we need to wait a little bit
+	 * longer. Whether this is merely delaying us, or the
+	 * subsequent flush is a key part of serialising with the
+	 * post-sync op, this extra pass appears vital before a
+	 * mm switch!
+	 */
+	return load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
+}
+
 static int switch_context(struct i915_request *rq)
 {
 	struct intel_context *ce = rq->hw_context;
-	struct i915_address_space *vm = vm_alias(ce);
-	u32 hw_flags = 0;
 	int ret;
 
 	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
 
-	if (vm) {
-		/*
-		 * Not only do we need a full barrier (post-sync write) after
-		 * invalidating the TLBs, but we need to wait a little bit
-		 * longer. Whether this is merely delaying us, or the
-		 * subsequent flush is a key part of serialising with the
-		 * post-sync op, this extra pass appears vital before a
-		 * mm switch!
-		 */
-		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
-		if (ret)
-			return ret;
-
-		ret = flush_tlb(rq);
-		if (ret)
-			return ret;
-
-		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), 0);
-		if (ret)
-			return ret;
-
-		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
-		if (ret)
-			return ret;
-
-		ret = flush_tlb(rq);
-		if (ret)
-			return ret;
-
-		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
-		if (ret)
-			return ret;
-	}
+	ret = switch_mm(rq, vm_alias(ce));
+	if (ret)
+		return ret;
 
 	if (ce->state) {
+		u32 hw_flags;
+
 		GEM_BUG_ON(rq->engine->id != RCS0);
 
+		hw_flags = 0;
 		if (!test_bit(CONTEXT_VALID_BIT, &ce->flags))
 			hw_flags = MI_RESTORE_INHIBIT;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index be36719e7987..1d546f0c3e3f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1709,8 +1709,10 @@ static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end)
 	gen6_for_each_pde(pt, pd, start, end, pde)
 		gen6_write_pde(ppgtt, pde, pt);
 
+	mb();
 	ioread32(ppgtt->pd_addr + pde - 1);
 	gen6_ggtt_invalidate(ppgtt->base.vm.gt->ggtt);
+	mb();
 
 	mutex_unlock(&ppgtt->flush);
 }
@@ -1758,8 +1760,10 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
 	}
 	spin_unlock(&pd->lock);
 
-	if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND))
+	if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND)) {
 		gen6_flush_pd(ppgtt, from, start);
+		gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total);
+	}
 
 	goto out;
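
The second gen6_flush_pd() call above is this revision's sledgehammer:
after flushing the PDEs touched by the allocation, it rewrites and
re-flushes every PDE in the address space. Spelled out with editorial
comments (reconstructed from the hunk; kernel context assumed):

	if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND)) {
		/* Flush the range we just allocated... */
		gen6_flush_pd(ppgtt, from, start);
		/* ...then, belt and braces, the entire address space. */
		gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total);
	}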
 
-- 
2.24.0


* [Intel-gfx] [CI] drm/i915/gt: Set vm again after MI_SET_CONTEXT
@ 2019-12-15 21:56 Chris Wilson
  0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2019-12-15 21:56 UTC (permalink / raw)
  To: intel-gfx

Reloading the PD after MI_SET_CONTEXT, along with copious amounts of
flushes, so far is making Baytrail more content.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gt/intel_ring_submission.c   | 101 +++++++-----------
 drivers/gpu/drm/i915/i915_gem_gtt.c           |   2 +
 2 files changed, 43 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 30ba67c9abe9..4fa1c89620a0 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -1371,51 +1371,27 @@ static int load_pd_dir(struct i915_request *rq,
 	const struct intel_engine_cs * const engine = rq->engine;
 	u32 *cs;
 
-	cs = intel_ring_begin(rq, 12);
+	cs = intel_ring_begin(rq, 10);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = MI_LOAD_REGISTER_IMM(3);
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
 	*cs++ = valid;
-
-	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
-	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
-	*cs++ = intel_gt_scratch_offset(rq->engine->gt,
-					INTEL_GT_SCRATCH_FIELD_DEFAULT);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
 	*cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;
+	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
+	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
 
 	/* Stall until the page table load is complete? */
 	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
-	*cs++ = intel_gt_scratch_offset(rq->engine->gt,
+	*cs++ = intel_gt_scratch_offset(engine->gt,
 					INTEL_GT_SCRATCH_FIELD_DEFAULT);
 
 	intel_ring_advance(rq, cs);
 
-	return rq->engine->emit_flush(rq, EMIT_FLUSH);
-}
-
-static int flush_tlb(struct i915_request *rq)
-{
-	const struct intel_engine_cs * const engine = rq->engine;
-	u32 *cs;
-
-	cs = intel_ring_begin(rq, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
-	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
-	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
-
-	*cs++ = MI_NOOP;
-	intel_ring_advance(rq, cs);
-
-	return 0;
+	return engine->emit_flush(rq, EMIT_FLUSH);
 }
 
 static inline int mi_set_context(struct i915_request *rq, u32 flags)
@@ -1590,52 +1566,57 @@ static int remap_l3(struct i915_request *rq)
 	return 0;
 }
 
-static int switch_context(struct i915_request *rq)
+static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
 {
-	struct intel_context *ce = rq->hw_context;
-	struct i915_address_space *vm = vm_alias(ce);
-	u32 hw_flags = 0;
-	int ret;
-
-	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
+	const struct intel_engine_cs * const engine = rq->engine;
+	int ret, i;
 
-	if (vm) {
-		/*
-		 * Not only do we need a full barrier (post-sync write) after
-		 * invalidating the TLBs, but we need to wait a little bit
-		 * longer. Whether this is merely delaying us, or the
-		 * subsequent flush is a key part of serialising with the
-		 * post-sync op, this extra pass appears vital before a
-		 * mm switch!
-		 */
-		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
-		if (ret)
-			return ret;
+	if (!vm)
+		return 0;
 
-		ret = flush_tlb(rq);
-		if (ret)
-			return ret;
+	/*
+	 * Not only do we need a full barrier (post-sync write) after
+	 * invalidating the TLBs, but we need to wait a little bit
+	 * longer. Whether this is merely delaying us, or the
+	 * subsequent flush is a key part of serialising with the
+	 * post-sync op, this extra pass appears vital before a
+	 * mm switch!
+	 */
+	ret = engine->emit_flush(rq, EMIT_FLUSH);
+	if (ret)
+		return ret;
 
-		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), 0);
+	for (i = 0; i < 2; i++) {
+		ret = engine->emit_flush(rq, EMIT_INVALIDATE);
 		if (ret)
 			return ret;
 
 		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
 		if (ret)
 			return ret;
+	}
 
-		ret = flush_tlb(rq);
-		if (ret)
-			return ret;
+	return engine->emit_flush(rq, EMIT_INVALIDATE);
+}
 
-		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
-		if (ret)
-			return ret;
-	}
+static int switch_context(struct i915_request *rq)
+{
+	struct intel_context *ce = rq->hw_context;
+	struct i915_address_space *vm = vm_alias(ce);
+	int ret;
+
+	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
+
+	ret = switch_mm(rq, vm);
+	if (ret)
+		return ret;
 
 	if (ce->state) {
+		u32 hw_flags;
+
 		GEM_BUG_ON(rq->engine->id != RCS0);
 
+		hw_flags = 0;
 		if (!test_bit(CONTEXT_VALID_BIT, &ce->flags))
 			hw_flags = MI_RESTORE_INHIBIT;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index be36719e7987..d9a2f58a620a 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1709,8 +1709,10 @@ static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end)
 	gen6_for_each_pde(pt, pd, start, end, pde)
 		gen6_write_pde(ppgtt, pde, pt);
 
+	mb();
 	ioread32(ppgtt->pd_addr + pde - 1);
 	gen6_ggtt_invalidate(ppgtt->base.vm.gt->ggtt);
+	mb();
 
 	mutex_unlock(&ppgtt->flush);
 }
-- 
2.24.0


* [Intel-gfx] [CI] drm/i915/gt: Set vm again after MI_SET_CONTEXT
@ 2019-12-15 17:01 Chris Wilson
  0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2019-12-15 17:01 UTC (permalink / raw)
  To: intel-gfx

Reloading the PD after MI_SET_CONTEXT, along with copious amounts of
flushes, so far is making Baytrail more content.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gt/intel_ring_submission.c   | 105 +++++++-----------
 1 file changed, 43 insertions(+), 62 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 30ba67c9abe9..1a072110e2dd 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -1371,51 +1371,31 @@ static int load_pd_dir(struct i915_request *rq,
 	const struct intel_engine_cs * const engine = rq->engine;
 	u32 *cs;
 
-	cs = intel_ring_begin(rq, 12);
+	cs = intel_ring_begin(rq, 14);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = MI_LOAD_REGISTER_IMM(5);
+	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
+	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
 	*cs++ = valid;
-
-	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
-	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
-	*cs++ = intel_gt_scratch_offset(rq->engine->gt,
-					INTEL_GT_SCRATCH_FIELD_DEFAULT);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base)) + 4;
+	*cs++ = 0;
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
 	*cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;
+	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
+	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
 
 	/* Stall until the page table load is complete? */
 	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
-	*cs++ = intel_gt_scratch_offset(rq->engine->gt,
+	*cs++ = intel_gt_scratch_offset(engine->gt,
 					INTEL_GT_SCRATCH_FIELD_DEFAULT);
 
 	intel_ring_advance(rq, cs);
 
-	return rq->engine->emit_flush(rq, EMIT_FLUSH);
-}
-
-static int flush_tlb(struct i915_request *rq)
-{
-	const struct intel_engine_cs * const engine = rq->engine;
-	u32 *cs;
-
-	cs = intel_ring_begin(rq, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
-	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
-	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
-
-	*cs++ = MI_NOOP;
-	intel_ring_advance(rq, cs);
-
-	return 0;
+	return engine->emit_flush(rq, EMIT_FLUSH);
 }
 
 static inline int mi_set_context(struct i915_request *rq, u32 flags)
@@ -1590,52 +1570,53 @@ static int remap_l3(struct i915_request *rq)
 	return 0;
 }
 
-static int switch_context(struct i915_request *rq)
+static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
 {
-	struct intel_context *ce = rq->hw_context;
-	struct i915_address_space *vm = vm_alias(ce);
-	u32 hw_flags = 0;
-	int ret;
+	const struct intel_engine_cs * const engine = rq->engine;
+	int ret, i;
 
-	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
+	if (!vm)
+		return 0;
 
-	if (vm) {
-		/*
-		 * Not only do we need a full barrier (post-sync write) after
-		 * invalidating the TLBs, but we need to wait a little bit
-		 * longer. Whether this is merely delaying us, or the
-		 * subsequent flush is a key part of serialising with the
-		 * post-sync op, this extra pass appears vital before a
-		 * mm switch!
-		 */
-		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+	/*
+	 * Not only do we need a full barrier (post-sync write) after
+	 * invalidating the TLBs, but we need to wait a little bit
+	 * longer. Whether this is merely delaying us, or the
+	 * subsequent flush is a key part of serialising with the
+	 * post-sync op, this extra pass appears vital before a
+	 * mm switch!
+	 */
+	for (i = 0; i < 2; i++) {
+		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
 		if (ret)
 			return ret;
+	}
 
-		ret = flush_tlb(rq);
-		if (ret)
-			return ret;
+	ret = engine->emit_flush(rq, EMIT_INVALIDATE);
+	if (ret)
+		return ret;
 
-		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), 0);
-		if (ret)
-			return ret;
+	return engine->emit_flush(rq, EMIT_FLUSH);
+}
 
-		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
-		if (ret)
-			return ret;
+static int switch_context(struct i915_request *rq)
+{
+	struct intel_context *ce = rq->hw_context;
+	struct i915_address_space *vm = vm_alias(ce);
+	int ret;
 
-		ret = flush_tlb(rq);
-		if (ret)
-			return ret;
+	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
 
-		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
-		if (ret)
-			return ret;
-	}
+	ret = switch_mm(rq, vm);
+	if (ret)
+		return ret;
 
 	if (ce->state) {
+		u32 hw_flags;
+
 		GEM_BUG_ON(rq->engine->id != RCS0);
 
+		hw_flags = 0;
 		if (!test_bit(CONTEXT_VALID_BIT, &ce->flags))
 			hw_flags = MI_RESTORE_INHIBIT;
 
-- 
2.24.0


* [Intel-gfx] [CI] drm/i915/gt: Set vm again after MI_SET_CONTEXT
@ 2019-12-15 16:47 Chris Wilson
  0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2019-12-15 16:47 UTC (permalink / raw)
  To: intel-gfx

Reloading the PD after MI_SET_CONTEXT, along with copious amounts of
flushes, so far is making Baytrail more content.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gt/intel_ring_submission.c   | 113 ++++++++----------
 1 file changed, 50 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 30ba67c9abe9..073da495b8f9 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -1371,51 +1371,31 @@ static int load_pd_dir(struct i915_request *rq,
 	const struct intel_engine_cs * const engine = rq->engine;
 	u32 *cs;
 
-	cs = intel_ring_begin(rq, 12);
+	cs = intel_ring_begin(rq, 14);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = MI_LOAD_REGISTER_IMM(5);
+	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
+	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
 	*cs++ = valid;
-
-	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
-	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
-	*cs++ = intel_gt_scratch_offset(rq->engine->gt,
-					INTEL_GT_SCRATCH_FIELD_DEFAULT);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base)) + 4;
+	*cs++ = 0;
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
 	*cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;
+	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
+	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
 
 	/* Stall until the page table load is complete? */
 	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
-	*cs++ = intel_gt_scratch_offset(rq->engine->gt,
+	*cs++ = intel_gt_scratch_offset(engine->gt,
 					INTEL_GT_SCRATCH_FIELD_DEFAULT);
 
 	intel_ring_advance(rq, cs);
 
-	return rq->engine->emit_flush(rq, EMIT_FLUSH);
-}
-
-static int flush_tlb(struct i915_request *rq)
-{
-	const struct intel_engine_cs * const engine = rq->engine;
-	u32 *cs;
-
-	cs = intel_ring_begin(rq, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
-	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
-	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
-
-	*cs++ = MI_NOOP;
-	intel_ring_advance(rq, cs);
-
-	return 0;
+	return engine->emit_flush(rq, EMIT_FLUSH);
 }
 
 static inline int mi_set_context(struct i915_request *rq, u32 flags)
@@ -1590,52 +1570,59 @@ static int remap_l3(struct i915_request *rq)
 	return 0;
 }
 
-static int switch_context(struct i915_request *rq)
+static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
 {
-	struct intel_context *ce = rq->hw_context;
-	struct i915_address_space *vm = vm_alias(ce);
-	u32 hw_flags = 0;
+	const struct intel_engine_cs * const engine = rq->engine;
 	int ret;
 
-	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
+	if (!vm)
+		return 0;
 
-	if (vm) {
-		/*
-		 * Not only do we need a full barrier (post-sync write) after
-		 * invalidating the TLBs, but we need to wait a little bit
-		 * longer. Whether this is merely delaying us, or the
-		 * subsequent flush is a key part of serialising with the
-		 * post-sync op, this extra pass appears vital before a
-		 * mm switch!
-		 */
-		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
-		if (ret)
-			return ret;
+	ret = engine->emit_flush(rq, EMIT_FLUSH);
+	if (ret)
+		return ret;
 
-		ret = flush_tlb(rq);
-		if (ret)
-			return ret;
+	ret = engine->emit_flush(rq, EMIT_INVALIDATE);
+	if (ret)
+		return ret;
 
-		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), 0);
-		if (ret)
-			return ret;
+	/*
+	 * Not only do we need a full barrier (post-sync write) after
+	 * invalidating the TLBs, but we need to wait a little bit
+	 * longer. Whether this is merely delaying us, or the
+	 * subsequent flush is a key part of serialising with the
+	 * post-sync op, this extra pass appears vital before a
+	 * mm switch!
+	 */
+	ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
+	if (ret)
+		return ret;
 
-		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
-		if (ret)
-			return ret;
+	ret = engine->emit_flush(rq, EMIT_INVALIDATE);
+	if (ret)
+		return ret;
 
-		ret = flush_tlb(rq);
-		if (ret)
-			return ret;
+	return engine->emit_flush(rq, EMIT_FLUSH);
+}
 
-		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
-		if (ret)
-			return ret;
-	}
+static int switch_context(struct i915_request *rq)
+{
+	struct intel_context *ce = rq->hw_context;
+	struct i915_address_space *vm = vm_alias(ce);
+	int ret;
+
+	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
+
+	ret = switch_mm(rq, vm);
+	if (ret)
+		return ret;
 
 	if (ce->state) {
+		u32 hw_flags;
+
 		GEM_BUG_ON(rq->engine->id != RCS0);
 
+		hw_flags = 0;
 		if (!test_bit(CONTEXT_VALID_BIT, &ce->flags))
 			hw_flags = MI_RESTORE_INHIBIT;
 
-- 
2.24.0


* [Intel-gfx] [CI] drm/i915/gt: Set vm again after MI_SET_CONTEXT
@ 2019-12-15 16:21 Chris Wilson
  0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2019-12-15 16:21 UTC (permalink / raw)
  To: intel-gfx

Reloading the PD after MI_SET_CONTEXT, along with copious amounts of
flushes, so far is making Baytrail more content.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gt/intel_ring_submission.c   | 111 ++++++++----------
 1 file changed, 50 insertions(+), 61 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 30ba67c9abe9..02e3c08fa233 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -1371,50 +1371,30 @@ static int load_pd_dir(struct i915_request *rq,
 	const struct intel_engine_cs * const engine = rq->engine;
 	u32 *cs;
 
-	cs = intel_ring_begin(rq, 12);
+	cs = intel_ring_begin(rq, 14);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = MI_LOAD_REGISTER_IMM(5);
+	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
+	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
 	*cs++ = valid;
-
-	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
-	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
-	*cs++ = intel_gt_scratch_offset(rq->engine->gt,
-					INTEL_GT_SCRATCH_FIELD_DEFAULT);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base)) + 4;
+	*cs++ = 0;
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
 	*cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;
+	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
+	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
 
 	/* Stall until the page table load is complete? */
 	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
-	*cs++ = intel_gt_scratch_offset(rq->engine->gt,
+	*cs++ = intel_gt_scratch_offset(engine->gt,
 					INTEL_GT_SCRATCH_FIELD_DEFAULT);
 
 	intel_ring_advance(rq, cs);
 
-	return rq->engine->emit_flush(rq, EMIT_FLUSH);
-}
-
-static int flush_tlb(struct i915_request *rq)
-{
-	const struct intel_engine_cs * const engine = rq->engine;
-	u32 *cs;
-
-	cs = intel_ring_begin(rq, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
-	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
-	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
-
-	*cs++ = MI_NOOP;
-	intel_ring_advance(rq, cs);
-
 	return 0;
 }
 
@@ -1590,52 +1570,61 @@ static int remap_l3(struct i915_request *rq)
 	return 0;
 }
 
-static int switch_context(struct i915_request *rq)
+static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
 {
-	struct intel_context *ce = rq->hw_context;
-	struct i915_address_space *vm = vm_alias(ce);
-	u32 hw_flags = 0;
-	int ret;
-
-	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
+	const struct intel_engine_cs * const engine = rq->engine;
+	int ret, i;
 
-	if (vm) {
-		/*
-		 * Not only do we need a full barrier (post-sync write) after
-		 * invalidating the TLBs, but we need to wait a little bit
-		 * longer. Whether this is merely delaying us, or the
-		 * subsequent flush is a key part of serialising with the
-		 * post-sync op, this extra pass appears vital before a
-		 * mm switch!
-		 */
-		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
-		if (ret)
-			return ret;
+	if (!vm)
+		return 0;
 
-		ret = flush_tlb(rq);
-		if (ret)
-			return ret;
+	ret = engine->emit_flush(rq, EMIT_FLUSH);
+	if (ret)
+		return ret;
 
-		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), 0);
-		if (ret)
-			return ret;
+	ret = engine->emit_flush(rq, EMIT_INVALIDATE);
+	if (ret)
+		return ret;
 
+	/*
+	 * Not only do we need a full barrier (post-sync write) after
+	 * invalidating the TLBs, but we need to wait a little bit
+	 * longer. Whether this is merely delaying us, or the
+	 * subsequent flush is a key part of serialising with the
+	 * post-sync op, this extra pass appears vital before a
+	 * mm switch!
+	 */
+	for (i = 0; i < 8; i++) {
 		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
 		if (ret)
 			return ret;
+	}
 
-		ret = flush_tlb(rq);
-		if (ret)
-			return ret;
+	ret = engine->emit_flush(rq, EMIT_INVALIDATE);
+	if (ret)
+		return ret;
 
-		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
-		if (ret)
-			return ret;
-	}
+	return engine->emit_flush(rq, EMIT_FLUSH);
+}
+
+static int switch_context(struct i915_request *rq)
+{
+	struct intel_context *ce = rq->hw_context;
+	struct i915_address_space *vm = vm_alias(ce);
+	int ret;
+
+	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
+
+	ret = switch_mm(rq, vm);
+	if (ret)
+		return ret;
 
 	if (ce->state) {
+		u32 hw_flags;
+
 		GEM_BUG_ON(rq->engine->id != RCS0);
 
+		hw_flags = 0;
 		if (!test_bit(CONTEXT_VALID_BIT, &ce->flags))
 			hw_flags = MI_RESTORE_INHIBIT;
 
-- 
2.24.0


* [Intel-gfx] [CI] drm/i915/gt: Set vm again after MI_SET_CONTEXT
@ 2019-12-15 15:47 Chris Wilson
  0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2019-12-15 15:47 UTC (permalink / raw)
  To: intel-gfx

Reloading the PD after MI_SET_CONTEXT, along with copious amounts of
flushes, so far is making Baytrail more content.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gt/intel_ring_submission.c   | 107 ++++++++----------
 1 file changed, 45 insertions(+), 62 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 30ba67c9abe9..d955888f84ef 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -1371,50 +1371,30 @@ static int load_pd_dir(struct i915_request *rq,
 	const struct intel_engine_cs * const engine = rq->engine;
 	u32 *cs;
 
-	cs = intel_ring_begin(rq, 12);
+	cs = intel_ring_begin(rq, 14);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = MI_LOAD_REGISTER_IMM(5);
+	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
+	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
 	*cs++ = valid;
-
-	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
-	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
-	*cs++ = intel_gt_scratch_offset(rq->engine->gt,
-					INTEL_GT_SCRATCH_FIELD_DEFAULT);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base)) + 4;
+	*cs++ = 0;
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
 	*cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;
+	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
+	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
 
 	/* Stall until the page table load is complete? */
 	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
-	*cs++ = intel_gt_scratch_offset(rq->engine->gt,
+	*cs++ = intel_gt_scratch_offset(engine->gt,
 					INTEL_GT_SCRATCH_FIELD_DEFAULT);
 
 	intel_ring_advance(rq, cs);
 
-	return rq->engine->emit_flush(rq, EMIT_FLUSH);
-}
-
-static int flush_tlb(struct i915_request *rq)
-{
-	const struct intel_engine_cs * const engine = rq->engine;
-	u32 *cs;
-
-	cs = intel_ring_begin(rq, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
-	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
-	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
-
-	*cs++ = MI_NOOP;
-	intel_ring_advance(rq, cs);
-
 	return 0;
 }
 
@@ -1590,52 +1570,55 @@ static int remap_l3(struct i915_request *rq)
 	return 0;
 }
 
-static int switch_context(struct i915_request *rq)
+static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
 {
-	struct intel_context *ce = rq->hw_context;
-	struct i915_address_space *vm = vm_alias(ce);
-	u32 hw_flags = 0;
+	const struct intel_engine_cs * const engine = rq->engine;
 	int ret;
 
-	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
+	if (!vm)
+		return 0;
 
-	if (vm) {
-		/*
-		 * Not only do we need a full barrier (post-sync write) after
-		 * invalidating the TLBs, but we need to wait a little bit
-		 * longer. Whether this is merely delaying us, or the
-		 * subsequent flush is a key part of serialising with the
-		 * post-sync op, this extra pass appears vital before a
-		 * mm switch!
-		 */
-		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
-		if (ret)
-			return ret;
+	ret = engine->emit_flush(rq, EMIT_INVALIDATE);
+	if (ret)
+		return ret;
 
-		ret = flush_tlb(rq);
-		if (ret)
-			return ret;
+	/*
+	 * Not only do we need a full barrier (post-sync write) after
+	 * invalidating the TLBs, but we need to wait a little bit
+	 * longer. Whether this is merely delaying us, or the
+	 * subsequent flush is a key part of serialising with the
+	 * post-sync op, this extra pass appears vital before a
+	 * mm switch!
+	 */
+	ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
+	if (ret)
+		return ret;
 
-		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), 0);
-		if (ret)
-			return ret;
+	ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
+	if (ret)
+		return ret;
 
-		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
-		if (ret)
-			return ret;
+	return engine->emit_flush(rq, EMIT_FLUSH);
+}
 
-		ret = flush_tlb(rq);
-		if (ret)
-			return ret;
+static int switch_context(struct i915_request *rq)
+{
+	struct intel_context *ce = rq->hw_context;
+	struct i915_address_space *vm = vm_alias(ce);
+	int ret;
 
-		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
-		if (ret)
-			return ret;
-	}
+	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
+
+	ret = switch_mm(rq, vm);
+	if (ret)
+		return ret;
 
 	if (ce->state) {
+		u32 hw_flags;
+
 		GEM_BUG_ON(rq->engine->id != RCS0);
 
+		hw_flags = 0;
 		if (!test_bit(CONTEXT_VALID_BIT, &ce->flags))
 			hw_flags = MI_RESTORE_INHIBIT;
 
-- 
2.24.0


* [Intel-gfx] [CI] drm/i915/gt: Set vm again after MI_SET_CONTEXT
@ 2019-12-15 15:23 Chris Wilson
  0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2019-12-15 15:23 UTC (permalink / raw)
  To: intel-gfx

Reloading the PD after MI_SET_CONTEXT, along with copious amounts of
flushes, so far is making Baytrail more content.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gt/intel_ring_submission.c   | 103 +++++++-----------
 1 file changed, 40 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 30ba67c9abe9..fd2ed910a41f 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -1370,23 +1370,31 @@ static int load_pd_dir(struct i915_request *rq,
 {
 	const struct intel_engine_cs * const engine = rq->engine;
 	u32 *cs;
+	int ret;
 
-	cs = intel_ring_begin(rq, 12);
+	ret = engine->emit_flush(rq, EMIT_FLUSH);
+	if (ret)
+		return ret;
+
+	ret = engine->emit_flush(rq, EMIT_INVALIDATE);
+	if (ret)
+		return ret;
+
+	cs = intel_ring_begin(rq, 14);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = MI_LOAD_REGISTER_IMM(5);
+	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
+	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
 	*cs++ = valid;
-
-	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
-	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
-	*cs++ = intel_gt_scratch_offset(rq->engine->gt,
-					INTEL_GT_SCRATCH_FIELD_DEFAULT);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base)) + 4;
+	*cs++ = 0;
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
 	*cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;
+	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
+	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
 
 	/* Stall until the page table load is complete? */
 	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
@@ -1396,26 +1404,7 @@ static int load_pd_dir(struct i915_request *rq,
 
 	intel_ring_advance(rq, cs);
 
-	return rq->engine->emit_flush(rq, EMIT_FLUSH);
-}
-
-static int flush_tlb(struct i915_request *rq)
-{
-	const struct intel_engine_cs * const engine = rq->engine;
-	u32 *cs;
-
-	cs = intel_ring_begin(rq, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
-	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
-	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
-
-	*cs++ = MI_NOOP;
-	intel_ring_advance(rq, cs);
-
-	return 0;
+	return engine->emit_flush(rq, EMIT_FLUSH);
 }
 
 static inline int mi_set_context(struct i915_request *rq, u32 flags)
@@ -1590,52 +1579,40 @@ static int remap_l3(struct i915_request *rq)
 	return 0;
 }
 
+static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
+{
+	if (!vm)
+		return 0;
+
+	/*
+	 * Not only do we need a full barrier (post-sync write) after
+	 * invalidating the TLBs, but we need to wait a little bit
+	 * longer. Whether this is merely delaying us, or the
+	 * subsequent flush is a key part of serialising with the
+	 * post-sync op, this extra pass appears vital before a
+	 * mm switch!
+	 */
+	return load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
+}
+
 static int switch_context(struct i915_request *rq)
 {
 	struct intel_context *ce = rq->hw_context;
 	struct i915_address_space *vm = vm_alias(ce);
-	u32 hw_flags = 0;
 	int ret;
 
 	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
 
-	if (vm) {
-		/*
-		 * Not only do we need a full barrier (post-sync write) after
-		 * invalidating the TLBs, but we need to wait a little bit
-		 * longer. Whether this is merely delaying us, or the
-		 * subsequent flush is a key part of serialising with the
-		 * post-sync op, this extra pass appears vital before a
-		 * mm switch!
-		 */
-		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
-		if (ret)
-			return ret;
-
-		ret = flush_tlb(rq);
-		if (ret)
-			return ret;
-
-		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), 0);
-		if (ret)
-			return ret;
-
-		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
-		if (ret)
-			return ret;
-
-		ret = flush_tlb(rq);
-		if (ret)
-			return ret;
-
-		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
-		if (ret)
-			return ret;
-	}
+	ret = switch_mm(rq, vm);
+	if (ret)
+		return ret;
 
 	if (ce->state) {
+		u32 hw_flags;
+
 		GEM_BUG_ON(rq->engine->id != RCS0);
 
+		hw_flags = 0;
 		if (!test_bit(CONTEXT_VALID_BIT, &ce->flags))
 			hw_flags = MI_RESTORE_INHIBIT;
 
-- 
2.24.0


* [Intel-gfx] [CI] drm/i915/gt: Set vm again after MI_SET_CONTEXT
@ 2019-12-15 15:05 Chris Wilson
  0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2019-12-15 15:05 UTC (permalink / raw)
  To: intel-gfx

Reloading the PD after MI_SET_CONTEXT, along with copious amounts of
flushes, so far is making Baytrail more content.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gt/intel_ring_submission.c   | 105 +++++++-----------
 1 file changed, 43 insertions(+), 62 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 30ba67c9abe9..cbb80ea320a5 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -1370,23 +1370,31 @@ static int load_pd_dir(struct i915_request *rq,
 {
 	const struct intel_engine_cs * const engine = rq->engine;
 	u32 *cs;
+	int ret;
 
-	cs = intel_ring_begin(rq, 12);
+	ret = engine->emit_flush(rq, EMIT_FLUSH);
+	if (ret)
+		return ret;
+
+	ret = engine->emit_flush(rq, EMIT_INVALIDATE);
+	if (ret)
+		return ret;
+
+	cs = intel_ring_begin(rq, 14);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = MI_LOAD_REGISTER_IMM(5);
+	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
+	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
 	*cs++ = valid;
-
-	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
-	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
-	*cs++ = intel_gt_scratch_offset(rq->engine->gt,
-					INTEL_GT_SCRATCH_FIELD_DEFAULT);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base)) + 4;
+	*cs++ = 0;
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
 	*cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;
+	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
+	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
 
 	/* Stall until the page table load is complete? */
 	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
@@ -1396,26 +1404,11 @@ static int load_pd_dir(struct i915_request *rq,
 
 	intel_ring_advance(rq, cs);
 
-	return rq->engine->emit_flush(rq, EMIT_FLUSH);
-}
-
-static int flush_tlb(struct i915_request *rq)
-{
-	const struct intel_engine_cs * const engine = rq->engine;
-	u32 *cs;
-
-	cs = intel_ring_begin(rq, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
-	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
-	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
-
-	*cs++ = MI_NOOP;
-	intel_ring_advance(rq, cs);
+	ret = engine->emit_flush(rq, EMIT_INVALIDATE);
+	if (ret)
+		return ret;
 
-	return 0;
+	return engine->emit_flush(rq, EMIT_FLUSH);
 }
 
 static inline int mi_set_context(struct i915_request *rq, u32 flags)
@@ -1590,52 +1583,40 @@ static int remap_l3(struct i915_request *rq)
 	return 0;
 }
 
+static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
+{
+	if (!vm)
+		return 0;
+
+	/*
+	 * Not only do we need a full barrier (post-sync write) after
+	 * invalidating the TLBs, but we need to wait a little bit
+	 * longer. Whether this is merely delaying us, or the
+	 * subsequent flush is a key part of serialising with the
+	 * post-sync op, this extra pass appears vital before a
+	 * mm switch!
+	 */
+	return load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
+}
+
 static int switch_context(struct i915_request *rq)
 {
 	struct intel_context *ce = rq->hw_context;
 	struct i915_address_space *vm = vm_alias(ce);
-	u32 hw_flags = 0;
 	int ret;
 
 	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
 
-	if (vm) {
-		/*
-		 * Not only do we need a full barrier (post-sync write) after
-		 * invalidating the TLBs, but we need to wait a little bit
-		 * longer. Whether this is merely delaying us, or the
-		 * subsequent flush is a key part of serialising with the
-		 * post-sync op, this extra pass appears vital before a
-		 * mm switch!
-		 */
-		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
-		if (ret)
-			return ret;
-
-		ret = flush_tlb(rq);
-		if (ret)
-			return ret;
-
-		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), 0);
-		if (ret)
-			return ret;
-
-		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
-		if (ret)
-			return ret;
-
-		ret = flush_tlb(rq);
-		if (ret)
-			return ret;
-
-		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
-		if (ret)
-			return ret;
-	}
+	ret = switch_mm(rq, vm);
+	if (ret)
+		return ret;
 
 	if (ce->state) {
+		u32 hw_flags;
+
 		GEM_BUG_ON(rq->engine->id != RCS0);
 
+		hw_flags = 0;
 		if (!test_bit(CONTEXT_VALID_BIT, &ce->flags))
 			hw_flags = MI_RESTORE_INHIBIT;
 
-- 
2.24.0


* [Intel-gfx] [CI] drm/i915/gt: Set vm again after MI_SET_CONTEXT
@ 2019-12-15 13:55 Chris Wilson
  0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2019-12-15 13:55 UTC (permalink / raw)
  To: intel-gfx

Reloading the PD after MI_SET_CONTEXT, along with copious amounts of
flushes, so far is making Baytrail more content.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gt/intel_ring_submission.c   | 110 +++++++++---------
 1 file changed, 55 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 30ba67c9abe9..f5fb8a9dabe1 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -1370,12 +1370,23 @@ static int load_pd_dir(struct i915_request *rq,
 {
 	const struct intel_engine_cs * const engine = rq->engine;
 	u32 *cs;
+	int ret;
 
-	cs = intel_ring_begin(rq, 12);
+	ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+	if (ret)
+		return ret;
+
+	ret = rq->engine->emit_flush(rq, EMIT_FLUSH);
+	if (ret)
+		return ret;
+
+	cs = intel_ring_begin(rq, 16);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = MI_LOAD_REGISTER_IMM(2);
+	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
+	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
 	*cs++ = valid;
 
@@ -1384,9 +1395,11 @@ static int load_pd_dir(struct i915_request *rq,
 	*cs++ = intel_gt_scratch_offset(rq->engine->gt,
 					INTEL_GT_SCRATCH_FIELD_DEFAULT);
 
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = MI_LOAD_REGISTER_IMM(2);
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
 	*cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;
+	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
+	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
 
 	/* Stall until the page table load is complete? */
 	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
@@ -1396,26 +1409,15 @@ static int load_pd_dir(struct i915_request *rq,
 
 	intel_ring_advance(rq, cs);
 
-	return rq->engine->emit_flush(rq, EMIT_FLUSH);
-}
-
-static int flush_tlb(struct i915_request *rq)
-{
-	const struct intel_engine_cs * const engine = rq->engine;
-	u32 *cs;
-
-	cs = intel_ring_begin(rq, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
-	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
-	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
+	ret = rq->engine->emit_flush(rq, EMIT_FLUSH);
+	if (ret)
+		return ret;
 
-	*cs++ = MI_NOOP;
-	intel_ring_advance(rq, cs);
+	ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+	if (ret)
+		return ret;
 
-	return 0;
+	return rq->engine->emit_flush(rq, EMIT_FLUSH);
 }
 
 static inline int mi_set_context(struct i915_request *rq, u32 flags)
@@ -1590,52 +1592,46 @@ static int remap_l3(struct i915_request *rq)
 	return 0;
 }
 
-static int switch_context(struct i915_request *rq)
+static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
 {
-	struct intel_context *ce = rq->hw_context;
-	struct i915_address_space *vm = vm_alias(ce);
-	u32 hw_flags = 0;
 	int ret;
 
-	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
-
-	if (vm) {
-		/*
-		 * Not only do we need a full barrier (post-sync write) after
-		 * invalidating the TLBs, but we need to wait a little bit
-		 * longer. Whether this is merely delaying us, or the
-		 * subsequent flush is a key part of serialising with the
-		 * post-sync op, this extra pass appears vital before a
-		 * mm switch!
-		 */
-		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
-		if (ret)
-			return ret;
+	if (!vm)
+		return 0;
 
-		ret = flush_tlb(rq);
-		if (ret)
-			return ret;
+	/*
+	 * Not only do we need a full barrier (post-sync write) after
+	 * invalidating the TLBs, but we need to wait a little bit
+	 * longer. Whether this is merely delaying us, or the
+	 * subsequent flush is a key part of serialising with the
+	 * post-sync op, this extra pass appears vital before a
+	 * mm switch!
+	 */
+	ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
+	if (ret)
+		return ret;
 
-		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), 0);
-		if (ret)
-			return ret;
+	return load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
+}
 
-		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
-		if (ret)
-			return ret;
+static int switch_context(struct i915_request *rq)
+{
+	struct intel_context *ce = rq->hw_context;
+	struct i915_address_space *vm = vm_alias(ce);
+	int ret;
 
-		ret = flush_tlb(rq);
-		if (ret)
-			return ret;
+	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
 
-		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
-		if (ret)
-			return ret;
-	}
+	ret = switch_mm(rq, vm);
+	if (ret)
+		return ret;
 
 	if (ce->state) {
+		u32 hw_flags;
+
 		GEM_BUG_ON(rq->engine->id != RCS0);
 
+		hw_flags = 0;
 		if (!test_bit(CONTEXT_VALID_BIT, &ce->flags))
 			hw_flags = MI_RESTORE_INHIBIT;
 
@@ -1644,6 +1640,10 @@ static int switch_context(struct i915_request *rq)
 			return ret;
 	}
 
+	ret = switch_mm(rq, vm);
+	if (ret)
+		return ret;
+
 	ret = remap_l3(rq);
 	if (ret)
 		return ret;
-- 
2.24.0
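
A side note on the INSTPM writes this revision folds into load_pd_dir():
INSTPM is a "masked" register, where the upper 16 bits of the written
value select which of the lower 16 bits take effect, so a single
MI_LOAD_REGISTER_IMM can flip one bit without a read-modify-write. A
simplified sketch of the idiom (the real i915 macros are equivalent up
to type plumbing):

	#define _MASKED_BIT_ENABLE(a)  (((a) << 16) | (a)) /* set bit(s) */
	#define _MASKED_BIT_DISABLE(a) ((a) << 16)         /* clear bit(s) */

	/* Ask the command streamer to invalidate its TLB: */
	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
	*cs++ = MI_NOOP; /* pad the emission to an even dword count */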


* [Intel-gfx] [CI] drm/i915/gt: Set vm again after MI_SET_CONTEXT
@ 2019-12-15 12:52 Chris Wilson
  0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2019-12-15 12:52 UTC (permalink / raw)
  To: intel-gfx

Reloading the PD after MI_SET_CONTEXT, along with copious amounts of
flushes, has so far made Baytrail more content.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gt/intel_ring_submission.c   | 100 ++++++++----------
 1 file changed, 45 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 30ba67c9abe9..910ae7288479 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -1371,22 +1371,26 @@ static int load_pd_dir(struct i915_request *rq,
 	const struct intel_engine_cs * const engine = rq->engine;
 	u32 *cs;
 
-	cs = intel_ring_begin(rq, 12);
+	cs = intel_ring_begin(rq, 16);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = MI_LOAD_REGISTER_IMM(2);
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
 	*cs++ = valid;
+	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
+	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
 
 	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
 	*cs++ = intel_gt_scratch_offset(rq->engine->gt,
 					INTEL_GT_SCRATCH_FIELD_DEFAULT);
 
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = MI_LOAD_REGISTER_IMM(2);
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
 	*cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;
+	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
+	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
 
 	/* Stall until the page table load is complete? */
 	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
@@ -1399,25 +1403,6 @@ static int load_pd_dir(struct i915_request *rq,
 	return rq->engine->emit_flush(rq, EMIT_FLUSH);
 }
 
-static int flush_tlb(struct i915_request *rq)
-{
-	const struct intel_engine_cs * const engine = rq->engine;
-	u32 *cs;
-
-	cs = intel_ring_begin(rq, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
-	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
-	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
-
-	*cs++ = MI_NOOP;
-	intel_ring_advance(rq, cs);
-
-	return 0;
-}
-
 static inline int mi_set_context(struct i915_request *rq, u32 flags)
 {
 	struct drm_i915_private *i915 = rq->i915;
@@ -1590,52 +1575,53 @@ static int remap_l3(struct i915_request *rq)
 	return 0;
 }
 
-static int switch_context(struct i915_request *rq)
+static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
 {
-	struct intel_context *ce = rq->hw_context;
-	struct i915_address_space *vm = vm_alias(ce);
-	u32 hw_flags = 0;
 	int ret;
 
-	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
+	if (!vm)
+		return 0;
 
-	if (vm) {
-		/*
-		 * Not only do we need a full barrier (post-sync write) after
-		 * invalidating the TLBs, but we need to wait a little bit
-		 * longer. Whether this is merely delaying us, or the
-		 * subsequent flush is a key part of serialising with the
-		 * post-sync op, this extra pass appears vital before a
-		 * mm switch!
-		 */
-		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
-		if (ret)
-			return ret;
+	/*
+	 * Not only do we need a full barrier (post-sync write) after
+	 * invalidating the TLBs, but we need to wait a little bit
+	 * longer. Whether this is merely delaying us, or the
+	 * subsequent flush is a key part of serialising with the
+	 * post-sync op, this extra pass appears vital before a
+	 * mm switch!
+	 */
+	ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+	if (ret)
+		return ret;
+	ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
+	if (ret)
+		return ret;
 
-		ret = flush_tlb(rq);
-		if (ret)
-			return ret;
+	ret = rq->engine->emit_flush(rq, EMIT_FLUSH);
+	if (ret)
+		return ret;
 
-		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), 0);
-		if (ret)
-			return ret;
+	ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+	if (ret)
+		return ret;
 
-		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
-		if (ret)
-			return ret;
+	return load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
+}
 
-		ret = flush_tlb(rq);
-		if (ret)
-			return ret;
+static int switch_context(struct i915_request *rq)
+{
+	struct intel_context *ce = rq->hw_context;
+	struct i915_address_space *vm = vm_alias(ce);
+	int ret;
 
-		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
-		if (ret)
-			return ret;
-	}
+	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
 
 	if (ce->state) {
+		u32 hw_flags;
+
 		GEM_BUG_ON(rq->engine->id != RCS0);
 
+		hw_flags = 0;
 		if (!test_bit(CONTEXT_VALID_BIT, &ce->flags))
 			hw_flags = MI_RESTORE_INHIBIT;
 
@@ -1644,6 +1630,10 @@ static int switch_context(struct i915_request *rq)
 			return ret;
 	}
 
+	ret = switch_mm(rq, vm);
+	if (ret)
+		return ret;
+
 	ret = remap_l3(rq);
 	if (ret)
 		return ret;
-- 
2.24.0
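
The MI_STORE_REGISTER_MEM that follows each register load in these
revisions deserves a gloss: the stored value is thrown away, and the
point (per the question-marked comment in the patch) is the readback
itself, which is hoped to stall the ring until the preceding load has
landed. The pattern, reusing the names from the patch:

	/* Write the register... */
	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
	*cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;

	/*
	 * ...then read it back into a scratch slot in the global GTT.
	 * Nothing ever looks at the scratch contents; the read exists
	 * to serialise against completion of the LRI above.
	 */
	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
	*cs++ = intel_gt_scratch_offset(engine->gt,
					INTEL_GT_SCRATCH_FIELD_DEFAULT);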


* [Intel-gfx] [CI] drm/i915/gt: Set vm again after MI_SET_CONTEXT
@ 2019-12-15 12:19 Chris Wilson
  0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2019-12-15 12:19 UTC (permalink / raw)
  To: intel-gfx

Reloading the PD after MI_SET_CONTEXT, along with copious amounts of
flushes, has so far made Baytrail more content.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gt/intel_ring_submission.c   | 100 ++++++++----------
 1 file changed, 45 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 30ba67c9abe9..10aa61e15a60 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -1371,11 +1371,13 @@ static int load_pd_dir(struct i915_request *rq,
 	const struct intel_engine_cs * const engine = rq->engine;
 	u32 *cs;
 
-	cs = intel_ring_begin(rq, 12);
+	cs = intel_ring_begin(rq, 16);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = MI_LOAD_REGISTER_IMM(2);
+	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
+	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
 	*cs++ = valid;
 
@@ -1384,9 +1386,11 @@ static int load_pd_dir(struct i915_request *rq,
 	*cs++ = intel_gt_scratch_offset(rq->engine->gt,
 					INTEL_GT_SCRATCH_FIELD_DEFAULT);
 
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = MI_LOAD_REGISTER_IMM(2);
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
 	*cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;
+	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
+	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
 
 	/* Stall until the page table load is complete? */
 	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
@@ -1399,25 +1403,6 @@ static int load_pd_dir(struct i915_request *rq,
 	return rq->engine->emit_flush(rq, EMIT_FLUSH);
 }
 
-static int flush_tlb(struct i915_request *rq)
-{
-	const struct intel_engine_cs * const engine = rq->engine;
-	u32 *cs;
-
-	cs = intel_ring_begin(rq, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
-	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
-	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
-
-	*cs++ = MI_NOOP;
-	intel_ring_advance(rq, cs);
-
-	return 0;
-}
-
 static inline int mi_set_context(struct i915_request *rq, u32 flags)
 {
 	struct drm_i915_private *i915 = rq->i915;
@@ -1590,52 +1575,53 @@ static int remap_l3(struct i915_request *rq)
 	return 0;
 }
 
-static int switch_context(struct i915_request *rq)
+static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
 {
-	struct intel_context *ce = rq->hw_context;
-	struct i915_address_space *vm = vm_alias(ce);
-	u32 hw_flags = 0;
 	int ret;
 
-	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
+	if (!vm)
+		return 0;
 
-	if (vm) {
-		/*
-		 * Not only do we need a full barrier (post-sync write) after
-		 * invalidating the TLBs, but we need to wait a little bit
-		 * longer. Whether this is merely delaying us, or the
-		 * subsequent flush is a key part of serialising with the
-		 * post-sync op, this extra pass appears vital before a
-		 * mm switch!
-		 */
-		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
-		if (ret)
-			return ret;
+	/*
+	 * Not only do we need a full barrier (post-sync write) after
+	 * invalidating the TLBs, but we need to wait a little bit
+	 * longer. Whether this is merely delaying us, or the
+	 * subsequent flush is a key part of serialising with the
+	 * post-sync op, this extra pass appears vital before a
+	 * mm switch!
+	 */
+	ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+	if (ret)
+		return ret;
+	ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
+	if (ret)
+		return ret;
 
-		ret = flush_tlb(rq);
-		if (ret)
-			return ret;
+	ret = rq->engine->emit_flush(rq, EMIT_FLUSH);
+	if (ret)
+		return ret;
 
-		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), 0);
-		if (ret)
-			return ret;
+	ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+	if (ret)
+		return ret;
 
-		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
-		if (ret)
-			return ret;
+	return load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
+}
 
-		ret = flush_tlb(rq);
-		if (ret)
-			return ret;
+static int switch_context(struct i915_request *rq)
+{
+	struct intel_context *ce = rq->hw_context;
+	struct i915_address_space *vm = vm_alias(ce);
+	int ret;
 
-		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
-		if (ret)
-			return ret;
-	}
+	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
 
 	if (ce->state) {
+		u32 hw_flags;
+
 		GEM_BUG_ON(rq->engine->id != RCS0);
 
+		hw_flags = 0;
 		if (!test_bit(CONTEXT_VALID_BIT, &ce->flags))
 			hw_flags = MI_RESTORE_INHIBIT;
 
@@ -1644,6 +1630,10 @@ static int switch_context(struct i915_request *rq)
 			return ret;
 	}
 
+	ret = switch_mm(rq, vm);
+	if (ret)
+		return ret;
+
 	ret = remap_l3(rq);
 	if (ret)
 		return ret;
-- 
2.24.0
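
The intel_ring_begin() bump from 12 to 16 dwords in this revision is
worth sanity-checking, since a mismatched reservation corrupts the ring:
an MI_LOAD_REGISTER_IMM(n) occupies 1 + 2n dwords and a gen7
MI_STORE_REGISTER_MEM occupies 3. Tallying this load_pd_dir():

	/*
	 * MI_LOAD_REGISTER_IMM(2)  1 + 2*2 = 5  (INSTPM + PP_DIR_DCLV)
	 * MI_STORE_REGISTER_MEM            3    (PP_DIR_DCLV readback)
	 * MI_LOAD_REGISTER_IMM(2)  1 + 2*2 = 5  (PP_DIR_BASE + INSTPM)
	 * MI_STORE_REGISTER_MEM            3    (PP_DIR_BASE readback)
	 *                                 ---
	 *                                 16 == intel_ring_begin(rq, 16)
	 */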


* [Intel-gfx] [CI] drm/i915/gt: Set vm again after MI_SET_CONTEXT
@ 2019-12-12 22:29 Chris Wilson
  0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2019-12-12 22:29 UTC (permalink / raw)
  To: intel-gfx

Reloading the PD after MI_SET_CONTEXT, along with copious amounts of
flushes, has so far made Baytrail more content.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gt/intel_ring_submission.c   | 100 ++++++++----------
 1 file changed, 45 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 5c22ca6f998a..6d896ff52125 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -1372,11 +1372,13 @@ static int load_pd_dir(struct i915_request *rq,
 	const struct intel_engine_cs * const engine = rq->engine;
 	u32 *cs;
 
-	cs = intel_ring_begin(rq, 12);
+	cs = intel_ring_begin(rq, 16);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = MI_LOAD_REGISTER_IMM(2);
+	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
+	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
 	*cs++ = valid;
 
@@ -1385,9 +1387,11 @@ static int load_pd_dir(struct i915_request *rq,
 	*cs++ = intel_gt_scratch_offset(rq->engine->gt,
 					INTEL_GT_SCRATCH_FIELD_DEFAULT);
 
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = MI_LOAD_REGISTER_IMM(2);
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
 	*cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;
+	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
+	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
 
 	/* Stall until the page table load is complete? */
 	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
@@ -1400,25 +1404,6 @@ static int load_pd_dir(struct i915_request *rq,
 	return rq->engine->emit_flush(rq, EMIT_FLUSH);
 }
 
-static int flush_tlb(struct i915_request *rq)
-{
-	const struct intel_engine_cs * const engine = rq->engine;
-	u32 *cs;
-
-	cs = intel_ring_begin(rq, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
-	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
-	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
-
-	*cs++ = MI_NOOP;
-	intel_ring_advance(rq, cs);
-
-	return 0;
-}
-
 static inline int mi_set_context(struct i915_request *rq, u32 flags)
 {
 	struct drm_i915_private *i915 = rq->i915;
@@ -1591,52 +1576,53 @@ static int remap_l3(struct i915_request *rq)
 	return 0;
 }
 
-static int switch_context(struct i915_request *rq)
+static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
 {
-	struct intel_context *ce = rq->hw_context;
-	struct i915_address_space *vm = vm_alias(ce);
-	u32 hw_flags = 0;
 	int ret;
 
-	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
+	if (!vm)
+		return 0;
 
-	if (vm) {
-		/*
-		 * Not only do we need a full barrier (post-sync write) after
-		 * invalidating the TLBs, but we need to wait a little bit
-		 * longer. Whether this is merely delaying us, or the
-		 * subsequent flush is a key part of serialising with the
-		 * post-sync op, this extra pass appears vital before a
-		 * mm switch!
-		 */
-		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
-		if (ret)
-			return ret;
+	/*
+	 * Not only do we need a full barrier (post-sync write) after
+	 * invalidating the TLBs, but we need to wait a little bit
+	 * longer. Whether this is merely delaying us, or the
+	 * subsequent flush is a key part of serialising with the
+	 * post-sync op, this extra pass appears vital before a
+	 * mm switch!
+	 */
+	ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+	if (ret)
+		return ret;
+	ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
+	if (ret)
+		return ret;
 
-		ret = flush_tlb(rq);
-		if (ret)
-			return ret;
+	ret = rq->engine->emit_flush(rq, EMIT_FLUSH);
+	if (ret)
+		return ret;
 
-		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), 0);
-		if (ret)
-			return ret;
+	ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+	if (ret)
+		return ret;
 
-		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
-		if (ret)
-			return ret;
+	return load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
+}
 
-		ret = flush_tlb(rq);
-		if (ret)
-			return ret;
+static int switch_context(struct i915_request *rq)
+{
+	struct intel_context *ce = rq->hw_context;
+	struct i915_address_space *vm = vm_alias(ce);
+	int ret;
 
-		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
-		if (ret)
-			return ret;
-	}
+	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
 
 	if (ce->state) {
+		u32 hw_flags;
+
 		GEM_BUG_ON(rq->engine->id != RCS0);
 
+		hw_flags = 0;
 		if (!test_bit(CONTEXT_VALID_BIT, &ce->flags))
 			hw_flags = MI_RESTORE_INHIBIT;
 
@@ -1645,6 +1631,10 @@ static int switch_context(struct i915_request *rq)
 			return ret;
 	}
 
+	ret = switch_mm(rq, vm);
+	if (ret)
+		return ret;
+
 	ret = remap_l3(rq);
 	if (ret)
 		return ret;
-- 
2.24.0
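
As background on the two ppGTT registers being juggled (values recalled
from i915_reg.h of this era; treat as illustrative rather than
authoritative): PP_DIR_BASE locates the page directory in the global GTT
(the << 10 converts the driver's ggtt_offset bookkeeping into the
register's address field), while PP_DIR_DCLV flags which 64-byte
cachelines of the directory are valid.

	#define PP_DIR_DCLV_2G	0xffffffff
	/*
	 * Each DCLV bit covers one 64B cacheline of the page directory,
	 * i.e. 16 PDEs; with each gen7 PDE spanning 4MiB, 32 bits of
	 * DCLV x 16 PDEs x 4MiB covers the full 2GiB address space, so
	 * the all-ones value marks the whole directory valid.
	 */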


* [Intel-gfx] [CI] drm/i915/gt: Set vm again after MI_SET_CONTEXT
@ 2019-12-12 16:04 Chris Wilson
  0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2019-12-12 16:04 UTC (permalink / raw)
  To: intel-gfx

Reloading the PD after MI_SET_CONTEXT, along with copious amounts of
flushes, has so far made Baytrail more content.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gt/intel_ring_submission.c   | 25 +++++++++++++++----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 5c22ca6f998a..336eb3f864c9 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -1609,11 +1609,7 @@ static int switch_context(struct i915_request *rq)
 		 * post-sync op, this extra pass appears vital before a
 		 * mm switch!
 		 */
-		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
-		if (ret)
-			return ret;
-
-		ret = flush_tlb(rq);
+		ret = rq->engine->emit_flush(rq, EMIT_FLUSH);
 		if (ret)
 			return ret;
 
@@ -1632,6 +1628,7 @@ static int switch_context(struct i915_request *rq)
 		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
 		if (ret)
 			return ret;
+
 	}
 
 	if (ce->state) {
@@ -1645,6 +1642,24 @@ static int switch_context(struct i915_request *rq)
 			return ret;
 	}
 
+	if (vm) {
+		ret = rq->engine->emit_flush(rq, EMIT_FLUSH);
+		if (ret)
+			return ret;
+
+		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
+		if (ret)
+			return ret;
+
+		ret = flush_tlb(rq);
+		if (ret)
+			return ret;
+
+		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+		if (ret)
+			return ret;
+	}
+
 	ret = remap_l3(rq);
 	if (ret)
 		return ret;
-- 
2.24.0
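
This first revision is the smallest statement of the idea: leave the
existing vm load ahead of MI_SET_CONTEXT untouched and simply repeat a
flush/load/invalidate pass on the far side. Condensed, with error paths
elided and load_and_flush() as a hypothetical shorthand (it is not a
helper in the patch) for the emit_flush + load_pd_dir + flush_tlb
sequence:

	if (vm)
		load_and_flush(rq, vm);	/* pre-existing path */

	if (ce->state)
		mi_set_context(rq, hw_flags);

	if (vm)
		load_and_flush(rq, vm);	/* new: set the vm again */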


Thread overview: 16+ messages
2019-12-15 21:21 [Intel-gfx] [CI] drm/i915/gt: Set vm again after MI_SET_CONTEXT Chris Wilson
2019-12-15 21:27 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gt: Set vm again after MI_SET_CONTEXT (rev12) Patchwork
2019-12-15 21:53 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2019-12-16  0:25 [Intel-gfx] [CI] drm/i915/gt: Set vm again after MI_SET_CONTEXT Chris Wilson
2019-12-15 21:56 Chris Wilson
2019-12-15 17:01 Chris Wilson
2019-12-15 16:47 Chris Wilson
2019-12-15 16:21 Chris Wilson
2019-12-15 15:47 Chris Wilson
2019-12-15 15:23 Chris Wilson
2019-12-15 15:05 Chris Wilson
2019-12-15 13:55 Chris Wilson
2019-12-15 12:52 Chris Wilson
2019-12-15 12:19 Chris Wilson
2019-12-12 22:29 Chris Wilson
2019-12-12 16:04 Chris Wilson
