All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/8] drm/i915: Get rid of the synchronous flag in switch_mm
@ 2014-06-18 16:15 oscar.mateo
  2014-06-18 16:15 ` [PATCH 2/8] drm/i915/bdw: If we are in reset, switch the GEN8 ppgtt synchronously oscar.mateo
                   ` (4 more replies)
  0 siblings, 5 replies; 8+ messages in thread
From: oscar.mateo @ 2014-06-18 16:15 UTC (permalink / raw)
  To: intel-gfx

From: Oscar Mateo <oscar.mateo@intel.com>

As a first step, split switch_mm into two different functions:
sync_switch_mm (currently used by ppgtt_enable) and switch_mm
(used by the outside world).

Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c |   2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c     | 101 ++++++++++++++++++++++----------
 drivers/gpu/drm/i915/i915_gem_gtt.h     |   5 +-
 3 files changed, 73 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 3ffe308..14cd7fc 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -624,7 +624,7 @@ static int do_switch(struct intel_engine_cs *ring,
 	from = ring->last_context;
 
 	if (USES_FULL_PPGTT(ring->dev)) {
-		ret = ppgtt->switch_mm(ppgtt, ring, false);
+		ret = ppgtt->switch_mm(ppgtt, ring);
 		if (ret)
 			goto unpin_out;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index ee7af4e..ad5c85b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -242,9 +242,26 @@ static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
 	return 0;
 }
 
+static int gen8_mm_sync_switch(struct i915_hw_ppgtt *ppgtt,
+			       struct intel_engine_cs *ring)
+{
+	int i, ret;
+
+	/* bit of a hack to find the actual last used pd */
+	int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;
+
+	for (i = used_pd - 1; i >= 0; i--) {
+		dma_addr_t addr = ppgtt->pd_dma_addr[i];
+		ret = gen8_write_pdp(ring, i, addr, true);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
-			  struct intel_engine_cs *ring,
-			  bool synchronous)
+			  struct intel_engine_cs *ring)
 {
 	int i, ret;
 
@@ -253,7 +270,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
 
 	for (i = used_pd - 1; i >= 0; i--) {
 		dma_addr_t addr = ppgtt->pd_dma_addr[i];
-		ret = gen8_write_pdp(ring, i, addr, synchronous);
+		ret = gen8_write_pdp(ring, i, addr, false);
 		if (ret)
 			return ret;
 	}
@@ -654,6 +671,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
 	}
 
 	ppgtt->enable = gen8_ppgtt_enable;
+	ppgtt->sync_switch_mm = gen8_mm_sync_switch;
 	ppgtt->switch_mm = gen8_mm_switch;
 	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
 	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
@@ -768,9 +786,22 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
 	return (ppgtt->pd_offset / 64) << 16;
 }
 
+static int hsw_mm_sync_switch(struct i915_hw_ppgtt *ppgtt,
+			      struct intel_engine_cs *ring)
+{
+	struct drm_device *dev = ppgtt->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
+	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
+	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
+	POSTING_READ(RING_PP_DIR_BASE(ring));
+
+	return 0;
+}
+
 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
-			 struct intel_engine_cs *ring,
-			 bool synchronous)
+			 struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ppgtt->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -783,14 +814,8 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
 	 *
 	 * FIXME: We should try not to special case reset
 	 */
-	if (synchronous ||
-	    i915_reset_in_progress(&dev_priv->gpu_error)) {
-		WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
-		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
-		I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
-		POSTING_READ(RING_PP_DIR_BASE(ring));
-		return 0;
-	}
+	if (i915_reset_in_progress(&dev_priv->gpu_error))
+		return ppgtt->sync_switch_mm(ppgtt, ring);
 
 	/* NB: TLBs must be flushed and invalidated before a switch */
 	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
@@ -812,9 +837,22 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
 	return 0;
 }
 
+static int gen7_mm_sync_switch(struct i915_hw_ppgtt *ppgtt,
+			       struct intel_engine_cs *ring)
+{
+	struct drm_device *dev = ppgtt->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
+	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
+	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
+	POSTING_READ(RING_PP_DIR_BASE(ring));
+
+	return 0;
+}
+
 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
-			  struct intel_engine_cs *ring,
-			  bool synchronous)
+			  struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ppgtt->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -827,14 +865,8 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
 	 *
 	 * FIXME: We should try not to special case reset
 	 */
-	if (synchronous ||
-	    i915_reset_in_progress(&dev_priv->gpu_error)) {
-		WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
-		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
-		I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
-		POSTING_READ(RING_PP_DIR_BASE(ring));
-		return 0;
-	}
+	if (i915_reset_in_progress(&dev_priv->gpu_error))
+		return ppgtt->sync_switch_mm(ppgtt, ring);
 
 	/* NB: TLBs must be flushed and invalidated before a switch */
 	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
@@ -863,16 +895,12 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
 	return 0;
 }
 
-static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
-			  struct intel_engine_cs *ring,
-			  bool synchronous)
+static int gen6_mm_sync_switch(struct i915_hw_ppgtt *ppgtt,
+			       struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ppgtt->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	if (!synchronous)
-		return 0;
-
 	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
 	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
 
@@ -881,6 +909,12 @@ static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
 	return 0;
 }
 
+static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
+			  struct intel_engine_cs *ring)
+{
+	return 0;
+}
+
 static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
 {
 	struct drm_device *dev = ppgtt->base.dev;
@@ -897,7 +931,7 @@ static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
 		if (USES_FULL_PPGTT(dev))
 			continue;
 
-		ret = ppgtt->switch_mm(ppgtt, ring, true);
+		ret = ppgtt->sync_switch_mm(ppgtt, ring);
 		if (ret)
 			goto err_out;
 	}
@@ -942,7 +976,7 @@ static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
 		if (USES_FULL_PPGTT(dev))
 			continue;
 
-		ret = ppgtt->switch_mm(ppgtt, ring, true);
+		ret = ppgtt->sync_switch_mm(ppgtt, ring);
 		if (ret)
 			return ret;
 	}
@@ -971,7 +1005,7 @@ static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
 	I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
 
 	for_each_ring(ring, dev_priv, i) {
-		int ret = ppgtt->switch_mm(ppgtt, ring, true);
+		int ret = ppgtt->sync_switch_mm(ppgtt, ring);
 		if (ret)
 			return ret;
 	}
@@ -1196,12 +1230,15 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
 	if (IS_GEN6(dev)) {
 		ppgtt->enable = gen6_ppgtt_enable;
+		ppgtt->sync_switch_mm = gen6_mm_sync_switch;
 		ppgtt->switch_mm = gen6_mm_switch;
 	} else if (IS_HASWELL(dev)) {
 		ppgtt->enable = gen7_ppgtt_enable;
+		ppgtt->sync_switch_mm = hsw_mm_sync_switch;
 		ppgtt->switch_mm = hsw_mm_switch;
 	} else if (IS_GEN7(dev)) {
 		ppgtt->enable = gen7_ppgtt_enable;
+		ppgtt->sync_switch_mm = gen7_mm_sync_switch;
 		ppgtt->switch_mm = gen7_mm_switch;
 	} else
 		BUG();
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 7ab8446..c260ada 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -262,9 +262,10 @@ struct i915_hw_ppgtt {
 	struct intel_context *ctx;
 
 	int (*enable)(struct i915_hw_ppgtt *ppgtt);
+	int (*sync_switch_mm)(struct i915_hw_ppgtt *ppgtt,
+			      struct intel_engine_cs *ring);
 	int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
-			 struct intel_engine_cs *ring,
-			 bool synchronous);
+			 struct intel_engine_cs *ring);
 	void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m);
 };
 
-- 
1.9.0

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/8] drm/i915/bdw: If we are in reset, switch the GEN8 ppgtt synchronously
  2014-06-18 16:15 [PATCH 1/8] drm/i915: Get rid of the synchronous flag in switch_mm oscar.mateo
@ 2014-06-18 16:15 ` oscar.mateo
  2014-06-18 16:15 ` [PATCH 3/8] drm/i915: Enable the PPGTT in the ring init oscar.mateo
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 8+ messages in thread
From: oscar.mateo @ 2014-06-18 16:15 UTC (permalink / raw)
  To: intel-gfx

From: Oscar Mateo <oscar.mateo@intel.com>

If this comment is true for GEN6 and GEN7, I imagine it is for GEN8 as well:

    If we're in reset, we can assume the GPU is sufficiently idle to
    manually frob these bits. Ideally we could use the ring functions,
    except our error handling makes it quite difficult (can't use
    intel_ring_begin, ring->flush, or intel_ring_advance).

This exception was lost (wrongly, I suspect) in:

commit eeb9488e751a0a6401e7516a893efaf9d1f77fb5
Author: Ben Widawsky <ben@bwidawsk.net>
Date:   Fri Dec 6 14:11:10 2013 -0800

    drm/i915: Extract mm switching to function

    In order to do the full context switch with address space, it's
    convenient to have a way to switch the address space. We already have
    this in our code - just pull it out to be called by the context switch
    code later.

    v2: Rebased on BDW support. Required adding BDW.

Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index ad5c85b..49f103f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -263,10 +263,23 @@ static int gen8_mm_sync_switch(struct i915_hw_ppgtt *ppgtt,
 static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
 			  struct intel_engine_cs *ring)
 {
+	struct drm_device *dev = ppgtt->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int used_pd;
 	int i, ret;
 
+	/* If we're in reset, we can assume the GPU is sufficiently idle to
+	 * manually frob these bits. Ideally we could use the ring functions,
+	 * except our error handling makes it quite difficult (can't use
+	 * intel_ring_begin, ring->flush, or intel_ring_advance)
+	 *
+	 * FIXME: We should try not to special case reset
+	 */
+	if (i915_reset_in_progress(&dev_priv->gpu_error))
+		return ppgtt->sync_switch_mm(ppgtt, ring);
+
 	/* bit of a hack to find the actual last used pd */
-	int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;
+	used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;
 
 	for (i = used_pd - 1; i >= 0; i--) {
 		dma_addr_t addr = ppgtt->pd_dma_addr[i];
-- 
1.9.0

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 3/8] drm/i915: Enable the PPGTT in the ring init
  2014-06-18 16:15 [PATCH 1/8] drm/i915: Get rid of the synchronous flag in switch_mm oscar.mateo
  2014-06-18 16:15 ` [PATCH 2/8] drm/i915/bdw: If we are in reset, switch the GEN8 ppgtt synchronously oscar.mateo
@ 2014-06-18 16:15 ` oscar.mateo
  2014-06-18 16:15 ` [PATCH 4/8] drm/i915: Split PPGTT enabling from Aliasing PPGTT switching oscar.mateo
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 8+ messages in thread
From: oscar.mateo @ 2014-06-18 16:15 UTC (permalink / raw)
  To: intel-gfx

From: Oscar Mateo <oscar.mateo@intel.com>

PPGTT enabling is per-ring on GEN7+, so it makes sense that the enable
functions takes a ring argument. GEN6 can live with multiple calls to
the enable function.

And, more importantly, this allows us to enable the PPGTT from the ring
init code (the place where all the initializations should be done).

Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c |  7 ---
 drivers/gpu/drm/i915/i915_gem_gtt.c     | 79 +++++++++++++++------------------
 drivers/gpu/drm/i915/i915_gem_gtt.h     |  3 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c |  6 +++
 4 files changed, 43 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 14cd7fc..bf07d9d 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -471,13 +471,6 @@ int i915_gem_context_enable(struct drm_i915_private *dev_priv)
 	struct intel_engine_cs *ring;
 	int ret, i;
 
-	/* This is the only place the aliasing PPGTT gets enabled, which means
-	 * it has to happen before we bail on reset */
-	if (dev_priv->mm.aliasing_ppgtt) {
-		struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
-		ppgtt->enable(ppgtt);
-	}
-
 	/* FIXME: We should make this work, even in reset */
 	if (i915_reset_in_progress(&dev_priv->gpu_error))
 		return 0;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 49f103f..63507eb 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -77,7 +77,8 @@ static void ppgtt_bind_vma(struct i915_vma *vma,
 			   enum i915_cache_level cache_level,
 			   u32 flags);
 static void ppgtt_unbind_vma(struct i915_vma *vma);
-static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt);
+static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt,
+			     struct intel_engine_cs *ring);
 
 static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
 					     enum i915_cache_level level,
@@ -928,43 +929,38 @@ static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
 	return 0;
 }
 
-static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
+static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt,
+			     struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ppgtt->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *ring;
-	int j, ret;
+	int ret;
 
-	for_each_ring(ring, dev_priv, j) {
-		I915_WRITE(RING_MODE_GEN7(ring),
-			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+	I915_WRITE(RING_MODE_GEN7(ring),
+		   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
 
-		/* We promise to do a switch later with FULL PPGTT. If this is
-		 * aliasing, this is the one and only switch we'll do */
-		if (USES_FULL_PPGTT(dev))
-			continue;
+	/* We promise to do a switch later with FULL PPGTT. If this is
+	 * aliasing, this is the one and only switch we'll do */
+	if (USES_FULL_PPGTT(dev))
+		return 0;
 
-		ret = ppgtt->sync_switch_mm(ppgtt, ring);
-		if (ret)
-			goto err_out;
+	ret = ppgtt->sync_switch_mm(ppgtt, ring);
+	if (ret) {
+		I915_WRITE(RING_MODE_GEN7(ring),
+			   _MASKED_BIT_DISABLE(GFX_PPGTT_ENABLE));
+		return ret;
 	}
 
 	return 0;
-
-err_out:
-	for_each_ring(ring, dev_priv, j)
-		I915_WRITE(RING_MODE_GEN7(ring),
-			   _MASKED_BIT_DISABLE(GFX_PPGTT_ENABLE));
-	return ret;
 }
 
-static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
+static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt,
+			     struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ppgtt->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *ring;
 	uint32_t ecochk, ecobits;
-	int i;
+	int ret;
 
 	ecobits = I915_READ(GAC_ECO_BITS);
 	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
@@ -978,32 +974,29 @@ static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
 	}
 	I915_WRITE(GAM_ECOCHK, ecochk);
 
-	for_each_ring(ring, dev_priv, i) {
-		int ret;
-		/* GFX_MODE is per-ring on gen7+ */
-		I915_WRITE(RING_MODE_GEN7(ring),
-			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+	/* GFX_MODE is per-ring on gen7+ */
+	I915_WRITE(RING_MODE_GEN7(ring),
+		   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
 
-		/* We promise to do a switch later with FULL PPGTT. If this is
-		 * aliasing, this is the one and only switch we'll do */
-		if (USES_FULL_PPGTT(dev))
-			continue;
+	/* We promise to do a switch later with FULL PPGTT. If this is
+	 * aliasing, this is the one and only switch we'll do */
+	if (USES_FULL_PPGTT(dev))
+		return 0;
 
-		ret = ppgtt->sync_switch_mm(ppgtt, ring);
-		if (ret)
-			return ret;
-	}
+	ret = ppgtt->sync_switch_mm(ppgtt, ring);
+	if (ret)
+		return ret;
 
 	return 0;
 }
 
-static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
+static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt,
+			     struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ppgtt->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *ring;
 	uint32_t ecochk, gab_ctl, ecobits;
-	int i;
+	int ret;
 
 	ecobits = I915_READ(GAC_ECO_BITS);
 	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
@@ -1017,11 +1010,9 @@ static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
 
 	I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
 
-	for_each_ring(ring, dev_priv, i) {
-		int ret = ppgtt->sync_switch_mm(ppgtt, ring);
-		if (ret)
-			return ret;
-	}
+	ret = ppgtt->sync_switch_mm(ppgtt, ring);
+	if (ret)
+		return ret;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index c260ada..15ed42f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -261,7 +261,8 @@ struct i915_hw_ppgtt {
 	dma_addr_t scratch_dma_addr;
 	struct intel_context *ctx;
 
-	int (*enable)(struct i915_hw_ppgtt *ppgtt);
+	int (*enable)(struct i915_hw_ppgtt *ppgtt,
+		      struct intel_engine_cs *ring);
 	int (*sync_switch_mm)(struct i915_hw_ppgtt *ppgtt,
 			      struct intel_engine_cs *ring);
 	int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 0eaaaec..e53e32e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1426,6 +1426,7 @@ err_unref:
 static int intel_init_ring_buffer(struct drm_device *dev,
 				  struct intel_engine_cs *ring)
 {
+	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_ringbuffer *ringbuf = ring->buffer;
 	int ret;
 
@@ -1477,6 +1478,11 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 	if (ret)
 		goto error;
 
+	if (dev_priv->mm.aliasing_ppgtt) {
+		struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
+		ppgtt->enable(ppgtt, ring);
+	}
+
 	return 0;
 
 error:
-- 
1.9.0

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 4/8] drm/i915: Split PPGTT enabling from Aliasing PPGTT switching
  2014-06-18 16:15 [PATCH 1/8] drm/i915: Get rid of the synchronous flag in switch_mm oscar.mateo
  2014-06-18 16:15 ` [PATCH 2/8] drm/i915/bdw: If we are in reset, switch the GEN8 ppgtt synchronously oscar.mateo
  2014-06-18 16:15 ` [PATCH 3/8] drm/i915: Enable the PPGTT in the ring init oscar.mateo
@ 2014-06-18 16:15 ` oscar.mateo
  2014-06-18 16:27 ` [PATCH 1/8] drm/i915: Get rid of the synchronous flag in switch_mm Mateo Lozano, Oscar
  2014-06-18 19:50 ` Daniel Vetter
  4 siblings, 0 replies; 8+ messages in thread
From: oscar.mateo @ 2014-06-18 16:15 UTC (permalink / raw)
  To: intel-gfx

From: Oscar Mateo <oscar.mateo@intel.com>

At this point, I could simply kill the sync_switch_mm function pointer
off and just transform it into some static functions belonging to
i915_gem_gtt.c

But I have a problem with Execlists: enabling the PPGTT is still needed,
but switching to it the old way (writing to the PDP registers) is not.
So, instead, use this as a preparatory patch to give me the option to do
one without the other.

Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c     | 28 ----------------------------
 drivers/gpu/drm/i915/intel_ringbuffer.c |  8 ++++++++
 2 files changed, 8 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 63507eb..707cbb8 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -934,23 +934,10 @@ static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt,
 {
 	struct drm_device *dev = ppgtt->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int ret;
 
 	I915_WRITE(RING_MODE_GEN7(ring),
 		   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
 
-	/* We promise to do a switch later with FULL PPGTT. If this is
-	 * aliasing, this is the one and only switch we'll do */
-	if (USES_FULL_PPGTT(dev))
-		return 0;
-
-	ret = ppgtt->sync_switch_mm(ppgtt, ring);
-	if (ret) {
-		I915_WRITE(RING_MODE_GEN7(ring),
-			   _MASKED_BIT_DISABLE(GFX_PPGTT_ENABLE));
-		return ret;
-	}
-
 	return 0;
 }
 
@@ -960,7 +947,6 @@ static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt,
 	struct drm_device *dev = ppgtt->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint32_t ecochk, ecobits;
-	int ret;
 
 	ecobits = I915_READ(GAC_ECO_BITS);
 	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
@@ -978,15 +964,6 @@ static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt,
 	I915_WRITE(RING_MODE_GEN7(ring),
 		   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
 
-	/* We promise to do a switch later with FULL PPGTT. If this is
-	 * aliasing, this is the one and only switch we'll do */
-	if (USES_FULL_PPGTT(dev))
-		return 0;
-
-	ret = ppgtt->sync_switch_mm(ppgtt, ring);
-	if (ret)
-		return ret;
-
 	return 0;
 }
 
@@ -996,7 +973,6 @@ static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt,
 	struct drm_device *dev = ppgtt->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint32_t ecochk, gab_ctl, ecobits;
-	int ret;
 
 	ecobits = I915_READ(GAC_ECO_BITS);
 	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
@@ -1010,10 +986,6 @@ static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt,
 
 	I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
 
-	ret = ppgtt->sync_switch_mm(ppgtt, ring);
-	if (ret)
-		return ret;
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e53e32e..8050c70 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1481,6 +1481,14 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 	if (dev_priv->mm.aliasing_ppgtt) {
 		struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
 		ppgtt->enable(ppgtt, ring);
+
+		/* We promise to do a switch later with FULL PPGTT. If this is
+		 * aliasing, this is the one and only switch we'll do */
+		if (!USES_FULL_PPGTT(dev)) {
+			ret = ppgtt->sync_switch_mm(ppgtt, ring);
+			if (ret)
+				goto error;
+		}
 	}
 
 	return 0;
-- 
1.9.0

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/8] drm/i915: Get rid of the synchronous flag in switch_mm
  2014-06-18 16:15 [PATCH 1/8] drm/i915: Get rid of the synchronous flag in switch_mm oscar.mateo
                   ` (2 preceding siblings ...)
  2014-06-18 16:15 ` [PATCH 4/8] drm/i915: Split PPGTT enabling from Aliasing PPGTT switching oscar.mateo
@ 2014-06-18 16:27 ` Mateo Lozano, Oscar
  2014-06-18 19:50 ` Daniel Vetter
  4 siblings, 0 replies; 8+ messages in thread
From: Mateo Lozano, Oscar @ 2014-06-18 16:27 UTC (permalink / raw)
  To: intel-gfx

No, dear reader, you are not missing patches: this series should be 1/4, 2/4, 3/4 and 4/4, but git went rogue on me :(

I can resend the series if that helps...

> -----Original Message-----
> From: Intel-gfx [mailto:intel-gfx-bounces@lists.freedesktop.org] On Behalf
> Of oscar.mateo@intel.com
> Sent: Wednesday, June 18, 2014 5:16 PM
> To: intel-gfx@lists.freedesktop.org
> Subject: [Intel-gfx] [PATCH 1/8] drm/i915: Get rid of the synchronous flag in
> switch_mm
> 
> From: Oscar Mateo <oscar.mateo@intel.com>
> 
> As a first step, split switch_mm into two different functions:
> sync_switch_mm (currently used by ppgtt_enable) and switch_mm (used by
> the outside world).
> 
> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem_context.c |   2 +-
>  drivers/gpu/drm/i915/i915_gem_gtt.c     | 101 ++++++++++++++++++++++---
> -------
>  drivers/gpu/drm/i915/i915_gem_gtt.h     |   5 +-
>  3 files changed, 73 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c
> b/drivers/gpu/drm/i915/i915_gem_context.c
> index 3ffe308..14cd7fc 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -624,7 +624,7 @@ static int do_switch(struct intel_engine_cs *ring,
>  	from = ring->last_context;
> 
>  	if (USES_FULL_PPGTT(ring->dev)) {
> -		ret = ppgtt->switch_mm(ppgtt, ring, false);
> +		ret = ppgtt->switch_mm(ppgtt, ring);
>  		if (ret)
>  			goto unpin_out;
>  	}
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c
> b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index ee7af4e..ad5c85b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -242,9 +242,26 @@ static int gen8_write_pdp(struct intel_engine_cs
> *ring, unsigned entry,
>  	return 0;
>  }
> 
> +static int gen8_mm_sync_switch(struct i915_hw_ppgtt *ppgtt,
> +			       struct intel_engine_cs *ring) {
> +	int i, ret;
> +
> +	/* bit of a hack to find the actual last used pd */
> +	int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;
> +
> +	for (i = used_pd - 1; i >= 0; i--) {
> +		dma_addr_t addr = ppgtt->pd_dma_addr[i];
> +		ret = gen8_write_pdp(ring, i, addr, true);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	return 0;
> +}
> +
>  static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
> -			  struct intel_engine_cs *ring,
> -			  bool synchronous)
> +			  struct intel_engine_cs *ring)
>  {
>  	int i, ret;
> 
> @@ -253,7 +270,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt
> *ppgtt,
> 
>  	for (i = used_pd - 1; i >= 0; i--) {
>  		dma_addr_t addr = ppgtt->pd_dma_addr[i];
> -		ret = gen8_write_pdp(ring, i, addr, synchronous);
> +		ret = gen8_write_pdp(ring, i, addr, false);
>  		if (ret)
>  			return ret;
>  	}
> @@ -654,6 +671,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt
> *ppgtt, uint64_t size)
>  	}
> 
>  	ppgtt->enable = gen8_ppgtt_enable;
> +	ppgtt->sync_switch_mm = gen8_mm_sync_switch;
>  	ppgtt->switch_mm = gen8_mm_switch;
>  	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
>  	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; @@ -768,9
> +786,22 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
>  	return (ppgtt->pd_offset / 64) << 16;
>  }
> 
> +static int hsw_mm_sync_switch(struct i915_hw_ppgtt *ppgtt,
> +			      struct intel_engine_cs *ring)
> +{
> +	struct drm_device *dev = ppgtt->base.dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +	WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
> +	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
> +	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
> +	POSTING_READ(RING_PP_DIR_BASE(ring));
> +
> +	return 0;
> +}
> +
>  static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
> -			 struct intel_engine_cs *ring,
> -			 bool synchronous)
> +			 struct intel_engine_cs *ring)
>  {
>  	struct drm_device *dev = ppgtt->base.dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private; @@ -783,14
> +814,8 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  	 *
>  	 * FIXME: We should try not to special case reset
>  	 */
> -	if (synchronous ||
> -	    i915_reset_in_progress(&dev_priv->gpu_error)) {
> -		WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
> -		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
> -		I915_WRITE(RING_PP_DIR_BASE(ring),
> get_pd_offset(ppgtt));
> -		POSTING_READ(RING_PP_DIR_BASE(ring));
> -		return 0;
> -	}
> +	if (i915_reset_in_progress(&dev_priv->gpu_error))
> +		return ppgtt->sync_switch_mm(ppgtt, ring);
> 
>  	/* NB: TLBs must be flushed and invalidated before a switch */
>  	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS,
> I915_GEM_GPU_DOMAINS); @@ -812,9 +837,22 @@ static int
> hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  	return 0;
>  }
> 
> +static int gen7_mm_sync_switch(struct i915_hw_ppgtt *ppgtt,
> +			       struct intel_engine_cs *ring) {
> +	struct drm_device *dev = ppgtt->base.dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +	WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
> +	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
> +	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
> +	POSTING_READ(RING_PP_DIR_BASE(ring));
> +
> +	return 0;
> +}
> +
>  static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
> -			  struct intel_engine_cs *ring,
> -			  bool synchronous)
> +			  struct intel_engine_cs *ring)
>  {
>  	struct drm_device *dev = ppgtt->base.dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private; @@ -827,14
> +865,8 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  	 *
>  	 * FIXME: We should try not to special case reset
>  	 */
> -	if (synchronous ||
> -	    i915_reset_in_progress(&dev_priv->gpu_error)) {
> -		WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
> -		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
> -		I915_WRITE(RING_PP_DIR_BASE(ring),
> get_pd_offset(ppgtt));
> -		POSTING_READ(RING_PP_DIR_BASE(ring));
> -		return 0;
> -	}
> +	if (i915_reset_in_progress(&dev_priv->gpu_error))
> +		return ppgtt->sync_switch_mm(ppgtt, ring);
> 
>  	/* NB: TLBs must be flushed and invalidated before a switch */
>  	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS,
> I915_GEM_GPU_DOMAINS); @@ -863,16 +895,12 @@ static int
> gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  	return 0;
>  }
> 
> -static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
> -			  struct intel_engine_cs *ring,
> -			  bool synchronous)
> +static int gen6_mm_sync_switch(struct i915_hw_ppgtt *ppgtt,
> +			       struct intel_engine_cs *ring)
>  {
>  	struct drm_device *dev = ppgtt->base.dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> 
> -	if (!synchronous)
> -		return 0;
> -
>  	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
>  	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
> 
> @@ -881,6 +909,12 @@ static int gen6_mm_switch(struct i915_hw_ppgtt
> *ppgtt,
>  	return 0;
>  }
> 
> +static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
> +			  struct intel_engine_cs *ring)
> +{
> +	return 0;
> +}
> +
>  static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)  {
>  	struct drm_device *dev = ppgtt->base.dev; @@ -897,7 +931,7 @@
> static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
>  		if (USES_FULL_PPGTT(dev))
>  			continue;
> 
> -		ret = ppgtt->switch_mm(ppgtt, ring, true);
> +		ret = ppgtt->sync_switch_mm(ppgtt, ring);
>  		if (ret)
>  			goto err_out;
>  	}
> @@ -942,7 +976,7 @@ static int gen7_ppgtt_enable(struct i915_hw_ppgtt
> *ppgtt)
>  		if (USES_FULL_PPGTT(dev))
>  			continue;
> 
> -		ret = ppgtt->switch_mm(ppgtt, ring, true);
> +		ret = ppgtt->sync_switch_mm(ppgtt, ring);
>  		if (ret)
>  			return ret;
>  	}
> @@ -971,7 +1005,7 @@ static int gen6_ppgtt_enable(struct i915_hw_ppgtt
> *ppgtt)
>  	I915_WRITE(GFX_MODE,
> _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
> 
>  	for_each_ring(ring, dev_priv, i) {
> -		int ret = ppgtt->switch_mm(ppgtt, ring, true);
> +		int ret = ppgtt->sync_switch_mm(ppgtt, ring);
>  		if (ret)
>  			return ret;
>  	}
> @@ -1196,12 +1230,15 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt
> *ppgtt)
>  	ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
>  	if (IS_GEN6(dev)) {
>  		ppgtt->enable = gen6_ppgtt_enable;
> +		ppgtt->sync_switch_mm = gen6_mm_sync_switch;
>  		ppgtt->switch_mm = gen6_mm_switch;
>  	} else if (IS_HASWELL(dev)) {
>  		ppgtt->enable = gen7_ppgtt_enable;
> +		ppgtt->sync_switch_mm = hsw_mm_sync_switch;
>  		ppgtt->switch_mm = hsw_mm_switch;
>  	} else if (IS_GEN7(dev)) {
>  		ppgtt->enable = gen7_ppgtt_enable;
> +		ppgtt->sync_switch_mm = gen7_mm_sync_switch;
>  		ppgtt->switch_mm = gen7_mm_switch;
>  	} else
>  		BUG();
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h
> b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 7ab8446..c260ada 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -262,9 +262,10 @@ struct i915_hw_ppgtt {
>  	struct intel_context *ctx;
> 
>  	int (*enable)(struct i915_hw_ppgtt *ppgtt);
> +	int (*sync_switch_mm)(struct i915_hw_ppgtt *ppgtt,
> +			      struct intel_engine_cs *ring);
>  	int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
> -			 struct intel_engine_cs *ring,
> -			 bool synchronous);
> +			 struct intel_engine_cs *ring);
>  	void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file
> *m);  };
> 
> --
> 1.9.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/8] drm/i915: Get rid of the synchronous flag in switch_mm
  2014-06-18 16:15 [PATCH 1/8] drm/i915: Get rid of the synchronous flag in switch_mm oscar.mateo
                   ` (3 preceding siblings ...)
  2014-06-18 16:27 ` [PATCH 1/8] drm/i915: Get rid of the synchronous flag in switch_mm Mateo Lozano, Oscar
@ 2014-06-18 19:50 ` Daniel Vetter
  2014-06-19 14:00   ` Mateo Lozano, Oscar
  4 siblings, 1 reply; 8+ messages in thread
From: Daniel Vetter @ 2014-06-18 19:50 UTC (permalink / raw)
  To: oscar.mateo; +Cc: intel-gfx

On Wed, Jun 18, 2014 at 05:15:35PM +0100, oscar.mateo@intel.com wrote:
> From: Oscar Mateo <oscar.mateo@intel.com>
> 
> As a first step, split switch_mm into two different functions:
> sync_switch_mm (currently used by ppgtt_enable) and switch_mm
> (used by the outside world).
> 
> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>

I guess that jira task description was a bit unclear - removing the bool
synchronous argument isn't the goal itself. The idea was to rework the gpu
reset sequence so that it matches the same sequence we use at driver load
and so can use the same code. This patch removes the bool synchronous, but
the logical split with two different ways to load ppgtt is still there.

The reason for aiming as hard as possible to use the exact same code for
driver load, gpu reset and runtime pm/system resume is that we've simply
seen too many bugs due to slight variations and unintended omissions.

If for some really odd reason this is impossible (e.g. when the gpu needs
a special dance when coming out of reset) then we need a giant comment
explaining the magic. But without such a clear reason the goal should
always be to have just 1 single autorative piece of code for a given state
transition.
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_gem_context.c |   2 +-
>  drivers/gpu/drm/i915/i915_gem_gtt.c     | 101 ++++++++++++++++++++++----------
>  drivers/gpu/drm/i915/i915_gem_gtt.h     |   5 +-
>  3 files changed, 73 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 3ffe308..14cd7fc 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -624,7 +624,7 @@ static int do_switch(struct intel_engine_cs *ring,
>  	from = ring->last_context;
>  
>  	if (USES_FULL_PPGTT(ring->dev)) {
> -		ret = ppgtt->switch_mm(ppgtt, ring, false);
> +		ret = ppgtt->switch_mm(ppgtt, ring);
>  		if (ret)
>  			goto unpin_out;
>  	}
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index ee7af4e..ad5c85b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -242,9 +242,26 @@ static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
>  	return 0;
>  }
>  
> +static int gen8_mm_sync_switch(struct i915_hw_ppgtt *ppgtt,
> +			       struct intel_engine_cs *ring)
> +{
> +	int i, ret;
> +
> +	/* bit of a hack to find the actual last used pd */
> +	int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;
> +
> +	for (i = used_pd - 1; i >= 0; i--) {
> +		dma_addr_t addr = ppgtt->pd_dma_addr[i];
> +		ret = gen8_write_pdp(ring, i, addr, true);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	return 0;
> +}
> +
>  static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
> -			  struct intel_engine_cs *ring,
> -			  bool synchronous)
> +			  struct intel_engine_cs *ring)
>  {
>  	int i, ret;
>  
> @@ -253,7 +270,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  
>  	for (i = used_pd - 1; i >= 0; i--) {
>  		dma_addr_t addr = ppgtt->pd_dma_addr[i];
> -		ret = gen8_write_pdp(ring, i, addr, synchronous);
> +		ret = gen8_write_pdp(ring, i, addr, false);
>  		if (ret)
>  			return ret;
>  	}
> @@ -654,6 +671,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
>  	}
>  
>  	ppgtt->enable = gen8_ppgtt_enable;
> +	ppgtt->sync_switch_mm = gen8_mm_sync_switch;
>  	ppgtt->switch_mm = gen8_mm_switch;
>  	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
>  	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
> @@ -768,9 +786,22 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
>  	return (ppgtt->pd_offset / 64) << 16;
>  }
>  
> +static int hsw_mm_sync_switch(struct i915_hw_ppgtt *ppgtt,
> +			      struct intel_engine_cs *ring)
> +{
> +	struct drm_device *dev = ppgtt->base.dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +	WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
> +	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
> +	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
> +	POSTING_READ(RING_PP_DIR_BASE(ring));
> +
> +	return 0;
> +}
> +
>  static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
> -			 struct intel_engine_cs *ring,
> -			 bool synchronous)
> +			 struct intel_engine_cs *ring)
>  {
>  	struct drm_device *dev = ppgtt->base.dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> @@ -783,14 +814,8 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  	 *
>  	 * FIXME: We should try not to special case reset
>  	 */
> -	if (synchronous ||
> -	    i915_reset_in_progress(&dev_priv->gpu_error)) {
> -		WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
> -		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
> -		I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
> -		POSTING_READ(RING_PP_DIR_BASE(ring));
> -		return 0;
> -	}
> +	if (i915_reset_in_progress(&dev_priv->gpu_error))
> +		return ppgtt->sync_switch_mm(ppgtt, ring);
>  
>  	/* NB: TLBs must be flushed and invalidated before a switch */
>  	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
> @@ -812,9 +837,22 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  	return 0;
>  }
>  
> +static int gen7_mm_sync_switch(struct i915_hw_ppgtt *ppgtt,
> +			       struct intel_engine_cs *ring)
> +{
> +	struct drm_device *dev = ppgtt->base.dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +	WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
> +	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
> +	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
> +	POSTING_READ(RING_PP_DIR_BASE(ring));
> +
> +	return 0;
> +}
> +
>  static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
> -			  struct intel_engine_cs *ring,
> -			  bool synchronous)
> +			  struct intel_engine_cs *ring)
>  {
>  	struct drm_device *dev = ppgtt->base.dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> @@ -827,14 +865,8 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  	 *
>  	 * FIXME: We should try not to special case reset
>  	 */
> -	if (synchronous ||
> -	    i915_reset_in_progress(&dev_priv->gpu_error)) {
> -		WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
> -		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
> -		I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
> -		POSTING_READ(RING_PP_DIR_BASE(ring));
> -		return 0;
> -	}
> +	if (i915_reset_in_progress(&dev_priv->gpu_error))
> +		return ppgtt->sync_switch_mm(ppgtt, ring);
>  
>  	/* NB: TLBs must be flushed and invalidated before a switch */
>  	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
> @@ -863,16 +895,12 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  	return 0;
>  }
>  
> -static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
> -			  struct intel_engine_cs *ring,
> -			  bool synchronous)
> +static int gen6_mm_sync_switch(struct i915_hw_ppgtt *ppgtt,
> +			       struct intel_engine_cs *ring)
>  {
>  	struct drm_device *dev = ppgtt->base.dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  
> -	if (!synchronous)
> -		return 0;
> -
>  	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
>  	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
>  
> @@ -881,6 +909,12 @@ static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  	return 0;
>  }
>  
> +static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
> +			  struct intel_engine_cs *ring)
> +{
> +	return 0;
> +}
> +
>  static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
>  {
>  	struct drm_device *dev = ppgtt->base.dev;
> @@ -897,7 +931,7 @@ static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
>  		if (USES_FULL_PPGTT(dev))
>  			continue;
>  
> -		ret = ppgtt->switch_mm(ppgtt, ring, true);
> +		ret = ppgtt->sync_switch_mm(ppgtt, ring);
>  		if (ret)
>  			goto err_out;
>  	}
> @@ -942,7 +976,7 @@ static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
>  		if (USES_FULL_PPGTT(dev))
>  			continue;
>  
> -		ret = ppgtt->switch_mm(ppgtt, ring, true);
> +		ret = ppgtt->sync_switch_mm(ppgtt, ring);
>  		if (ret)
>  			return ret;
>  	}
> @@ -971,7 +1005,7 @@ static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
>  	I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
>  
>  	for_each_ring(ring, dev_priv, i) {
> -		int ret = ppgtt->switch_mm(ppgtt, ring, true);
> +		int ret = ppgtt->sync_switch_mm(ppgtt, ring);
>  		if (ret)
>  			return ret;
>  	}
> @@ -1196,12 +1230,15 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>  	ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
>  	if (IS_GEN6(dev)) {
>  		ppgtt->enable = gen6_ppgtt_enable;
> +		ppgtt->sync_switch_mm = gen6_mm_sync_switch;
>  		ppgtt->switch_mm = gen6_mm_switch;
>  	} else if (IS_HASWELL(dev)) {
>  		ppgtt->enable = gen7_ppgtt_enable;
> +		ppgtt->sync_switch_mm = hsw_mm_sync_switch;
>  		ppgtt->switch_mm = hsw_mm_switch;
>  	} else if (IS_GEN7(dev)) {
>  		ppgtt->enable = gen7_ppgtt_enable;
> +		ppgtt->sync_switch_mm = gen7_mm_sync_switch;
>  		ppgtt->switch_mm = gen7_mm_switch;
>  	} else
>  		BUG();
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 7ab8446..c260ada 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -262,9 +262,10 @@ struct i915_hw_ppgtt {
>  	struct intel_context *ctx;
>  
>  	int (*enable)(struct i915_hw_ppgtt *ppgtt);
> +	int (*sync_switch_mm)(struct i915_hw_ppgtt *ppgtt,
> +			      struct intel_engine_cs *ring);
>  	int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
> -			 struct intel_engine_cs *ring,
> -			 bool synchronous);
> +			 struct intel_engine_cs *ring);
>  	void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m);
>  };
>  
> -- 
> 1.9.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/8] drm/i915: Get rid of the synchronous flag in switch_mm
  2014-06-18 19:50 ` Daniel Vetter
@ 2014-06-19 14:00   ` Mateo Lozano, Oscar
  2014-06-19 16:03     ` Daniel Vetter
  0 siblings, 1 reply; 8+ messages in thread
From: Mateo Lozano, Oscar @ 2014-06-19 14:00 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

> -----Original Message-----
> From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch] On Behalf Of Daniel
> Vetter
> Sent: Wednesday, June 18, 2014 8:51 PM
> To: Mateo Lozano, Oscar
> Cc: intel-gfx@lists.freedesktop.org
> Subject: Re: [Intel-gfx] [PATCH 1/8] drm/i915: Get rid of the synchronous flag
> in switch_mm
> 
> On Wed, Jun 18, 2014 at 05:15:35PM +0100, oscar.mateo@intel.com wrote:
> > From: Oscar Mateo <oscar.mateo@intel.com>
> >
> > As a first step, split switch_mm into two different functions:
> > sync_switch_mm (currently used by ppgtt_enable) and switch_mm (used by
> > the outside world).
> >
> > Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> 
> I guess that jira task description was a bit unclear - removing the bool
> synchronous argument isn't the goal itself. The idea was to rework the gpu
> reset sequence so that it matches the same sequence we use at driver load
> and so can use the same code. This patch removes the bool synchronous, but
> the logical split with two different ways to load ppgtt is still there.

No, I realized I didn´t really do anything, but I saw the possibility of saving one special casing in Execlists and I jumped for it, forgetting about the JIRA task (psychological fixation, I guess)
Anyway, I was going to ask for more details on the JIRA...

> The reason for aiming as hard as possible to use the exact same code for
> driver load, gpu reset and runtime pm/system resume is that we've simply
> seen too many bugs due to slight variations and unintended omissions.
> 
> If for some really odd reason this is impossible (e.g. when the gpu needs a
> special dance when coming out of reset) then we need a giant comment
> explaining the magic. But without such a clear reason the goal should always
> be to have just 1 single autorative piece of code for a given state transition.

Ok. How about this: on ring init, I manually set the Aliasing PPGTT and the default context object synchronously via MMIO writes (the Aliasing PPGTT via the ppgtt->sync_switch_mm() and the default context directly using the CCID). I assume that, whenever we are in ring init, the GPU is sufficiently idle to manually frob all the required registers. That way, I get rid of i915_gem_context_enable() completely and I don´t have to do any special casing at all.
Ring commands (PPGTT switch and MI_SET_CONTEXT) are then left completely for the normal do_switch().
What do you think?

-- Oscar

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/8] drm/i915: Get rid of the synchronous flag in switch_mm
  2014-06-19 14:00   ` Mateo Lozano, Oscar
@ 2014-06-19 16:03     ` Daniel Vetter
  0 siblings, 0 replies; 8+ messages in thread
From: Daniel Vetter @ 2014-06-19 16:03 UTC (permalink / raw)
  To: Mateo Lozano, Oscar; +Cc: intel-gfx

On Thu, Jun 19, 2014 at 02:00:24PM +0000, Mateo Lozano, Oscar wrote:
> > -----Original Message-----
> > From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch] On Behalf Of Daniel
> > Vetter
> > Sent: Wednesday, June 18, 2014 8:51 PM
> > To: Mateo Lozano, Oscar
> > Cc: intel-gfx@lists.freedesktop.org
> > Subject: Re: [Intel-gfx] [PATCH 1/8] drm/i915: Get rid of the synchronous flag
> > in switch_mm
> > 
> > On Wed, Jun 18, 2014 at 05:15:35PM +0100, oscar.mateo@intel.com wrote:
> > > From: Oscar Mateo <oscar.mateo@intel.com>
> > >
> > > As a first step, split switch_mm into two different functions:
> > > sync_switch_mm (currently used by ppgtt_enable) and switch_mm (used by
> > > the outside world).
> > >
> > > Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> > 
> > I guess that jira task description was a bit unclear - removing the bool
> > synchronous argument isn't the goal itself. The idea was to rework the gpu
> > reset sequence so that it matches the same sequence we use at driver load
> > and so can use the same code. This patch removes the bool synchronous, but
> > the logical split with two different ways to load ppgtt is still there.
> 
> No, I realized I didn´t really do anything, but I saw the possibility of saving one special casing in Execlists and I jumped for it, forgetting about the JIRA task (psychological fixation, I guess)
> Anyway, I was going to ask for more details on the JIRA...
> 
> > The reason for aiming as hard as possible to use the exact same code for
> > driver load, gpu reset and runtime pm/system resume is that we've simply
> > seen too many bugs due to slight variations and unintended omissions.
> > 
> > If for some really odd reason this is impossible (e.g. when the gpu needs a
> > special dance when coming out of reset) then we need a giant comment
> > explaining the magic. But without such a clear reason the goal should always
> > be to have just 1 single autorative piece of code for a given state transition.
> 
> Ok. How about this: on ring init, I manually set the Aliasing PPGTT and
> the default context object synchronously via MMIO writes (the Aliasing
> PPGTT via the ppgtt->sync_switch_mm() and the default context directly
> using the CCID). I assume that, whenever we are in ring init, the GPU is
> sufficiently idle to manually frob all the required registers. That way,
> I get rid of i915_gem_context_enable() completely and I don´t have to do
> any special casing at all.  Ring commands (PPGTT switch and
> MI_SET_CONTEXT) are then left completely for the normal do_switch().
> What do you think?

Can't we do the ring_init loading the same way as a normal context switch?
That's kinda my dream land thing. Of course if the initial loading is
sufficiently differnt then we have a problem. So essentially do_switch
would check whether the ppgtt changes and load that as part of the
context.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2014-06-19 16:03 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-06-18 16:15 [PATCH 1/8] drm/i915: Get rid of the synchronous flag in switch_mm oscar.mateo
2014-06-18 16:15 ` [PATCH 2/8] drm/i915/bdw: If we are in reset, switch the GEN8 ppgtt synchronously oscar.mateo
2014-06-18 16:15 ` [PATCH 3/8] drm/i915: Enable the PPGTT in the ring init oscar.mateo
2014-06-18 16:15 ` [PATCH 4/8] drm/i915: Split PPGTT enabling from Aliasing PPGTT switching oscar.mateo
2014-06-18 16:27 ` [PATCH 1/8] drm/i915: Get rid of the synchronous flag in switch_mm Mateo Lozano, Oscar
2014-06-18 19:50 ` Daniel Vetter
2014-06-19 14:00   ` Mateo Lozano, Oscar
2014-06-19 16:03     ` Daniel Vetter

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.