From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Mcaulay, Alistair" Subject: Re: [PATCH v3] drm/i915: Rework GPU reset sequence to match driver load & thaw Date: Tue, 19 Aug 2014 10:12:15 +0000 Message-ID: <2F6A3166A8653C4D914E172D478C0F012E4FEEEB@IRSMSX105.ger.corp.intel.com> References: <8761ht7kiq.fsf@gaia.fi.intel.com> <1408125095-10699-1-git-send-email-alistair.mcaulay@intel.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: Received: from mga11.intel.com (mga11.intel.com [192.55.52.93]) by gabe.freedesktop.org (Postfix) with ESMTP id 0285C6E330 for ; Tue, 19 Aug 2014 03:12:17 -0700 (PDT) In-Reply-To: <1408125095-10699-1-git-send-email-alistair.mcaulay@intel.com> Content-Language: en-US List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" To: "Mika Kuoppala (mika.kuoppala@linux.intel.com)" , "intel-gfx@lists.freedesktop.org" List-Id: intel-gfx@lists.freedesktop.org Hi Mika, can you please review this patch, and verify it fixes the issues in your previous review. Thanks, Alistair. > -----Original Message----- > From: Mcaulay, Alistair > Sent: Friday, August 15, 2014 6:52 PM > To: intel-gfx@lists.freedesktop.org > Cc: Mcaulay, Alistair > Subject: [PATCH v3] drm/i915: Rework GPU reset sequence to match driver > load & thaw > > From: "McAulay, Alistair" > > This patch is to address Daniels concerns over different code during reset: > > http://lists.freedesktop.org/archives/intel-gfx/2014-June/047758.html > > "The reason for aiming as hard as possible to use the exact same code for > driver load, gpu reset and runtime pm/system resume is that we've simply > seen too many bugs due to slight variations and unintended omissions." > > Tested using igt drv_hangman. > > V2: Cleaner way of preventing check_wedge returning -EAGAIN > V3: Clean the last_context during reset, to ensure do_switch() does the > MI_SET_CONTEXT. As per review. > Signed-off-by: McAulay, Alistair > --- > drivers/gpu/drm/i915/i915_drv.c | 6 +++ > drivers/gpu/drm/i915/i915_drv.h | 3 ++ > drivers/gpu/drm/i915/i915_gem.c | 4 +- > drivers/gpu/drm/i915/i915_gem_context.c | 33 +++------------- > drivers/gpu/drm/i915/i915_gem_gtt.c | 67 +++++---------------------------- > drivers/gpu/drm/i915/i915_gem_gtt.h | 3 +- > 6 files changed, 28 insertions(+), 88 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.c > b/drivers/gpu/drm/i915/i915_drv.c index 5e4fefd..3bfafe6 100644 > --- a/drivers/gpu/drm/i915/i915_drv.c > +++ b/drivers/gpu/drm/i915/i915_drv.c > @@ -806,7 +806,13 @@ int i915_reset(struct drm_device *dev) > !dev_priv->ums.mm_suspended) { > dev_priv->ums.mm_suspended = 0; > > + /* Used to prevent gem_check_wedged returning -EAGAIN > during gpu reset */ > + dev_priv->gpu_error.reload_in_reset = true; > + > ret = i915_gem_init_hw(dev); > + > + dev_priv->gpu_error.reload_in_reset = false; > + > mutex_unlock(&dev->struct_mutex); > if (ret) { > DRM_ERROR("Failed hw init on reset %d\n", ret); diff > --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 991b663..116daff 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1217,6 +1217,9 @@ struct i915_gpu_error { > > /* For missed irq/seqno simulation. */ > unsigned int test_irq_rings; > + > + /* Used to prevent gem_check_wedged returning -EAGAIN during > gpu reset */ > + bool reload_in_reset; > }; > > enum modeset_restore { > diff --git a/drivers/gpu/drm/i915/i915_gem.c > b/drivers/gpu/drm/i915/i915_gem.c index ef047bc..e7396eb 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -1085,7 +1085,9 @@ i915_gem_check_wedge(struct i915_gpu_error > *error, > if (i915_terminally_wedged(error)) > return -EIO; > > - return -EAGAIN; > + /* Check if GPU Reset is in progress */ > + if (!error->reload_in_reset) > + return -EAGAIN; > } > > return 0; > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c > b/drivers/gpu/drm/i915/i915_gem_context.c > index de72a28..9378ad8 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/i915_gem_context.c > @@ -377,34 +377,17 @@ void i915_gem_context_reset(struct drm_device > *dev) > struct drm_i915_private *dev_priv = dev->dev_private; > int i; > > - /* Prevent the hardware from restoring the last context (which > hung) on > - * the next switch */ > for (i = 0; i < I915_NUM_RINGS; i++) { > struct intel_engine_cs *ring = &dev_priv->ring[i]; > - struct intel_context *dctx = ring->default_context; > struct intel_context *lctx = ring->last_context; > > - /* Do a fake switch to the default context */ > - if (lctx == dctx) > - continue; > - > - if (!lctx) > - continue; > + if (lctx) { > + if (lctx->legacy_hw_ctx.rcs_state && i == RCS) > + i915_gem_object_ggtt_unpin(lctx- > >legacy_hw_ctx.rcs_state); > > - if (dctx->legacy_hw_ctx.rcs_state && i == RCS) { > - WARN_ON(i915_gem_obj_ggtt_pin(dctx- > >legacy_hw_ctx.rcs_state, > - > get_context_alignment(dev), 0)); > - /* Fake a finish/inactive */ > - dctx->legacy_hw_ctx.rcs_state->base.write_domain > = 0; > - dctx->legacy_hw_ctx.rcs_state->active = 0; > + i915_gem_context_unreference(lctx); > + ring->last_context = NULL; > } > - > - if (lctx->legacy_hw_ctx.rcs_state && i == RCS) > - i915_gem_object_ggtt_unpin(lctx- > >legacy_hw_ctx.rcs_state); > - > - i915_gem_context_unreference(lctx); > - i915_gem_context_reference(dctx); > - ring->last_context = dctx; > } > } > > @@ -498,10 +481,6 @@ int i915_gem_context_enable(struct > drm_i915_private *dev_priv) > ppgtt->enable(ppgtt); > } > > - /* FIXME: We should make this work, even in reset */ > - if (i915_reset_in_progress(&dev_priv->gpu_error)) > - return 0; > - > BUG_ON(!dev_priv->ring[RCS].default_context); > > for_each_ring(ring, dev_priv, i) { > @@ -645,7 +624,7 @@ static int do_switch(struct intel_engine_cs *ring, > from = ring->last_context; > > if (USES_FULL_PPGTT(ring->dev)) { > - ret = ppgtt->switch_mm(ppgtt, ring, false); > + ret = ppgtt->switch_mm(ppgtt, ring); > if (ret) > goto unpin_out; > } > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c > b/drivers/gpu/drm/i915/i915_gem_gtt.c > index 5188936..450c8a9 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -216,19 +216,12 @@ static gen6_gtt_pte_t > iris_pte_encode(dma_addr_t addr, > > /* Broadwell Page Directory Pointer Descriptors */ static int > gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry, > - uint64_t val, bool synchronous) > + uint64_t val) > { > - struct drm_i915_private *dev_priv = ring->dev->dev_private; > int ret; > > BUG_ON(entry >= 4); > > - if (synchronous) { > - I915_WRITE(GEN8_RING_PDP_UDW(ring, entry), val >> 32); > - I915_WRITE(GEN8_RING_PDP_LDW(ring, entry), (u32)val); > - return 0; > - } > - > ret = intel_ring_begin(ring, 6); > if (ret) > return ret; > @@ -245,8 +238,7 @@ static int gen8_write_pdp(struct intel_engine_cs > *ring, unsigned entry, } > > static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt, > - struct intel_engine_cs *ring, > - bool synchronous) > + struct intel_engine_cs *ring) > { > int i, ret; > > @@ -255,7 +247,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt > *ppgtt, > > for (i = used_pd - 1; i >= 0; i--) { > dma_addr_t addr = ppgtt->pd_dma_addr[i]; > - ret = gen8_write_pdp(ring, i, addr, synchronous); > + ret = gen8_write_pdp(ring, i, addr); > if (ret) > return ret; > } > @@ -724,29 +716,10 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt > *ppgtt) } > > static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, > - struct intel_engine_cs *ring, > - bool synchronous) > + struct intel_engine_cs *ring) > { > - struct drm_device *dev = ppgtt->base.dev; > - struct drm_i915_private *dev_priv = dev->dev_private; > int ret; > > - /* If we're in reset, we can assume the GPU is sufficiently idle to > - * manually frob these bits. Ideally we could use the ring functions, > - * except our error handling makes it quite difficult (can't use > - * intel_ring_begin, ring->flush, or intel_ring_advance) > - * > - * FIXME: We should try not to special case reset > - */ > - if (synchronous || > - i915_reset_in_progress(&dev_priv->gpu_error)) { > - WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt); > - I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); > - I915_WRITE(RING_PP_DIR_BASE(ring), > get_pd_offset(ppgtt)); > - POSTING_READ(RING_PP_DIR_BASE(ring)); > - return 0; > - } > - > /* NB: TLBs must be flushed and invalidated before a switch */ > ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, > I915_GEM_GPU_DOMAINS); > if (ret) > @@ -768,29 +741,10 @@ static int hsw_mm_switch(struct i915_hw_ppgtt > *ppgtt, } > > static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, > - struct intel_engine_cs *ring, > - bool synchronous) > + struct intel_engine_cs *ring) > { > - struct drm_device *dev = ppgtt->base.dev; > - struct drm_i915_private *dev_priv = dev->dev_private; > int ret; > > - /* If we're in reset, we can assume the GPU is sufficiently idle to > - * manually frob these bits. Ideally we could use the ring functions, > - * except our error handling makes it quite difficult (can't use > - * intel_ring_begin, ring->flush, or intel_ring_advance) > - * > - * FIXME: We should try not to special case reset > - */ > - if (synchronous || > - i915_reset_in_progress(&dev_priv->gpu_error)) { > - WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt); > - I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); > - I915_WRITE(RING_PP_DIR_BASE(ring), > get_pd_offset(ppgtt)); > - POSTING_READ(RING_PP_DIR_BASE(ring)); > - return 0; > - } > - > /* NB: TLBs must be flushed and invalidated before a switch */ > ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, > I915_GEM_GPU_DOMAINS); > if (ret) > @@ -819,14 +773,11 @@ static int gen7_mm_switch(struct i915_hw_ppgtt > *ppgtt, } > > static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, > - struct intel_engine_cs *ring, > - bool synchronous) > + struct intel_engine_cs *ring) > { > struct drm_device *dev = ppgtt->base.dev; > struct drm_i915_private *dev_priv = dev->dev_private; > > - if (!synchronous) > - return 0; > > I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); > I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt)); @@ - > 852,7 +803,7 @@ static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) > if (USES_FULL_PPGTT(dev)) > continue; > > - ret = ppgtt->switch_mm(ppgtt, ring, true); > + ret = ppgtt->switch_mm(ppgtt, ring); > if (ret) > goto err_out; > } > @@ -897,7 +848,7 @@ static int gen7_ppgtt_enable(struct i915_hw_ppgtt > *ppgtt) > if (USES_FULL_PPGTT(dev)) > continue; > > - ret = ppgtt->switch_mm(ppgtt, ring, true); > + ret = ppgtt->switch_mm(ppgtt, ring); > if (ret) > return ret; > } > @@ -926,7 +877,7 @@ static int gen6_ppgtt_enable(struct i915_hw_ppgtt > *ppgtt) > I915_WRITE(GFX_MODE, > _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); > > for_each_ring(ring, dev_priv, i) { > - int ret = ppgtt->switch_mm(ppgtt, ring, true); > + int ret = ppgtt->switch_mm(ppgtt, ring); > if (ret) > return ret; > } > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h > b/drivers/gpu/drm/i915/i915_gem_gtt.h > index 8d6f7c1..bf1e4fc 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h > @@ -262,8 +262,7 @@ struct i915_hw_ppgtt { > > int (*enable)(struct i915_hw_ppgtt *ppgtt); > int (*switch_mm)(struct i915_hw_ppgtt *ppgtt, > - struct intel_engine_cs *ring, > - bool synchronous); > + struct intel_engine_cs *ring); > void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file > *m); }; > > -- > 2.0.4