All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: fei.yang@intel.com, intel-gfx@lists.freedesktop.org
Cc: dri-devel@lists.freedesktop.org, chris.p.wilson@intel.com
Subject: Re: [Intel-gfx] [PATCH] drm/i915: avoid concurrent writes to aux_inv
Date: Mon, 28 Mar 2022 09:37:10 +0100	[thread overview]
Message-ID: <188a2a45-7f83-0ed1-0171-6596d918b225@linux.intel.com> (raw)
In-Reply-To: <20220328031607.1810247-1-fei.yang@intel.com>


On 28/03/2022 04:16, fei.yang@intel.com wrote:
> From: Fei Yang <fei.yang@intel.com>
> 
> GPU hangs have been observed when multiple engines write to the
> same aux_inv register at the same time. To avoid this each engine
> should only invalidate its own auxiliary table. The function
> gen12_emit_flush_xcs() currently invalidate the auxiliary table for
> all engines because the rq->engine is not necessarily the engine
> eventually carrying out the request, and potentially the engine
> could even be a virtual one (with engine->instance being -1).
> With the MMIO remap feature, we can actually set bit 17 of MI_LRI
> instruction and let the hardware to figure out the local aux_inv
> register at runtime to avoid invalidating auxiliary table for all
> engines.
> 
> Bspec: 45728
> 
> v2: Invalidate AUX table for indirect context as well.
> 
> Cc: Stuart Summers <stuart.summers@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Signed-off-by: Chris Wilson <chris.p.wilson@intel.com>
> Signed-off-by: Fei Yang <fei.yang@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/gen8_engine_cs.c     | 48 ++++----------------
>   drivers/gpu/drm/i915/gt/gen8_engine_cs.h     |  4 +-
>   drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  1 +
>   drivers/gpu/drm/i915/gt/intel_lrc.c          | 12 +++++
>   4 files changed, 26 insertions(+), 39 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> index 36148887c699..8178be083b42 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> @@ -6,7 +6,6 @@
>   #include "gen8_engine_cs.h"
>   #include "i915_drv.h"
>   #include "intel_gpu_commands.h"
> -#include "intel_gt_regs.h"
>   #include "intel_lrc.h"
>   #include "intel_ring.h"
>   
> @@ -165,33 +164,9 @@ static u32 preparser_disable(bool state)
>   	return MI_ARB_CHECK | 1 << 8 | state;
>   }
>   
> -static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
> +u32 *gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)

I think all helpers which emit to ring take cs as the first argument so 
it would be good to make this consistent.

>   {
> -	static const i915_reg_t vd[] = {
> -		GEN12_VD0_AUX_NV,
> -		GEN12_VD1_AUX_NV,
> -		GEN12_VD2_AUX_NV,
> -		GEN12_VD3_AUX_NV,
> -	};
> -
> -	static const i915_reg_t ve[] = {
> -		GEN12_VE0_AUX_NV,
> -		GEN12_VE1_AUX_NV,
> -	};
> -
> -	if (engine->class == VIDEO_DECODE_CLASS)
> -		return vd[engine->instance];
> -
> -	if (engine->class == VIDEO_ENHANCEMENT_CLASS)
> -		return ve[engine->instance];
> -
> -	GEM_BUG_ON("unknown aux_inv reg\n");
> -	return INVALID_MMIO_REG;
> -}
> -
> -static u32 *gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
> -{
> -	*cs++ = MI_LOAD_REGISTER_IMM(1);
> +	*cs++ = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN;
>   	*cs++ = i915_mmio_reg_offset(inv_reg);
>   	*cs++ = AUX_INV;
>   	*cs++ = MI_NOOP;
> @@ -293,10 +268,12 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
>   	if (mode & EMIT_INVALIDATE) {
>   		cmd += 2;
>   
> -		if (!HAS_FLAT_CCS(rq->engine->i915)) {
> +		if (!HAS_FLAT_CCS(rq->engine->i915) &&
> +		    (rq->engine->class == VIDEO_DECODE_CLASS ||
> +		     rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) {
>   			aux_inv = rq->engine->mask & ~BIT(BCS0);
>   			if (aux_inv)
> -				cmd += 2 * hweight32(aux_inv) + 2;
> +				cmd += 4;
>   		}
>   	}
>   
> @@ -329,15 +306,10 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
>   	*cs++ = 0; /* value */
>   
>   	if (aux_inv) { /* hsdes: 1809175790 */
> -		struct intel_engine_cs *engine;
> -		unsigned int tmp;
> -
> -		*cs++ = MI_LOAD_REGISTER_IMM(hweight32(aux_inv));
> -		for_each_engine_masked(engine, rq->engine->gt, aux_inv, tmp) {
> -			*cs++ = i915_mmio_reg_offset(aux_inv_reg(engine));
> -			*cs++ = AUX_INV;
> -		}
> -		*cs++ = MI_NOOP;
> +		if (rq->engine->class == VIDEO_DECODE_CLASS)
> +			cs = gen12_emit_aux_table_inv(GEN12_VD0_AUX_NV, cs);
> +		else
> +			cs = gen12_emit_aux_table_inv(GEN12_VE0_AUX_NV, cs);

Not sure if, here and below, it would be worth to put register in a 
local and then have a single function call - up to you.

>   	}
>   
>   	if (mode & EMIT_INVALIDATE)
> diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
> index cc6e21d3662a..94f589e73d10 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
> +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
> @@ -10,7 +10,7 @@
>   #include <linux/types.h>
>   
>   #include "i915_gem.h" /* GEM_BUG_ON */
> -
> +#include "intel_gt_regs.h"
>   #include "intel_gpu_commands.h"
>   
>   struct i915_request;
> @@ -38,6 +38,8 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
>   u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
>   u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
>   
> +u32 *gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs);
> +
>   static inline u32 *
>   __gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
>   {
> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> index d112ffd56418..4243be030bc1 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> @@ -144,6 +144,7 @@
>   #define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*(x)-1)
>   /* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
>   #define   MI_LRI_LRM_CS_MMIO		REG_BIT(19)
> +#define   MI_LRI_MMIO_REMAP_EN		REG_BIT(17)
>   #define   MI_LRI_FORCE_POSTED		(1<<12)
>   #define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
>   #define MI_STORE_REGISTER_MEM        MI_INSTR(0x24, 1)
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 07bef7128fdb..7581ef9e2cce 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1208,6 +1208,10 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
>   	    IS_DG2_G11(ce->engine->i915))
>   		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);
>   
> +	/* hsdes: 1809175790 */
> +	if (!HAS_FLAT_CCS(ce->engine->i915))
> +		cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs);
> +
>   	return cs;
>   }
>   
> @@ -1225,6 +1229,14 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
>   						    PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
>   						    0);
>   
> +	/* hsdes: 1809175790 */
> +	if (!HAS_FLAT_CCS(ce->engine->i915)) {
> +		if (ce->engine->class == VIDEO_DECODE_CLASS)
> +			cs = gen12_emit_aux_table_inv(GEN12_VD0_AUX_NV, cs);
> +		else if (ce->engine->class == VIDEO_ENHANCEMENT_CLASS)
> +			cs = gen12_emit_aux_table_inv(GEN12_VE0_AUX_NV, cs);
> +	}
> +
>   	return cs;
>   }
>   

Apart from the cs re-order looks good to me.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

  parent reply	other threads:[~2022-03-28  8:37 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-03-28  3:16 [PATCH] drm/i915: avoid concurrent writes to aux_inv fei.yang
2022-03-28  3:16 ` [Intel-gfx] " fei.yang
2022-03-28  3:38 ` [Intel-gfx] ✗ Fi.CI.DOCS: warning for drm/i915: avoid concurrent writes to aux_inv (rev9) Patchwork
2022-03-28  3:57 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2022-03-28  4:39 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
2022-03-28  8:37 ` Tvrtko Ursulin [this message]
2022-03-28 17:58   ` [Intel-gfx] [PATCH] drm/i915: avoid concurrent writes to aux_inv Yang, Fei
2022-03-28 17:58     ` Yang, Fei
  -- strict thread matches above, loose matches on Subject: below --
2022-03-28 17:16 fei.yang
2022-03-29  7:51 ` Tvrtko Ursulin
2022-03-18 18:08 fei.yang
2022-03-21 13:41 ` Tvrtko Ursulin
2022-03-18  5:26 fei.yang
2022-03-18 14:38 ` Tvrtko Ursulin
2022-03-18 18:12   ` Yang, Fei
2022-03-18 18:12     ` Yang, Fei
2022-03-18  5:12 fei.yang
2022-03-04 22:14 fei.yang
2022-03-16  4:43 ` Summers, Stuart
2022-03-16  5:54   ` Yang, Fei
2022-03-16  7:19     ` Yang, Fei
2022-03-16 10:03 ` Tvrtko Ursulin
2022-03-16 18:25   ` Yang, Fei
2022-03-16 18:25     ` Yang, Fei
2022-03-04 22:04 fei.yang
2022-03-02 18:26 fei.yang
2022-03-02 20:50 ` Chris Wilson
2022-02-26  7:11 fei.yang
2022-02-26  1:50 fei.yang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=188a2a45-7f83-0ed1-0171-6596d918b225@linux.intel.com \
    --to=tvrtko.ursulin@linux.intel.com \
    --cc=chris.p.wilson@intel.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=fei.yang@intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.