All of lore.kernel.org
 help / color / mirror / Atom feed
From: Matthew Brost <matthew.brost@intel.com>
To: <intel-gfx@lists.freedesktop.org>, <dri-devel@lists.freedesktop.org>
Cc: john.c.harrison@intel.com, Michal.Wajdeczko@intel.com
Subject: [PATCH 0/2] Introduce set_parallel2 extension
Date: Wed,  7 Jul 2021 17:30:17 -0700	[thread overview]
Message-ID: <20210708003019.9213-1-matthew.brost@intel.com> (raw)
In-Reply-To: <20210706222010.101522-1-matthew.brost@intel.com>

Based on upstream feedback [1] the current set_parallel extension isn't
suitable. Add a single patch to DII implementing the new interface
agreed to upstream [2]. Intended to enable the UMDs with the upstream
interface while maintaining the old interface on DII.

A quick IGT to prove this is working should be on the list shortly.

v2: Move single patch in GuC section on pile, align with agreed to
upstream interface, only include prelim* definitions. 
v3: Enable set_parallel2 via SET_PARAM IOCTL, resend for CI
v4: Fix regression when patch was merged - only do parallel checks on
user engine sets

Signed-off-by: Matthew Brost <matthew.brost@intel.com>

[1] https://patchwork.freedesktop.org/patch/432205/?series=89840&rev=1
[2] https://patchwork.freedesktop.org/patch/438911/?series=91417&rev=1

Signed-off-by: Matthew Brost <matthew.brost@intel.com>


---
baseline: b7227afd06bac1fe6719136e2ddd2bfed1d85feb
pile-commit: b7a2c9136977a385659a71df837cbe5a1f775b32
range-diff:
   -:  ------------ >  930:  ad12b87b91af INTEL_DII/NOT_UPSTREAM: drm/i915: Introduce set_parallel2 extension
1083:  73e59e150cde ! 1084:  79b296835b1c INTEL_DII/FIXME: drm/i915/perf: add a parameter to control the size of OA buffer
1120:  edbc20ae1355 ! 1121:  30d02d618229 INTEL_DII/FIXME: drm/i915: Add context parameter for debug flags
1293:  997b317fc408 ! 1294:  016b5903b0a0 INTEL_DII: drm/i915/perf: Add OA formats for XEHPSDV
1364:  136064b76b92 ! 1365:  5f564d553dc8 INTEL_DII: drm/i915/xehpsdv: Expand total numbers of supported engines up to 256
1403:  67b729033e82 ! 1404:  4398a2322f2f INTEL_DII: drm/i915/xehpsdv: Impose ULLS context restrictions
1405:  b8dd2a22a952 ! 1406:  dd2fab232cf1 INTEL_DII: drm/i915: Add context methods to suspend and resume requests
1670:  b4633106fa13 ! 1671:  53b4a54ee2cc INTEL_DII: drm/i915/pxp: interface for marking contexts as using protected content
1671:  22369ab70556 ! 1672:  42234590cdf5 INTEL_DII: drm/i915/pxp: start the arb session on demand

 series                                             |   1 +
 ...IXME-drm-i915-perf-add-a-parameter-to-con.patch |   4 +-
 ...IXME-drm-i915-Add-context-parameter-for-d.patch |  18 +-
 ...-drm-i915-perf-Add-OA-formats-for-XEHPSDV.patch |   4 +-
 ...rm-i915-xehpsdv-Expand-total-numbers-of-s.patch |   2 +-
 ...rm-i915-xehpsdv-Impose-ULLS-context-restr.patch |  12 +-
 ...rm-i915-Add-context-methods-to-suspend-an.patch |  38 +-
 ...rm-i915-pxp-interface-for-marking-context.patch |  16 +-
 ...rm-i915-pxp-start-the-arb-session-on-dema.patch |   2 +-
 ...OT_UPSTREAM-drm-i915-Introduce-set_parall.patch | 676 +++++++++++++++++++++
 10 files changed, 725 insertions(+), 48 deletions(-)

diff --git a/series b/series
index 8b77d52df40c..7db508ea974d 100644
--- a/series
+++ b/series
@@ -929,6 +929,7 @@
 0001-INTEL_DII-drm-i915-guc-Increase-GuC-log-size-for-CON.patch
 0001-INTEL_DII-NOT_UPSTREAM-drm-i915-Dump-error-capture-t.patch
 0001-INTEL_DII-NOT_UPSTREAM-drm-i915-guc-Dump-error-captu.patch
+0001-INTEL_DII-NOT_UPSTREAM-drm-i915-Introduce-set_parall.patch
 0001-INTEL_DII-END-GuC-submission-and-slpc-support.patch
 0001-INTEL_DII-BEGIN-SR-IOV-ENABLING.patch
 0001-INTEL_DII-drm-i915-guc-Update-GuC-to-62.0.3.patch
diff --git a/0001-INTEL_DII-FIXME-drm-i915-perf-add-a-parameter-to-con.patch b/0001-INTEL_DII-FIXME-drm-i915-perf-add-a-parameter-to-con.patch
index dd654f144374..b7a637b3813f 100644
--- a/0001-INTEL_DII-FIXME-drm-i915-perf-add-a-parameter-to-con.patch
+++ b/0001-INTEL_DII-FIXME-drm-i915-perf-add-a-parameter-to-con.patch
@@ -384,8 +384,8 @@ diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
 diff --git a/include/uapi/drm/i915_drm_prelim.h b/include/uapi/drm/i915_drm_prelim.h
 --- a/include/uapi/drm/i915_drm_prelim.h
 +++ b/include/uapi/drm/i915_drm_prelim.h
-@@ -393,6 +393,36 @@ struct prelim_i915_context_param_engines {
- #define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */
+@@ -508,6 +508,36 @@ struct prelim_i915_context_param_engines {
+ #define PRELIM_I915_CONTEXT_ENGINES_EXT_PARALLEL2_SUBMIT (PRELIM_I915_USER_EXT | 3) /* see prelim_i915_context_engines_parallel2_submit */
  };
  
 +enum prelim_drm_i915_perf_property_id {
diff --git a/0001-INTEL_DII-FIXME-drm-i915-Add-context-parameter-for-d.patch b/0001-INTEL_DII-FIXME-drm-i915-Add-context-parameter-for-d.patch
index dfd5790ac2b8..71a5943b5536 100644
--- a/0001-INTEL_DII-FIXME-drm-i915-Add-context-parameter-for-d.patch
+++ b/0001-INTEL_DII-FIXME-drm-i915-Add-context-parameter-for-d.patch
@@ -44,7 +44,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/
  }
  
  static void __free_engines(struct i915_gem_engines *e, unsigned int count)
-@@ -2252,6 +2255,76 @@ static int set_priority(struct i915_gem_context *ctx,
+@@ -2436,6 +2439,76 @@ static int set_priority(struct i915_gem_context *ctx,
  	return 0;
  }
  
@@ -121,7 +121,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/
  static int ctx_setparam(struct drm_i915_file_private *fpriv,
  			struct i915_gem_context *ctx,
  			struct drm_i915_gem_context_param *args)
-@@ -2321,6 +2394,11 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
+@@ -2505,6 +2578,11 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
  		ret = set_ringsize(ctx, args);
  		break;
  
@@ -133,7 +133,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/
  	case I915_CONTEXT_PARAM_BAN_PERIOD:
  	default:
  		ret = -EINVAL;
-@@ -2777,6 +2855,11 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
+@@ -2961,6 +3039,11 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
  		ret = get_ringsize(ctx, args);
  		break;
  
@@ -184,7 +184,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm
 diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
 --- a/drivers/gpu/drm/i915/gt/intel_context.h
 +++ b/drivers/gpu/drm/i915/gt/intel_context.h
-@@ -285,6 +285,24 @@ intel_context_clear_nopreempt(struct intel_context *ce)
+@@ -296,6 +296,24 @@ intel_context_clear_nopreempt(struct intel_context *ce)
  		ce->emit_bb_start = ce->engine->emit_bb_start;
  }
  
@@ -212,19 +212,19 @@ diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/i
 diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
 +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
-@@ -114,6 +114,7 @@ struct intel_context {
- #define CONTEXT_FORCE_SINGLE_SUBMISSION	7
+@@ -115,6 +115,7 @@ struct intel_context {
  #define CONTEXT_NOPREEMPT		8
  #define CONTEXT_LRCA_DIRTY		9
-+#define CONTEXT_DEBUG			10
+ #define CONTEXT_NO_PREEMPT_MID_BATCH	10
++#define CONTEXT_DEBUG			11
  
  	struct {
  		u64 timeout_us;
 diff --git a/include/uapi/drm/i915_drm_prelim.h b/include/uapi/drm/i915_drm_prelim.h
 --- a/include/uapi/drm/i915_drm_prelim.h
 +++ b/include/uapi/drm/i915_drm_prelim.h
-@@ -395,6 +395,32 @@ struct prelim_i915_context_param_engines {
- #define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */
+@@ -510,6 +510,32 @@ struct prelim_i915_context_param_engines {
+ #define PRELIM_I915_CONTEXT_ENGINES_EXT_PARALLEL2_SUBMIT (PRELIM_I915_USER_EXT | 3) /* see prelim_i915_context_engines_parallel2_submit */
  };
  
 +struct prelim_drm_i915_gem_context_param {
diff --git a/0001-INTEL_DII-drm-i915-perf-Add-OA-formats-for-XEHPSDV.patch b/0001-INTEL_DII-drm-i915-perf-Add-OA-formats-for-XEHPSDV.patch
index 19a07b3926ae..f62d7848e091 100644
--- a/0001-INTEL_DII-drm-i915-perf-Add-OA-formats-for-XEHPSDV.patch
+++ b/0001-INTEL_DII-drm-i915-perf-Add-OA-formats-for-XEHPSDV.patch
@@ -204,8 +204,8 @@ diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
 diff --git a/include/uapi/drm/i915_drm_prelim.h b/include/uapi/drm/i915_drm_prelim.h
 --- a/include/uapi/drm/i915_drm_prelim.h
 +++ b/include/uapi/drm/i915_drm_prelim.h
-@@ -435,6 +435,27 @@ struct prelim_i915_context_param_engines {
- #define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */
+@@ -550,6 +550,27 @@ struct prelim_i915_context_param_engines {
+ #define PRELIM_I915_CONTEXT_ENGINES_EXT_PARALLEL2_SUBMIT (PRELIM_I915_USER_EXT | 3) /* see prelim_i915_context_engines_parallel2_submit */
  };
  
 +enum prelim_drm_i915_oa_format {
diff --git a/0001-INTEL_DII-drm-i915-xehpsdv-Expand-total-numbers-of-s.patch b/0001-INTEL_DII-drm-i915-xehpsdv-Expand-total-numbers-of-s.patch
index 05a84884a3d1..ee486b95d11e 100644
--- a/0001-INTEL_DII-drm-i915-xehpsdv-Expand-total-numbers-of-s.patch
+++ b/0001-INTEL_DII-drm-i915-xehpsdv-Expand-total-numbers-of-s.patch
@@ -76,7 +76,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i9
  
  	/* Kernel clipping was a DRI1 misfeature */
  	if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) {
-@@ -3233,9 +3235,12 @@ eb_select_engine(struct i915_execbuffer *eb)
+@@ -3233,9 +3235,12 @@ eb_select_engine(struct i915_execbuffer *eb, unsigned int batch_number)
  	int err;
  
  	if (i915_gem_context_user_engines(eb->gem_context))
diff --git a/0001-INTEL_DII-drm-i915-xehpsdv-Impose-ULLS-context-restr.patch b/0001-INTEL_DII-drm-i915-xehpsdv-Impose-ULLS-context-restr.patch
index 38ad84c4dc12..80880e3008cc 100644
--- a/0001-INTEL_DII-drm-i915-xehpsdv-Impose-ULLS-context-restr.patch
+++ b/0001-INTEL_DII-drm-i915-xehpsdv-Impose-ULLS-context-restr.patch
@@ -76,7 +76,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i9
  	if (intel_context_nopreempt(eb->context) ||
  	    intel_context_debug(eb->context))
  		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &eb->request->fence.flags);
-@@ -3453,6 +3462,13 @@ static int eb_request_add(struct i915_execbuffer *eb, int err)
+@@ -3463,6 +3472,13 @@ static int eb_request_add(struct i915_execbuffer *eb, int err)
  
  	trace_i915_request_add(rq);
  
@@ -90,7 +90,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i9
  	prev = __i915_request_commit(rq);
  
  	/* Check that the context wasn't destroyed before submission */
-@@ -3531,6 +3547,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
+@@ -3541,6 +3557,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
  	int err;
  	bool first = batch_number == 0;
  	bool last = batch_number + 1 == num_batches;
@@ -98,7 +98,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i9
  
  	BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
  	BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
-@@ -3582,6 +3599,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
+@@ -3592,6 +3609,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
  	if (unlikely(err))
  		goto err_destroy;
  
@@ -109,7 +109,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i9
 +		goto err_context;
 +	}
 +
- 	err = eb_select_engine(&eb);
+ 	err = eb_select_engine(&eb, batch_number);
  	if (unlikely(err))
  		goto err_context;
 diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
@@ -239,7 +239,7 @@ diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_req
  		/*
  		 * Requests on the same timeline are explicitly ordered, along
  		 * with their dependencies, by i915_request_add() which ensures
-@@ -2126,6 +2181,7 @@ long i915_request_wait(struct i915_request *rq,
+@@ -2121,6 +2176,7 @@ long i915_request_wait(struct i915_request *rq,
  {
  	might_sleep();
  	GEM_BUG_ON(timeout < 0);
@@ -247,7 +247,7 @@ diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_req
  
  	if (dma_fence_is_signaled(&rq->fence))
  		return timeout;
-@@ -2331,6 +2387,8 @@ static struct i915_global_request global = { {
+@@ -2326,6 +2382,8 @@ static struct i915_global_request global = { {
  
  int __init i915_global_request_init(void)
  {
diff --git a/0001-INTEL_DII-drm-i915-Add-context-methods-to-suspend-an.patch b/0001-INTEL_DII-drm-i915-Add-context-methods-to-suspend-an.patch
index 7d523c8dadba..44fd93184b8a 100644
--- a/0001-INTEL_DII-drm-i915-Add-context-methods-to-suspend-an.patch
+++ b/0001-INTEL_DII-drm-i915-Add-context-methods-to-suspend-an.patch
@@ -52,7 +52,7 @@ diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/i
  void
  intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
  {
-@@ -475,6 +481,9 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
+@@ -476,6 +482,9 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
  	ce->guc_id = GUC_INVALID_LRC_ID;
  	INIT_LIST_HEAD(&ce->guc_id_link);
  
@@ -62,7 +62,7 @@ diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/i
  	i915_active_init(&ce->active,
  			 __intel_context_active, __intel_context_retire);
  }
-@@ -485,6 +494,7 @@ void intel_context_fini(struct intel_context *ce)
+@@ -486,6 +495,7 @@ void intel_context_fini(struct intel_context *ce)
  
  	if (ce->last_rq)
  		i915_request_put(ce->last_rq);
@@ -73,7 +73,7 @@ diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/i
 diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
 --- a/drivers/gpu/drm/i915/gt/intel_context.h
 +++ b/drivers/gpu/drm/i915/gt/intel_context.h
-@@ -252,6 +252,54 @@ static inline bool intel_context_ban(struct intel_context *ce,
+@@ -263,6 +263,54 @@ static inline bool intel_context_ban(struct intel_context *ce,
  	return ret;
  }
  
@@ -152,10 +152,10 @@ diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i91
  	void (*enter)(struct intel_context *ce);
  	void (*exit)(struct intel_context *ce);
  
-@@ -241,6 +248,9 @@ struct intel_context {
+@@ -245,6 +252,9 @@ struct intel_context {
  
- 	/* Last request submitted on a parent */
- 	struct i915_request *last_rq;
+ 	/* Parallel submit mutex */
+ 	struct mutex parallel_submit;
 +
 +	/* GuC context blocked fence */
 +	struct i915_sw_fence guc_blocked;
@@ -231,7 +231,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm
  
  	if (!enabled) {
  		GEM_BUG_ON(context_pending_enable(ce));
-@@ -1103,6 +1137,8 @@ static void __guc_context_destroy(struct intel_context *ce);
+@@ -1102,6 +1136,8 @@ static void __guc_context_destroy(struct intel_context *ce);
  static void release_guc_id(struct intel_guc *guc, struct intel_context *ce);
  static void guc_signal_context_fence(struct intel_context *ce);
  static void guc_cancel_context_requests(struct intel_context *ce);
@@ -240,7 +240,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm
  
  static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
  {
-@@ -1143,6 +1179,8 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
+@@ -1142,6 +1178,8 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
  
  		/* Not mutualy exclusive with above if statement. */
  		if (pending_disable) {
@@ -249,7 +249,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm
  			guc_signal_context_fence(ce);
  			if (banned) {
  				guc_cancel_context_requests(ce);
-@@ -1150,7 +1188,12 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
+@@ -1149,7 +1187,12 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
  			}
  			intel_context_sched_disable_unpin(ce);
  			atomic_dec(&guc->outstanding_submission_g2h);
@@ -262,7 +262,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm
  		}
  	}
  }
-@@ -2549,6 +2592,22 @@ static void guc_parent_context_unpin(struct intel_context *ce)
+@@ -2551,6 +2594,22 @@ static void guc_parent_context_unpin(struct intel_context *ce)
  	__guc_context_unpin(ce);
  }
  
@@ -285,7 +285,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm
  static void __guc_context_sched_disable(struct intel_guc *guc,
  					struct intel_context *ce,
  					u16 guc_id)
-@@ -2576,10 +2635,13 @@ static void __guc_context_sched_disable(struct intel_guc *guc,
+@@ -2578,10 +2637,13 @@ static void __guc_context_sched_disable(struct intel_guc *guc,
  				 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
  }
  
@@ -299,7 +299,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm
  	intel_context_get(ce);
  
  	return ce->guc_id;
-@@ -2677,6 +2739,132 @@ static void guc_context_sched_disable(struct intel_context *ce)
+@@ -2679,6 +2741,132 @@ static void guc_context_sched_disable(struct intel_context *ce)
  	intel_context_sched_disable_unpin(ce);
  }
  
@@ -432,7 +432,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm
  int intel_guc_modify_scheduling(struct intel_guc *guc, bool enable)
  {
  	struct intel_gt *gt = guc_to_gt(guc);
-@@ -2991,6 +3179,9 @@ static const struct intel_context_ops guc_context_ops = {
+@@ -2993,6 +3181,9 @@ static const struct intel_context_ops guc_context_ops = {
  
  	.ban = guc_context_ban,
  
@@ -442,7 +442,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm
  	.enter = intel_context_enter_engine,
  	.exit = guc_context_exit,
  
-@@ -3380,6 +3571,9 @@ static const struct intel_context_ops virtual_guc_context_ops = {
+@@ -3382,6 +3573,9 @@ static const struct intel_context_ops virtual_guc_context_ops = {
  
  	.ban = guc_context_ban,
  
@@ -452,7 +452,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm
  	.enter = guc_virtual_context_enter,
  	.exit = guc_virtual_context_exit,
  
-@@ -3457,6 +3651,9 @@ static const struct intel_context_ops parent_context_ops = {
+@@ -3459,6 +3653,9 @@ static const struct intel_context_ops parent_context_ops = {
  
  	.ban = guc_context_ban,
  
@@ -462,7 +462,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm
  	.enter = intel_context_enter_engine,
  	.exit = intel_context_exit_engine,
  
-@@ -3476,6 +3673,9 @@ static const struct intel_context_ops virtual_parent_context_ops = {
+@@ -3478,6 +3675,9 @@ static const struct intel_context_ops virtual_parent_context_ops = {
  
  	.ban = guc_context_ban,
  
@@ -472,7 +472,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm
  	.enter = guc_virtual_context_enter,
  	.exit = guc_virtual_context_exit,
  
-@@ -3487,6 +3687,9 @@ static const struct intel_context_ops virtual_parent_context_ops = {
+@@ -3489,6 +3689,9 @@ static const struct intel_context_ops virtual_parent_context_ops = {
  static const struct intel_context_ops child_context_ops = {
  	.alloc = guc_context_alloc,
  
@@ -482,7 +482,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm
  	.enter = intel_context_enter_engine,
  	.exit = guc_context_exit,
  
-@@ -3497,6 +3700,9 @@ static const struct intel_context_ops child_context_ops = {
+@@ -3499,6 +3702,9 @@ static const struct intel_context_ops child_context_ops = {
  static const struct intel_context_ops virtual_child_context_ops = {
  	.alloc = guc_virtual_context_alloc,
  
@@ -492,7 +492,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm
  	.enter = guc_virtual_context_enter,
  	.exit = guc_virtual_context_exit,
  
-@@ -4440,6 +4646,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc,
+@@ -4441,6 +4647,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc,
  		clr_context_banned(ce);
  		clr_context_pending_disable(ce);
  		__guc_signal_context_fence(ce);
diff --git a/0001-INTEL_DII-drm-i915-pxp-interface-for-marking-context.patch b/0001-INTEL_DII-drm-i915-pxp-interface-for-marking-context.patch
index 6b38bd36d21b..8a6b9561eb24 100644
--- a/0001-INTEL_DII-drm-i915-pxp-interface-for-marking-context.patch
+++ b/0001-INTEL_DII-drm-i915-pxp-interface-for-marking-context.patch
@@ -56,7 +56,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/
  #include "i915_gem_context.h"
  #include "i915_gem_ioctls.h"
  #include "i915_globals.h"
-@@ -2574,6 +2576,40 @@ static int set_acc(struct i915_gem_context *ctx,
+@@ -2769,6 +2771,40 @@ static int set_acc(struct i915_gem_context *ctx,
  	return 0;
  }
  
@@ -97,7 +97,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/
  static int ctx_setparam(struct drm_i915_file_private *fpriv,
  			struct i915_gem_context *ctx,
  			struct drm_i915_gem_context_param *args,
-@@ -2607,6 +2643,8 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
+@@ -2802,6 +2838,8 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
  			ret = -EPERM;
  		else if (args->value)
  			i915_gem_context_set_bannable(ctx);
@@ -106,7 +106,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/
  		else
  			i915_gem_context_clear_bannable(ctx);
  		break;
-@@ -2614,10 +2652,12 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
+@@ -2809,10 +2847,12 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
  	case I915_CONTEXT_PARAM_RECOVERABLE:
  		if (args->size)
  			ret = -EINVAL;
@@ -122,7 +122,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/
  		break;
  
  	case I915_CONTEXT_PARAM_PRIORITY:
-@@ -2664,6 +2704,9 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
+@@ -2865,6 +2905,9 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
  	case I915_CONTEXT_PARAM_DEBUG_FLAGS:
  		ret = set_debug_flags(ctx, args);
  		break;
@@ -132,7 +132,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/
  
  	case I915_CONTEXT_PARAM_BAN_PERIOD:
  	default:
-@@ -3157,6 +3200,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
+@@ -3358,6 +3401,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
  	case I915_CONTEXT_PARAM_DEBUG_FLAGS:
  		ret = get_debug_flags(ctx, args);
  		break;
@@ -142,7 +142,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/
  
  	case I915_CONTEXT_PARAM_BAN_PERIOD:
  	default:
-@@ -3281,6 +3327,11 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
+@@ -3482,6 +3528,11 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
  	args->batch_active = atomic_read(&ctx->guilty_count);
  	args->batch_pending = atomic_read(&ctx->active_count);
  
@@ -225,7 +225,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i9
  	eb->gem_context = ctx;
  	if (rcu_access_pointer(ctx->vm))
  		eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
-@@ -3301,6 +3308,17 @@ eb_select_engine(struct i915_execbuffer *eb)
+@@ -3311,6 +3318,17 @@ eb_select_engine(struct i915_execbuffer *eb, unsigned int batch_number)
  
  	intel_gt_pm_get(ce->engine->gt);
  
@@ -348,7 +348,7 @@ diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
 diff --git a/include/uapi/drm/i915_drm_prelim.h b/include/uapi/drm/i915_drm_prelim.h
 --- a/include/uapi/drm/i915_drm_prelim.h
 +++ b/include/uapi/drm/i915_drm_prelim.h
-@@ -893,6 +893,26 @@ struct prelim_drm_i915_gem_context_param {
+@@ -1003,6 +1003,26 @@ struct prelim_drm_i915_gem_context_param {
  #define I915_CONTEXT_PARAM_ACC    0xd
  };
  
diff --git a/0001-INTEL_DII-drm-i915-pxp-start-the-arb-session-on-dema.patch b/0001-INTEL_DII-drm-i915-pxp-start-the-arb-session-on-dema.patch
index 5ee627b00811..4b4326057959 100644
--- a/0001-INTEL_DII-drm-i915-pxp-start-the-arb-session-on-dema.patch
+++ b/0001-INTEL_DII-drm-i915-pxp-start-the-arb-session-on-dema.patch
@@ -22,7 +22,7 @@ Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
 diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
 +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
-@@ -3309,9 +3309,11 @@ eb_select_engine(struct i915_execbuffer *eb)
+@@ -3319,9 +3319,11 @@ eb_select_engine(struct i915_execbuffer *eb, unsigned int batch_number)
  	intel_gt_pm_get(ce->engine->gt);
  
  	if (i915_gem_context_uses_protected_content(eb->gem_context)) {
diff --git a/0001-INTEL_DII-NOT_UPSTREAM-drm-i915-Introduce-set_parall.patch b/0001-INTEL_DII-NOT_UPSTREAM-drm-i915-Introduce-set_parall.patch
new file mode 100644
index 000000000000..415fbd930383
--- /dev/null
+++ b/0001-INTEL_DII-NOT_UPSTREAM-drm-i915-Introduce-set_parall.patch
@@ -0,0 +1,676 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Matthew Brost <matthew.brost@intel.com>
+Date: Wed, 7 Jul 2021 16:55:03 -0700
+Subject: [PATCH] INTEL_DII/NOT_UPSTREAM: drm/i915: Introduce set_parallel2
+ extension
+
+Based on upstream feedback the set_parallel extension isn't suitable as
+it looks a bit too much like the bonding extension. Introduce a
+set_parallel2 extension which configures parallel submission in a single
+extension and in a single slot. This compares to old set_parallel
+extension which configured parallel submission across multiple slots.
+
+Also remove the ability for the user to pass in the number of BBs in
+the execbuf IOCTL. The number of BBs is now implied based on the width
+of the context in the slot.
+
+This patch is intended to enable UMDs for the upstream direction while
+maintaining the old set_parallel extension to not break UMDs. Once UMDs
+have been updated to use new extension the old one can be removed from
+DII.
+
+v2: Only enable parallel submission on engines set by user
+
+Signed-off-by: Matthew Brost <matthew.brost@intel.com>
+---
+ drivers/gpu/drm/i915/gem/i915_gem_context.c   | 190 +++++++++++++++++-
+ .../gpu/drm/i915/gem/i915_gem_context_types.h |   6 -
+ .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  73 +++++--
+ drivers/gpu/drm/i915/gt/intel_context.c       |   2 +
+ drivers/gpu/drm/i915/gt/intel_context.h       |  11 +
+ drivers/gpu/drm/i915/gt/intel_context_types.h |   4 +
+ .../gpu/drm/i915/gt/uc/intel_guc_submission.c |   4 +-
+ drivers/gpu/drm/i915/i915_request.c           |   7 +-
+ include/uapi/drm/i915_drm_prelim.h            | 115 +++++++++++
+ 9 files changed, 376 insertions(+), 36 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
+--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
++++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
+@@ -374,7 +374,6 @@ void i915_gem_context_release(struct kref *ref)
+ 	mutex_destroy(&ctx->engines_mutex);
+ 	mutex_destroy(&ctx->lut_mutex);
+ 	mutex_destroy(&ctx->mutex);
+-	mutex_destroy(&ctx->parallel_submit);
+ 
+ 	kfree_rcu(ctx, rcu);
+ }
+@@ -699,8 +698,6 @@ __create_context(struct drm_i915_private *i915)
+ 	mutex_init(&ctx->mutex);
+ 	INIT_LIST_HEAD(&ctx->link);
+ 
+-	mutex_init(&ctx->parallel_submit);
+-
+ 	spin_lock_init(&ctx->stale.lock);
+ 	INIT_LIST_HEAD(&ctx->stale.engines);
+ 
+@@ -1857,6 +1854,48 @@ static bool validate_parallel_engines_layout(const struct set_engines *set)
+ 	return true;
+ }
+ 
++/*
++ * Engine must be same class and form a logically contiguous mask.
++ *
++ * FIXME: Logical mask check not 100% correct but good enough for the PoC
++ */
++static bool __validate_parallel_engines_layout(struct drm_i915_private *i915,
++					       struct intel_context *parent)
++{
++	u8 engine_class = parent->engine->class;
++	u8 num_siblings = hweight_long(parent->engine->logical_mask);
++	struct intel_context *child;
++	intel_engine_mask_t logical_mask = parent->engine->logical_mask;
++
++	for_each_child(parent, child) {
++		if (child->engine->class != engine_class) {
++			drm_dbg(&i915->drm, "Class mismatch: %u, %u",
++				engine_class, child->engine->class);
++			return false;
++		}
++		if (hweight_long(child->engine->logical_mask) != num_siblings) {
++			drm_dbg(&i915->drm, "Sibling mismatch: %u, %lu",
++				num_siblings,
++				hweight_long(child->engine->logical_mask));
++			return false;
++		}
++		if (logical_mask & child->engine->logical_mask) {
++			drm_dbg(&i915->drm, "Overlapping logical mask: 0x%04x, 0x%04x",
++				logical_mask, child->engine->logical_mask);
++			return false;
++		}
++		logical_mask |= child->engine->logical_mask;
++	}
++
++	if (!is_power_of_2((logical_mask >> (ffs(logical_mask) - 1)) + 1)) {
++		drm_dbg(&i915->drm, "Non-contiguous logical mask: 0x%04x",
++			logical_mask);
++		return false;
++	}
++
++	return true;
++}
++
+ static int
+ set_engines__parallel_submit(struct i915_user_extension __user *base, void *data)
+ {
+@@ -2009,11 +2048,156 @@ set_engines__parallel_submit(struct i915_user_extension __user *base, void *data
+ 	return err;
+ }
+ 
++static int
++set_engines__parallel2_submit(struct i915_user_extension __user *base,
++			      void *data)
++{
++	struct prelim_drm_i915_context_engines_parallel2_submit __user *ext =
++		container_of_user(base, typeof(*ext), base);
++	const struct set_engines *set = data;
++	struct drm_i915_private *i915 = set->ctx->i915;
++	struct intel_context *parent, *child, *ce;
++	u64 flags;
++	int err = 0, n, i, j;
++	u16 slot, width, num_siblings;
++	struct intel_engine_cs **siblings = NULL;
++
++	if (!(intel_uc_uses_guc_submission(&i915->gt.uc)))
++		return -ENODEV;
++
++	if (get_user(slot, &ext->engine_index))
++		return -EFAULT;
++
++	if (get_user(width, &ext->width))
++		return -EFAULT;
++
++	if (get_user(num_siblings, &ext->num_siblings))
++		return -EFAULT;
++
++	if (slot >= set->engines->num_engines) {
++		drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n",
++			slot, set->engines->num_engines);
++		return -EINVAL;
++	}
++
++	parent = set->engines->engines[slot];
++	if (parent) {
++		drm_dbg(&i915->drm, "Context index[%d] not NULL\n", slot);
++		return -EINVAL;
++	}
++
++	if (get_user(flags, &ext->flags))
++		return -EFAULT;
++
++	if (flags) {
++		drm_dbg(&i915->drm, "Unknown flags 0x%02llx", flags);
++		return -EINVAL;
++	}
++
++	for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
++		err = check_user_mbz(&ext->mbz64[n]);
++		if (err)
++			return err;
++	}
++
++	if (width < 1) {
++		drm_dbg(&i915->drm, "Width (%d) < 1 \n", width);
++		return -EINVAL;
++	}
++
++	if (num_siblings < 1) {
++		drm_dbg(&i915->drm, "Number siblings (%d) < 1 \n",
++			num_siblings);
++		return -EINVAL;
++	}
++
++	siblings = kmalloc_array(num_siblings,
++				 sizeof(*siblings),
++				 GFP_KERNEL);
++	if (!siblings)
++		return -ENOMEM;
++
++	mutex_lock(&set->ctx->mutex);
++
++	/* Create contexts / engines */
++	for (i = 0; i < width; ++i) {
++		for (j = 0; j < num_siblings; ++j) {
++			struct i915_engine_class_instance ci;
++
++			if (copy_from_user(&ci, &ext->engines[i * num_siblings + j],
++					   sizeof(ci))) {
++				err = -EFAULT;
++				goto out_err;
++			}
++
++			siblings[j] = intel_engine_lookup_user(i915,
++							       ci.engine_class,
++							       ci.engine_instance);
++			if (!siblings[j]) {
++				drm_dbg(&i915->drm,
++					"Invalid sibling[%d]: { class:%d, inst:%d }\n",
++					n, ci.engine_class, ci.engine_instance);
++				err = -EINVAL;
++				goto out_err;
++			}
++		}
++
++		ce = intel_engine_create_virtual(siblings, num_siblings,
++						 FORCE_VIRTUAL);
++		if (IS_ERR(ce)) {
++			err = PTR_ERR(ce);
++			goto out_err;
++		}
++		intel_context_set_gem(ce, set->ctx);
++
++		if (i == 0) {
++			parent = ce;
++			__set_bit(CONTEXT_NO_PREEMPT_MID_BATCH, &ce->flags);
++		} else {
++			intel_context_bind_parent_child(parent, ce);
++			err = intel_context_alloc_state(ce);
++			if (err)
++				goto out_err;
++		}
++	}
++
++	if (!__validate_parallel_engines_layout(i915, parent)) {
++		drm_dbg(&i915->drm, "Invalidate parallel context layout");
++		err = -EINVAL;
++		goto out_err;
++	}
++
++	intel_guc_configure_parent_context(parent);
++	if (cmpxchg(&set->engines->engines[slot], NULL, parent)) {
++		err = -EEXIST;
++		goto out_err;
++	}
++
++	kfree(siblings);
++	mutex_unlock(&set->ctx->mutex);
++
++	return 0;
++
++out_err:
++	if (parent) {
++		for_each_child(parent, child)
++			intel_context_put(child);
++		intel_context_put(parent);
++		set->engines->engines[slot] = NULL;
++	}
++	kfree(siblings);
++	mutex_unlock(&set->ctx->mutex);
++
++	return err;
++}
++
+ static const i915_user_extension_fn set_engines__extensions[] = {
+ 	[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
+ 	[I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond,
+ 	[PRELIM_I915_USER_EXT_MASK(PRELIM_I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT)] =
+ 		set_engines__parallel_submit,
++	[PRELIM_I915_USER_EXT_MASK(PRELIM_I915_CONTEXT_ENGINES_EXT_PARALLEL2_SUBMIT)] =
++		set_engines__parallel2_submit,
+ };
+ 
+ static int
+diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
++++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+@@ -194,12 +194,6 @@ struct i915_gem_context {
+ 	 */
+ 	u64 fence_context;
+ 
+-	/**
+-	 * @parallel_submit: Ensure only 1 parallel submission is happening on
+-	 * this context at a time.
+-	 */
+-	struct mutex parallel_submit;
+-
+ 	/**
+ 	 * @seqno: Seqno when using when a parallel context.
+ 	 */
+diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
++++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+@@ -1633,7 +1633,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
+ 		goto err_unmap;
+ 
+ 	if (engine == eb->context->engine &&
+-	    !i915_gem_context_is_parallel(eb->gem_context)) {
++	    !intel_context_is_parallel(eb->context)) {
+ 		rq = i915_request_create(eb->context);
+ 	} else {
+ 		struct intel_context *ce = eb->reloc_context;
+@@ -1727,7 +1727,7 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
+ 		struct intel_engine_cs *engine = eb->engine;
+ 
+ 		if (!reloc_can_use_engine(engine) ||
+-		    i915_gem_context_is_parallel(eb->gem_context)) {
++		    intel_context_is_parallel(eb->context)) {
+ 			engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0];
+ 			if (!engine)
+ 				return ERR_PTR(-ENODEV);
+@@ -3223,7 +3223,7 @@ eb_select_legacy_ring(struct i915_execbuffer *eb)
+ }
+ 
+ static int
+-eb_select_engine(struct i915_execbuffer *eb)
++eb_select_engine(struct i915_execbuffer *eb, unsigned int batch_number)
+ {
+ 	struct intel_context *ce;
+ 	unsigned int idx;
+@@ -3238,6 +3238,16 @@ eb_select_engine(struct i915_execbuffer *eb)
+ 	if (IS_ERR(ce))
+ 		return PTR_ERR(ce);
+ 
++	if (batch_number > 0 &&
++	    !i915_gem_context_is_parallel(eb->gem_context)) {
++		struct intel_context *parent = ce;
++		for_each_child(parent, ce)
++			if (!--batch_number)
++				break;
++		intel_context_put(parent);
++		intel_context_get(ce);
++	}
++
+ 	intel_gt_pm_get(ce->engine->gt);
+ 
+ 	if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
+@@ -3562,7 +3572,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
+ 	if (unlikely(err))
+ 		goto err_destroy;
+ 
+-	err = eb_select_engine(&eb);
++	err = eb_select_engine(&eb, batch_number);
+ 	if (unlikely(err))
+ 		goto err_context;
+ 
+@@ -3751,6 +3761,8 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
+ 	const size_t count = args->buffer_count;
+ 	unsigned int num_batches, i;
+ 	int err, start_context;
++	bool is_parallel = false;
++	struct intel_context *parent = NULL;
+ 
+ 	if (!check_buffer_count(count)) {
+ 		drm_dbg(&i915->drm, "execbuf2 with %zd buffers\n", count);
+@@ -3782,15 +3794,35 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
+ 			I915_EXEC_NUMBER_BB_LSB) +
+ 		       ((args->flags & PRELIM_I915_EXEC_NUMBER_BB_MASK) >>
+ 			PRELIM_I915_EXEC_NUMBER_BB_LSB)) + 1;
+-	if (i915_gem_context_is_parallel(ctx)) {
+-		if (num_batches > count ||
+-		    start_context + num_batches > ctx->width) {
+-			err = -EINVAL;
+-			goto err_context;
++
++	if (i915_gem_context_user_engines(ctx)) {
++		parent = i915_gem_context_get_engine(ctx, start_context);
++		if (IS_ERR(parent)) {
++			i915_gem_context_put(ctx);
++			return PTR_ERR(parent);
+ 		}
+ 
+-		if (i915_gem_context_is_bb_preempt_boundary(ctx) &&
+-		    (start_context || num_batches != ctx->width)) {
++		is_parallel = i915_gem_context_is_parallel(ctx) ||
++			intel_context_is_parallel(parent);
++		if (i915_gem_context_is_parallel(ctx)) {
++			if (num_batches > count ||
++			    start_context + num_batches > ctx->width) {
++				err = -EINVAL;
++				goto err_context;
++			}
++
++			if (i915_gem_context_is_bb_preempt_boundary(ctx) &&
++			    (start_context || num_batches != ctx->width)) {
++				err = -EINVAL;
++				goto err_context;
++			}
++		} else if (intel_context_is_parallel(parent)) {
++			if (num_batches != 1)
++				return -EINVAL;
++			num_batches = parent->guc_number_children + 1;
++			if (num_batches > count)
++				return -EINVAL;
++		} else if(num_batches > 1) {
+ 			err = -EINVAL;
+ 			goto err_context;
+ 		}
+@@ -3827,8 +3859,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
+ 	 * properly, also this is needed to create an excl fence for an dma buf
+ 	 * objects these BBs touch.
+ 	 */
+-	if (args->flags & I915_EXEC_FENCE_OUT ||
+-	    i915_gem_context_is_parallel(ctx)) {
++	if (args->flags & I915_EXEC_FENCE_OUT || is_parallel) {
+ 		out_fences = kcalloc(num_batches, sizeof(*out_fences),
+ 				     GFP_KERNEL);
+ 		if (!out_fences) {
+@@ -3874,8 +3905,8 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
+ 	 * in intel_context sequence, thus only 1 submission can happen at a
+ 	 * time.
+ 	 */
+-	if (i915_gem_context_is_parallel(ctx))
+-		mutex_lock(&ctx->parallel_submit);
++	if (is_parallel)
++		mutex_lock(&parent->parallel_submit);
+ 
+ 	err = i915_gem_do_execbuffer(dev, file, args, exec2_list,
+ 				     args->flags & I915_EXEC_BATCH_FIRST ?
+@@ -3889,8 +3920,10 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
+ 				     &ww);
+ 
+ 	for (i = 1; err == 0 && i < num_batches; i++) {
+-		args->flags &= ~I915_EXEC_RING_MASK;
+-		args->flags |= start_context + i;
++		if (i915_gem_context_is_parallel(ctx)) {
++			args->flags &= ~I915_EXEC_RING_MASK;
++			args->flags |= start_context + i;
++		}
+ 		args->batch_len = 0;
+ 
+ 		err = i915_gem_do_execbuffer(dev, file, args, exec2_list,
+@@ -3905,8 +3938,8 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
+ 					     &ww);
+ 	}
+ 
+-	if (i915_gem_context_is_parallel(ctx))
+-		mutex_unlock(&ctx->parallel_submit);
++	if (is_parallel)
++		mutex_unlock(&parent->parallel_submit);
+ 
+ 	/*
+ 	 * Now that we have begun execution of the batchbuffer, we ignore
+@@ -4009,6 +4042,8 @@ end:;
+ 	dma_fence_put(in_fence);
+ err_context:
+ 	i915_gem_context_put(ctx);
++	if (parent)
++		intel_context_put(parent);
+ 
+ 	return err;
+ }
+diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
+--- a/drivers/gpu/drm/i915/gt/intel_context.c
++++ b/drivers/gpu/drm/i915/gt/intel_context.c
+@@ -460,6 +460,7 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
+ 	INIT_LIST_HEAD(&ce->signals);
+ 
+ 	mutex_init(&ce->pin_mutex);
++	mutex_init(&ce->parallel_submit);
+ 
+ 	spin_lock_init(&ce->guc_state.lock);
+ 	INIT_LIST_HEAD(&ce->guc_state.fences);
+@@ -491,6 +492,7 @@ void intel_context_fini(struct intel_context *ce)
+ 			intel_context_put(child);
+ 
+ 	mutex_destroy(&ce->pin_mutex);
++	mutex_destroy(&ce->parallel_submit);
+ 	i915_active_fini(&ce->active);
+ }
+ 
+diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
+--- a/drivers/gpu/drm/i915/gt/intel_context.h
++++ b/drivers/gpu/drm/i915/gt/intel_context.h
+@@ -52,6 +52,11 @@ static inline bool intel_context_is_parent(struct intel_context *ce)
+ 	return !!ce->guc_number_children;
+ }
+ 
++static inline bool intel_context_is_parallel(struct intel_context *ce)
++{
++	return intel_context_is_child(ce) || intel_context_is_parent(ce);
++}
++
+ /* Only should be called directly by selftests */
+ void __intel_context_bind_parent_child(struct intel_context *parent,
+ 				       struct intel_context *child);
+@@ -204,6 +209,12 @@ static inline bool intel_context_is_barrier(const struct intel_context *ce)
+ 	return test_bit(CONTEXT_BARRIER_BIT, &ce->flags);
+ }
+ 
++static inline bool
++intel_context_is_no_preempt_mid_batch(const struct intel_context *ce)
++{
++	return test_bit(CONTEXT_NO_PREEMPT_MID_BATCH, &ce->flags);
++}
++
+ static inline bool intel_context_is_closed(const struct intel_context *ce)
+ {
+ 	return test_bit(CONTEXT_CLOSED_BIT, &ce->flags);
+diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
+--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
++++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
+@@ -114,6 +114,7 @@ struct intel_context {
+ #define CONTEXT_FORCE_SINGLE_SUBMISSION	7
+ #define CONTEXT_NOPREEMPT		8
+ #define CONTEXT_LRCA_DIRTY		9
++#define CONTEXT_NO_PREEMPT_MID_BATCH	10
+ 
+ 	struct {
+ 		u64 timeout_us;
+@@ -239,6 +240,9 @@ struct intel_context {
+ 
+ 	/* Last request submitted on a parent */
+ 	struct i915_request *last_rq;
++
++	/* Parallel submit mutex */
++	struct mutex parallel_submit;
+ };
+ 
+ #endif /* __INTEL_CONTEXT_TYPES__ */
+diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
++++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+@@ -798,8 +798,7 @@ static inline int rq_prio(const struct i915_request *rq)
+ 
+ static inline bool is_multi_lrc(struct intel_context *ce)
+ {
+-	return intel_context_is_child(ce) ||
+-		intel_context_is_parent(ce);
++	return intel_context_is_parallel(ce);
+ }
+ 
+ static inline bool is_multi_lrc_rq(struct i915_request *rq)
+@@ -3458,6 +3457,7 @@ void intel_guc_configure_parent_context(struct intel_context *ce)
+ 		bb_preempt_boundary =
+ 			i915_gem_context_is_bb_preempt_boundary(ctx);
+ 	rcu_read_unlock();
++	bb_preempt_boundary |= intel_context_is_no_preempt_mid_batch(ce);
+ 	if (bb_preempt_boundary) {
+ 		ce->emit_bb_start = emit_bb_start_parent_bb_preempt_boundary;
+ 		ce->emit_fini_breadcrumb =
+diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
+--- a/drivers/gpu/drm/i915/i915_request.c
++++ b/drivers/gpu/drm/i915/i915_request.c
+@@ -1606,14 +1606,9 @@ i915_request_await_object(struct i915_request *to,
+ 	return ret;
+ }
+ 
+-static inline bool is_parallel(struct intel_context *ce)
+-{
+-	return intel_context_is_child(ce) || intel_context_is_parent(ce);
+-}
+-
+ static inline bool is_parallel_rq(struct i915_request *rq)
+ {
+-	return is_parallel(rq->context);
++	return intel_context_is_parallel(rq->context);
+ }
+ 
+ static inline struct intel_context *request_to_parent(struct i915_request *rq)
+diff --git a/include/uapi/drm/i915_drm_prelim.h b/include/uapi/drm/i915_drm_prelim.h
+--- a/include/uapi/drm/i915_drm_prelim.h
++++ b/include/uapi/drm/i915_drm_prelim.h
+@@ -370,9 +370,124 @@ struct prelim_i915_context_engines_parallel_submit {
+ } __attribute__ ((packed));
+ #define i915_context_engines_parallel_submit prelim_i915_context_engines_parallel_submit
+ 
++/**
++ * struct prelim_drm_i915_context_engines_parallel2_submit - Configure engine
++ * for parallel submission.
++ *
++ * Setup a slot in the context engine map to allow multiple BBs to be submitted
++ * in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU
++ * in parallel. Multiple hardware contexts are created internally in the i915
++ * to run these BBs. Once a slot is configured for N BBs only N BBs can be
++ * submitted in each execbuf IOCTL and this is implicit behavior e.g. The user
++ * doesn't tell the execbuf IOCTL there are N BBs, the execbuf IOCTL knows how
++ * many BBs there are based on the slot's configuration. The N BBs are the last
++ * N buffer objects or first N if I915_EXEC_BATCH_FIRST is set.
++ *
++ * The default placement behavior is to create implicit bonds between each
++ * context if each context maps to more than 1 physical engine (e.g. context is
++ * a virtual engine). Also we only allow contexts of same engine class and these
++ * contexts must be in logically contiguous order. Examples of the placement
++ * behavior described below. Lastly, the default is to not allow BBs to be
++ * preempted mid BB, rather insert coordinated preemption on all hardware
++ * contexts between each set of BBs. Flags may be added in the future to change
++ * both of these default behaviors.
++ *
++ * Returns -EINVAL if hardware context placement configuration is invalid or if
++ * the placement configuration isn't supported on the platform / submission
++ * interface.
++ * Returns -ENODEV if extension isn't supported on the platform / submission
++ * interface.
++ *
++ * .. code-block::
++ *
++ *	Example 1 pseudo code:
++ *	CS[X] = generic engine of same class, logical instance X
++ *	INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
++ *	set_engines(INVALID)
++ *	set_parallel(engine_index=0, width=2, num_siblings=1,
++ *		     engines=CS[0],CS[1])
++ *
++ *	Results in the following valid placement:
++ *	CS[0], CS[1]
++ *
++ *	Example 2 pseudo code:
++ *	CS[X] = generic engine of same class, logical instance X
++ *	INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
++ *	set_engines(INVALID)
++ *	set_parallel(engine_index=0, width=2, num_siblings=2,
++ *		     engines=CS[0],CS[2],CS[1],CS[3])
++ *
++ *	Results in the following valid placements:
++ *	CS[0], CS[1]
++ *	CS[2], CS[3]
++ *
++ *	This can also be thought of as 2 virtual engines described by a 2-D array
++ *	in the engines field with bonds placed between each index of the
++ *	virtual engines. e.g. CS[0] is bonded to CS[1], CS[2] is bonded to
++ *	CS[3].
++ *	VE[0] = CS[0], CS[2]
++ *	VE[1] = CS[1], CS[3]
++ *
++ *	Example 3 pseudo code:
++ *	CS[X] = generic engine of same class, logical instance X
++ *	INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
++ *	set_engines(INVALID)
++ *	set_parallel(engine_index=0, width=2, num_siblings=2,
++ *		     engines=CS[0],CS[1],CS[1],CS[3])
++ *
++ *	Results in the following valid and invalid placements:
++ *	CS[0], CS[1]
++ *	CS[1], CS[3] - Not logically contiguous, return -EINVAL
++ */
++struct prelim_drm_i915_context_engines_parallel2_submit {
++	/**
++	 * @base: base user extension.
++	 */
++	struct i915_user_extension base;
++
++	/**
++	 * @engine_index: slot for parallel engine
++	 */
++	__u16 engine_index;
++
++	/**
++	 * @width: number of contexts per parallel engine
++	 */
++	__u16 width;
++
++	/**
++	 * @num_siblings: number of siblings per context
++	 */
++	__u16 num_siblings;
++
++	/**
++	 * @mbz16: reserved for future use; must be zero
++	 */
++	__u16 mbz16;
++
++	/**
++	 * @flags: all undefined flags must be zero; no flags are defined yet
++	 */
++	__u64 flags;
++
++	/**
++	 * @mbz64: reserved for future use; must be zero
++	 */
++	__u64 mbz64[3];
++
++	/**
++	 * @engines: 2-d array of engine instances to configure parallel engine
++	 *
++	 * length = width (i) * num_siblings (j)
++	 * index = j + i * num_siblings
++	 */
++	struct i915_engine_class_instance engines[0];
++} __attribute__ ((packed));
++
+ struct prelim_i915_context_param_engines {
+ #define PRELIM_I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT (PRELIM_I915_USER_EXT | 2) /* see prelim_i915_context_engines_parallel_submit */
+ #define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */
++#define PRELIM_I915_CONTEXT_ENGINES_EXT_PARALLEL2_SUBMIT (PRELIM_I915_USER_EXT | 3) /* see prelim_i915_context_engines_parallel2_submit */
+ };
+ 
+ enum prelim_drm_i915_gem_memory_class {
--
git-pile 0.97


WARNING: multiple messages have this Message-ID (diff)
From: Matthew Brost <matthew.brost@intel.com>
To: <intel-gfx@lists.freedesktop.org>, <dri-devel@lists.freedesktop.org>
Subject: [Intel-gfx] [PATCH 0/2] Introduce set_parallel2 extension
Date: Wed,  7 Jul 2021 17:30:17 -0700	[thread overview]
Message-ID: <20210708003019.9213-1-matthew.brost@intel.com> (raw)
In-Reply-To: <20210706222010.101522-1-matthew.brost@intel.com>

Based on upstream feedback [1] the current set_parallel extension isn't
suitable. Add a single patch to DII implementing the new interface
agreed to upstream [2]. Intended to enable the UMDs with the upstream
interface while maintaining the old interface on DII. 

A quick IGT to prove this is working should be on the list shortly.

v2: Move single patch in GuC section on pile, align with agreed to
upstream interface, only include prelim* definitions. 
v3: Enable set_parallel2 via SET_PARAM IOCTL, resend for CI
v4: Fix regression when patch was merged - only do parallel checks on
user engine sets 

Signed-off-by: Matthew Brost <matthew.brost@intel.com>

[1] https://patchwork.freedesktop.org/patch/432205/?series=89840&rev=1
[2] https://patchwork.freedesktop.org/patch/438911/?series=91417&rev=1

Signed-off-by: Matthew Brost <matthew.brost@intel.com>


---
baseline: b7227afd06bac1fe6719136e2ddd2bfed1d85feb
pile-commit: b7a2c9136977a385659a71df837cbe5a1f775b32
range-diff:
   -:  ------------ >  930:  ad12b87b91af INTEL_DII/NOT_UPSTREAM: drm/i915: Introduce set_parallel2 extension
1083:  73e59e150cde ! 1084:  79b296835b1c INTEL_DII/FIXME: drm/i915/perf: add a parameter to control the size of OA buffer
1120:  edbc20ae1355 ! 1121:  30d02d618229 INTEL_DII/FIXME: drm/i915: Add context parameter for debug flags
1293:  997b317fc408 ! 1294:  016b5903b0a0 INTEL_DII: drm/i915/perf: Add OA formats for XEHPSDV
1364:  136064b76b92 ! 1365:  5f564d553dc8 INTEL_DII: drm/i915/xehpsdv: Expand total numbers of supported engines up to 256
1403:  67b729033e82 ! 1404:  4398a2322f2f INTEL_DII: drm/i915/xehpsdv: Impose ULLS context restrictions
1405:  b8dd2a22a952 ! 1406:  dd2fab232cf1 INTEL_DII: drm/i915: Add context methods to suspend and resume requests
1670:  b4633106fa13 ! 1671:  53b4a54ee2cc INTEL_DII: drm/i915/pxp: interface for marking contexts as using protected content
1671:  22369ab70556 ! 1672:  42234590cdf5 INTEL_DII: drm/i915/pxp: start the arb session on demand

 series                                             |   1 +
 ...IXME-drm-i915-perf-add-a-parameter-to-con.patch |   4 +-
 ...IXME-drm-i915-Add-context-parameter-for-d.patch |  18 +-
 ...-drm-i915-perf-Add-OA-formats-for-XEHPSDV.patch |   4 +-
 ...rm-i915-xehpsdv-Expand-total-numbers-of-s.patch |   2 +-
 ...rm-i915-xehpsdv-Impose-ULLS-context-restr.patch |  12 +-
 ...rm-i915-Add-context-methods-to-suspend-an.patch |  38 +-
 ...rm-i915-pxp-interface-for-marking-context.patch |  16 +-
 ...rm-i915-pxp-start-the-arb-session-on-dema.patch |   2 +-
 ...OT_UPSTREAM-drm-i915-Introduce-set_parall.patch | 676 +++++++++++++++++++++
 10 files changed, 725 insertions(+), 48 deletions(-)

diff --git a/series b/series
index 8b77d52df40c..7db508ea974d 100644
--- a/series
+++ b/series
@@ -929,6 +929,7 @@
 0001-INTEL_DII-drm-i915-guc-Increase-GuC-log-size-for-CON.patch
 0001-INTEL_DII-NOT_UPSTREAM-drm-i915-Dump-error-capture-t.patch
 0001-INTEL_DII-NOT_UPSTREAM-drm-i915-guc-Dump-error-captu.patch
+0001-INTEL_DII-NOT_UPSTREAM-drm-i915-Introduce-set_parall.patch
 0001-INTEL_DII-END-GuC-submission-and-slpc-support.patch
 0001-INTEL_DII-BEGIN-SR-IOV-ENABLING.patch
 0001-INTEL_DII-drm-i915-guc-Update-GuC-to-62.0.3.patch
diff --git a/0001-INTEL_DII-FIXME-drm-i915-perf-add-a-parameter-to-con.patch b/0001-INTEL_DII-FIXME-drm-i915-perf-add-a-parameter-to-con.patch
index dd654f144374..b7a637b3813f 100644
--- a/0001-INTEL_DII-FIXME-drm-i915-perf-add-a-parameter-to-con.patch
+++ b/0001-INTEL_DII-FIXME-drm-i915-perf-add-a-parameter-to-con.patch
@@ -384,8 +384,8 @@ diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
 diff --git a/include/uapi/drm/i915_drm_prelim.h b/include/uapi/drm/i915_drm_prelim.h
 --- a/include/uapi/drm/i915_drm_prelim.h
 +++ b/include/uapi/drm/i915_drm_prelim.h
-@@ -393,6 +393,36 @@ struct prelim_i915_context_param_engines {
- #define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */
+@@ -508,6 +508,36 @@ struct prelim_i915_context_param_engines {
+ #define PRELIM_I915_CONTEXT_ENGINES_EXT_PARALLEL2_SUBMIT (PRELIM_I915_USER_EXT | 3) /* see prelim_i915_context_engines_parallel2_submit */
  };
  
 +enum prelim_drm_i915_perf_property_id {
diff --git a/0001-INTEL_DII-FIXME-drm-i915-Add-context-parameter-for-d.patch b/0001-INTEL_DII-FIXME-drm-i915-Add-context-parameter-for-d.patch
index dfd5790ac2b8..71a5943b5536 100644
--- a/0001-INTEL_DII-FIXME-drm-i915-Add-context-parameter-for-d.patch
+++ b/0001-INTEL_DII-FIXME-drm-i915-Add-context-parameter-for-d.patch
@@ -44,7 +44,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/
  }
  
  static void __free_engines(struct i915_gem_engines *e, unsigned int count)
-@@ -2252,6 +2255,76 @@ static int set_priority(struct i915_gem_context *ctx,
+@@ -2436,6 +2439,76 @@ static int set_priority(struct i915_gem_context *ctx,
  	return 0;
  }
  
@@ -121,7 +121,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/
  static int ctx_setparam(struct drm_i915_file_private *fpriv,
  			struct i915_gem_context *ctx,
  			struct drm_i915_gem_context_param *args)
-@@ -2321,6 +2394,11 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
+@@ -2505,6 +2578,11 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
  		ret = set_ringsize(ctx, args);
  		break;
  
@@ -133,7 +133,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/
  	case I915_CONTEXT_PARAM_BAN_PERIOD:
  	default:
  		ret = -EINVAL;
-@@ -2777,6 +2855,11 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
+@@ -2961,6 +3039,11 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
  		ret = get_ringsize(ctx, args);
  		break;
  
@@ -184,7 +184,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm
 diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
 --- a/drivers/gpu/drm/i915/gt/intel_context.h
 +++ b/drivers/gpu/drm/i915/gt/intel_context.h
-@@ -285,6 +285,24 @@ intel_context_clear_nopreempt(struct intel_context *ce)
+@@ -296,6 +296,24 @@ intel_context_clear_nopreempt(struct intel_context *ce)
  		ce->emit_bb_start = ce->engine->emit_bb_start;
  }
  
@@ -212,19 +212,19 @@ diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/i
 diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
 +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
-@@ -114,6 +114,7 @@ struct intel_context {
- #define CONTEXT_FORCE_SINGLE_SUBMISSION	7
+@@ -115,6 +115,7 @@ struct intel_context {
  #define CONTEXT_NOPREEMPT		8
  #define CONTEXT_LRCA_DIRTY		9
-+#define CONTEXT_DEBUG			10
+ #define CONTEXT_NO_PREEMPT_MID_BATCH	10
++#define CONTEXT_DEBUG			11
  
  	struct {
  		u64 timeout_us;
 diff --git a/include/uapi/drm/i915_drm_prelim.h b/include/uapi/drm/i915_drm_prelim.h
 --- a/include/uapi/drm/i915_drm_prelim.h
 +++ b/include/uapi/drm/i915_drm_prelim.h
-@@ -395,6 +395,32 @@ struct prelim_i915_context_param_engines {
- #define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */
+@@ -510,6 +510,32 @@ struct prelim_i915_context_param_engines {
+ #define PRELIM_I915_CONTEXT_ENGINES_EXT_PARALLEL2_SUBMIT (PRELIM_I915_USER_EXT | 3) /* see prelim_i915_context_engines_parallel2_submit */
  };
  
 +struct prelim_drm_i915_gem_context_param {
diff --git a/0001-INTEL_DII-drm-i915-perf-Add-OA-formats-for-XEHPSDV.patch b/0001-INTEL_DII-drm-i915-perf-Add-OA-formats-for-XEHPSDV.patch
index 19a07b3926ae..f62d7848e091 100644
--- a/0001-INTEL_DII-drm-i915-perf-Add-OA-formats-for-XEHPSDV.patch
+++ b/0001-INTEL_DII-drm-i915-perf-Add-OA-formats-for-XEHPSDV.patch
@@ -204,8 +204,8 @@ diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
 diff --git a/include/uapi/drm/i915_drm_prelim.h b/include/uapi/drm/i915_drm_prelim.h
 --- a/include/uapi/drm/i915_drm_prelim.h
 +++ b/include/uapi/drm/i915_drm_prelim.h
-@@ -435,6 +435,27 @@ struct prelim_i915_context_param_engines {
- #define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */
+@@ -550,6 +550,27 @@ struct prelim_i915_context_param_engines {
+ #define PRELIM_I915_CONTEXT_ENGINES_EXT_PARALLEL2_SUBMIT (PRELIM_I915_USER_EXT | 3) /* see prelim_i915_context_engines_parallel2_submit */
  };
  
 +enum prelim_drm_i915_oa_format {
diff --git a/0001-INTEL_DII-drm-i915-xehpsdv-Expand-total-numbers-of-s.patch b/0001-INTEL_DII-drm-i915-xehpsdv-Expand-total-numbers-of-s.patch
index 05a84884a3d1..ee486b95d11e 100644
--- a/0001-INTEL_DII-drm-i915-xehpsdv-Expand-total-numbers-of-s.patch
+++ b/0001-INTEL_DII-drm-i915-xehpsdv-Expand-total-numbers-of-s.patch
@@ -76,7 +76,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i9
  
  	/* Kernel clipping was a DRI1 misfeature */
  	if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) {
-@@ -3233,9 +3235,12 @@ eb_select_engine(struct i915_execbuffer *eb)
+@@ -3233,9 +3235,12 @@ eb_select_engine(struct i915_execbuffer *eb, unsigned int batch_number)
  	int err;
  
  	if (i915_gem_context_user_engines(eb->gem_context))
diff --git a/0001-INTEL_DII-drm-i915-xehpsdv-Impose-ULLS-context-restr.patch b/0001-INTEL_DII-drm-i915-xehpsdv-Impose-ULLS-context-restr.patch
index 38ad84c4dc12..80880e3008cc 100644
--- a/0001-INTEL_DII-drm-i915-xehpsdv-Impose-ULLS-context-restr.patch
+++ b/0001-INTEL_DII-drm-i915-xehpsdv-Impose-ULLS-context-restr.patch
@@ -76,7 +76,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i9
  	if (intel_context_nopreempt(eb->context) ||
  	    intel_context_debug(eb->context))
  		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &eb->request->fence.flags);
-@@ -3453,6 +3462,13 @@ static int eb_request_add(struct i915_execbuffer *eb, int err)
+@@ -3463,6 +3472,13 @@ static int eb_request_add(struct i915_execbuffer *eb, int err)
  
  	trace_i915_request_add(rq);
  
@@ -90,7 +90,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i9
  	prev = __i915_request_commit(rq);
  
  	/* Check that the context wasn't destroyed before submission */
-@@ -3531,6 +3547,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
+@@ -3541,6 +3557,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
  	int err;
  	bool first = batch_number == 0;
  	bool last = batch_number + 1 == num_batches;
@@ -98,7 +98,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i9
  
  	BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
  	BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
-@@ -3582,6 +3599,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
+@@ -3592,6 +3609,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
  	if (unlikely(err))
  		goto err_destroy;
  
@@ -109,7 +109,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i9
 +		goto err_context;
 +	}
 +
- 	err = eb_select_engine(&eb);
+ 	err = eb_select_engine(&eb, batch_number);
  	if (unlikely(err))
  		goto err_context;
 diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
@@ -239,7 +239,7 @@ diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_req
  		/*
  		 * Requests on the same timeline are explicitly ordered, along
  		 * with their dependencies, by i915_request_add() which ensures
-@@ -2126,6 +2181,7 @@ long i915_request_wait(struct i915_request *rq,
+@@ -2121,6 +2176,7 @@ long i915_request_wait(struct i915_request *rq,
  {
  	might_sleep();
  	GEM_BUG_ON(timeout < 0);
@@ -247,7 +247,7 @@ diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_req
  
  	if (dma_fence_is_signaled(&rq->fence))
  		return timeout;
-@@ -2331,6 +2387,8 @@ static struct i915_global_request global = { {
+@@ -2326,6 +2382,8 @@ static struct i915_global_request global = { {
  
  int __init i915_global_request_init(void)
  {
diff --git a/0001-INTEL_DII-drm-i915-Add-context-methods-to-suspend-an.patch b/0001-INTEL_DII-drm-i915-Add-context-methods-to-suspend-an.patch
index 7d523c8dadba..44fd93184b8a 100644
--- a/0001-INTEL_DII-drm-i915-Add-context-methods-to-suspend-an.patch
+++ b/0001-INTEL_DII-drm-i915-Add-context-methods-to-suspend-an.patch
@@ -52,7 +52,7 @@ diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/i
  void
  intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
  {
-@@ -475,6 +481,9 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
+@@ -476,6 +482,9 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
  	ce->guc_id = GUC_INVALID_LRC_ID;
  	INIT_LIST_HEAD(&ce->guc_id_link);
  
@@ -62,7 +62,7 @@ diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/i
  	i915_active_init(&ce->active,
  			 __intel_context_active, __intel_context_retire);
  }
-@@ -485,6 +494,7 @@ void intel_context_fini(struct intel_context *ce)
+@@ -486,6 +495,7 @@ void intel_context_fini(struct intel_context *ce)
  
  	if (ce->last_rq)
  		i915_request_put(ce->last_rq);
@@ -73,7 +73,7 @@ diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/i
 diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
 --- a/drivers/gpu/drm/i915/gt/intel_context.h
 +++ b/drivers/gpu/drm/i915/gt/intel_context.h
-@@ -252,6 +252,54 @@ static inline bool intel_context_ban(struct intel_context *ce,
+@@ -263,6 +263,54 @@ static inline bool intel_context_ban(struct intel_context *ce,
  	return ret;
  }
  
@@ -152,10 +152,10 @@ diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i91
  	void (*enter)(struct intel_context *ce);
  	void (*exit)(struct intel_context *ce);
  
-@@ -241,6 +248,9 @@ struct intel_context {
+@@ -245,6 +252,9 @@ struct intel_context {
  
- 	/* Last request submitted on a parent */
- 	struct i915_request *last_rq;
+ 	/* Parallel submit mutex */
+ 	struct mutex parallel_submit;
 +
 +	/* GuC context blocked fence */
 +	struct i915_sw_fence guc_blocked;
@@ -231,7 +231,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm
  
  	if (!enabled) {
  		GEM_BUG_ON(context_pending_enable(ce));
-@@ -1103,6 +1137,8 @@ static void __guc_context_destroy(struct intel_context *ce);
+@@ -1102,6 +1136,8 @@ static void __guc_context_destroy(struct intel_context *ce);
  static void release_guc_id(struct intel_guc *guc, struct intel_context *ce);
  static void guc_signal_context_fence(struct intel_context *ce);
  static void guc_cancel_context_requests(struct intel_context *ce);
@@ -240,7 +240,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm
  
  static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
  {
-@@ -1143,6 +1179,8 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
+@@ -1142,6 +1178,8 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
  
  		/* Not mutualy exclusive with above if statement. */
  		if (pending_disable) {
@@ -249,7 +249,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm
  			guc_signal_context_fence(ce);
  			if (banned) {
  				guc_cancel_context_requests(ce);
-@@ -1150,7 +1188,12 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
+@@ -1149,7 +1187,12 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
  			}
  			intel_context_sched_disable_unpin(ce);
  			atomic_dec(&guc->outstanding_submission_g2h);
@@ -262,7 +262,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm
  		}
  	}
  }
-@@ -2549,6 +2592,22 @@ static void guc_parent_context_unpin(struct intel_context *ce)
+@@ -2551,6 +2594,22 @@ static void guc_parent_context_unpin(struct intel_context *ce)
  	__guc_context_unpin(ce);
  }
  
@@ -285,7 +285,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm
  static void __guc_context_sched_disable(struct intel_guc *guc,
  					struct intel_context *ce,
  					u16 guc_id)
-@@ -2576,10 +2635,13 @@ static void __guc_context_sched_disable(struct intel_guc *guc,
+@@ -2578,10 +2637,13 @@ static void __guc_context_sched_disable(struct intel_guc *guc,
  				 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
  }
  
@@ -299,7 +299,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm
  	intel_context_get(ce);
  
  	return ce->guc_id;
-@@ -2677,6 +2739,132 @@ static void guc_context_sched_disable(struct intel_context *ce)
+@@ -2679,6 +2741,132 @@ static void guc_context_sched_disable(struct intel_context *ce)
  	intel_context_sched_disable_unpin(ce);
  }
  
@@ -432,7 +432,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm
  int intel_guc_modify_scheduling(struct intel_guc *guc, bool enable)
  {
  	struct intel_gt *gt = guc_to_gt(guc);
-@@ -2991,6 +3179,9 @@ static const struct intel_context_ops guc_context_ops = {
+@@ -2993,6 +3181,9 @@ static const struct intel_context_ops guc_context_ops = {
  
  	.ban = guc_context_ban,
  
@@ -442,7 +442,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm
  	.enter = intel_context_enter_engine,
  	.exit = guc_context_exit,
  
-@@ -3380,6 +3571,9 @@ static const struct intel_context_ops virtual_guc_context_ops = {
+@@ -3382,6 +3573,9 @@ static const struct intel_context_ops virtual_guc_context_ops = {
  
  	.ban = guc_context_ban,
  
@@ -452,7 +452,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm
  	.enter = guc_virtual_context_enter,
  	.exit = guc_virtual_context_exit,
  
-@@ -3457,6 +3651,9 @@ static const struct intel_context_ops parent_context_ops = {
+@@ -3459,6 +3653,9 @@ static const struct intel_context_ops parent_context_ops = {
  
  	.ban = guc_context_ban,
  
@@ -462,7 +462,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm
  	.enter = intel_context_enter_engine,
  	.exit = intel_context_exit_engine,
  
-@@ -3476,6 +3673,9 @@ static const struct intel_context_ops virtual_parent_context_ops = {
+@@ -3478,6 +3675,9 @@ static const struct intel_context_ops virtual_parent_context_ops = {
  
  	.ban = guc_context_ban,
  
@@ -472,7 +472,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm
  	.enter = guc_virtual_context_enter,
  	.exit = guc_virtual_context_exit,
  
-@@ -3487,6 +3687,9 @@ static const struct intel_context_ops virtual_parent_context_ops = {
+@@ -3489,6 +3689,9 @@ static const struct intel_context_ops virtual_parent_context_ops = {
  static const struct intel_context_ops child_context_ops = {
  	.alloc = guc_context_alloc,
  
@@ -482,7 +482,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm
  	.enter = intel_context_enter_engine,
  	.exit = guc_context_exit,
  
-@@ -3497,6 +3700,9 @@ static const struct intel_context_ops child_context_ops = {
+@@ -3499,6 +3702,9 @@ static const struct intel_context_ops child_context_ops = {
  static const struct intel_context_ops virtual_child_context_ops = {
  	.alloc = guc_virtual_context_alloc,
  
@@ -492,7 +492,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm
  	.enter = guc_virtual_context_enter,
  	.exit = guc_virtual_context_exit,
  
-@@ -4440,6 +4646,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc,
+@@ -4441,6 +4647,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc,
  		clr_context_banned(ce);
  		clr_context_pending_disable(ce);
  		__guc_signal_context_fence(ce);
diff --git a/0001-INTEL_DII-drm-i915-pxp-interface-for-marking-context.patch b/0001-INTEL_DII-drm-i915-pxp-interface-for-marking-context.patch
index 6b38bd36d21b..8a6b9561eb24 100644
--- a/0001-INTEL_DII-drm-i915-pxp-interface-for-marking-context.patch
+++ b/0001-INTEL_DII-drm-i915-pxp-interface-for-marking-context.patch
@@ -56,7 +56,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/
  #include "i915_gem_context.h"
  #include "i915_gem_ioctls.h"
  #include "i915_globals.h"
-@@ -2574,6 +2576,40 @@ static int set_acc(struct i915_gem_context *ctx,
+@@ -2769,6 +2771,40 @@ static int set_acc(struct i915_gem_context *ctx,
  	return 0;
  }
  
@@ -97,7 +97,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/
  static int ctx_setparam(struct drm_i915_file_private *fpriv,
  			struct i915_gem_context *ctx,
  			struct drm_i915_gem_context_param *args,
-@@ -2607,6 +2643,8 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
+@@ -2802,6 +2838,8 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
  			ret = -EPERM;
  		else if (args->value)
  			i915_gem_context_set_bannable(ctx);
@@ -106,7 +106,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/
  		else
  			i915_gem_context_clear_bannable(ctx);
  		break;
-@@ -2614,10 +2652,12 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
+@@ -2809,10 +2847,12 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
  	case I915_CONTEXT_PARAM_RECOVERABLE:
  		if (args->size)
  			ret = -EINVAL;
@@ -122,7 +122,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/
  		break;
  
  	case I915_CONTEXT_PARAM_PRIORITY:
-@@ -2664,6 +2704,9 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
+@@ -2865,6 +2905,9 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
  	case I915_CONTEXT_PARAM_DEBUG_FLAGS:
  		ret = set_debug_flags(ctx, args);
  		break;
@@ -132,7 +132,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/
  
  	case I915_CONTEXT_PARAM_BAN_PERIOD:
  	default:
-@@ -3157,6 +3200,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
+@@ -3358,6 +3401,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
  	case I915_CONTEXT_PARAM_DEBUG_FLAGS:
  		ret = get_debug_flags(ctx, args);
  		break;
@@ -142,7 +142,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/
  
  	case I915_CONTEXT_PARAM_BAN_PERIOD:
  	default:
-@@ -3281,6 +3327,11 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
+@@ -3482,6 +3528,11 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
  	args->batch_active = atomic_read(&ctx->guilty_count);
  	args->batch_pending = atomic_read(&ctx->active_count);
  
@@ -225,7 +225,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i9
  	eb->gem_context = ctx;
  	if (rcu_access_pointer(ctx->vm))
  		eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
-@@ -3301,6 +3308,17 @@ eb_select_engine(struct i915_execbuffer *eb)
+@@ -3311,6 +3318,17 @@ eb_select_engine(struct i915_execbuffer *eb, unsigned int batch_number)
  
  	intel_gt_pm_get(ce->engine->gt);
  
@@ -348,7 +348,7 @@ diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
 diff --git a/include/uapi/drm/i915_drm_prelim.h b/include/uapi/drm/i915_drm_prelim.h
 --- a/include/uapi/drm/i915_drm_prelim.h
 +++ b/include/uapi/drm/i915_drm_prelim.h
-@@ -893,6 +893,26 @@ struct prelim_drm_i915_gem_context_param {
+@@ -1003,6 +1003,26 @@ struct prelim_drm_i915_gem_context_param {
  #define I915_CONTEXT_PARAM_ACC    0xd
  };
  
diff --git a/0001-INTEL_DII-drm-i915-pxp-start-the-arb-session-on-dema.patch b/0001-INTEL_DII-drm-i915-pxp-start-the-arb-session-on-dema.patch
index 5ee627b00811..4b4326057959 100644
--- a/0001-INTEL_DII-drm-i915-pxp-start-the-arb-session-on-dema.patch
+++ b/0001-INTEL_DII-drm-i915-pxp-start-the-arb-session-on-dema.patch
@@ -22,7 +22,7 @@ Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
 diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
 +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
-@@ -3309,9 +3309,11 @@ eb_select_engine(struct i915_execbuffer *eb)
+@@ -3319,9 +3319,11 @@ eb_select_engine(struct i915_execbuffer *eb, unsigned int batch_number)
  	intel_gt_pm_get(ce->engine->gt);
  
  	if (i915_gem_context_uses_protected_content(eb->gem_context)) {
diff --git a/0001-INTEL_DII-NOT_UPSTREAM-drm-i915-Introduce-set_parall.patch b/0001-INTEL_DII-NOT_UPSTREAM-drm-i915-Introduce-set_parall.patch
new file mode 100644
index 000000000000..415fbd930383
--- /dev/null
+++ b/0001-INTEL_DII-NOT_UPSTREAM-drm-i915-Introduce-set_parall.patch
@@ -0,0 +1,676 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Matthew Brost <matthew.brost@intel.com>
+Date: Wed, 7 Jul 2021 16:55:03 -0700
+Subject: [PATCH] INTEL_DII/NOT_UPSTREAM: drm/i915: Introduce set_parallel2
+ extension
+
+Based on upstream feedback the set_parallel extension isn't suitable as
+it looks a bit too much like the bonding extension. Introduce a
+set_parallel2 extension which configures parallel submission in a single
+extension and in a single slot. This compares to old set_parallel
+extension which configured parallel submission across multiple slots.
+
+Also remove the ability for the user to pass in the number of BBs in
+the execbuf IOCTL. The number of BBs is now implied based on the width
+of the context in the slot.
+
+This patch is intended to enable UMDs for the upstream direction while
+maintaining the old set_parallel extension to not break UMDs. Once UMDs
+have been updated to use new extension the old one can be removed from
+DII.
+
+v2: Only enable parallel submission on engines set by user
+
+Signed-off-by: Matthew Brost <matthew.brost@intel.com>
+---
+ drivers/gpu/drm/i915/gem/i915_gem_context.c   | 190 +++++++++++++++++-
+ .../gpu/drm/i915/gem/i915_gem_context_types.h |   6 -
+ .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  73 +++++--
+ drivers/gpu/drm/i915/gt/intel_context.c       |   2 +
+ drivers/gpu/drm/i915/gt/intel_context.h       |  11 +
+ drivers/gpu/drm/i915/gt/intel_context_types.h |   4 +
+ .../gpu/drm/i915/gt/uc/intel_guc_submission.c |   4 +-
+ drivers/gpu/drm/i915/i915_request.c           |   7 +-
+ include/uapi/drm/i915_drm_prelim.h            | 115 +++++++++++
+ 9 files changed, 376 insertions(+), 36 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
+--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
++++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
+@@ -374,7 +374,6 @@ void i915_gem_context_release(struct kref *ref)
+ 	mutex_destroy(&ctx->engines_mutex);
+ 	mutex_destroy(&ctx->lut_mutex);
+ 	mutex_destroy(&ctx->mutex);
+-	mutex_destroy(&ctx->parallel_submit);
+ 
+ 	kfree_rcu(ctx, rcu);
+ }
+@@ -699,8 +698,6 @@ __create_context(struct drm_i915_private *i915)
+ 	mutex_init(&ctx->mutex);
+ 	INIT_LIST_HEAD(&ctx->link);
+ 
+-	mutex_init(&ctx->parallel_submit);
+-
+ 	spin_lock_init(&ctx->stale.lock);
+ 	INIT_LIST_HEAD(&ctx->stale.engines);
+ 
+@@ -1857,6 +1854,48 @@ static bool validate_parallel_engines_layout(const struct set_engines *set)
+ 	return true;
+ }
+ 
++/*
++ * Engines must be of the same class and form a logically contiguous mask.
++ *
++ * FIXME: Logical mask check not 100% correct but good enough for the PoC
++ */
++static bool __validate_parallel_engines_layout(struct drm_i915_private *i915,
++					       struct intel_context *parent)
++{
++	u8 engine_class = parent->engine->class;
++	u8 num_siblings = hweight_long(parent->engine->logical_mask);
++	struct intel_context *child;
++	intel_engine_mask_t logical_mask = parent->engine->logical_mask;
++
++	for_each_child(parent, child) {
++		if (child->engine->class != engine_class) {
++			drm_dbg(&i915->drm, "Class mismatch: %u, %u",
++				engine_class, child->engine->class);
++			return false;
++		}
++		if (hweight_long(child->engine->logical_mask) != num_siblings) {
++			drm_dbg(&i915->drm, "Sibling mismatch: %u, %lu",
++				num_siblings,
++				hweight_long(child->engine->logical_mask));
++			return false;
++		}
++		if (logical_mask & child->engine->logical_mask) {
++			drm_dbg(&i915->drm, "Overlapping logical mask: 0x%04x, 0x%04x",
++				logical_mask, child->engine->logical_mask);
++			return false;
++		}
++		logical_mask |= child->engine->logical_mask;
++	}
++
++	if (!is_power_of_2((logical_mask >> (ffs(logical_mask) - 1)) + 1)) {
++		drm_dbg(&i915->drm, "Non-contiguous logical mask: 0x%04x",
++			logical_mask);
++		return false;
++	}
++
++	return true;
++}
++
+ static int
+ set_engines__parallel_submit(struct i915_user_extension __user *base, void *data)
+ {
+@@ -2009,11 +2048,156 @@ set_engines__parallel_submit(struct i915_user_extension __user *base, void *data
+ 	return err;
+ }
+ 
++static int
++set_engines__parallel2_submit(struct i915_user_extension __user *base,
++			      void *data)
++{
++	struct prelim_drm_i915_context_engines_parallel2_submit __user *ext =
++		container_of_user(base, typeof(*ext), base);
++	const struct set_engines *set = data;
++	struct drm_i915_private *i915 = set->ctx->i915;
++	struct intel_context *parent, *child, *ce;
++	u64 flags;
++	int err = 0, n, i, j;
++	u16 slot, width, num_siblings;
++	struct intel_engine_cs **siblings = NULL;
++
++	if (!(intel_uc_uses_guc_submission(&i915->gt.uc)))
++		return -ENODEV;
++
++	if (get_user(slot, &ext->engine_index))
++		return -EFAULT;
++
++	if (get_user(width, &ext->width))
++		return -EFAULT;
++
++	if (get_user(num_siblings, &ext->num_siblings))
++		return -EFAULT;
++
++	if (slot >= set->engines->num_engines) {
++		drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n",
++			slot, set->engines->num_engines);
++		return -EINVAL;
++	}
++
++	parent = set->engines->engines[slot];
++	if (parent) {
++		drm_dbg(&i915->drm, "Context index[%d] not NULL\n", slot);
++		return -EINVAL;
++	}
++
++	if (get_user(flags, &ext->flags))
++		return -EFAULT;
++
++	if (flags) {
++		drm_dbg(&i915->drm, "Unknown flags 0x%02llx", flags);
++		return -EINVAL;
++	}
++
++	for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
++		err = check_user_mbz(&ext->mbz64[n]);
++		if (err)
++			return err;
++	}
++
++	if (width < 1) {
++		drm_dbg(&i915->drm, "Width (%d) < 1 \n", width);
++		return -EINVAL;
++	}
++
++	if (num_siblings < 1) {
++		drm_dbg(&i915->drm, "Number siblings (%d) < 1 \n",
++			num_siblings);
++		return -EINVAL;
++	}
++
++	siblings = kmalloc_array(num_siblings,
++				 sizeof(*siblings),
++				 GFP_KERNEL);
++	if (!siblings)
++		return -ENOMEM;
++
++	mutex_lock(&set->ctx->mutex);
++
++	/* Create contexts / engines */
++	for (i = 0; i < width; ++i) {
++		for (j = 0; j < num_siblings; ++j) {
++			struct i915_engine_class_instance ci;
++
++			if (copy_from_user(&ci, &ext->engines[i * num_siblings + j],
++					   sizeof(ci))) {
++				err = -EFAULT;
++				goto out_err;
++			}
++
++			siblings[j] = intel_engine_lookup_user(i915,
++							       ci.engine_class,
++							       ci.engine_instance);
++			if (!siblings[j]) {
++				drm_dbg(&i915->drm,
++					"Invalid sibling[%d]: { class:%d, inst:%d }\n",
++					n, ci.engine_class, ci.engine_instance);
++				err = -EINVAL;
++				goto out_err;
++			}
++		}
++
++		ce = intel_engine_create_virtual(siblings, num_siblings,
++						 FORCE_VIRTUAL);
++		if (IS_ERR(ce)) {
++			err = PTR_ERR(ce);
++			goto out_err;
++		}
++		intel_context_set_gem(ce, set->ctx);
++
++		if (i == 0) {
++			parent = ce;
++			__set_bit(CONTEXT_NO_PREEMPT_MID_BATCH, &ce->flags);
++		} else {
++			intel_context_bind_parent_child(parent, ce);
++			err = intel_context_alloc_state(ce);
++			if (err)
++				goto out_err;
++		}
++	}
++
++	if (!__validate_parallel_engines_layout(i915, parent)) {
++		drm_dbg(&i915->drm, "Invalidate parallel context layout");
++		err = -EINVAL;
++		goto out_err;
++	}
++
++	intel_guc_configure_parent_context(parent);
++	if (cmpxchg(&set->engines->engines[slot], NULL, parent)) {
++		err = -EEXIST;
++		goto out_err;
++	}
++
++	kfree(siblings);
++	mutex_unlock(&set->ctx->mutex);
++
++	return 0;
++
++out_err:
++	if (parent) {
++		for_each_child(parent, child)
++			intel_context_put(child);
++		intel_context_put(parent);
++		set->engines->engines[slot] = NULL;
++	}
++	kfree(siblings);
++	mutex_unlock(&set->ctx->mutex);
++
++	return err;
++}
++
+ static const i915_user_extension_fn set_engines__extensions[] = {
+ 	[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
+ 	[I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond,
+ 	[PRELIM_I915_USER_EXT_MASK(PRELIM_I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT)] =
+ 		set_engines__parallel_submit,
++	[PRELIM_I915_USER_EXT_MASK(PRELIM_I915_CONTEXT_ENGINES_EXT_PARALLEL2_SUBMIT)] =
++		set_engines__parallel2_submit,
+ };
+ 
+ static int
+diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
++++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+@@ -194,12 +194,6 @@ struct i915_gem_context {
+ 	 */
+ 	u64 fence_context;
+ 
+-	/**
+-	 * @parallel_submit: Ensure only 1 parallel submission is happening on
+-	 * this context at a time.
+-	 */
+-	struct mutex parallel_submit;
+-
+ 	/**
+ 	 * @seqno: Seqno when using when a parallel context.
+ 	 */
+diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
++++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+@@ -1633,7 +1633,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
+ 		goto err_unmap;
+ 
+ 	if (engine == eb->context->engine &&
+-	    !i915_gem_context_is_parallel(eb->gem_context)) {
++	    !intel_context_is_parallel(eb->context)) {
+ 		rq = i915_request_create(eb->context);
+ 	} else {
+ 		struct intel_context *ce = eb->reloc_context;
+@@ -1727,7 +1727,7 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
+ 		struct intel_engine_cs *engine = eb->engine;
+ 
+ 		if (!reloc_can_use_engine(engine) ||
+-		    i915_gem_context_is_parallel(eb->gem_context)) {
++		    intel_context_is_parallel(eb->context)) {
+ 			engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0];
+ 			if (!engine)
+ 				return ERR_PTR(-ENODEV);
+@@ -3223,7 +3223,7 @@ eb_select_legacy_ring(struct i915_execbuffer *eb)
+ }
+ 
+ static int
+-eb_select_engine(struct i915_execbuffer *eb)
++eb_select_engine(struct i915_execbuffer *eb, unsigned int batch_number)
+ {
+ 	struct intel_context *ce;
+ 	unsigned int idx;
+@@ -3238,6 +3238,16 @@ eb_select_engine(struct i915_execbuffer *eb)
+ 	if (IS_ERR(ce))
+ 		return PTR_ERR(ce);
+ 
++	if (batch_number > 0 &&
++	    !i915_gem_context_is_parallel(eb->gem_context)) {
++		struct intel_context *parent = ce;
++		for_each_child(parent, ce)
++			if (!--batch_number)
++				break;
++		intel_context_put(parent);
++		intel_context_get(ce);
++	}
++
+ 	intel_gt_pm_get(ce->engine->gt);
+ 
+ 	if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
+@@ -3562,7 +3572,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
+ 	if (unlikely(err))
+ 		goto err_destroy;
+ 
+-	err = eb_select_engine(&eb);
++	err = eb_select_engine(&eb, batch_number);
+ 	if (unlikely(err))
+ 		goto err_context;
+ 
+@@ -3751,6 +3761,8 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
+ 	const size_t count = args->buffer_count;
+ 	unsigned int num_batches, i;
+ 	int err, start_context;
++	bool is_parallel = false;
++	struct intel_context *parent = NULL;
+ 
+ 	if (!check_buffer_count(count)) {
+ 		drm_dbg(&i915->drm, "execbuf2 with %zd buffers\n", count);
+@@ -3782,15 +3794,35 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
+ 			I915_EXEC_NUMBER_BB_LSB) +
+ 		       ((args->flags & PRELIM_I915_EXEC_NUMBER_BB_MASK) >>
+ 			PRELIM_I915_EXEC_NUMBER_BB_LSB)) + 1;
+-	if (i915_gem_context_is_parallel(ctx)) {
+-		if (num_batches > count ||
+-		    start_context + num_batches > ctx->width) {
+-			err = -EINVAL;
+-			goto err_context;
++
++	if (i915_gem_context_user_engines(ctx)) {
++		parent = i915_gem_context_get_engine(ctx, start_context);
++		if (IS_ERR(parent)) {
++			i915_gem_context_put(ctx);
++			return PTR_ERR(parent);
+ 		}
+ 
+-		if (i915_gem_context_is_bb_preempt_boundary(ctx) &&
+-		    (start_context || num_batches != ctx->width)) {
++		is_parallel = i915_gem_context_is_parallel(ctx) ||
++			intel_context_is_parallel(parent);
++		if (i915_gem_context_is_parallel(ctx)) {
++			if (num_batches > count ||
++			    start_context + num_batches > ctx->width) {
++				err = -EINVAL;
++				goto err_context;
++			}
++
++			if (i915_gem_context_is_bb_preempt_boundary(ctx) &&
++			    (start_context || num_batches != ctx->width)) {
++				err = -EINVAL;
++				goto err_context;
++			}
++		} else if (intel_context_is_parallel(parent)) {
++			if (num_batches != 1)
++				return -EINVAL;
++			num_batches = parent->guc_number_children + 1;
++			if (num_batches > count)
++				return -EINVAL;
++		} else if(num_batches > 1) {
+ 			err = -EINVAL;
+ 			goto err_context;
+ 		}
+@@ -3827,8 +3859,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
+ 	 * properly, also this is needed to create an excl fence for an dma buf
+ 	 * objects these BBs touch.
+ 	 */
+-	if (args->flags & I915_EXEC_FENCE_OUT ||
+-	    i915_gem_context_is_parallel(ctx)) {
++	if (args->flags & I915_EXEC_FENCE_OUT || is_parallel) {
+ 		out_fences = kcalloc(num_batches, sizeof(*out_fences),
+ 				     GFP_KERNEL);
+ 		if (!out_fences) {
+@@ -3874,8 +3905,8 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
+ 	 * in intel_context sequence, thus only 1 submission can happen at a
+ 	 * time.
+ 	 */
+-	if (i915_gem_context_is_parallel(ctx))
+-		mutex_lock(&ctx->parallel_submit);
++	if (is_parallel)
++		mutex_lock(&parent->parallel_submit);
+ 
+ 	err = i915_gem_do_execbuffer(dev, file, args, exec2_list,
+ 				     args->flags & I915_EXEC_BATCH_FIRST ?
+@@ -3889,8 +3920,10 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
+ 				     &ww);
+ 
+ 	for (i = 1; err == 0 && i < num_batches; i++) {
+-		args->flags &= ~I915_EXEC_RING_MASK;
+-		args->flags |= start_context + i;
++		if (i915_gem_context_is_parallel(ctx)) {
++			args->flags &= ~I915_EXEC_RING_MASK;
++			args->flags |= start_context + i;
++		}
+ 		args->batch_len = 0;
+ 
+ 		err = i915_gem_do_execbuffer(dev, file, args, exec2_list,
+@@ -3905,8 +3938,8 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
+ 					     &ww);
+ 	}
+ 
+-	if (i915_gem_context_is_parallel(ctx))
+-		mutex_unlock(&ctx->parallel_submit);
++	if (is_parallel)
++		mutex_unlock(&parent->parallel_submit);
+ 
+ 	/*
+ 	 * Now that we have begun execution of the batchbuffer, we ignore
+@@ -4009,6 +4042,8 @@ end:;
+ 	dma_fence_put(in_fence);
+ err_context:
+ 	i915_gem_context_put(ctx);
++	if (parent)
++		intel_context_put(parent);
+ 
+ 	return err;
+ }
+diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
+--- a/drivers/gpu/drm/i915/gt/intel_context.c
++++ b/drivers/gpu/drm/i915/gt/intel_context.c
+@@ -460,6 +460,7 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
+ 	INIT_LIST_HEAD(&ce->signals);
+ 
+ 	mutex_init(&ce->pin_mutex);
++	mutex_init(&ce->parallel_submit);
+ 
+ 	spin_lock_init(&ce->guc_state.lock);
+ 	INIT_LIST_HEAD(&ce->guc_state.fences);
+@@ -491,6 +492,7 @@ void intel_context_fini(struct intel_context *ce)
+ 			intel_context_put(child);
+ 
+ 	mutex_destroy(&ce->pin_mutex);
++	mutex_destroy(&ce->parallel_submit);
+ 	i915_active_fini(&ce->active);
+ }
+ 
+diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
+--- a/drivers/gpu/drm/i915/gt/intel_context.h
++++ b/drivers/gpu/drm/i915/gt/intel_context.h
+@@ -52,6 +52,11 @@ static inline bool intel_context_is_parent(struct intel_context *ce)
+ 	return !!ce->guc_number_children;
+ }
+ 
++static inline bool intel_context_is_parallel(struct intel_context *ce)
++{
++	return intel_context_is_child(ce) || intel_context_is_parent(ce);
++}
++
+ /* Only should be called directly by selftests */
+ void __intel_context_bind_parent_child(struct intel_context *parent,
+ 				       struct intel_context *child);
+@@ -204,6 +209,12 @@ static inline bool intel_context_is_barrier(const struct intel_context *ce)
+ 	return test_bit(CONTEXT_BARRIER_BIT, &ce->flags);
+ }
+ 
++static inline bool
++intel_context_is_no_preempt_mid_batch(const struct intel_context *ce)
++{
++	return test_bit(CONTEXT_NO_PREEMPT_MID_BATCH, &ce->flags);
++}
++
+ static inline bool intel_context_is_closed(const struct intel_context *ce)
+ {
+ 	return test_bit(CONTEXT_CLOSED_BIT, &ce->flags);
+diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
+--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
++++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
+@@ -114,6 +114,7 @@ struct intel_context {
+ #define CONTEXT_FORCE_SINGLE_SUBMISSION	7
+ #define CONTEXT_NOPREEMPT		8
+ #define CONTEXT_LRCA_DIRTY		9
++#define CONTEXT_NO_PREEMPT_MID_BATCH	10
+ 
+ 	struct {
+ 		u64 timeout_us;
+@@ -239,6 +240,9 @@ struct intel_context {
+ 
+ 	/* Last request submitted on a parent */
+ 	struct i915_request *last_rq;
++
++	/* Parallel submit mutex */
++	struct mutex parallel_submit;
+ };
+ 
+ #endif /* __INTEL_CONTEXT_TYPES__ */
+diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
++++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+@@ -798,8 +798,7 @@ static inline int rq_prio(const struct i915_request *rq)
+ 
+ static inline bool is_multi_lrc(struct intel_context *ce)
+ {
+-	return intel_context_is_child(ce) ||
+-		intel_context_is_parent(ce);
++	return intel_context_is_parallel(ce);
+ }
+ 
+ static inline bool is_multi_lrc_rq(struct i915_request *rq)
+@@ -3458,6 +3457,7 @@ void intel_guc_configure_parent_context(struct intel_context *ce)
+ 		bb_preempt_boundary =
+ 			i915_gem_context_is_bb_preempt_boundary(ctx);
+ 	rcu_read_unlock();
++	bb_preempt_boundary |= intel_context_is_no_preempt_mid_batch(ce);
+ 	if (bb_preempt_boundary) {
+ 		ce->emit_bb_start = emit_bb_start_parent_bb_preempt_boundary;
+ 		ce->emit_fini_breadcrumb =
+diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
+--- a/drivers/gpu/drm/i915/i915_request.c
++++ b/drivers/gpu/drm/i915/i915_request.c
+@@ -1606,14 +1606,9 @@ i915_request_await_object(struct i915_request *to,
+ 	return ret;
+ }
+ 
+-static inline bool is_parallel(struct intel_context *ce)
+-{
+-	return intel_context_is_child(ce) || intel_context_is_parent(ce);
+-}
+-
+ static inline bool is_parallel_rq(struct i915_request *rq)
+ {
+-	return is_parallel(rq->context);
++	return intel_context_is_parallel(rq->context);
+ }
+ 
+ static inline struct intel_context *request_to_parent(struct i915_request *rq)
+diff --git a/include/uapi/drm/i915_drm_prelim.h b/include/uapi/drm/i915_drm_prelim.h
+--- a/include/uapi/drm/i915_drm_prelim.h
++++ b/include/uapi/drm/i915_drm_prelim.h
+@@ -370,9 +370,124 @@ struct prelim_i915_context_engines_parallel_submit {
+ } __attribute__ ((packed));
+ #define i915_context_engines_parallel_submit prelim_i915_context_engines_parallel_submit
+ 
++/**
++ * struct prelim_drm_i915_context_engines_parallel2_submit - Configure engine
++ * for parallel submission.
++ *
++ * Setup a slot in the context engine map to allow multiple BBs to be submitted
++ * in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU
++ * in parallel. Multiple hardware contexts are created internally in the i915
++ * to run these BBs. Once a slot is configured for N BBs only N BBs can be
++ * submitted in each execbuf IOCTL and this is implicit behavior e.g. The user
++ * doesn't tell the execbuf IOCTL there are N BBs, the execbuf IOCTL knows how
++ * many BBs there are based on the slot's configuration. The N BBs are the last
++ * N buffer objects or first N if I915_EXEC_BATCH_FIRST is set.
++ *
++ * The default placement behavior is to create implicit bonds between each
++ * context if each context maps to more than 1 physical engine (e.g. context is
++ * a virtual engine). Also we only allow contexts of same engine class and these
++ * contexts must be in logically contiguous order. Examples of the placement
++ * behavior described below. Lastly, the default is to not allow BBs to be
++ * preempted mid BB and instead insert coordinated preemption on all hardware
++ * contexts between each set of BBs. Flags may be added in the future to change
++ * both of these default behaviors.
++ *
++ * Returns -EINVAL if hardware context placement configuration is invalid or if
++ * the placement configuration isn't supported on the platform / submission
++ * interface.
++ * Returns -ENODEV if extension isn't supported on the platform / submission
++ * interface.
++ *
++ * .. code-block::
++ *
++ *	Example 1 pseudo code:
++ *	CS[X] = generic engine of same class, logical instance X
++ *	INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
++ *	set_engines(INVALID)
++ *	set_parallel(engine_index=0, width=2, num_siblings=1,
++ *		     engines=CS[0],CS[1])
++ *
++ *	Results in the following valid placement:
++ *	CS[0], CS[1]
++ *
++ *	Example 2 pseudo code:
++ *	CS[X] = generic engine of same class, logical instance X
++ *	INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
++ *	set_engines(INVALID)
++ *	set_parallel(engine_index=0, width=2, num_siblings=2,
++ *		     engines=CS[0],CS[2],CS[1],CS[3])
++ *
++ *	Results in the following valid placements:
++ *	CS[0], CS[1]
++ *	CS[2], CS[3]
++ *
++ *	This can also be thought of as 2 virtual engines described by 2-D array
++ *	in the engines field, with bonds placed between each index of the
++ *	virtual engines. e.g. CS[0] is bonded to CS[1], CS[2] is bonded to
++ *	CS[3].
++ *	VE[0] = CS[0], CS[2]
++ *	VE[1] = CS[1], CS[3]
++ *
++ *	Example 3 pseudo code:
++ *	CS[X] = generic engine of same class, logical instance X
++ *	INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
++ *	set_engines(INVALID)
++ *	set_parallel(engine_index=0, width=2, num_siblings=2,
++ *		     engines=CS[0],CS[1],CS[1],CS[3])
++ *
++ *	Results in the following valid and invalid placements:
++ *	CS[0], CS[1]
++ *	CS[1], CS[3] - Not logically contiguous, return -EINVAL
++ */
++struct prelim_drm_i915_context_engines_parallel2_submit {
++	/**
++	 * @base: base user extension.
++	 */
++	struct i915_user_extension base;
++
++	/**
++	 * @engine_index: slot for parallel engine
++	 */
++	__u16 engine_index;
++
++	/**
++	 * @width: number of contexts per parallel engine
++	 */
++	__u16 width;
++
++	/**
++	 * @num_siblings: number of siblings per context
++	 */
++	__u16 num_siblings;
++
++	/**
++	 * @mbz16: reserved for future use; must be zero
++	 */
++	__u16 mbz16;
++
++	/**
++	 * @flags: all undefined flags must be zero, currently no defined flags
++	 */
++	__u64 flags;
++
++	/**
++	 * @mbz64: reserved for future use; must be zero
++	 */
++	__u64 mbz64[3];
++
++	/**
++	 * @engines: 2-d array of engine instances to configure parallel engine
++	 *
++	 * length = width (i) * num_siblings (j)
++	 * index = j + i * num_siblings
++	 */
++	struct i915_engine_class_instance engines[0];
++} __attribute__ ((packed));
++
+ struct prelim_i915_context_param_engines {
+ #define PRELIM_I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT (PRELIM_I915_USER_EXT | 2) /* see prelim_i915_context_engines_parallel_submit */
+ #define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */
++#define PRELIM_I915_CONTEXT_ENGINES_EXT_PARALLEL2_SUBMIT (PRELIM_I915_USER_EXT | 3) /* see prelim_i915_context_engines_parallel2_submit */
+ };
+ 
+ enum prelim_drm_i915_gem_memory_class {
--
git-pile 0.97

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2021-07-08  0:12 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-06 22:20 [PATCH 0/7] CT changes required for GuC submission Matthew Brost
2021-07-06 22:20 ` [Intel-gfx] " Matthew Brost
2021-07-06 22:10 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for CT changes required for GuC submission (rev3) Patchwork
2021-07-06 22:11 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2021-07-06 22:20 ` [PATCH 1/7] drm/i915/guc: Relax CTB response timeout Matthew Brost
2021-07-06 22:20   ` [Intel-gfx] " Matthew Brost
2021-07-06 22:20 ` [PATCH 2/7] drm/i915/guc: Improve error message for unsolicited CT response Matthew Brost
2021-07-06 22:20   ` [Intel-gfx] " Matthew Brost
2021-07-06 22:20 ` [PATCH 3/7] drm/i915/guc: Increase size of CTB buffers Matthew Brost
2021-07-06 22:20   ` [Intel-gfx] " Matthew Brost
2021-07-06 22:20 ` [PATCH 4/7] drm/i915/guc: Add non blocking CTB send function Matthew Brost
2021-07-06 22:20   ` [Intel-gfx] " Matthew Brost
2021-07-06 22:20 ` [PATCH 5/7] drm/i915/guc: Add stall timer to " Matthew Brost
2021-07-06 22:20   ` [Intel-gfx] " Matthew Brost
2021-07-06 22:20 ` [PATCH 6/7] drm/i915/guc: Optimize CTB writes and reads Matthew Brost
2021-07-06 22:20   ` [Intel-gfx] " Matthew Brost
2021-07-06 22:51   ` John Harrison
2021-07-06 22:51     ` [Intel-gfx] " John Harrison
2021-07-07 17:50     ` Matthew Brost
2021-07-07 17:50       ` [Intel-gfx] " Matthew Brost
2021-07-07 18:19       ` John Harrison
2021-07-07 18:19         ` [Intel-gfx] " John Harrison
2021-07-07 18:56         ` Matthew Brost
2021-07-07 18:56           ` [Intel-gfx] " Matthew Brost
2021-07-07 20:21           ` John Harrison
2021-07-07 20:21             ` [Intel-gfx] " John Harrison
2021-07-07 20:23             ` Matthew Brost
2021-07-07 20:23               ` [Intel-gfx] " Matthew Brost
2021-07-06 22:20 ` [PATCH 7/7] drm/i915/guc: Module load failure test for CT buffer creation Matthew Brost
2021-07-06 22:20   ` [Intel-gfx] " Matthew Brost
2021-07-06 22:38 ` [Intel-gfx] ✗ Fi.CI.BAT: failure for CT changes required for GuC submission (rev3) Patchwork
2021-07-07 19:09 ` [PATCH 06/56] drm/i915/guc: Optimize CTB writes and reads Matthew Brost
2021-07-07 19:09   ` [Intel-gfx] " Matthew Brost
2021-07-07 20:30   ` Michal Wajdeczko
2021-07-07 20:30     ` [Intel-gfx] " Michal Wajdeczko
2021-07-07 23:25 ` [PATCH 06/7] " Matthew Brost
2021-07-07 23:25   ` [Intel-gfx] " Matthew Brost
2021-07-08 13:23   ` Michal Wajdeczko
2021-07-08 13:23     ` [Intel-gfx] " Michal Wajdeczko
2021-07-08  0:30 ` Matthew Brost [this message]
2021-07-08  0:30   ` [Intel-gfx] [PATCH 0/2] Introduce set_parallel2 extension Matthew Brost
2021-07-08  0:30   ` [PATCH 1/2] INTEL_DII/NOT_UPSTREAM: drm/i915: " Matthew Brost
2021-07-08  0:30   ` [PATCH 2/2] REVIEW: Full tree diff against internal/internal Matthew Brost
2021-07-08  0:30     ` [Intel-gfx] " Matthew Brost
2021-07-08  2:18 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for CT changes required for GuC submission (rev4) Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210708003019.9213-1-matthew.brost@intel.com \
    --to=matthew.brost@intel.com \
    --cc=Michal.Wajdeczko@intel.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=john.c.harrison@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.