From: Daniel Vetter <daniel@ffwll.ch>
To: Matthew Brost <matthew.brost@intel.com>
Cc: intel-gfx@lists.freedesktop.org, dri-devel@lists.freedesktop.org
Subject: Re: [PATCH 26/46] drm/i915: Connect UAPI to GuC multi-lrc interface
Date: Mon, 9 Aug 2021 18:37:20 +0200	[thread overview]
Message-ID: <YRFZwPSkrGqh4hpJ@phenom.ffwll.local> (raw)
In-Reply-To: <20210803222943.27686-27-matthew.brost@intel.com>

On Tue, Aug 03, 2021 at 03:29:23PM -0700, Matthew Brost wrote:
> Introduce 'set parallel submit' extension to connect UAPI to GuC
> multi-lrc interface. Kernel doc in new uAPI should explain it all.
> 
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>

UMD merge request link + igt patchwork link because this is uapi please.
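
Also, to make the uapi kerneldoc below a bit easier to review, here's a rough
sketch of how I'd expect userspace to wire this up (illustrative only; the
setparam boilerplate, the engine class/instance picks and the fd/ctx handling
are assumptions on my side, not something this patch defines):

#include <stdint.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

/* Hypothetical helper: one parallel slot, width=2, num_siblings=1 */
static int set_parallel_engines(int fd, uint32_t ctx_id)
{
	I915_DEFINE_CONTEXT_ENGINES_PARALLEL_SUBMIT(parallel, 2) = {};
	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1) = {};
	struct drm_i915_gem_context_param arg = {};

	parallel.base.name = I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT;
	parallel.engine_index = 0;
	parallel.width = 2;
	parallel.num_siblings = 1;
	/* same class, logically contiguous instances (VCS0/VCS1 assumed) */
	parallel.engines[0].engine_class = I915_ENGINE_CLASS_VIDEO;
	parallel.engines[0].engine_instance = 0;
	parallel.engines[1].engine_class = I915_ENGINE_CLASS_VIDEO;
	parallel.engines[1].engine_instance = 1;

	/* slot 0 starts out invalid, the extension fills it in */
	engines.engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
	engines.engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
	engines.extensions = (uintptr_t)&parallel;

	arg.ctx_id = ctx_id;
	arg.param = I915_CONTEXT_PARAM_ENGINES;
	arg.size = sizeof(engines);
	arg.value = (uintptr_t)&engines;

	return drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
}

After that, a single execbuf supplying 2 BBs (the last 2 objects, or the first
2 with I915_EXEC_BATCH_FIRST) should be what gets submitted on the parallel
slot, going by the doc below.
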
-Daniel

> ---
>  drivers/gpu/drm/i915/gem/i915_gem_context.c   | 157 +++++++++++++++++-
>  .../gpu/drm/i915/gem/i915_gem_context_types.h |   6 +
>  drivers/gpu/drm/i915/gt/intel_context_types.h |   8 +-
>  drivers/gpu/drm/i915/gt/intel_engine.h        |  12 +-
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   6 +-
>  .../drm/i915/gt/intel_execlists_submission.c  |   6 +-
>  drivers/gpu/drm/i915/gt/selftest_execlists.c  |  12 +-
>  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 111 +++++++++++--
>  include/uapi/drm/i915_drm.h                   | 128 ++++++++++++++
>  9 files changed, 417 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index cff72679ad7c..2b0dd3ff4db8 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -515,9 +515,149 @@ set_proto_ctx_engines_bond(struct i915_user_extension __user *base, void *data)
>  	return 0;
>  }
>  
> +static int
> +set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base,
> +				      void *data)
> +{
> +	struct i915_context_engines_parallel_submit __user *ext =
> +		container_of_user(base, typeof(*ext), base);
> +	const struct set_proto_ctx_engines *set = data;
> +	struct drm_i915_private *i915 = set->i915;
> +	u64 flags;
> +	int err = 0, n, i, j;
> +	u16 slot, width, num_siblings;
> +	struct intel_engine_cs **siblings = NULL;
> +	intel_engine_mask_t prev_mask;
> +
> +	/* Disabling for now */
> +	return -ENODEV;
> +
> +	if (!(intel_uc_uses_guc_submission(&i915->gt.uc)))
> +		return -ENODEV;
> +
> +	if (get_user(slot, &ext->engine_index))
> +		return -EFAULT;
> +
> +	if (get_user(width, &ext->width))
> +		return -EFAULT;
> +
> +	if (get_user(num_siblings, &ext->num_siblings))
> +		return -EFAULT;
> +
> +	if (slot >= set->num_engines) {
> +		drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n",
> +			slot, set->num_engines);
> +		return -EINVAL;
> +	}
> +
> +	if (set->engines[slot].type != I915_GEM_ENGINE_TYPE_INVALID) {
> +		drm_dbg(&i915->drm,
> +			"Invalid placement[%d], already occupied\n", slot);
> +		return -EINVAL;
> +	}
> +
> +	if (get_user(flags, &ext->flags))
> +		return -EFAULT;
> +
> +	if (flags) {
> +		drm_dbg(&i915->drm, "Unknown flags 0x%02llx", flags);
> +		return -EINVAL;
> +	}
> +
> +	for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
> +		err = check_user_mbz(&ext->mbz64[n]);
> +		if (err)
> +			return err;
> +	}
> +
> +	if (width < 2) {
> +		drm_dbg(&i915->drm, "Width (%d) < 2\n", width);
> +		return -EINVAL;
> +	}
> +
> +	if (num_siblings < 1) {
> +		drm_dbg(&i915->drm, "Number siblings (%d) < 1\n",
> +			num_siblings);
> +		return -EINVAL;
> +	}
> +
> +	siblings = kmalloc_array(num_siblings * width,
> +				 sizeof(*siblings),
> +				 GFP_KERNEL);
> +	if (!siblings)
> +		return -ENOMEM;
> +
> +	/* Create contexts / engines */
> +	for (i = 0; i < width; ++i) {
> +		intel_engine_mask_t current_mask = 0;
> +		struct i915_engine_class_instance prev_engine;
> +
> +		for (j = 0; j < num_siblings; ++j) {
> +			struct i915_engine_class_instance ci;
> +
> +			n = i * num_siblings + j;
> +			if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) {
> +				err = -EFAULT;
> +				goto out_err;
> +			}
> +
> +			siblings[n] =
> +				intel_engine_lookup_user(i915, ci.engine_class,
> +							 ci.engine_instance);
> +			if (!siblings[n]) {
> +				drm_dbg(&i915->drm,
> +					"Invalid sibling[%d]: { class:%d, inst:%d }\n",
> +					n, ci.engine_class, ci.engine_instance);
> +				err = -EINVAL;
> +				goto out_err;
> +			}
> +
> +			if (n) {
> +				if (prev_engine.engine_class !=
> +				    ci.engine_class) {
> +					drm_dbg(&i915->drm,
> +						"Mismatched class %d, %d\n",
> +						prev_engine.engine_class,
> +						ci.engine_class);
> +					err = -EINVAL;
> +					goto out_err;
> +				}
> +			}
> +
> +			prev_engine = ci;
> +			current_mask |= siblings[n]->logical_mask;
> +		}
> +
> +		if (i > 0) {
> +			if (current_mask != prev_mask << 1) {
> +				drm_dbg(&i915->drm,
> +					"Non contiguous logical mask 0x%x, 0x%x\n",
> +					prev_mask, current_mask);
> +				err = -EINVAL;
> +				goto out_err;
> +			}
> +		}
> +		prev_mask = current_mask;
> +	}
> +
> +	set->engines[slot].type = I915_GEM_ENGINE_TYPE_PARALLEL;
> +	set->engines[slot].num_siblings = num_siblings;
> +	set->engines[slot].width = width;
> +	set->engines[slot].siblings = siblings;
> +
> +	return 0;
> +
> +out_err:
> +	kfree(siblings);
> +
> +	return err;
> +}
> +
>  static const i915_user_extension_fn set_proto_ctx_engines_extensions[] = {
>  	[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_proto_ctx_engines_balance,
>  	[I915_CONTEXT_ENGINES_EXT_BOND] = set_proto_ctx_engines_bond,
> +	[I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT] =
> +		set_proto_ctx_engines_parallel_submit,
>  };
>  
>  static int set_proto_ctx_engines(struct drm_i915_file_private *fpriv,
> @@ -938,7 +1078,7 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx,
>  
>  	e = alloc_engines(num_engines);
>  	for (n = 0; n < num_engines; n++) {
> -		struct intel_context *ce;
> +		struct intel_context *ce, *child;
>  		int ret;
>  
>  		switch (pe[n].type) {
> @@ -948,7 +1088,13 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx,
>  
>  		case I915_GEM_ENGINE_TYPE_BALANCED:
>  			ce = intel_engine_create_virtual(pe[n].siblings,
> -							 pe[n].num_siblings);
> +							 pe[n].num_siblings, 0);
> +			break;
> +
> +		case I915_GEM_ENGINE_TYPE_PARALLEL:
> +			ce = intel_engine_create_parallel(pe[n].siblings,
> +							  pe[n].num_siblings,
> +							  pe[n].width);
>  			break;
>  
>  		case I915_GEM_ENGINE_TYPE_INVALID:
> @@ -969,6 +1115,13 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx,
>  			err = ERR_PTR(ret);
>  			goto free_engines;
>  		}
> +		for_each_child(ce, child) {
> +			ret = intel_context_set_gem(child, ctx, pe->sseu);
> +			if (ret) {
> +				err = ERR_PTR(ret);
> +				goto free_engines;
> +			}
> +		}
>  	}
>  	e->num_engines = num_engines;
>  
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> index 94c03a97cb77..7b096d83bca1 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> @@ -78,6 +78,9 @@ enum i915_gem_engine_type {
>  
>  	/** @I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set */
>  	I915_GEM_ENGINE_TYPE_BALANCED,
> +
> +	/** @I915_GEM_ENGINE_TYPE_PARALLEL: A parallel engine set */
> +	I915_GEM_ENGINE_TYPE_PARALLEL,
>  };
>  
>  /**
> @@ -108,6 +111,9 @@ struct i915_gem_proto_engine {
>  	/** @num_siblings: Number of balanced siblings */
>  	unsigned int num_siblings;
>  
> +	/** @width: Width of each sibling */
> +	unsigned int width;
> +
>  	/** @siblings: Balanced siblings */
>  	struct intel_engine_cs **siblings;
>  
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index f4fc81f64921..9cdbea752014 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -55,9 +55,13 @@ struct intel_context_ops {
>  	void (*reset)(struct intel_context *ce);
>  	void (*destroy)(struct kref *kref);
>  
> -	/* virtual engine/context interface */
> +	/* virtual/parallel engine/context interface */
>  	struct intel_context *(*create_virtual)(struct intel_engine_cs **engine,
> -						unsigned int count);
> +						unsigned int count,
> +						unsigned long flags);
> +	struct intel_context *(*create_parallel)(struct intel_engine_cs **engines,
> +						 unsigned int num_siblings,
> +						 unsigned int width);
>  	struct intel_engine_cs *(*get_sibling)(struct intel_engine_cs *engine,
>  					       unsigned int sibling);
>  };
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> index 87579affb952..43f16a8347ee 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> @@ -279,9 +279,19 @@ intel_engine_has_preempt_reset(const struct intel_engine_cs *engine)
>  	return intel_engine_has_preemption(engine);
>  }
>  
> +#define FORCE_VIRTUAL	BIT(0)
>  struct intel_context *
>  intel_engine_create_virtual(struct intel_engine_cs **siblings,
> -			    unsigned int count);
> +			    unsigned int count, unsigned long flags);
> +
> +static inline struct intel_context *
> +intel_engine_create_parallel(struct intel_engine_cs **engines,
> +			     unsigned int num_engines,
> +			     unsigned int width)
> +{
> +	GEM_BUG_ON(!engines[0]->cops->create_parallel);
> +	return engines[0]->cops->create_parallel(engines, num_engines, width);
> +}
>  
>  static inline bool
>  intel_virtual_engine_has_heartbeat(const struct intel_engine_cs *engine)
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 4d790f9a65dd..f66c75c77584 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -1923,16 +1923,16 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now)
>  
>  struct intel_context *
>  intel_engine_create_virtual(struct intel_engine_cs **siblings,
> -			    unsigned int count)
> +			    unsigned int count, unsigned long flags)
>  {
>  	if (count == 0)
>  		return ERR_PTR(-EINVAL);
>  
> -	if (count == 1)
> +	if (count == 1 && !(flags & FORCE_VIRTUAL))
>  		return intel_context_create(siblings[0]);
>  
>  	GEM_BUG_ON(!siblings[0]->cops->create_virtual);
> -	return siblings[0]->cops->create_virtual(siblings, count);
> +	return siblings[0]->cops->create_virtual(siblings, count, flags);
>  }
>  
>  struct i915_request *
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index fc74ca28f245..769480e026bb 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -201,7 +201,8 @@ static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
>  }
>  
>  static struct intel_context *
> -execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count);
> +execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
> +			 unsigned long flags);
>  
>  static struct i915_request *
>  __active_request(const struct intel_timeline * const tl,
> @@ -3785,7 +3786,8 @@ static void virtual_submit_request(struct i915_request *rq)
>  }
>  
>  static struct intel_context *
> -execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
> +execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
> +			 unsigned long flags)
>  {
>  	struct virtual_engine *ve;
>  	unsigned int n;
> diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
> index f12ffe797639..e876a9d88a5c 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
> @@ -3733,7 +3733,7 @@ static int nop_virtual_engine(struct intel_gt *gt,
>  	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
>  
>  	for (n = 0; n < nctx; n++) {
> -		ve[n] = intel_engine_create_virtual(siblings, nsibling);
> +		ve[n] = intel_engine_create_virtual(siblings, nsibling, 0);
>  		if (IS_ERR(ve[n])) {
>  			err = PTR_ERR(ve[n]);
>  			nctx = n;
> @@ -3929,7 +3929,7 @@ static int mask_virtual_engine(struct intel_gt *gt,
>  	 * restrict it to our desired engine within the virtual engine.
>  	 */
>  
> -	ve = intel_engine_create_virtual(siblings, nsibling);
> +	ve = intel_engine_create_virtual(siblings, nsibling, 0);
>  	if (IS_ERR(ve)) {
>  		err = PTR_ERR(ve);
>  		goto out_close;
> @@ -4060,7 +4060,7 @@ static int slicein_virtual_engine(struct intel_gt *gt,
>  		i915_request_add(rq);
>  	}
>  
> -	ce = intel_engine_create_virtual(siblings, nsibling);
> +	ce = intel_engine_create_virtual(siblings, nsibling, 0);
>  	if (IS_ERR(ce)) {
>  		err = PTR_ERR(ce);
>  		goto out;
> @@ -4112,7 +4112,7 @@ static int sliceout_virtual_engine(struct intel_gt *gt,
>  
>  	/* XXX We do not handle oversubscription and fairness with normal rq */
>  	for (n = 0; n < nsibling; n++) {
> -		ce = intel_engine_create_virtual(siblings, nsibling);
> +		ce = intel_engine_create_virtual(siblings, nsibling, 0);
>  		if (IS_ERR(ce)) {
>  			err = PTR_ERR(ce);
>  			goto out;
> @@ -4214,7 +4214,7 @@ static int preserved_virtual_engine(struct intel_gt *gt,
>  	if (err)
>  		goto out_scratch;
>  
> -	ve = intel_engine_create_virtual(siblings, nsibling);
> +	ve = intel_engine_create_virtual(siblings, nsibling, 0);
>  	if (IS_ERR(ve)) {
>  		err = PTR_ERR(ve);
>  		goto out_scratch;
> @@ -4354,7 +4354,7 @@ static int reset_virtual_engine(struct intel_gt *gt,
>  	if (igt_spinner_init(&spin, gt))
>  		return -ENOMEM;
>  
> -	ve = intel_engine_create_virtual(siblings, nsibling);
> +	ve = intel_engine_create_virtual(siblings, nsibling, 0);
>  	if (IS_ERR(ve)) {
>  		err = PTR_ERR(ve);
>  		goto out_spin;
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 44a7582c9aed..89528624710a 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -82,7 +82,8 @@
>   */
>  
>  static struct intel_context *
> -guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count);
> +guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
> +		   unsigned long flags);
>  
>  #define GUC_REQUEST_SIZE 64 /* bytes */
>  
> @@ -2514,8 +2515,6 @@ static void guc_context_post_unpin(struct intel_context *ce)
>  	__guc_context_post_unpin(ce);
>  }
>  
> -/* Future patches will use this function */
> -__maybe_unused
>  static int guc_parent_context_pre_pin(struct intel_context *ce,
>  				      struct i915_gem_ww_ctx *ww)
>  {
> @@ -2559,8 +2558,6 @@ static int guc_parent_context_pre_pin(struct intel_context *ce,
>  	return err;
>  }
>  
> -/* Future patches will use this function */
> -__maybe_unused
>  static void guc_parent_context_post_unpin(struct intel_context *ce)
>  {
>  	struct intel_context *child;
> @@ -2576,8 +2573,6 @@ static void guc_parent_context_post_unpin(struct intel_context *ce)
>  	}
>  }
>  
> -/* Future patches will use this function */
> -__maybe_unused
>  static int guc_parent_context_pin(struct intel_context *ce)
>  {
>  	int ret, i = 0, j = 0;
> @@ -2623,8 +2618,6 @@ static int guc_parent_context_pin(struct intel_context *ce)
>  	return ret;
>  }
>  
> -/* Future patches will use this function */
> -__maybe_unused
>  static void guc_parent_context_unpin(struct intel_context *ce)
>  {
>  	struct intel_context *child;
> @@ -3048,8 +3041,6 @@ static void destroy_worker_func(struct work_struct *w)
>  		intel_gt_pm_unpark_work_add(gt, destroy_worker);
>  }
>  
> -/* Future patches will use this function */
> -__maybe_unused
>  static void guc_child_context_destroy(struct kref *kref)
>  {
>  	__guc_context_destroy(container_of(kref, struct intel_context, ref));
> @@ -3272,6 +3263,11 @@ static void remove_from_context(struct i915_request *rq)
>  	i915_request_notify_execute_cb_imm(rq);
>  }
>  
> +static struct intel_context *
> +guc_create_parallel(struct intel_engine_cs **engines,
> +		    unsigned int num_siblings,
> +		    unsigned int width);
> +
>  static const struct intel_context_ops guc_context_ops = {
>  	.alloc = guc_context_alloc,
>  
> @@ -3293,6 +3289,7 @@ static const struct intel_context_ops guc_context_ops = {
>  	.destroy = guc_context_destroy,
>  
>  	.create_virtual = guc_create_virtual,
> +	.create_parallel = guc_create_parallel,
>  };
>  
>  static void __guc_signal_context_fence(struct intel_context *ce)
> @@ -3782,6 +3779,91 @@ static void guc_retire_inflight_request_prio(struct i915_request *rq)
>  	spin_unlock(&ce->guc_active.lock);
>  }
>  
> +static const struct intel_context_ops virtual_parent_context_ops = {
> +	.alloc = guc_virtual_context_alloc,
> +
> +	.pre_pin = guc_parent_context_pre_pin,
> +	.pin = guc_parent_context_pin,
> +	.unpin = guc_parent_context_unpin,
> +	.post_unpin = guc_parent_context_post_unpin,
> +
> +	.ban = guc_context_ban,
> +
> +	.enter = guc_virtual_context_enter,
> +	.exit = guc_virtual_context_exit,
> +
> +	.sched_disable = guc_context_sched_disable,
> +
> +	.destroy = guc_context_destroy,
> +
> +	.get_sibling = guc_virtual_get_sibling,
> +};
> +
> +static const struct intel_context_ops virtual_child_context_ops = {
> +	.alloc = guc_virtual_context_alloc,
> +
> +	.enter = guc_virtual_context_enter,
> +	.exit = guc_virtual_context_exit,
> +
> +	.destroy = guc_child_context_destroy,
> +};
> +
> +static struct intel_context *
> +guc_create_parallel(struct intel_engine_cs **engines,
> +		    unsigned int num_siblings,
> +		    unsigned int width)
> +{
> +	struct intel_engine_cs **siblings = NULL;
> +	struct intel_context *parent = NULL, *ce, *err;
> +	int i, j;
> +	int ret;
> +
> +	siblings = kmalloc_array(num_siblings,
> +				 sizeof(*siblings),
> +				 GFP_KERNEL);
> +	if (!siblings)
> +		return ERR_PTR(-ENOMEM);
> +
> +	for (i = 0; i < width; ++i) {
> +		for (j = 0; j < num_siblings; ++j)
> +			siblings[j] = engines[i * num_siblings + j];
> +
> +		ce = intel_engine_create_virtual(siblings, num_siblings,
> +						 FORCE_VIRTUAL);
> +		if (!ce) {
> +			err = ERR_PTR(-ENOMEM);
> +			goto unwind;
> +		}
> +
> +		if (i == 0) {
> +			parent = ce;
> +		} else {
> +			intel_context_bind_parent_child(parent, ce);
> +			ret = intel_context_alloc_state(ce);
> +			if (ret) {
> +				err = ERR_PTR(ret);
> +				goto unwind;
> +			}
> +		}
> +	}
> +
> +	parent->ops = &virtual_parent_context_ops;
> +	for_each_child(parent, ce)
> +		ce->ops = &virtual_child_context_ops;
> +
> +	kfree(siblings);
> +	return parent;
> +
> +unwind:
> +	if (parent) {
> +		for_each_child(parent, ce)
> +			intel_context_put(ce);
> +		intel_context_put(parent);
> +	}
> +	kfree(siblings);
> +	return err;
> +}
> +
>  static void sanitize_hwsp(struct intel_engine_cs *engine)
>  {
>  	struct intel_timeline *tl;
> @@ -4578,7 +4660,8 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc,
>  }
>  
>  static struct intel_context *
> -guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
> +guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
> +		   unsigned long flags)
>  {
>  	struct guc_virtual_engine *ve;
>  	struct intel_guc *guc;
> @@ -4591,7 +4674,9 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
>  		return ERR_PTR(-ENOMEM);
>  
>  	guc = &siblings[0]->gt->uc.guc;
> -	sched_engine = guc_to_sched_engine(guc, GUC_SUBMIT_ENGINE_SINGLE_LRC);
> +	sched_engine = guc_to_sched_engine(guc, (flags & FORCE_VIRTUAL) ?
> +					   GUC_SUBMIT_ENGINE_MULTI_LRC :
> +					   GUC_SUBMIT_ENGINE_SINGLE_LRC);
>  
>  	ve->base.i915 = siblings[0]->i915;
>  	ve->base.gt = siblings[0]->gt;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index ef72e07fe08c..a16f0f8908de 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1821,6 +1821,7 @@ struct drm_i915_gem_context_param {
>   * Extensions:
>   *   i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE)
>   *   i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
> + *   i915_context_engines_parallel_submit (I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT)
>   */
>  #define I915_CONTEXT_PARAM_ENGINES	0xa
>  
> @@ -2046,6 +2047,132 @@ struct i915_context_engines_bond {
>  	struct i915_engine_class_instance engines[N__]; \
>  } __attribute__((packed)) name__
>  
> +/**
> + * struct i915_context_engines_parallel_submit - Configure engine for
> + * parallel submission.
> + *
> + * Set up a slot in the context engine map to allow multiple BBs to be
> + * submitted in a single execbuf IOCTL. Those BBs will then be scheduled to run
> + * on the GPU in parallel. Multiple hardware contexts are created internally in
> + * the i915 to run these BBs. Once a slot is configured for N BBs, only N BBs
> + * can be submitted in each execbuf IOCTL and this is implicit behavior, e.g.
> + * the user doesn't tell the execbuf IOCTL there are N BBs; the execbuf IOCTL
> + * knows how many BBs there are based on the slot's configuration. The N BBs
> + * are the last N buffer objects, or the first N if I915_EXEC_BATCH_FIRST is
> + * set.
> + *
> + * The default placement behavior is to create implicit bonds between each
> + * context if each context maps to more than 1 physical engine (e.g. the
> + * context is a virtual engine). Also we only allow contexts of the same engine
> + * class, and these contexts must be in logically contiguous order. Examples of
> + * the placement behavior are described below. Lastly, the default is to not
> + * allow BBs to be preempted mid-BB; rather, coordinated preemption is inserted
> + * on all hardware contexts between each set of BBs. Flags may be added in the
> + * future to change both of these default behaviors.
> + *
> + * Returns -EINVAL if the hardware context placement configuration is invalid
> + * or if the placement configuration isn't supported on the platform /
> + * submission interface.
> + * Returns -ENODEV if the extension isn't supported on the platform /
> + * submission interface.
> + *
> + * .. code-block:: none
> + *
> + *	Example 1 pseudo code:
> + *	CS[X] = generic engine of same class, logical instance X
> + *	INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
> + *	set_engines(INVALID)
> + *	set_parallel(engine_index=0, width=2, num_siblings=1,
> + *		     engines=CS[0],CS[1])
> + *
> + *	Results in the following valid placement:
> + *	CS[0], CS[1]
> + *
> + *	Example 2 pseudo code:
> + *	CS[X] = generic engine of same class, logical instance X
> + *	INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
> + *	set_engines(INVALID)
> + *	set_parallel(engine_index=0, width=2, num_siblings=2,
> + *		     engines=CS[0],CS[2],CS[1],CS[3])
> + *
> + *	Results in the following valid placements:
> + *	CS[0], CS[1]
> + *	CS[2], CS[3]
> + *
> + *	This can also be thought of as 2 virtual engines described by a 2-D
> + *	array in the engines field, with bonds placed between each index of
> + *	the virtual engines, e.g. CS[0] is bonded to CS[1] and CS[2] is bonded
> + *	to CS[3].
> + *	VE[0] = CS[0], CS[2]
> + *	VE[1] = CS[1], CS[3]
> + *
> + *	Example 3 pseudo code:
> + *	CS[X] = generic engine of same class, logical instance X
> + *	INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
> + *	set_engines(INVALID)
> + *	set_parallel(engine_index=0, width=2, num_siblings=2,
> + *		     engines=CS[0],CS[1],CS[1],CS[3])
> + *
> + *	Results in the following valid and invalid placements:
> + *	CS[0], CS[1]
> + *	CS[1], CS[3] - Not logically contiguous, returns -EINVAL
> + */
> +struct i915_context_engines_parallel_submit {
> +	/**
> +	 * @base: base user extension.
> +	 */
> +	struct i915_user_extension base;
> +
> +	/**
> +	 * @engine_index: slot for parallel engine
> +	 */
> +	__u16 engine_index;
> +
> +	/**
> +	 * @width: number of contexts per parallel engine
> +	 */
> +	__u16 width;
> +
> +	/**
> +	 * @num_siblings: number of siblings per context
> +	 */
> +	__u16 num_siblings;
> +
> +	/**
> +	 * @mbz16: reserved for future use; must be zero
> +	 */
> +	__u16 mbz16;
> +
> +	/**
> +	 * @flags: all undefined flags must be zero; currently no flags are defined
> +	 */
> +	__u64 flags;
> +
> +	/**
> +	 * @mbz64: reserved for future use; must be zero
> +	 */
> +	__u64 mbz64[3];
> +
> +	/**
> +	 * @engines: 2-d array of engine instances to configure parallel engine
> +	 *
> +	 * length = width (i) * num_siblings (j)
> +	 * index = j + i * num_siblings
> +	 */
> +	struct i915_engine_class_instance engines[0];
> +
> +} __packed;
> +
> +#define I915_DEFINE_CONTEXT_ENGINES_PARALLEL_SUBMIT(name__, N__) struct { \
> +	struct i915_user_extension base; \
> +	__u16 engine_index; \
> +	__u16 width; \
> +	__u16 num_siblings; \
> +	__u16 mbz16; \
> +	__u64 flags; \
> +	__u64 mbz64[3]; \
> +	struct i915_engine_class_instance engines[N__]; \
> +} __attribute__((packed)) name__
> +
>  /**
>   * DOC: Context Engine Map uAPI
>   *
> @@ -2105,6 +2232,7 @@ struct i915_context_param_engines {
>  	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
>  #define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
>  #define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */
> +#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */
>  	struct i915_engine_class_instance engines[0];
>  } __attribute__((packed));
>  
> -- 
> 2.28.0
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

2021-08-03 22:29   ` [Intel-gfx] " Matthew Brost
2021-08-03 22:29 ` [PATCH 37/46] drm/i915: Teach execbuf there can be more than one batch in the objects list Matthew Brost
2021-08-03 22:29   ` [Intel-gfx] " Matthew Brost
2021-08-03 22:29 ` [PATCH 38/46] drm/i915: Only track object dependencies on first request Matthew Brost
2021-08-03 22:29   ` [Intel-gfx] " Matthew Brost
2021-08-03 22:29 ` [PATCH 39/46] drm/i915: Force parallel contexts to use copy engine for reloc Matthew Brost
2021-08-03 22:29   ` [Intel-gfx] " Matthew Brost
2021-08-09 16:39   ` Daniel Vetter
2021-08-09 16:39     ` [Intel-gfx] " Daniel Vetter
2021-08-03 22:29 ` [PATCH 40/46] drm/i915: Multi-batch execbuffer2 Matthew Brost
2021-08-03 22:29   ` [Intel-gfx] " Matthew Brost
2021-08-09 17:02   ` Daniel Vetter
2021-08-09 17:02     ` [Intel-gfx] " Daniel Vetter
2021-08-03 22:29 ` [PATCH 41/46] drm/i915: Eliminate unnecessary VMA calls for multi-BB submission Matthew Brost
2021-08-03 22:29   ` [Intel-gfx] " Matthew Brost
2021-08-09 17:07   ` Daniel Vetter
2021-08-09 17:12     ` Daniel Vetter
2021-08-03 22:29 ` [PATCH 42/46] drm/i915: Hold all parallel requests until last request, properly handle error Matthew Brost
2021-08-03 22:29   ` [Intel-gfx] " Matthew Brost
2021-08-03 22:29 ` [PATCH 43/46] drm/i915/guc: Handle errors in multi-lrc requests Matthew Brost
2021-08-03 22:29   ` [Intel-gfx] " Matthew Brost
2021-08-03 22:29 ` [PATCH 44/46] drm/i915: Enable multi-bb execbuf Matthew Brost
2021-08-03 22:29   ` [Intel-gfx] " Matthew Brost
2021-08-03 22:29 ` [PATCH 45/46] drm/i915/execlists: Weak parallel submission support for execlists Matthew Brost
2021-08-03 22:29   ` [Intel-gfx] " Matthew Brost
2021-08-03 22:29 ` [PATCH 46/46] drm/i915/guc: Add delay before disabling scheduling on contexts Matthew Brost
2021-08-03 22:29   ` [Intel-gfx] " Matthew Brost
2021-08-09 17:17   ` Daniel Vetter
2021-08-09 19:32     ` Matthew Brost
2021-08-11  9:55       ` Daniel Vetter
2021-08-11 17:43         ` Matthew Brost
2021-08-12 14:04           ` Daniel Vetter
2021-08-12 19:26   ` Daniel Vetter
2021-08-03 22:51 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for Parallel submission aka multi-bb execbuf (rev2) Patchwork
2021-08-03 22:53 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2021-08-03 22:57 ` [Intel-gfx] ✗ Fi.CI.DOCS: " Patchwork
2021-08-03 23:19 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2021-08-05  3:53 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save this message's mbox file (the "mbox" download offered on the
  archive page), import it into your mail client, and reply-to-all
  from there; a minimal command sketch follows these instructions.

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1); a one-time SMTP setup sketch also
  follows these instructions:

  git send-email \
    --in-reply-to=YRFZwPSkrGqh4hpJ@phenom.ffwll.local \
    --to=daniel@ffwll.ch \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=matthew.brost@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, use the mailto: link provided on the archive
  page for this message.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
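
A minimal sketch of the mbox method above, assuming the usual
public-inbox URL layout on lore.kernel.org (a t.mbox.gz download for
the whole thread) and a mutt reader; the exact URL suffix and the
choice of client are assumptions, not something this footer specifies:

  # Fetch the whole thread as a gzipped mbox (public-inbox convention)
  wget "https://lore.kernel.org/all/YRFZwPSkrGqh4hpJ@phenom.ffwll.local/t.mbox.gz"
  gunzip t.mbox.gz

  # Open it in mutt, select this message and group-reply ('g') so the
  # lists and everyone on Cc stay on the thread
  mutt -f t.mbox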
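
To go with the git-send-email command above, a one-time SMTP setup
sketch; the server, port and address are placeholders for your own
mail provider, not values taken from this thread:

  # Tell git-send-email how to reach your SMTP server (once per machine)
  git config --global sendemail.smtpServer     smtp.example.com
  git config --global sendemail.smtpServerPort 587
  git config --global sendemail.smtpEncryption tls
  git config --global sendemail.smtpUser       you@example.com

  # With that in place, the git send-email invocation above prompts for
  # the SMTP password (or uses a configured credential helper) and sends
  # the reply with the correct In-Reply-To header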