From: John Harrison <john.c.harrison@intel.com>
To: Matthew Brost <matthew.brost@intel.com>,
	<intel-gfx@lists.freedesktop.org>,
	<dri-devel@lists.freedesktop.org>
Cc: daniele.ceraolospurio@intel.com
Subject: Re: [PATCH 7/7] drm/i915/guc: Selftest for stealing of guc ids
Date: Fri, 10 Dec 2021 17:33:02 -0800	[thread overview]
Message-ID: <111dd483-dff5-2e60-2913-e6bacf35dda9@intel.com> (raw)
In-Reply-To: <20211211005612.8575-8-matthew.brost@intel.com>

On 12/10/2021 16:56, Matthew Brost wrote:
> Testing the stealing of guc ids is hard from user spaec as we have 64k
spaec -> space

> guc_ids. Add a selftest, which artificially reduces the number of guc
> ids, and forces a steal. Details of test has comment in code so will not
has -> are

But would a copy&paste really be so hard? It is useful to be able to 
read what a patch does from the commit log and not have to delve inside 
every time.


> repeat here.
>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/uc/intel_guc.h        |  12 ++
>   .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  15 +-
>   drivers/gpu/drm/i915/gt/uc/selftest_guc.c     | 171 ++++++++++++++++++
>   3 files changed, 193 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> index 1cb46098030d..307380a2e2ff 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> @@ -94,6 +94,11 @@ struct intel_guc {
>   		 * @guc_ids: used to allocate new guc_ids, single-lrc
>   		 */
>   		struct ida guc_ids;
> +		/**
> +		 * @num_guc_ids: Number of guc_ids, selftest feature to be able
> +		 * to reduce this number of test.
of test -> while testing

Should this have a CONFIG_SELFTEST guard around it? And define a wrapper that 
returns GUC_MAX_LRC_DESCRIPTORS or num_guc_ids as appropriate.
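Something roughly like this is what I had in mind - just an untested sketch, 
and guc_num_ids() is an invented name, assuming the num_guc_ids field from 
this patch stays (under the ifdef):

#ifdef CONFIG_DRM_I915_SELFTEST
/* Selftests can shrink the guc_id space via num_guc_ids */
static inline int guc_num_ids(struct intel_guc *guc)
{
	return guc->submission_state.num_guc_ids;
}
#else
/* Production builds always use the full guc_id space */
static inline int guc_num_ids(struct intel_guc *guc)
{
	return GUC_MAX_LRC_DESCRIPTORS;
}
#endif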


> +		 */
> +		int num_guc_ids;
>   		/**
>   		 * @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc
>   		 */
> @@ -202,6 +207,13 @@ struct intel_guc {
>   		 */
>   		struct delayed_work work;
>   	} timestamp;
> +
> +#ifdef CONFIG_DRM_I915_SELFTEST
> +	/**
> +	 * @number_guc_id_stole: The number of guc_ids that have been stole
> +	 */
> +	int number_guc_id_stole;
stole -> stolen (in all three cases)

> +#endif
>   };
>   
>   static inline struct intel_guc *log_to_guc(struct intel_guc_log *log)
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 96fcf869e3ff..57019b190bfb 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -145,7 +145,7 @@ guc_create_parallel(struct intel_engine_cs **engines,
>    * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
>    * multi-lrc.
>    */
> -#define NUMBER_MULTI_LRC_GUC_ID		(GUC_MAX_LRC_DESCRIPTORS / 16)
> +#define NUMBER_MULTI_LRC_GUC_ID(guc)	(guc->submission_state.num_guc_ids / 16)
And keep the original definition for the non-CONFIG_SELFTEST case?
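I.e. keep both forms, roughly like this (untested sketch, reusing the names 
from the patch):

#ifdef CONFIG_DRM_I915_SELFTEST
#define NUMBER_MULTI_LRC_GUC_ID(guc)	((guc)->submission_state.num_guc_ids / 16)
#else
#define NUMBER_MULTI_LRC_GUC_ID(guc)	(GUC_MAX_LRC_DESCRIPTORS / 16)
#endif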

>   
>   /*
>    * Below is a set of functions which control the GuC scheduling state which
> @@ -1775,7 +1775,7 @@ int intel_guc_submission_init(struct intel_guc *guc)
>   		  destroyed_worker_func);
>   
>   	guc->submission_state.guc_ids_bitmap =
> -		bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID, GFP_KERNEL);
> +		bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
>   	if (!guc->submission_state.guc_ids_bitmap)
>   		return -ENOMEM;
>   
> @@ -1869,13 +1869,13 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
>   
>   	if (intel_context_is_parent(ce))
>   		ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
> -					      NUMBER_MULTI_LRC_GUC_ID,
> +					      NUMBER_MULTI_LRC_GUC_ID(guc),
>   					      order_base_2(ce->parallel.number_children
>   							   + 1));
>   	else
>   		ret = ida_simple_get(&guc->submission_state.guc_ids,
> -				     NUMBER_MULTI_LRC_GUC_ID,
> -				     GUC_MAX_LRC_DESCRIPTORS,
> +				     NUMBER_MULTI_LRC_GUC_ID(guc),
> +				     guc->submission_state.num_guc_ids,
>   				     GFP_KERNEL | __GFP_RETRY_MAYFAIL |
>   				     __GFP_NOWARN);
>   	if (unlikely(ret < 0))
> @@ -1941,6 +1941,10 @@ static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce)
>   
>   		set_context_guc_id_invalid(cn);
>   
> +#ifdef CONFIG_DRM_I915_SELFTEST
> +		guc->number_guc_id_stole++;
> +#endif
> +
>   		return 0;
>   	} else {
>   		return -EAGAIN;
> @@ -3779,6 +3783,7 @@ static bool __guc_submission_selected(struct intel_guc *guc)
>   
>   void intel_guc_submission_init_early(struct intel_guc *guc)
>   {
> +	guc->submission_state.num_guc_ids = GUC_MAX_LRC_DESCRIPTORS;
>   	guc->submission_supported = __guc_submission_supported(guc);
>   	guc->submission_selected = __guc_submission_selected(guc);
>   }
> diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
> index fb0e4a7bd8ca..9ab355e64b3f 100644
> --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
> @@ -3,8 +3,21 @@
>    * Copyright © 2021 Intel Corporation
>    */
>   
> +#include "selftests/igt_spinner.h"
>   #include "selftests/intel_scheduler_helpers.h"
>   
> +static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin)
> +{
> +	int err = 0;
> +
> +	i915_request_get(rq);
> +	i915_request_add(rq);
> +	if (spin && !igt_wait_for_spinner(spin, rq))
> +		err = -ETIMEDOUT;
> +
> +	return err;
> +}
> +
>   static struct i915_request *nop_user_request(struct intel_context *ce,
>   					     struct i915_request *from)
>   {
> @@ -110,10 +123,168 @@ static int intel_guc_scrub_ctbs(void *arg)
>   	return ret;
>   }
>   
> +/*
> + * intel_guc_steal_guc_ids - Test to exhaust all guc_ids and then steal one
> + *
> + * This test creates a spinner to which is used as to block all subsequent
to which -> which
as to block -> to block

> + * submissions until it completes. Next, a loop creates a context and a NOP
> + * request each iteration until the guc_ids are exhausted (request creation
> + * returns -EAGAIN). The spinner is completed unblocking all requests created in
spinner is ended,

> + * the loop. At this point all guc_ids are exhausted but are available to steal.
> + * Try to create another request which should successfully steal a guc_id. Wait
> + * on last request to complete, idle GPU, verify guc_id was stole via counter,
stole -> stolen

> + * and exit test. Test also artificially reduces the number of guc_ids so the
> + * test runs in a timely manner.
> + */
> +static int intel_guc_steal_guc_ids(void *arg)
> +{
> +	struct intel_gt *gt = arg;
> +	struct intel_guc *guc = &gt->uc.guc;
> +	int ret, sv, i = 0;
> +	intel_wakeref_t wakeref;
> +	struct intel_engine_cs *engine;
> +	struct intel_context **ce;
> +	struct igt_spinner spin;
> +	struct i915_request *spin_rq = NULL, *rq, *last = NULL;
> +	int number_guc_id_stole = guc->number_guc_id_stole;
stole -> stolen

> +
> +	ce = kzalloc(sizeof(*ce) * GUC_MAX_LRC_DESCRIPTORS, GFP_KERNEL);
> +	if (!ce) {
> +		pr_err("Context array allocation failed\n");
> +		return -ENOMEM;
> +	}
> +
> +	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
> +	engine = intel_selftest_find_any_engine(gt);
> +	sv = guc->submission_state.num_guc_ids;
> +	guc->submission_state.num_guc_ids = 4096;
> +
> +	/* Create spinner to block requests in below loop */
> +	ce[i++] = intel_context_create(engine);
> +	if (IS_ERR(ce[i - 1])) {
> +		ce[i - 1] = NULL;
> +		ret = PTR_ERR(ce[i - 1]);
It would be less peculiar-looking to do the i++ after the if statement.

> +		pr_err("Failed to create context: %d\n", ret);
> +		goto err_wakeref;
> +	}
> +	ret = igt_spinner_init(&spin, engine->gt);
> +	if (ret) {
> +		pr_err("Failed to create spinner: %d\n", ret);
> +		goto err_contexts;
> +	}
> +	spin_rq = igt_spinner_create_request(&spin, ce[i - 1], MI_ARB_CHECK);
> +	if (IS_ERR(spin_rq)) {
> +		ret = PTR_ERR(spin_rq);
> +		pr_err("Failed to create spinner request: %d\n", ret);
> +		goto err_contexts;
> +	}
> +	ret = request_add_spin(spin_rq, &spin);
> +	if (ret) {
> +		pr_err("Failed to add Spinner request: %d\n", ret);
> +		goto err_spin_rq;
> +	}
> +
> +	/* Use all guc_ids */
> +	while (ret != -EAGAIN) {
> +		ce[i++] = intel_context_create(engine);
> +		if (IS_ERR(ce[i - 1])) {
> +			ce[i - 1] = NULL;
> +			ret = PTR_ERR(ce[i - 1]);
> +			pr_err("Failed to create context: %d\n", ret);
> +			goto err_spin_rq;
Won't this try to put the null context? Or rather, the clean-up loop will see 
the null context and immediately abort. The i++ needs to move after the if 
statement, or after the nop_user_request call to get rid of all the 
ce[i - 1] references.
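As an illustration only (untested, reusing the names from the patch), the loop 
could be reworked along these lines so i is only bumped once the slot holds a 
valid context:

	/* Sketch: take PTR_ERR before clearing the slot, bump i afterwards */
	while (ret != -EAGAIN) {
		ce[i] = intel_context_create(engine);
		if (IS_ERR(ce[i])) {
			ret = PTR_ERR(ce[i]);
			ce[i] = NULL;
			pr_err("Failed to create context: %d\n", ret);
			goto err_spin_rq;
		}

		rq = nop_user_request(ce[i], spin_rq);
		i++;
		if (IS_ERR(rq)) {
			ret = PTR_ERR(rq);
			rq = NULL;
			if (ret != -EAGAIN) {
				pr_err("Failed to create request, %d: %d\n",
				       i, ret);
				goto err_spin_rq;
			}
		} else {
			if (last)
				i915_request_put(last);
			last = rq;
		}
	}

That way the while (i && ce[--i]) clean-up loop never trips over a NULL entry 
part-way through the array.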

John.

> +		}
> +
> +		rq = nop_user_request(ce[i - 1], spin_rq);
> +		if (IS_ERR(rq)) {
> +			ret = PTR_ERR(rq);
> +			rq = NULL;
> +			if (ret != -EAGAIN) {
> +				pr_err("Failed to create request, %d: %d\n", i,
> +				       ret);
> +				goto err_spin_rq;
> +			}
> +		} else {
> +			if (last)
> +				i915_request_put(last);
> +			last = rq;
> +		}
> +	}
> +
> +	/* Release blocked requests */
> +	igt_spinner_end(&spin);
> +	ret = intel_selftest_wait_for_rq(spin_rq);
> +	if (ret) {
> +		pr_err("Spin request failed to complete: %d\n", ret);
> +		i915_request_put(last);
> +		goto err_spin_rq;
> +	}
> +	i915_request_put(spin_rq);
> +	igt_spinner_fini(&spin);
> +	spin_rq = NULL;
> +
> +	/* Wait for last request */
> +	ret = i915_request_wait(last, 0, HZ * 30);
> +	i915_request_put(last);
> +	if (ret < 0) {
> +		pr_err("Last request failed to complete: %d\n", ret);
> +		goto err_spin_rq;
> +	}
> +
> +	/* Try to steal guc_id */
> +	rq = nop_user_request(ce[i - 1], NULL);
> +	if (IS_ERR(rq)) {
> +		ret = PTR_ERR(rq);
> +		pr_err("Failed to steal guc_id, %d: %d\n", i, ret);
> +		goto err_spin_rq;
> +	}
> +
> +	/* Wait for last request */
> +	ret = i915_request_wait(rq, 0, HZ);
> +	i915_request_put(rq);
> +	if (ret < 0) {
> +		pr_err("Last request failed to complete: %d\n", ret);
> +		goto err_spin_rq;
> +	}
> +
> +	/* Wait for idle */
> +	ret = intel_gt_wait_for_idle(gt, HZ * 30);
> +	if (ret < 0) {
> +		pr_err("GT failed to idle: %d\n", ret);
> +		goto err_spin_rq;
> +	}
> +
> +	/* Verify a guc_id got stole */
> +	if (guc->number_guc_id_stole == number_guc_id_stole) {
> +		pr_err("No guc_ids stolen");
> +		ret = -EINVAL;
> +	} else {
> +		ret = 0;
> +	}
> +
> +err_spin_rq:
> +	if (spin_rq) {
> +		igt_spinner_end(&spin);
> +		intel_selftest_wait_for_rq(spin_rq);
> +		i915_request_put(spin_rq);
> +		igt_spinner_fini(&spin);
> +		intel_gt_wait_for_idle(gt, HZ * 30);
> +	}
> +err_contexts:
> +	while (i && ce[--i])
> +		intel_context_put(ce[i]);
> +err_wakeref:
> +	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
> +	kfree(ce);
> +	guc->submission_state.num_guc_ids = sv;
> +
> +	return ret;
> +}
> +
>   int intel_guc_live_selftests(struct drm_i915_private *i915)
>   {
>   	static const struct i915_subtest tests[] = {
>   		SUBTEST(intel_guc_scrub_ctbs),
> +		SUBTEST(intel_guc_steal_guc_ids),
>   	};
>   	struct intel_gt *gt = &i915->gt;
>   

