All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915/selftests: Exercise independence of per-engine resets
@ 2017-07-05 11:48 Chris Wilson
  2017-07-05 12:05 ` ✓ Fi.CI.BAT: success for " Patchwork
  2017-07-06 17:15 ` [PATCH] " Michel Thierry
  0 siblings, 2 replies; 3+ messages in thread
From: Chris Wilson @ 2017-07-05 11:48 UTC (permalink / raw)
  To: intel-gfx

If all goes well, resetting one engine should not affect the operation of
any others. So to test this, we set up a continuous stream of requests
onto each of the "innocent" engines whilst constantly resetting our
target engine.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
---
 drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 165 +++++++++++++++++++++++
 drivers/gpu/drm/i915/selftests/mock_context.c    |   8 ++
 drivers/gpu/drm/i915/selftests/mock_context.h    |   3 +
 3 files changed, 176 insertions(+)

diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index 7096c3911cd3..dbfcb31ba9f4 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -22,8 +22,13 @@
  *
  */
 
+#include <linux/kthread.h>
+
 #include "../i915_selftest.h"
 
+#include "mock_context.h"
+#include "mock_drm.h"
+
 struct hang {
 	struct drm_i915_private *i915;
 	struct drm_i915_gem_object *hws;
@@ -372,6 +377,165 @@ static int igt_reset_engine(void *arg)
 	return err;
 }
 
+static int active_engine(void *data)
+{
+	struct intel_engine_cs *engine = data;
+	struct drm_i915_gem_request *rq[2] = {};
+	struct i915_gem_context *ctx[2];
+	struct drm_file *file;
+	unsigned long count = 0;
+	int err = 0;
+
+	file = mock_file(engine->i915);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	mutex_lock(&engine->i915->drm.struct_mutex);
+	ctx[0] = live_context(engine->i915, file);
+	mutex_unlock(&engine->i915->drm.struct_mutex);
+	if (IS_ERR(ctx[0])) {
+		err = PTR_ERR(ctx[0]);
+		goto err_file;
+	}
+
+	mutex_lock(&engine->i915->drm.struct_mutex);
+	ctx[1] = live_context(engine->i915, file);
+	mutex_unlock(&engine->i915->drm.struct_mutex);
+	if (IS_ERR(ctx[1])) {
+		err = PTR_ERR(ctx[1]);
+		i915_gem_context_put(ctx[0]);
+		goto err_file;
+	}
+
+	while (!kthread_should_stop()) {
+		unsigned int idx = count++ & 1;
+		struct drm_i915_gem_request *old = rq[idx];
+		struct drm_i915_gem_request *new;
+
+		mutex_lock(&engine->i915->drm.struct_mutex);
+		new = i915_gem_request_alloc(engine, ctx[idx]);
+		if (IS_ERR(new)) {
+			mutex_unlock(&engine->i915->drm.struct_mutex);
+			err = PTR_ERR(new);
+			break;
+		}
+
+		rq[idx] = i915_gem_request_get(new);
+		i915_add_request(new);
+		mutex_unlock(&engine->i915->drm.struct_mutex);
+
+		if (old) {
+			i915_wait_request(old, 0, MAX_SCHEDULE_TIMEOUT);
+			i915_gem_request_put(old);
+		}
+	}
+
+	for (count = 0; count < ARRAY_SIZE(rq); count++)
+		i915_gem_request_put(rq[count]);
+
+err_file:
+	mock_file_free(engine->i915, file);
+	return err;
+}
+
+static int igt_reset_active_engines(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine, *active;
+	enum intel_engine_id id, tmp;
+	int err = 0;
+
+	/* Check that issuing a reset on one engine does not interfere
+	 * with any other engine.
+	 */
+
+	if (!intel_has_reset_engine(i915))
+		return 0;
+
+	for_each_engine(engine, i915, id) {
+		struct task_struct *threads[I915_NUM_ENGINES];
+		unsigned long resets[I915_NUM_ENGINES];
+		unsigned long global = i915_reset_count(&i915->gpu_error);
+		IGT_TIMEOUT(end_time);
+
+		memset(threads, 0, sizeof(threads));
+		for_each_engine(active, i915, tmp) {
+			struct task_struct *tsk;
+
+			if (active == engine)
+				continue;
+
+			resets[tmp] = i915_reset_engine_count(&i915->gpu_error,
+							      active);
+
+			tsk = kthread_run(active_engine, active,
+					  "igt/%s", active->name);
+			if (IS_ERR(tsk)) {
+				err = PTR_ERR(tsk);
+				goto unwind;
+			}
+
+			threads[tmp] = tsk;
+			get_task_struct(tsk);
+
+		}
+
+		set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags);
+		do {
+			err = i915_reset_engine(engine);
+			if (err) {
+				pr_err("i915_reset_engine(%s) failed, err=%d\n",
+				       engine->name, err);
+				break;
+			}
+		} while (time_before(jiffies, end_time));
+		clear_bit(I915_RESET_ENGINE + engine->id,
+			  &i915->gpu_error.flags);
+
+unwind:
+		for_each_engine(active, i915, tmp) {
+			int ret;
+
+			if (!threads[tmp])
+				continue;
+
+			ret = kthread_stop(threads[tmp]);
+			if (ret) {
+				pr_err("kthread for active engine %s failed, err=%d\n",
+				       active->name, ret);
+				if (!err)
+					err = ret;
+			}
+			put_task_struct(threads[tmp]);
+
+			if (resets[tmp] != i915_reset_engine_count(&i915->gpu_error,
+								   active)) {
+				pr_err("Innocent engine %s was reset (count=%ld)\n",
+				       active->name,
+				       i915_reset_engine_count(&i915->gpu_error,
+							       active) - resets[tmp]);
+				err = -EIO;
+			}
+		}
+
+		if (global != i915_reset_count(&i915->gpu_error)) {
+			pr_err("Global reset (count=%ld)!\n",
+			       i915_reset_count(&i915->gpu_error) - global);
+			err = -EIO;
+		}
+
+		if (err)
+			break;
+
+		cond_resched();
+	}
+
+	if (i915_terminally_wedged(&i915->gpu_error))
+		err = -EIO;
+
+	return err;
+}
+
 static u32 fake_hangcheck(struct drm_i915_gem_request *rq)
 {
 	u32 reset_count;
@@ -689,6 +853,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(igt_hang_sanitycheck),
 		SUBTEST(igt_global_reset),
 		SUBTEST(igt_reset_engine),
+		SUBTEST(igt_reset_active_engines),
 		SUBTEST(igt_wait_reset),
 		SUBTEST(igt_reset_queue),
 		SUBTEST(igt_render_engine_reset_fallback),
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index 9c7c68181f82..d436f2d5089b 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -95,3 +95,11 @@ void mock_init_contexts(struct drm_i915_private *i915)
 	INIT_WORK(&i915->contexts.free_work, contexts_free_worker);
 	init_llist_head(&i915->contexts.free_list);
 }
+
+struct i915_gem_context *
+live_context(struct drm_i915_private *i915, struct drm_file *file)
+{
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	return i915_gem_create_context(i915, file->driver_priv);
+}
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.h b/drivers/gpu/drm/i915/selftests/mock_context.h
index 383941a61124..2f432c03d413 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.h
+++ b/drivers/gpu/drm/i915/selftests/mock_context.h
@@ -33,4 +33,7 @@ mock_context(struct drm_i915_private *i915,
 
 void mock_context_close(struct i915_gem_context *ctx);
 
+struct i915_gem_context *
+live_context(struct drm_i915_private *i915, struct drm_file *file);
+
 #endif /* !__MOCK_CONTEXT_H */
-- 
2.13.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915/selftests: Exercise independence of per-engine resets
  2017-07-05 11:48 [PATCH] drm/i915/selftests: Exercise independence of per-engine resets Chris Wilson
@ 2017-07-05 12:05 ` Patchwork
  2017-07-06 17:15 ` [PATCH] " Michel Thierry
  1 sibling, 0 replies; 3+ messages in thread
From: Patchwork @ 2017-07-05 12:05 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/selftests: Exercise independence of per-engine resets
URL   : https://patchwork.freedesktop.org/series/26849/
State : success

== Summary ==

Series 26849v1 drm/i915/selftests: Exercise independence of per-engine resets
https://patchwork.freedesktop.org/api/1.0/series/26849/revisions/1/mbox/

Test gem_exec_suspend:
        Subgroup basic-s4-devices:
                dmesg-warn -> PASS       (fi-kbl-7560u) fdo#100125
Test kms_busy:
        Subgroup basic-flip-default-b:
                dmesg-warn -> PASS       (fi-skl-6700hq) fdo#101144
Test kms_pipe_crc_basic:
        Subgroup hang-read-crc-pipe-b:
                dmesg-warn -> PASS       (fi-pnv-d510) fdo#101597

fdo#100125 https://bugs.freedesktop.org/show_bug.cgi?id=100125
fdo#101144 https://bugs.freedesktop.org/show_bug.cgi?id=101144
fdo#101597 https://bugs.freedesktop.org/show_bug.cgi?id=101597

fi-bdw-5557u     total:279  pass:268  dwarn:0   dfail:0   fail:0   skip:11  time:437s
fi-bdw-gvtdvm    total:279  pass:257  dwarn:8   dfail:0   fail:0   skip:14  time:429s
fi-blb-e6850     total:279  pass:224  dwarn:1   dfail:0   fail:0   skip:54  time:351s
fi-bsw-n3050     total:279  pass:243  dwarn:0   dfail:0   fail:0   skip:36  time:529s
fi-bxt-j4205     total:279  pass:260  dwarn:0   dfail:0   fail:0   skip:19  time:506s
fi-byt-j1900     total:279  pass:254  dwarn:1   dfail:0   fail:0   skip:24  time:490s
fi-byt-n2820     total:279  pass:250  dwarn:1   dfail:0   fail:0   skip:28  time:487s
fi-glk-2a        total:279  pass:260  dwarn:0   dfail:0   fail:0   skip:19  time:590s
fi-hsw-4770      total:279  pass:263  dwarn:0   dfail:0   fail:0   skip:16  time:433s
fi-hsw-4770r     total:279  pass:263  dwarn:0   dfail:0   fail:0   skip:16  time:416s
fi-ilk-650       total:279  pass:229  dwarn:0   dfail:0   fail:0   skip:50  time:418s
fi-ivb-3520m     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:500s
fi-ivb-3770      total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:478s
fi-kbl-7500u     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:464s
fi-kbl-7560u     total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:575s
fi-kbl-r         total:279  pass:260  dwarn:1   dfail:0   fail:0   skip:18  time:574s
fi-pnv-d510      total:279  pass:222  dwarn:2   dfail:0   fail:0   skip:55  time:555s
fi-skl-6260u     total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:455s
fi-skl-6700hq    total:279  pass:262  dwarn:0   dfail:0   fail:0   skip:17  time:583s
fi-skl-6700k     total:279  pass:257  dwarn:4   dfail:0   fail:0   skip:18  time:470s
fi-skl-6770hq    total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:477s
fi-skl-gvtdvm    total:279  pass:266  dwarn:0   dfail:0   fail:0   skip:13  time:438s
fi-snb-2520m     total:279  pass:251  dwarn:0   dfail:0   fail:0   skip:28  time:536s
fi-snb-2600      total:279  pass:250  dwarn:0   dfail:0   fail:0   skip:29  time:411s

24346e831017070c18f3c33b74a7b098682e20f7 drm-tip: 2017y-07m-04d-15h-39m-34s UTC integration manifest
9636bd8 drm/i915/selftests: Exercise independence of per-engine resets

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_5112/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] drm/i915/selftests: Exercise independence of per-engine resets
  2017-07-05 11:48 [PATCH] drm/i915/selftests: Exercise independence of per-engine resets Chris Wilson
  2017-07-05 12:05 ` ✓ Fi.CI.BAT: success for " Patchwork
@ 2017-07-06 17:15 ` Michel Thierry
  1 sibling, 0 replies; 3+ messages in thread
From: Michel Thierry @ 2017-07-06 17:15 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 05/07/17 04:48, Chris Wilson wrote:
> If all goes well, resetting one engine should not affect the operation of
> any others. So to test this, we setup a continuous stream of requests
> onto to each of the "innocent" engines whilst constantly resetting our
> target engine.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com>

Reviewed-by: Michel Thierry <michel.thierry@intel.com>

> ---
>   drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 165 +++++++++++++++++++++++
>   drivers/gpu/drm/i915/selftests/mock_context.c    |   8 ++
>   drivers/gpu/drm/i915/selftests/mock_context.h    |   3 +
>   3 files changed, 176 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> index 7096c3911cd3..dbfcb31ba9f4 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> @@ -22,8 +22,13 @@
>    *
>    */
>   
> +#include <linux/kthread.h>
> +
>   #include "../i915_selftest.h"
>   
> +#include "mock_context.h"
> +#include "mock_drm.h"
> +
>   struct hang {
>   	struct drm_i915_private *i915;
>   	struct drm_i915_gem_object *hws;
> @@ -372,6 +377,165 @@ static int igt_reset_engine(void *arg)
>   	return err;
>   }
>   
> +static int active_engine(void *data)
> +{
> +	struct intel_engine_cs *engine = data;
> +	struct drm_i915_gem_request *rq[2] = {};
> +	struct i915_gem_context *ctx[2];
> +	struct drm_file *file;
> +	unsigned long count = 0;
> +	int err = 0;
> +
> +	file = mock_file(engine->i915);
> +	if (IS_ERR(file))
> +		return PTR_ERR(file);
> +
> +	mutex_lock(&engine->i915->drm.struct_mutex);
> +	ctx[0] = live_context(engine->i915, file);
> +	mutex_unlock(&engine->i915->drm.struct_mutex);
> +	if (IS_ERR(ctx[0])) {
> +		err = PTR_ERR(ctx[0]);
> +		goto err_file;
> +	}
> +
> +	mutex_lock(&engine->i915->drm.struct_mutex);
> +	ctx[1] = live_context(engine->i915, file);
> +	mutex_unlock(&engine->i915->drm.struct_mutex);
> +	if (IS_ERR(ctx[1])) {
> +		err = PTR_ERR(ctx[1]);
> +		i915_gem_context_put(ctx[0]);
> +		goto err_file;
> +	}
> +
> +	while (!kthread_should_stop()) {
> +		unsigned int idx = count++ & 1;
> +		struct drm_i915_gem_request *old = rq[idx];
> +		struct drm_i915_gem_request *new;
> +
> +		mutex_lock(&engine->i915->drm.struct_mutex);
> +		new = i915_gem_request_alloc(engine, ctx[idx]);
> +		if (IS_ERR(new)) {
> +			mutex_unlock(&engine->i915->drm.struct_mutex);
> +			err = PTR_ERR(new);
> +			break;
> +		}
> +
> +		rq[idx] = i915_gem_request_get(new);
> +		i915_add_request(new);
> +		mutex_unlock(&engine->i915->drm.struct_mutex);
> +
> +		if (old) {
> +			i915_wait_request(old, 0, MAX_SCHEDULE_TIMEOUT);
> +			i915_gem_request_put(old);
> +		}
> +	}
> +
> +	for (count = 0; count < ARRAY_SIZE(rq); count++)
> +		i915_gem_request_put(rq[count]);
> +
> +err_file:
> +	mock_file_free(engine->i915, file);
> +	return err;
> +}
> +
> +static int igt_reset_active_engines(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	struct intel_engine_cs *engine, *active;
> +	enum intel_engine_id id, tmp;
> +	int err = 0;
> +
> +	/* Check that issuing a reset on one engine does not interfere
> +	 * with any other engine.
> +	 */
> +
> +	if (!intel_has_reset_engine(i915))
> +		return 0;
> +
> +	for_each_engine(engine, i915, id) {
> +		struct task_struct *threads[I915_NUM_ENGINES];
> +		unsigned long resets[I915_NUM_ENGINES];
> +		unsigned long global = i915_reset_count(&i915->gpu_error);
> +		IGT_TIMEOUT(end_time);
> +
> +		memset(threads, 0, sizeof(threads));
> +		for_each_engine(active, i915, tmp) {
> +			struct task_struct *tsk;
> +
> +			if (active == engine)
> +				continue;
> +
> +			resets[tmp] = i915_reset_engine_count(&i915->gpu_error,
> +							      active);
> +
> +			tsk = kthread_run(active_engine, active,
> +					  "igt/%s", active->name);
> +			if (IS_ERR(tsk)) {
> +				err = PTR_ERR(tsk);
> +				goto unwind;
> +			}
> +
> +			threads[tmp] = tsk;
> +			get_task_struct(tsk);
> +
> +		}
> +
> +		set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags);
> +		do {
> +			err = i915_reset_engine(engine);
> +			if (err) {
> +				pr_err("i915_reset_engine(%s) failed, err=%d\n",
> +				       engine->name, err);
> +				break;
> +			}
> +		} while (time_before(jiffies, end_time));
> +		clear_bit(I915_RESET_ENGINE + engine->id,
> +			  &i915->gpu_error.flags);
> +
> +unwind:
> +		for_each_engine(active, i915, tmp) {
> +			int ret;
> +
> +			if (!threads[tmp])
> +				continue;
> +
> +			ret = kthread_stop(threads[tmp]);
> +			if (ret) {
> +				pr_err("kthread for active engine %s failed, err=%d\n",
> +				       active->name, ret);
> +				if (!err)
> +					err = ret;
> +			}
> +			put_task_struct(threads[tmp]);
> +
> +			if (resets[tmp] != i915_reset_engine_count(&i915->gpu_error,
> +								   active)) {
> +				pr_err("Innocent engine %s was reset (count=%ld)\n",
> +				       active->name,
> +				       i915_reset_engine_count(&i915->gpu_error,
> +							       active) - resets[tmp]);
> +				err = -EIO;
> +			}
> +		}
> +
> +		if (global != i915_reset_count(&i915->gpu_error)) {
> +			pr_err("Global reset (count=%ld)!\n",
> +			       i915_reset_count(&i915->gpu_error) - global);
> +			err = -EIO;
> +		}
> +
> +		if (err)
> +			break;
> +
> +		cond_resched();
> +	}
> +
> +	if (i915_terminally_wedged(&i915->gpu_error))
> +		err = -EIO;
> +
> +	return err;
> +}
> +
>   static u32 fake_hangcheck(struct drm_i915_gem_request *rq)
>   {
>   	u32 reset_count;
> @@ -689,6 +853,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
>   		SUBTEST(igt_hang_sanitycheck),
>   		SUBTEST(igt_global_reset),
>   		SUBTEST(igt_reset_engine),
> +		SUBTEST(igt_reset_active_engines),
>   		SUBTEST(igt_wait_reset),
>   		SUBTEST(igt_reset_queue),
>   		SUBTEST(igt_render_engine_reset_fallback),
> diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
> index 9c7c68181f82..d436f2d5089b 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_context.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_context.c
> @@ -95,3 +95,11 @@ void mock_init_contexts(struct drm_i915_private *i915)
>   	INIT_WORK(&i915->contexts.free_work, contexts_free_worker);
>   	init_llist_head(&i915->contexts.free_list);
>   }
> +
> +struct i915_gem_context *
> +live_context(struct drm_i915_private *i915, struct drm_file *file)
> +{
> +	lockdep_assert_held(&i915->drm.struct_mutex);
> +
> +	return i915_gem_create_context(i915, file->driver_priv);
> +}
> diff --git a/drivers/gpu/drm/i915/selftests/mock_context.h b/drivers/gpu/drm/i915/selftests/mock_context.h
> index 383941a61124..2f432c03d413 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_context.h
> +++ b/drivers/gpu/drm/i915/selftests/mock_context.h
> @@ -33,4 +33,7 @@ mock_context(struct drm_i915_private *i915,
>   
>   void mock_context_close(struct i915_gem_context *ctx);
>   
> +struct i915_gem_context *
> +live_context(struct drm_i915_private *i915, struct drm_file *file);
> +
>   #endif /* !__MOCK_CONTEXT_H */
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2017-07-06 17:16 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-07-05 11:48 [PATCH] drm/i915/selftests: Exercise independence of per-engine resets Chris Wilson
2017-07-05 12:05 ` ✓ Fi.CI.BAT: success for " Patchwork
2017-07-06 17:15 ` [PATCH] " Michel Thierry

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.