All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH i-g-t 1/2] intel-gpu-overlay: Add queued stat
@ 2017-11-22 12:47 Tvrtko Ursulin
  2017-11-22 12:47 ` [PATCH i-g-t 2/2] tests/perf_pmu: Add tests for engine " Tvrtko Ursulin
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Tvrtko Ursulin @ 2017-11-22 12:47 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 lib/igt_perf.h    |  6 ++++++
 overlay/gpu-top.c | 14 ++++++++++++++
 overlay/gpu-top.h |  6 ++++++
 overlay/overlay.c |  3 +++
 4 files changed, 29 insertions(+)

diff --git a/lib/igt_perf.h b/lib/igt_perf.h
index 5428feb0c746..eaf7a928d296 100644
--- a/lib/igt_perf.h
+++ b/lib/igt_perf.h
@@ -35,9 +35,12 @@ enum drm_i915_pmu_engine_sample {
 	I915_SAMPLE_BUSY = 0,
 	I915_SAMPLE_WAIT = 1,
 	I915_SAMPLE_SEMA = 2,
+	I915_SAMPLE_QUEUED = 3,
 	I915_ENGINE_SAMPLE_MAX /* non-ABI */
 };
 
+#define I915_SAMPLE_QUEUED_SCALE 1e-2 /* No braces please. */
+
 #define I915_PMU_SAMPLE_BITS (4)
 #define I915_PMU_SAMPLE_MASK (0xf)
 #define I915_PMU_SAMPLE_INSTANCE_BITS (8)
@@ -58,6 +61,9 @@ enum drm_i915_pmu_engine_sample {
 #define I915_PMU_ENGINE_SEMA(class, instance) \
 	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
 
+#define I915_PMU_ENGINE_QUEUED(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_QUEUED)
+
 #define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
 
 #define I915_PMU_ACTUAL_FREQUENCY	__I915_PMU_OTHER(0)
diff --git a/overlay/gpu-top.c b/overlay/gpu-top.c
index 61b8f62fd78c..c252a0a984f1 100644
--- a/overlay/gpu-top.c
+++ b/overlay/gpu-top.c
@@ -72,6 +72,10 @@ static int perf_init(struct gpu_top *gt)
 				 gt->fd) >= 0)
 		gt->have_sema = 1;
 
+	if (perf_i915_open_group(I915_PMU_ENGINE_QUEUED(d->class, d->inst),
+				 gt->fd) >= 0)
+		gt->have_queued = 1;
+
 	gt->ring[0].name = d->name;
 	gt->num_rings = 1;
 
@@ -93,6 +97,12 @@ static int perf_init(struct gpu_top *gt)
 				   gt->fd) < 0)
 			return -1;
 
+		if (gt->have_queued &&
+		    perf_i915_open_group(I915_PMU_ENGINE_QUEUED(d->class,
+								d->inst),
+				   gt->fd) < 0)
+			return -1;
+
 		gt->ring[gt->num_rings++].name = d->name;
 	}
 
@@ -298,6 +308,8 @@ int gpu_top_update(struct gpu_top *gt)
 				s->wait[n] = sample[m++];
 			if (gt->have_sema)
 				s->sema[n] = sample[m++];
+			if (gt->have_queued)
+				s->queued[n] = sample[m++];
 		}
 
 		if (gt->count == 1)
@@ -310,6 +322,8 @@ int gpu_top_update(struct gpu_top *gt)
 				gt->ring[n].u.u.wait = (100 * (s->wait[n] - d->wait[n]) + d_time/2) / d_time;
 			if (gt->have_sema)
 				gt->ring[n].u.u.sema = (100 * (s->sema[n] - d->sema[n]) + d_time/2) / d_time;
+			if (gt->have_queued)
+				gt->queued[n] = (double)((s->queued[n] - d->queued[n])) * 1e9 * I915_SAMPLE_QUEUED_SCALE / d_time;
 
 			/* in case of rounding + sampling errors, fudge */
 			if (gt->ring[n].u.u.busy > 100)
diff --git a/overlay/gpu-top.h b/overlay/gpu-top.h
index d3cdd779760f..7d3acb4aa256 100644
--- a/overlay/gpu-top.h
+++ b/overlay/gpu-top.h
@@ -36,6 +36,7 @@ struct gpu_top {
 	int num_rings;
 	int have_wait;
 	int have_sema;
+	int have_queued;
 
 	struct gpu_top_ring {
 		const char *name;
@@ -44,6 +45,7 @@ struct gpu_top {
 				uint8_t busy;
 				uint8_t wait;
 				uint8_t sema;
+				uint8_t queued;
 			} u;
 			uint32_t payload;
 		} u;
@@ -54,7 +56,11 @@ struct gpu_top {
 		uint64_t busy[MAX_RINGS];
 		uint64_t wait[MAX_RINGS];
 		uint64_t sema[MAX_RINGS];
+		uint64_t queued[MAX_RINGS];
 	} stat[2];
+
+	double queued[MAX_RINGS];
+
 	int count;
 };
 
diff --git a/overlay/overlay.c b/overlay/overlay.c
index 4804f813554a..15c0f09ba0fe 100644
--- a/overlay/overlay.c
+++ b/overlay/overlay.c
@@ -255,6 +255,9 @@ static void show_gpu_top(struct overlay_context *ctx, struct overlay_gpu_top *gt
 		len = sprintf(txt, "%s: %3d%% busy",
 			      gt->gpu_top.ring[n].name,
 			      gt->gpu_top.ring[n].u.u.busy);
+		if (gt->gpu_top.have_queued)
+			len += sprintf(txt + len, ", qd %.2f",
+				       gt->gpu_top.queued[n]);
 		if (gt->gpu_top.ring[n].u.u.wait)
 			len += sprintf(txt + len, ", %d%% wait",
 				       gt->gpu_top.ring[n].u.u.wait);
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH i-g-t 2/2] tests/perf_pmu: Add tests for engine queued stat
  2017-11-22 12:47 [PATCH i-g-t 1/2] intel-gpu-overlay: Add queued stat Tvrtko Ursulin
@ 2017-11-22 12:47 ` Tvrtko Ursulin
  2017-11-22 12:56   ` Chris Wilson
  2017-11-23 15:17 ` ✓ Fi.CI.BAT: success for series starting with [1/2] intel-gpu-overlay: Add " Patchwork
  2017-11-23 16:44 ` ✗ Fi.CI.IGT: warning " Patchwork
  2 siblings, 1 reply; 8+ messages in thread
From: Tvrtko Ursulin @ 2017-11-22 12:47 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Simple test to check correct queue-depth is reported per engine.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 tests/perf_pmu.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 79 insertions(+)

diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index 8585ed7bcee8..17f0afca6fe1 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -87,6 +87,17 @@ static uint64_t pmu_read_single(int fd)
 	return data[0];
 }
 
+static uint64_t pmu_sample_single(int fd, uint64_t *val)
+{
+	uint64_t data[2];
+
+	igt_assert_eq(read(fd, data, sizeof(data)), sizeof(data));
+
+	*val = data[0];
+
+	return data[1];
+}
+
 static void pmu_read_multi(int fd, unsigned int num, uint64_t *val)
 {
 	uint64_t buf[2 + num];
@@ -655,6 +666,65 @@ multi_client(int gem_fd, const struct intel_execution_engine2 *e)
 	assert_within_epsilon(val[1], slept, tolerance);
 }
 
+static double calc_queued(uint64_t d_val, uint64_t d_ns)
+{
+	return (double)d_val * 1e9 * I915_SAMPLE_QUEUED_SCALE / d_ns;
+}
+
+static void
+queued(int gem_fd, const struct intel_execution_engine2 *e)
+{
+	const unsigned long duration_ns = 500e6;
+	igt_spin_t *spin[2];
+	uint64_t val[2];
+	uint64_t ts[2];
+	int fd;
+
+	fd = open_pmu(I915_PMU_ENGINE_QUEUED(e->class, e->instance));
+
+	/*
+	 * First check on an idle engine.
+	 */
+	ts[0] = pmu_sample_single(fd, &val[0]);
+	usleep(duration_ns / 3000);
+	ts[1] = pmu_sample_single(fd, &val[1]);
+	assert_within_epsilon(calc_queued(val[1] - val[0], ts[1] - ts[0]),
+			      0.0, tolerance);
+
+	/*
+	 * First spin batch will be immediately executing.
+	 */
+	spin[0] = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+	igt_spin_batch_set_timeout(spin[0], duration_ns);
+
+	ts[0] = pmu_sample_single(fd, &val[0]);
+	usleep(duration_ns / 3000);
+	ts[1] = pmu_sample_single(fd, &val[1]);
+	assert_within_epsilon(calc_queued(val[1] - val[0], ts[1] - ts[0]),
+			      1.0, tolerance);
+
+	/*
+	 * Second spin batch will sit in the execution queue behind the
+	 * first one so must cause the PMU to correctly report the queued
+	 * counter.
+	 */
+	spin[1] = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+	igt_spin_batch_set_timeout(spin[1], duration_ns);
+
+	ts[0] = pmu_sample_single(fd, &val[0]);
+	usleep(duration_ns / 3000);
+	ts[1] = pmu_sample_single(fd, &val[1]);
+	assert_within_epsilon(calc_queued(val[1] - val[0], ts[1] - ts[0]),
+			      2.0, tolerance);
+
+	gem_sync(gem_fd, spin[0]->handle);
+	gem_sync(gem_fd, spin[1]->handle);
+
+	igt_spin_batch_free(gem_fd, spin[0]);
+	igt_spin_batch_free(gem_fd, spin[1]);
+	close(fd);
+}
+
 /**
  * Tests that i915 PMU corectly errors out in invalid initialization.
  * i915 PMU is uncore PMU, thus:
@@ -1112,6 +1182,9 @@ igt_main
 		igt_subtest_f("init-sema-%s", e->name)
 			init(fd, e, I915_SAMPLE_SEMA);
 
+		igt_subtest_f("init-queued-%s", e->name)
+			init(fd, e, I915_SAMPLE_QUEUED);
+
 		/**
 		 * Test that engines show no load when idle.
 		 */
@@ -1166,6 +1239,12 @@ igt_main
 		 */
 		igt_subtest_f("multi-client-%s", e->name)
 			multi_client(fd, e);
+
+		/**
+		 * Test that queued metric works.
+		 */
+		igt_subtest_f("queued-%s", e->name)
+			queued(fd, e);
 	}
 
 	/**
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH i-g-t 2/2] tests/perf_pmu: Add tests for engine queued stat
  2017-11-22 12:47 ` [PATCH i-g-t 2/2] tests/perf_pmu: Add tests for engine " Tvrtko Ursulin
@ 2017-11-22 12:56   ` Chris Wilson
  2017-11-22 13:42     ` Tvrtko Ursulin
  0 siblings, 1 reply; 8+ messages in thread
From: Chris Wilson @ 2017-11-22 12:56 UTC (permalink / raw)
  To: Tvrtko Ursulin, Intel-gfx

Quoting Tvrtko Ursulin (2017-11-22 12:47:05)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Simple test to check correct queue-depth is reported per engine.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  tests/perf_pmu.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 79 insertions(+)
> 
> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
> index 8585ed7bcee8..17f0afca6fe1 100644
> --- a/tests/perf_pmu.c
> +++ b/tests/perf_pmu.c
> @@ -87,6 +87,17 @@ static uint64_t pmu_read_single(int fd)
>         return data[0];
>  }
>  
> +static uint64_t pmu_sample_single(int fd, uint64_t *val)
> +{
> +       uint64_t data[2];
> +
> +       igt_assert_eq(read(fd, data, sizeof(data)), sizeof(data));
> +
> +       *val = data[0];
> +
> +       return data[1];
> +}
> +
>  static void pmu_read_multi(int fd, unsigned int num, uint64_t *val)
>  {
>         uint64_t buf[2 + num];
> @@ -655,6 +666,65 @@ multi_client(int gem_fd, const struct intel_execution_engine2 *e)
>         assert_within_epsilon(val[1], slept, tolerance);
>  }
>  
> +static double calc_queued(uint64_t d_val, uint64_t d_ns)
> +{
> +       return (double)d_val * 1e9 * I915_SAMPLE_QUEUED_SCALE / d_ns;
> +}
> +
> +static void
> +queued(int gem_fd, const struct intel_execution_engine2 *e)
> +{
> +       const unsigned long duration_ns = 500e6;

0.5s.

> +       igt_spin_t *spin[2];
> +       uint64_t val[2];
> +       uint64_t ts[2];
> +       int fd;
> +
> +       fd = open_pmu(I915_PMU_ENGINE_QUEUED(e->class, e->instance));
> +
> +       /*
> +        * First check on an idle engine.
> +        */
> +       ts[0] = pmu_sample_single(fd, &val[0]);
> +       usleep(duration_ns / 3000);
> +       ts[1] = pmu_sample_single(fd, &val[1]);
> +       assert_within_epsilon(calc_queued(val[1] - val[0], ts[1] - ts[0]),
> +                             0.0, tolerance);
> +
> +       /*
> +        * First spin batch will be immediately executing.
> +        */
> +       spin[0] = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
> +       igt_spin_batch_set_timeout(spin[0], duration_ns);
> +
> +       ts[0] = pmu_sample_single(fd, &val[0]);
> +       usleep(duration_ns / 3000);
> +       ts[1] = pmu_sample_single(fd, &val[1]);
> +       assert_within_epsilon(calc_queued(val[1] - val[0], ts[1] - ts[0]),
> +                             1.0, tolerance);
> +

What I would like here is a for(n=1; n < 10; n++)
where max_n is chosen so that we terminate within 5s, changing sample
intervals to match if we want to increase N.

Hmm.

for (n = 1; n < 10; n++)
	ctx = gem_context_create()
	for (m = 0; m < n; m++)
		...etc...

(We probably either want to measure ring_size and avoid that, or use a
timeout that interrupts the last execbuf... Ok, that's better overall.)

And have qd geometrically increase. Basically just want to avoid hitting
magic numbers inside HW, ELSP/guc depth of 2 being the first magic
number we want to miss.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH i-g-t 2/2] tests/perf_pmu: Add tests for engine queued stat
  2017-11-22 12:56   ` Chris Wilson
@ 2017-11-22 13:42     ` Tvrtko Ursulin
  2017-11-22 13:51       ` Chris Wilson
  0 siblings, 1 reply; 8+ messages in thread
From: Tvrtko Ursulin @ 2017-11-22 13:42 UTC (permalink / raw)
  To: Chris Wilson, Tvrtko Ursulin, Intel-gfx


On 22/11/2017 12:56, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2017-11-22 12:47:05)
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Simple test to check correct queue-depth is reported per engine.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> ---
>>   tests/perf_pmu.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 79 insertions(+)
>>
>> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
>> index 8585ed7bcee8..17f0afca6fe1 100644
>> --- a/tests/perf_pmu.c
>> +++ b/tests/perf_pmu.c
>> @@ -87,6 +87,17 @@ static uint64_t pmu_read_single(int fd)
>>          return data[0];
>>   }
>>   
>> +static uint64_t pmu_sample_single(int fd, uint64_t *val)
>> +{
>> +       uint64_t data[2];
>> +
>> +       igt_assert_eq(read(fd, data, sizeof(data)), sizeof(data));
>> +
>> +       *val = data[0];
>> +
>> +       return data[1];
>> +}
>> +
>>   static void pmu_read_multi(int fd, unsigned int num, uint64_t *val)
>>   {
>>          uint64_t buf[2 + num];
>> @@ -655,6 +666,65 @@ multi_client(int gem_fd, const struct intel_execution_engine2 *e)
>>          assert_within_epsilon(val[1], slept, tolerance);
>>   }
>>   
>> +static double calc_queued(uint64_t d_val, uint64_t d_ns)
>> +{
>> +       return (double)d_val * 1e9 * I915_SAMPLE_QUEUED_SCALE / d_ns;
>> +}
>> +
>> +static void
>> +queued(int gem_fd, const struct intel_execution_engine2 *e)
>> +{
>> +       const unsigned long duration_ns = 500e6;
> 
> 0.5s.

Not sure what you mean? Express it in a different way using some 
NSECS_PER_SEC define?

>> +       igt_spin_t *spin[2];
>> +       uint64_t val[2];
>> +       uint64_t ts[2];
>> +       int fd;
>> +
>> +       fd = open_pmu(I915_PMU_ENGINE_QUEUED(e->class, e->instance));
>> +
>> +       /*
>> +        * First check on an idle engine.
>> +        */
>> +       ts[0] = pmu_sample_single(fd, &val[0]);
>> +       usleep(duration_ns / 3000);
>> +       ts[1] = pmu_sample_single(fd, &val[1]);
>> +       assert_within_epsilon(calc_queued(val[1] - val[0], ts[1] - ts[0]),
>> +                             0.0, tolerance);
>> +
>> +       /*
>> +        * First spin batch will be immediately executing.
>> +        */
>> +       spin[0] = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
>> +       igt_spin_batch_set_timeout(spin[0], duration_ns);
>> +
>> +       ts[0] = pmu_sample_single(fd, &val[0]);
>> +       usleep(duration_ns / 3000);
>> +       ts[1] = pmu_sample_single(fd, &val[1]);
>> +       assert_within_epsilon(calc_queued(val[1] - val[0], ts[1] - ts[0]),
>> +                             1.0, tolerance);
>> +
> 
> What I would like here is a for(n=1; n < 10; n++)
> where max_n is chosen so that we terminate within 5s, changing sample
> intervals to match if we want to increase N.
> 
> Hmm.
> 
> for (n = 1; n < 10; n++)
> 	ctx = gem_context_create()
> 	for (m = 0; m < n; m++)
> 		...etc...
> 
> (We probably either want to measure ring_size and avoid that, or use a
> timeout that interrupts the last execbuf... Ok, that's better overall.)
> 
> And have qd geometrically increase. Basically just want to avoid hitting
> magic numbers inside HW, ELSP/guc depth of 2 being the first magic
> number we want to miss.

I get the suggestion to test different queue depths and that's a good 
one. I did fail to keep track with the rest you wrote including why to 
add contexts into the picture?

How about simply growing the queue-depth exponentially until a set limit? 
With a 5s time budget we could go to quite a high qd, much more than 
we actually need.

We do have a facility to terminate the spin batch I think, so we don't have 
to wait for all of them to complete.

Regards,

Tvrtko







_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH i-g-t 2/2] tests/perf_pmu: Add tests for engine queued stat
  2017-11-22 13:42     ` Tvrtko Ursulin
@ 2017-11-22 13:51       ` Chris Wilson
  2017-11-22 14:16         ` Chris Wilson
  0 siblings, 1 reply; 8+ messages in thread
From: Chris Wilson @ 2017-11-22 13:51 UTC (permalink / raw)
  To: Tvrtko Ursulin, Tvrtko Ursulin, Intel-gfx

Quoting Tvrtko Ursulin (2017-11-22 13:42:04)
> 
> On 22/11/2017 12:56, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2017-11-22 12:47:05)
> >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>
> >> Simple test to check correct queue-depth is reported per engine.
> >>
> >> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >> ---
> >>   tests/perf_pmu.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >>   1 file changed, 79 insertions(+)
> >>
> >> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
> >> index 8585ed7bcee8..17f0afca6fe1 100644
> >> --- a/tests/perf_pmu.c
> >> +++ b/tests/perf_pmu.c
> >> @@ -87,6 +87,17 @@ static uint64_t pmu_read_single(int fd)
> >>          return data[0];
> >>   }
> >>   
> >> +static uint64_t pmu_sample_single(int fd, uint64_t *val)
> >> +{
> >> +       uint64_t data[2];
> >> +
> >> +       igt_assert_eq(read(fd, data, sizeof(data)), sizeof(data));
> >> +
> >> +       *val = data[0];
> >> +
> >> +       return data[1];
> >> +}
> >> +
> >>   static void pmu_read_multi(int fd, unsigned int num, uint64_t *val)
> >>   {
> >>          uint64_t buf[2 + num];
> >> @@ -655,6 +666,65 @@ multi_client(int gem_fd, const struct intel_execution_engine2 *e)
> >>          assert_within_epsilon(val[1], slept, tolerance);
> >>   }
> >>   
> >> +static double calc_queued(uint64_t d_val, uint64_t d_ns)
> >> +{
> >> +       return (double)d_val * 1e9 * I915_SAMPLE_QUEUED_SCALE / d_ns;
> >> +}
> >> +
> >> +static void
> >> +queued(int gem_fd, const struct intel_execution_engine2 *e)
> >> +{
> >> +       const unsigned long duration_ns = 500e6;
> > 
> > 0.5s.
> 
> Not sure what you mean? Express it in a different way using some 
> NSECS_PER_SEC define?

I made a note for myself. Adding /* 0.5s */ would save me commenting out
loud :)

> 
> >> +       igt_spin_t *spin[2];
> >> +       uint64_t val[2];
> >> +       uint64_t ts[2];
> >> +       int fd;
> >> +
> >> +       fd = open_pmu(I915_PMU_ENGINE_QUEUED(e->class, e->instance));
> >> +
> >> +       /*
> >> +        * First check on an idle engine.
> >> +        */
> >> +       ts[0] = pmu_sample_single(fd, &val[0]);
> >> +       usleep(duration_ns / 3000);
> >> +       ts[1] = pmu_sample_single(fd, &val[1]);
> >> +       assert_within_epsilon(calc_queued(val[1] - val[0], ts[1] - ts[0]),
> >> +                             0.0, tolerance);
> >> +
> >> +       /*
> >> +        * First spin batch will be immediately executing.
> >> +        */
> >> +       spin[0] = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
> >> +       igt_spin_batch_set_timeout(spin[0], duration_ns);
> >> +
> >> +       ts[0] = pmu_sample_single(fd, &val[0]);
> >> +       usleep(duration_ns / 3000);
> >> +       ts[1] = pmu_sample_single(fd, &val[1]);
> >> +       assert_within_epsilon(calc_queued(val[1] - val[0], ts[1] - ts[0]),
> >> +                             1.0, tolerance);
> >> +
> > 
> > What I would like here is a for(n=1; n < 10; n++)
> > where max_n is chosen so that we terminate within 5s, changing sample
> > intervals to match if we want to increase N.
> > 
> > Hmm.
> > 
> > for (n = 1; n < 10; n++)
> >       ctx = gem_context_create()
> >       for (m = 0; m < n; m++)
> >               ...etc...
> > 
> > (We probably either want to measure ring_size and avoid that, or use a
> > timeout that interrupts the last execbuf... Ok, that's better overall.)
> > 
> > And have qd geometrically increase. Basically just want to avoid hitting
> > magic numbers inside HW, ELSP/guc depth of 2 being the first magic
> > number we want to miss.
> 
> I get the suggestion to test different queue depths and that's a good 
> one. I did fail to keep track with the rest you wrote including why to 
> add contexts into the picture?

Throwing contexts into the picture was to be sure that it was counting
across contexts. (I was thinking about the complexity with the timelines
and the counter being on the engine, which I started worrying about.)
Next up would be adding waiting requests and demonstrating that they
aren't counted as queued.
 
> How about simply grow the queue-depth exponentially until a set limit? 
> with a 5s time budget with could go to a quite high qd, much more than 
> we actually need.

I'd just use the time limit. As you say we should be able to grow quite
large within a few seconds, it's just coordinating that with the sample
interval whilst keeping under the 6s to prevent a GPU hang.
 
> We do have a facility to terminate the spin batch I think so don't have 
> to wait for all of them to complete.

One trick that is quite fun is to keep submitting the same spin batch.
Then you only ever have one to worry about.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH i-g-t 2/2] tests/perf_pmu: Add tests for engine queued stat
  2017-11-22 13:51       ` Chris Wilson
@ 2017-11-22 14:16         ` Chris Wilson
  0 siblings, 0 replies; 8+ messages in thread
From: Chris Wilson @ 2017-11-22 14:16 UTC (permalink / raw)
  To: Tvrtko Ursulin, Tvrtko Ursulin, Intel-gfx

Quoting Chris Wilson (2017-11-22 13:51:59)
> Throwing contexts into the picture was to be sure that it was counting
> across contexts. (I was thinking about the complexity with the timelines
> and the counter being on the engine, which I started worrying about.)
> Next up would be adding waiting requests and demonstrating that they
> aren't counted as queued.

Now thinking about something else...

If waiting is separate from queued, how do I keep track of my own queue
depth? i.e. how many requests have I submitted that are waiting before
being queued?

I think we need WAITING as well (a new metric). :|

engine->waiting++ in execbuf
engine->waiting-- in submit_notify() ?

The problem with WAITING for userspace load-balancing is that you do not
know how many of those you can overtake (the only ones you are sure that you
have a dependency upon are those to your own timeline).

Hmm, I wonder if we are not just missing a trick in counting the dead
space for RT (i.e. learning from __intel_engine_get_busy_time()).
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [1/2] intel-gpu-overlay: Add queued stat
  2017-11-22 12:47 [PATCH i-g-t 1/2] intel-gpu-overlay: Add queued stat Tvrtko Ursulin
  2017-11-22 12:47 ` [PATCH i-g-t 2/2] tests/perf_pmu: Add tests for engine " Tvrtko Ursulin
@ 2017-11-23 15:17 ` Patchwork
  2017-11-23 16:44 ` ✗ Fi.CI.IGT: warning " Patchwork
  2 siblings, 0 replies; 8+ messages in thread
From: Patchwork @ 2017-11-23 15:17 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/2] intel-gpu-overlay: Add queued stat
URL   : https://patchwork.freedesktop.org/series/34228/
State : success

== Summary ==

IGT patchset tested on top of latest successful build
a1e444f4c8178acb590d41c21e921c6447668be4 tests/perf_pmu: Bump measuring duration for semaphores as well

with latest DRM-Tip kernel build CI_DRM_3377
9d399f816945 drm-tip: 2017y-11m-23d-12h-28m-12s UTC integration manifest

Testlist changes:
+igt@perf_pmu@init-queued-bcs0
+igt@perf_pmu@init-queued-rcs0
+igt@perf_pmu@init-queued-vcs0
+igt@perf_pmu@init-queued-vcs1
+igt@perf_pmu@init-queued-vecs0
+igt@perf_pmu@queued-bcs0
+igt@perf_pmu@queued-rcs0
+igt@perf_pmu@queued-vcs0
+igt@perf_pmu@queued-vcs1
+igt@perf_pmu@queued-vecs0

Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-b:
                pass       -> INCOMPLETE (fi-snb-2520m) fdo#103713

fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713

fi-bdw-5557u     total:289  pass:268  dwarn:0   dfail:0   fail:0   skip:21  time:448s
fi-bdw-gvtdvm    total:289  pass:265  dwarn:0   dfail:0   fail:0   skip:24  time:460s
fi-blb-e6850     total:289  pass:223  dwarn:1   dfail:0   fail:0   skip:65  time:386s
fi-bsw-n3050     total:289  pass:243  dwarn:0   dfail:0   fail:0   skip:46  time:542s
fi-bwr-2160      total:289  pass:183  dwarn:0   dfail:0   fail:0   skip:106 time:279s
fi-bxt-dsi       total:289  pass:259  dwarn:0   dfail:0   fail:0   skip:30  time:508s
fi-bxt-j4205     total:289  pass:260  dwarn:0   dfail:0   fail:0   skip:29  time:513s
fi-byt-j1900     total:289  pass:254  dwarn:0   dfail:0   fail:0   skip:35  time:503s
fi-byt-n2820     total:289  pass:250  dwarn:0   dfail:0   fail:0   skip:39  time:496s
fi-cfl-s2        total:289  pass:263  dwarn:0   dfail:0   fail:0   skip:26  time:611s
fi-elk-e7500     total:289  pass:229  dwarn:0   dfail:0   fail:0   skip:60  time:431s
fi-gdg-551       total:289  pass:178  dwarn:1   dfail:0   fail:1   skip:109 time:270s
fi-glk-1         total:289  pass:261  dwarn:0   dfail:0   fail:0   skip:28  time:541s
fi-hsw-4770      total:289  pass:262  dwarn:0   dfail:0   fail:0   skip:27  time:429s
fi-hsw-4770r     total:289  pass:262  dwarn:0   dfail:0   fail:0   skip:27  time:438s
fi-ilk-650       total:289  pass:228  dwarn:0   dfail:0   fail:0   skip:61  time:432s
fi-ivb-3520m     total:289  pass:260  dwarn:0   dfail:0   fail:0   skip:29  time:488s
fi-ivb-3770      total:289  pass:260  dwarn:0   dfail:0   fail:0   skip:29  time:466s
fi-pnv-d510      total:289  pass:222  dwarn:1   dfail:0   fail:0   skip:66  time:583s
fi-skl-6260u     total:289  pass:269  dwarn:0   dfail:0   fail:0   skip:20  time:456s
fi-skl-6600u     total:289  pass:262  dwarn:0   dfail:0   fail:0   skip:27  time:544s
fi-skl-6700hq    total:289  pass:263  dwarn:0   dfail:0   fail:0   skip:26  time:563s
fi-skl-6700k     total:289  pass:265  dwarn:0   dfail:0   fail:0   skip:24  time:523s
fi-skl-6770hq    total:289  pass:269  dwarn:0   dfail:0   fail:0   skip:20  time:501s
fi-skl-gvtdvm    total:289  pass:266  dwarn:0   dfail:0   fail:0   skip:23  time:463s
fi-snb-2520m     total:246  pass:212  dwarn:0   dfail:0   fail:0   skip:33 
fi-snb-2600      total:289  pass:249  dwarn:0   dfail:0   fail:0   skip:40  time:438s
Blacklisted hosts:
fi-cnl-y         total:289  pass:262  dwarn:0   dfail:0   fail:0   skip:27  time:576s
fi-kbl-7500u     total:289  pass:264  dwarn:1   dfail:0   fail:0   skip:24  time:481s
fi-kbl-7560u     total:289  pass:270  dwarn:0   dfail:0   fail:0   skip:19  time:536s
fi-kbl-7567u     total:289  pass:269  dwarn:0   dfail:0   fail:0   skip:20  time:477s
fi-kbl-r         total:289  pass:262  dwarn:0   dfail:0   fail:0   skip:27  time:539s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_530/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* ✗ Fi.CI.IGT: warning for series starting with [1/2] intel-gpu-overlay: Add queued stat
  2017-11-22 12:47 [PATCH i-g-t 1/2] intel-gpu-overlay: Add queued stat Tvrtko Ursulin
  2017-11-22 12:47 ` [PATCH i-g-t 2/2] tests/perf_pmu: Add tests for engine " Tvrtko Ursulin
  2017-11-23 15:17 ` ✓ Fi.CI.BAT: success for series starting with [1/2] intel-gpu-overlay: Add " Patchwork
@ 2017-11-23 16:44 ` Patchwork
  2 siblings, 0 replies; 8+ messages in thread
From: Patchwork @ 2017-11-23 16:44 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/2] intel-gpu-overlay: Add queued stat
URL   : https://patchwork.freedesktop.org/series/34228/
State : warning

== Summary ==

Test kms_flip:
        Subgroup wf_vblank-ts-check-interruptible:
                pass       -> SKIP       (shard-hsw) fdo#103181
        Subgroup plain-flip-fb-recreate:
                pass       -> FAIL       (shard-hsw) fdo#100368
        Subgroup blt-flip-vs-panning-interruptible:
                dmesg-warn -> PASS       (shard-hsw)
Test kms_cursor_legacy:
        Subgroup cursor-vs-flip-varying-size:
                pass       -> SKIP       (shard-hsw)
Test drv_module_reload:
        Subgroup basic-no-display:
                dmesg-warn -> PASS       (shard-snb) fdo#102707 +1
Test kms_frontbuffer_tracking:
        Subgroup fbc-1p-offscren-pri-shrfb-draw-render:
                fail       -> PASS       (shard-snb) fdo#101623 +1
Test drv_selftest:
        Subgroup mock_sanitycheck:
                pass       -> DMESG-WARN (shard-snb) fdo#103717
                pass       -> DMESG-WARN (shard-hsw) fdo#103719
Test kms_setmode:
        Subgroup basic:
                pass       -> FAIL       (shard-hsw) fdo#99912

fdo#103181 https://bugs.freedesktop.org/show_bug.cgi?id=103181
fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#102707 https://bugs.freedesktop.org/show_bug.cgi?id=102707
fdo#101623 https://bugs.freedesktop.org/show_bug.cgi?id=101623
fdo#103717 https://bugs.freedesktop.org/show_bug.cgi?id=103717
fdo#103719 https://bugs.freedesktop.org/show_bug.cgi?id=103719
fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912

shard-hsw        total:2677 pass:1531 dwarn:2   dfail:0   fail:19  skip:1125 time:9491s
shard-snb        total:2677 pass:1311 dwarn:2   dfail:0   fail:19  skip:1345 time:8130s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_530/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2017-11-23 16:44 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-22 12:47 [PATCH i-g-t 1/2] intel-gpu-overlay: Add queued stat Tvrtko Ursulin
2017-11-22 12:47 ` [PATCH i-g-t 2/2] tests/perf_pmu: Add tests for engine " Tvrtko Ursulin
2017-11-22 12:56   ` Chris Wilson
2017-11-22 13:42     ` Tvrtko Ursulin
2017-11-22 13:51       ` Chris Wilson
2017-11-22 14:16         ` Chris Wilson
2017-11-23 15:17 ` ✓ Fi.CI.BAT: success for series starting with [1/2] intel-gpu-overlay: Add " Patchwork
2017-11-23 16:44 ` ✗ Fi.CI.IGT: warning " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.