All of lore.kernel.org
 help / color / mirror / Atom feed
* [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS
@ 2020-04-20  9:09 Chris Wilson
  2020-04-20  9:09 ` [Intel-gfx] [PATCH 2/4] drm/i915/selftests: Skip energy consumption tests if not controlling freq Chris Wilson
                   ` (6 more replies)
  0 siblings, 7 replies; 14+ messages in thread
From: Chris Wilson @ 2020-04-20  9:09 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

One of the core tenents of reclocking the GPU is that its throughput
scales with the clock frequency. We can observe this by incrementing a
loop counter on the GPU, and compare the different execution rates at
the notional RPS frequencies.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/selftest_gt_pm.c |   3 +-
 drivers/gpu/drm/i915/gt/selftest_rps.c   | 249 +++++++++++++++++++++--
 drivers/gpu/drm/i915/gt/selftest_rps.h   |   1 +
 3 files changed, 240 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
index 0141c334f2ac..4b2733967c42 100644
--- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
@@ -53,8 +53,9 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(live_rc6_manual),
-		SUBTEST(live_rps_interrupt),
+		SUBTEST(live_rps_frequency),
 		SUBTEST(live_rps_power),
+		SUBTEST(live_rps_interrupt),
 		SUBTEST(live_gt_resume),
 	};
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index 360f56aa4b82..b1a435db1edc 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -6,6 +6,7 @@
 #include <linux/sort.h>
 
 #include "intel_engine_pm.h"
+#include "intel_gpu_commands.h"
 #include "intel_gt_pm.h"
 #include "intel_rc6.h"
 #include "selftest_rps.h"
@@ -17,6 +18,242 @@ static void dummy_rps_work(struct work_struct *wrk)
 {
 }
 
+static int cmp_u64(const void *A, const void *B)
+{
+	const u64 *a = A, *b = B;
+
+	if (a < b)
+		return -1;
+	else if (a > b)
+		return 1;
+	else
+		return 0;
+}
+
+static struct i915_vma *
+create_spin_counter(struct intel_engine_cs *engine,
+		    struct i915_address_space *vm,
+		    u32 **cancel,
+		    u32 **counter)
+{
+	enum {
+		COUNT,
+		INC,
+		__NGPR__,
+	};
+#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	u32 *base, *cs;
+	int loop, i;
+	int err;
+
+	obj = i915_gem_object_create_internal(vm->i915, 4096);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
+
+	vma = i915_vma_instance(obj, vm, NULL);
+	if (IS_ERR(vma)) {
+		i915_gem_object_put(obj);
+		return vma;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (err) {
+		i915_vma_put(vma);
+		return ERR_PTR(err);
+	}
+
+	base = i915_gem_object_pin_map(obj, I915_MAP_WC);
+	if (IS_ERR(base)) {
+		i915_gem_object_put(obj);
+		return ERR_CAST(base);
+	}
+	cs = base;
+
+	*cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
+	for (i = 0; i < __NGPR__; i++) {
+		*cs++ = i915_mmio_reg_offset(CS_GPR(i));
+		*cs++ = 0;
+		*cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
+		*cs++ = 0;
+	}
+
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(CS_GPR(INC));
+	*cs++ = 1;
+
+	loop = cs - base;
+
+	*cs++ = MI_MATH(4);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
+	*cs++ = MI_MATH_ADD;
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
+
+	*cs++ = MI_STORE_REGISTER_MEM_GEN8;
+	*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
+	*cs++ = lower_32_bits(vma->node.start + 1000 * sizeof(*cs));
+	*cs++ = upper_32_bits(vma->node.start + 1000 * sizeof(*cs));
+
+	*cs++ = MI_BATCH_BUFFER_START_GEN8;
+	*cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
+	*cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
+
+	i915_gem_object_flush_map(obj);
+
+	*cancel = base + loop;
+	*counter = memset32(base + 1000, 0, 1);
+	return vma;
+}
+
+static u64 __measure_frequency(u32 *cntr, int duration_ms)
+{
+	u64 dc, dt;
+
+	dt = ktime_get();
+	dc = READ_ONCE(*cntr);
+	usleep_range(1000 * duration_ms, 2000 * duration_ms);
+	dc = READ_ONCE(*cntr) - dc;
+	dt = ktime_get() - dt;
+
+	return div64_u64(1000 * 1000 * dc, dt);
+}
+
+static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
+{
+	u64 x[5];
+	int i;
+
+	mutex_lock(&rps->lock);
+	GEM_BUG_ON(!rps->active);
+	intel_rps_set(rps, *freq);
+	mutex_unlock(&rps->lock);
+
+	msleep(20); /* more than enough time to stabilise! */
+
+	for (i = 0; i < 5; i++)
+		x[i] = __measure_frequency(cntr, 2);
+	*freq = read_cagf(rps);
+
+	/* A simple triangle filter for better result stability */
+	sort(x, 5, sizeof(*x), cmp_u64, NULL);
+	return div_u64(x[1] + 2 * x[2] + x[3], 4);
+}
+
+static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
+{
+	return f_d * x > f_n * y && f_n * x < f_d * y;
+}
+
+int live_rps_frequency(void *arg)
+{
+	void (*saved_work)(struct work_struct *wrk);
+	struct intel_gt *gt = arg;
+	struct intel_rps *rps = &gt->rps;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = 0;
+
+	/*
+	 * The premise is that the GPU does change freqency at our behest.
+	 * Let's check there is a correspondence between the requested
+	 * frequency, the actual frequency, and the observed clock rate.
+	 */
+
+	if (!rps->enabled || rps->max_freq <= rps->min_freq)
+		return 0;
+
+	if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
+		return 0;
+
+	intel_gt_pm_wait_for_idle(gt);
+	saved_work = rps->work.func;
+	rps->work.func = dummy_rps_work;
+
+	for_each_engine(engine, gt, id) {
+		struct i915_request *rq;
+		struct i915_vma *vma;
+		u32 *cancel, *cntr;
+		struct {
+			u64 count;
+			int freq;
+		} min, max;
+
+		vma = create_spin_counter(engine,
+					  engine->kernel_context->vm,
+					  &cancel, &cntr);
+		if (IS_ERR(vma)) {
+			err = PTR_ERR(vma);
+			break;
+		}
+
+		rq = intel_engine_create_kernel_request(engine);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto err_vma;
+		}
+
+		i915_vma_lock(vma);
+		err = i915_request_await_object(rq, vma->obj, false);
+		if (!err)
+			err = i915_vma_move_to_active(vma, rq, 0);
+		if (!err)
+			err = rq->engine->emit_bb_start(rq,
+							vma->node.start,
+							PAGE_SIZE, 0);
+		i915_vma_unlock(vma);
+		i915_request_add(rq);
+		if (err)
+			goto err_vma;
+
+		if (wait_for(READ_ONCE(*cntr), 10)) {
+			pr_err("%s: timed loop did not start\n",
+			       engine->name);
+			goto err_vma;
+		}
+
+		min.freq = rps->min_freq;
+		min.count = measure_frequency_at(rps, cntr, &min.freq);
+
+		max.freq = rps->max_freq;
+		max.count = measure_frequency_at(rps, cntr, &max.freq);
+
+		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
+			engine->name,
+			min.count, intel_gpu_freq(rps, min.freq),
+			max.count, intel_gpu_freq(rps, max.freq),
+			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
+						     max.freq * min.count));
+
+		if (!scaled_within(max.freq * min.count,
+				   min.freq * max.count,
+				   1, 2)) {
+			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
+			       engine->name,
+			       max.freq * min.count,
+			       min.freq * max.count);
+			err = -EINVAL;
+		}
+
+err_vma:
+		*cancel = MI_BATCH_BUFFER_END;
+		i915_gem_object_unpin_map(vma->obj);
+		i915_vma_unpin(vma);
+		i915_vma_put(vma);
+
+		if (igt_flush_test(gt->i915))
+			err = -EIO;
+		if (err)
+			break;
+	}
+
+	intel_gt_pm_wait_for_idle(gt);
+	rps->work.func = saved_work;
+
+	return err;
+}
+
 static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
 {
 	/* Flush any previous EI */
@@ -248,18 +485,6 @@ static u64 __measure_power(int duration_ms)
 	return div64_u64(1000 * 1000 * dE, dt);
 }
 
-static int cmp_u64(const void *A, const void *B)
-{
-	const u64 *a = A, *b = B;
-
-	if (a < b)
-		return -1;
-	else if (a > b)
-		return 1;
-	else
-		return 0;
-}
-
 static u64 measure_power_at(struct intel_rps *rps, int freq)
 {
 	u64 x[5];
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.h b/drivers/gpu/drm/i915/gt/selftest_rps.h
index cad515a7f0e5..07c2bddf8899 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.h
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.h
@@ -6,6 +6,7 @@
 #ifndef SELFTEST_RPS_H
 #define SELFTEST_RPS_H
 
+int live_rps_frequency(void *arg);
 int live_rps_interrupt(void *arg);
 int live_rps_power(void *arg);
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Intel-gfx] [PATCH 2/4] drm/i915/selftests: Skip energy consumption tests if not controlling freq
  2020-04-20  9:09 [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS Chris Wilson
@ 2020-04-20  9:09 ` Chris Wilson
  2020-04-20 11:05   ` Mika Kuoppala
  2020-04-20  9:09 ` [Intel-gfx] [PATCH 3/4] drm/i915/selftests: Check RPS controls Chris Wilson
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2020-04-20  9:09 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

If we can not manipulate the frequency with RPS, then comparing min/max
power consumption is pointless / misleading. We will leave the warning
about not being able to control the frequency selection via RPS to other
tests so as not to confuse this more specialised check.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/selftest_rps.c | 37 ++++++++++++++++----------
 1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index b1a435db1edc..149a3de86cb9 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -485,26 +485,21 @@ static u64 __measure_power(int duration_ms)
 	return div64_u64(1000 * 1000 * dE, dt);
 }
 
-static u64 measure_power_at(struct intel_rps *rps, int freq)
+static u64 measure_power_at(struct intel_rps *rps, int *freq)
 {
 	u64 x[5];
 	int i;
 
 	mutex_lock(&rps->lock);
 	GEM_BUG_ON(!rps->active);
-	intel_rps_set(rps, freq);
+	intel_rps_set(rps, *freq);
 	mutex_unlock(&rps->lock);
 
 	msleep(20); /* more than enough time to stabilise! */
 
-	i = read_cagf(rps);
-	if (i != freq)
-		pr_notice("Running at %x [%uMHz], not target %x [%uMHz]\n",
-			  i, intel_gpu_freq(rps, i),
-			  freq, intel_gpu_freq(rps, freq));
-
 	for (i = 0; i < 5; i++)
 		x[i] = __measure_power(5);
+	*freq = read_cagf(rps);
 
 	/* A simple triangle filter for better result stability */
 	sort(x, 5, sizeof(*x), cmp_u64, NULL);
@@ -542,7 +537,10 @@ int live_rps_power(void *arg)
 
 	for_each_engine(engine, gt, id) {
 		struct i915_request *rq;
-		u64 min, max;
+		struct {
+			u64 power;
+			int freq;
+		} min, max;
 
 		if (!intel_engine_can_store_dword(engine))
 			continue;
@@ -565,16 +563,27 @@ int live_rps_power(void *arg)
 			break;
 		}
 
-		max = measure_power_at(rps, rps->max_freq);
-		min = measure_power_at(rps, rps->min_freq);
+		max.freq = rps->max_freq;
+		max.power = measure_power_at(rps, &max.freq);
+
+		min.freq = rps->min_freq;
+		min.power = measure_power_at(rps, &min.freq);
 
 		igt_spinner_end(&spin);
 
 		pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
 			engine->name,
-			min, intel_gpu_freq(rps, rps->min_freq),
-			max, intel_gpu_freq(rps, rps->max_freq));
-		if (11 * min > 10 * max) {
+			min.power, intel_gpu_freq(rps, min.freq),
+			max.power, intel_gpu_freq(rps, max.freq));
+
+		if (10 * min.freq >= 9 * max.freq) {
+			pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n",
+				  min.freq, intel_gpu_freq(rps, min.freq),
+				  max.freq, intel_gpu_freq(rps, max.freq));
+			continue;
+		}
+
+		if (11 * min.power > 10 * max.power) {
 			pr_err("%s: did not conserve power when setting lower frequency!\n",
 			       engine->name);
 			err = -EINVAL;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Intel-gfx] [PATCH 3/4] drm/i915/selftests: Check RPS controls
  2020-04-20  9:09 [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS Chris Wilson
  2020-04-20  9:09 ` [Intel-gfx] [PATCH 2/4] drm/i915/selftests: Skip energy consumption tests if not controlling freq Chris Wilson
@ 2020-04-20  9:09 ` Chris Wilson
  2020-04-20 16:41   ` Mika Kuoppala
  2020-04-20  9:09 ` [Intel-gfx] [PATCH 4/4] drm/i915/selftests: Split RPS frequency measurement Chris Wilson
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2020-04-20  9:09 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Check that the GPU does respond to our RPS frequency requests by setting
our desired frequency.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/selftest_gt_pm.c |   1 +
 drivers/gpu/drm/i915/gt/selftest_rps.c   | 196 ++++++++++++++++++++---
 drivers/gpu/drm/i915/gt/selftest_rps.h   |   1 +
 3 files changed, 174 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
index 4b2733967c42..de3eaef40596 100644
--- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
@@ -53,6 +53,7 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(live_rc6_manual),
+		SUBTEST(live_rps_control),
 		SUBTEST(live_rps_frequency),
 		SUBTEST(live_rps_power),
 		SUBTEST(live_rps_interrupt),
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index 149a3de86cb9..19fa6a561de3 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -107,6 +107,172 @@ create_spin_counter(struct intel_engine_cs *engine,
 	return vma;
 }
 
+static u8 rps_set_check(struct intel_rps *rps, u8 freq)
+{
+	u8 history[64], i;
+	unsigned long end;
+	int sleep;
+
+	mutex_lock(&rps->lock);
+	GEM_BUG_ON(!rps->active);
+	intel_rps_set(rps, freq);
+	GEM_BUG_ON(rps->last_freq != freq);
+	mutex_unlock(&rps->lock);
+
+	i = 0;
+	memset(history, freq, sizeof(history));
+	sleep = 20;
+
+	/* The PCU does not change instantly, but drifts towards the goal? */
+	end = jiffies + msecs_to_jiffies(50);
+	do {
+		u8 act;
+
+		act = read_cagf(rps);
+		if (time_after(jiffies, end))
+			return act;
+
+		/* Target acquired */
+		if (act == freq)
+			return act;
+
+		/* Any change witin the last N samples? */
+		if (!memchr_inv(history, act, sizeof(history)))
+			return act;
+
+		history[i] = act;
+		i = (i + 1) % ARRAY_SIZE(history);
+
+		usleep_range(sleep, 2 * sleep);
+		sleep *= 2;
+		if (sleep > 1000)
+			sleep = 1000;
+	} while (1);
+}
+
+int live_rps_control(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_rps *rps = &gt->rps;
+	void (*saved_work)(struct work_struct *wrk);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	struct igt_spinner spin;
+	int err = 0;
+
+	/*
+	 * Check that the actual frequency matches our requested frequency,
+	 * to verify our control mechanism. We have to be careful that the
+	 * PCU may throttle the GPU in which case the actual frequency used
+	 * will be lowered than requested.
+	 */
+
+	if (!rps->enabled || rps->max_freq <= rps->min_freq)
+		return 0;
+
+	if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
+		return 0;
+
+	if (igt_spinner_init(&spin, gt))
+		return -ENOMEM;
+
+	intel_gt_pm_wait_for_idle(gt);
+	saved_work = rps->work.func;
+	rps->work.func = dummy_rps_work;
+
+	intel_gt_pm_get(gt);
+	for_each_engine(engine, gt, id) {
+		struct i915_request *rq;
+		ktime_t min_dt, max_dt;
+		int act, f, limit;
+		int min, max;
+
+		if (!intel_engine_can_store_dword(engine))
+			continue;
+
+		rq = igt_spinner_create_request(&spin,
+						engine->kernel_context,
+						MI_NOOP);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_add(rq);
+
+		if (!igt_wait_for_spinner(&spin, rq)) {
+			pr_err("%s: RPS spinner did not start\n",
+			       engine->name);
+			igt_spinner_end(&spin);
+			intel_gt_set_wedged(engine->gt);
+			err = -EIO;
+			break;
+		}
+
+		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
+			pr_err("%s: could not set minimum frequency [%x], only %x!\n",
+			       engine->name, rps->min_freq, read_cagf(rps));
+			igt_spinner_end(&spin);
+			err = -EINVAL;
+			break;
+		}
+
+		for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
+			act = rps_set_check(rps, f);
+			if (act < f)
+				break;
+		}
+
+		limit = rps_set_check(rps, f);
+
+		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
+			pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
+			       engine->name, rps->min_freq, read_cagf(rps));
+			igt_spinner_end(&spin);
+			err = -EINVAL;
+			break;
+		}
+
+		max_dt = ktime_get();
+		max = rps_set_check(rps, limit);
+		max_dt = ktime_sub(ktime_get(), max_dt);
+
+		min_dt = ktime_get();
+		min = rps_set_check(rps, rps->min_freq);
+		min_dt = ktime_sub(ktime_get(), min_dt);
+
+		igt_spinner_end(&spin);
+
+		pr_info("%s: range:[%x:%uMHz, %x:%uMHz] actual:[ %x:%uMHz, %x:%uMHz], %x:%x response %lluns:%lluns\n",
+			engine->name,
+			rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
+			rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
+			act, intel_gpu_freq(rps, act),
+			limit, intel_gpu_freq(rps, limit),
+			min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));
+
+		if (limit == rps->min_freq) {
+			pr_err("%s: GPU throttled to minimum!\n",
+			       engine->name);
+			err = -ENODEV;
+			break;
+		}
+
+		if (igt_flush_test(gt->i915)) {
+			err = -EIO;
+			break;
+		}
+	}
+	intel_gt_pm_put(gt);
+
+	igt_spinner_fini(&spin);
+
+	intel_gt_pm_wait_for_idle(gt);
+	rps->work.func = saved_work;
+
+	return err;
+}
+
 static u64 __measure_frequency(u32 *cntr, int duration_ms)
 {
 	u64 dc, dt;
@@ -125,16 +291,10 @@ static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
 	u64 x[5];
 	int i;
 
-	mutex_lock(&rps->lock);
-	GEM_BUG_ON(!rps->active);
-	intel_rps_set(rps, *freq);
-	mutex_unlock(&rps->lock);
-
-	msleep(20); /* more than enough time to stabilise! */
-
+	*freq = rps_set_check(rps, *freq);
 	for (i = 0; i < 5; i++)
 		x[i] = __measure_frequency(cntr, 2);
-	*freq = read_cagf(rps);
+	*freq = (*freq + read_cagf(rps)) / 2;
 
 	/* A simple triangle filter for better result stability */
 	sort(x, 5, sizeof(*x), cmp_u64, NULL);
@@ -279,10 +439,7 @@ static int __rps_up_interrupt(struct intel_rps *rps,
 	if (!intel_engine_can_store_dword(engine))
 		return 0;
 
-	mutex_lock(&rps->lock);
-	GEM_BUG_ON(!rps->active);
-	intel_rps_set(rps, rps->min_freq);
-	mutex_unlock(&rps->lock);
+	rps_set_check(rps, rps->min_freq);
 
 	rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
 	if (IS_ERR(rq))
@@ -354,10 +511,7 @@ static int __rps_down_interrupt(struct intel_rps *rps,
 	struct intel_uncore *uncore = engine->uncore;
 	u32 timeout;
 
-	mutex_lock(&rps->lock);
-	GEM_BUG_ON(!rps->active);
-	intel_rps_set(rps, rps->max_freq);
-	mutex_unlock(&rps->lock);
+	rps_set_check(rps, rps->max_freq);
 
 	if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
 		pr_err("%s: RPS did not register DOWN interrupt\n",
@@ -490,16 +644,10 @@ static u64 measure_power_at(struct intel_rps *rps, int *freq)
 	u64 x[5];
 	int i;
 
-	mutex_lock(&rps->lock);
-	GEM_BUG_ON(!rps->active);
-	intel_rps_set(rps, *freq);
-	mutex_unlock(&rps->lock);
-
-	msleep(20); /* more than enough time to stabilise! */
-
+	*freq = rps_set_check(rps, *freq);
 	for (i = 0; i < 5; i++)
 		x[i] = __measure_power(5);
-	*freq = read_cagf(rps);
+	*freq = (*freq + read_cagf(rps)) / 2;
 
 	/* A simple triangle filter for better result stability */
 	sort(x, 5, sizeof(*x), cmp_u64, NULL);
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.h b/drivers/gpu/drm/i915/gt/selftest_rps.h
index 07c2bddf8899..be0bf8e3f639 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.h
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.h
@@ -6,6 +6,7 @@
 #ifndef SELFTEST_RPS_H
 #define SELFTEST_RPS_H
 
+int live_rps_control(void *arg);
 int live_rps_frequency(void *arg);
 int live_rps_interrupt(void *arg);
 int live_rps_power(void *arg);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Intel-gfx] [PATCH 4/4] drm/i915/selftests: Split RPS frequency measurement
  2020-04-20  9:09 [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS Chris Wilson
  2020-04-20  9:09 ` [Intel-gfx] [PATCH 2/4] drm/i915/selftests: Skip energy consumption tests if not controlling freq Chris Wilson
  2020-04-20  9:09 ` [Intel-gfx] [PATCH 3/4] drm/i915/selftests: Check RPS controls Chris Wilson
@ 2020-04-20  9:09 ` Chris Wilson
  2020-04-20 16:51   ` Mika Kuoppala
  2020-04-20 10:00 ` [Intel-gfx] ✗ Fi.CI.BAT: failure for series starting with [1/4] drm/i915/selftests: Verify frequency scaling with RPS Patchwork
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2020-04-20  9:09 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Split the frequency measurement into two modes, so that we can judge the
impact of the llc setup on top of the pure CS frequency scaling.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/selftest_gt_pm.c |   3 +-
 drivers/gpu/drm/i915/gt/selftest_rps.c   | 157 ++++++++++++++++++++++-
 drivers/gpu/drm/i915/gt/selftest_rps.h   |   3 +-
 3 files changed, 154 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
index de3eaef40596..9855e6f0ce7c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
@@ -54,7 +54,8 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
 	static const struct i915_subtest tests[] = {
 		SUBTEST(live_rc6_manual),
 		SUBTEST(live_rps_control),
-		SUBTEST(live_rps_frequency),
+		SUBTEST(live_rps_frequency_cs),
+		SUBTEST(live_rps_frequency_srm),
 		SUBTEST(live_rps_power),
 		SUBTEST(live_rps_interrupt),
 		SUBTEST(live_gt_resume),
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index 19fa6a561de3..dbca673519a2 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -33,6 +33,7 @@ static int cmp_u64(const void *A, const void *B)
 static struct i915_vma *
 create_spin_counter(struct intel_engine_cs *engine,
 		    struct i915_address_space *vm,
+		    bool srm,
 		    u32 **cancel,
 		    u32 **counter)
 {
@@ -91,10 +92,12 @@ create_spin_counter(struct intel_engine_cs *engine,
 	*cs++ = MI_MATH_ADD;
 	*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
 
-	*cs++ = MI_STORE_REGISTER_MEM_GEN8;
-	*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
-	*cs++ = lower_32_bits(vma->node.start + 1000 * sizeof(*cs));
-	*cs++ = upper_32_bits(vma->node.start + 1000 * sizeof(*cs));
+	if (srm) {
+		*cs++ = MI_STORE_REGISTER_MEM_GEN8;
+		*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
+		*cs++ = lower_32_bits(vma->node.start + 1000 * sizeof(*cs));
+		*cs++ = upper_32_bits(vma->node.start + 1000 * sizeof(*cs));
+	}
 
 	*cs++ = MI_BATCH_BUFFER_START_GEN8;
 	*cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
@@ -103,7 +106,7 @@ create_spin_counter(struct intel_engine_cs *engine,
 	i915_gem_object_flush_map(obj);
 
 	*cancel = base + loop;
-	*counter = memset32(base + 1000, 0, 1);
+	*counter = srm ? memset32(base + 1000, 0, 1) : NULL;
 	return vma;
 }
 
@@ -301,12 +304,152 @@ static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
 	return div_u64(x[1] + 2 * x[2] + x[3], 4);
 }
 
+static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
+				  int duration_ms)
+{
+	u64 dc, dt;
+
+	dt = ktime_get();
+	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
+	usleep_range(1000 * duration_ms, 2000 * duration_ms);
+	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
+	dt = ktime_get() - dt;
+
+	return div64_u64(1000 * 1000 * dc, dt);
+}
+
+static u64 measure_cs_frequency_at(struct intel_rps *rps,
+				   struct intel_engine_cs *engine,
+				   int *freq)
+{
+	u64 x[5];
+	int i;
+
+	*freq = rps_set_check(rps, *freq);
+	for (i = 0; i < 5; i++)
+		x[i] = __measure_cs_frequency(engine, 2);
+	*freq = (*freq + read_cagf(rps)) / 2;
+
+	/* A simple triangle filter for better result stability */
+	sort(x, 5, sizeof(*x), cmp_u64, NULL);
+	return div_u64(x[1] + 2 * x[2] + x[3], 4);
+}
+
 static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
 {
 	return f_d * x > f_n * y && f_n * x < f_d * y;
 }
 
-int live_rps_frequency(void *arg)
+int live_rps_frequency_cs(void *arg)
+{
+	void (*saved_work)(struct work_struct *wrk);
+	struct intel_gt *gt = arg;
+	struct intel_rps *rps = &gt->rps;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = 0;
+
+	/*
+	 * The premise is that the GPU does change freqency at our behest.
+	 * Let's check there is a correspondence between the requested
+	 * frequency, the actual frequency, and the observed clock rate.
+	 */
+
+	if (!rps->enabled || rps->max_freq <= rps->min_freq)
+		return 0;
+
+	if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
+		return 0;
+
+	intel_gt_pm_wait_for_idle(gt);
+	saved_work = rps->work.func;
+	rps->work.func = dummy_rps_work;
+
+	for_each_engine(engine, gt, id) {
+		struct i915_request *rq;
+		struct i915_vma *vma;
+		u32 *cancel, *cntr;
+		struct {
+			u64 count;
+			int freq;
+		} min, max;
+
+		vma = create_spin_counter(engine,
+					  engine->kernel_context->vm, false,
+					  &cancel, &cntr);
+		if (IS_ERR(vma)) {
+			err = PTR_ERR(vma);
+			break;
+		}
+
+		rq = intel_engine_create_kernel_request(engine);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto err_vma;
+		}
+
+		i915_vma_lock(vma);
+		err = i915_request_await_object(rq, vma->obj, false);
+		if (!err)
+			err = i915_vma_move_to_active(vma, rq, 0);
+		if (!err)
+			err = rq->engine->emit_bb_start(rq,
+							vma->node.start,
+							PAGE_SIZE, 0);
+		i915_vma_unlock(vma);
+		i915_request_add(rq);
+		if (err)
+			goto err_vma;
+
+		if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
+			     10)) {
+			pr_err("%s: timed loop did not start\n",
+			       engine->name);
+			goto err_vma;
+		}
+
+		min.freq = rps->min_freq;
+		min.count = measure_cs_frequency_at(rps, engine, &min.freq);
+
+		max.freq = rps->max_freq;
+		max.count = measure_cs_frequency_at(rps, engine, &max.freq);
+
+		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
+			engine->name,
+			min.count, intel_gpu_freq(rps, min.freq),
+			max.count, intel_gpu_freq(rps, max.freq),
+			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
+						     max.freq * min.count));
+
+		if (!scaled_within(max.freq * min.count,
+				   min.freq * max.count,
+				   2, 3)) {
+			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
+			       engine->name,
+			       max.freq * min.count,
+			       min.freq * max.count);
+			err = -EINVAL;
+		}
+
+err_vma:
+		*cancel = MI_BATCH_BUFFER_END;
+		i915_gem_object_unpin_map(vma->obj);
+		i915_vma_unpin(vma);
+		i915_vma_put(vma);
+
+		if (igt_flush_test(gt->i915))
+			err = -EIO;
+		if (err)
+			break;
+	}
+
+	intel_gt_pm_wait_for_idle(gt);
+	rps->work.func = saved_work;
+
+	return err;
+}
+
+int live_rps_frequency_srm(void *arg)
 {
 	void (*saved_work)(struct work_struct *wrk);
 	struct intel_gt *gt = arg;
@@ -341,7 +484,7 @@ int live_rps_frequency(void *arg)
 		} min, max;
 
 		vma = create_spin_counter(engine,
-					  engine->kernel_context->vm,
+					  engine->kernel_context->vm, true,
 					  &cancel, &cntr);
 		if (IS_ERR(vma)) {
 			err = PTR_ERR(vma);
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.h b/drivers/gpu/drm/i915/gt/selftest_rps.h
index be0bf8e3f639..22e46c5341c5 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.h
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.h
@@ -7,7 +7,8 @@
 #define SELFTEST_RPS_H
 
 int live_rps_control(void *arg);
-int live_rps_frequency(void *arg);
+int live_rps_frequency_cs(void *arg);
+int live_rps_frequency_srm(void *arg);
 int live_rps_interrupt(void *arg);
 int live_rps_power(void *arg);
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Intel-gfx] ✗ Fi.CI.BAT: failure for series starting with [1/4] drm/i915/selftests: Verify frequency scaling with RPS
  2020-04-20  9:09 [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS Chris Wilson
                   ` (2 preceding siblings ...)
  2020-04-20  9:09 ` [Intel-gfx] [PATCH 4/4] drm/i915/selftests: Split RPS frequency measurement Chris Wilson
@ 2020-04-20 10:00 ` Patchwork
  2020-04-20 10:54 ` [Intel-gfx] [PATCH 1/4] " Mika Kuoppala
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2020-04-20 10:00 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/4] drm/i915/selftests: Verify frequency scaling with RPS
URL   : https://patchwork.freedesktop.org/series/76184/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_8327 -> Patchwork_17376
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_17376 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_17376, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_17376:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_selftest@live@gt_pm:
    - fi-icl-y:           [PASS][1] -> [DMESG-FAIL][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-icl-y/igt@i915_selftest@live@gt_pm.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-icl-y/igt@i915_selftest@live@gt_pm.html
    - fi-skl-6600u:       [PASS][3] -> [DMESG-FAIL][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-skl-6600u/igt@i915_selftest@live@gt_pm.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-skl-6600u/igt@i915_selftest@live@gt_pm.html
    - fi-cfl-8109u:       [PASS][5] -> [DMESG-FAIL][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-cfl-8109u/igt@i915_selftest@live@gt_pm.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-cfl-8109u/igt@i915_selftest@live@gt_pm.html
    - fi-whl-u:           [PASS][7] -> [DMESG-FAIL][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-whl-u/igt@i915_selftest@live@gt_pm.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-whl-u/igt@i915_selftest@live@gt_pm.html

  
#### Warnings ####

  * igt@i915_selftest@live@gt_pm:
    - fi-tgl-y:           [DMESG-FAIL][9] ([i915#1725]) -> [DMESG-FAIL][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-tgl-y/igt@i915_selftest@live@gt_pm.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-tgl-y/igt@i915_selftest@live@gt_pm.html
    - fi-kbl-soraka:      [DMESG-FAIL][11] ([i915#1744]) -> [DMESG-FAIL][12]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-kbl-soraka/igt@i915_selftest@live@gt_pm.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-kbl-soraka/igt@i915_selftest@live@gt_pm.html

  
#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@i915_selftest@live@gt_pm:
    - {fi-tgl-u}:         [DMESG-FAIL][13] ([i915#1725]) -> [DMESG-FAIL][14]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-tgl-u/igt@i915_selftest@live@gt_pm.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-tgl-u/igt@i915_selftest@live@gt_pm.html

  * {igt@kms_flip@basic-flip-vs-wf_vblank@a-dsi1}:
    - {fi-tgl-dsi}:       [PASS][15] -> [INCOMPLETE][16]
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-tgl-dsi/igt@kms_flip@basic-flip-vs-wf_vblank@a-dsi1.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-tgl-dsi/igt@kms_flip@basic-flip-vs-wf_vblank@a-dsi1.html

  
Known issues
------------

  Here are the changes found in Patchwork_17376 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_module_load@reload:
    - fi-skl-6770hq:      [PASS][17] -> [DMESG-WARN][18] ([i915#203]) +1 similar issue
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-skl-6770hq/igt@i915_module_load@reload.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-skl-6770hq/igt@i915_module_load@reload.html

  * igt@kms_pipe_crc_basic@read-crc-pipe-a-frame-sequence:
    - fi-skl-6770hq:      [PASS][19] -> [SKIP][20] ([fdo#109271]) +5 similar issues
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-skl-6770hq/igt@kms_pipe_crc_basic@read-crc-pipe-a-frame-sequence.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-skl-6770hq/igt@kms_pipe_crc_basic@read-crc-pipe-a-frame-sequence.html

  * igt@kms_pipe_crc_basic@read-crc-pipe-b:
    - fi-skl-6770hq:      [PASS][21] -> [DMESG-WARN][22] ([i915#106])
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-skl-6770hq/igt@kms_pipe_crc_basic@read-crc-pipe-b.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-skl-6770hq/igt@kms_pipe_crc_basic@read-crc-pipe-b.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [i915#106]: https://gitlab.freedesktop.org/drm/intel/issues/106
  [i915#1725]: https://gitlab.freedesktop.org/drm/intel/issues/1725
  [i915#1744]: https://gitlab.freedesktop.org/drm/intel/issues/1744
  [i915#203]: https://gitlab.freedesktop.org/drm/intel/issues/203


Participating hosts (49 -> 44)
------------------------------

  Additional (1): fi-kbl-7560u 
  Missing    (6): fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-bdw-samus fi-byt-clapper fi-skl-6700k2 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_8327 -> Patchwork_17376

  CI-20190529: 20190529
  CI_DRM_8327: 17e0a63ab93b19ea2bfccd9a0425c93e52a65246 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5602: a8fcccd15dcc2dd409edd23785a2d6f6e85fb682 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_17376: 589884d12beb98e0c84eb0026628ccb93ed0b946 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

589884d12beb drm/i915/selftests: Split RPS frequency measurement
affab98e14a6 drm/i915/selftests: Check RPS controls
979f48e06a1c drm/i915/selftests: Skip energy consumption tests if not controlling freq
27cd01ca67a5 drm/i915/selftests: Verify frequency scaling with RPS

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS
  2020-04-20  9:09 [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS Chris Wilson
                   ` (3 preceding siblings ...)
  2020-04-20 10:00 ` [Intel-gfx] ✗ Fi.CI.BAT: failure for series starting with [1/4] drm/i915/selftests: Verify frequency scaling with RPS Patchwork
@ 2020-04-20 10:54 ` Mika Kuoppala
  2020-04-20 11:02   ` Chris Wilson
  2020-04-20 11:12 ` [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/4] " Patchwork
  2020-04-20 15:54 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
  6 siblings, 1 reply; 14+ messages in thread
From: Mika Kuoppala @ 2020-04-20 10:54 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Chris Wilson

Chris Wilson <chris@chris-wilson.co.uk> writes:

> One of the core tenents of reclocking the GPU is that its throughput
> scales with the clock frequency. We can observe this by incrementing a
> loop counter on the GPU, and compare the different execution rates at
> the notional RPS frequencies.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/gt/selftest_gt_pm.c |   3 +-
>  drivers/gpu/drm/i915/gt/selftest_rps.c   | 249 +++++++++++++++++++++--
>  drivers/gpu/drm/i915/gt/selftest_rps.h   |   1 +
>  3 files changed, 240 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
> index 0141c334f2ac..4b2733967c42 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
> @@ -53,8 +53,9 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
>  {
>  	static const struct i915_subtest tests[] = {
>  		SUBTEST(live_rc6_manual),
> -		SUBTEST(live_rps_interrupt),
> +		SUBTEST(live_rps_frequency),
>  		SUBTEST(live_rps_power),
> +		SUBTEST(live_rps_interrupt),
>  		SUBTEST(live_gt_resume),
>  	};
>  
> diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
> index 360f56aa4b82..b1a435db1edc 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_rps.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
> @@ -6,6 +6,7 @@
>  #include <linux/sort.h>
>  
>  #include "intel_engine_pm.h"
> +#include "intel_gpu_commands.h"
>  #include "intel_gt_pm.h"
>  #include "intel_rc6.h"
>  #include "selftest_rps.h"
> @@ -17,6 +18,242 @@ static void dummy_rps_work(struct work_struct *wrk)
>  {
>  }
>  
> +static int cmp_u64(const void *A, const void *B)
> +{
> +	const u64 *a = A, *b = B;
> +
> +	if (a < b)
> +		return -1;
> +	else if (a > b)
> +		return 1;
> +	else
> +		return 0;
> +}
> +
> +static struct i915_vma *
> +create_spin_counter(struct intel_engine_cs *engine,
> +		    struct i915_address_space *vm,
> +		    u32 **cancel,
> +		    u32 **counter)
> +{
> +	enum {
> +		COUNT,

ok, it starts from zero.

> +		INC,
> +		__NGPR__,
> +	};
> +#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
> +	struct drm_i915_gem_object *obj;
> +	struct i915_vma *vma;
> +	u32 *base, *cs;
> +	int loop, i;
> +	int err;
> +
> +	obj = i915_gem_object_create_internal(vm->i915, 4096);
> +	if (IS_ERR(obj))
> +		return ERR_CAST(obj);
> +
> +	vma = i915_vma_instance(obj, vm, NULL);
> +	if (IS_ERR(vma)) {
> +		i915_gem_object_put(obj);
> +		return vma;
> +	}
> +
> +	err = i915_vma_pin(vma, 0, 0, PIN_USER);
> +	if (err) {
> +		i915_vma_put(vma);

You forgot to put the obj.

> +		return ERR_PTR(err);
> +	}
> +
> +	base = i915_gem_object_pin_map(obj, I915_MAP_WC);
> +	if (IS_ERR(base)) {
> +		i915_gem_object_put(obj);

You forgot to put the vma?

> +		return ERR_CAST(base);
> +	}
> +	cs = base;
> +
> +	*cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
> +	for (i = 0; i < __NGPR__; i++) {
> +		*cs++ = i915_mmio_reg_offset(CS_GPR(i));
> +		*cs++ = 0;
> +		*cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
> +		*cs++ = 0;
> +	}
> +
> +	*cs++ = MI_LOAD_REGISTER_IMM(1);
> +	*cs++ = i915_mmio_reg_offset(CS_GPR(INC));
> +	*cs++ = 1;
> +
> +	loop = cs - base;
> +
> +	*cs++ = MI_MATH(4);
> +	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
> +	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
> +	*cs++ = MI_MATH_ADD;
> +	*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
> +
> +	*cs++ = MI_STORE_REGISTER_MEM_GEN8;
> +	*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
> +	*cs++ = lower_32_bits(vma->node.start + 1000 * sizeof(*cs));
> +	*cs++ = upper_32_bits(vma->node.start + 1000 * sizeof(*cs));
> +
> +	*cs++ = MI_BATCH_BUFFER_START_GEN8;
> +	*cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
> +	*cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
> +
> +	i915_gem_object_flush_map(obj);
> +
> +	*cancel = base + loop;
> +	*counter = memset32(base + 1000, 0, 1);
> +	return vma;
> +}
> +
> +static u64 __measure_frequency(u32 *cntr, int duration_ms)
> +{
> +	u64 dc, dt;
> +
> +	dt = ktime_get();
> +	dc = READ_ONCE(*cntr);
> +	usleep_range(1000 * duration_ms, 2000 * duration_ms);
> +	dc = READ_ONCE(*cntr) - dc;
> +	dt = ktime_get() - dt;
> +
> +	return div64_u64(1000 * 1000 * dc, dt);
> +}
> +
> +static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
> +{
> +	u64 x[5];
> +	int i;
> +
> +	mutex_lock(&rps->lock);
> +	GEM_BUG_ON(!rps->active);
> +	intel_rps_set(rps, *freq);
> +	mutex_unlock(&rps->lock);
> +
> +	msleep(20); /* more than enough time to stabilise! */
> +
> +	for (i = 0; i < 5; i++)
> +		x[i] = __measure_frequency(cntr, 2);
> +	*freq = read_cagf(rps);
> +
> +	/* A simple triangle filter for better result stability */
> +	sort(x, 5, sizeof(*x), cmp_u64, NULL);
> +	return div_u64(x[1] + 2 * x[2] + x[3], 4);
> +}
> +
> +static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
> +{
> +	return f_d * x > f_n * y && f_n * x < f_d * y;
> +}
> +
> +int live_rps_frequency(void *arg)
> +{
> +	void (*saved_work)(struct work_struct *wrk);
> +	struct intel_gt *gt = arg;
> +	struct intel_rps *rps = &gt->rps;
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	int err = 0;
> +
> +	/*
> +	 * The premise is that the GPU does change freqency at our behest.
> +	 * Let's check there is a correspondence between the requested
> +	 * frequency, the actual frequency, and the observed clock rate.
> +	 */
> +
> +	if (!rps->enabled || rps->max_freq <= rps->min_freq)
> +		return 0;
> +
> +	if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
> +		return 0;
> +
> +	intel_gt_pm_wait_for_idle(gt);
> +	saved_work = rps->work.func;
> +	rps->work.func = dummy_rps_work;
> +
> +	for_each_engine(engine, gt, id) {
> +		struct i915_request *rq;
> +		struct i915_vma *vma;
> +		u32 *cancel, *cntr;
> +		struct {
> +			u64 count;
> +			int freq;
> +		} min, max;
> +
> +		vma = create_spin_counter(engine,
> +					  engine->kernel_context->vm,
> +					  &cancel, &cntr);
> +		if (IS_ERR(vma)) {
> +			err = PTR_ERR(vma);
> +			break;
> +		}
> +
> +		rq = intel_engine_create_kernel_request(engine);
> +		if (IS_ERR(rq)) {
> +			err = PTR_ERR(rq);
> +			goto err_vma;
> +		}
> +
> +		i915_vma_lock(vma);
> +		err = i915_request_await_object(rq, vma->obj, false);

I am puzzled what we need to wait asynchronously in here.

Further, intel_runtime_pm_get is missing.

-Mika

> +		if (!err)
> +			err = i915_vma_move_to_active(vma, rq, 0);
> +		if (!err)
> +			err = rq->engine->emit_bb_start(rq,
> +							vma->node.start,
> +							PAGE_SIZE, 0);
> +		i915_vma_unlock(vma);
> +		i915_request_add(rq);
> +		if (err)
> +			goto err_vma;
> +
> +		if (wait_for(READ_ONCE(*cntr), 10)) {
> +			pr_err("%s: timed loop did not start\n",
> +			       engine->name);
> +			goto err_vma;
> +		}
> +
> +		min.freq = rps->min_freq;
> +		min.count = measure_frequency_at(rps, cntr, &min.freq);
> +
> +		max.freq = rps->max_freq;
> +		max.count = measure_frequency_at(rps, cntr, &max.freq);
> +
> +		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
> +			engine->name,
> +			min.count, intel_gpu_freq(rps, min.freq),
> +			max.count, intel_gpu_freq(rps, max.freq),
> +			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
> +						     max.freq * min.count));
> +
> +		if (!scaled_within(max.freq * min.count,
> +				   min.freq * max.count,
> +				   1, 2)) {
> +			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
> +			       engine->name,
> +			       max.freq * min.count,
> +			       min.freq * max.count);
> +			err = -EINVAL;
> +		}
> +
> +err_vma:
> +		*cancel = MI_BATCH_BUFFER_END;
> +		i915_gem_object_unpin_map(vma->obj);
> +		i915_vma_unpin(vma);
> +		i915_vma_put(vma);
> +
> +		if (igt_flush_test(gt->i915))
> +			err = -EIO;
> +		if (err)
> +			break;
> +	}
> +
> +	intel_gt_pm_wait_for_idle(gt);
> +	rps->work.func = saved_work;
> +
> +	return err;
> +}
> +
>  static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
>  {
>  	/* Flush any previous EI */
> @@ -248,18 +485,6 @@ static u64 __measure_power(int duration_ms)
>  	return div64_u64(1000 * 1000 * dE, dt);
>  }
>  
> -static int cmp_u64(const void *A, const void *B)
> -{
> -	const u64 *a = A, *b = B;
> -
> -	if (a < b)
> -		return -1;
> -	else if (a > b)
> -		return 1;
> -	else
> -		return 0;
> -}
> -
>  static u64 measure_power_at(struct intel_rps *rps, int freq)
>  {
>  	u64 x[5];
> diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.h b/drivers/gpu/drm/i915/gt/selftest_rps.h
> index cad515a7f0e5..07c2bddf8899 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_rps.h
> +++ b/drivers/gpu/drm/i915/gt/selftest_rps.h
> @@ -6,6 +6,7 @@
>  #ifndef SELFTEST_RPS_H
>  #define SELFTEST_RPS_H
>  
> +int live_rps_frequency(void *arg);
>  int live_rps_interrupt(void *arg);
>  int live_rps_power(void *arg);
>  
> -- 
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS
  2020-04-20 10:54 ` [Intel-gfx] [PATCH 1/4] " Mika Kuoppala
@ 2020-04-20 11:02   ` Chris Wilson
  2020-04-20 12:54     ` Mika Kuoppala
  0 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2020-04-20 11:02 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2020-04-20 11:54:38)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > One of the core tenents of reclocking the GPU is that its throughput
> > scales with the clock frequency. We can observe this by incrementing a
> > loop counter on the GPU, and compare the different execution rates at
> > the notional RPS frequencies.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/gt/selftest_gt_pm.c |   3 +-
> >  drivers/gpu/drm/i915/gt/selftest_rps.c   | 249 +++++++++++++++++++++--
> >  drivers/gpu/drm/i915/gt/selftest_rps.h   |   1 +
> >  3 files changed, 240 insertions(+), 13 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
> > index 0141c334f2ac..4b2733967c42 100644
> > --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
> > +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
> > @@ -53,8 +53,9 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
> >  {
> >       static const struct i915_subtest tests[] = {
> >               SUBTEST(live_rc6_manual),
> > -             SUBTEST(live_rps_interrupt),
> > +             SUBTEST(live_rps_frequency),
> >               SUBTEST(live_rps_power),
> > +             SUBTEST(live_rps_interrupt),
> >               SUBTEST(live_gt_resume),
> >       };
> >  
> > diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
> > index 360f56aa4b82..b1a435db1edc 100644
> > --- a/drivers/gpu/drm/i915/gt/selftest_rps.c
> > +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
> > @@ -6,6 +6,7 @@
> >  #include <linux/sort.h>
> >  
> >  #include "intel_engine_pm.h"
> > +#include "intel_gpu_commands.h"
> >  #include "intel_gt_pm.h"
> >  #include "intel_rc6.h"
> >  #include "selftest_rps.h"
> > @@ -17,6 +18,242 @@ static void dummy_rps_work(struct work_struct *wrk)
> >  {
> >  }
> >  
> > +static int cmp_u64(const void *A, const void *B)
> > +{
> > +     const u64 *a = A, *b = B;
> > +
> > +     if (a < b)
> > +             return -1;
> > +     else if (a > b)
> > +             return 1;
> > +     else
> > +             return 0;
> > +}
> > +
> > +static struct i915_vma *
> > +create_spin_counter(struct intel_engine_cs *engine,
> > +                 struct i915_address_space *vm,
> > +                 u32 **cancel,
> > +                 u32 **counter)
> > +{
> > +     enum {
> > +             COUNT,
> 
> ok, it starts from zero.
> 
> > +             INC,
> > +             __NGPR__,
> > +     };
> > +#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
> > +     struct drm_i915_gem_object *obj;
> > +     struct i915_vma *vma;
> > +     u32 *base, *cs;
> > +     int loop, i;
> > +     int err;
> > +
> > +     obj = i915_gem_object_create_internal(vm->i915, 4096);
> > +     if (IS_ERR(obj))
> > +             return ERR_CAST(obj);
> > +
> > +     vma = i915_vma_instance(obj, vm, NULL);
> > +     if (IS_ERR(vma)) {
> > +             i915_gem_object_put(obj);
> > +             return vma;
> > +     }
> > +
> > +     err = i915_vma_pin(vma, 0, 0, PIN_USER);
> > +     if (err) {
> > +             i915_vma_put(vma);
> 
> You forgot to put the obj.

The i915_vma_put() is i915_gem_object_put().

I know, I am in for a reckoning when I have to fix all the allocations
for i915_vma.kref being independent of the object.

> 
> > +             return ERR_PTR(err);
> > +     }
> > +
> > +     base = i915_gem_object_pin_map(obj, I915_MAP_WC);
> > +     if (IS_ERR(base)) {
> > +             i915_gem_object_put(obj);
> 
> You forgot to put the vma?

One and the same :)

> 
> > +             return ERR_CAST(base);
> > +     }
> > +     cs = base;
> > +
> > +     *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
> > +     for (i = 0; i < __NGPR__; i++) {
> > +             *cs++ = i915_mmio_reg_offset(CS_GPR(i));
> > +             *cs++ = 0;
> > +             *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
> > +             *cs++ = 0;
> > +     }
> > +
> > +     *cs++ = MI_LOAD_REGISTER_IMM(1);
> > +     *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
> > +     *cs++ = 1;
> > +
> > +     loop = cs - base;
> > +
> > +     *cs++ = MI_MATH(4);
> > +     *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
> > +     *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
> > +     *cs++ = MI_MATH_ADD;
> > +     *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
> > +
> > +     *cs++ = MI_STORE_REGISTER_MEM_GEN8;
> > +     *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
> > +     *cs++ = lower_32_bits(vma->node.start + 1000 * sizeof(*cs));
> > +     *cs++ = upper_32_bits(vma->node.start + 1000 * sizeof(*cs));
> > +
> > +     *cs++ = MI_BATCH_BUFFER_START_GEN8;
> > +     *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
> > +     *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
> > +
> > +     i915_gem_object_flush_map(obj);
> > +
> > +     *cancel = base + loop;
> > +     *counter = memset32(base + 1000, 0, 1);
> > +     return vma;
> > +}
> > +
> > +static u64 __measure_frequency(u32 *cntr, int duration_ms)
> > +{
> > +     u64 dc, dt;
> > +
> > +     dt = ktime_get();
> > +     dc = READ_ONCE(*cntr);
> > +     usleep_range(1000 * duration_ms, 2000 * duration_ms);
> > +     dc = READ_ONCE(*cntr) - dc;
> > +     dt = ktime_get() - dt;
> > +
> > +     return div64_u64(1000 * 1000 * dc, dt);
> > +}
> > +
> > +static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
> > +{
> > +     u64 x[5];
> > +     int i;
> > +
> > +     mutex_lock(&rps->lock);
> > +     GEM_BUG_ON(!rps->active);
> > +     intel_rps_set(rps, *freq);
> > +     mutex_unlock(&rps->lock);
> > +
> > +     msleep(20); /* more than enough time to stabilise! */
> > +
> > +     for (i = 0; i < 5; i++)
> > +             x[i] = __measure_frequency(cntr, 2);
> > +     *freq = read_cagf(rps);
> > +
> > +     /* A simple triangle filter for better result stability */
> > +     sort(x, 5, sizeof(*x), cmp_u64, NULL);
> > +     return div_u64(x[1] + 2 * x[2] + x[3], 4);
> > +}
> > +
> > +static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
> > +{
> > +     return f_d * x > f_n * y && f_n * x < f_d * y;
> > +}
> > +
> > +int live_rps_frequency(void *arg)
> > +{
> > +     void (*saved_work)(struct work_struct *wrk);
> > +     struct intel_gt *gt = arg;
> > +     struct intel_rps *rps = &gt->rps;
> > +     struct intel_engine_cs *engine;
> > +     enum intel_engine_id id;
> > +     int err = 0;
> > +
> > +     /*
> > +      * The premise is that the GPU does change freqency at our behest.
> > +      * Let's check there is a correspondence between the requested
> > +      * frequency, the actual frequency, and the observed clock rate.
> > +      */
> > +
> > +     if (!rps->enabled || rps->max_freq <= rps->min_freq)
> > +             return 0;
> > +
> > +     if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
> > +             return 0;
> > +
> > +     intel_gt_pm_wait_for_idle(gt);
> > +     saved_work = rps->work.func;
> > +     rps->work.func = dummy_rps_work;
> > +
> > +     for_each_engine(engine, gt, id) {
> > +             struct i915_request *rq;
> > +             struct i915_vma *vma;
> > +             u32 *cancel, *cntr;
> > +             struct {
> > +                     u64 count;
> > +                     int freq;
> > +             } min, max;
> > +
> > +             vma = create_spin_counter(engine,
> > +                                       engine->kernel_context->vm,
> > +                                       &cancel, &cntr);
> > +             if (IS_ERR(vma)) {
> > +                     err = PTR_ERR(vma);
> > +                     break;
> > +             }
> > +
> > +             rq = intel_engine_create_kernel_request(engine);
> > +             if (IS_ERR(rq)) {
> > +                     err = PTR_ERR(rq);
> > +                     goto err_vma;
> > +             }
> > +
> > +             i915_vma_lock(vma);
> > +             err = i915_request_await_object(rq, vma->obj, false);
> 
> I am puzzled what we need to wait asynchronously in here.

To bind the vma, mostly. Yes that is now hidden away by
i915_vma_move_to_active(), but we established the pattern to always add
the waits even if we expect them to be no-ops -- because it's a hard
task to find a missing one later.
 
> Further, intel_runtime_pm_get is missing.

For what? We acquire the wakeref via the request on the engine.

We don't talk to intel_runtime_pm directly, everything we should be
doing is engine specific, which knows which gt and the power management
for that.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Intel-gfx] [PATCH 2/4] drm/i915/selftests: Skip energy consumption tests if not controlling freq
  2020-04-20  9:09 ` [Intel-gfx] [PATCH 2/4] drm/i915/selftests: Skip energy consumption tests if not controlling freq Chris Wilson
@ 2020-04-20 11:05   ` Mika Kuoppala
  0 siblings, 0 replies; 14+ messages in thread
From: Mika Kuoppala @ 2020-04-20 11:05 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Chris Wilson

Chris Wilson <chris@chris-wilson.co.uk> writes:

> If we can not manipulate the frequency with RPS, then comparing min/max
> power consumption is pointless / misleading. We will leave the warning
> about not being able to control the frequency selection via RPS to other
> tests so as not to confuse this more specialised check.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/gt/selftest_rps.c | 37 ++++++++++++++++----------
>  1 file changed, 23 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
> index b1a435db1edc..149a3de86cb9 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_rps.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
> @@ -485,26 +485,21 @@ static u64 __measure_power(int duration_ms)
>  	return div64_u64(1000 * 1000 * dE, dt);
>  }
>  
> -static u64 measure_power_at(struct intel_rps *rps, int freq)
> +static u64 measure_power_at(struct intel_rps *rps, int *freq)
>  {
>  	u64 x[5];
>  	int i;
>  
>  	mutex_lock(&rps->lock);
>  	GEM_BUG_ON(!rps->active);
> -	intel_rps_set(rps, freq);
> +	intel_rps_set(rps, *freq);
>  	mutex_unlock(&rps->lock);
>  
>  	msleep(20); /* more than enough time to stabilise! */
>  
> -	i = read_cagf(rps);
> -	if (i != freq)
> -		pr_notice("Running at %x [%uMHz], not target %x [%uMHz]\n",
> -			  i, intel_gpu_freq(rps, i),
> -			  freq, intel_gpu_freq(rps, freq));
> -
>  	for (i = 0; i < 5; i++)
>  		x[i] = __measure_power(5);
> +	*freq = read_cagf(rps);
>  
>  	/* A simple triangle filter for better result stability */
>  	sort(x, 5, sizeof(*x), cmp_u64, NULL);
> @@ -542,7 +537,10 @@ int live_rps_power(void *arg)
>  
>  	for_each_engine(engine, gt, id) {
>  		struct i915_request *rq;
> -		u64 min, max;
> +		struct {
> +			u64 power;
> +			int freq;

Typelocking the hw reprentation might have some benefits.

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> +		} min, max;
>  
>  		if (!intel_engine_can_store_dword(engine))
>  			continue;
> @@ -565,16 +563,27 @@ int live_rps_power(void *arg)
>  			break;
>  		}
>  
> -		max = measure_power_at(rps, rps->max_freq);
> -		min = measure_power_at(rps, rps->min_freq);
> +		max.freq = rps->max_freq;
> +		max.power = measure_power_at(rps, &max.freq);
> +
> +		min.freq = rps->min_freq;
> +		min.power = measure_power_at(rps, &min.freq);
>  
>  		igt_spinner_end(&spin);
>  
>  		pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
>  			engine->name,
> -			min, intel_gpu_freq(rps, rps->min_freq),
> -			max, intel_gpu_freq(rps, rps->max_freq));
> -		if (11 * min > 10 * max) {
> +			min.power, intel_gpu_freq(rps, min.freq),
> +			max.power, intel_gpu_freq(rps, max.freq));
> +
> +		if (10 * min.freq >= 9 * max.freq) {
> +			pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n",
> +				  min.freq, intel_gpu_freq(rps, min.freq),
> +				  max.freq, intel_gpu_freq(rps, max.freq));
> +			continue;
> +		}
> +
> +		if (11 * min.power > 10 * max.power) {
>  			pr_err("%s: did not conserve power when setting lower frequency!\n",
>  			       engine->name);
>  			err = -EINVAL;
> -- 
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/4] drm/i915/selftests: Verify frequency scaling with RPS
  2020-04-20  9:09 [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS Chris Wilson
                   ` (4 preceding siblings ...)
  2020-04-20 10:54 ` [Intel-gfx] [PATCH 1/4] " Mika Kuoppala
@ 2020-04-20 11:12 ` Patchwork
  2020-04-20 15:54 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
  6 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2020-04-20 11:12 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/4] drm/i915/selftests: Verify frequency scaling with RPS
URL   : https://patchwork.freedesktop.org/series/76184/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_8327 -> Patchwork_17376
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_17376:

### IGT changes ###

#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * {igt@kms_flip@basic-flip-vs-wf_vblank@a-dsi1}:
    - {fi-tgl-dsi}:       [PASS][1] -> [INCOMPLETE][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-tgl-dsi/igt@kms_flip@basic-flip-vs-wf_vblank@a-dsi1.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-tgl-dsi/igt@kms_flip@basic-flip-vs-wf_vblank@a-dsi1.html

  
Known issues
------------

  Here are the changes found in Patchwork_17376 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_module_load@reload:
    - fi-skl-6770hq:      [PASS][3] -> [DMESG-WARN][4] ([i915#203]) +1 similar issue
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-skl-6770hq/igt@i915_module_load@reload.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-skl-6770hq/igt@i915_module_load@reload.html

  * igt@i915_selftest@live@gt_pm:
    - fi-icl-y:           [PASS][5] -> [DMESG-FAIL][6] ([i915#1751])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-icl-y/igt@i915_selftest@live@gt_pm.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-icl-y/igt@i915_selftest@live@gt_pm.html
    - fi-skl-6600u:       [PASS][7] -> [DMESG-FAIL][8] ([i915#1751])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-skl-6600u/igt@i915_selftest@live@gt_pm.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-skl-6600u/igt@i915_selftest@live@gt_pm.html
    - fi-cfl-8109u:       [PASS][9] -> [DMESG-FAIL][10] ([i915#1751])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-cfl-8109u/igt@i915_selftest@live@gt_pm.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-cfl-8109u/igt@i915_selftest@live@gt_pm.html
    - fi-whl-u:           [PASS][11] -> [DMESG-FAIL][12] ([i915#1751])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-whl-u/igt@i915_selftest@live@gt_pm.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-whl-u/igt@i915_selftest@live@gt_pm.html

  * igt@kms_pipe_crc_basic@read-crc-pipe-a-frame-sequence:
    - fi-skl-6770hq:      [PASS][13] -> [SKIP][14] ([fdo#109271]) +5 similar issues
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-skl-6770hq/igt@kms_pipe_crc_basic@read-crc-pipe-a-frame-sequence.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-skl-6770hq/igt@kms_pipe_crc_basic@read-crc-pipe-a-frame-sequence.html

  * igt@kms_pipe_crc_basic@read-crc-pipe-b:
    - fi-skl-6770hq:      [PASS][15] -> [DMESG-WARN][16] ([i915#106])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-skl-6770hq/igt@kms_pipe_crc_basic@read-crc-pipe-b.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-skl-6770hq/igt@kms_pipe_crc_basic@read-crc-pipe-b.html

  
#### Warnings ####

  * igt@i915_selftest@live@gt_pm:
    - fi-tgl-y:           [DMESG-FAIL][17] ([i915#1725]) -> [DMESG-FAIL][18] ([i915#1744])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-tgl-y/igt@i915_selftest@live@gt_pm.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-tgl-y/igt@i915_selftest@live@gt_pm.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [i915#106]: https://gitlab.freedesktop.org/drm/intel/issues/106
  [i915#1725]: https://gitlab.freedesktop.org/drm/intel/issues/1725
  [i915#1744]: https://gitlab.freedesktop.org/drm/intel/issues/1744
  [i915#1751]: https://gitlab.freedesktop.org/drm/intel/issues/1751
  [i915#203]: https://gitlab.freedesktop.org/drm/intel/issues/203


Participating hosts (49 -> 44)
------------------------------

  Additional (1): fi-kbl-7560u 
  Missing    (6): fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-bdw-samus fi-byt-clapper fi-skl-6700k2 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_8327 -> Patchwork_17376

  CI-20190529: 20190529
  CI_DRM_8327: 17e0a63ab93b19ea2bfccd9a0425c93e52a65246 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5602: a8fcccd15dcc2dd409edd23785a2d6f6e85fb682 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_17376: 589884d12beb98e0c84eb0026628ccb93ed0b946 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

589884d12beb drm/i915/selftests: Split RPS frequency measurement
affab98e14a6 drm/i915/selftests: Check RPS controls
979f48e06a1c drm/i915/selftests: Skip energy consumption tests if not controlling freq
27cd01ca67a5 drm/i915/selftests: Verify frequency scaling with RPS

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS
  2020-04-20 11:02   ` Chris Wilson
@ 2020-04-20 12:54     ` Mika Kuoppala
  2020-04-20 13:08       ` Chris Wilson
  0 siblings, 1 reply; 14+ messages in thread
From: Mika Kuoppala @ 2020-04-20 12:54 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2020-04-20 11:54:38)
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> 
>> > One of the core tenents of reclocking the GPU is that its throughput
>> > scales with the clock frequency. We can observe this by incrementing a
>> > loop counter on the GPU, and compare the different execution rates at
>> > the notional RPS frequencies.
>> >
>> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> > ---
>> >  drivers/gpu/drm/i915/gt/selftest_gt_pm.c |   3 +-
>> >  drivers/gpu/drm/i915/gt/selftest_rps.c   | 249 +++++++++++++++++++++--
>> >  drivers/gpu/drm/i915/gt/selftest_rps.h   |   1 +
>> >  3 files changed, 240 insertions(+), 13 deletions(-)
>> >
>> > diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
>> > index 0141c334f2ac..4b2733967c42 100644
>> > --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
>> > +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
>> > @@ -53,8 +53,9 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
>> >  {
>> >       static const struct i915_subtest tests[] = {
>> >               SUBTEST(live_rc6_manual),
>> > -             SUBTEST(live_rps_interrupt),
>> > +             SUBTEST(live_rps_frequency),
>> >               SUBTEST(live_rps_power),
>> > +             SUBTEST(live_rps_interrupt),
>> >               SUBTEST(live_gt_resume),
>> >       };
>> >  
>> > diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
>> > index 360f56aa4b82..b1a435db1edc 100644
>> > --- a/drivers/gpu/drm/i915/gt/selftest_rps.c
>> > +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
>> > @@ -6,6 +6,7 @@
>> >  #include <linux/sort.h>
>> >  
>> >  #include "intel_engine_pm.h"
>> > +#include "intel_gpu_commands.h"
>> >  #include "intel_gt_pm.h"
>> >  #include "intel_rc6.h"
>> >  #include "selftest_rps.h"
>> > @@ -17,6 +18,242 @@ static void dummy_rps_work(struct work_struct *wrk)
>> >  {
>> >  }
>> >  
>> > +static int cmp_u64(const void *A, const void *B)
>> > +{
>> > +     const u64 *a = A, *b = B;
>> > +
>> > +     if (a < b)
>> > +             return -1;
>> > +     else if (a > b)
>> > +             return 1;
>> > +     else
>> > +             return 0;
>> > +}
>> > +
>> > +static struct i915_vma *
>> > +create_spin_counter(struct intel_engine_cs *engine,
>> > +                 struct i915_address_space *vm,
>> > +                 u32 **cancel,
>> > +                 u32 **counter)
>> > +{
>> > +     enum {
>> > +             COUNT,
>> 
>> ok, it starts from zero.
>> 
>> > +             INC,
>> > +             __NGPR__,
>> > +     };
>> > +#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
>> > +     struct drm_i915_gem_object *obj;
>> > +     struct i915_vma *vma;
>> > +     u32 *base, *cs;
>> > +     int loop, i;
>> > +     int err;
>> > +
>> > +     obj = i915_gem_object_create_internal(vm->i915, 4096);
>> > +     if (IS_ERR(obj))
>> > +             return ERR_CAST(obj);
>> > +
>> > +     vma = i915_vma_instance(obj, vm, NULL);
>> > +     if (IS_ERR(vma)) {
>> > +             i915_gem_object_put(obj);
>> > +             return vma;
>> > +     }
>> > +
>> > +     err = i915_vma_pin(vma, 0, 0, PIN_USER);
>> > +     if (err) {
>> > +             i915_vma_put(vma);
>> 
>> You forgot to put the obj.
>
> The i915_vma_put() is i915_gem_object_put().
>
> I know, I am in for a reckoning when I have to fix all the allocations
> for i915_vma.kref being independent of the object.
>
>> 
>> > +             return ERR_PTR(err);
>> > +     }
>> > +
>> > +     base = i915_gem_object_pin_map(obj, I915_MAP_WC);
>> > +     if (IS_ERR(base)) {
>> > +             i915_gem_object_put(obj);
>> 
>> You forgot to put the vma?
>
> One and the same :)
>
>> 
>> > +             return ERR_CAST(base);
>> > +     }
>> > +     cs = base;
>> > +
>> > +     *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
>> > +     for (i = 0; i < __NGPR__; i++) {
>> > +             *cs++ = i915_mmio_reg_offset(CS_GPR(i));
>> > +             *cs++ = 0;
>> > +             *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
>> > +             *cs++ = 0;
>> > +     }
>> > +
>> > +     *cs++ = MI_LOAD_REGISTER_IMM(1);
>> > +     *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
>> > +     *cs++ = 1;
>> > +
>> > +     loop = cs - base;
>> > +
>> > +     *cs++ = MI_MATH(4);
>> > +     *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
>> > +     *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
>> > +     *cs++ = MI_MATH_ADD;
>> > +     *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
>> > +
>> > +     *cs++ = MI_STORE_REGISTER_MEM_GEN8;
>> > +     *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
>> > +     *cs++ = lower_32_bits(vma->node.start + 1000 * sizeof(*cs));
>> > +     *cs++ = upper_32_bits(vma->node.start + 1000 * sizeof(*cs));
>> > +
>> > +     *cs++ = MI_BATCH_BUFFER_START_GEN8;
>> > +     *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
>> > +     *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
>> > +
>> > +     i915_gem_object_flush_map(obj);
>> > +
>> > +     *cancel = base + loop;
>> > +     *counter = memset32(base + 1000, 0, 1);
>> > +     return vma;
>> > +}
>> > +
>> > +static u64 __measure_frequency(u32 *cntr, int duration_ms)
>> > +{
>> > +     u64 dc, dt;
>> > +
>> > +     dt = ktime_get();
>> > +     dc = READ_ONCE(*cntr);
>> > +     usleep_range(1000 * duration_ms, 2000 * duration_ms);
>> > +     dc = READ_ONCE(*cntr) - dc;
>> > +     dt = ktime_get() - dt;
>> > +
>> > +     return div64_u64(1000 * 1000 * dc, dt);
>> > +}
>> > +
>> > +static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
>> > +{
>> > +     u64 x[5];
>> > +     int i;
>> > +
>> > +     mutex_lock(&rps->lock);
>> > +     GEM_BUG_ON(!rps->active);
>> > +     intel_rps_set(rps, *freq);
>> > +     mutex_unlock(&rps->lock);
>> > +
>> > +     msleep(20); /* more than enough time to stabilise! */
>> > +
>> > +     for (i = 0; i < 5; i++)
>> > +             x[i] = __measure_frequency(cntr, 2);
>> > +     *freq = read_cagf(rps);
>> > +
>> > +     /* A simple triangle filter for better result stability */
>> > +     sort(x, 5, sizeof(*x), cmp_u64, NULL);
>> > +     return div_u64(x[1] + 2 * x[2] + x[3], 4);
>> > +}
>> > +
>> > +static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
>> > +{
>> > +     return f_d * x > f_n * y && f_n * x < f_d * y;
>> > +}
>> > +
>> > +int live_rps_frequency(void *arg)
>> > +{
>> > +     void (*saved_work)(struct work_struct *wrk);
>> > +     struct intel_gt *gt = arg;
>> > +     struct intel_rps *rps = &gt->rps;
>> > +     struct intel_engine_cs *engine;
>> > +     enum intel_engine_id id;
>> > +     int err = 0;
>> > +
>> > +     /*
>> > +      * The premise is that the GPU does change freqency at our behest.
>> > +      * Let's check there is a correspondence between the requested
>> > +      * frequency, the actual frequency, and the observed clock rate.
>> > +      */
>> > +
>> > +     if (!rps->enabled || rps->max_freq <= rps->min_freq)
>> > +             return 0;
>> > +
>> > +     if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
>> > +             return 0;
>> > +
>> > +     intel_gt_pm_wait_for_idle(gt);
>> > +     saved_work = rps->work.func;
>> > +     rps->work.func = dummy_rps_work;
>> > +
>> > +     for_each_engine(engine, gt, id) {
>> > +             struct i915_request *rq;
>> > +             struct i915_vma *vma;
>> > +             u32 *cancel, *cntr;
>> > +             struct {
>> > +                     u64 count;
>> > +                     int freq;
>> > +             } min, max;
>> > +
>> > +             vma = create_spin_counter(engine,
>> > +                                       engine->kernel_context->vm,
>> > +                                       &cancel, &cntr);
>> > +             if (IS_ERR(vma)) {
>> > +                     err = PTR_ERR(vma);
>> > +                     break;
>> > +             }
>> > +
>> > +             rq = intel_engine_create_kernel_request(engine);
>> > +             if (IS_ERR(rq)) {
>> > +                     err = PTR_ERR(rq);
>> > +                     goto err_vma;
>> > +             }
>> > +
>> > +             i915_vma_lock(vma);
>> > +             err = i915_request_await_object(rq, vma->obj, false);
>> 
>> I am puzzled what we need to wait asynchronously in here.
>
> To bind the vma, mostly. Yes that is now hidden away by
> i915_vma_move_to_active(), but we established the pattern to always add
> the waits even if we expect them to be no-ops -- because it's a hard
> task to find a missing one later.
>  
>> Further, intel_runtime_pm_get is missing.
>
> For what? We acquire the wakeref via the request on the engine.
>
> We don't talk to intel_runtime_pm directly, everything we should be
> doing is engine specific, which knows which gt and the power management
> for that.

I was worried about the read_cagf().

But as it is implied that the request will be running
and we have total control of it during reading th cagf,
it will work like this.

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>



> -Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS
  2020-04-20 12:54     ` Mika Kuoppala
@ 2020-04-20 13:08       ` Chris Wilson
  0 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2020-04-20 13:08 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2020-04-20 13:54:21)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > Quoting Mika Kuoppala (2020-04-20 11:54:38)
> >> Further, intel_runtime_pm_get is missing.
> >
> > For what? We acquire the wakeref via the request on the engine.
> >
> > We don't talk to intel_runtime_pm directly, everything we should be
> > doing is engine specific, which knows which gt and the power management
> > for that.
> 
> I was worried about the read_cagf().
> 
> But as it is implied that the request will be running
> and we have total control of it during reading th cagf,
> it will work like this.

Yes, we are only looking at the HW inside either the request or explicit
pm_gt. And our mmio debug is good enough to warn if we do attempt to
read cagf without the rpm wakeref.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Intel-gfx] ✗ Fi.CI.IGT: failure for series starting with [1/4] drm/i915/selftests: Verify frequency scaling with RPS
  2020-04-20  9:09 [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS Chris Wilson
                   ` (5 preceding siblings ...)
  2020-04-20 11:12 ` [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/4] " Patchwork
@ 2020-04-20 15:54 ` Patchwork
  6 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2020-04-20 15:54 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/4] drm/i915/selftests: Verify frequency scaling with RPS
URL   : https://patchwork.freedesktop.org/series/76184/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_8327_full -> Patchwork_17376_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_17376_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_17376_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_17376_full:

### IGT changes ###

#### Possible regressions ####

  * igt@kms_psr@sprite_render:
    - shard-tglb:         [PASS][1] -> [FAIL][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-tglb7/igt@kms_psr@sprite_render.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-tglb3/igt@kms_psr@sprite_render.html

  * igt@runner@aborted:
    - shard-tglb:         NOTRUN -> [FAIL][3]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-tglb3/igt@runner@aborted.html

  
#### Warnings ####

  * igt@i915_selftest@live@gt_pm:
    - shard-skl:          [DMESG-FAIL][4] ([i915#1744]) -> [DMESG-FAIL][5]
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-skl4/igt@i915_selftest@live@gt_pm.html
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-skl8/igt@i915_selftest@live@gt_pm.html

  
Known issues
------------

  Here are the changes found in Patchwork_17376_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_whisper@basic-queues-priority-all:
    - shard-tglb:         [PASS][6] -> [INCOMPLETE][7] ([i915#750])
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-tglb5/igt@gem_exec_whisper@basic-queues-priority-all.html
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-tglb3/igt@gem_exec_whisper@basic-queues-priority-all.html

  * igt@i915_pm_dc@dc6-psr:
    - shard-iclb:         [PASS][8] -> [FAIL][9] ([i915#454])
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-iclb1/igt@i915_pm_dc@dc6-psr.html
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-iclb8/igt@i915_pm_dc@dc6-psr.html

  * igt@kms_big_fb@linear-32bpp-rotate-0:
    - shard-kbl:          [PASS][10] -> [FAIL][11] ([i915#1119] / [i915#93] / [i915#95])
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-kbl1/igt@kms_big_fb@linear-32bpp-rotate-0.html
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-kbl3/igt@kms_big_fb@linear-32bpp-rotate-0.html
    - shard-apl:          [PASS][12] -> [FAIL][13] ([i915#1119] / [i915#95])
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-apl3/igt@kms_big_fb@linear-32bpp-rotate-0.html
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-apl8/igt@kms_big_fb@linear-32bpp-rotate-0.html

  * igt@kms_cursor_crc@pipe-a-cursor-suspend:
    - shard-skl:          [PASS][14] -> [FAIL][15] ([i915#54])
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-skl10/igt@kms_cursor_crc@pipe-a-cursor-suspend.html
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-skl10/igt@kms_cursor_crc@pipe-a-cursor-suspend.html

  * igt@kms_cursor_crc@pipe-c-cursor-suspend:
    - shard-apl:          [PASS][16] -> [DMESG-WARN][17] ([i915#180]) +1 similar issue
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-apl4/igt@kms_cursor_crc@pipe-c-cursor-suspend.html
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-apl6/igt@kms_cursor_crc@pipe-c-cursor-suspend.html

  * igt@kms_frontbuffer_tracking@psr-1p-primscrn-spr-indfb-fullscreen:
    - shard-tglb:         [PASS][18] -> [SKIP][19] ([i915#668]) +6 similar issues
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-tglb7/igt@kms_frontbuffer_tracking@psr-1p-primscrn-spr-indfb-fullscreen.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-tglb6/igt@kms_frontbuffer_tracking@psr-1p-primscrn-spr-indfb-fullscreen.html

  * igt@kms_hdr@bpc-switch-suspend:
    - shard-skl:          [PASS][20] -> [FAIL][21] ([i915#1188])
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-skl9/igt@kms_hdr@bpc-switch-suspend.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-skl4/igt@kms_hdr@bpc-switch-suspend.html

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b:
    - shard-skl:          [PASS][22] -> [INCOMPLETE][23] ([i915#69])
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-skl4/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-skl6/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b.html

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c:
    - shard-kbl:          [PASS][24] -> [DMESG-WARN][25] ([i915#180]) +1 similar issue
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-kbl1/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-kbl2/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c.html

  * igt@kms_psr@no_drrs:
    - shard-iclb:         [PASS][26] -> [FAIL][27] ([i915#173])
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-iclb4/igt@kms_psr@no_drrs.html
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-iclb1/igt@kms_psr@no_drrs.html

  * igt@kms_psr@psr2_primary_mmap_cpu:
    - shard-iclb:         [PASS][28] -> [SKIP][29] ([fdo#109441]) +1 similar issue
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-iclb2/igt@kms_psr@psr2_primary_mmap_cpu.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-iclb6/igt@kms_psr@psr2_primary_mmap_cpu.html

  * igt@kms_setmode@basic:
    - shard-kbl:          [PASS][30] -> [FAIL][31] ([i915#31])
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-kbl2/igt@kms_setmode@basic.html
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-kbl1/igt@kms_setmode@basic.html

  
#### Possible fixes ####

  * igt@i915_suspend@fence-restore-untiled:
    - shard-skl:          [INCOMPLETE][32] ([i915#69]) -> [PASS][33]
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-skl1/igt@i915_suspend@fence-restore-untiled.html
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-skl9/igt@i915_suspend@fence-restore-untiled.html

  * igt@kms_cursor_crc@pipe-b-cursor-128x128-random:
    - shard-skl:          [FAIL][34] ([i915#54]) -> [PASS][35] +1 similar issue
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-skl1/igt@kms_cursor_crc@pipe-b-cursor-128x128-random.html
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-skl6/igt@kms_cursor_crc@pipe-b-cursor-128x128-random.html

  * {igt@kms_flip@flip-vs-suspend-interruptible@c-dp1}:
    - shard-apl:          [DMESG-WARN][36] ([i915#180]) -> [PASS][37] +2 similar issues
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-apl6/igt@kms_flip@flip-vs-suspend-interruptible@c-dp1.html
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-apl1/igt@kms_flip@flip-vs-suspend-interruptible@c-dp1.html

  * {igt@kms_flip@flip-vs-suspend@c-dp1}:
    - shard-kbl:          [DMESG-WARN][38] ([i915#180]) -> [PASS][39] +8 similar issues
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-kbl6/igt@kms_flip@flip-vs-suspend@c-dp1.html
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-kbl6/igt@kms_flip@flip-vs-suspend@c-dp1.html

  * igt@kms_hdr@bpc-switch-dpms:
    - shard-skl:          [FAIL][40] ([i915#1188]) -> [PASS][41]
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-skl3/igt@kms_hdr@bpc-switch-dpms.html
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-skl1/igt@kms_hdr@bpc-switch-dpms.html

  * igt@kms_plane_alpha_blend@pipe-b-constant-alpha-min:
    - shard-skl:          [FAIL][42] ([fdo#108145] / [i915#265]) -> [PASS][43]
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-skl9/igt@kms_plane_alpha_blend@pipe-b-constant-alpha-min.html
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-skl5/igt@kms_plane_alpha_blend@pipe-b-constant-alpha-min.html

  * igt@kms_psr@psr2_cursor_plane_move:
    - shard-iclb:         [SKIP][44] ([fdo#109441]) -> [PASS][45] +2 similar issues
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-iclb6/igt@kms_psr@psr2_cursor_plane_move.html
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-iclb2/igt@kms_psr@psr2_cursor_plane_move.html

  * igt@kms_setmode@basic:
    - shard-apl:          [FAIL][46] ([i915#31]) -> [PASS][47]
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-apl1/igt@kms_setmode@basic.html
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-apl3/igt@kms_setmode@basic.html

  
#### Warnings ####

  * igt@i915_pm_dc@dc3co-vpb-simulation:
    - shard-iclb:         [SKIP][48] ([i915#588]) -> [SKIP][49] ([i915#658])
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-iclb2/igt@i915_pm_dc@dc3co-vpb-simulation.html
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-iclb3/igt@i915_pm_dc@dc3co-vpb-simulation.html

  * igt@i915_pm_dc@dc6-psr:
    - shard-tglb:         [FAIL][50] ([i915#454]) -> [SKIP][51] ([i915#668])
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-tglb7/igt@i915_pm_dc@dc6-psr.html
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-tglb6/igt@i915_pm_dc@dc6-psr.html

  * igt@i915_selftest@live@gt_pm:
    - shard-tglb:         [DMESG-FAIL][52] ([i915#1725]) -> [DMESG-FAIL][53] ([i915#1744])
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-tglb6/igt@i915_selftest@live@gt_pm.html
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-tglb5/igt@i915_selftest@live@gt_pm.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [i915#1119]: https://gitlab.freedesktop.org/drm/intel/issues/1119
  [i915#1188]: https://gitlab.freedesktop.org/drm/intel/issues/1188
  [i915#1542]: https://gitlab.freedesktop.org/drm/intel/issues/1542
  [i915#1725]: https://gitlab.freedesktop.org/drm/intel/issues/1725
  [i915#173]: https://gitlab.freedesktop.org/drm/intel/issues/173
  [i915#1744]: https://gitlab.freedesktop.org/drm/intel/issues/1744
  [i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180
  [i915#265]: https://gitlab.freedesktop.org/drm/intel/issues/265
  [i915#31]: https://gitlab.freedesktop.org/drm/intel/issues/31
  [i915#34]: https://gitlab.freedesktop.org/drm/intel/issues/34
  [i915#454]: https://gitlab.freedesktop.org/drm/intel/issues/454
  [i915#54]: https://gitlab.freedesktop.org/drm/intel/issues/54
  [i915#588]: https://gitlab.freedesktop.org/drm/intel/issues/588
  [i915#658]: https://gitlab.freedesktop.org/drm/intel/issues/658
  [i915#668]: https://gitlab.freedesktop.org/drm/intel/issues/668
  [i915#69]: https://gitlab.freedesktop.org/drm/intel/issues/69
  [i915#750]: https://gitlab.freedesktop.org/drm/intel/issues/750
  [i915#93]: https://gitlab.freedesktop.org/drm/intel/issues/93
  [i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95


Participating hosts (10 -> 10)
------------------------------

  No changes in participating hosts


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_8327 -> Patchwork_17376

  CI-20190529: 20190529
  CI_DRM_8327: 17e0a63ab93b19ea2bfccd9a0425c93e52a65246 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5602: a8fcccd15dcc2dd409edd23785a2d6f6e85fb682 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_17376: 589884d12beb98e0c84eb0026628ccb93ed0b946 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Intel-gfx] [PATCH 3/4] drm/i915/selftests: Check RPS controls
  2020-04-20  9:09 ` [Intel-gfx] [PATCH 3/4] drm/i915/selftests: Check RPS controls Chris Wilson
@ 2020-04-20 16:41   ` Mika Kuoppala
  0 siblings, 0 replies; 14+ messages in thread
From: Mika Kuoppala @ 2020-04-20 16:41 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Chris Wilson

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Check that the GPU does respond to our RPS frequency requests by setting
> our desired frequency.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/gt/selftest_gt_pm.c |   1 +
>  drivers/gpu/drm/i915/gt/selftest_rps.c   | 196 ++++++++++++++++++++---
>  drivers/gpu/drm/i915/gt/selftest_rps.h   |   1 +
>  3 files changed, 174 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
> index 4b2733967c42..de3eaef40596 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
> @@ -53,6 +53,7 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
>  {
>  	static const struct i915_subtest tests[] = {
>  		SUBTEST(live_rc6_manual),
> +		SUBTEST(live_rps_control),
>  		SUBTEST(live_rps_frequency),
>  		SUBTEST(live_rps_power),
>  		SUBTEST(live_rps_interrupt),
> diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
> index 149a3de86cb9..19fa6a561de3 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_rps.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
> @@ -107,6 +107,172 @@ create_spin_counter(struct intel_engine_cs *engine,
>  	return vma;
>  }
>  
> +static u8 rps_set_check(struct intel_rps *rps, u8 freq)
> +{
> +	u8 history[64], i;
> +	unsigned long end;
> +	int sleep;
> +
> +	mutex_lock(&rps->lock);
> +	GEM_BUG_ON(!rps->active);
> +	intel_rps_set(rps, freq);
> +	GEM_BUG_ON(rps->last_freq != freq);
> +	mutex_unlock(&rps->lock);
> +
> +	i = 0;
> +	memset(history, freq, sizeof(history));
> +	sleep = 20;
> +
> +	/* The PCU does not change instantly, but drifts towards the goal? */
> +	end = jiffies + msecs_to_jiffies(50);
> +	do {
> +		u8 act;
> +
> +		act = read_cagf(rps);
> +		if (time_after(jiffies, end))
> +			return act;
> +
> +		/* Target acquired */
> +		if (act == freq)
> +			return act;
> +
> +		/* Any change witin the last N samples? */

within

> +		if (!memchr_inv(history, act, sizeof(history)))
> +			return act;
> +
> +		history[i] = act;
> +		i = (i + 1) % ARRAY_SIZE(history);
> +
> +		usleep_range(sleep, 2 * sleep);
> +		sleep *= 2;
> +		if (sleep > 1000)
> +			sleep = 1000;
> +	} while (1);
> +}
> +
> +int live_rps_control(void *arg)
> +{
> +	struct intel_gt *gt = arg;
> +	struct intel_rps *rps = &gt->rps;
> +	void (*saved_work)(struct work_struct *wrk);
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	struct igt_spinner spin;
> +	int err = 0;
> +
> +	/*
> +	 * Check that the actual frequency matches our requested frequency,
> +	 * to verify our control mechanism. We have to be careful that the
> +	 * PCU may throttle the GPU in which case the actual frequency used
> +	 * will be lowered than requested.
> +	 */
> +
> +	if (!rps->enabled || rps->max_freq <= rps->min_freq)
> +		return 0;
> +
> +	if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
> +		return 0;
> +
> +	if (igt_spinner_init(&spin, gt))
> +		return -ENOMEM;
> +
> +	intel_gt_pm_wait_for_idle(gt);
> +	saved_work = rps->work.func;
> +	rps->work.func = dummy_rps_work;
> +
> +	intel_gt_pm_get(gt);
> +	for_each_engine(engine, gt, id) {
> +		struct i915_request *rq;
> +		ktime_t min_dt, max_dt;
> +		int act, f, limit;
> +		int min, max;
> +
> +		if (!intel_engine_can_store_dword(engine))
> +			continue;
> +
> +		rq = igt_spinner_create_request(&spin,
> +						engine->kernel_context,
> +						MI_NOOP);
> +		if (IS_ERR(rq)) {
> +			err = PTR_ERR(rq);
> +			break;
> +		}
> +
> +		i915_request_add(rq);
> +
> +		if (!igt_wait_for_spinner(&spin, rq)) {
> +			pr_err("%s: RPS spinner did not start\n",
> +			       engine->name);
> +			igt_spinner_end(&spin);
> +			intel_gt_set_wedged(engine->gt);
> +			err = -EIO;
> +			break;
> +		}
> +
> +		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
> +			pr_err("%s: could not set minimum frequency [%x], only %x!\n",
> +			       engine->name, rps->min_freq, read_cagf(rps));
> +			igt_spinner_end(&spin);
> +			err = -EINVAL;
> +			break;
> +		}
> +
> +		for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
> +			act = rps_set_check(rps, f);
> +			if (act < f)
> +				break;
> +		}

After discussion in irc, it seems we might have to settle that we
manage to get atleast one bin upwards and not have more expectations.

If/when issues get resolved, we might want to consider a minimum
limit of bins a hardware needs to reach instead of bin_zero + 1 :P

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>


> +
> +		limit = rps_set_check(rps, f);
> +
> +		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
> +			pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
> +			       engine->name, rps->min_freq, read_cagf(rps));
> +			igt_spinner_end(&spin);
> +			err = -EINVAL;
> +			break;
> +		}
> +
> +		max_dt = ktime_get();
> +		max = rps_set_check(rps, limit);
> +		max_dt = ktime_sub(ktime_get(), max_dt);
> +
> +		min_dt = ktime_get();
> +		min = rps_set_check(rps, rps->min_freq);
> +		min_dt = ktime_sub(ktime_get(), min_dt);
> +
> +		igt_spinner_end(&spin);
> +
> +		pr_info("%s: range:[%x:%uMHz, %x:%uMHz] actual:[ %x:%uMHz, %x:%uMHz], %x:%x response %lluns:%lluns\n",
> +			engine->name,
> +			rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
> +			rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
> +			act, intel_gpu_freq(rps, act),
> +			limit, intel_gpu_freq(rps, limit),
> +			min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));
> +
> +		if (limit == rps->min_freq) {
> +			pr_err("%s: GPU throttled to minimum!\n",
> +			       engine->name);
> +			err = -ENODEV;
> +			break;
> +		}
> +
> +		if (igt_flush_test(gt->i915)) {
> +			err = -EIO;
> +			break;
> +		}
> +	}
> +	intel_gt_pm_put(gt);
> +
> +	igt_spinner_fini(&spin);
> +
> +	intel_gt_pm_wait_for_idle(gt);
> +	rps->work.func = saved_work;
> +
> +	return err;
> +}
> +
>  static u64 __measure_frequency(u32 *cntr, int duration_ms)
>  {
>  	u64 dc, dt;
> @@ -125,16 +291,10 @@ static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
>  	u64 x[5];
>  	int i;
>  
> -	mutex_lock(&rps->lock);
> -	GEM_BUG_ON(!rps->active);
> -	intel_rps_set(rps, *freq);
> -	mutex_unlock(&rps->lock);
> -
> -	msleep(20); /* more than enough time to stabilise! */
> -
> +	*freq = rps_set_check(rps, *freq);
>  	for (i = 0; i < 5; i++)
>  		x[i] = __measure_frequency(cntr, 2);
> -	*freq = read_cagf(rps);
> +	*freq = (*freq + read_cagf(rps)) / 2;
>  
>  	/* A simple triangle filter for better result stability */
>  	sort(x, 5, sizeof(*x), cmp_u64, NULL);
> @@ -279,10 +439,7 @@ static int __rps_up_interrupt(struct intel_rps *rps,
>  	if (!intel_engine_can_store_dword(engine))
>  		return 0;
>  
> -	mutex_lock(&rps->lock);
> -	GEM_BUG_ON(!rps->active);
> -	intel_rps_set(rps, rps->min_freq);
> -	mutex_unlock(&rps->lock);
> +	rps_set_check(rps, rps->min_freq);
>  
>  	rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
>  	if (IS_ERR(rq))
> @@ -354,10 +511,7 @@ static int __rps_down_interrupt(struct intel_rps *rps,
>  	struct intel_uncore *uncore = engine->uncore;
>  	u32 timeout;
>  
> -	mutex_lock(&rps->lock);
> -	GEM_BUG_ON(!rps->active);
> -	intel_rps_set(rps, rps->max_freq);
> -	mutex_unlock(&rps->lock);
> +	rps_set_check(rps, rps->max_freq);
>  
>  	if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
>  		pr_err("%s: RPS did not register DOWN interrupt\n",
> @@ -490,16 +644,10 @@ static u64 measure_power_at(struct intel_rps *rps, int *freq)
>  	u64 x[5];
>  	int i;
>  
> -	mutex_lock(&rps->lock);
> -	GEM_BUG_ON(!rps->active);
> -	intel_rps_set(rps, *freq);
> -	mutex_unlock(&rps->lock);
> -
> -	msleep(20); /* more than enough time to stabilise! */
> -
> +	*freq = rps_set_check(rps, *freq);
>  	for (i = 0; i < 5; i++)
>  		x[i] = __measure_power(5);
> -	*freq = read_cagf(rps);
> +	*freq = (*freq + read_cagf(rps)) / 2;
>  
>  	/* A simple triangle filter for better result stability */
>  	sort(x, 5, sizeof(*x), cmp_u64, NULL);
> diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.h b/drivers/gpu/drm/i915/gt/selftest_rps.h
> index 07c2bddf8899..be0bf8e3f639 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_rps.h
> +++ b/drivers/gpu/drm/i915/gt/selftest_rps.h
> @@ -6,6 +6,7 @@
>  #ifndef SELFTEST_RPS_H
>  #define SELFTEST_RPS_H
>  
> +int live_rps_control(void *arg);
>  int live_rps_frequency(void *arg);
>  int live_rps_interrupt(void *arg);
>  int live_rps_power(void *arg);
> -- 
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Intel-gfx] [PATCH 4/4] drm/i915/selftests: Split RPS frequency measurement
  2020-04-20  9:09 ` [Intel-gfx] [PATCH 4/4] drm/i915/selftests: Split RPS frequency measurement Chris Wilson
@ 2020-04-20 16:51   ` Mika Kuoppala
  0 siblings, 0 replies; 14+ messages in thread
From: Mika Kuoppala @ 2020-04-20 16:51 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Chris Wilson

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Split the frequency measurement into two modes, so that we can judge the
> impact of the llc setup on top of the pure CS frequency scaling.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/gt/selftest_gt_pm.c |   3 +-
>  drivers/gpu/drm/i915/gt/selftest_rps.c   | 157 ++++++++++++++++++++++-
>  drivers/gpu/drm/i915/gt/selftest_rps.h   |   3 +-
>  3 files changed, 154 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
> index de3eaef40596..9855e6f0ce7c 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
> @@ -54,7 +54,8 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
>  	static const struct i915_subtest tests[] = {
>  		SUBTEST(live_rc6_manual),
>  		SUBTEST(live_rps_control),
> -		SUBTEST(live_rps_frequency),
> +		SUBTEST(live_rps_frequency_cs),
> +		SUBTEST(live_rps_frequency_srm),
>  		SUBTEST(live_rps_power),
>  		SUBTEST(live_rps_interrupt),
>  		SUBTEST(live_gt_resume),
> diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
> index 19fa6a561de3..dbca673519a2 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_rps.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
> @@ -33,6 +33,7 @@ static int cmp_u64(const void *A, const void *B)
>  static struct i915_vma *
>  create_spin_counter(struct intel_engine_cs *engine,
>  		    struct i915_address_space *vm,
> +		    bool srm,
>  		    u32 **cancel,
>  		    u32 **counter)
>  {
> @@ -91,10 +92,12 @@ create_spin_counter(struct intel_engine_cs *engine,
>  	*cs++ = MI_MATH_ADD;
>  	*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
>  
> -	*cs++ = MI_STORE_REGISTER_MEM_GEN8;
> -	*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
> -	*cs++ = lower_32_bits(vma->node.start + 1000 * sizeof(*cs));
> -	*cs++ = upper_32_bits(vma->node.start + 1000 * sizeof(*cs));
> +	if (srm) {
> +		*cs++ = MI_STORE_REGISTER_MEM_GEN8;
> +		*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
> +		*cs++ = lower_32_bits(vma->node.start + 1000 * sizeof(*cs));
> +		*cs++ = upper_32_bits(vma->node.start + 1000 * sizeof(*cs));
> +	}
>  
>  	*cs++ = MI_BATCH_BUFFER_START_GEN8;
>  	*cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
> @@ -103,7 +106,7 @@ create_spin_counter(struct intel_engine_cs *engine,
>  	i915_gem_object_flush_map(obj);
>  
>  	*cancel = base + loop;
> -	*counter = memset32(base + 1000, 0, 1);
> +	*counter = srm ? memset32(base + 1000, 0, 1) : NULL;

Ok hmm you want a blowout on wrong usage. Fair enough.

>  	return vma;
>  }
>  
> @@ -301,12 +304,152 @@ static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
>  	return div_u64(x[1] + 2 * x[2] + x[3], 4);
>  }
>  
> +static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
> +				  int duration_ms)
> +{
> +	u64 dc, dt;
> +
> +	dt = ktime_get();
> +	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
> +	usleep_range(1000 * duration_ms, 2000 * duration_ms);
> +	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;

Ok, again you measure active engine and getwaway with this.

On the basis that you want minimal impact on the observed
engine,

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> +	dt = ktime_get() - dt;
> +
> +	return div64_u64(1000 * 1000 * dc, dt);
> +}
> +
> +static u64 measure_cs_frequency_at(struct intel_rps *rps,
> +				   struct intel_engine_cs *engine,
> +				   int *freq)
> +{
> +	u64 x[5];
> +	int i;
> +
> +	*freq = rps_set_check(rps, *freq);
> +	for (i = 0; i < 5; i++)
> +		x[i] = __measure_cs_frequency(engine, 2);
> +	*freq = (*freq + read_cagf(rps)) / 2;
> +
> +	/* A simple triangle filter for better result stability */
> +	sort(x, 5, sizeof(*x), cmp_u64, NULL);
> +	return div_u64(x[1] + 2 * x[2] + x[3], 4);
> +}
> +
>  static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
>  {
>  	return f_d * x > f_n * y && f_n * x < f_d * y;
>  }
>  
> -int live_rps_frequency(void *arg)
> +int live_rps_frequency_cs(void *arg)
> +{
> +	void (*saved_work)(struct work_struct *wrk);
> +	struct intel_gt *gt = arg;
> +	struct intel_rps *rps = &gt->rps;
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	int err = 0;
> +
> +	/*
> +	 * The premise is that the GPU does change freqency at our behest.
> +	 * Let's check there is a correspondence between the requested
> +	 * frequency, the actual frequency, and the observed clock rate.
> +	 */
> +
> +	if (!rps->enabled || rps->max_freq <= rps->min_freq)
> +		return 0;
> +
> +	if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
> +		return 0;
> +
> +	intel_gt_pm_wait_for_idle(gt);
> +	saved_work = rps->work.func;
> +	rps->work.func = dummy_rps_work;
> +
> +	for_each_engine(engine, gt, id) {
> +		struct i915_request *rq;
> +		struct i915_vma *vma;
> +		u32 *cancel, *cntr;
> +		struct {
> +			u64 count;
> +			int freq;
> +		} min, max;
> +
> +		vma = create_spin_counter(engine,
> +					  engine->kernel_context->vm, false,
> +					  &cancel, &cntr);
> +		if (IS_ERR(vma)) {
> +			err = PTR_ERR(vma);
> +			break;
> +		}
> +
> +		rq = intel_engine_create_kernel_request(engine);
> +		if (IS_ERR(rq)) {
> +			err = PTR_ERR(rq);
> +			goto err_vma;
> +		}
> +
> +		i915_vma_lock(vma);
> +		err = i915_request_await_object(rq, vma->obj, false);
> +		if (!err)
> +			err = i915_vma_move_to_active(vma, rq, 0);
> +		if (!err)
> +			err = rq->engine->emit_bb_start(rq,
> +							vma->node.start,
> +							PAGE_SIZE, 0);
> +		i915_vma_unlock(vma);
> +		i915_request_add(rq);
> +		if (err)
> +			goto err_vma;
> +
> +		if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
> +			     10)) {
> +			pr_err("%s: timed loop did not start\n",
> +			       engine->name);
> +			goto err_vma;
> +		}
> +
> +		min.freq = rps->min_freq;
> +		min.count = measure_cs_frequency_at(rps, engine, &min.freq);
> +
> +		max.freq = rps->max_freq;
> +		max.count = measure_cs_frequency_at(rps, engine, &max.freq);
> +
> +		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
> +			engine->name,
> +			min.count, intel_gpu_freq(rps, min.freq),
> +			max.count, intel_gpu_freq(rps, max.freq),
> +			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
> +						     max.freq * min.count));
> +
> +		if (!scaled_within(max.freq * min.count,
> +				   min.freq * max.count,
> +				   2, 3)) {
> +			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
> +			       engine->name,
> +			       max.freq * min.count,
> +			       min.freq * max.count);
> +			err = -EINVAL;
> +		}
> +
> +err_vma:
> +		*cancel = MI_BATCH_BUFFER_END;
> +		i915_gem_object_unpin_map(vma->obj);
> +		i915_vma_unpin(vma);
> +		i915_vma_put(vma);
> +
> +		if (igt_flush_test(gt->i915))
> +			err = -EIO;
> +		if (err)
> +			break;
> +	}
> +
> +	intel_gt_pm_wait_for_idle(gt);
> +	rps->work.func = saved_work;
> +
> +	return err;
> +}
> +
> +int live_rps_frequency_srm(void *arg)
>  {
>  	void (*saved_work)(struct work_struct *wrk);
>  	struct intel_gt *gt = arg;
> @@ -341,7 +484,7 @@ int live_rps_frequency(void *arg)
>  		} min, max;
>  
>  		vma = create_spin_counter(engine,
> -					  engine->kernel_context->vm,
> +					  engine->kernel_context->vm, true,
>  					  &cancel, &cntr);
>  		if (IS_ERR(vma)) {
>  			err = PTR_ERR(vma);
> diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.h b/drivers/gpu/drm/i915/gt/selftest_rps.h
> index be0bf8e3f639..22e46c5341c5 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_rps.h
> +++ b/drivers/gpu/drm/i915/gt/selftest_rps.h
> @@ -7,7 +7,8 @@
>  #define SELFTEST_RPS_H
>  
>  int live_rps_control(void *arg);
> -int live_rps_frequency(void *arg);
> +int live_rps_frequency_cs(void *arg);
> +int live_rps_frequency_srm(void *arg);
>  int live_rps_interrupt(void *arg);
>  int live_rps_power(void *arg);
>  
> -- 
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2020-04-20 16:53 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-20  9:09 [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS Chris Wilson
2020-04-20  9:09 ` [Intel-gfx] [PATCH 2/4] drm/i915/selftests: Skip energy consumption tests if not controlling freq Chris Wilson
2020-04-20 11:05   ` Mika Kuoppala
2020-04-20  9:09 ` [Intel-gfx] [PATCH 3/4] drm/i915/selftests: Check RPS controls Chris Wilson
2020-04-20 16:41   ` Mika Kuoppala
2020-04-20  9:09 ` [Intel-gfx] [PATCH 4/4] drm/i915/selftests: Split RPS frequency measurement Chris Wilson
2020-04-20 16:51   ` Mika Kuoppala
2020-04-20 10:00 ` [Intel-gfx] ✗ Fi.CI.BAT: failure for series starting with [1/4] drm/i915/selftests: Verify frequency scaling with RPS Patchwork
2020-04-20 10:54 ` [Intel-gfx] [PATCH 1/4] " Mika Kuoppala
2020-04-20 11:02   ` Chris Wilson
2020-04-20 12:54     ` Mika Kuoppala
2020-04-20 13:08       ` Chris Wilson
2020-04-20 11:12 ` [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/4] " Patchwork
2020-04-20 15:54 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.