* [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS
@ 2020-04-20 9:09 Chris Wilson
2020-04-20 9:09 ` [Intel-gfx] [PATCH 2/4] drm/i915/selftests: Skip energy consumption tests if not controlling freq Chris Wilson
` (6 more replies)
0 siblings, 7 replies; 14+ messages in thread
From: Chris Wilson @ 2020-04-20 9:09 UTC (permalink / raw)
To: intel-gfx; +Cc: Chris Wilson
One of the core tenents of reclocking the GPU is that its throughput
scales with the clock frequency. We can observe this by incrementing a
loop counter on the GPU, and compare the different execution rates at
the notional RPS frequencies.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/gt/selftest_gt_pm.c | 3 +-
drivers/gpu/drm/i915/gt/selftest_rps.c | 249 +++++++++++++++++++++--
drivers/gpu/drm/i915/gt/selftest_rps.h | 1 +
3 files changed, 240 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
index 0141c334f2ac..4b2733967c42 100644
--- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
@@ -53,8 +53,9 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(live_rc6_manual),
- SUBTEST(live_rps_interrupt),
+ SUBTEST(live_rps_frequency),
SUBTEST(live_rps_power),
+ SUBTEST(live_rps_interrupt),
SUBTEST(live_gt_resume),
};
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index 360f56aa4b82..b1a435db1edc 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -6,6 +6,7 @@
#include <linux/sort.h>
#include "intel_engine_pm.h"
+#include "intel_gpu_commands.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "selftest_rps.h"
@@ -17,6 +18,242 @@ static void dummy_rps_work(struct work_struct *wrk)
{
}
+static int cmp_u64(const void *A, const void *B)
+{
+ const u64 *a = A, *b = B;
+
+ if (a < b)
+ return -1;
+ else if (a > b)
+ return 1;
+ else
+ return 0;
+}
+
+static struct i915_vma *
+create_spin_counter(struct intel_engine_cs *engine,
+ struct i915_address_space *vm,
+ u32 **cancel,
+ u32 **counter)
+{
+ enum {
+ COUNT,
+ INC,
+ __NGPR__,
+ };
+#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ u32 *base, *cs;
+ int loop, i;
+ int err;
+
+ obj = i915_gem_object_create_internal(vm->i915, 4096);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma)) {
+ i915_gem_object_put(obj);
+ return vma;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err) {
+ i915_vma_put(vma);
+ return ERR_PTR(err);
+ }
+
+ base = i915_gem_object_pin_map(obj, I915_MAP_WC);
+ if (IS_ERR(base)) {
+ i915_gem_object_put(obj);
+ return ERR_CAST(base);
+ }
+ cs = base;
+
+ *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
+ for (i = 0; i < __NGPR__; i++) {
+ *cs++ = i915_mmio_reg_offset(CS_GPR(i));
+ *cs++ = 0;
+ *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
+ *cs++ = 0;
+ }
+
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
+ *cs++ = 1;
+
+ loop = cs - base;
+
+ *cs++ = MI_MATH(4);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
+ *cs++ = MI_MATH_ADD;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
+
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8;
+ *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
+ *cs++ = lower_32_bits(vma->node.start + 1000 * sizeof(*cs));
+ *cs++ = upper_32_bits(vma->node.start + 1000 * sizeof(*cs));
+
+ *cs++ = MI_BATCH_BUFFER_START_GEN8;
+ *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
+ *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
+
+ i915_gem_object_flush_map(obj);
+
+ *cancel = base + loop;
+ *counter = memset32(base + 1000, 0, 1);
+ return vma;
+}
+
+static u64 __measure_frequency(u32 *cntr, int duration_ms)
+{
+ u64 dc, dt;
+
+ dt = ktime_get();
+ dc = READ_ONCE(*cntr);
+ usleep_range(1000 * duration_ms, 2000 * duration_ms);
+ dc = READ_ONCE(*cntr) - dc;
+ dt = ktime_get() - dt;
+
+ return div64_u64(1000 * 1000 * dc, dt);
+}
+
+static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
+{
+ u64 x[5];
+ int i;
+
+ mutex_lock(&rps->lock);
+ GEM_BUG_ON(!rps->active);
+ intel_rps_set(rps, *freq);
+ mutex_unlock(&rps->lock);
+
+ msleep(20); /* more than enough time to stabilise! */
+
+ for (i = 0; i < 5; i++)
+ x[i] = __measure_frequency(cntr, 2);
+ *freq = read_cagf(rps);
+
+ /* A simple triangle filter for better result stability */
+ sort(x, 5, sizeof(*x), cmp_u64, NULL);
+ return div_u64(x[1] + 2 * x[2] + x[3], 4);
+}
+
+static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
+{
+ return f_d * x > f_n * y && f_n * x < f_d * y;
+}
+
+int live_rps_frequency(void *arg)
+{
+ void (*saved_work)(struct work_struct *wrk);
+ struct intel_gt *gt = arg;
+ struct intel_rps *rps = >->rps;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * The premise is that the GPU does change freqency at our behest.
+ * Let's check there is a correspondence between the requested
+ * frequency, the actual frequency, and the observed clock rate.
+ */
+
+ if (!rps->enabled || rps->max_freq <= rps->min_freq)
+ return 0;
+
+ if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
+ return 0;
+
+ intel_gt_pm_wait_for_idle(gt);
+ saved_work = rps->work.func;
+ rps->work.func = dummy_rps_work;
+
+ for_each_engine(engine, gt, id) {
+ struct i915_request *rq;
+ struct i915_vma *vma;
+ u32 *cancel, *cntr;
+ struct {
+ u64 count;
+ int freq;
+ } min, max;
+
+ vma = create_spin_counter(engine,
+ engine->kernel_context->vm,
+ &cancel, &cntr);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ break;
+ }
+
+ rq = intel_engine_create_kernel_request(engine);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_vma;
+ }
+
+ i915_vma_lock(vma);
+ err = i915_request_await_object(rq, vma->obj, false);
+ if (!err)
+ err = i915_vma_move_to_active(vma, rq, 0);
+ if (!err)
+ err = rq->engine->emit_bb_start(rq,
+ vma->node.start,
+ PAGE_SIZE, 0);
+ i915_vma_unlock(vma);
+ i915_request_add(rq);
+ if (err)
+ goto err_vma;
+
+ if (wait_for(READ_ONCE(*cntr), 10)) {
+ pr_err("%s: timed loop did not start\n",
+ engine->name);
+ goto err_vma;
+ }
+
+ min.freq = rps->min_freq;
+ min.count = measure_frequency_at(rps, cntr, &min.freq);
+
+ max.freq = rps->max_freq;
+ max.count = measure_frequency_at(rps, cntr, &max.freq);
+
+ pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
+ engine->name,
+ min.count, intel_gpu_freq(rps, min.freq),
+ max.count, intel_gpu_freq(rps, max.freq),
+ (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
+ max.freq * min.count));
+
+ if (!scaled_within(max.freq * min.count,
+ min.freq * max.count,
+ 1, 2)) {
+ pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
+ engine->name,
+ max.freq * min.count,
+ min.freq * max.count);
+ err = -EINVAL;
+ }
+
+err_vma:
+ *cancel = MI_BATCH_BUFFER_END;
+ i915_gem_object_unpin_map(vma->obj);
+ i915_vma_unpin(vma);
+ i915_vma_put(vma);
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+ if (err)
+ break;
+ }
+
+ intel_gt_pm_wait_for_idle(gt);
+ rps->work.func = saved_work;
+
+ return err;
+}
+
static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
{
/* Flush any previous EI */
@@ -248,18 +485,6 @@ static u64 __measure_power(int duration_ms)
return div64_u64(1000 * 1000 * dE, dt);
}
-static int cmp_u64(const void *A, const void *B)
-{
- const u64 *a = A, *b = B;
-
- if (a < b)
- return -1;
- else if (a > b)
- return 1;
- else
- return 0;
-}
-
static u64 measure_power_at(struct intel_rps *rps, int freq)
{
u64 x[5];
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.h b/drivers/gpu/drm/i915/gt/selftest_rps.h
index cad515a7f0e5..07c2bddf8899 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.h
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.h
@@ -6,6 +6,7 @@
#ifndef SELFTEST_RPS_H
#define SELFTEST_RPS_H
+int live_rps_frequency(void *arg);
int live_rps_interrupt(void *arg);
int live_rps_power(void *arg);
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Intel-gfx] [PATCH 2/4] drm/i915/selftests: Skip energy consumption tests if not controlling freq
2020-04-20 9:09 [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS Chris Wilson
@ 2020-04-20 9:09 ` Chris Wilson
2020-04-20 11:05 ` Mika Kuoppala
2020-04-20 9:09 ` [Intel-gfx] [PATCH 3/4] drm/i915/selftests: Check RPS controls Chris Wilson
` (5 subsequent siblings)
6 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2020-04-20 9:09 UTC (permalink / raw)
To: intel-gfx; +Cc: Chris Wilson
If we can not manipulate the frequency with RPS, then comparing min/max
power consumption is pointless / misleading. We will leave the warning
about not being able to control the frequency selection via RPS to other
tests so as not to confuse this more specialised check.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/gt/selftest_rps.c | 37 ++++++++++++++++----------
1 file changed, 23 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index b1a435db1edc..149a3de86cb9 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -485,26 +485,21 @@ static u64 __measure_power(int duration_ms)
return div64_u64(1000 * 1000 * dE, dt);
}
-static u64 measure_power_at(struct intel_rps *rps, int freq)
+static u64 measure_power_at(struct intel_rps *rps, int *freq)
{
u64 x[5];
int i;
mutex_lock(&rps->lock);
GEM_BUG_ON(!rps->active);
- intel_rps_set(rps, freq);
+ intel_rps_set(rps, *freq);
mutex_unlock(&rps->lock);
msleep(20); /* more than enough time to stabilise! */
- i = read_cagf(rps);
- if (i != freq)
- pr_notice("Running at %x [%uMHz], not target %x [%uMHz]\n",
- i, intel_gpu_freq(rps, i),
- freq, intel_gpu_freq(rps, freq));
-
for (i = 0; i < 5; i++)
x[i] = __measure_power(5);
+ *freq = read_cagf(rps);
/* A simple triangle filter for better result stability */
sort(x, 5, sizeof(*x), cmp_u64, NULL);
@@ -542,7 +537,10 @@ int live_rps_power(void *arg)
for_each_engine(engine, gt, id) {
struct i915_request *rq;
- u64 min, max;
+ struct {
+ u64 power;
+ int freq;
+ } min, max;
if (!intel_engine_can_store_dword(engine))
continue;
@@ -565,16 +563,27 @@ int live_rps_power(void *arg)
break;
}
- max = measure_power_at(rps, rps->max_freq);
- min = measure_power_at(rps, rps->min_freq);
+ max.freq = rps->max_freq;
+ max.power = measure_power_at(rps, &max.freq);
+
+ min.freq = rps->min_freq;
+ min.power = measure_power_at(rps, &min.freq);
igt_spinner_end(&spin);
pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
engine->name,
- min, intel_gpu_freq(rps, rps->min_freq),
- max, intel_gpu_freq(rps, rps->max_freq));
- if (11 * min > 10 * max) {
+ min.power, intel_gpu_freq(rps, min.freq),
+ max.power, intel_gpu_freq(rps, max.freq));
+
+ if (10 * min.freq >= 9 * max.freq) {
+ pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n",
+ min.freq, intel_gpu_freq(rps, min.freq),
+ max.freq, intel_gpu_freq(rps, max.freq));
+ continue;
+ }
+
+ if (11 * min.power > 10 * max.power) {
pr_err("%s: did not conserve power when setting lower frequency!\n",
engine->name);
err = -EINVAL;
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Intel-gfx] [PATCH 3/4] drm/i915/selftests: Check RPS controls
2020-04-20 9:09 [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS Chris Wilson
2020-04-20 9:09 ` [Intel-gfx] [PATCH 2/4] drm/i915/selftests: Skip energy consumption tests if not controlling freq Chris Wilson
@ 2020-04-20 9:09 ` Chris Wilson
2020-04-20 16:41 ` Mika Kuoppala
2020-04-20 9:09 ` [Intel-gfx] [PATCH 4/4] drm/i915/selftests: Split RPS frequency measurement Chris Wilson
` (4 subsequent siblings)
6 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2020-04-20 9:09 UTC (permalink / raw)
To: intel-gfx; +Cc: Chris Wilson
Check that the GPU does respond to our RPS frequency requests by setting
our desired frequency.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/gt/selftest_gt_pm.c | 1 +
drivers/gpu/drm/i915/gt/selftest_rps.c | 196 ++++++++++++++++++++---
drivers/gpu/drm/i915/gt/selftest_rps.h | 1 +
3 files changed, 174 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
index 4b2733967c42..de3eaef40596 100644
--- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
@@ -53,6 +53,7 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(live_rc6_manual),
+ SUBTEST(live_rps_control),
SUBTEST(live_rps_frequency),
SUBTEST(live_rps_power),
SUBTEST(live_rps_interrupt),
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index 149a3de86cb9..19fa6a561de3 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -107,6 +107,172 @@ create_spin_counter(struct intel_engine_cs *engine,
return vma;
}
+static u8 rps_set_check(struct intel_rps *rps, u8 freq)
+{
+ u8 history[64], i;
+ unsigned long end;
+ int sleep;
+
+ mutex_lock(&rps->lock);
+ GEM_BUG_ON(!rps->active);
+ intel_rps_set(rps, freq);
+ GEM_BUG_ON(rps->last_freq != freq);
+ mutex_unlock(&rps->lock);
+
+ i = 0;
+ memset(history, freq, sizeof(history));
+ sleep = 20;
+
+ /* The PCU does not change instantly, but drifts towards the goal? */
+ end = jiffies + msecs_to_jiffies(50);
+ do {
+ u8 act;
+
+ act = read_cagf(rps);
+ if (time_after(jiffies, end))
+ return act;
+
+ /* Target acquired */
+ if (act == freq)
+ return act;
+
+ /* Any change witin the last N samples? */
+ if (!memchr_inv(history, act, sizeof(history)))
+ return act;
+
+ history[i] = act;
+ i = (i + 1) % ARRAY_SIZE(history);
+
+ usleep_range(sleep, 2 * sleep);
+ sleep *= 2;
+ if (sleep > 1000)
+ sleep = 1000;
+ } while (1);
+}
+
+int live_rps_control(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_rps *rps = >->rps;
+ void (*saved_work)(struct work_struct *wrk);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct igt_spinner spin;
+ int err = 0;
+
+ /*
+ * Check that the actual frequency matches our requested frequency,
+ * to verify our control mechanism. We have to be careful that the
+ * PCU may throttle the GPU in which case the actual frequency used
+ * will be lowered than requested.
+ */
+
+ if (!rps->enabled || rps->max_freq <= rps->min_freq)
+ return 0;
+
+ if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
+ return 0;
+
+ if (igt_spinner_init(&spin, gt))
+ return -ENOMEM;
+
+ intel_gt_pm_wait_for_idle(gt);
+ saved_work = rps->work.func;
+ rps->work.func = dummy_rps_work;
+
+ intel_gt_pm_get(gt);
+ for_each_engine(engine, gt, id) {
+ struct i915_request *rq;
+ ktime_t min_dt, max_dt;
+ int act, f, limit;
+ int min, max;
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ rq = igt_spinner_create_request(&spin,
+ engine->kernel_context,
+ MI_NOOP);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ break;
+ }
+
+ i915_request_add(rq);
+
+ if (!igt_wait_for_spinner(&spin, rq)) {
+ pr_err("%s: RPS spinner did not start\n",
+ engine->name);
+ igt_spinner_end(&spin);
+ intel_gt_set_wedged(engine->gt);
+ err = -EIO;
+ break;
+ }
+
+ if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
+ pr_err("%s: could not set minimum frequency [%x], only %x!\n",
+ engine->name, rps->min_freq, read_cagf(rps));
+ igt_spinner_end(&spin);
+ err = -EINVAL;
+ break;
+ }
+
+ for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
+ act = rps_set_check(rps, f);
+ if (act < f)
+ break;
+ }
+
+ limit = rps_set_check(rps, f);
+
+ if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
+ pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
+ engine->name, rps->min_freq, read_cagf(rps));
+ igt_spinner_end(&spin);
+ err = -EINVAL;
+ break;
+ }
+
+ max_dt = ktime_get();
+ max = rps_set_check(rps, limit);
+ max_dt = ktime_sub(ktime_get(), max_dt);
+
+ min_dt = ktime_get();
+ min = rps_set_check(rps, rps->min_freq);
+ min_dt = ktime_sub(ktime_get(), min_dt);
+
+ igt_spinner_end(&spin);
+
+ pr_info("%s: range:[%x:%uMHz, %x:%uMHz] actual:[ %x:%uMHz, %x:%uMHz], %x:%x response %lluns:%lluns\n",
+ engine->name,
+ rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
+ rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
+ act, intel_gpu_freq(rps, act),
+ limit, intel_gpu_freq(rps, limit),
+ min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));
+
+ if (limit == rps->min_freq) {
+ pr_err("%s: GPU throttled to minimum!\n",
+ engine->name);
+ err = -ENODEV;
+ break;
+ }
+
+ if (igt_flush_test(gt->i915)) {
+ err = -EIO;
+ break;
+ }
+ }
+ intel_gt_pm_put(gt);
+
+ igt_spinner_fini(&spin);
+
+ intel_gt_pm_wait_for_idle(gt);
+ rps->work.func = saved_work;
+
+ return err;
+}
+
static u64 __measure_frequency(u32 *cntr, int duration_ms)
{
u64 dc, dt;
@@ -125,16 +291,10 @@ static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
u64 x[5];
int i;
- mutex_lock(&rps->lock);
- GEM_BUG_ON(!rps->active);
- intel_rps_set(rps, *freq);
- mutex_unlock(&rps->lock);
-
- msleep(20); /* more than enough time to stabilise! */
-
+ *freq = rps_set_check(rps, *freq);
for (i = 0; i < 5; i++)
x[i] = __measure_frequency(cntr, 2);
- *freq = read_cagf(rps);
+ *freq = (*freq + read_cagf(rps)) / 2;
/* A simple triangle filter for better result stability */
sort(x, 5, sizeof(*x), cmp_u64, NULL);
@@ -279,10 +439,7 @@ static int __rps_up_interrupt(struct intel_rps *rps,
if (!intel_engine_can_store_dword(engine))
return 0;
- mutex_lock(&rps->lock);
- GEM_BUG_ON(!rps->active);
- intel_rps_set(rps, rps->min_freq);
- mutex_unlock(&rps->lock);
+ rps_set_check(rps, rps->min_freq);
rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
if (IS_ERR(rq))
@@ -354,10 +511,7 @@ static int __rps_down_interrupt(struct intel_rps *rps,
struct intel_uncore *uncore = engine->uncore;
u32 timeout;
- mutex_lock(&rps->lock);
- GEM_BUG_ON(!rps->active);
- intel_rps_set(rps, rps->max_freq);
- mutex_unlock(&rps->lock);
+ rps_set_check(rps, rps->max_freq);
if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
pr_err("%s: RPS did not register DOWN interrupt\n",
@@ -490,16 +644,10 @@ static u64 measure_power_at(struct intel_rps *rps, int *freq)
u64 x[5];
int i;
- mutex_lock(&rps->lock);
- GEM_BUG_ON(!rps->active);
- intel_rps_set(rps, *freq);
- mutex_unlock(&rps->lock);
-
- msleep(20); /* more than enough time to stabilise! */
-
+ *freq = rps_set_check(rps, *freq);
for (i = 0; i < 5; i++)
x[i] = __measure_power(5);
- *freq = read_cagf(rps);
+ *freq = (*freq + read_cagf(rps)) / 2;
/* A simple triangle filter for better result stability */
sort(x, 5, sizeof(*x), cmp_u64, NULL);
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.h b/drivers/gpu/drm/i915/gt/selftest_rps.h
index 07c2bddf8899..be0bf8e3f639 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.h
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.h
@@ -6,6 +6,7 @@
#ifndef SELFTEST_RPS_H
#define SELFTEST_RPS_H
+int live_rps_control(void *arg);
int live_rps_frequency(void *arg);
int live_rps_interrupt(void *arg);
int live_rps_power(void *arg);
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Intel-gfx] [PATCH 4/4] drm/i915/selftests: Split RPS frequency measurement
2020-04-20 9:09 [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS Chris Wilson
2020-04-20 9:09 ` [Intel-gfx] [PATCH 2/4] drm/i915/selftests: Skip energy consumption tests if not controlling freq Chris Wilson
2020-04-20 9:09 ` [Intel-gfx] [PATCH 3/4] drm/i915/selftests: Check RPS controls Chris Wilson
@ 2020-04-20 9:09 ` Chris Wilson
2020-04-20 16:51 ` Mika Kuoppala
2020-04-20 10:00 ` [Intel-gfx] ✗ Fi.CI.BAT: failure for series starting with [1/4] drm/i915/selftests: Verify frequency scaling with RPS Patchwork
` (3 subsequent siblings)
6 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2020-04-20 9:09 UTC (permalink / raw)
To: intel-gfx; +Cc: Chris Wilson
Split the frequency measurement into two modes, so that we can judge the
impact of the llc setup on top of the pure CS frequency scaling.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/gt/selftest_gt_pm.c | 3 +-
drivers/gpu/drm/i915/gt/selftest_rps.c | 157 ++++++++++++++++++++++-
drivers/gpu/drm/i915/gt/selftest_rps.h | 3 +-
3 files changed, 154 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
index de3eaef40596..9855e6f0ce7c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
@@ -54,7 +54,8 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
static const struct i915_subtest tests[] = {
SUBTEST(live_rc6_manual),
SUBTEST(live_rps_control),
- SUBTEST(live_rps_frequency),
+ SUBTEST(live_rps_frequency_cs),
+ SUBTEST(live_rps_frequency_srm),
SUBTEST(live_rps_power),
SUBTEST(live_rps_interrupt),
SUBTEST(live_gt_resume),
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index 19fa6a561de3..dbca673519a2 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -33,6 +33,7 @@ static int cmp_u64(const void *A, const void *B)
static struct i915_vma *
create_spin_counter(struct intel_engine_cs *engine,
struct i915_address_space *vm,
+ bool srm,
u32 **cancel,
u32 **counter)
{
@@ -91,10 +92,12 @@ create_spin_counter(struct intel_engine_cs *engine,
*cs++ = MI_MATH_ADD;
*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
- *cs++ = MI_STORE_REGISTER_MEM_GEN8;
- *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
- *cs++ = lower_32_bits(vma->node.start + 1000 * sizeof(*cs));
- *cs++ = upper_32_bits(vma->node.start + 1000 * sizeof(*cs));
+ if (srm) {
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8;
+ *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
+ *cs++ = lower_32_bits(vma->node.start + 1000 * sizeof(*cs));
+ *cs++ = upper_32_bits(vma->node.start + 1000 * sizeof(*cs));
+ }
*cs++ = MI_BATCH_BUFFER_START_GEN8;
*cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
@@ -103,7 +106,7 @@ create_spin_counter(struct intel_engine_cs *engine,
i915_gem_object_flush_map(obj);
*cancel = base + loop;
- *counter = memset32(base + 1000, 0, 1);
+ *counter = srm ? memset32(base + 1000, 0, 1) : NULL;
return vma;
}
@@ -301,12 +304,152 @@ static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
return div_u64(x[1] + 2 * x[2] + x[3], 4);
}
+static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
+ int duration_ms)
+{
+ u64 dc, dt;
+
+ dt = ktime_get();
+ dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
+ usleep_range(1000 * duration_ms, 2000 * duration_ms);
+ dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
+ dt = ktime_get() - dt;
+
+ return div64_u64(1000 * 1000 * dc, dt);
+}
+
+static u64 measure_cs_frequency_at(struct intel_rps *rps,
+ struct intel_engine_cs *engine,
+ int *freq)
+{
+ u64 x[5];
+ int i;
+
+ *freq = rps_set_check(rps, *freq);
+ for (i = 0; i < 5; i++)
+ x[i] = __measure_cs_frequency(engine, 2);
+ *freq = (*freq + read_cagf(rps)) / 2;
+
+ /* A simple triangle filter for better result stability */
+ sort(x, 5, sizeof(*x), cmp_u64, NULL);
+ return div_u64(x[1] + 2 * x[2] + x[3], 4);
+}
+
static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
{
return f_d * x > f_n * y && f_n * x < f_d * y;
}
-int live_rps_frequency(void *arg)
+int live_rps_frequency_cs(void *arg)
+{
+ void (*saved_work)(struct work_struct *wrk);
+ struct intel_gt *gt = arg;
+ struct intel_rps *rps = >->rps;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * The premise is that the GPU does change freqency at our behest.
+ * Let's check there is a correspondence between the requested
+ * frequency, the actual frequency, and the observed clock rate.
+ */
+
+ if (!rps->enabled || rps->max_freq <= rps->min_freq)
+ return 0;
+
+ if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
+ return 0;
+
+ intel_gt_pm_wait_for_idle(gt);
+ saved_work = rps->work.func;
+ rps->work.func = dummy_rps_work;
+
+ for_each_engine(engine, gt, id) {
+ struct i915_request *rq;
+ struct i915_vma *vma;
+ u32 *cancel, *cntr;
+ struct {
+ u64 count;
+ int freq;
+ } min, max;
+
+ vma = create_spin_counter(engine,
+ engine->kernel_context->vm, false,
+ &cancel, &cntr);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ break;
+ }
+
+ rq = intel_engine_create_kernel_request(engine);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_vma;
+ }
+
+ i915_vma_lock(vma);
+ err = i915_request_await_object(rq, vma->obj, false);
+ if (!err)
+ err = i915_vma_move_to_active(vma, rq, 0);
+ if (!err)
+ err = rq->engine->emit_bb_start(rq,
+ vma->node.start,
+ PAGE_SIZE, 0);
+ i915_vma_unlock(vma);
+ i915_request_add(rq);
+ if (err)
+ goto err_vma;
+
+ if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
+ 10)) {
+ pr_err("%s: timed loop did not start\n",
+ engine->name);
+ goto err_vma;
+ }
+
+ min.freq = rps->min_freq;
+ min.count = measure_cs_frequency_at(rps, engine, &min.freq);
+
+ max.freq = rps->max_freq;
+ max.count = measure_cs_frequency_at(rps, engine, &max.freq);
+
+ pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
+ engine->name,
+ min.count, intel_gpu_freq(rps, min.freq),
+ max.count, intel_gpu_freq(rps, max.freq),
+ (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
+ max.freq * min.count));
+
+ if (!scaled_within(max.freq * min.count,
+ min.freq * max.count,
+ 2, 3)) {
+ pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
+ engine->name,
+ max.freq * min.count,
+ min.freq * max.count);
+ err = -EINVAL;
+ }
+
+err_vma:
+ *cancel = MI_BATCH_BUFFER_END;
+ i915_gem_object_unpin_map(vma->obj);
+ i915_vma_unpin(vma);
+ i915_vma_put(vma);
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+ if (err)
+ break;
+ }
+
+ intel_gt_pm_wait_for_idle(gt);
+ rps->work.func = saved_work;
+
+ return err;
+}
+
+int live_rps_frequency_srm(void *arg)
{
void (*saved_work)(struct work_struct *wrk);
struct intel_gt *gt = arg;
@@ -341,7 +484,7 @@ int live_rps_frequency(void *arg)
} min, max;
vma = create_spin_counter(engine,
- engine->kernel_context->vm,
+ engine->kernel_context->vm, true,
&cancel, &cntr);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.h b/drivers/gpu/drm/i915/gt/selftest_rps.h
index be0bf8e3f639..22e46c5341c5 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.h
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.h
@@ -7,7 +7,8 @@
#define SELFTEST_RPS_H
int live_rps_control(void *arg);
-int live_rps_frequency(void *arg);
+int live_rps_frequency_cs(void *arg);
+int live_rps_frequency_srm(void *arg);
int live_rps_interrupt(void *arg);
int live_rps_power(void *arg);
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Intel-gfx] ✗ Fi.CI.BAT: failure for series starting with [1/4] drm/i915/selftests: Verify frequency scaling with RPS
2020-04-20 9:09 [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS Chris Wilson
` (2 preceding siblings ...)
2020-04-20 9:09 ` [Intel-gfx] [PATCH 4/4] drm/i915/selftests: Split RPS frequency measurement Chris Wilson
@ 2020-04-20 10:00 ` Patchwork
2020-04-20 10:54 ` [Intel-gfx] [PATCH 1/4] " Mika Kuoppala
` (2 subsequent siblings)
6 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2020-04-20 10:00 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: series starting with [1/4] drm/i915/selftests: Verify frequency scaling with RPS
URL : https://patchwork.freedesktop.org/series/76184/
State : failure
== Summary ==
CI Bug Log - changes from CI_DRM_8327 -> Patchwork_17376
====================================================
Summary
-------
**FAILURE**
Serious unknown changes coming with Patchwork_17376 absolutely need to be
verified manually.
If you think the reported changes have nothing to do with the changes
introduced in Patchwork_17376, please notify your bug team to allow them
to document this new failure mode, which will reduce false positives in CI.
External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/index.html
Possible new issues
-------------------
Here are the unknown changes that may have been introduced in Patchwork_17376:
### IGT changes ###
#### Possible regressions ####
* igt@i915_selftest@live@gt_pm:
- fi-icl-y: [PASS][1] -> [DMESG-FAIL][2]
[1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-icl-y/igt@i915_selftest@live@gt_pm.html
[2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-icl-y/igt@i915_selftest@live@gt_pm.html
- fi-skl-6600u: [PASS][3] -> [DMESG-FAIL][4]
[3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-skl-6600u/igt@i915_selftest@live@gt_pm.html
[4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-skl-6600u/igt@i915_selftest@live@gt_pm.html
- fi-cfl-8109u: [PASS][5] -> [DMESG-FAIL][6]
[5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-cfl-8109u/igt@i915_selftest@live@gt_pm.html
[6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-cfl-8109u/igt@i915_selftest@live@gt_pm.html
- fi-whl-u: [PASS][7] -> [DMESG-FAIL][8]
[7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-whl-u/igt@i915_selftest@live@gt_pm.html
[8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-whl-u/igt@i915_selftest@live@gt_pm.html
#### Warnings ####
* igt@i915_selftest@live@gt_pm:
- fi-tgl-y: [DMESG-FAIL][9] ([i915#1725]) -> [DMESG-FAIL][10]
[9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-tgl-y/igt@i915_selftest@live@gt_pm.html
[10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-tgl-y/igt@i915_selftest@live@gt_pm.html
- fi-kbl-soraka: [DMESG-FAIL][11] ([i915#1744]) -> [DMESG-FAIL][12]
[11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-kbl-soraka/igt@i915_selftest@live@gt_pm.html
[12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-kbl-soraka/igt@i915_selftest@live@gt_pm.html
#### Suppressed ####
The following results come from untrusted machines, tests, or statuses.
They do not affect the overall result.
* igt@i915_selftest@live@gt_pm:
- {fi-tgl-u}: [DMESG-FAIL][13] ([i915#1725]) -> [DMESG-FAIL][14]
[13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-tgl-u/igt@i915_selftest@live@gt_pm.html
[14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-tgl-u/igt@i915_selftest@live@gt_pm.html
* {igt@kms_flip@basic-flip-vs-wf_vblank@a-dsi1}:
- {fi-tgl-dsi}: [PASS][15] -> [INCOMPLETE][16]
[15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-tgl-dsi/igt@kms_flip@basic-flip-vs-wf_vblank@a-dsi1.html
[16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-tgl-dsi/igt@kms_flip@basic-flip-vs-wf_vblank@a-dsi1.html
Known issues
------------
Here are the changes found in Patchwork_17376 that come from known issues:
### IGT changes ###
#### Issues hit ####
* igt@i915_module_load@reload:
- fi-skl-6770hq: [PASS][17] -> [DMESG-WARN][18] ([i915#203]) +1 similar issue
[17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-skl-6770hq/igt@i915_module_load@reload.html
[18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-skl-6770hq/igt@i915_module_load@reload.html
* igt@kms_pipe_crc_basic@read-crc-pipe-a-frame-sequence:
- fi-skl-6770hq: [PASS][19] -> [SKIP][20] ([fdo#109271]) +5 similar issues
[19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-skl-6770hq/igt@kms_pipe_crc_basic@read-crc-pipe-a-frame-sequence.html
[20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-skl-6770hq/igt@kms_pipe_crc_basic@read-crc-pipe-a-frame-sequence.html
* igt@kms_pipe_crc_basic@read-crc-pipe-b:
- fi-skl-6770hq: [PASS][21] -> [DMESG-WARN][22] ([i915#106])
[21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-skl-6770hq/igt@kms_pipe_crc_basic@read-crc-pipe-b.html
[22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-skl-6770hq/igt@kms_pipe_crc_basic@read-crc-pipe-b.html
{name}: This element is suppressed. This means it is ignored when computing
the status of the difference (SUCCESS, WARNING, or FAILURE).
[fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
[i915#106]: https://gitlab.freedesktop.org/drm/intel/issues/106
[i915#1725]: https://gitlab.freedesktop.org/drm/intel/issues/1725
[i915#1744]: https://gitlab.freedesktop.org/drm/intel/issues/1744
[i915#203]: https://gitlab.freedesktop.org/drm/intel/issues/203
Participating hosts (49 -> 44)
------------------------------
Additional (1): fi-kbl-7560u
Missing (6): fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-bdw-samus fi-byt-clapper fi-skl-6700k2
Build changes
-------------
* CI: CI-20190529 -> None
* Linux: CI_DRM_8327 -> Patchwork_17376
CI-20190529: 20190529
CI_DRM_8327: 17e0a63ab93b19ea2bfccd9a0425c93e52a65246 @ git://anongit.freedesktop.org/gfx-ci/linux
IGT_5602: a8fcccd15dcc2dd409edd23785a2d6f6e85fb682 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
Patchwork_17376: 589884d12beb98e0c84eb0026628ccb93ed0b946 @ git://anongit.freedesktop.org/gfx-ci/linux
== Linux commits ==
589884d12beb drm/i915/selftests: Split RPS frequency measurement
affab98e14a6 drm/i915/selftests: Check RPS controls
979f48e06a1c drm/i915/selftests: Skip energy consumption tests if not controlling freq
27cd01ca67a5 drm/i915/selftests: Verify frequency scaling with RPS
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS
2020-04-20 9:09 [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS Chris Wilson
` (3 preceding siblings ...)
2020-04-20 10:00 ` [Intel-gfx] ✗ Fi.CI.BAT: failure for series starting with [1/4] drm/i915/selftests: Verify frequency scaling with RPS Patchwork
@ 2020-04-20 10:54 ` Mika Kuoppala
2020-04-20 11:02 ` Chris Wilson
2020-04-20 11:12 ` [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/4] " Patchwork
2020-04-20 15:54 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
6 siblings, 1 reply; 14+ messages in thread
From: Mika Kuoppala @ 2020-04-20 10:54 UTC (permalink / raw)
To: Chris Wilson, intel-gfx; +Cc: Chris Wilson
Chris Wilson <chris@chris-wilson.co.uk> writes:
> One of the core tenents of reclocking the GPU is that its throughput
> scales with the clock frequency. We can observe this by incrementing a
> loop counter on the GPU, and compare the different execution rates at
> the notional RPS frequencies.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/gt/selftest_gt_pm.c | 3 +-
> drivers/gpu/drm/i915/gt/selftest_rps.c | 249 +++++++++++++++++++++--
> drivers/gpu/drm/i915/gt/selftest_rps.h | 1 +
> 3 files changed, 240 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
> index 0141c334f2ac..4b2733967c42 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
> @@ -53,8 +53,9 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
> {
> static const struct i915_subtest tests[] = {
> SUBTEST(live_rc6_manual),
> - SUBTEST(live_rps_interrupt),
> + SUBTEST(live_rps_frequency),
> SUBTEST(live_rps_power),
> + SUBTEST(live_rps_interrupt),
> SUBTEST(live_gt_resume),
> };
>
> diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
> index 360f56aa4b82..b1a435db1edc 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_rps.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
> @@ -6,6 +6,7 @@
> #include <linux/sort.h>
>
> #include "intel_engine_pm.h"
> +#include "intel_gpu_commands.h"
> #include "intel_gt_pm.h"
> #include "intel_rc6.h"
> #include "selftest_rps.h"
> @@ -17,6 +18,242 @@ static void dummy_rps_work(struct work_struct *wrk)
> {
> }
>
> +static int cmp_u64(const void *A, const void *B)
> +{
> + const u64 *a = A, *b = B;
> +
> + if (a < b)
> + return -1;
> + else if (a > b)
> + return 1;
> + else
> + return 0;
> +}
> +
> +static struct i915_vma *
> +create_spin_counter(struct intel_engine_cs *engine,
> + struct i915_address_space *vm,
> + u32 **cancel,
> + u32 **counter)
> +{
> + enum {
> + COUNT,
ok, it starts from zero.
> + INC,
> + __NGPR__,
> + };
> +#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
> + struct drm_i915_gem_object *obj;
> + struct i915_vma *vma;
> + u32 *base, *cs;
> + int loop, i;
> + int err;
> +
> + obj = i915_gem_object_create_internal(vm->i915, 4096);
> + if (IS_ERR(obj))
> + return ERR_CAST(obj);
> +
> + vma = i915_vma_instance(obj, vm, NULL);
> + if (IS_ERR(vma)) {
> + i915_gem_object_put(obj);
> + return vma;
> + }
> +
> + err = i915_vma_pin(vma, 0, 0, PIN_USER);
> + if (err) {
> + i915_vma_put(vma);
You forgot to put the obj.
> + return ERR_PTR(err);
> + }
> +
> + base = i915_gem_object_pin_map(obj, I915_MAP_WC);
> + if (IS_ERR(base)) {
> + i915_gem_object_put(obj);
You forgot to put the vma?
> + return ERR_CAST(base);
> + }
> + cs = base;
> +
> + *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
> + for (i = 0; i < __NGPR__; i++) {
> + *cs++ = i915_mmio_reg_offset(CS_GPR(i));
> + *cs++ = 0;
> + *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
> + *cs++ = 0;
> + }
> +
> + *cs++ = MI_LOAD_REGISTER_IMM(1);
> + *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
> + *cs++ = 1;
> +
> + loop = cs - base;
> +
> + *cs++ = MI_MATH(4);
> + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
> + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
> + *cs++ = MI_MATH_ADD;
> + *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
> +
> + *cs++ = MI_STORE_REGISTER_MEM_GEN8;
> + *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
> + *cs++ = lower_32_bits(vma->node.start + 1000 * sizeof(*cs));
> + *cs++ = upper_32_bits(vma->node.start + 1000 * sizeof(*cs));
> +
> + *cs++ = MI_BATCH_BUFFER_START_GEN8;
> + *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
> + *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
> +
> + i915_gem_object_flush_map(obj);
> +
> + *cancel = base + loop;
> + *counter = memset32(base + 1000, 0, 1);
> + return vma;
> +}
> +
> +static u64 __measure_frequency(u32 *cntr, int duration_ms)
> +{
> + u64 dc, dt;
> +
> + dt = ktime_get();
> + dc = READ_ONCE(*cntr);
> + usleep_range(1000 * duration_ms, 2000 * duration_ms);
> + dc = READ_ONCE(*cntr) - dc;
> + dt = ktime_get() - dt;
> +
> + return div64_u64(1000 * 1000 * dc, dt);
> +}
> +
> +static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
> +{
> + u64 x[5];
> + int i;
> +
> + mutex_lock(&rps->lock);
> + GEM_BUG_ON(!rps->active);
> + intel_rps_set(rps, *freq);
> + mutex_unlock(&rps->lock);
> +
> + msleep(20); /* more than enough time to stabilise! */
> +
> + for (i = 0; i < 5; i++)
> + x[i] = __measure_frequency(cntr, 2);
> + *freq = read_cagf(rps);
> +
> + /* A simple triangle filter for better result stability */
> + sort(x, 5, sizeof(*x), cmp_u64, NULL);
> + return div_u64(x[1] + 2 * x[2] + x[3], 4);
> +}
> +
> +static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
> +{
> + return f_d * x > f_n * y && f_n * x < f_d * y;
> +}
> +
> +int live_rps_frequency(void *arg)
> +{
> + void (*saved_work)(struct work_struct *wrk);
> + struct intel_gt *gt = arg;
> + struct intel_rps *rps = >->rps;
> + struct intel_engine_cs *engine;
> + enum intel_engine_id id;
> + int err = 0;
> +
> + /*
> + * The premise is that the GPU does change freqency at our behest.
> + * Let's check there is a correspondence between the requested
> + * frequency, the actual frequency, and the observed clock rate.
> + */
> +
> + if (!rps->enabled || rps->max_freq <= rps->min_freq)
> + return 0;
> +
> + if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
> + return 0;
> +
> + intel_gt_pm_wait_for_idle(gt);
> + saved_work = rps->work.func;
> + rps->work.func = dummy_rps_work;
> +
> + for_each_engine(engine, gt, id) {
> + struct i915_request *rq;
> + struct i915_vma *vma;
> + u32 *cancel, *cntr;
> + struct {
> + u64 count;
> + int freq;
> + } min, max;
> +
> + vma = create_spin_counter(engine,
> + engine->kernel_context->vm,
> + &cancel, &cntr);
> + if (IS_ERR(vma)) {
> + err = PTR_ERR(vma);
> + break;
> + }
> +
> + rq = intel_engine_create_kernel_request(engine);
> + if (IS_ERR(rq)) {
> + err = PTR_ERR(rq);
> + goto err_vma;
> + }
> +
> + i915_vma_lock(vma);
> + err = i915_request_await_object(rq, vma->obj, false);
I am puzzled what we need to wait asynchronously in here.
Further, intel_runtime_pm_get is missing.
-Mika
> + if (!err)
> + err = i915_vma_move_to_active(vma, rq, 0);
> + if (!err)
> + err = rq->engine->emit_bb_start(rq,
> + vma->node.start,
> + PAGE_SIZE, 0);
> + i915_vma_unlock(vma);
> + i915_request_add(rq);
> + if (err)
> + goto err_vma;
> +
> + if (wait_for(READ_ONCE(*cntr), 10)) {
> + pr_err("%s: timed loop did not start\n",
> + engine->name);
> + goto err_vma;
> + }
> +
> + min.freq = rps->min_freq;
> + min.count = measure_frequency_at(rps, cntr, &min.freq);
> +
> + max.freq = rps->max_freq;
> + max.count = measure_frequency_at(rps, cntr, &max.freq);
> +
> + pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
> + engine->name,
> + min.count, intel_gpu_freq(rps, min.freq),
> + max.count, intel_gpu_freq(rps, max.freq),
> + (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
> + max.freq * min.count));
> +
> + if (!scaled_within(max.freq * min.count,
> + min.freq * max.count,
> + 1, 2)) {
> + pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
> + engine->name,
> + max.freq * min.count,
> + min.freq * max.count);
> + err = -EINVAL;
> + }
> +
> +err_vma:
> + *cancel = MI_BATCH_BUFFER_END;
> + i915_gem_object_unpin_map(vma->obj);
> + i915_vma_unpin(vma);
> + i915_vma_put(vma);
> +
> + if (igt_flush_test(gt->i915))
> + err = -EIO;
> + if (err)
> + break;
> + }
> +
> + intel_gt_pm_wait_for_idle(gt);
> + rps->work.func = saved_work;
> +
> + return err;
> +}
> +
> static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
> {
> /* Flush any previous EI */
> @@ -248,18 +485,6 @@ static u64 __measure_power(int duration_ms)
> return div64_u64(1000 * 1000 * dE, dt);
> }
>
> -static int cmp_u64(const void *A, const void *B)
> -{
> - const u64 *a = A, *b = B;
> -
> - if (a < b)
> - return -1;
> - else if (a > b)
> - return 1;
> - else
> - return 0;
> -}
> -
> static u64 measure_power_at(struct intel_rps *rps, int freq)
> {
> u64 x[5];
> diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.h b/drivers/gpu/drm/i915/gt/selftest_rps.h
> index cad515a7f0e5..07c2bddf8899 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_rps.h
> +++ b/drivers/gpu/drm/i915/gt/selftest_rps.h
> @@ -6,6 +6,7 @@
> #ifndef SELFTEST_RPS_H
> #define SELFTEST_RPS_H
>
> +int live_rps_frequency(void *arg);
> int live_rps_interrupt(void *arg);
> int live_rps_power(void *arg);
>
> --
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS
2020-04-20 10:54 ` [Intel-gfx] [PATCH 1/4] " Mika Kuoppala
@ 2020-04-20 11:02 ` Chris Wilson
2020-04-20 12:54 ` Mika Kuoppala
0 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2020-04-20 11:02 UTC (permalink / raw)
To: Mika Kuoppala, intel-gfx
Quoting Mika Kuoppala (2020-04-20 11:54:38)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
>
> > One of the core tenents of reclocking the GPU is that its throughput
> > scales with the clock frequency. We can observe this by incrementing a
> > loop counter on the GPU, and compare the different execution rates at
> > the notional RPS frequencies.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> > drivers/gpu/drm/i915/gt/selftest_gt_pm.c | 3 +-
> > drivers/gpu/drm/i915/gt/selftest_rps.c | 249 +++++++++++++++++++++--
> > drivers/gpu/drm/i915/gt/selftest_rps.h | 1 +
> > 3 files changed, 240 insertions(+), 13 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
> > index 0141c334f2ac..4b2733967c42 100644
> > --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
> > +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
> > @@ -53,8 +53,9 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
> > {
> > static const struct i915_subtest tests[] = {
> > SUBTEST(live_rc6_manual),
> > - SUBTEST(live_rps_interrupt),
> > + SUBTEST(live_rps_frequency),
> > SUBTEST(live_rps_power),
> > + SUBTEST(live_rps_interrupt),
> > SUBTEST(live_gt_resume),
> > };
> >
> > diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
> > index 360f56aa4b82..b1a435db1edc 100644
> > --- a/drivers/gpu/drm/i915/gt/selftest_rps.c
> > +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
> > @@ -6,6 +6,7 @@
> > #include <linux/sort.h>
> >
> > #include "intel_engine_pm.h"
> > +#include "intel_gpu_commands.h"
> > #include "intel_gt_pm.h"
> > #include "intel_rc6.h"
> > #include "selftest_rps.h"
> > @@ -17,6 +18,242 @@ static void dummy_rps_work(struct work_struct *wrk)
> > {
> > }
> >
> > +static int cmp_u64(const void *A, const void *B)
> > +{
> > + const u64 *a = A, *b = B;
> > +
> > + if (a < b)
> > + return -1;
> > + else if (a > b)
> > + return 1;
> > + else
> > + return 0;
> > +}
> > +
> > +static struct i915_vma *
> > +create_spin_counter(struct intel_engine_cs *engine,
> > + struct i915_address_space *vm,
> > + u32 **cancel,
> > + u32 **counter)
> > +{
> > + enum {
> > + COUNT,
>
> ok, it starts from zero.
>
> > + INC,
> > + __NGPR__,
> > + };
> > +#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
> > + struct drm_i915_gem_object *obj;
> > + struct i915_vma *vma;
> > + u32 *base, *cs;
> > + int loop, i;
> > + int err;
> > +
> > + obj = i915_gem_object_create_internal(vm->i915, 4096);
> > + if (IS_ERR(obj))
> > + return ERR_CAST(obj);
> > +
> > + vma = i915_vma_instance(obj, vm, NULL);
> > + if (IS_ERR(vma)) {
> > + i915_gem_object_put(obj);
> > + return vma;
> > + }
> > +
> > + err = i915_vma_pin(vma, 0, 0, PIN_USER);
> > + if (err) {
> > + i915_vma_put(vma);
>
> You forgot to put the obj.
The i915_vma_put() is i915_gem_object_put().
I know, I am in for a reckoning when I have to fix all the allocations
for i915_vma.kref being independent of the object.
>
> > + return ERR_PTR(err);
> > + }
> > +
> > + base = i915_gem_object_pin_map(obj, I915_MAP_WC);
> > + if (IS_ERR(base)) {
> > + i915_gem_object_put(obj);
>
> You forgot to put the vma?
One and the same :)
>
> > + return ERR_CAST(base);
> > + }
> > + cs = base;
> > +
> > + *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
> > + for (i = 0; i < __NGPR__; i++) {
> > + *cs++ = i915_mmio_reg_offset(CS_GPR(i));
> > + *cs++ = 0;
> > + *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
> > + *cs++ = 0;
> > + }
> > +
> > + *cs++ = MI_LOAD_REGISTER_IMM(1);
> > + *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
> > + *cs++ = 1;
> > +
> > + loop = cs - base;
> > +
> > + *cs++ = MI_MATH(4);
> > + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
> > + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
> > + *cs++ = MI_MATH_ADD;
> > + *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
> > +
> > + *cs++ = MI_STORE_REGISTER_MEM_GEN8;
> > + *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
> > + *cs++ = lower_32_bits(vma->node.start + 1000 * sizeof(*cs));
> > + *cs++ = upper_32_bits(vma->node.start + 1000 * sizeof(*cs));
> > +
> > + *cs++ = MI_BATCH_BUFFER_START_GEN8;
> > + *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
> > + *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
> > +
> > + i915_gem_object_flush_map(obj);
> > +
> > + *cancel = base + loop;
> > + *counter = memset32(base + 1000, 0, 1);
> > + return vma;
> > +}
> > +
> > +static u64 __measure_frequency(u32 *cntr, int duration_ms)
> > +{
> > + u64 dc, dt;
> > +
> > + dt = ktime_get();
> > + dc = READ_ONCE(*cntr);
> > + usleep_range(1000 * duration_ms, 2000 * duration_ms);
> > + dc = READ_ONCE(*cntr) - dc;
> > + dt = ktime_get() - dt;
> > +
> > + return div64_u64(1000 * 1000 * dc, dt);
> > +}
> > +
> > +static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
> > +{
> > + u64 x[5];
> > + int i;
> > +
> > + mutex_lock(&rps->lock);
> > + GEM_BUG_ON(!rps->active);
> > + intel_rps_set(rps, *freq);
> > + mutex_unlock(&rps->lock);
> > +
> > + msleep(20); /* more than enough time to stabilise! */
> > +
> > + for (i = 0; i < 5; i++)
> > + x[i] = __measure_frequency(cntr, 2);
> > + *freq = read_cagf(rps);
> > +
> > + /* A simple triangle filter for better result stability */
> > + sort(x, 5, sizeof(*x), cmp_u64, NULL);
> > + return div_u64(x[1] + 2 * x[2] + x[3], 4);
> > +}
> > +
> > +static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
> > +{
> > + return f_d * x > f_n * y && f_n * x < f_d * y;
> > +}
> > +
> > +int live_rps_frequency(void *arg)
> > +{
> > + void (*saved_work)(struct work_struct *wrk);
> > + struct intel_gt *gt = arg;
> > + struct intel_rps *rps = >->rps;
> > + struct intel_engine_cs *engine;
> > + enum intel_engine_id id;
> > + int err = 0;
> > +
> > + /*
> > + * The premise is that the GPU does change freqency at our behest.
> > + * Let's check there is a correspondence between the requested
> > + * frequency, the actual frequency, and the observed clock rate.
> > + */
> > +
> > + if (!rps->enabled || rps->max_freq <= rps->min_freq)
> > + return 0;
> > +
> > + if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
> > + return 0;
> > +
> > + intel_gt_pm_wait_for_idle(gt);
> > + saved_work = rps->work.func;
> > + rps->work.func = dummy_rps_work;
> > +
> > + for_each_engine(engine, gt, id) {
> > + struct i915_request *rq;
> > + struct i915_vma *vma;
> > + u32 *cancel, *cntr;
> > + struct {
> > + u64 count;
> > + int freq;
> > + } min, max;
> > +
> > + vma = create_spin_counter(engine,
> > + engine->kernel_context->vm,
> > + &cancel, &cntr);
> > + if (IS_ERR(vma)) {
> > + err = PTR_ERR(vma);
> > + break;
> > + }
> > +
> > + rq = intel_engine_create_kernel_request(engine);
> > + if (IS_ERR(rq)) {
> > + err = PTR_ERR(rq);
> > + goto err_vma;
> > + }
> > +
> > + i915_vma_lock(vma);
> > + err = i915_request_await_object(rq, vma->obj, false);
>
> I am puzzled what we need to wait asynchronously in here.
To bind the vma, mostly. Yes that is now hidden away by
i915_vma_move_to_active(), but we established the pattern to always add
the waits even if we expect them to be no-ops -- because it's a hard
task to find a missing one later.
> Further, intel_runtime_pm_get is missing.
For what? We acquire the wakeref via the request on the engine.
We don't talk to intel_runtime_pm directly, everything we should be
doing is engine specific, which knows which gt and the power management
for that.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Intel-gfx] [PATCH 2/4] drm/i915/selftests: Skip energy consumption tests if not controlling freq
2020-04-20 9:09 ` [Intel-gfx] [PATCH 2/4] drm/i915/selftests: Skip energy consumption tests if not controlling freq Chris Wilson
@ 2020-04-20 11:05 ` Mika Kuoppala
0 siblings, 0 replies; 14+ messages in thread
From: Mika Kuoppala @ 2020-04-20 11:05 UTC (permalink / raw)
To: Chris Wilson, intel-gfx; +Cc: Chris Wilson
Chris Wilson <chris@chris-wilson.co.uk> writes:
> If we can not manipulate the frequency with RPS, then comparing min/max
> power consumption is pointless / misleading. We will leave the warning
> about not being able to control the frequency selection via RPS to other
> tests so as not to confuse this more specialised check.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/gt/selftest_rps.c | 37 ++++++++++++++++----------
> 1 file changed, 23 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
> index b1a435db1edc..149a3de86cb9 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_rps.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
> @@ -485,26 +485,21 @@ static u64 __measure_power(int duration_ms)
> return div64_u64(1000 * 1000 * dE, dt);
> }
>
> -static u64 measure_power_at(struct intel_rps *rps, int freq)
> +static u64 measure_power_at(struct intel_rps *rps, int *freq)
> {
> u64 x[5];
> int i;
>
> mutex_lock(&rps->lock);
> GEM_BUG_ON(!rps->active);
> - intel_rps_set(rps, freq);
> + intel_rps_set(rps, *freq);
> mutex_unlock(&rps->lock);
>
> msleep(20); /* more than enough time to stabilise! */
>
> - i = read_cagf(rps);
> - if (i != freq)
> - pr_notice("Running at %x [%uMHz], not target %x [%uMHz]\n",
> - i, intel_gpu_freq(rps, i),
> - freq, intel_gpu_freq(rps, freq));
> -
> for (i = 0; i < 5; i++)
> x[i] = __measure_power(5);
> + *freq = read_cagf(rps);
>
> /* A simple triangle filter for better result stability */
> sort(x, 5, sizeof(*x), cmp_u64, NULL);
> @@ -542,7 +537,10 @@ int live_rps_power(void *arg)
>
> for_each_engine(engine, gt, id) {
> struct i915_request *rq;
> - u64 min, max;
> + struct {
> + u64 power;
> + int freq;
Typelocking the hw reprentation might have some benefits.
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> + } min, max;
>
> if (!intel_engine_can_store_dword(engine))
> continue;
> @@ -565,16 +563,27 @@ int live_rps_power(void *arg)
> break;
> }
>
> - max = measure_power_at(rps, rps->max_freq);
> - min = measure_power_at(rps, rps->min_freq);
> + max.freq = rps->max_freq;
> + max.power = measure_power_at(rps, &max.freq);
> +
> + min.freq = rps->min_freq;
> + min.power = measure_power_at(rps, &min.freq);
>
> igt_spinner_end(&spin);
>
> pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
> engine->name,
> - min, intel_gpu_freq(rps, rps->min_freq),
> - max, intel_gpu_freq(rps, rps->max_freq));
> - if (11 * min > 10 * max) {
> + min.power, intel_gpu_freq(rps, min.freq),
> + max.power, intel_gpu_freq(rps, max.freq));
> +
> + if (10 * min.freq >= 9 * max.freq) {
> + pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n",
> + min.freq, intel_gpu_freq(rps, min.freq),
> + max.freq, intel_gpu_freq(rps, max.freq));
> + continue;
> + }
> +
> + if (11 * min.power > 10 * max.power) {
> pr_err("%s: did not conserve power when setting lower frequency!\n",
> engine->name);
> err = -EINVAL;
> --
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/4] drm/i915/selftests: Verify frequency scaling with RPS
2020-04-20 9:09 [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS Chris Wilson
` (4 preceding siblings ...)
2020-04-20 10:54 ` [Intel-gfx] [PATCH 1/4] " Mika Kuoppala
@ 2020-04-20 11:12 ` Patchwork
2020-04-20 15:54 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
6 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2020-04-20 11:12 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: series starting with [1/4] drm/i915/selftests: Verify frequency scaling with RPS
URL : https://patchwork.freedesktop.org/series/76184/
State : success
== Summary ==
CI Bug Log - changes from CI_DRM_8327 -> Patchwork_17376
====================================================
Summary
-------
**SUCCESS**
No regressions found.
External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/index.html
Possible new issues
-------------------
Here are the unknown changes that may have been introduced in Patchwork_17376:
### IGT changes ###
#### Suppressed ####
The following results come from untrusted machines, tests, or statuses.
They do not affect the overall result.
* {igt@kms_flip@basic-flip-vs-wf_vblank@a-dsi1}:
- {fi-tgl-dsi}: [PASS][1] -> [INCOMPLETE][2]
[1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-tgl-dsi/igt@kms_flip@basic-flip-vs-wf_vblank@a-dsi1.html
[2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-tgl-dsi/igt@kms_flip@basic-flip-vs-wf_vblank@a-dsi1.html
Known issues
------------
Here are the changes found in Patchwork_17376 that come from known issues:
### IGT changes ###
#### Issues hit ####
* igt@i915_module_load@reload:
- fi-skl-6770hq: [PASS][3] -> [DMESG-WARN][4] ([i915#203]) +1 similar issue
[3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-skl-6770hq/igt@i915_module_load@reload.html
[4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-skl-6770hq/igt@i915_module_load@reload.html
* igt@i915_selftest@live@gt_pm:
- fi-icl-y: [PASS][5] -> [DMESG-FAIL][6] ([i915#1751])
[5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-icl-y/igt@i915_selftest@live@gt_pm.html
[6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-icl-y/igt@i915_selftest@live@gt_pm.html
- fi-skl-6600u: [PASS][7] -> [DMESG-FAIL][8] ([i915#1751])
[7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-skl-6600u/igt@i915_selftest@live@gt_pm.html
[8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-skl-6600u/igt@i915_selftest@live@gt_pm.html
- fi-cfl-8109u: [PASS][9] -> [DMESG-FAIL][10] ([i915#1751])
[9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-cfl-8109u/igt@i915_selftest@live@gt_pm.html
[10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-cfl-8109u/igt@i915_selftest@live@gt_pm.html
- fi-whl-u: [PASS][11] -> [DMESG-FAIL][12] ([i915#1751])
[11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-whl-u/igt@i915_selftest@live@gt_pm.html
[12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-whl-u/igt@i915_selftest@live@gt_pm.html
* igt@kms_pipe_crc_basic@read-crc-pipe-a-frame-sequence:
- fi-skl-6770hq: [PASS][13] -> [SKIP][14] ([fdo#109271]) +5 similar issues
[13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-skl-6770hq/igt@kms_pipe_crc_basic@read-crc-pipe-a-frame-sequence.html
[14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-skl-6770hq/igt@kms_pipe_crc_basic@read-crc-pipe-a-frame-sequence.html
* igt@kms_pipe_crc_basic@read-crc-pipe-b:
- fi-skl-6770hq: [PASS][15] -> [DMESG-WARN][16] ([i915#106])
[15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-skl-6770hq/igt@kms_pipe_crc_basic@read-crc-pipe-b.html
[16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-skl-6770hq/igt@kms_pipe_crc_basic@read-crc-pipe-b.html
#### Warnings ####
* igt@i915_selftest@live@gt_pm:
- fi-tgl-y: [DMESG-FAIL][17] ([i915#1725]) -> [DMESG-FAIL][18] ([i915#1744])
[17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/fi-tgl-y/igt@i915_selftest@live@gt_pm.html
[18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/fi-tgl-y/igt@i915_selftest@live@gt_pm.html
{name}: This element is suppressed. This means it is ignored when computing
the status of the difference (SUCCESS, WARNING, or FAILURE).
[fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
[i915#106]: https://gitlab.freedesktop.org/drm/intel/issues/106
[i915#1725]: https://gitlab.freedesktop.org/drm/intel/issues/1725
[i915#1744]: https://gitlab.freedesktop.org/drm/intel/issues/1744
[i915#1751]: https://gitlab.freedesktop.org/drm/intel/issues/1751
[i915#203]: https://gitlab.freedesktop.org/drm/intel/issues/203
Participating hosts (49 -> 44)
------------------------------
Additional (1): fi-kbl-7560u
Missing (6): fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-bdw-samus fi-byt-clapper fi-skl-6700k2
Build changes
-------------
* CI: CI-20190529 -> None
* Linux: CI_DRM_8327 -> Patchwork_17376
CI-20190529: 20190529
CI_DRM_8327: 17e0a63ab93b19ea2bfccd9a0425c93e52a65246 @ git://anongit.freedesktop.org/gfx-ci/linux
IGT_5602: a8fcccd15dcc2dd409edd23785a2d6f6e85fb682 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
Patchwork_17376: 589884d12beb98e0c84eb0026628ccb93ed0b946 @ git://anongit.freedesktop.org/gfx-ci/linux
== Linux commits ==
589884d12beb drm/i915/selftests: Split RPS frequency measurement
affab98e14a6 drm/i915/selftests: Check RPS controls
979f48e06a1c drm/i915/selftests: Skip energy consumption tests if not controlling freq
27cd01ca67a5 drm/i915/selftests: Verify frequency scaling with RPS
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS
2020-04-20 11:02 ` Chris Wilson
@ 2020-04-20 12:54 ` Mika Kuoppala
2020-04-20 13:08 ` Chris Wilson
0 siblings, 1 reply; 14+ messages in thread
From: Mika Kuoppala @ 2020-04-20 12:54 UTC (permalink / raw)
To: Chris Wilson, intel-gfx
Chris Wilson <chris@chris-wilson.co.uk> writes:
> Quoting Mika Kuoppala (2020-04-20 11:54:38)
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>>
>> > One of the core tenents of reclocking the GPU is that its throughput
>> > scales with the clock frequency. We can observe this by incrementing a
>> > loop counter on the GPU, and compare the different execution rates at
>> > the notional RPS frequencies.
>> >
>> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> > ---
>> > drivers/gpu/drm/i915/gt/selftest_gt_pm.c | 3 +-
>> > drivers/gpu/drm/i915/gt/selftest_rps.c | 249 +++++++++++++++++++++--
>> > drivers/gpu/drm/i915/gt/selftest_rps.h | 1 +
>> > 3 files changed, 240 insertions(+), 13 deletions(-)
>> >
>> > diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
>> > index 0141c334f2ac..4b2733967c42 100644
>> > --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
>> > +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
>> > @@ -53,8 +53,9 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
>> > {
>> > static const struct i915_subtest tests[] = {
>> > SUBTEST(live_rc6_manual),
>> > - SUBTEST(live_rps_interrupt),
>> > + SUBTEST(live_rps_frequency),
>> > SUBTEST(live_rps_power),
>> > + SUBTEST(live_rps_interrupt),
>> > SUBTEST(live_gt_resume),
>> > };
>> >
>> > diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
>> > index 360f56aa4b82..b1a435db1edc 100644
>> > --- a/drivers/gpu/drm/i915/gt/selftest_rps.c
>> > +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
>> > @@ -6,6 +6,7 @@
>> > #include <linux/sort.h>
>> >
>> > #include "intel_engine_pm.h"
>> > +#include "intel_gpu_commands.h"
>> > #include "intel_gt_pm.h"
>> > #include "intel_rc6.h"
>> > #include "selftest_rps.h"
>> > @@ -17,6 +18,242 @@ static void dummy_rps_work(struct work_struct *wrk)
>> > {
>> > }
>> >
>> > +static int cmp_u64(const void *A, const void *B)
>> > +{
>> > + const u64 *a = A, *b = B;
>> > +
>> > + if (a < b)
>> > + return -1;
>> > + else if (a > b)
>> > + return 1;
>> > + else
>> > + return 0;
>> > +}
>> > +
>> > +static struct i915_vma *
>> > +create_spin_counter(struct intel_engine_cs *engine,
>> > + struct i915_address_space *vm,
>> > + u32 **cancel,
>> > + u32 **counter)
>> > +{
>> > + enum {
>> > + COUNT,
>>
>> ok, it starts from zero.
>>
>> > + INC,
>> > + __NGPR__,
>> > + };
>> > +#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
>> > + struct drm_i915_gem_object *obj;
>> > + struct i915_vma *vma;
>> > + u32 *base, *cs;
>> > + int loop, i;
>> > + int err;
>> > +
>> > + obj = i915_gem_object_create_internal(vm->i915, 4096);
>> > + if (IS_ERR(obj))
>> > + return ERR_CAST(obj);
>> > +
>> > + vma = i915_vma_instance(obj, vm, NULL);
>> > + if (IS_ERR(vma)) {
>> > + i915_gem_object_put(obj);
>> > + return vma;
>> > + }
>> > +
>> > + err = i915_vma_pin(vma, 0, 0, PIN_USER);
>> > + if (err) {
>> > + i915_vma_put(vma);
>>
>> You forgot to put the obj.
>
> The i915_vma_put() is i915_gem_object_put().
>
> I know, I am in for a reckoning when I have to fix all the allocations
> for i915_vma.kref being independent of the object.
>
>>
>> > + return ERR_PTR(err);
>> > + }
>> > +
>> > + base = i915_gem_object_pin_map(obj, I915_MAP_WC);
>> > + if (IS_ERR(base)) {
>> > + i915_gem_object_put(obj);
>>
>> You forgot to put the vma?
>
> One and the same :)
>
>>
>> > + return ERR_CAST(base);
>> > + }
>> > + cs = base;
>> > +
>> > + *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
>> > + for (i = 0; i < __NGPR__; i++) {
>> > + *cs++ = i915_mmio_reg_offset(CS_GPR(i));
>> > + *cs++ = 0;
>> > + *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
>> > + *cs++ = 0;
>> > + }
>> > +
>> > + *cs++ = MI_LOAD_REGISTER_IMM(1);
>> > + *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
>> > + *cs++ = 1;
>> > +
>> > + loop = cs - base;
>> > +
>> > + *cs++ = MI_MATH(4);
>> > + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
>> > + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
>> > + *cs++ = MI_MATH_ADD;
>> > + *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
>> > +
>> > + *cs++ = MI_STORE_REGISTER_MEM_GEN8;
>> > + *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
>> > + *cs++ = lower_32_bits(vma->node.start + 1000 * sizeof(*cs));
>> > + *cs++ = upper_32_bits(vma->node.start + 1000 * sizeof(*cs));
>> > +
>> > + *cs++ = MI_BATCH_BUFFER_START_GEN8;
>> > + *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
>> > + *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
>> > +
>> > + i915_gem_object_flush_map(obj);
>> > +
>> > + *cancel = base + loop;
>> > + *counter = memset32(base + 1000, 0, 1);
>> > + return vma;
>> > +}
>> > +
>> > +static u64 __measure_frequency(u32 *cntr, int duration_ms)
>> > +{
>> > + u64 dc, dt;
>> > +
>> > + dt = ktime_get();
>> > + dc = READ_ONCE(*cntr);
>> > + usleep_range(1000 * duration_ms, 2000 * duration_ms);
>> > + dc = READ_ONCE(*cntr) - dc;
>> > + dt = ktime_get() - dt;
>> > +
>> > + return div64_u64(1000 * 1000 * dc, dt);
>> > +}
>> > +
>> > +static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
>> > +{
>> > + u64 x[5];
>> > + int i;
>> > +
>> > + mutex_lock(&rps->lock);
>> > + GEM_BUG_ON(!rps->active);
>> > + intel_rps_set(rps, *freq);
>> > + mutex_unlock(&rps->lock);
>> > +
>> > + msleep(20); /* more than enough time to stabilise! */
>> > +
>> > + for (i = 0; i < 5; i++)
>> > + x[i] = __measure_frequency(cntr, 2);
>> > + *freq = read_cagf(rps);
>> > +
>> > + /* A simple triangle filter for better result stability */
>> > + sort(x, 5, sizeof(*x), cmp_u64, NULL);
>> > + return div_u64(x[1] + 2 * x[2] + x[3], 4);
>> > +}
>> > +
>> > +static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
>> > +{
>> > + return f_d * x > f_n * y && f_n * x < f_d * y;
>> > +}
>> > +
>> > +int live_rps_frequency(void *arg)
>> > +{
>> > + void (*saved_work)(struct work_struct *wrk);
>> > + struct intel_gt *gt = arg;
>> > + struct intel_rps *rps = >->rps;
>> > + struct intel_engine_cs *engine;
>> > + enum intel_engine_id id;
>> > + int err = 0;
>> > +
>> > + /*
>> > + * The premise is that the GPU does change freqency at our behest.
>> > + * Let's check there is a correspondence between the requested
>> > + * frequency, the actual frequency, and the observed clock rate.
>> > + */
>> > +
>> > + if (!rps->enabled || rps->max_freq <= rps->min_freq)
>> > + return 0;
>> > +
>> > + if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
>> > + return 0;
>> > +
>> > + intel_gt_pm_wait_for_idle(gt);
>> > + saved_work = rps->work.func;
>> > + rps->work.func = dummy_rps_work;
>> > +
>> > + for_each_engine(engine, gt, id) {
>> > + struct i915_request *rq;
>> > + struct i915_vma *vma;
>> > + u32 *cancel, *cntr;
>> > + struct {
>> > + u64 count;
>> > + int freq;
>> > + } min, max;
>> > +
>> > + vma = create_spin_counter(engine,
>> > + engine->kernel_context->vm,
>> > + &cancel, &cntr);
>> > + if (IS_ERR(vma)) {
>> > + err = PTR_ERR(vma);
>> > + break;
>> > + }
>> > +
>> > + rq = intel_engine_create_kernel_request(engine);
>> > + if (IS_ERR(rq)) {
>> > + err = PTR_ERR(rq);
>> > + goto err_vma;
>> > + }
>> > +
>> > + i915_vma_lock(vma);
>> > + err = i915_request_await_object(rq, vma->obj, false);
>>
>> I am puzzled what we need to wait asynchronously in here.
>
> To bind the vma, mostly. Yes that is now hidden away by
> i915_vma_move_to_active(), but we established the pattern to always add
> the waits even if we expect them to be no-ops -- because it's a hard
> task to find a missing one later.
>
>> Further, intel_runtime_pm_get is missing.
>
> For what? We acquire the wakeref via the request on the engine.
>
> We don't talk to intel_runtime_pm directly, everything we should be
> doing is engine specific, which knows which gt and the power management
> for that.
I was worried about the read_cagf().
But as it is implied that the request will be running
and we have total control of it during reading th cagf,
it will work like this.
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> -Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS
2020-04-20 12:54 ` Mika Kuoppala
@ 2020-04-20 13:08 ` Chris Wilson
0 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2020-04-20 13:08 UTC (permalink / raw)
To: Mika Kuoppala, intel-gfx
Quoting Mika Kuoppala (2020-04-20 13:54:21)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
>
> > Quoting Mika Kuoppala (2020-04-20 11:54:38)
> >> Further, intel_runtime_pm_get is missing.
> >
> > For what? We acquire the wakeref via the request on the engine.
> >
> > We don't talk to intel_runtime_pm directly, everything we should be
> > doing is engine specific, which knows which gt and the power management
> > for that.
>
> I was worried about the read_cagf().
>
> But as it is implied that the request will be running
> and we have total control of it during reading th cagf,
> it will work like this.
Yes, we are only looking at the HW inside either the request or explicit
pm_gt. And our mmio debug is good enough to warn if we do attempt to
read cagf without the rpm wakeref.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Intel-gfx] ✗ Fi.CI.IGT: failure for series starting with [1/4] drm/i915/selftests: Verify frequency scaling with RPS
2020-04-20 9:09 [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS Chris Wilson
` (5 preceding siblings ...)
2020-04-20 11:12 ` [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/4] " Patchwork
@ 2020-04-20 15:54 ` Patchwork
6 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2020-04-20 15:54 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: series starting with [1/4] drm/i915/selftests: Verify frequency scaling with RPS
URL : https://patchwork.freedesktop.org/series/76184/
State : failure
== Summary ==
CI Bug Log - changes from CI_DRM_8327_full -> Patchwork_17376_full
====================================================
Summary
-------
**FAILURE**
Serious unknown changes coming with Patchwork_17376_full absolutely need to be
verified manually.
If you think the reported changes have nothing to do with the changes
introduced in Patchwork_17376_full, please notify your bug team to allow them
to document this new failure mode, which will reduce false positives in CI.
Possible new issues
-------------------
Here are the unknown changes that may have been introduced in Patchwork_17376_full:
### IGT changes ###
#### Possible regressions ####
* igt@kms_psr@sprite_render:
- shard-tglb: [PASS][1] -> [FAIL][2]
[1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-tglb7/igt@kms_psr@sprite_render.html
[2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-tglb3/igt@kms_psr@sprite_render.html
* igt@runner@aborted:
- shard-tglb: NOTRUN -> [FAIL][3]
[3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-tglb3/igt@runner@aborted.html
#### Warnings ####
* igt@i915_selftest@live@gt_pm:
- shard-skl: [DMESG-FAIL][4] ([i915#1744]) -> [DMESG-FAIL][5]
[4]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-skl4/igt@i915_selftest@live@gt_pm.html
[5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-skl8/igt@i915_selftest@live@gt_pm.html
Known issues
------------
Here are the changes found in Patchwork_17376_full that come from known issues:
### IGT changes ###
#### Issues hit ####
* igt@gem_exec_whisper@basic-queues-priority-all:
- shard-tglb: [PASS][6] -> [INCOMPLETE][7] ([i915#750])
[6]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-tglb5/igt@gem_exec_whisper@basic-queues-priority-all.html
[7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-tglb3/igt@gem_exec_whisper@basic-queues-priority-all.html
* igt@i915_pm_dc@dc6-psr:
- shard-iclb: [PASS][8] -> [FAIL][9] ([i915#454])
[8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-iclb1/igt@i915_pm_dc@dc6-psr.html
[9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-iclb8/igt@i915_pm_dc@dc6-psr.html
* igt@kms_big_fb@linear-32bpp-rotate-0:
- shard-kbl: [PASS][10] -> [FAIL][11] ([i915#1119] / [i915#93] / [i915#95])
[10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-kbl1/igt@kms_big_fb@linear-32bpp-rotate-0.html
[11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-kbl3/igt@kms_big_fb@linear-32bpp-rotate-0.html
- shard-apl: [PASS][12] -> [FAIL][13] ([i915#1119] / [i915#95])
[12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-apl3/igt@kms_big_fb@linear-32bpp-rotate-0.html
[13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-apl8/igt@kms_big_fb@linear-32bpp-rotate-0.html
* igt@kms_cursor_crc@pipe-a-cursor-suspend:
- shard-skl: [PASS][14] -> [FAIL][15] ([i915#54])
[14]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-skl10/igt@kms_cursor_crc@pipe-a-cursor-suspend.html
[15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-skl10/igt@kms_cursor_crc@pipe-a-cursor-suspend.html
* igt@kms_cursor_crc@pipe-c-cursor-suspend:
- shard-apl: [PASS][16] -> [DMESG-WARN][17] ([i915#180]) +1 similar issue
[16]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-apl4/igt@kms_cursor_crc@pipe-c-cursor-suspend.html
[17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-apl6/igt@kms_cursor_crc@pipe-c-cursor-suspend.html
* igt@kms_frontbuffer_tracking@psr-1p-primscrn-spr-indfb-fullscreen:
- shard-tglb: [PASS][18] -> [SKIP][19] ([i915#668]) +6 similar issues
[18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-tglb7/igt@kms_frontbuffer_tracking@psr-1p-primscrn-spr-indfb-fullscreen.html
[19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-tglb6/igt@kms_frontbuffer_tracking@psr-1p-primscrn-spr-indfb-fullscreen.html
* igt@kms_hdr@bpc-switch-suspend:
- shard-skl: [PASS][20] -> [FAIL][21] ([i915#1188])
[20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-skl9/igt@kms_hdr@bpc-switch-suspend.html
[21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-skl4/igt@kms_hdr@bpc-switch-suspend.html
* igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b:
- shard-skl: [PASS][22] -> [INCOMPLETE][23] ([i915#69])
[22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-skl4/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b.html
[23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-skl6/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b.html
* igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c:
- shard-kbl: [PASS][24] -> [DMESG-WARN][25] ([i915#180]) +1 similar issue
[24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-kbl1/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c.html
[25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-kbl2/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c.html
* igt@kms_psr@no_drrs:
- shard-iclb: [PASS][26] -> [FAIL][27] ([i915#173])
[26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-iclb4/igt@kms_psr@no_drrs.html
[27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-iclb1/igt@kms_psr@no_drrs.html
* igt@kms_psr@psr2_primary_mmap_cpu:
- shard-iclb: [PASS][28] -> [SKIP][29] ([fdo#109441]) +1 similar issue
[28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-iclb2/igt@kms_psr@psr2_primary_mmap_cpu.html
[29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-iclb6/igt@kms_psr@psr2_primary_mmap_cpu.html
* igt@kms_setmode@basic:
- shard-kbl: [PASS][30] -> [FAIL][31] ([i915#31])
[30]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-kbl2/igt@kms_setmode@basic.html
[31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-kbl1/igt@kms_setmode@basic.html
#### Possible fixes ####
* igt@i915_suspend@fence-restore-untiled:
- shard-skl: [INCOMPLETE][32] ([i915#69]) -> [PASS][33]
[32]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-skl1/igt@i915_suspend@fence-restore-untiled.html
[33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-skl9/igt@i915_suspend@fence-restore-untiled.html
* igt@kms_cursor_crc@pipe-b-cursor-128x128-random:
- shard-skl: [FAIL][34] ([i915#54]) -> [PASS][35] +1 similar issue
[34]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-skl1/igt@kms_cursor_crc@pipe-b-cursor-128x128-random.html
[35]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-skl6/igt@kms_cursor_crc@pipe-b-cursor-128x128-random.html
* {igt@kms_flip@flip-vs-suspend-interruptible@c-dp1}:
- shard-apl: [DMESG-WARN][36] ([i915#180]) -> [PASS][37] +2 similar issues
[36]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-apl6/igt@kms_flip@flip-vs-suspend-interruptible@c-dp1.html
[37]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-apl1/igt@kms_flip@flip-vs-suspend-interruptible@c-dp1.html
* {igt@kms_flip@flip-vs-suspend@c-dp1}:
- shard-kbl: [DMESG-WARN][38] ([i915#180]) -> [PASS][39] +8 similar issues
[38]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-kbl6/igt@kms_flip@flip-vs-suspend@c-dp1.html
[39]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-kbl6/igt@kms_flip@flip-vs-suspend@c-dp1.html
* igt@kms_hdr@bpc-switch-dpms:
- shard-skl: [FAIL][40] ([i915#1188]) -> [PASS][41]
[40]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-skl3/igt@kms_hdr@bpc-switch-dpms.html
[41]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-skl1/igt@kms_hdr@bpc-switch-dpms.html
* igt@kms_plane_alpha_blend@pipe-b-constant-alpha-min:
- shard-skl: [FAIL][42] ([fdo#108145] / [i915#265]) -> [PASS][43]
[42]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-skl9/igt@kms_plane_alpha_blend@pipe-b-constant-alpha-min.html
[43]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-skl5/igt@kms_plane_alpha_blend@pipe-b-constant-alpha-min.html
* igt@kms_psr@psr2_cursor_plane_move:
- shard-iclb: [SKIP][44] ([fdo#109441]) -> [PASS][45] +2 similar issues
[44]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-iclb6/igt@kms_psr@psr2_cursor_plane_move.html
[45]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-iclb2/igt@kms_psr@psr2_cursor_plane_move.html
* igt@kms_setmode@basic:
- shard-apl: [FAIL][46] ([i915#31]) -> [PASS][47]
[46]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-apl1/igt@kms_setmode@basic.html
[47]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-apl3/igt@kms_setmode@basic.html
#### Warnings ####
* igt@i915_pm_dc@dc3co-vpb-simulation:
- shard-iclb: [SKIP][48] ([i915#588]) -> [SKIP][49] ([i915#658])
[48]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-iclb2/igt@i915_pm_dc@dc3co-vpb-simulation.html
[49]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-iclb3/igt@i915_pm_dc@dc3co-vpb-simulation.html
* igt@i915_pm_dc@dc6-psr:
- shard-tglb: [FAIL][50] ([i915#454]) -> [SKIP][51] ([i915#668])
[50]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-tglb7/igt@i915_pm_dc@dc6-psr.html
[51]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-tglb6/igt@i915_pm_dc@dc6-psr.html
* igt@i915_selftest@live@gt_pm:
- shard-tglb: [DMESG-FAIL][52] ([i915#1725]) -> [DMESG-FAIL][53] ([i915#1744])
[52]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8327/shard-tglb6/igt@i915_selftest@live@gt_pm.html
[53]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/shard-tglb5/igt@i915_selftest@live@gt_pm.html
{name}: This element is suppressed. This means it is ignored when computing
the status of the difference (SUCCESS, WARNING, or FAILURE).
[fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
[fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
[i915#1119]: https://gitlab.freedesktop.org/drm/intel/issues/1119
[i915#1188]: https://gitlab.freedesktop.org/drm/intel/issues/1188
[i915#1542]: https://gitlab.freedesktop.org/drm/intel/issues/1542
[i915#1725]: https://gitlab.freedesktop.org/drm/intel/issues/1725
[i915#173]: https://gitlab.freedesktop.org/drm/intel/issues/173
[i915#1744]: https://gitlab.freedesktop.org/drm/intel/issues/1744
[i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180
[i915#265]: https://gitlab.freedesktop.org/drm/intel/issues/265
[i915#31]: https://gitlab.freedesktop.org/drm/intel/issues/31
[i915#34]: https://gitlab.freedesktop.org/drm/intel/issues/34
[i915#454]: https://gitlab.freedesktop.org/drm/intel/issues/454
[i915#54]: https://gitlab.freedesktop.org/drm/intel/issues/54
[i915#588]: https://gitlab.freedesktop.org/drm/intel/issues/588
[i915#658]: https://gitlab.freedesktop.org/drm/intel/issues/658
[i915#668]: https://gitlab.freedesktop.org/drm/intel/issues/668
[i915#69]: https://gitlab.freedesktop.org/drm/intel/issues/69
[i915#750]: https://gitlab.freedesktop.org/drm/intel/issues/750
[i915#93]: https://gitlab.freedesktop.org/drm/intel/issues/93
[i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95
Participating hosts (10 -> 10)
------------------------------
No changes in participating hosts
Build changes
-------------
* CI: CI-20190529 -> None
* Linux: CI_DRM_8327 -> Patchwork_17376
CI-20190529: 20190529
CI_DRM_8327: 17e0a63ab93b19ea2bfccd9a0425c93e52a65246 @ git://anongit.freedesktop.org/gfx-ci/linux
IGT_5602: a8fcccd15dcc2dd409edd23785a2d6f6e85fb682 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
Patchwork_17376: 589884d12beb98e0c84eb0026628ccb93ed0b946 @ git://anongit.freedesktop.org/gfx-ci/linux
piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17376/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Intel-gfx] [PATCH 3/4] drm/i915/selftests: Check RPS controls
2020-04-20 9:09 ` [Intel-gfx] [PATCH 3/4] drm/i915/selftests: Check RPS controls Chris Wilson
@ 2020-04-20 16:41 ` Mika Kuoppala
0 siblings, 0 replies; 14+ messages in thread
From: Mika Kuoppala @ 2020-04-20 16:41 UTC (permalink / raw)
To: Chris Wilson, intel-gfx; +Cc: Chris Wilson
Chris Wilson <chris@chris-wilson.co.uk> writes:
> Check that the GPU does respond to our RPS frequency requests by setting
> our desired frequency.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/gt/selftest_gt_pm.c | 1 +
> drivers/gpu/drm/i915/gt/selftest_rps.c | 196 ++++++++++++++++++++---
> drivers/gpu/drm/i915/gt/selftest_rps.h | 1 +
> 3 files changed, 174 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
> index 4b2733967c42..de3eaef40596 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
> @@ -53,6 +53,7 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
> {
> static const struct i915_subtest tests[] = {
> SUBTEST(live_rc6_manual),
> + SUBTEST(live_rps_control),
> SUBTEST(live_rps_frequency),
> SUBTEST(live_rps_power),
> SUBTEST(live_rps_interrupt),
> diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
> index 149a3de86cb9..19fa6a561de3 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_rps.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
> @@ -107,6 +107,172 @@ create_spin_counter(struct intel_engine_cs *engine,
> return vma;
> }
>
> +static u8 rps_set_check(struct intel_rps *rps, u8 freq)
> +{
> + u8 history[64], i;
> + unsigned long end;
> + int sleep;
> +
> + mutex_lock(&rps->lock);
> + GEM_BUG_ON(!rps->active);
> + intel_rps_set(rps, freq);
> + GEM_BUG_ON(rps->last_freq != freq);
> + mutex_unlock(&rps->lock);
> +
> + i = 0;
> + memset(history, freq, sizeof(history));
> + sleep = 20;
> +
> + /* The PCU does not change instantly, but drifts towards the goal? */
> + end = jiffies + msecs_to_jiffies(50);
> + do {
> + u8 act;
> +
> + act = read_cagf(rps);
> + if (time_after(jiffies, end))
> + return act;
> +
> + /* Target acquired */
> + if (act == freq)
> + return act;
> +
> + /* Any change witin the last N samples? */
within
> + if (!memchr_inv(history, act, sizeof(history)))
> + return act;
> +
> + history[i] = act;
> + i = (i + 1) % ARRAY_SIZE(history);
> +
> + usleep_range(sleep, 2 * sleep);
> + sleep *= 2;
> + if (sleep > 1000)
> + sleep = 1000;
> + } while (1);
> +}
> +
> +int live_rps_control(void *arg)
> +{
> + struct intel_gt *gt = arg;
> + struct intel_rps *rps = >->rps;
> + void (*saved_work)(struct work_struct *wrk);
> + struct intel_engine_cs *engine;
> + enum intel_engine_id id;
> + struct igt_spinner spin;
> + int err = 0;
> +
> + /*
> + * Check that the actual frequency matches our requested frequency,
> + * to verify our control mechanism. We have to be careful that the
> + * PCU may throttle the GPU in which case the actual frequency used
> + * will be lowered than requested.
> + */
> +
> + if (!rps->enabled || rps->max_freq <= rps->min_freq)
> + return 0;
> +
> + if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
> + return 0;
> +
> + if (igt_spinner_init(&spin, gt))
> + return -ENOMEM;
> +
> + intel_gt_pm_wait_for_idle(gt);
> + saved_work = rps->work.func;
> + rps->work.func = dummy_rps_work;
> +
> + intel_gt_pm_get(gt);
> + for_each_engine(engine, gt, id) {
> + struct i915_request *rq;
> + ktime_t min_dt, max_dt;
> + int act, f, limit;
> + int min, max;
> +
> + if (!intel_engine_can_store_dword(engine))
> + continue;
> +
> + rq = igt_spinner_create_request(&spin,
> + engine->kernel_context,
> + MI_NOOP);
> + if (IS_ERR(rq)) {
> + err = PTR_ERR(rq);
> + break;
> + }
> +
> + i915_request_add(rq);
> +
> + if (!igt_wait_for_spinner(&spin, rq)) {
> + pr_err("%s: RPS spinner did not start\n",
> + engine->name);
> + igt_spinner_end(&spin);
> + intel_gt_set_wedged(engine->gt);
> + err = -EIO;
> + break;
> + }
> +
> + if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
> + pr_err("%s: could not set minimum frequency [%x], only %x!\n",
> + engine->name, rps->min_freq, read_cagf(rps));
> + igt_spinner_end(&spin);
> + err = -EINVAL;
> + break;
> + }
> +
> + for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
> + act = rps_set_check(rps, f);
> + if (act < f)
> + break;
> + }
After discussion in irc, it seems we might have to settle that we
manage to get atleast one bin upwards and not have more expectations.
If/when issues get resolved, we might want to consider a minimum
limit of bins a hardware needs to reach instead of bin_zero + 1 :P
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> +
> + limit = rps_set_check(rps, f);
> +
> + if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
> + pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
> + engine->name, rps->min_freq, read_cagf(rps));
> + igt_spinner_end(&spin);
> + err = -EINVAL;
> + break;
> + }
> +
> + max_dt = ktime_get();
> + max = rps_set_check(rps, limit);
> + max_dt = ktime_sub(ktime_get(), max_dt);
> +
> + min_dt = ktime_get();
> + min = rps_set_check(rps, rps->min_freq);
> + min_dt = ktime_sub(ktime_get(), min_dt);
> +
> + igt_spinner_end(&spin);
> +
> + pr_info("%s: range:[%x:%uMHz, %x:%uMHz] actual:[ %x:%uMHz, %x:%uMHz], %x:%x response %lluns:%lluns\n",
> + engine->name,
> + rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
> + rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
> + act, intel_gpu_freq(rps, act),
> + limit, intel_gpu_freq(rps, limit),
> + min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));
> +
> + if (limit == rps->min_freq) {
> + pr_err("%s: GPU throttled to minimum!\n",
> + engine->name);
> + err = -ENODEV;
> + break;
> + }
> +
> + if (igt_flush_test(gt->i915)) {
> + err = -EIO;
> + break;
> + }
> + }
> + intel_gt_pm_put(gt);
> +
> + igt_spinner_fini(&spin);
> +
> + intel_gt_pm_wait_for_idle(gt);
> + rps->work.func = saved_work;
> +
> + return err;
> +}
> +
> static u64 __measure_frequency(u32 *cntr, int duration_ms)
> {
> u64 dc, dt;
> @@ -125,16 +291,10 @@ static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
> u64 x[5];
> int i;
>
> - mutex_lock(&rps->lock);
> - GEM_BUG_ON(!rps->active);
> - intel_rps_set(rps, *freq);
> - mutex_unlock(&rps->lock);
> -
> - msleep(20); /* more than enough time to stabilise! */
> -
> + *freq = rps_set_check(rps, *freq);
> for (i = 0; i < 5; i++)
> x[i] = __measure_frequency(cntr, 2);
> - *freq = read_cagf(rps);
> + *freq = (*freq + read_cagf(rps)) / 2;
>
> /* A simple triangle filter for better result stability */
> sort(x, 5, sizeof(*x), cmp_u64, NULL);
> @@ -279,10 +439,7 @@ static int __rps_up_interrupt(struct intel_rps *rps,
> if (!intel_engine_can_store_dword(engine))
> return 0;
>
> - mutex_lock(&rps->lock);
> - GEM_BUG_ON(!rps->active);
> - intel_rps_set(rps, rps->min_freq);
> - mutex_unlock(&rps->lock);
> + rps_set_check(rps, rps->min_freq);
>
> rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
> if (IS_ERR(rq))
> @@ -354,10 +511,7 @@ static int __rps_down_interrupt(struct intel_rps *rps,
> struct intel_uncore *uncore = engine->uncore;
> u32 timeout;
>
> - mutex_lock(&rps->lock);
> - GEM_BUG_ON(!rps->active);
> - intel_rps_set(rps, rps->max_freq);
> - mutex_unlock(&rps->lock);
> + rps_set_check(rps, rps->max_freq);
>
> if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
> pr_err("%s: RPS did not register DOWN interrupt\n",
> @@ -490,16 +644,10 @@ static u64 measure_power_at(struct intel_rps *rps, int *freq)
> u64 x[5];
> int i;
>
> - mutex_lock(&rps->lock);
> - GEM_BUG_ON(!rps->active);
> - intel_rps_set(rps, *freq);
> - mutex_unlock(&rps->lock);
> -
> - msleep(20); /* more than enough time to stabilise! */
> -
> + *freq = rps_set_check(rps, *freq);
> for (i = 0; i < 5; i++)
> x[i] = __measure_power(5);
> - *freq = read_cagf(rps);
> + *freq = (*freq + read_cagf(rps)) / 2;
>
> /* A simple triangle filter for better result stability */
> sort(x, 5, sizeof(*x), cmp_u64, NULL);
> diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.h b/drivers/gpu/drm/i915/gt/selftest_rps.h
> index 07c2bddf8899..be0bf8e3f639 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_rps.h
> +++ b/drivers/gpu/drm/i915/gt/selftest_rps.h
> @@ -6,6 +6,7 @@
> #ifndef SELFTEST_RPS_H
> #define SELFTEST_RPS_H
>
> +int live_rps_control(void *arg);
> int live_rps_frequency(void *arg);
> int live_rps_interrupt(void *arg);
> int live_rps_power(void *arg);
> --
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Intel-gfx] [PATCH 4/4] drm/i915/selftests: Split RPS frequency measurement
2020-04-20 9:09 ` [Intel-gfx] [PATCH 4/4] drm/i915/selftests: Split RPS frequency measurement Chris Wilson
@ 2020-04-20 16:51 ` Mika Kuoppala
0 siblings, 0 replies; 14+ messages in thread
From: Mika Kuoppala @ 2020-04-20 16:51 UTC (permalink / raw)
To: Chris Wilson, intel-gfx; +Cc: Chris Wilson
Chris Wilson <chris@chris-wilson.co.uk> writes:
> Split the frequency measurement into two modes, so that we can judge the
> impact of the llc setup on top of the pure CS frequency scaling.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/gt/selftest_gt_pm.c | 3 +-
> drivers/gpu/drm/i915/gt/selftest_rps.c | 157 ++++++++++++++++++++++-
> drivers/gpu/drm/i915/gt/selftest_rps.h | 3 +-
> 3 files changed, 154 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
> index de3eaef40596..9855e6f0ce7c 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
> @@ -54,7 +54,8 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
> static const struct i915_subtest tests[] = {
> SUBTEST(live_rc6_manual),
> SUBTEST(live_rps_control),
> - SUBTEST(live_rps_frequency),
> + SUBTEST(live_rps_frequency_cs),
> + SUBTEST(live_rps_frequency_srm),
> SUBTEST(live_rps_power),
> SUBTEST(live_rps_interrupt),
> SUBTEST(live_gt_resume),
> diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
> index 19fa6a561de3..dbca673519a2 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_rps.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
> @@ -33,6 +33,7 @@ static int cmp_u64(const void *A, const void *B)
> static struct i915_vma *
> create_spin_counter(struct intel_engine_cs *engine,
> struct i915_address_space *vm,
> + bool srm,
> u32 **cancel,
> u32 **counter)
> {
> @@ -91,10 +92,12 @@ create_spin_counter(struct intel_engine_cs *engine,
> *cs++ = MI_MATH_ADD;
> *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
>
> - *cs++ = MI_STORE_REGISTER_MEM_GEN8;
> - *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
> - *cs++ = lower_32_bits(vma->node.start + 1000 * sizeof(*cs));
> - *cs++ = upper_32_bits(vma->node.start + 1000 * sizeof(*cs));
> + if (srm) {
> + *cs++ = MI_STORE_REGISTER_MEM_GEN8;
> + *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
> + *cs++ = lower_32_bits(vma->node.start + 1000 * sizeof(*cs));
> + *cs++ = upper_32_bits(vma->node.start + 1000 * sizeof(*cs));
> + }
>
> *cs++ = MI_BATCH_BUFFER_START_GEN8;
> *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
> @@ -103,7 +106,7 @@ create_spin_counter(struct intel_engine_cs *engine,
> i915_gem_object_flush_map(obj);
>
> *cancel = base + loop;
> - *counter = memset32(base + 1000, 0, 1);
> + *counter = srm ? memset32(base + 1000, 0, 1) : NULL;
Ok hmm you want a blowout on wrong usage. Fair enough.
> return vma;
> }
>
> @@ -301,12 +304,152 @@ static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
> return div_u64(x[1] + 2 * x[2] + x[3], 4);
> }
>
> +static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
> + int duration_ms)
> +{
> + u64 dc, dt;
> +
> + dt = ktime_get();
> + dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
> + usleep_range(1000 * duration_ms, 2000 * duration_ms);
> + dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
Ok, again you measure active engine and getwaway with this.
On the basis that you want minimal impact on the observed
engine,
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> + dt = ktime_get() - dt;
> +
> + return div64_u64(1000 * 1000 * dc, dt);
> +}
> +
> +static u64 measure_cs_frequency_at(struct intel_rps *rps,
> + struct intel_engine_cs *engine,
> + int *freq)
> +{
> + u64 x[5];
> + int i;
> +
> + *freq = rps_set_check(rps, *freq);
> + for (i = 0; i < 5; i++)
> + x[i] = __measure_cs_frequency(engine, 2);
> + *freq = (*freq + read_cagf(rps)) / 2;
> +
> + /* A simple triangle filter for better result stability */
> + sort(x, 5, sizeof(*x), cmp_u64, NULL);
> + return div_u64(x[1] + 2 * x[2] + x[3], 4);
> +}
> +
> static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
> {
> return f_d * x > f_n * y && f_n * x < f_d * y;
> }
>
> -int live_rps_frequency(void *arg)
> +int live_rps_frequency_cs(void *arg)
> +{
> + void (*saved_work)(struct work_struct *wrk);
> + struct intel_gt *gt = arg;
> + struct intel_rps *rps = >->rps;
> + struct intel_engine_cs *engine;
> + enum intel_engine_id id;
> + int err = 0;
> +
> + /*
> + * The premise is that the GPU does change freqency at our behest.
> + * Let's check there is a correspondence between the requested
> + * frequency, the actual frequency, and the observed clock rate.
> + */
> +
> + if (!rps->enabled || rps->max_freq <= rps->min_freq)
> + return 0;
> +
> + if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
> + return 0;
> +
> + intel_gt_pm_wait_for_idle(gt);
> + saved_work = rps->work.func;
> + rps->work.func = dummy_rps_work;
> +
> + for_each_engine(engine, gt, id) {
> + struct i915_request *rq;
> + struct i915_vma *vma;
> + u32 *cancel, *cntr;
> + struct {
> + u64 count;
> + int freq;
> + } min, max;
> +
> + vma = create_spin_counter(engine,
> + engine->kernel_context->vm, false,
> + &cancel, &cntr);
> + if (IS_ERR(vma)) {
> + err = PTR_ERR(vma);
> + break;
> + }
> +
> + rq = intel_engine_create_kernel_request(engine);
> + if (IS_ERR(rq)) {
> + err = PTR_ERR(rq);
> + goto err_vma;
> + }
> +
> + i915_vma_lock(vma);
> + err = i915_request_await_object(rq, vma->obj, false);
> + if (!err)
> + err = i915_vma_move_to_active(vma, rq, 0);
> + if (!err)
> + err = rq->engine->emit_bb_start(rq,
> + vma->node.start,
> + PAGE_SIZE, 0);
> + i915_vma_unlock(vma);
> + i915_request_add(rq);
> + if (err)
> + goto err_vma;
> +
> + if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
> + 10)) {
> + pr_err("%s: timed loop did not start\n",
> + engine->name);
> + goto err_vma;
> + }
> +
> + min.freq = rps->min_freq;
> + min.count = measure_cs_frequency_at(rps, engine, &min.freq);
> +
> + max.freq = rps->max_freq;
> + max.count = measure_cs_frequency_at(rps, engine, &max.freq);
> +
> + pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
> + engine->name,
> + min.count, intel_gpu_freq(rps, min.freq),
> + max.count, intel_gpu_freq(rps, max.freq),
> + (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
> + max.freq * min.count));
> +
> + if (!scaled_within(max.freq * min.count,
> + min.freq * max.count,
> + 2, 3)) {
> + pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
> + engine->name,
> + max.freq * min.count,
> + min.freq * max.count);
> + err = -EINVAL;
> + }
> +
> +err_vma:
> + *cancel = MI_BATCH_BUFFER_END;
> + i915_gem_object_unpin_map(vma->obj);
> + i915_vma_unpin(vma);
> + i915_vma_put(vma);
> +
> + if (igt_flush_test(gt->i915))
> + err = -EIO;
> + if (err)
> + break;
> + }
> +
> + intel_gt_pm_wait_for_idle(gt);
> + rps->work.func = saved_work;
> +
> + return err;
> +}
> +
> +int live_rps_frequency_srm(void *arg)
> {
> void (*saved_work)(struct work_struct *wrk);
> struct intel_gt *gt = arg;
> @@ -341,7 +484,7 @@ int live_rps_frequency(void *arg)
> } min, max;
>
> vma = create_spin_counter(engine,
> - engine->kernel_context->vm,
> + engine->kernel_context->vm, true,
> &cancel, &cntr);
> if (IS_ERR(vma)) {
> err = PTR_ERR(vma);
> diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.h b/drivers/gpu/drm/i915/gt/selftest_rps.h
> index be0bf8e3f639..22e46c5341c5 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_rps.h
> +++ b/drivers/gpu/drm/i915/gt/selftest_rps.h
> @@ -7,7 +7,8 @@
> #define SELFTEST_RPS_H
>
> int live_rps_control(void *arg);
> -int live_rps_frequency(void *arg);
> +int live_rps_frequency_cs(void *arg);
> +int live_rps_frequency_srm(void *arg);
> int live_rps_interrupt(void *arg);
> int live_rps_power(void *arg);
>
> --
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
end of thread, other threads:[~2020-04-20 16:53 UTC | newest]
Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-20 9:09 [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS Chris Wilson
2020-04-20 9:09 ` [Intel-gfx] [PATCH 2/4] drm/i915/selftests: Skip energy consumption tests if not controlling freq Chris Wilson
2020-04-20 11:05 ` Mika Kuoppala
2020-04-20 9:09 ` [Intel-gfx] [PATCH 3/4] drm/i915/selftests: Check RPS controls Chris Wilson
2020-04-20 16:41 ` Mika Kuoppala
2020-04-20 9:09 ` [Intel-gfx] [PATCH 4/4] drm/i915/selftests: Split RPS frequency measurement Chris Wilson
2020-04-20 16:51 ` Mika Kuoppala
2020-04-20 10:00 ` [Intel-gfx] ✗ Fi.CI.BAT: failure for series starting with [1/4] drm/i915/selftests: Verify frequency scaling with RPS Patchwork
2020-04-20 10:54 ` [Intel-gfx] [PATCH 1/4] " Mika Kuoppala
2020-04-20 11:02 ` Chris Wilson
2020-04-20 12:54 ` Mika Kuoppala
2020-04-20 13:08 ` Chris Wilson
2020-04-20 11:12 ` [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/4] " Patchwork
2020-04-20 15:54 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.