* [PATCH] drm/i915/pmu: Use GT parked for estimating RC6 while asleep
@ 2019-09-11 16:38 Chris Wilson
  2019-09-12  5:09 ` ✓ Fi.CI.BAT: success for drm/i915/pmu: Use GT parked for estimating RC6 while asleep (rev3) Patchwork
                   ` (7 more replies)
  0 siblings, 8 replies; 16+ messages in thread
From: Chris Wilson @ 2019-09-11 16:38 UTC (permalink / raw)
  To: intel-gfx

As we track when we put the GT device to sleep upon idling, we can use
that callback to sample the current rc6 counters and record the
timestamp for estimating samples after that point while asleep.

v2: Stick to using ktime_t
v3: Track user_wakerefs that interfere with the new
intel_gt_pm_wait_for_idle

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105010
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_pm.c   |  19 ++++
 drivers/gpu/drm/i915/gt/intel_gt_types.h |   1 +
 drivers/gpu/drm/i915/i915_debugfs.c      |  22 ++---
 drivers/gpu/drm/i915/i915_pmu.c          | 120 +++++++++++------------
 drivers/gpu/drm/i915/i915_pmu.h          |   4 +-
 5 files changed, 90 insertions(+), 76 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 3bd764104d41..45a72cb698db 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -141,6 +141,21 @@ bool i915_gem_load_power_context(struct drm_i915_private *i915)
 	return switch_to_kernel_context_sync(&i915->gt);
 }
 
+static void user_forcewake(struct intel_gt *gt, bool suspend)
+{
+	int count = atomic_read(&gt->user_wakeref);
+
+	if (likely(!count))
+		return;
+
+	intel_gt_pm_get(gt);
+	if (suspend)
+		atomic_sub(count, &gt->wakeref.count);
+	else
+		atomic_add(count, &gt->wakeref.count);
+	intel_gt_pm_put(gt);
+}
+
 void i915_gem_suspend(struct drm_i915_private *i915)
 {
 	GEM_TRACE("\n");
@@ -148,6 +163,8 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0);
 	flush_workqueue(i915->wq);
 
+	user_forcewake(&i915->gt, true);
+
 	mutex_lock(&i915->drm.struct_mutex);
 
 	/*
@@ -259,6 +276,8 @@ void i915_gem_resume(struct drm_i915_private *i915)
 	if (!i915_gem_load_power_context(i915))
 		goto err_wedged;
 
+	user_forcewake(&i915->gt, false);
+
 out_unlock:
 	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
 	mutex_unlock(&i915->drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index dc295c196d11..3039cef64b11 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -50,6 +50,7 @@ struct intel_gt {
 	} timelines;
 
 	struct intel_wakeref wakeref;
+	atomic_t user_wakeref;
 
 	struct list_head closed_vma;
 	spinlock_t closed_lock; /* guards the list of closed_vma */
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 708855e051b5..f00c0dc4f57e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3988,13 +3988,12 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
 static int i915_forcewake_open(struct inode *inode, struct file *file)
 {
 	struct drm_i915_private *i915 = inode->i_private;
+	struct intel_gt *gt = &i915->gt;
 
-	if (INTEL_GEN(i915) < 6)
-		return 0;
-
-	file->private_data =
-		(void *)(uintptr_t)intel_runtime_pm_get(&i915->runtime_pm);
-	intel_uncore_forcewake_user_get(&i915->uncore);
+	atomic_inc(&gt->user_wakeref);
+	intel_gt_pm_get(gt);
+	if (INTEL_GEN(i915) >= 6)
+		intel_uncore_forcewake_user_get(gt->uncore);
 
 	return 0;
 }
@@ -4002,13 +4001,12 @@ static int i915_forcewake_open(struct inode *inode, struct file *file)
 static int i915_forcewake_release(struct inode *inode, struct file *file)
 {
 	struct drm_i915_private *i915 = inode->i_private;
+	struct intel_gt *gt = &i915->gt;
 
-	if (INTEL_GEN(i915) < 6)
-		return 0;
-
-	intel_uncore_forcewake_user_put(&i915->uncore);
-	intel_runtime_pm_put(&i915->runtime_pm,
-			     (intel_wakeref_t)(uintptr_t)file->private_data);
+	if (INTEL_GEN(i915) >= 6)
+		intel_uncore_forcewake_user_put(&i915->uncore);
+	intel_gt_pm_put(gt);
+	atomic_dec(&gt->user_wakeref);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 8e251e719390..a3d3d3e39c15 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -116,19 +116,51 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
 	return enable;
 }
 
+static u64 __get_rc6(struct intel_gt *gt)
+{
+	struct drm_i915_private *i915 = gt->i915;
+	u64 val;
+
+	val = intel_rc6_residency_ns(i915,
+				     IS_VALLEYVIEW(i915) ?
+				     VLV_GT_RENDER_RC6 :
+				     GEN6_GT_GFX_RC6);
+
+	if (HAS_RC6p(i915))
+		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
+
+	if (HAS_RC6pp(i915))
+		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
+
+	return val;
+}
+
 void i915_pmu_gt_parked(struct drm_i915_private *i915)
 {
 	struct i915_pmu *pmu = &i915->pmu;
+	u64 val;
 
 	if (!pmu->base.event_init)
 		return;
 
 	spin_lock_irq(&pmu->lock);
+
+	val = 0;
+	if (pmu->sample[__I915_SAMPLE_RC6].cur)
+		val = __get_rc6(&i915->gt);
+
+	if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
+		pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
+		pmu->sample[__I915_SAMPLE_RC6].cur = val;
+	}
+	pmu->sleep_last = ktime_get();
+
 	/*
 	 * Signal sampling timer to stop if only engine events are enabled and
 	 * GPU went idle.
 	 */
 	pmu->timer_enabled = pmu_needs_timer(pmu, false);
+
 	spin_unlock_irq(&pmu->lock);
 }
 
@@ -143,6 +175,11 @@ static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
 	}
 }
 
+static inline s64 ktime_since(const ktime_t kt)
+{
+	return ktime_to_ns(ktime_sub(ktime_get(), kt));
+}
+
 void i915_pmu_gt_unparked(struct drm_i915_private *i915)
 {
 	struct i915_pmu *pmu = &i915->pmu;
@@ -151,10 +188,22 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)
 		return;
 
 	spin_lock_irq(&pmu->lock);
+
 	/*
 	 * Re-enable sampling timer when GPU goes active.
 	 */
 	__i915_pmu_maybe_start_timer(pmu);
+
+	/* Estimate how long we slept and accumulate that into rc6 counters */
+	if (pmu->sample[__I915_SAMPLE_RC6].cur) {
+		u64 val;
+
+		val = ktime_since(pmu->sleep_last);
+		val += pmu->sample[__I915_SAMPLE_RC6].cur;
+
+		pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
+	}
+
 	spin_unlock_irq(&pmu->lock);
 }
 
@@ -426,39 +475,18 @@ static int i915_pmu_event_init(struct perf_event *event)
 	return 0;
 }
 
-static u64 __get_rc6(struct intel_gt *gt)
-{
-	struct drm_i915_private *i915 = gt->i915;
-	u64 val;
-
-	val = intel_rc6_residency_ns(i915,
-				     IS_VALLEYVIEW(i915) ?
-				     VLV_GT_RENDER_RC6 :
-				     GEN6_GT_GFX_RC6);
-
-	if (HAS_RC6p(i915))
-		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
-
-	if (HAS_RC6pp(i915))
-		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
-
-	return val;
-}
-
 static u64 get_rc6(struct intel_gt *gt)
 {
-#if IS_ENABLED(CONFIG_PM)
 	struct drm_i915_private *i915 = gt->i915;
-	struct intel_runtime_pm *rpm = &i915->runtime_pm;
 	struct i915_pmu *pmu = &i915->pmu;
-	intel_wakeref_t wakeref;
 	unsigned long flags;
 	u64 val;
 
-	wakeref = intel_runtime_pm_get_if_in_use(rpm);
-	if (wakeref) {
+	spin_lock_irqsave(&pmu->lock, flags);
+
+	if (intel_gt_pm_get_if_awake(gt)) {
 		val = __get_rc6(gt);
-		intel_runtime_pm_put(rpm, wakeref);
+		intel_gt_pm_put(gt);
 
 		/*
 		 * If we are coming back from being runtime suspended we must
@@ -466,19 +494,13 @@ static u64 get_rc6(struct intel_gt *gt)
 		 * previously.
 		 */
 
-		spin_lock_irqsave(&pmu->lock, flags);
-
 		if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
 			pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
 			pmu->sample[__I915_SAMPLE_RC6].cur = val;
 		} else {
 			val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
 		}
-
-		spin_unlock_irqrestore(&pmu->lock, flags);
 	} else {
-		struct device *kdev = rpm->kdev;
-
 		/*
 		 * We are runtime suspended.
 		 *
@@ -486,42 +508,16 @@ static u64 get_rc6(struct intel_gt *gt)
 		 * on top of the last known real value, as the approximated RC6
 		 * counter value.
 		 */
-		spin_lock_irqsave(&pmu->lock, flags);
 
-		/*
-		 * After the above branch intel_runtime_pm_get_if_in_use failed
-		 * to get the runtime PM reference we cannot assume we are in
-		 * runtime suspend since we can either: a) race with coming out
-		 * of it before we took the power.lock, or b) there are other
-		 * states than suspended which can bring us here.
-		 *
-		 * We need to double-check that we are indeed currently runtime
-		 * suspended and if not we cannot do better than report the last
-		 * known RC6 value.
-		 */
-		if (pm_runtime_status_suspended(kdev)) {
-			val = pm_runtime_suspended_time(kdev);
+		val = ktime_since(pmu->sleep_last);
+		val += pmu->sample[__I915_SAMPLE_RC6].cur;
 
-			if (!pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
-				pmu->suspended_time_last = val;
-
-			val -= pmu->suspended_time_last;
-			val += pmu->sample[__I915_SAMPLE_RC6].cur;
-
-			pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
-		} else if (pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
-			val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
-		} else {
-			val = pmu->sample[__I915_SAMPLE_RC6].cur;
-		}
-
-		spin_unlock_irqrestore(&pmu->lock, flags);
+		pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
 	}
 
+	spin_unlock_irqrestore(&pmu->lock, flags);
+
 	return val;
-#else
-	return __get_rc6(gt);
-#endif
 }
 
 static u64 __i915_pmu_event_read(struct perf_event *event)
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 4fc4f2478301..067dbbf3bdff 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -97,9 +97,9 @@ struct i915_pmu {
 	 */
 	struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS];
 	/**
-	 * @suspended_time_last: Cached suspend time from PM core.
+	 * @sleep_last: Last time GT parked for RC6 estimation.
 	 */
-	u64 suspended_time_last;
+	ktime_t sleep_last;
 	/**
 	 * @i915_attr: Memory block holding device attributes.
 	 */
-- 
2.23.0


* ✓ Fi.CI.BAT: success for drm/i915/pmu: Use GT parked for estimating RC6 while asleep (rev3)
  2019-09-11 16:38 [PATCH] drm/i915/pmu: Use GT parked for estimating RC6 while asleep Chris Wilson
@ 2019-09-12  5:09 ` Patchwork
  2019-09-12  8:24 ` ✓ Fi.CI.IGT: " Patchwork
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 16+ messages in thread
From: Patchwork @ 2019-09-12  5:09 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/pmu: Use GT parked for estimating RC6 while asleep (rev3)
URL   : https://patchwork.freedesktop.org/series/56583/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_6874 -> Patchwork_14364
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/

Known issues
------------

  Here are the changes found in Patchwork_14364 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_suspend@basic-s4-devices:
    - fi-blb-e6850:       [PASS][1] -> [INCOMPLETE][2] ([fdo#107718])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/fi-blb-e6850/igt@gem_exec_suspend@basic-s4-devices.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/fi-blb-e6850/igt@gem_exec_suspend@basic-s4-devices.html

  * igt@kms_chamelium@dp-crc-fast:
    - fi-icl-u2:          [PASS][3] -> [FAIL][4] ([fdo#109635 ] / [fdo#110387])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/fi-icl-u2/igt@kms_chamelium@dp-crc-fast.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/fi-icl-u2/igt@kms_chamelium@dp-crc-fast.html

  * igt@kms_chamelium@hdmi-crc-fast:
    - fi-kbl-7500u:       [PASS][5] -> [FAIL][6] ([fdo#109635 ])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/fi-kbl-7500u/igt@kms_chamelium@hdmi-crc-fast.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/fi-kbl-7500u/igt@kms_chamelium@hdmi-crc-fast.html

  * igt@vgem_basic@unload:
    - fi-icl-u3:          [PASS][7] -> [DMESG-WARN][8] ([fdo#107724]) +1 similar issue
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/fi-icl-u3/igt@vgem_basic@unload.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/fi-icl-u3/igt@vgem_basic@unload.html

  
#### Possible fixes ####

  * igt@gem_exec_suspend@basic-s4-devices:
    - fi-kbl-7500u:       [DMESG-WARN][9] ([fdo#105128] / [fdo#107139]) -> [PASS][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/fi-kbl-7500u/igt@gem_exec_suspend@basic-s4-devices.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/fi-kbl-7500u/igt@gem_exec_suspend@basic-s4-devices.html

  * igt@gem_mmap_gtt@basic-short:
    - fi-icl-u3:          [DMESG-WARN][11] ([fdo#107724]) -> [PASS][12] +1 similar issue
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/fi-icl-u3/igt@gem_mmap_gtt@basic-short.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/fi-icl-u3/igt@gem_mmap_gtt@basic-short.html

  * igt@i915_selftest@live_execlists:
    - fi-skl-gvtdvm:      [DMESG-FAIL][13] ([fdo#111108]) -> [PASS][14]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/fi-skl-gvtdvm/igt@i915_selftest@live_execlists.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/fi-skl-gvtdvm/igt@i915_selftest@live_execlists.html

  * igt@i915_selftest@live_gem_contexts:
    - fi-cfl-guc:         [INCOMPLETE][15] ([fdo#111514]) -> [PASS][16]
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/fi-cfl-guc/igt@i915_selftest@live_gem_contexts.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/fi-cfl-guc/igt@i915_selftest@live_gem_contexts.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#105128]: https://bugs.freedesktop.org/show_bug.cgi?id=105128
  [fdo#106107]: https://bugs.freedesktop.org/show_bug.cgi?id=106107
  [fdo#106350]: https://bugs.freedesktop.org/show_bug.cgi?id=106350
  [fdo#107139]: https://bugs.freedesktop.org/show_bug.cgi?id=107139
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#109635 ]: https://bugs.freedesktop.org/show_bug.cgi?id=109635 
  [fdo#110387]: https://bugs.freedesktop.org/show_bug.cgi?id=110387
  [fdo#111108]: https://bugs.freedesktop.org/show_bug.cgi?id=111108
  [fdo#111514]: https://bugs.freedesktop.org/show_bug.cgi?id=111514


Participating hosts (54 -> 46)
------------------------------

  Missing    (8): fi-ilk-m540 fi-hsw-4200u fi-skl-6770hq fi-byt-squawks fi-bsw-cyan fi-icl-y fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_6874 -> Patchwork_14364

  CI-20190529: 20190529
  CI_DRM_6874: f9ee689ff55489da8db3cd5ddad533967c07561f @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5178: efb4539494d94f03374874d3b61bd04ef3802aaa @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_14364: 004d0ffa224bd6a4c23026177ca012a1a9b89545 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

004d0ffa224b drm/i915/pmu: Use GT parked for estimating RC6 while asleep

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/

* ✓ Fi.CI.IGT: success for drm/i915/pmu: Use GT parked for estimating RC6 while asleep (rev3)
  2019-09-11 16:38 [PATCH] drm/i915/pmu: Use GT parked for estimating RC6 while asleep Chris Wilson
  2019-09-12  5:09 ` ✓ Fi.CI.BAT: success for drm/i915/pmu: Use GT parked for estimating RC6 while asleep (rev3) Patchwork
@ 2019-09-12  8:24 ` Patchwork
  2019-09-12  9:20 ` [PATCH] drm/i915/pmu: Use GT parked for estimating RC6 while asleep Tvrtko Ursulin
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 16+ messages in thread
From: Patchwork @ 2019-09-12  8:24 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/pmu: Use GT parked for estimating RC6 while asleep (rev3)
URL   : https://patchwork.freedesktop.org/series/56583/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_6874_full -> Patchwork_14364_full
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  

Known issues
------------

  Here are the changes found in Patchwork_14364_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_ctx_isolation@rcs0-s3:
    - shard-iclb:         [PASS][1] -> [INCOMPLETE][2] ([fdo#107713] / [fdo#109100])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/shard-iclb5/igt@gem_ctx_isolation@rcs0-s3.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/shard-iclb7/igt@gem_ctx_isolation@rcs0-s3.html

  * igt@gem_exec_schedule@wide-bsd:
    - shard-iclb:         [PASS][3] -> [SKIP][4] ([fdo#111325]) +6 similar issues
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/shard-iclb5/igt@gem_exec_schedule@wide-bsd.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/shard-iclb1/igt@gem_exec_schedule@wide-bsd.html

  * igt@i915_hangman@error-state-capture-vecs0:
    - shard-apl:          [PASS][5] -> [INCOMPLETE][6] ([fdo#103927]) +2 similar issues
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/shard-apl7/igt@i915_hangman@error-state-capture-vecs0.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/shard-apl7/igt@i915_hangman@error-state-capture-vecs0.html

  * igt@i915_pm_rpm@system-suspend-modeset:
    - shard-skl:          [PASS][7] -> [INCOMPLETE][8] ([fdo#104108] / [fdo#107807])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/shard-skl1/igt@i915_pm_rpm@system-suspend-modeset.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/shard-skl8/igt@i915_pm_rpm@system-suspend-modeset.html

  * igt@i915_suspend@fence-restore-tiled2untiled:
    - shard-apl:          [PASS][9] -> [DMESG-WARN][10] ([fdo#108566]) +3 similar issues
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/shard-apl5/igt@i915_suspend@fence-restore-tiled2untiled.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/shard-apl2/igt@i915_suspend@fence-restore-tiled2untiled.html

  * igt@kms_flip@flip-vs-panning-interruptible:
    - shard-skl:          [PASS][11] -> [DMESG-WARN][12] ([fdo#106107])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/shard-skl6/igt@kms_flip@flip-vs-panning-interruptible.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/shard-skl7/igt@kms_flip@flip-vs-panning-interruptible.html

  * igt@kms_flip@flip-vs-suspend:
    - shard-iclb:         [PASS][13] -> [INCOMPLETE][14] ([fdo#107713] / [fdo#109507])
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/shard-iclb4/igt@kms_flip@flip-vs-suspend.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/shard-iclb3/igt@kms_flip@flip-vs-suspend.html

  * igt@kms_flip@flip-vs-suspend-interruptible:
    - shard-hsw:          [PASS][15] -> [INCOMPLETE][16] ([fdo#103540])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/shard-hsw2/igt@kms_flip@flip-vs-suspend-interruptible.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/shard-hsw5/igt@kms_flip@flip-vs-suspend-interruptible.html

  * igt@kms_flip@modeset-vs-vblank-race:
    - shard-hsw:          [PASS][17] -> [DMESG-WARN][18] ([fdo#102614])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/shard-hsw5/igt@kms_flip@modeset-vs-vblank-race.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/shard-hsw5/igt@kms_flip@modeset-vs-vblank-race.html

  * igt@kms_frontbuffer_tracking@fbc-1p-pri-indfb-multidraw:
    - shard-iclb:         [PASS][19] -> [FAIL][20] ([fdo#103167]) +3 similar issues
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/shard-iclb1/igt@kms_frontbuffer_tracking@fbc-1p-pri-indfb-multidraw.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/shard-iclb3/igt@kms_frontbuffer_tracking@fbc-1p-pri-indfb-multidraw.html

  * igt@kms_plane_alpha_blend@pipe-c-coverage-7efc:
    - shard-skl:          [PASS][21] -> [FAIL][22] ([fdo#108145] / [fdo#110403])
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/shard-skl9/igt@kms_plane_alpha_blend@pipe-c-coverage-7efc.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/shard-skl4/igt@kms_plane_alpha_blend@pipe-c-coverage-7efc.html

  * igt@kms_setmode@basic:
    - shard-apl:          [PASS][23] -> [FAIL][24] ([fdo#99912])
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/shard-apl2/igt@kms_setmode@basic.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/shard-apl2/igt@kms_setmode@basic.html

  * igt@prime_busy@after-bsd2:
    - shard-iclb:         [PASS][25] -> [SKIP][26] ([fdo#109276]) +13 similar issues
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/shard-iclb2/igt@prime_busy@after-bsd2.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/shard-iclb5/igt@prime_busy@after-bsd2.html

  
#### Possible fixes ####

  * igt@gem_ctx_shared@exec-single-timeline-bsd:
    - shard-iclb:         [SKIP][27] ([fdo#110841]) -> [PASS][28]
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/shard-iclb2/igt@gem_ctx_shared@exec-single-timeline-bsd.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/shard-iclb5/igt@gem_ctx_shared@exec-single-timeline-bsd.html

  * igt@gem_exec_async@concurrent-writes-bsd:
    - shard-iclb:         [SKIP][29] ([fdo#111325]) -> [PASS][30] +4 similar issues
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/shard-iclb2/igt@gem_exec_async@concurrent-writes-bsd.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/shard-iclb5/igt@gem_exec_async@concurrent-writes-bsd.html

  * igt@kms_cursor_crc@pipe-b-cursor-suspend:
    - shard-apl:          [DMESG-WARN][31] ([fdo#108566]) -> [PASS][32] +2 similar issues
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/shard-apl7/igt@kms_cursor_crc@pipe-b-cursor-suspend.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/shard-apl4/igt@kms_cursor_crc@pipe-b-cursor-suspend.html

  * igt@kms_cursor_legacy@2x-long-flip-vs-cursor-legacy:
    - shard-glk:          [FAIL][33] ([fdo#104873]) -> [PASS][34]
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/shard-glk1/igt@kms_cursor_legacy@2x-long-flip-vs-cursor-legacy.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/shard-glk4/igt@kms_cursor_legacy@2x-long-flip-vs-cursor-legacy.html

  * igt@kms_cursor_legacy@cursor-vs-flip-atomic:
    - shard-hsw:          [FAIL][35] ([fdo#103355]) -> [PASS][36]
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/shard-hsw6/igt@kms_cursor_legacy@cursor-vs-flip-atomic.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/shard-hsw4/igt@kms_cursor_legacy@cursor-vs-flip-atomic.html

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-mmap-cpu:
    - shard-iclb:         [FAIL][37] ([fdo#103167]) -> [PASS][38] +4 similar issues
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/shard-iclb8/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-mmap-cpu.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/shard-iclb6/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-mmap-cpu.html

  * igt@kms_plane_alpha_blend@pipe-a-constant-alpha-min:
    - shard-skl:          [FAIL][39] ([fdo#108145]) -> [PASS][40]
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/shard-skl7/igt@kms_plane_alpha_blend@pipe-a-constant-alpha-min.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/shard-skl10/igt@kms_plane_alpha_blend@pipe-a-constant-alpha-min.html

  * igt@kms_plane_alpha_blend@pipe-b-coverage-7efc:
    - shard-skl:          [FAIL][41] ([fdo#108145] / [fdo#110403]) -> [PASS][42]
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/shard-skl5/igt@kms_plane_alpha_blend@pipe-b-coverage-7efc.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/shard-skl4/igt@kms_plane_alpha_blend@pipe-b-coverage-7efc.html

  * igt@kms_psr@psr2_sprite_plane_move:
    - shard-iclb:         [SKIP][43] ([fdo#109441]) -> [PASS][44] +2 similar issues
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/shard-iclb5/igt@kms_psr@psr2_sprite_plane_move.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/shard-iclb2/igt@kms_psr@psr2_sprite_plane_move.html

  * igt@perf@blocking:
    - shard-skl:          [FAIL][45] ([fdo#110728]) -> [PASS][46]
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/shard-skl4/igt@perf@blocking.html
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/shard-skl3/igt@perf@blocking.html

  * igt@prime_busy@hang-bsd2:
    - shard-iclb:         [SKIP][47] ([fdo#109276]) -> [PASS][48] +20 similar issues
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/shard-iclb3/igt@prime_busy@hang-bsd2.html
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/shard-iclb2/igt@prime_busy@hang-bsd2.html

  * igt@prime_busy@wait-hang-blt:
    - shard-hsw:          [INCOMPLETE][49] ([fdo#103540]) -> [PASS][50]
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6874/shard-hsw4/igt@prime_busy@wait-hang-blt.html
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/shard-hsw4/igt@prime_busy@wait-hang-blt.html

  
  [fdo#102614]: https://bugs.freedesktop.org/show_bug.cgi?id=102614
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103355]: https://bugs.freedesktop.org/show_bug.cgi?id=103355
  [fdo#103540]: https://bugs.freedesktop.org/show_bug.cgi?id=103540
  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#104108]: https://bugs.freedesktop.org/show_bug.cgi?id=104108
  [fdo#104873]: https://bugs.freedesktop.org/show_bug.cgi?id=104873
  [fdo#106107]: https://bugs.freedesktop.org/show_bug.cgi?id=106107
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#107807]: https://bugs.freedesktop.org/show_bug.cgi?id=107807
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#108566]: https://bugs.freedesktop.org/show_bug.cgi?id=108566
  [fdo#109100]: https://bugs.freedesktop.org/show_bug.cgi?id=109100
  [fdo#109276]: https://bugs.freedesktop.org/show_bug.cgi?id=109276
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#109507]: https://bugs.freedesktop.org/show_bug.cgi?id=109507
  [fdo#110403]: https://bugs.freedesktop.org/show_bug.cgi?id=110403
  [fdo#110728]: https://bugs.freedesktop.org/show_bug.cgi?id=110728
  [fdo#110841]: https://bugs.freedesktop.org/show_bug.cgi?id=110841
  [fdo#111325]: https://bugs.freedesktop.org/show_bug.cgi?id=111325
  [fdo#99912]: https://bugs.freedesktop.org/show_bug.cgi?id=99912


Participating hosts (10 -> 10)
------------------------------

  No changes in participating hosts


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_6874 -> Patchwork_14364

  CI-20190529: 20190529
  CI_DRM_6874: f9ee689ff55489da8db3cd5ddad533967c07561f @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5178: efb4539494d94f03374874d3b61bd04ef3802aaa @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_14364: 004d0ffa224bd6a4c23026177ca012a1a9b89545 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14364/

* Re: [PATCH] drm/i915/pmu: Use GT parked for estimating RC6 while asleep
  2019-09-11 16:38 [PATCH] drm/i915/pmu: Use GT parked for estimating RC6 while asleep Chris Wilson
  2019-09-12  5:09 ` ✓ Fi.CI.BAT: success for drm/i915/pmu: Use GT parked for estimating RC6 while asleep (rev3) Patchwork
  2019-09-12  8:24 ` ✓ Fi.CI.IGT: " Patchwork
@ 2019-09-12  9:20 ` Tvrtko Ursulin
  2019-09-12  9:39   ` Chris Wilson
  2019-09-12 10:13 ` [PATCH v4] " Chris Wilson
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 16+ messages in thread
From: Tvrtko Ursulin @ 2019-09-12  9:20 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 11/09/2019 17:38, Chris Wilson wrote:
> As we track when we put the GT device to sleep upon idling, we can use
> that callback to sample the current rc6 counters and record the
> timestamp for estimating samples after that point while asleep.
> 
> v2: Stick to using ktime_t
> v3: Track user_wakerefs that interfere with the new
> intel_gt_pm_wait_for_idle
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105010
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_pm.c   |  19 ++++
>   drivers/gpu/drm/i915/gt/intel_gt_types.h |   1 +
>   drivers/gpu/drm/i915/i915_debugfs.c      |  22 ++---
>   drivers/gpu/drm/i915/i915_pmu.c          | 120 +++++++++++------------
>   drivers/gpu/drm/i915/i915_pmu.h          |   4 +-
>   5 files changed, 90 insertions(+), 76 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> index 3bd764104d41..45a72cb698db 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> @@ -141,6 +141,21 @@ bool i915_gem_load_power_context(struct drm_i915_private *i915)
>   	return switch_to_kernel_context_sync(&i915->gt);
>   }
>   
> +static void user_forcewake(struct intel_gt *gt, bool suspend)
> +{
> +	int count = atomic_read(&gt->user_wakeref);
> +
> +	if (likely(!count))
> +		return;
> +
> +	intel_gt_pm_get(gt);
> +	if (suspend)
> +		atomic_sub(count, &gt->wakeref.count);
> +	else
> +		atomic_add(count, &gt->wakeref.count);
> +	intel_gt_pm_put(gt);
> +}
> +
>   void i915_gem_suspend(struct drm_i915_private *i915)
>   {
>   	GEM_TRACE("\n");
> @@ -148,6 +163,8 @@ void i915_gem_suspend(struct drm_i915_private *i915)
>   	intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0);
>   	flush_workqueue(i915->wq);
>   
> +	user_forcewake(&i915->gt, true);

This complication is needed only because you changed user forcewake 
handling to use intel_gt_pm_get/put instead of intel_runtime_pm_get? 
Which in turn is because of the CONFIG_PM ifdef removal below? Wouldn't 
it be simpler to keep both as they were? Maybe I am missing something...

> +
>   	mutex_lock(&i915->drm.struct_mutex);
>   
>   	/*
> @@ -259,6 +276,8 @@ void i915_gem_resume(struct drm_i915_private *i915)
>   	if (!i915_gem_load_power_context(i915))
>   		goto err_wedged;
>   
> +	user_forcewake(&i915->gt, false);
> +
>   out_unlock:
>   	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
>   	mutex_unlock(&i915->drm.struct_mutex);
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> index dc295c196d11..3039cef64b11 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> @@ -50,6 +50,7 @@ struct intel_gt {
>   	} timelines;
>   
>   	struct intel_wakeref wakeref;
> +	atomic_t user_wakeref;
>   
>   	struct list_head closed_vma;
>   	spinlock_t closed_lock; /* guards the list of closed_vma */
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 708855e051b5..f00c0dc4f57e 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -3988,13 +3988,12 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
>   static int i915_forcewake_open(struct inode *inode, struct file *file)
>   {
>   	struct drm_i915_private *i915 = inode->i_private;
> +	struct intel_gt *gt = &i915->gt;
>   
> -	if (INTEL_GEN(i915) < 6)
> -		return 0;
> -
> -	file->private_data =
> -		(void *)(uintptr_t)intel_runtime_pm_get(&i915->runtime_pm);
> -	intel_uncore_forcewake_user_get(&i915->uncore);
> +	atomic_inc(&gt->user_wakeref);
> +	intel_gt_pm_get(gt);
> +	if (INTEL_GEN(i915) >= 6)
> +		intel_uncore_forcewake_user_get(gt->uncore);
>   
>   	return 0;
>   }
> @@ -4002,13 +4001,12 @@ static int i915_forcewake_open(struct inode *inode, struct file *file)
>   static int i915_forcewake_release(struct inode *inode, struct file *file)
>   {
>   	struct drm_i915_private *i915 = inode->i_private;
> +	struct intel_gt *gt = &i915->gt;
>   
> -	if (INTEL_GEN(i915) < 6)
> -		return 0;
> -
> -	intel_uncore_forcewake_user_put(&i915->uncore);
> -	intel_runtime_pm_put(&i915->runtime_pm,
> -			     (intel_wakeref_t)(uintptr_t)file->private_data);
> +	if (INTEL_GEN(i915) >= 6)
> +		intel_uncore_forcewake_user_put(&i915->uncore);
> +	intel_gt_pm_put(gt);
> +	atomic_dec(&gt->user_wakeref);
>   
>   	return 0;
>   }
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index 8e251e719390..a3d3d3e39c15 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -116,19 +116,51 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
>   	return enable;
>   }
>   
> +static u64 __get_rc6(struct intel_gt *gt)
> +{
> +	struct drm_i915_private *i915 = gt->i915;
> +	u64 val;
> +
> +	val = intel_rc6_residency_ns(i915,
> +				     IS_VALLEYVIEW(i915) ?
> +				     VLV_GT_RENDER_RC6 :
> +				     GEN6_GT_GFX_RC6);
> +
> +	if (HAS_RC6p(i915))
> +		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
> +
> +	if (HAS_RC6pp(i915))
> +		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
> +
> +	return val;
> +}
> +
>   void i915_pmu_gt_parked(struct drm_i915_private *i915)
>   {
>   	struct i915_pmu *pmu = &i915->pmu;
> +	u64 val;
>   
>   	if (!pmu->base.event_init)
>   		return;
>   
>   	spin_lock_irq(&pmu->lock);
> +
> +	val = 0;
> +	if (pmu->sample[__I915_SAMPLE_RC6].cur)
> +		val = __get_rc6(&i915->gt);
> +
> +	if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
> +		pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
> +		pmu->sample[__I915_SAMPLE_RC6].cur = val;
> +	}
> +	pmu->sleep_last = ktime_get();
> +
>   	/*
>   	 * Signal sampling timer to stop if only engine events are enabled and
>   	 * GPU went idle.
>   	 */
>   	pmu->timer_enabled = pmu_needs_timer(pmu, false);
> +
>   	spin_unlock_irq(&pmu->lock);
>   }
>   
> @@ -143,6 +175,11 @@ static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
>   	}
>   }
>   
> +static inline s64 ktime_since(const ktime_t kt)
> +{
> +	return ktime_to_ns(ktime_sub(ktime_get(), kt));
> +}
> +
>   void i915_pmu_gt_unparked(struct drm_i915_private *i915)
>   {
>   	struct i915_pmu *pmu = &i915->pmu;
> @@ -151,10 +188,22 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)
>   		return;
>   
>   	spin_lock_irq(&pmu->lock);
> +
>   	/*
>   	 * Re-enable sampling timer when GPU goes active.
>   	 */
>   	__i915_pmu_maybe_start_timer(pmu);
> +
> +	/* Estimate how long we slept and accumulate that into rc6 counters */
> +	if (pmu->sample[__I915_SAMPLE_RC6].cur) {
> +		u64 val;
> +
> +		val = ktime_since(pmu->sleep_last);
> +		val += pmu->sample[__I915_SAMPLE_RC6].cur;
> +
> +		pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
> +	}
> +
>   	spin_unlock_irq(&pmu->lock);
>   }
>   
> @@ -426,39 +475,18 @@ static int i915_pmu_event_init(struct perf_event *event)
>   	return 0;
>   }
>   
> -static u64 __get_rc6(struct intel_gt *gt)
> -{
> -	struct drm_i915_private *i915 = gt->i915;
> -	u64 val;
> -
> -	val = intel_rc6_residency_ns(i915,
> -				     IS_VALLEYVIEW(i915) ?
> -				     VLV_GT_RENDER_RC6 :
> -				     GEN6_GT_GFX_RC6);
> -
> -	if (HAS_RC6p(i915))
> -		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
> -
> -	if (HAS_RC6pp(i915))
> -		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
> -
> -	return val;
> -}
> -
>   static u64 get_rc6(struct intel_gt *gt)
>   {
> -#if IS_ENABLED(CONFIG_PM)
>   	struct drm_i915_private *i915 = gt->i915;
> -	struct intel_runtime_pm *rpm = &i915->runtime_pm;
>   	struct i915_pmu *pmu = &i915->pmu;
> -	intel_wakeref_t wakeref;
>   	unsigned long flags;
>   	u64 val;
>   
> -	wakeref = intel_runtime_pm_get_if_in_use(rpm);
> -	if (wakeref) {
> +	spin_lock_irqsave(&pmu->lock, flags);
> +
> +	if (intel_gt_pm_get_if_awake(gt)) {
>   		val = __get_rc6(gt);
> -		intel_runtime_pm_put(rpm, wakeref);
> +		intel_gt_pm_put(gt);
>   
>   		/*
>   		 * If we are coming back from being runtime suspended we must
> @@ -466,19 +494,13 @@ static u64 get_rc6(struct intel_gt *gt)
>   		 * previously.
>   		 */
>   
> -		spin_lock_irqsave(&pmu->lock, flags);
> -
>   		if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
>   			pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
>   			pmu->sample[__I915_SAMPLE_RC6].cur = val;
>   		} else {
>   			val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
>   		}
> -
> -		spin_unlock_irqrestore(&pmu->lock, flags);

For this branch pmu->lock is only needed over the estimation block, not 
over __get_rc6() and intel_gt_pm_get_if_awake(). But I agree it's more 
efficient to do it like this to avoid multiple irq-off-on transitions 
via intel_rc6_residency_ns. I wanted to suggest local_irq_disable and 
separate spin_(un)locks on both if branches to be more self-documenting 
and less confusing, but then a single call also has its benefits.

>   	} else {
> -		struct device *kdev = rpm->kdev;
> -
>   		/*
>   		 * We are runtime suspended.
>   		 *
> @@ -486,42 +508,16 @@ static u64 get_rc6(struct intel_gt *gt)
>   		 * on top of the last known real value, as the approximated RC6
>   		 * counter value.
>   		 */
> -		spin_lock_irqsave(&pmu->lock, flags);
>   
> -		/*
> -		 * After the above branch intel_runtime_pm_get_if_in_use failed
> -		 * to get the runtime PM reference we cannot assume we are in
> -		 * runtime suspend since we can either: a) race with coming out
> -		 * of it before we took the power.lock, or b) there are other
> -		 * states than suspended which can bring us here.
> -		 *
> -		 * We need to double-check that we are indeed currently runtime
> -		 * suspended and if not we cannot do better than report the last
> -		 * known RC6 value.
> -		 */
> -		if (pm_runtime_status_suspended(kdev)) {
> -			val = pm_runtime_suspended_time(kdev);
> +		val = ktime_since(pmu->sleep_last);
> +		val += pmu->sample[__I915_SAMPLE_RC6].cur;
>   
> -			if (!pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
> -				pmu->suspended_time_last = val;
> -
> -			val -= pmu->suspended_time_last;
> -			val += pmu->sample[__I915_SAMPLE_RC6].cur;
> -
> -			pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
> -		} else if (pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
> -			val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
> -		} else {
> -			val = pmu->sample[__I915_SAMPLE_RC6].cur;
> -		}
> -
> -		spin_unlock_irqrestore(&pmu->lock, flags);
> +		pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
>   	}
>   
> +	spin_unlock_irqrestore(&pmu->lock, flags);
> +
>   	return val;
> -#else
> -	return __get_rc6(gt);
> -#endif

Don't we end up doing the irqsave spinlock needlessly when !CONFIG_PM?

>   }
>   
>   static u64 __i915_pmu_event_read(struct perf_event *event)
> diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
> index 4fc4f2478301..067dbbf3bdff 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.h
> +++ b/drivers/gpu/drm/i915/i915_pmu.h
> @@ -97,9 +97,9 @@ struct i915_pmu {
>   	 */
>   	struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS];
>   	/**
> -	 * @suspended_time_last: Cached suspend time from PM core.
> +	 * @sleep_last: Last time GT parked for RC6 estimation.
>   	 */
> -	u64 suspended_time_last;
> +	ktime_t sleep_last;
>   	/**
>   	 * @i915_attr: Memory block holding device attributes.
>   	 */
> 

Regards,

Tvrtko

* Re: [PATCH] drm/i915/pmu: Use GT parked for estimating RC6 while asleep
  2019-09-12  9:20 ` [PATCH] drm/i915/pmu: Use GT parked for estimating RC6 while asleep Tvrtko Ursulin
@ 2019-09-12  9:39   ` Chris Wilson
  2019-09-12  9:55     ` Tvrtko Ursulin
  0 siblings, 1 reply; 16+ messages in thread
From: Chris Wilson @ 2019-09-12  9:39 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-09-12 10:20:39)
> 
> On 11/09/2019 17:38, Chris Wilson wrote:
> > As we track when we put the GT device to sleep upon idling, we can use
> > that callback to sample the current rc6 counters and record the
> > timestamp for estimating samples after that point while asleep.
> > 
> > v2: Stick to using ktime_t
> > v3: Track user_wakerefs that interfere with the new
> > intel_gt_pm_wait_for_idle
> > 
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105010
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/gpu/drm/i915/gem/i915_gem_pm.c   |  19 ++++
> >   drivers/gpu/drm/i915/gt/intel_gt_types.h |   1 +
> >   drivers/gpu/drm/i915/i915_debugfs.c      |  22 ++---
> >   drivers/gpu/drm/i915/i915_pmu.c          | 120 +++++++++++------------
> >   drivers/gpu/drm/i915/i915_pmu.h          |   4 +-
> >   5 files changed, 90 insertions(+), 76 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> > index 3bd764104d41..45a72cb698db 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> > @@ -141,6 +141,21 @@ bool i915_gem_load_power_context(struct drm_i915_private *i915)
> >       return switch_to_kernel_context_sync(&i915->gt);
> >   }
> >   
> > +static void user_forcewake(struct intel_gt *gt, bool suspend)
> > +{
> > +     int count = atomic_read(&gt->user_wakeref);
> > +
> > +     if (likely(!count))
> > +             return;
> > +
> > +     intel_gt_pm_get(gt);
> > +     if (suspend)
> > +             atomic_sub(count, &gt->wakeref.count);
> > +     else
> > +             atomic_add(count, &gt->wakeref.count);
> > +     intel_gt_pm_put(gt);
> > +}
> > +
> >   void i915_gem_suspend(struct drm_i915_private *i915)
> >   {
> >       GEM_TRACE("\n");
> > @@ -148,6 +163,8 @@ void i915_gem_suspend(struct drm_i915_private *i915)
> >       intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0);
> >       flush_workqueue(i915->wq);
> >   
> > +     user_forcewake(&i915->gt, true);
> 
> This complication is needed only because you changed user forcewake 
> handling to use intel_gt_pm_get/put instead of intel_runtime_pm_get? 
> Which in turn is because of the CONFIG_PM ifdef removal below? Wouldn't 
> it be simpler to keep both as they were? Maybe I am missing something...

Not quite. The change is because we stop tracking rc6 after parking,
because we stop using the pm-timestamps in favour of our own gt
tracking. However, that required tying the debugfs into the gt pm in
order for us to notice the forced wakeup outside of the request flow.
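
For illustration, the bookkeeping amounts to something like the model
below (a stand-alone sketch in plain C, not the i915 code itself; all
names here are made up for the sketch):

#include <stdbool.h>
#include <stdint.h>

/*
 * Sample the real counter when the GT parks and note the time; while
 * parked, assume rc6 advances at wall-clock rate from that snapshot.
 */
struct rc6_model {
	uint64_t last_real;	/* rc6 sampled when the GT parked */
	uint64_t sleep_last;	/* timestamp of the park event */
};

static void model_park(struct rc6_model *m, uint64_t rc6_now, uint64_t now)
{
	m->last_real = rc6_now;
	m->sleep_last = now;
}

static uint64_t model_read(const struct rc6_model *m, bool awake,
			   uint64_t rc6_now, uint64_t now)
{
	if (awake)
		return rc6_now; /* GT awake: report the real counter */

	/* GT parked: last real value plus the time spent asleep */
	return m->last_real + (now - m->sleep_last);
}

int main(void)
{
	struct rc6_model m;

	model_park(&m, 1000, 5000); /* parked at t=5000 with rc6=1000 */
	/* 300 time units later, still parked: 1000 + 300 = 1300 */
	return model_read(&m, false, 0, 5300) == 1300 ? 0 : 1;
}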

Either we keep using the unreliable runtime-pm interactions, or not. The
patch hinges upon that decision. Or alternatively, we say we just don't
care about miscounting with debugfs/i915_user_forcewake.

> >   static u64 get_rc6(struct intel_gt *gt)
> >   {
> > -#if IS_ENABLED(CONFIG_PM)
> >       struct drm_i915_private *i915 = gt->i915;
> > -     struct intel_runtime_pm *rpm = &i915->runtime_pm;
> >       struct i915_pmu *pmu = &i915->pmu;
> > -     intel_wakeref_t wakeref;
> >       unsigned long flags;
> >       u64 val;
> >   
> > -     wakeref = intel_runtime_pm_get_if_in_use(rpm);
> > -     if (wakeref) {
> > +     spin_lock_irqsave(&pmu->lock, flags);
> > +
> > +     if (intel_gt_pm_get_if_awake(gt)) {
> >               val = __get_rc6(gt);
> > -             intel_runtime_pm_put(rpm, wakeref);
> > +             intel_gt_pm_put(gt);
> >   
> >               /*
> >                * If we are coming back from being runtime suspended we must
> > @@ -466,19 +494,13 @@ static u64 get_rc6(struct intel_gt *gt)
> >                * previously.
> >                */
> >   
> > -             spin_lock_irqsave(&pmu->lock, flags);
> > -
> >               if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
> >                       pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
> >                       pmu->sample[__I915_SAMPLE_RC6].cur = val;
> >               } else {
> >                       val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
> >               }
> > -
> > -             spin_unlock_irqrestore(&pmu->lock, flags);
> 
> For this branch pmu->lock is only needed over the estimation block, not 
> over __get_rc6() and intel_gt_pm_get_if_awake(). But I agree it's more 
> efficient to do it like this to avoid multiple irq-off-on transitions 
> via intel_rc6_residency_ns. I wanted to suggest local_irq_disable and 
> separate spin_(un)locks on both if branches to be more self-documenting 
> and less confusing, but then a single call also has its benefits.

If I am not mistaken, we need to serialise over the get_if_awake. Or at
least it makes it easier to argue about the GT state and whether we
need to choose between updating ESTIMATED or ACTUAL.
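
To spell the shape out (condensed from get_rc6() in the patch above,
with the clamping against the previous estimate elided):

	spin_lock_irqsave(&pmu->lock, flags);
	if (intel_gt_pm_get_if_awake(gt)) {
		val = __get_rc6(gt); /* real counter while awake */
		intel_gt_pm_put(gt);
		/* record it in __I915_SAMPLE_RC6, reset the estimate */
	} else {
		/* parked: estimate from the last real value */
		val = pmu->sample[__I915_SAMPLE_RC6].cur +
		      ktime_since(pmu->sleep_last);
	}
	spin_unlock_irqrestore(&pmu->lock, flags);

So the awake test and the decision of which sample to update happen
under the same pmu->lock as i915_pmu_gt_parked/unparked.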

[snip]

> Don't we end up doing the irqsave spinlock needlessly when !CONFIG_PM?

No, the intent is to serialise with i915_pmu_gt_parked and
i915_pmu_gt_unparked (and the GT awake state), which are independent of
CONFIG_PM.
-Chris

* Re: [PATCH] drm/i915/pmu: Use GT parked for estimating RC6 while asleep
  2019-09-12  9:39   ` Chris Wilson
@ 2019-09-12  9:55     ` Tvrtko Ursulin
  2019-09-12 10:02       ` Chris Wilson
  0 siblings, 1 reply; 16+ messages in thread
From: Tvrtko Ursulin @ 2019-09-12  9:55 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 12/09/2019 10:39, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-09-12 10:20:39)
>>
>> On 11/09/2019 17:38, Chris Wilson wrote:
>>> As we track when we put the GT device to sleep upon idling, we can use
>>> that callback to sample the current rc6 counters and record the
>>> timestamp for estimating samples after that point while asleep.
>>>
>>> v2: Stick to using ktime_t
>>> v3: Track user_wakerefs that interfere with the new
>>> intel_gt_pm_wait_for_idle
>>>
>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105010
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/gem/i915_gem_pm.c   |  19 ++++
>>>    drivers/gpu/drm/i915/gt/intel_gt_types.h |   1 +
>>>    drivers/gpu/drm/i915/i915_debugfs.c      |  22 ++---
>>>    drivers/gpu/drm/i915/i915_pmu.c          | 120 +++++++++++------------
>>>    drivers/gpu/drm/i915/i915_pmu.h          |   4 +-
>>>    5 files changed, 90 insertions(+), 76 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
>>> index 3bd764104d41..45a72cb698db 100644
>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
>>> @@ -141,6 +141,21 @@ bool i915_gem_load_power_context(struct drm_i915_private *i915)
>>>        return switch_to_kernel_context_sync(&i915->gt);
>>>    }
>>>    
>>> +static void user_forcewake(struct intel_gt *gt, bool suspend)
>>> +{
>>> +     int count = atomic_read(&gt->user_wakeref);
>>> +
>>> +     if (likely(!count))
>>> +             return;
>>> +
>>> +     intel_gt_pm_get(gt);
>>> +     if (suspend)
>>> +             atomic_sub(count, &gt->wakeref.count);

GEM_BUG_ON for underflow?

Presumably count is effectively atomic here since userspace is not 
running yet/any more. Might warrant a comment?

>>> +     else
>>> +             atomic_add(count, &gt->wakeref.count);
>>> +     intel_gt_pm_put(gt);
>>> +}
>>> +
>>>    void i915_gem_suspend(struct drm_i915_private *i915)
>>>    {
>>>        GEM_TRACE("\n");
>>> @@ -148,6 +163,8 @@ void i915_gem_suspend(struct drm_i915_private *i915)
>>>        intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0);
>>>        flush_workqueue(i915->wq);
>>>    
>>> +     user_forcewake(&i915->gt, true);
>>
>> This complication is needed only because you changed user forcewake
>> handling to use intel_gt_pm_get/put instead of intel_runtime_pm_get?
>> Which in turn is because of the CONFIG_PM ifdef removal below? Wouldn't
>> it be simpler to keep both as they were? Maybe I am missing something...
> 
> Not quite. The change is because we stop tracking rc6 after parking,
> because we stop using the pm-timestamps in favour of our own gt
> tracking. However, that required tying the debugfs into the gt pm in
> order for us to notice the forced wakeup outside of the request flow.
> 
> Either we keep using the unreliable runtime-pm interactions, or not. The
> patch hinges upon that decision. Or alternatively, we say we just don't
> care about miscounting with debugfs/i915_user_forcewake.

True, I think we have to account for it.

>>>    static u64 get_rc6(struct intel_gt *gt)
>>>    {
>>> -#if IS_ENABLED(CONFIG_PM)
>>>        struct drm_i915_private *i915 = gt->i915;
>>> -     struct intel_runtime_pm *rpm = &i915->runtime_pm;
>>>        struct i915_pmu *pmu = &i915->pmu;
>>> -     intel_wakeref_t wakeref;
>>>        unsigned long flags;
>>>        u64 val;
>>>    
>>> -     wakeref = intel_runtime_pm_get_if_in_use(rpm);
>>> -     if (wakeref) {
>>> +     spin_lock_irqsave(&pmu->lock, flags);
>>> +
>>> +     if (intel_gt_pm_get_if_awake(gt)) {
>>>                val = __get_rc6(gt);
>>> -             intel_runtime_pm_put(rpm, wakeref);
>>> +             intel_gt_pm_put(gt);
>>>    
>>>                /*
>>>                 * If we are coming back from being runtime suspended we must
>>> @@ -466,19 +494,13 @@ static u64 get_rc6(struct intel_gt *gt)
>>>                 * previously.
>>>                 */
>>>    
>>> -             spin_lock_irqsave(&pmu->lock, flags);
>>> -
>>>                if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
>>>                        pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
>>>                        pmu->sample[__I915_SAMPLE_RC6].cur = val;
>>>                } else {
>>>                        val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
>>>                }
>>> -
>>> -             spin_unlock_irqrestore(&pmu->lock, flags);
>>
>> For this branch pmu->lock is only needed over the estimation block, not
>> over __get_rc6() and intel_gt_pm_get_if_awake(). But I agree it's more
>> efficient to do it like this to avoid multiple irq-off-on transitions
>> via intel_rc6_residency_ns. I wanted to suggest local_irq_disable and
>> separate spin_(un)locks on both if branches to be more self-documenting
>> and less confusing, but then a single call also has its benefits.
> 
> If I am not mistaken, we need to serialise over the get_if_awake. Or at
> least it makes it easier to argue about the GT state and whether we
> need to choose between updating ESTIMATED or ACTUAL.

I am not sure that we do, but it doesn't harm much and has the benefits 
described above as well, so okay.

> 
> [snip]
> 
>> Don't we end up doing the irqsave spinlock needlessly when !CONFIG_PM?
> 
> No, the intent is to serialise with i915_pmu_gt_parked and
> i915_pmu_gt_unparked (and the GT awake state), which are independent of
> CONFIG_PM.

Yes but with !CONFIG_PM we can always read the real counters and don't 
need to do any additional magic. In fact code in i915_pmu_gt_(un)parked 
could be ifdef-ed out for that case as well.
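
Roughly something like this, I guess (illustrative sketch only, not an
actual diff against the patch):

#if IS_ENABLED(CONFIG_PM)
/* parked/unparked bookkeeping and the estimation path in get_rc6() */
#else
static u64 get_rc6(struct intel_gt *gt)
{
	/*
	 * Without runtime PM the device cannot be asleep beneath us, so
	 * the registers can always be read directly.
	 */
	return __get_rc6(gt);
}
#endif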

Regards,

Tvrtko

* Re: [PATCH] drm/i915/pmu: Use GT parked for estimating RC6 while asleep
  2019-09-12  9:55     ` Tvrtko Ursulin
@ 2019-09-12 10:02       ` Chris Wilson
  0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2019-09-12 10:02 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-09-12 10:55:00)
> 
> On 12/09/2019 10:39, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-09-12 10:20:39)
> >> Don't we end up doing the irqsave spinlock needlessly when !CONFIG_PM?
> > 
> > No, the intent is to serialise with i915_pmu_gt_parked and
> > i915_pmu_gt_unparked (and the GT awake state), which are independent of
> > CONFIG_PM.
> 
> Yes but with !CONFIG_PM we can always read the real counters and don't 
> need to do any additional magic. In fact code in i915_pmu_gt_(un)parked 
> could be ifdef-ed out for that case as well.

Oh, you mean if we didn't have to worry about runtime-pm at all for
mmio access. I was not thinking of that at all, just balancing parked vs
sample.
-Chris

* [PATCH v4] drm/i915/pmu: Use GT parked for estimating RC6 while asleep
  2019-09-11 16:38 [PATCH] drm/i915/pmu: Use GT parked for estimating RC6 while asleep Chris Wilson
                   ` (2 preceding siblings ...)
  2019-09-12  9:20 ` [PATCH] drm/i915/pmu: Use GT parked for estimating RC6 while asleep Tvrtko Ursulin
@ 2019-09-12 10:13 ` Chris Wilson
  2019-09-12 10:16   ` Chris Wilson
  2019-09-12 10:19 ` [PATCH] " Tvrtko Ursulin
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 16+ messages in thread
From: Chris Wilson @ 2019-09-12 10:13 UTC (permalink / raw)
  To: intel-gfx

As we track when we put the GT device to sleep upon idling, we can use
that callback to sample the current rc6 counters and record the
timestamp for estimating samples after that point while asleep.

v2: Stick to using ktime_t
v3: Track user_wakerefs that interfere with the new
intel_gt_pm_wait_for_idle
v4: No need for parked/unparked estimation if !CONFIG_PM

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105010
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_pm.c   |  22 +++
 drivers/gpu/drm/i915/gt/intel_gt_types.h |   1 +
 drivers/gpu/drm/i915/i915_debugfs.c      |  22 +--
 drivers/gpu/drm/i915/i915_pmu.c          | 225 ++++++++++++-----------
 drivers/gpu/drm/i915/i915_pmu.h          |  14 +-
 5 files changed, 158 insertions(+), 126 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 3bd764104d41..a11ad4d914ca 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -141,6 +141,24 @@ bool i915_gem_load_power_context(struct drm_i915_private *i915)
 	return switch_to_kernel_context_sync(&i915->gt);
 }
 
+static void user_forcewake(struct intel_gt *gt, bool suspend)
+{
+	int count = atomic_read(&gt->user_wakeref);
+
+	/* Inside suspend/resume so single threaded, no races to worry about. */
+	if (likely(!count))
+		return;
+
+	intel_gt_pm_get(gt);
+	if (suspend) {
+		GEM_BUG_ON(count > atomic_read(&gt->wakeref.count));
+		atomic_sub(count, &gt->wakeref.count);
+	} else {
+		atomic_add(count, &gt->wakeref.count);
+	}
+	intel_gt_pm_put(gt);
+}
+
 void i915_gem_suspend(struct drm_i915_private *i915)
 {
 	GEM_TRACE("\n");
@@ -148,6 +166,8 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0);
 	flush_workqueue(i915->wq);
 
+	user_forcewake(&i915->gt, true);
+
 	mutex_lock(&i915->drm.struct_mutex);
 
 	/*
@@ -259,6 +279,8 @@ void i915_gem_resume(struct drm_i915_private *i915)
 	if (!i915_gem_load_power_context(i915))
 		goto err_wedged;
 
+	user_forcewake(&i915->gt, false);
+
 out_unlock:
 	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
 	mutex_unlock(&i915->drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index dc295c196d11..3039cef64b11 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -50,6 +50,7 @@ struct intel_gt {
 	} timelines;
 
 	struct intel_wakeref wakeref;
+	atomic_t user_wakeref;
 
 	struct list_head closed_vma;
 	spinlock_t closed_lock; /* guards the list of closed_vma */
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index e5835337f022..f3ae525b77c0 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3995,13 +3995,12 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
 static int i915_forcewake_open(struct inode *inode, struct file *file)
 {
 	struct drm_i915_private *i915 = inode->i_private;
+	struct intel_gt *gt = &i915->gt;
 
-	if (INTEL_GEN(i915) < 6)
-		return 0;
-
-	file->private_data =
-		(void *)(uintptr_t)intel_runtime_pm_get(&i915->runtime_pm);
-	intel_uncore_forcewake_user_get(&i915->uncore);
+	atomic_inc(&gt->user_wakeref);
+	intel_gt_pm_get(gt);
+	if (INTEL_GEN(i915) >= 6)
+		intel_uncore_forcewake_user_get(gt->uncore);
 
 	return 0;
 }
@@ -4009,13 +4008,12 @@ static int i915_forcewake_open(struct inode *inode, struct file *file)
 static int i915_forcewake_release(struct inode *inode, struct file *file)
 {
 	struct drm_i915_private *i915 = inode->i_private;
+	struct intel_gt *gt = &i915->gt;
 
-	if (INTEL_GEN(i915) < 6)
-		return 0;
-
-	intel_uncore_forcewake_user_put(&i915->uncore);
-	intel_runtime_pm_put(&i915->runtime_pm,
-			     (intel_wakeref_t)(uintptr_t)file->private_data);
+	if (INTEL_GEN(i915) >= 6)
+		intel_uncore_forcewake_user_put(&i915->uncore);
+	intel_gt_pm_put(gt);
+	atomic_dec(&gt->user_wakeref);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 8e251e719390..f934d8dfb2d1 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -116,20 +116,23 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
 	return enable;
 }
 
-void i915_pmu_gt_parked(struct drm_i915_private *i915)
+static u64 __get_rc6(const struct intel_gt *gt)
 {
-	struct i915_pmu *pmu = &i915->pmu;
+	struct drm_i915_private *i915 = gt->i915;
+	u64 val;
 
-	if (!pmu->base.event_init)
-		return;
+	val = intel_rc6_residency_ns(i915,
+				     IS_VALLEYVIEW(i915) ?
+				     VLV_GT_RENDER_RC6 :
+				     GEN6_GT_GFX_RC6);
 
-	spin_lock_irq(&pmu->lock);
-	/*
-	 * Signal sampling timer to stop if only engine events are enabled and
-	 * GPU went idle.
-	 */
-	pmu->timer_enabled = pmu_needs_timer(pmu, false);
-	spin_unlock_irq(&pmu->lock);
+	if (HAS_RC6p(i915))
+		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
+
+	if (HAS_RC6pp(i915))
+		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
+
+	return val;
 }
 
 static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
@@ -143,6 +146,42 @@ static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
 	}
 }
 
+#if IS_ENABLED(CONFIG_PM)
+
+static inline s64 ktime_since(const ktime_t kt)
+{
+	return ktime_to_ns(ktime_sub(ktime_get(), kt));
+}
+
+void i915_pmu_gt_parked(struct drm_i915_private *i915)
+{
+	struct i915_pmu *pmu = &i915->pmu;
+	u64 val;
+
+	if (!pmu->base.event_init)
+		return;
+
+	spin_lock_irq(&pmu->lock);
+
+	val = 0;
+	if (pmu->sample[__I915_SAMPLE_RC6].cur)
+		val = __get_rc6(&i915->gt);
+
+	if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
+		pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
+		pmu->sample[__I915_SAMPLE_RC6].cur = val;
+	}
+	pmu->sleep_last = ktime_get();
+
+	/*
+	 * Signal sampling timer to stop if only engine events are enabled and
+	 * GPU went idle.
+	 */
+	pmu->timer_enabled = pmu_needs_timer(pmu, false);
+
+	spin_unlock_irq(&pmu->lock);
+}
+
 void i915_pmu_gt_unparked(struct drm_i915_private *i915)
 {
 	struct i915_pmu *pmu = &i915->pmu;
@@ -151,13 +190,79 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)
 		return;
 
 	spin_lock_irq(&pmu->lock);
+
 	/*
 	 * Re-enable sampling timer when GPU goes active.
 	 */
 	__i915_pmu_maybe_start_timer(pmu);
+
+	/* Estimate how long we slept and accumulate that into rc6 counters */
+	if (pmu->sample[__I915_SAMPLE_RC6].cur) {
+		u64 val;
+
+		val = ktime_since(pmu->sleep_last);
+		val += pmu->sample[__I915_SAMPLE_RC6].cur;
+
+		pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
+	}
+
 	spin_unlock_irq(&pmu->lock);
 }
 
+static u64 get_rc6(struct intel_gt *gt)
+{
+	struct drm_i915_private *i915 = gt->i915;
+	struct i915_pmu *pmu = &i915->pmu;
+	unsigned long flags;
+	u64 val;
+
+	spin_lock_irqsave(&pmu->lock, flags);
+
+	if (intel_gt_pm_get_if_awake(gt)) {
+		val = __get_rc6(gt);
+		intel_gt_pm_put(gt);
+
+		/*
+		 * If we are coming back from being runtime suspended we must
+		 * be careful not to report a larger value than returned
+		 * previously.
+		 */
+
+		if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
+			pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
+			pmu->sample[__I915_SAMPLE_RC6].cur = val;
+		} else {
+			val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
+		}
+	} else {
+		/*
+		 * We are runtime suspended.
+		 *
+		 * Report the delta from when the device was suspended to now,
+		 * on top of the last known real value, as the approximated RC6
+		 * counter value.
+		 */
+
+		val = ktime_since(pmu->sleep_last);
+		val += pmu->sample[__I915_SAMPLE_RC6].cur;
+
+		pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
+	}
+
+	spin_unlock_irqrestore(&pmu->lock, flags);
+
+	return val;
+}
+
+#else
+
+static u64 get_rc6(struct intel_gt *gt)
+{
+	return __get_rc6(gt);
+}
+
+#endif
+
 static void
 add_sample(struct i915_pmu_sample *sample, u32 val)
 {
@@ -426,104 +531,6 @@ static int i915_pmu_event_init(struct perf_event *event)
 	return 0;
 }
 
-static u64 __get_rc6(struct intel_gt *gt)
-{
-	struct drm_i915_private *i915 = gt->i915;
-	u64 val;
-
-	val = intel_rc6_residency_ns(i915,
-				     IS_VALLEYVIEW(i915) ?
-				     VLV_GT_RENDER_RC6 :
-				     GEN6_GT_GFX_RC6);
-
-	if (HAS_RC6p(i915))
-		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
-
-	if (HAS_RC6pp(i915))
-		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
-
-	return val;
-}
-
-static u64 get_rc6(struct intel_gt *gt)
-{
-#if IS_ENABLED(CONFIG_PM)
-	struct drm_i915_private *i915 = gt->i915;
-	struct intel_runtime_pm *rpm = &i915->runtime_pm;
-	struct i915_pmu *pmu = &i915->pmu;
-	intel_wakeref_t wakeref;
-	unsigned long flags;
-	u64 val;
-
-	wakeref = intel_runtime_pm_get_if_in_use(rpm);
-	if (wakeref) {
-		val = __get_rc6(gt);
-		intel_runtime_pm_put(rpm, wakeref);
-
-		/*
-		 * If we are coming back from being runtime suspended we must
-		 * be careful not to report a larger value than returned
-		 * previously.
-		 */
-
-		spin_lock_irqsave(&pmu->lock, flags);
-
-		if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
-			pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
-			pmu->sample[__I915_SAMPLE_RC6].cur = val;
-		} else {
-			val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
-		}
-
-		spin_unlock_irqrestore(&pmu->lock, flags);
-	} else {
-		struct device *kdev = rpm->kdev;
-
-		/*
-		 * We are runtime suspended.
-		 *
-		 * Report the delta from when the device was suspended to now,
-		 * on top of the last known real value, as the approximated RC6
-		 * counter value.
-		 */
-		spin_lock_irqsave(&pmu->lock, flags);
-
-		/*
-		 * After the above branch intel_runtime_pm_get_if_in_use failed
-		 * to get the runtime PM reference we cannot assume we are in
-		 * runtime suspend since we can either: a) race with coming out
-		 * of it before we took the power.lock, or b) there are other
-		 * states than suspended which can bring us here.
-		 *
-		 * We need to double-check that we are indeed currently runtime
-		 * suspended and if not we cannot do better than report the last
-		 * known RC6 value.
-		 */
-		if (pm_runtime_status_suspended(kdev)) {
-			val = pm_runtime_suspended_time(kdev);
-
-			if (!pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
-				pmu->suspended_time_last = val;
-
-			val -= pmu->suspended_time_last;
-			val += pmu->sample[__I915_SAMPLE_RC6].cur;
-
-			pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
-		} else if (pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
-			val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
-		} else {
-			val = pmu->sample[__I915_SAMPLE_RC6].cur;
-		}
-
-		spin_unlock_irqrestore(&pmu->lock, flags);
-	}
-
-	return val;
-#else
-	return __get_rc6(gt);
-#endif
-}
-
 static u64 __i915_pmu_event_read(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 4fc4f2478301..ca44758ef198 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -97,9 +97,9 @@ struct i915_pmu {
 	 */
 	struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS];
 	/**
-	 * @suspended_time_last: Cached suspend time from PM core.
+	 * @sleep_last: Last time GT parked for RC6 estimation.
 	 */
-	u64 suspended_time_last;
+	ktime_t sleep_last;
 	/**
 	 * @i915_attr: Memory block holding device attributes.
 	 */
@@ -110,14 +110,18 @@ struct i915_pmu {
 	void *pmu_attr;
 };
 
-#ifdef CONFIG_PERF_EVENTS
+#if IS_ENABLED(CONFIG_PERF_EVENTS)
 void i915_pmu_register(struct drm_i915_private *i915);
 void i915_pmu_unregister(struct drm_i915_private *i915);
-void i915_pmu_gt_parked(struct drm_i915_private *i915);
-void i915_pmu_gt_unparked(struct drm_i915_private *i915);
 #else
 static inline void i915_pmu_register(struct drm_i915_private *i915) {}
 static inline void i915_pmu_unregister(struct drm_i915_private *i915) {}
+#endif
+
+#if IS_ENABLED(CONFIG_PERF_EVENTS) && IS_ENABLED(CONFIG_PM)
+void i915_pmu_gt_parked(struct drm_i915_private *i915);
+void i915_pmu_gt_unparked(struct drm_i915_private *i915);
+#else
 static inline void i915_pmu_gt_parked(struct drm_i915_private *i915) {}
 static inline void i915_pmu_gt_unparked(struct drm_i915_private *i915) {}
 #endif
-- 
2.23.0


* Re: [PATCH v4] drm/i915/pmu: Use GT parked for estimating RC6 while asleep
  2019-09-12 10:13 ` [PATCH v4] " Chris Wilson
@ 2019-09-12 10:16   ` Chris Wilson
  0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2019-09-12 10:16 UTC (permalink / raw)
  To: intel-gfx

Quoting Chris Wilson (2019-09-12 11:13:39)
> +#if IS_ENABLED(CONFIG_PM)
> +
> +static inline s64 ktime_since(const ktime_t kt)
> +{
> +       return ktime_to_ns(ktime_sub(ktime_get(), kt));
> +}
> +
> +void i915_pmu_gt_parked(struct drm_i915_private *i915)
> +{
> +       struct i915_pmu *pmu = &i915->pmu;
> +       u64 val;
> +
> +       if (!pmu->base.event_init)
> +               return;
> +
> +       spin_lock_irq(&pmu->lock);
> +
> +       val = 0;
> +       if (pmu->sample[__I915_SAMPLE_RC6].cur)
> +               val = __get_rc6(&i915->gt);
> +
> +       if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
> +               pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
> +               pmu->sample[__I915_SAMPLE_RC6].cur = val;
> +       }
> +       pmu->sleep_last = ktime_get();
> +
> +       /*
> +        * Signal sampling timer to stop if only engine events are enabled and
> +        * GPU went idle.
> +        */
> +       pmu->timer_enabled = pmu_needs_timer(pmu, false);

The caveat here is that we don't stop the rc6 pmu timer while we sleep.
Seems like 6 of one, half-a-dozen of the other.
-Chris

* Re: [PATCH] drm/i915/pmu: Use GT parked for estimating RC6 while asleep
  2019-09-11 16:38 [PATCH] drm/i915/pmu: Use GT parked for estimating RC6 while asleep Chris Wilson
                   ` (3 preceding siblings ...)
  2019-09-12 10:13 ` [PATCH v4] " Chris Wilson
@ 2019-09-12 10:19 ` Tvrtko Ursulin
  2019-09-12 10:31 ` [PATCH v5] " Chris Wilson
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 16+ messages in thread
From: Tvrtko Ursulin @ 2019-09-12 10:19 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 11/09/2019 17:38, Chris Wilson wrote:
> As we track when we put the GT device to sleep upon idling, we can use
> that callback to sample the current rc6 counters and record the
> timestamp for estimating samples after that point while asleep.
> 
> v2: Stick to using ktime_t
> v3: Track user_wakerefs that interfere with the new
> intel_gt_pm_wait_for_idle
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105010
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_pm.c   |  19 ++++
>   drivers/gpu/drm/i915/gt/intel_gt_types.h |   1 +
>   drivers/gpu/drm/i915/i915_debugfs.c      |  22 ++---
>   drivers/gpu/drm/i915/i915_pmu.c          | 120 +++++++++++------------
>   drivers/gpu/drm/i915/i915_pmu.h          |   4 +-
>   5 files changed, 90 insertions(+), 76 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> index 3bd764104d41..45a72cb698db 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> @@ -141,6 +141,21 @@ bool i915_gem_load_power_context(struct drm_i915_private *i915)
>   	return switch_to_kernel_context_sync(&i915->gt);
>   }
>   
> +static void user_forcewake(struct intel_gt *gt, bool suspend)
> +{
> +	int count = atomic_read(&gt->user_wakeref);
> +
> +	if (likely(!count))
> +		return;
> +
> +	intel_gt_pm_get(gt);
> +	if (suspend)
> +		atomic_sub(count, &gt->wakeref.count);
> +	else
> +		atomic_add(count, &gt->wakeref.count);
> +	intel_gt_pm_put(gt);
> +}
> +
>   void i915_gem_suspend(struct drm_i915_private *i915)
>   {
>   	GEM_TRACE("\n");
> @@ -148,6 +163,8 @@ void i915_gem_suspend(struct drm_i915_private *i915)
>   	intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0);
>   	flush_workqueue(i915->wq);
>   
> +	user_forcewake(&i915->gt, true);
> +
>   	mutex_lock(&i915->drm.struct_mutex);
>   
>   	/*
> @@ -259,6 +276,8 @@ void i915_gem_resume(struct drm_i915_private *i915)
>   	if (!i915_gem_load_power_context(i915))
>   		goto err_wedged;
>   
> +	user_forcewake(&i915->gt, false);
> +
>   out_unlock:
>   	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
>   	mutex_unlock(&i915->drm.struct_mutex);
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> index dc295c196d11..3039cef64b11 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> @@ -50,6 +50,7 @@ struct intel_gt {
>   	} timelines;
>   
>   	struct intel_wakeref wakeref;
> +	atomic_t user_wakeref;
>   
>   	struct list_head closed_vma;
>   	spinlock_t closed_lock; /* guards the list of closed_vma */
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 708855e051b5..f00c0dc4f57e 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -3988,13 +3988,12 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
>   static int i915_forcewake_open(struct inode *inode, struct file *file)
>   {
>   	struct drm_i915_private *i915 = inode->i_private;
> +	struct intel_gt *gt = &i915->gt;
>   
> -	if (INTEL_GEN(i915) < 6)
> -		return 0;
> -
> -	file->private_data =
> -		(void *)(uintptr_t)intel_runtime_pm_get(&i915->runtime_pm);
> -	intel_uncore_forcewake_user_get(&i915->uncore);
> +	atomic_inc(&gt->user_wakeref);
> +	intel_gt_pm_get(gt);
> +	if (INTEL_GEN(i915) >= 6)
> +		intel_uncore_forcewake_user_get(gt->uncore);
>   
>   	return 0;
>   }
> @@ -4002,13 +4001,12 @@ static int i915_forcewake_open(struct inode *inode, struct file *file)
>   static int i915_forcewake_release(struct inode *inode, struct file *file)
>   {
>   	struct drm_i915_private *i915 = inode->i_private;
> +	struct intel_gt *gt = &i915->gt;
>   
> -	if (INTEL_GEN(i915) < 6)
> -		return 0;
> -
> -	intel_uncore_forcewake_user_put(&i915->uncore);
> -	intel_runtime_pm_put(&i915->runtime_pm,
> -			     (intel_wakeref_t)(uintptr_t)file->private_data);
> +	if (INTEL_GEN(i915) >= 6)
> +		intel_uncore_forcewake_user_put(&i915->uncore);
> +	intel_gt_pm_put(gt);
> +	atomic_dec(&gt->user_wakeref);
>   
>   	return 0;
>   }
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index 8e251e719390..a3d3d3e39c15 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -116,19 +116,51 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
>   	return enable;
>   }
>   
> +static u64 __get_rc6(struct intel_gt *gt)
> +{
> +	struct drm_i915_private *i915 = gt->i915;
> +	u64 val;
> +
> +	val = intel_rc6_residency_ns(i915,
> +				     IS_VALLEYVIEW(i915) ?
> +				     VLV_GT_RENDER_RC6 :
> +				     GEN6_GT_GFX_RC6);
> +
> +	if (HAS_RC6p(i915))
> +		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
> +
> +	if (HAS_RC6pp(i915))
> +		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
> +
> +	return val;
> +}
> +
>   void i915_pmu_gt_parked(struct drm_i915_private *i915)
>   {
>   	struct i915_pmu *pmu = &i915->pmu;
> +	u64 val;
>   
>   	if (!pmu->base.event_init)
>   		return;
>   
>   	spin_lock_irq(&pmu->lock);
> +
> +	val = 0;
> +	if (pmu->sample[__I915_SAMPLE_RC6].cur)

Are you using this check here and in unpark effectively as a proxy for 
the event being enabled? It might be clearer to write it like that.

> +		val = __get_rc6(&i915->gt);
> +
> +	if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
> +		pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
> +		pmu->sample[__I915_SAMPLE_RC6].cur = val;
> +	}

And then this block is the same counter update as in get_rc6, so it would 
be good to consolidate it for readability: __pmu_update_rc6(pmu, val)? 
Actually, would a single block of:

if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY))
   __pmu_update_rc6(pmu, __get_rc6(&i915->gt));

be all that is needed, and clearer?

> +	pmu->sleep_last = ktime_get();
> +
>   	/*
>   	 * Signal sampling timer to stop if only engine events are enabled and
>   	 * GPU went idle.
>   	 */
>   	pmu->timer_enabled = pmu_needs_timer(pmu, false);
> +
>   	spin_unlock_irq(&pmu->lock);
>   }
>   
> @@ -143,6 +175,11 @@ static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
>   	}
>   }
>   
> +static inline s64 ktime_since(const ktime_t kt)
> +{
> +	return ktime_to_ns(ktime_sub(ktime_get(), kt));
> +}
> +
>   void i915_pmu_gt_unparked(struct drm_i915_private *i915)
>   {
>   	struct i915_pmu *pmu = &i915->pmu;
> @@ -151,10 +188,22 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)
>   		return;
>   
>   	spin_lock_irq(&pmu->lock);
> +
>   	/*
>   	 * Re-enable sampling timer when GPU goes active.
>   	 */
>   	__i915_pmu_maybe_start_timer(pmu);
> +
> +	/* Estimate how long we slept and accumulate that into rc6 counters */
> +	if (pmu->sample[__I915_SAMPLE_RC6].cur) {
> +		u64 val;
> +
> +		val = ktime_since(pmu->sleep_last);
> +		val += pmu->sample[__I915_SAMPLE_RC6].cur;
> +
> +		pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;

Same treatment for this block? How would it look?

if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY))
	__pmu_update_estimated_rc6(pmu);

get_rc6() could use the same "val = __pmu_update_estimated_rc6(pmu);" in 
the estimation branch.

Hope I haven't missed something..

Regards,

Tvrtko

> +	}
> +
>   	spin_unlock_irq(&pmu->lock);
>   }
>   
> @@ -426,39 +475,18 @@ static int i915_pmu_event_init(struct perf_event *event)
>   	return 0;
>   }
>   
> -static u64 __get_rc6(struct intel_gt *gt)
> -{
> -	struct drm_i915_private *i915 = gt->i915;
> -	u64 val;
> -
> -	val = intel_rc6_residency_ns(i915,
> -				     IS_VALLEYVIEW(i915) ?
> -				     VLV_GT_RENDER_RC6 :
> -				     GEN6_GT_GFX_RC6);
> -
> -	if (HAS_RC6p(i915))
> -		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
> -
> -	if (HAS_RC6pp(i915))
> -		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
> -
> -	return val;
> -}
> -
>   static u64 get_rc6(struct intel_gt *gt)
>   {
> -#if IS_ENABLED(CONFIG_PM)
>   	struct drm_i915_private *i915 = gt->i915;
> -	struct intel_runtime_pm *rpm = &i915->runtime_pm;
>   	struct i915_pmu *pmu = &i915->pmu;
> -	intel_wakeref_t wakeref;
>   	unsigned long flags;
>   	u64 val;
>   
> -	wakeref = intel_runtime_pm_get_if_in_use(rpm);
> -	if (wakeref) {
> +	spin_lock_irqsave(&pmu->lock, flags);
> +
> +	if (intel_gt_pm_get_if_awake(gt)) {
>   		val = __get_rc6(gt);
> -		intel_runtime_pm_put(rpm, wakeref);
> +		intel_gt_pm_put(gt);
>   
>   		/*
>   		 * If we are coming back from being runtime suspended we must
> @@ -466,19 +494,13 @@ static u64 get_rc6(struct intel_gt *gt)
>   		 * previously.
>   		 */
>   
> -		spin_lock_irqsave(&pmu->lock, flags);
> -
>   		if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
>   			pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
>   			pmu->sample[__I915_SAMPLE_RC6].cur = val;
>   		} else {
>   			val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
>   		}
> -
> -		spin_unlock_irqrestore(&pmu->lock, flags);
>   	} else {
> -		struct device *kdev = rpm->kdev;
> -
>   		/*
>   		 * We are runtime suspended.
>   		 *
> @@ -486,42 +508,16 @@ static u64 get_rc6(struct intel_gt *gt)
>   		 * on top of the last known real value, as the approximated RC6
>   		 * counter value.
>   		 */
> -		spin_lock_irqsave(&pmu->lock, flags);
>   
> -		/*
> -		 * After the above branch intel_runtime_pm_get_if_in_use failed
> -		 * to get the runtime PM reference we cannot assume we are in
> -		 * runtime suspend since we can either: a) race with coming out
> -		 * of it before we took the power.lock, or b) there are other
> -		 * states than suspended which can bring us here.
> -		 *
> -		 * We need to double-check that we are indeed currently runtime
> -		 * suspended and if not we cannot do better than report the last
> -		 * known RC6 value.
> -		 */
> -		if (pm_runtime_status_suspended(kdev)) {
> -			val = pm_runtime_suspended_time(kdev);
> +		val = ktime_since(pmu->sleep_last);
> +		val += pmu->sample[__I915_SAMPLE_RC6].cur;
>   
> -			if (!pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
> -				pmu->suspended_time_last = val;
> -
> -			val -= pmu->suspended_time_last;
> -			val += pmu->sample[__I915_SAMPLE_RC6].cur;
> -
> -			pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
> -		} else if (pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
> -			val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
> -		} else {
> -			val = pmu->sample[__I915_SAMPLE_RC6].cur;
> -		}
> -
> -		spin_unlock_irqrestore(&pmu->lock, flags);
> +		pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
>   	}
>   
> +	spin_unlock_irqrestore(&pmu->lock, flags);
> +
>   	return val;
> -#else
> -	return __get_rc6(gt);
> -#endif
>   }
>   
>   static u64 __i915_pmu_event_read(struct perf_event *event)
> diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
> index 4fc4f2478301..067dbbf3bdff 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.h
> +++ b/drivers/gpu/drm/i915/i915_pmu.h
> @@ -97,9 +97,9 @@ struct i915_pmu {
>   	 */
>   	struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS];
>   	/**
> -	 * @suspended_time_last: Cached suspend time from PM core.
> +	 * @sleep_last: Last time GT parked for RC6 estimation.
>   	 */
> -	u64 suspended_time_last;
> +	ktime_t sleep_last;
>   	/**
>   	 * @i915_attr: Memory block holding device attributes.
>   	 */
> 
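
For reference, a rough sketch of the consolidation suggested above. The
helper names follow this review (v6 later in the thread lands them as
__pmu_update_rc6() and __pmu_estimate_rc6()); treat it as an illustration
of the intent rather than the final code:

static u64 __pmu_update_rc6(struct i915_pmu *pmu, u64 val)
{
	/* Never report less than the largest estimate already handed out. */
	if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
		pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
		pmu->sample[__I915_SAMPLE_RC6].cur = val;
	} else {
		val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
	}

	return val;
}

static u64 __pmu_update_estimated_rc6(struct i915_pmu *pmu)
{
	/* Last real RC6 value plus the time spent asleep since parking. */
	u64 val = pmu->sample[__I915_SAMPLE_RC6].cur +
		  ktime_since(pmu->sleep_last);

	pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;

	return val;
}

The park/unpark paths would then reduce to the
config_enabled_mask(I915_PMU_RC6_RESIDENCY) checks quoted above, with
get_rc6() calling the same helpers under pmu->lock.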

* [PATCH v5] drm/i915/pmu: Use GT parked for estimating RC6 while asleep
  2019-09-11 16:38 [PATCH] drm/i915/pmu: Use GT parked for estimating RC6 while asleep Chris Wilson
                   ` (4 preceding siblings ...)
  2019-09-12 10:19 ` [PATCH] " Tvrtko Ursulin
@ 2019-09-12 10:31 ` Chris Wilson
  2019-09-12 10:41 ` [PATCH v6] " Chris Wilson
  2019-09-12 11:32 ` [PATCH v7] " Chris Wilson
  7 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2019-09-12 10:31 UTC (permalink / raw)
  To: intel-gfx

As we track when we put the GT device to sleep upon idling, we can use
that callback to sample the current rc6 counters and record the
timestamp for estimating samples after that point while asleep.

v2: Stick to using ktime_t
v3: Track user_wakerefs that interfere with the new
intel_gt_pm_wait_for_idle
v4: No need for parked/unparked estimation if !CONFIG_PM
v5: Keep timer park/unpark logic as was

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105010
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_pm.c   |  22 +++
 drivers/gpu/drm/i915/gt/intel_gt_types.h |   1 +
 drivers/gpu/drm/i915/i915_debugfs.c      |  22 +--
 drivers/gpu/drm/i915/i915_pmu.c          | 238 +++++++++++++----------
 drivers/gpu/drm/i915/i915_pmu.h          |   4 +-
 5 files changed, 166 insertions(+), 121 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 3bd764104d41..a11ad4d914ca 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -141,6 +141,24 @@ bool i915_gem_load_power_context(struct drm_i915_private *i915)
 	return switch_to_kernel_context_sync(&i915->gt);
 }
 
+static void user_forcewake(struct intel_gt *gt, bool suspend)
+{
+	int count = atomic_read(&gt->user_wakeref);
+
+	/* Inside suspend/resume so single threaded, no races to worry about. */
+	if (likely(!count))
+		return;
+
+	intel_gt_pm_get(gt);
+	if (suspend) {
+		GEM_BUG_ON(count > atomic_read(&gt->wakeref.count));
+		atomic_sub(count, &gt->wakeref.count);
+	} else {
+		atomic_add(count, &gt->wakeref.count);
+	}
+	intel_gt_pm_put(gt);
+}
+
 void i915_gem_suspend(struct drm_i915_private *i915)
 {
 	GEM_TRACE("\n");
@@ -148,6 +166,8 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0);
 	flush_workqueue(i915->wq);
 
+	user_forcewake(&i915->gt, true);
+
 	mutex_lock(&i915->drm.struct_mutex);
 
 	/*
@@ -259,6 +279,8 @@ void i915_gem_resume(struct drm_i915_private *i915)
 	if (!i915_gem_load_power_context(i915))
 		goto err_wedged;
 
+	user_forcewake(&i915->gt, false);
+
 out_unlock:
 	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
 	mutex_unlock(&i915->drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index dc295c196d11..3039cef64b11 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -50,6 +50,7 @@ struct intel_gt {
 	} timelines;
 
 	struct intel_wakeref wakeref;
+	atomic_t user_wakeref;
 
 	struct list_head closed_vma;
 	spinlock_t closed_lock; /* guards the list of closed_vma */
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index e5835337f022..f3ae525b77c0 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3995,13 +3995,12 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
 static int i915_forcewake_open(struct inode *inode, struct file *file)
 {
 	struct drm_i915_private *i915 = inode->i_private;
+	struct intel_gt *gt = &i915->gt;
 
-	if (INTEL_GEN(i915) < 6)
-		return 0;
-
-	file->private_data =
-		(void *)(uintptr_t)intel_runtime_pm_get(&i915->runtime_pm);
-	intel_uncore_forcewake_user_get(&i915->uncore);
+	atomic_inc(&gt->user_wakeref);
+	intel_gt_pm_get(gt);
+	if (INTEL_GEN(i915) >= 6)
+		intel_uncore_forcewake_user_get(gt->uncore);
 
 	return 0;
 }
@@ -4009,13 +4008,12 @@ static int i915_forcewake_open(struct inode *inode, struct file *file)
 static int i915_forcewake_release(struct inode *inode, struct file *file)
 {
 	struct drm_i915_private *i915 = inode->i_private;
+	struct intel_gt *gt = &i915->gt;
 
-	if (INTEL_GEN(i915) < 6)
-		return 0;
-
-	intel_uncore_forcewake_user_put(&i915->uncore);
-	intel_runtime_pm_put(&i915->runtime_pm,
-			     (intel_wakeref_t)(uintptr_t)file->private_data);
+	if (INTEL_GEN(i915) >= 6)
+		intel_uncore_forcewake_user_put(&i915->uncore);
+	intel_gt_pm_put(gt);
+	atomic_dec(&gt->user_wakeref);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 8e251e719390..d172bc9f17b1 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -116,22 +116,120 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
 	return enable;
 }
 
-void i915_pmu_gt_parked(struct drm_i915_private *i915)
+static u64 __get_rc6(const struct intel_gt *gt)
+{
+	struct drm_i915_private *i915 = gt->i915;
+	u64 val;
+
+	val = intel_rc6_residency_ns(i915,
+				     IS_VALLEYVIEW(i915) ?
+				     VLV_GT_RENDER_RC6 :
+				     GEN6_GT_GFX_RC6);
+
+	if (HAS_RC6p(i915))
+		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
+
+	if (HAS_RC6pp(i915))
+		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
+
+	return val;
+}
+
+#if IS_ENABLED(CONFIG_PM)
+
+static inline s64 ktime_since(const ktime_t kt)
+{
+	return ktime_to_ns(ktime_sub(ktime_get(), kt));
+}
+
+static u64 get_rc6(struct intel_gt *gt)
 {
+	struct drm_i915_private *i915 = gt->i915;
 	struct i915_pmu *pmu = &i915->pmu;
+	unsigned long flags;
+	u64 val;
 
-	if (!pmu->base.event_init)
+	spin_lock_irqsave(&pmu->lock, flags);
+
+	if (intel_gt_pm_get_if_awake(gt)) {
+		val = __get_rc6(gt);
+		intel_gt_pm_put(gt);
+
+		/*
+		 * If we are coming back from being runtime suspended we must
+		 * be careful not to report a larger value than returned
+		 * previously.
+		 */
+
+		if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
+			pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
+			pmu->sample[__I915_SAMPLE_RC6].cur = val;
+		} else {
+			val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
+		}
+	} else {
+		/*
+		 * We are runtime suspended.
+		 *
+		 * Report the delta from when the device was suspended to now,
+		 * on top of the last known real value, as the approximated RC6
+		 * counter value.
+		 */
+
+		val = ktime_since(pmu->sleep_last);
+		val += pmu->sample[__I915_SAMPLE_RC6].cur;
+
+		pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
+	}
+
+	spin_unlock_irqrestore(&pmu->lock, flags);
+
+	return val;
+}
+
+static void park_rc6(struct drm_i915_private *i915)
+{
+	struct i915_pmu *pmu = &i915->pmu;
+	u64 val;
+
+	val = 0;
+	if (pmu->sample[__I915_SAMPLE_RC6].cur)
+		val = __get_rc6(&i915->gt);
+
+	if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
+		pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
+		pmu->sample[__I915_SAMPLE_RC6].cur = val;
+	}
+	pmu->sleep_last = ktime_get();
+}
+
+static void unpark_rc6(struct drm_i915_private *i915)
+{
+	struct i915_pmu *pmu = &i915->pmu;
+	u64 val;
+
+	/* Estimate how long we slept and accumulate that into rc6 counters */
+	if (!pmu->sample[__I915_SAMPLE_RC6].cur)
 		return;
 
-	spin_lock_irq(&pmu->lock);
-	/*
-	 * Signal sampling timer to stop if only engine events are enabled and
-	 * GPU went idle.
-	 */
-	pmu->timer_enabled = pmu_needs_timer(pmu, false);
-	spin_unlock_irq(&pmu->lock);
+	val = ktime_since(pmu->sleep_last);
+	val += pmu->sample[__I915_SAMPLE_RC6].cur;
+
+	pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
 }
 
+#else
+
+static u64 get_rc6(struct intel_gt *gt)
+{
+	return __get_rc6(gt);
+}
+
+static void park_rc6(struct drm_i915_private *i915) {};
+static void unpark_rc6(struct drm_i915_private *i915) {};
+
+#endif
+
 static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
 {
 	if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
@@ -143,6 +241,26 @@ static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
 	}
 }
 
+void i915_pmu_gt_parked(struct drm_i915_private *i915)
+{
+	struct i915_pmu *pmu = &i915->pmu;
+
+	if (!pmu->base.event_init)
+		return;
+
+	spin_lock_irq(&pmu->lock);
+
+	park_rc6(i915);
+
+	/*
+	 * Signal sampling timer to stop if only engine events are enabled and
+	 * GPU went idle.
+	 */
+	pmu->timer_enabled = pmu_needs_timer(pmu, false);
+
+	spin_unlock_irq(&pmu->lock);
+}
+
 void i915_pmu_gt_unparked(struct drm_i915_private *i915)
 {
 	struct i915_pmu *pmu = &i915->pmu;
@@ -151,10 +269,14 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)
 		return;
 
 	spin_lock_irq(&pmu->lock);
+
 	/*
 	 * Re-enable sampling timer when GPU goes active.
 	 */
 	__i915_pmu_maybe_start_timer(pmu);
+
+	unpark_rc6(i915);
+
 	spin_unlock_irq(&pmu->lock);
 }
 
@@ -426,104 +548,6 @@ static int i915_pmu_event_init(struct perf_event *event)
 	return 0;
 }
 
-static u64 __get_rc6(struct intel_gt *gt)
-{
-	struct drm_i915_private *i915 = gt->i915;
-	u64 val;
-
-	val = intel_rc6_residency_ns(i915,
-				     IS_VALLEYVIEW(i915) ?
-				     VLV_GT_RENDER_RC6 :
-				     GEN6_GT_GFX_RC6);
-
-	if (HAS_RC6p(i915))
-		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
-
-	if (HAS_RC6pp(i915))
-		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
-
-	return val;
-}
-
-static u64 get_rc6(struct intel_gt *gt)
-{
-#if IS_ENABLED(CONFIG_PM)
-	struct drm_i915_private *i915 = gt->i915;
-	struct intel_runtime_pm *rpm = &i915->runtime_pm;
-	struct i915_pmu *pmu = &i915->pmu;
-	intel_wakeref_t wakeref;
-	unsigned long flags;
-	u64 val;
-
-	wakeref = intel_runtime_pm_get_if_in_use(rpm);
-	if (wakeref) {
-		val = __get_rc6(gt);
-		intel_runtime_pm_put(rpm, wakeref);
-
-		/*
-		 * If we are coming back from being runtime suspended we must
-		 * be careful not to report a larger value than returned
-		 * previously.
-		 */
-
-		spin_lock_irqsave(&pmu->lock, flags);
-
-		if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
-			pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
-			pmu->sample[__I915_SAMPLE_RC6].cur = val;
-		} else {
-			val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
-		}
-
-		spin_unlock_irqrestore(&pmu->lock, flags);
-	} else {
-		struct device *kdev = rpm->kdev;
-
-		/*
-		 * We are runtime suspended.
-		 *
-		 * Report the delta from when the device was suspended to now,
-		 * on top of the last known real value, as the approximated RC6
-		 * counter value.
-		 */
-		spin_lock_irqsave(&pmu->lock, flags);
-
-		/*
-		 * After the above branch intel_runtime_pm_get_if_in_use failed
-		 * to get the runtime PM reference we cannot assume we are in
-		 * runtime suspend since we can either: a) race with coming out
-		 * of it before we took the power.lock, or b) there are other
-		 * states than suspended which can bring us here.
-		 *
-		 * We need to double-check that we are indeed currently runtime
-		 * suspended and if not we cannot do better than report the last
-		 * known RC6 value.
-		 */
-		if (pm_runtime_status_suspended(kdev)) {
-			val = pm_runtime_suspended_time(kdev);
-
-			if (!pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
-				pmu->suspended_time_last = val;
-
-			val -= pmu->suspended_time_last;
-			val += pmu->sample[__I915_SAMPLE_RC6].cur;
-
-			pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
-		} else if (pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
-			val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
-		} else {
-			val = pmu->sample[__I915_SAMPLE_RC6].cur;
-		}
-
-		spin_unlock_irqrestore(&pmu->lock, flags);
-	}
-
-	return val;
-#else
-	return __get_rc6(gt);
-#endif
-}
-
 static u64 __i915_pmu_event_read(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 4fc4f2478301..067dbbf3bdff 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -97,9 +97,9 @@ struct i915_pmu {
 	 */
 	struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS];
 	/**
-	 * @suspended_time_last: Cached suspend time from PM core.
+	 * @sleep_last: Last time GT parked for RC6 estimation.
 	 */
-	u64 suspended_time_last;
+	ktime_t sleep_last;
 	/**
 	 * @i915_attr: Memory block holding device attributes.
 	 */
-- 
2.23.0


* [PATCH v6] drm/i915/pmu: Use GT parked for estimating RC6 while asleep
  2019-09-11 16:38 [PATCH] drm/i915/pmu: Use GT parked for estimating RC6 while asleep Chris Wilson
                   ` (5 preceding siblings ...)
  2019-09-12 10:31 ` [PATCH v5] " Chris Wilson
@ 2019-09-12 10:41 ` Chris Wilson
  2019-09-12 11:29   ` Chris Wilson
  2019-09-12 11:32 ` [PATCH v7] " Chris Wilson
  7 siblings, 1 reply; 16+ messages in thread
From: Chris Wilson @ 2019-09-12 10:41 UTC (permalink / raw)
  To: intel-gfx

As we track when we put the GT device to sleep upon idling, we can use
that callback to sample the current rc6 counters and record the
timestamp for estimating samples after that point while asleep.

v2: Stick to using ktime_t
v3: Track user_wakerefs that interfere with the new
intel_gt_pm_wait_for_idle
v4: No need for parked/unparked estimation if !CONFIG_PM
v5: Keep timer park/unpark logic as was
v6: Refactor duplicated estimate/update rc6 logic

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105010
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_pm.c   |  22 +++
 drivers/gpu/drm/i915/gt/intel_gt_types.h |   1 +
 drivers/gpu/drm/i915/i915_debugfs.c      |  22 +--
 drivers/gpu/drm/i915/i915_pmu.c          | 242 +++++++++++++----------
 drivers/gpu/drm/i915/i915_pmu.h          |   4 +-
 5 files changed, 169 insertions(+), 122 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 3bd764104d41..a11ad4d914ca 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -141,6 +141,24 @@ bool i915_gem_load_power_context(struct drm_i915_private *i915)
 	return switch_to_kernel_context_sync(&i915->gt);
 }
 
+static void user_forcewake(struct intel_gt *gt, bool suspend)
+{
+	int count = atomic_read(&gt->user_wakeref);
+
+	/* Inside suspend/resume so single threaded, no races to worry about. */
+	if (likely(!count))
+		return;
+
+	intel_gt_pm_get(gt);
+	if (suspend) {
+		GEM_BUG_ON(count > atomic_read(&gt->wakeref.count));
+		atomic_sub(count, &gt->wakeref.count);
+	} else {
+		atomic_add(count, &gt->wakeref.count);
+	}
+	intel_gt_pm_put(gt);
+}
+
 void i915_gem_suspend(struct drm_i915_private *i915)
 {
 	GEM_TRACE("\n");
@@ -148,6 +166,8 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0);
 	flush_workqueue(i915->wq);
 
+	user_forcewake(&i915->gt, true);
+
 	mutex_lock(&i915->drm.struct_mutex);
 
 	/*
@@ -259,6 +279,8 @@ void i915_gem_resume(struct drm_i915_private *i915)
 	if (!i915_gem_load_power_context(i915))
 		goto err_wedged;
 
+	user_forcewake(&i915->gt, false);
+
 out_unlock:
 	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
 	mutex_unlock(&i915->drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index dc295c196d11..3039cef64b11 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -50,6 +50,7 @@ struct intel_gt {
 	} timelines;
 
 	struct intel_wakeref wakeref;
+	atomic_t user_wakeref;
 
 	struct list_head closed_vma;
 	spinlock_t closed_lock; /* guards the list of closed_vma */
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index e5835337f022..f3ae525b77c0 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3995,13 +3995,12 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
 static int i915_forcewake_open(struct inode *inode, struct file *file)
 {
 	struct drm_i915_private *i915 = inode->i_private;
+	struct intel_gt *gt = &i915->gt;
 
-	if (INTEL_GEN(i915) < 6)
-		return 0;
-
-	file->private_data =
-		(void *)(uintptr_t)intel_runtime_pm_get(&i915->runtime_pm);
-	intel_uncore_forcewake_user_get(&i915->uncore);
+	atomic_inc(&gt->user_wakeref);
+	intel_gt_pm_get(gt);
+	if (INTEL_GEN(i915) >= 6)
+		intel_uncore_forcewake_user_get(gt->uncore);
 
 	return 0;
 }
@@ -4009,13 +4008,12 @@ static int i915_forcewake_open(struct inode *inode, struct file *file)
 static int i915_forcewake_release(struct inode *inode, struct file *file)
 {
 	struct drm_i915_private *i915 = inode->i_private;
+	struct intel_gt *gt = &i915->gt;
 
-	if (INTEL_GEN(i915) < 6)
-		return 0;
-
-	intel_uncore_forcewake_user_put(&i915->uncore);
-	intel_runtime_pm_put(&i915->runtime_pm,
-			     (intel_wakeref_t)(uintptr_t)file->private_data);
+	if (INTEL_GEN(i915) >= 6)
+		intel_uncore_forcewake_user_put(&i915->uncore);
+	intel_gt_pm_put(gt);
+	atomic_dec(&gt->user_wakeref);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 8e251e719390..57a2d24b5d5c 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -116,22 +116,122 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
 	return enable;
 }
 
-void i915_pmu_gt_parked(struct drm_i915_private *i915)
+static u64 __get_rc6(const struct intel_gt *gt)
+{
+	struct drm_i915_private *i915 = gt->i915;
+	u64 val;
+
+	val = intel_rc6_residency_ns(i915,
+				     IS_VALLEYVIEW(i915) ?
+				     VLV_GT_RENDER_RC6 :
+				     GEN6_GT_GFX_RC6);
+
+	if (HAS_RC6p(i915))
+		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
+
+	if (HAS_RC6pp(i915))
+		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
+
+	return val;
+}
+
+#if IS_ENABLED(CONFIG_PM)
+
+static inline s64 ktime_since(const ktime_t kt)
+{
+	return ktime_to_ns(ktime_sub(ktime_get(), kt));
+}
+
+static u64 __pmu_estimate_rc6(struct i915_pmu *pmu)
+{
+	u64 val;
+
+	val = ktime_since(pmu->sleep_last);
+	val += pmu->sample[__I915_SAMPLE_RC6].cur;
+
+	pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
+
+	return val;
+}
+
+static u64 __pmu_update_rc6(struct i915_pmu *pmu, u64 val)
 {
+	if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
+		pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
+		pmu->sample[__I915_SAMPLE_RC6].cur = val;
+	} else {
+		val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
+	}
+
+	return val;
+}
+
+static u64 get_rc6(struct intel_gt *gt)
+{
+	struct drm_i915_private *i915 = gt->i915;
 	struct i915_pmu *pmu = &i915->pmu;
+	unsigned long flags;
+	u64 val;
 
-	if (!pmu->base.event_init)
-		return;
+	spin_lock_irqsave(&pmu->lock, flags);
 
-	spin_lock_irq(&pmu->lock);
-	/*
-	 * Signal sampling timer to stop if only engine events are enabled and
-	 * GPU went idle.
-	 */
-	pmu->timer_enabled = pmu_needs_timer(pmu, false);
-	spin_unlock_irq(&pmu->lock);
+	if (intel_gt_pm_get_if_awake(gt)) {
+		val = __get_rc6(gt);
+		intel_gt_pm_put(gt);
+
+		/*
+		 * If we are coming back from being runtime suspended we must
+		 * be careful not to report a larger value than returned
+		 * previously.
+		 */
+		val = __pmu_update_rc6(pmu, val);
+	} else {
+		/*
+		 * We are runtime suspended.
+		 *
+		 * Report the delta from when the device was suspended to now,
+		 * on top of the last known real value, as the approximated RC6
+		 * counter value.
+		 */
+		val = __pmu_estimate_rc6(pmu);
+	}
+
+	spin_unlock_irqrestore(&pmu->lock, flags);
+
+	return val;
+}
+
+static void park_rc6(struct drm_i915_private *i915)
+{
+	struct i915_pmu *pmu = &i915->pmu;
+
+	if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY))
+		__pmu_update_rc6(pmu, __get_rc6(&i915->gt));
+
+	pmu->sleep_last = ktime_get();
+}
+
+static void unpark_rc6(struct drm_i915_private *i915)
+{
+	struct i915_pmu *pmu = &i915->pmu;
+
+	/* Estimate how long we slept and accumulate that into rc6 counters */
+	if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY))
+		__pmu_estimate_rc6(pmu);
+}
+
+#else
+
+static u64 get_rc6(struct intel_gt *gt)
+{
+	return __get_rc6(gt);
 }
 
+static void park_rc6(struct drm_i915_private *i915) {};
+static void unpark_rc6(struct drm_i915_private *i915) {};
+
+#endif
+
 static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
 {
 	if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
@@ -143,6 +243,26 @@ static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
 	}
 }
 
+void i915_pmu_gt_parked(struct drm_i915_private *i915)
+{
+	struct i915_pmu *pmu = &i915->pmu;
+
+	if (!pmu->base.event_init)
+		return;
+
+	spin_lock_irq(&pmu->lock);
+
+	park_rc6(i915);
+
+	/*
+	 * Signal sampling timer to stop if only engine events are enabled and
+	 * GPU went idle.
+	 */
+	pmu->timer_enabled = pmu_needs_timer(pmu, false);
+
+	spin_unlock_irq(&pmu->lock);
+}
+
 void i915_pmu_gt_unparked(struct drm_i915_private *i915)
 {
 	struct i915_pmu *pmu = &i915->pmu;
@@ -151,10 +271,14 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)
 		return;
 
 	spin_lock_irq(&pmu->lock);
+
 	/*
 	 * Re-enable sampling timer when GPU goes active.
 	 */
 	__i915_pmu_maybe_start_timer(pmu);
+
+	unpark_rc6(i915);
+
 	spin_unlock_irq(&pmu->lock);
 }
 
@@ -426,104 +550,6 @@ static int i915_pmu_event_init(struct perf_event *event)
 	return 0;
 }
 
-static u64 __get_rc6(struct intel_gt *gt)
-{
-	struct drm_i915_private *i915 = gt->i915;
-	u64 val;
-
-	val = intel_rc6_residency_ns(i915,
-				     IS_VALLEYVIEW(i915) ?
-				     VLV_GT_RENDER_RC6 :
-				     GEN6_GT_GFX_RC6);
-
-	if (HAS_RC6p(i915))
-		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
-
-	if (HAS_RC6pp(i915))
-		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
-
-	return val;
-}
-
-static u64 get_rc6(struct intel_gt *gt)
-{
-#if IS_ENABLED(CONFIG_PM)
-	struct drm_i915_private *i915 = gt->i915;
-	struct intel_runtime_pm *rpm = &i915->runtime_pm;
-	struct i915_pmu *pmu = &i915->pmu;
-	intel_wakeref_t wakeref;
-	unsigned long flags;
-	u64 val;
-
-	wakeref = intel_runtime_pm_get_if_in_use(rpm);
-	if (wakeref) {
-		val = __get_rc6(gt);
-		intel_runtime_pm_put(rpm, wakeref);
-
-		/*
-		 * If we are coming back from being runtime suspended we must
-		 * be careful not to report a larger value than returned
-		 * previously.
-		 */
-
-		spin_lock_irqsave(&pmu->lock, flags);
-
-		if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
-			pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
-			pmu->sample[__I915_SAMPLE_RC6].cur = val;
-		} else {
-			val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
-		}
-
-		spin_unlock_irqrestore(&pmu->lock, flags);
-	} else {
-		struct device *kdev = rpm->kdev;
-
-		/*
-		 * We are runtime suspended.
-		 *
-		 * Report the delta from when the device was suspended to now,
-		 * on top of the last known real value, as the approximated RC6
-		 * counter value.
-		 */
-		spin_lock_irqsave(&pmu->lock, flags);
-
-		/*
-		 * After the above branch intel_runtime_pm_get_if_in_use failed
-		 * to get the runtime PM reference we cannot assume we are in
-		 * runtime suspend since we can either: a) race with coming out
-		 * of it before we took the power.lock, or b) there are other
-		 * states than suspended which can bring us here.
-		 *
-		 * We need to double-check that we are indeed currently runtime
-		 * suspended and if not we cannot do better than report the last
-		 * known RC6 value.
-		 */
-		if (pm_runtime_status_suspended(kdev)) {
-			val = pm_runtime_suspended_time(kdev);
-
-			if (!pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
-				pmu->suspended_time_last = val;
-
-			val -= pmu->suspended_time_last;
-			val += pmu->sample[__I915_SAMPLE_RC6].cur;
-
-			pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
-		} else if (pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
-			val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
-		} else {
-			val = pmu->sample[__I915_SAMPLE_RC6].cur;
-		}
-
-		spin_unlock_irqrestore(&pmu->lock, flags);
-	}
-
-	return val;
-#else
-	return __get_rc6(gt);
-#endif
-}
-
 static u64 __i915_pmu_event_read(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 4fc4f2478301..067dbbf3bdff 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -97,9 +97,9 @@ struct i915_pmu {
 	 */
 	struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS];
 	/**
-	 * @suspended_time_last: Cached suspend time from PM core.
+	 * @sleep_last: Last time GT parked for RC6 estimation.
 	 */
-	u64 suspended_time_last;
+	ktime_t sleep_last;
 	/**
 	 * @i915_attr: Memory block holding device attributes.
 	 */
-- 
2.23.0


* Re: [PATCH v6] drm/i915/pmu: Use GT parked for estimating RC6 while asleep
  2019-09-12 10:41 ` [PATCH v6] " Chris Wilson
@ 2019-09-12 11:29   ` Chris Wilson
  0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2019-09-12 11:29 UTC (permalink / raw)
  To: intel-gfx

Quoting Chris Wilson (2019-09-12 11:41:31)
> +       spin_lock_irqsave(&pmu->lock, flags);
> +       if (intel_gt_pm_get_if_awake(gt)) {
> +               val = __get_rc6(gt);
> +               intel_gt_pm_put(gt);

As food for thought, what about

val = 0;
if (intel_gt_pm_get_if_awake(gt)) {
	val = __get_rc6(gt);
	intel_gt_pm_put(gt);
}

spin_lock_irqsave(&pmu->lock, flags);
if (val) {
> +
> +               /*
> +                * If we are coming back from being runtime suspended we must
> +                * be careful not to report a larger value than returned
> +                * previously.
> +                */
> +               val = __pmu_update_rc6(pmu, val);
> +       } else {
> +               /*
> +                * We are runtime suspended.
> +                *
> +                * Report the delta from when the device was suspended to now,
> +                * on top of the last known real value, as the approximated RC6
> +                * counter value.
> +                */
> +               val = __pmu_estimate_rc6(pmu);
> +       }
> +
> +       spin_unlock_irqrestore(&pmu->lock, flags);
> +
> +       return val;
> +}

Just feels riskier... But we can try and sleep on it.
-Chris
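
A hypothetical variant that keeps the "were we awake" decision explicit,
instead of inferring it from val == 0 as the v7 patch below does. The
awake flag is invented here purely for illustration and is not part of
any posted revision:

	bool awake = false;
	unsigned long flags;
	u64 val = 0;

	/* Sample the hardware outside pmu->lock, only if the GT is awake. */
	if (intel_gt_pm_get_if_awake(gt)) {
		val = __get_rc6(gt);
		intel_gt_pm_put(gt);
		awake = true;
	}

	spin_lock_irqsave(&pmu->lock, flags);
	if (awake)
		val = __pmu_update_rc6(pmu, val);	/* clamp the real sample */
	else
		val = __pmu_estimate_rc6(pmu);		/* estimate while parked */
	spin_unlock_irqrestore(&pmu->lock, flags);

	return val;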

* [PATCH v7] drm/i915/pmu: Use GT parked for estimating RC6 while asleep
  2019-09-11 16:38 [PATCH] drm/i915/pmu: Use GT parked for estimating RC6 while asleep Chris Wilson
                   ` (6 preceding siblings ...)
  2019-09-12 10:41 ` [PATCH v6] " Chris Wilson
@ 2019-09-12 11:32 ` Chris Wilson
  2019-09-12 12:38   ` Tvrtko Ursulin
  7 siblings, 1 reply; 16+ messages in thread
From: Chris Wilson @ 2019-09-12 11:32 UTC (permalink / raw)
  To: intel-gfx

As we track when we put the GT device to sleep upon idling, we can use
that callback to sample the current rc6 counters and record the
timestamp for estimating samples after that point while asleep.

v2: Stick to using ktime_t
v3: Track user_wakerefs that interfere with the new
intel_gt_pm_wait_for_idle
v4: No need for parked/unparked estimation if !CONFIG_PM
v5: Keep timer park/unpark logic as was
v6: Refactor duplicated estimate/update rc6 logic
v7: Pull intel_gt_pm_get_if_awake() out from under the pmu->lock.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105010
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_pm.c   |  22 ++
 drivers/gpu/drm/i915/gt/intel_gt_types.h |   1 +
 drivers/gpu/drm/i915/i915_debugfs.c      |  22 +-
 drivers/gpu/drm/i915/i915_pmu.c          | 244 +++++++++++++----------
 drivers/gpu/drm/i915/i915_pmu.h          |   4 +-
 5 files changed, 171 insertions(+), 122 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 3bd764104d41..a11ad4d914ca 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -141,6 +141,24 @@ bool i915_gem_load_power_context(struct drm_i915_private *i915)
 	return switch_to_kernel_context_sync(&i915->gt);
 }
 
+static void user_forcewake(struct intel_gt *gt, bool suspend)
+{
+	int count = atomic_read(&gt->user_wakeref);
+
+	/* Inside suspend/resume so single threaded, no races to worry about. */
+	if (likely(!count))
+		return;
+
+	intel_gt_pm_get(gt);
+	if (suspend) {
+		GEM_BUG_ON(count > atomic_read(&gt->wakeref.count));
+		atomic_sub(count, &gt->wakeref.count);
+	} else {
+		atomic_add(count, &gt->wakeref.count);
+	}
+	intel_gt_pm_put(gt);
+}
+
 void i915_gem_suspend(struct drm_i915_private *i915)
 {
 	GEM_TRACE("\n");
@@ -148,6 +166,8 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0);
 	flush_workqueue(i915->wq);
 
+	user_forcewake(&i915->gt, true);
+
 	mutex_lock(&i915->drm.struct_mutex);
 
 	/*
@@ -259,6 +279,8 @@ void i915_gem_resume(struct drm_i915_private *i915)
 	if (!i915_gem_load_power_context(i915))
 		goto err_wedged;
 
+	user_forcewake(&i915->gt, false);
+
 out_unlock:
 	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
 	mutex_unlock(&i915->drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index dc295c196d11..3039cef64b11 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -50,6 +50,7 @@ struct intel_gt {
 	} timelines;
 
 	struct intel_wakeref wakeref;
+	atomic_t user_wakeref;
 
 	struct list_head closed_vma;
 	spinlock_t closed_lock; /* guards the list of closed_vma */
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index e5835337f022..f3ae525b77c0 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3995,13 +3995,12 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
 static int i915_forcewake_open(struct inode *inode, struct file *file)
 {
 	struct drm_i915_private *i915 = inode->i_private;
+	struct intel_gt *gt = &i915->gt;
 
-	if (INTEL_GEN(i915) < 6)
-		return 0;
-
-	file->private_data =
-		(void *)(uintptr_t)intel_runtime_pm_get(&i915->runtime_pm);
-	intel_uncore_forcewake_user_get(&i915->uncore);
+	atomic_inc(&gt->user_wakeref);
+	intel_gt_pm_get(gt);
+	if (INTEL_GEN(i915) >= 6)
+		intel_uncore_forcewake_user_get(gt->uncore);
 
 	return 0;
 }
@@ -4009,13 +4008,12 @@ static int i915_forcewake_open(struct inode *inode, struct file *file)
 static int i915_forcewake_release(struct inode *inode, struct file *file)
 {
 	struct drm_i915_private *i915 = inode->i_private;
+	struct intel_gt *gt = &i915->gt;
 
-	if (INTEL_GEN(i915) < 6)
-		return 0;
-
-	intel_uncore_forcewake_user_put(&i915->uncore);
-	intel_runtime_pm_put(&i915->runtime_pm,
-			     (intel_wakeref_t)(uintptr_t)file->private_data);
+	if (INTEL_GEN(i915) >= 6)
+		intel_uncore_forcewake_user_put(&i915->uncore);
+	intel_gt_pm_put(gt);
+	atomic_dec(&gt->user_wakeref);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 8e251e719390..e63649b86915 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -116,22 +116,124 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
 	return enable;
 }
 
-void i915_pmu_gt_parked(struct drm_i915_private *i915)
+static u64 __get_rc6(const struct intel_gt *gt)
 {
+	struct drm_i915_private *i915 = gt->i915;
+	u64 val;
+
+	val = intel_rc6_residency_ns(i915,
+				     IS_VALLEYVIEW(i915) ?
+				     VLV_GT_RENDER_RC6 :
+				     GEN6_GT_GFX_RC6);
+
+	if (HAS_RC6p(i915))
+		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
+
+	if (HAS_RC6pp(i915))
+		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
+
+	return val;
+}
+
+#if IS_ENABLED(CONFIG_PM)
+
+static inline s64 ktime_since(const ktime_t kt)
+{
+	return ktime_to_ns(ktime_sub(ktime_get(), kt));
+}
+
+static u64 __pmu_estimate_rc6(struct i915_pmu *pmu)
+{
+	u64 val;
+
+	val = ktime_since(pmu->sleep_last);
+	val += pmu->sample[__I915_SAMPLE_RC6].cur;
+
+	pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
+
+	return val;
+}
+
+static u64 __pmu_update_rc6(struct i915_pmu *pmu, u64 val)
+{
+	if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
+		pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
+		pmu->sample[__I915_SAMPLE_RC6].cur = val;
+	} else {
+		val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
+	}
+
+	return val;
+}
+
+static u64 get_rc6(struct intel_gt *gt)
+{
+	struct drm_i915_private *i915 = gt->i915;
 	struct i915_pmu *pmu = &i915->pmu;
+	unsigned long flags;
+	u64 val;
 
-	if (!pmu->base.event_init)
-		return;
+	val = 0;
+	if (intel_gt_pm_get_if_awake(gt)) {
+		val = __get_rc6(gt);
+		intel_gt_pm_put(gt);
+	}
 
-	spin_lock_irq(&pmu->lock);
-	/*
-	 * Signal sampling timer to stop if only engine events are enabled and
-	 * GPU went idle.
-	 */
-	pmu->timer_enabled = pmu_needs_timer(pmu, false);
-	spin_unlock_irq(&pmu->lock);
+	spin_lock_irqsave(&pmu->lock, flags);
+
+	if (val)
+		/*
+		 * If we are coming back from being runtime suspended we must
+		 * be careful not to report a larger value than returned
+		 * previously.
+		 */
+		val = __pmu_update_rc6(pmu, val);
+	else
+		/*
+		 * We were runtime suspended.
+		 *
+		 * Report the delta from when the device was suspended to now,
+		 * on top of the last known real value, as the approximated RC6
+		 * counter value.
+		 */
+		val = __pmu_estimate_rc6(pmu);
+
+	spin_unlock_irqrestore(&pmu->lock, flags);
+
+	return val;
+}
+
+static void park_rc6(struct drm_i915_private *i915)
+{
+	struct i915_pmu *pmu = &i915->pmu;
+
+	if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY))
+		__pmu_update_rc6(pmu, __get_rc6(&i915->gt));
+
+	pmu->sleep_last = ktime_get();
+}
+
+static void unpark_rc6(struct drm_i915_private *i915)
+{
+	struct i915_pmu *pmu = &i915->pmu;
+
+	/* Estimate how long we slept and accumulate that into rc6 counters */
+	if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY))
+		__pmu_estimate_rc6(pmu);
+}
+
+#else
+
+static u64 get_rc6(struct intel_gt *gt)
+{
+	return __get_rc6(gt);
 }
 
+static void park_rc6(struct drm_i915_private *i915) {};
+static void unpark_rc6(struct drm_i915_private *i915) {};
+
+#endif
+
 static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
 {
 	if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
@@ -143,6 +245,26 @@ static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
 	}
 }
 
+void i915_pmu_gt_parked(struct drm_i915_private *i915)
+{
+	struct i915_pmu *pmu = &i915->pmu;
+
+	if (!pmu->base.event_init)
+		return;
+
+	spin_lock_irq(&pmu->lock);
+
+	park_rc6(i915);
+
+	/*
+	 * Signal sampling timer to stop if only engine events are enabled and
+	 * GPU went idle.
+	 */
+	pmu->timer_enabled = pmu_needs_timer(pmu, false);
+
+	spin_unlock_irq(&pmu->lock);
+}
+
 void i915_pmu_gt_unparked(struct drm_i915_private *i915)
 {
 	struct i915_pmu *pmu = &i915->pmu;
@@ -151,10 +273,14 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)
 		return;
 
 	spin_lock_irq(&pmu->lock);
+
 	/*
 	 * Re-enable sampling timer when GPU goes active.
 	 */
 	__i915_pmu_maybe_start_timer(pmu);
+
+	unpark_rc6(i915);
+
 	spin_unlock_irq(&pmu->lock);
 }
 
@@ -426,104 +552,6 @@ static int i915_pmu_event_init(struct perf_event *event)
 	return 0;
 }
 
-static u64 __get_rc6(struct intel_gt *gt)
-{
-	struct drm_i915_private *i915 = gt->i915;
-	u64 val;
-
-	val = intel_rc6_residency_ns(i915,
-				     IS_VALLEYVIEW(i915) ?
-				     VLV_GT_RENDER_RC6 :
-				     GEN6_GT_GFX_RC6);
-
-	if (HAS_RC6p(i915))
-		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
-
-	if (HAS_RC6pp(i915))
-		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
-
-	return val;
-}
-
-static u64 get_rc6(struct intel_gt *gt)
-{
-#if IS_ENABLED(CONFIG_PM)
-	struct drm_i915_private *i915 = gt->i915;
-	struct intel_runtime_pm *rpm = &i915->runtime_pm;
-	struct i915_pmu *pmu = &i915->pmu;
-	intel_wakeref_t wakeref;
-	unsigned long flags;
-	u64 val;
-
-	wakeref = intel_runtime_pm_get_if_in_use(rpm);
-	if (wakeref) {
-		val = __get_rc6(gt);
-		intel_runtime_pm_put(rpm, wakeref);
-
-		/*
-		 * If we are coming back from being runtime suspended we must
-		 * be careful not to report a larger value than returned
-		 * previously.
-		 */
-
-		spin_lock_irqsave(&pmu->lock, flags);
-
-		if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
-			pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
-			pmu->sample[__I915_SAMPLE_RC6].cur = val;
-		} else {
-			val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
-		}
-
-		spin_unlock_irqrestore(&pmu->lock, flags);
-	} else {
-		struct device *kdev = rpm->kdev;
-
-		/*
-		 * We are runtime suspended.
-		 *
-		 * Report the delta from when the device was suspended to now,
-		 * on top of the last known real value, as the approximated RC6
-		 * counter value.
-		 */
-		spin_lock_irqsave(&pmu->lock, flags);
-
-		/*
-		 * After the above branch intel_runtime_pm_get_if_in_use failed
-		 * to get the runtime PM reference we cannot assume we are in
-		 * runtime suspend since we can either: a) race with coming out
-		 * of it before we took the power.lock, or b) there are other
-		 * states than suspended which can bring us here.
-		 *
-		 * We need to double-check that we are indeed currently runtime
-		 * suspended and if not we cannot do better than report the last
-		 * known RC6 value.
-		 */
-		if (pm_runtime_status_suspended(kdev)) {
-			val = pm_runtime_suspended_time(kdev);
-
-			if (!pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
-				pmu->suspended_time_last = val;
-
-			val -= pmu->suspended_time_last;
-			val += pmu->sample[__I915_SAMPLE_RC6].cur;
-
-			pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
-		} else if (pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
-			val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
-		} else {
-			val = pmu->sample[__I915_SAMPLE_RC6].cur;
-		}
-
-		spin_unlock_irqrestore(&pmu->lock, flags);
-	}
-
-	return val;
-#else
-	return __get_rc6(gt);
-#endif
-}
-
 static u64 __i915_pmu_event_read(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 4fc4f2478301..067dbbf3bdff 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -97,9 +97,9 @@ struct i915_pmu {
 	 */
 	struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS];
 	/**
-	 * @suspended_time_last: Cached suspend time from PM core.
+	 * @sleep_last: Last time GT parked for RC6 estimation.
 	 */
-	u64 suspended_time_last;
+	ktime_t sleep_last;
 	/**
 	 * @i915_attr: Memory block holding device attributes.
 	 */
-- 
2.23.0


* Re: [PATCH v7] drm/i915/pmu: Use GT parked for estimating RC6 while asleep
  2019-09-12 11:32 ` [PATCH v7] " Chris Wilson
@ 2019-09-12 12:38   ` Tvrtko Ursulin
  2019-09-12 12:44     ` Chris Wilson
  0 siblings, 1 reply; 16+ messages in thread
From: Tvrtko Ursulin @ 2019-09-12 12:38 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 12/09/2019 12:32, Chris Wilson wrote:
> As we track when we put the GT device to sleep upon idling, we can use
> that callback to sample the current rc6 counters and record the
> timestamp for estimating samples after that point while asleep.
> 
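(As a quick sketch of the bookkeeping described above, reusing the names
from the patch below: while the GT is parked, the reported residency is
just the last real RC6 reading plus the wall-clock time since parking,

	/* illustrative only -- this is what __pmu_estimate_rc6() computes */
	val = pmu->sample[__I915_SAMPLE_RC6].cur +
	      ktime_to_ns(ktime_sub(ktime_get(), pmu->sleep_last));

and whenever a real reading is available again, __pmu_update_rc6() folds
it back in, clamping so the reported value never goes backwards.)
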
> v2: Stick to using ktime_t
> v3: Track user_wakerefs that interfere with the new
> intel_gt_pm_wait_for_idle
> v4: No need for parked/unparked estimation if !CONFIG_PM
> v5: Keep timer park/unpark logic as was
> v6: Refactor duplicated estimate/update rc6 logic
> v7: Pull intel_get_pm_get_if_awake() out from the pmu->lock.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105010
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_pm.c   |  22 ++
>   drivers/gpu/drm/i915/gt/intel_gt_types.h |   1 +
>   drivers/gpu/drm/i915/i915_debugfs.c      |  22 +-
>   drivers/gpu/drm/i915/i915_pmu.c          | 244 +++++++++++++----------
>   drivers/gpu/drm/i915/i915_pmu.h          |   4 +-
>   5 files changed, 171 insertions(+), 122 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> index 3bd764104d41..a11ad4d914ca 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> @@ -141,6 +141,24 @@ bool i915_gem_load_power_context(struct drm_i915_private *i915)
>   	return switch_to_kernel_context_sync(&i915->gt);
>   }
>   
> +static void user_forcewake(struct intel_gt *gt, bool suspend)
> +{
> +	int count = atomic_read(&gt->user_wakeref);
> +
> +	/* Inside suspend/resume so single threaded, no races to worry about. */
> +	if (likely(!count))
> +		return;
> +
> +	intel_gt_pm_get(gt);
> +	if (suspend) {
> +		GEM_BUG_ON(count > atomic_read(&gt->wakeref.count));
> +		atomic_sub(count, &gt->wakeref.count);
> +	} else {
> +		atomic_add(count, &gt->wakeref.count);
> +	}
> +	intel_gt_pm_put(gt);
> +}
> +
>   void i915_gem_suspend(struct drm_i915_private *i915)
>   {
>   	GEM_TRACE("\n");
> @@ -148,6 +166,8 @@ void i915_gem_suspend(struct drm_i915_private *i915)
>   	intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0);
>   	flush_workqueue(i915->wq);
>   
> +	user_forcewake(&i915->gt, true);
> +
>   	mutex_lock(&i915->drm.struct_mutex);
>   
>   	/*
> @@ -259,6 +279,8 @@ void i915_gem_resume(struct drm_i915_private *i915)
>   	if (!i915_gem_load_power_context(i915))
>   		goto err_wedged;
>   
> +	user_forcewake(&i915->gt, false);
> +
>   out_unlock:
>   	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
>   	mutex_unlock(&i915->drm.struct_mutex);
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> index dc295c196d11..3039cef64b11 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> @@ -50,6 +50,7 @@ struct intel_gt {
>   	} timelines;
>   
>   	struct intel_wakeref wakeref;
> +	atomic_t user_wakeref;
>   
>   	struct list_head closed_vma;
>   	spinlock_t closed_lock; /* guards the list of closed_vma */
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index e5835337f022..f3ae525b77c0 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -3995,13 +3995,12 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
>   static int i915_forcewake_open(struct inode *inode, struct file *file)
>   {
>   	struct drm_i915_private *i915 = inode->i_private;
> +	struct intel_gt *gt = &i915->gt;
>   
> -	if (INTEL_GEN(i915) < 6)
> -		return 0;
> -
> -	file->private_data =
> -		(void *)(uintptr_t)intel_runtime_pm_get(&i915->runtime_pm);
> -	intel_uncore_forcewake_user_get(&i915->uncore);
> +	atomic_inc(&gt->user_wakeref);
> +	intel_gt_pm_get(gt);
> +	if (INTEL_GEN(i915) >= 6)
> +		intel_uncore_forcewake_user_get(gt->uncore);
>   
>   	return 0;
>   }
> @@ -4009,13 +4008,12 @@ static int i915_forcewake_open(struct inode *inode, struct file *file)
>   static int i915_forcewake_release(struct inode *inode, struct file *file)
>   {
>   	struct drm_i915_private *i915 = inode->i_private;
> +	struct intel_gt *gt = &i915->gt;
>   
> -	if (INTEL_GEN(i915) < 6)
> -		return 0;
> -
> -	intel_uncore_forcewake_user_put(&i915->uncore);
> -	intel_runtime_pm_put(&i915->runtime_pm,
> -			     (intel_wakeref_t)(uintptr_t)file->private_data);
> +	if (INTEL_GEN(i915) >= 6)
> +		intel_uncore_forcewake_user_put(&i915->uncore);
> +	intel_gt_pm_put(gt);
> +	atomic_dec(&gt->user_wakeref);
>   
>   	return 0;
>   }
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index 8e251e719390..e63649b86915 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -116,22 +116,124 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
>   	return enable;
>   }
>   
> -void i915_pmu_gt_parked(struct drm_i915_private *i915)
> +static u64 __get_rc6(const struct intel_gt *gt)
>   {
> +	struct drm_i915_private *i915 = gt->i915;
> +	u64 val;
> +
> +	val = intel_rc6_residency_ns(i915,
> +				     IS_VALLEYVIEW(i915) ?
> +				     VLV_GT_RENDER_RC6 :
> +				     GEN6_GT_GFX_RC6);
> +
> +	if (HAS_RC6p(i915))
> +		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
> +
> +	if (HAS_RC6pp(i915))
> +		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
> +
> +	return val;
> +}
> +
> +#if IS_ENABLED(CONFIG_PM)
> +
> +static inline s64 ktime_since(const ktime_t kt)
> +{
> +	return ktime_to_ns(ktime_sub(ktime_get(), kt));
> +}
> +
> +static u64 __pmu_estimate_rc6(struct i915_pmu *pmu)
> +{
> +	u64 val;
> +
> +	val = ktime_since(pmu->sleep_last);
> +	val += pmu->sample[__I915_SAMPLE_RC6].cur;
> +
> +	pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
> +
> +	return val;
> +}
> +
> +static u64 __pmu_update_rc6(struct i915_pmu *pmu, u64 val)
> +{
> +	if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
> +		pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
> +		pmu->sample[__I915_SAMPLE_RC6].cur = val;
> +	} else {
> +		val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
> +	}
> +
> +	return val;
> +}
> +
> +static u64 get_rc6(struct intel_gt *gt)
> +{
> +	struct drm_i915_private *i915 = gt->i915;
>   	struct i915_pmu *pmu = &i915->pmu;
> +	unsigned long flags;
> +	u64 val;
>   
> -	if (!pmu->base.event_init)
> -		return;
> +	val = 0;
> +	if (intel_gt_pm_get_if_awake(gt)) {
> +		val = __get_rc6(gt);
> +		intel_gt_pm_put(gt);
> +	}
>   
> -	spin_lock_irq(&pmu->lock);
> -	/*
> -	 * Signal sampling timer to stop if only engine events are enabled and
> -	 * GPU went idle.
> -	 */
> -	pmu->timer_enabled = pmu_needs_timer(pmu, false);
> -	spin_unlock_irq(&pmu->lock);
> +	spin_lock_irqsave(&pmu->lock, flags);
> +
> +	if (val)
> +		/*
> +		 * If we are coming back from being runtime suspended we must
> +		 * be careful not to report a larger value than returned
> +		 * previously.
> +		 */

The comment is a bit dislocated from the logic, so it feels like it would
be better to move it into __pmu_update_rc6.

> +		val = __pmu_update_rc6(pmu, val);
> +	else
> +		/*
> +		 * We were runtime suspended.

s/were/are/, or maybe it is "think we are". :)

> +		 *
> +		 * Report the delta from when the device was suspended to now,
> +		 * on top of the last known real value, as the approximated RC6
> +		 * counter value.
> +		 */
> +		val = __pmu_estimate_rc6(pmu);
> +
> +	spin_unlock_irqrestore(&pmu->lock, flags);
> +
> +	return val;
> +}
> +
> +static void park_rc6(struct drm_i915_private *i915)
> +{
> +	struct i915_pmu *pmu = &i915->pmu;
> +
> +	if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY))
> +		__pmu_update_rc6(pmu, __get_rc6(&i915->gt));
> +
> +	pmu->sleep_last = ktime_get();
> +}
> +
> +static void unpark_rc6(struct drm_i915_private *i915)
> +{
> +	struct i915_pmu *pmu = &i915->pmu;
> +
> +	/* Estimate how long we slept and accumulate that into rc6 counters */
> +	if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY))
> +		__pmu_estimate_rc6(pmu);
> +}
> +
> +#else
> +
> +static u64 get_rc6(struct intel_gt *gt)
> +{
> +	return __get_rc6(gt);
>   }
>   
> +static void park_rc6(struct drm_i915_private *i915) {};
> +static void unpark_rc6(struct drm_i915_private *i915) {};
> +
> +#endif
> +
>   static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
>   {
>   	if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
> @@ -143,6 +245,26 @@ static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
>   	}
>   }
>   
> +void i915_pmu_gt_parked(struct drm_i915_private *i915)
> +{
> +	struct i915_pmu *pmu = &i915->pmu;
> +
> +	if (!pmu->base.event_init)
> +		return;
> +
> +	spin_lock_irq(&pmu->lock);
> +
> +	park_rc6(i915);
> +
> +	/*
> +	 * Signal sampling timer to stop if only engine events are enabled and
> +	 * GPU went idle.
> +	 */
> +	pmu->timer_enabled = pmu_needs_timer(pmu, false);
> +
> +	spin_unlock_irq(&pmu->lock);
> +}
> +
>   void i915_pmu_gt_unparked(struct drm_i915_private *i915)
>   {
>   	struct i915_pmu *pmu = &i915->pmu;
> @@ -151,10 +273,14 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)
>   		return;
>   
>   	spin_lock_irq(&pmu->lock);
> +
>   	/*
>   	 * Re-enable sampling timer when GPU goes active.
>   	 */
>   	__i915_pmu_maybe_start_timer(pmu);
> +
> +	unpark_rc6(i915);
> +
>   	spin_unlock_irq(&pmu->lock);
>   }
>   
> @@ -426,104 +552,6 @@ static int i915_pmu_event_init(struct perf_event *event)
>   	return 0;
>   }
>   
> -static u64 __get_rc6(struct intel_gt *gt)
> -{
> -	struct drm_i915_private *i915 = gt->i915;
> -	u64 val;
> -
> -	val = intel_rc6_residency_ns(i915,
> -				     IS_VALLEYVIEW(i915) ?
> -				     VLV_GT_RENDER_RC6 :
> -				     GEN6_GT_GFX_RC6);
> -
> -	if (HAS_RC6p(i915))
> -		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
> -
> -	if (HAS_RC6pp(i915))
> -		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
> -
> -	return val;
> -}
> -
> -static u64 get_rc6(struct intel_gt *gt)
> -{
> -#if IS_ENABLED(CONFIG_PM)
> -	struct drm_i915_private *i915 = gt->i915;
> -	struct intel_runtime_pm *rpm = &i915->runtime_pm;
> -	struct i915_pmu *pmu = &i915->pmu;
> -	intel_wakeref_t wakeref;
> -	unsigned long flags;
> -	u64 val;
> -
> -	wakeref = intel_runtime_pm_get_if_in_use(rpm);
> -	if (wakeref) {
> -		val = __get_rc6(gt);
> -		intel_runtime_pm_put(rpm, wakeref);
> -
> -		/*
> -		 * If we are coming back from being runtime suspended we must
> -		 * be careful not to report a larger value than returned
> -		 * previously.
> -		 */
> -
> -		spin_lock_irqsave(&pmu->lock, flags);
> -
> -		if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
> -			pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
> -			pmu->sample[__I915_SAMPLE_RC6].cur = val;
> -		} else {
> -			val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
> -		}
> -
> -		spin_unlock_irqrestore(&pmu->lock, flags);
> -	} else {
> -		struct device *kdev = rpm->kdev;
> -
> -		/*
> -		 * We are runtime suspended.
> -		 *
> -		 * Report the delta from when the device was suspended to now,
> -		 * on top of the last known real value, as the approximated RC6
> -		 * counter value.
> -		 */
> -		spin_lock_irqsave(&pmu->lock, flags);
> -
> -		/*
> -		 * After the above branch intel_runtime_pm_get_if_in_use failed
> -		 * to get the runtime PM reference we cannot assume we are in
> -		 * runtime suspend since we can either: a) race with coming out
> -		 * of it before we took the power.lock, or b) there are other
> -		 * states than suspended which can bring us here.
> -		 *
> -		 * We need to double-check that we are indeed currently runtime
> -		 * suspended and if not we cannot do better than report the last
> -		 * known RC6 value.
> -		 */
> -		if (pm_runtime_status_suspended(kdev)) {
> -			val = pm_runtime_suspended_time(kdev);
> -
> -			if (!pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
> -				pmu->suspended_time_last = val;
> -
> -			val -= pmu->suspended_time_last;
> -			val += pmu->sample[__I915_SAMPLE_RC6].cur;
> -
> -			pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
> -		} else if (pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
> -			val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
> -		} else {
> -			val = pmu->sample[__I915_SAMPLE_RC6].cur;
> -		}
> -
> -		spin_unlock_irqrestore(&pmu->lock, flags);
> -	}
> -
> -	return val;
> -#else
> -	return __get_rc6(gt);
> -#endif
> -}
> -
>   static u64 __i915_pmu_event_read(struct perf_event *event)
>   {
>   	struct drm_i915_private *i915 =
> diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
> index 4fc4f2478301..067dbbf3bdff 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.h
> +++ b/drivers/gpu/drm/i915/i915_pmu.h
> @@ -97,9 +97,9 @@ struct i915_pmu {
>   	 */
>   	struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS];
>   	/**
> -	 * @suspended_time_last: Cached suspend time from PM core.
> +	 * @sleep_last: Last time GT parked for RC6 estimation.
>   	 */
> -	u64 suspended_time_last;
> +	ktime_t sleep_last;
>   	/**
>   	 * @i915_attr: Memory block holding device attributes.
>   	 */
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

* Re: [PATCH v7] drm/i915/pmu: Use GT parked for estimating RC6 while asleep
  2019-09-12 12:38   ` Tvrtko Ursulin
@ 2019-09-12 12:44     ` Chris Wilson
  0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2019-09-12 12:44 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-09-12 13:38:47)
> 
> On 12/09/2019 12:32, Chris Wilson wrote:
> > +     if (val)
> > +             /*
> > +              * If we are coming back from being runtime suspended we must
> > +              * be careful not to report a larger value than returned
> > +              * previously.
> > +              */
> 
> The comment is a bit dislocated from the logic, so it feels like it would
> be better to move it into __pmu_update_rc6.
> 
> > +             val = __pmu_update_rc6(pmu, val);
> > +     else
> > +             /*
> > +              * We were runtime suspended.
> 
> s/were/are/, or maybe it is "think we are". :)

"We think we are" captures my anxiety.

Moving both into their respective routines.
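Something along these lines, i.e. just folding the comments into the two
helpers (untested sketch):

static u64 __pmu_update_rc6(struct i915_pmu *pmu, u64 val)
{
	/*
	 * If we are coming back from being runtime suspended we must
	 * be careful not to report a larger value than returned
	 * previously.
	 */
	if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
		pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
		pmu->sample[__I915_SAMPLE_RC6].cur = val;
	} else {
		val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
	}

	return val;
}

static u64 __pmu_estimate_rc6(struct i915_pmu *pmu)
{
	u64 val;

	/*
	 * We think we are runtime suspended.
	 *
	 * Report the delta from when the device was suspended to now,
	 * on top of the last known real value, as the approximated RC6
	 * counter value.
	 */
	val = ktime_since(pmu->sleep_last);
	val += pmu->sample[__I915_SAMPLE_RC6].cur;

	pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;

	return val;
}
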
-Chris

