All of lore.kernel.org
 help / color / mirror / Atom feed
* [Intel-gfx] [PATCH 1/2] drm/i915/pmu: Fix KMD and GuC race on accessing busyness
@ 2022-01-27  2:00 Umesh Nerlige Ramappa
  2022-01-27  2:00 ` [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Use existing uncore helper to read gpm_timestamp Umesh Nerlige Ramappa
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Umesh Nerlige Ramappa @ 2022-01-27  2:00 UTC (permalink / raw)
  To: intel-gfx

GuC updates shared memory and KMD reads it. Since this is not
synchronized, we run into a race where the value read is inconsistent.
Sometimes the inconsistency is in reading the upper MSB bytes of the
last_switch_in value. 2 types of cases are seen - upper 8 bits are zero
and upper 24 bits are zero. Since these are non-zero values, it is
not trivial to determine validity of these values. Instead we read the
values multiple times until they are consistent. In test runs, 3
attempts results in consistent values. The upper bound is set to 6
attempts and may need to be tuned as per any new occurences.

Since the duration that gt is parked can vary, the patch also updates
the gt timestamp on unpark before starting the worker.

v2:
- Initialize i
- Use READ_ONCE to access engine record

Fixes: 77cdd054dd2c ("drm/i915/pmu: Connect engine busyness stats from GuC to pmu")
Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Reviewed-by: Alan Previn <alan.previn.teres.alexis@intel.com>
---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 58 +++++++++++++++++--
 1 file changed, 54 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index db9615dcb0ec..4e9154cacc58 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1114,6 +1114,19 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
 	if (new_start == lower_32_bits(*prev_start))
 		return;
 
+	/*
+	 * When gt is unparked, we update the gt timestamp and start the ping
+	 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
+	 * is unparked, all switched in contexts will have a start time that is
+	 * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
+	 *
+	 * If neither gt_stamp nor new_start has rolled over, then the
+	 * gt_stamp_hi does not need to be adjusted, however if one of them has
+	 * rolled over, we need to adjust gt_stamp_hi accordingly.
+	 *
+	 * The below conditions address the cases of new_start rollover and
+	 * gt_stamp_last rollover respectively.
+	 */
 	if (new_start < gt_stamp_last &&
 	    (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
 		gt_stamp_hi++;
@@ -1125,17 +1138,45 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
 	*prev_start = ((u64)gt_stamp_hi << 32) | new_start;
 }
 
-static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
+/*
+ * GuC updates shared memory and KMD reads it. Since this is not synchronized,
+ * we run into a race where the value read is inconsistent. Sometimes the
+ * inconsistency is in reading the upper MSB bytes of the last_in value when
+ * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
+ * 24 bits are zero. Since these are non-zero values, it is non-trivial to
+ * determine validity of these values. Instead we read the values multiple times
+ * until they are consistent. In test runs, 3 attempts results in consistent
+ * values. The upper bound is set to 6 attempts and may need to be tuned as per
+ * any new occurences.
+ */
+static void __get_engine_usage_record(struct intel_engine_cs *engine,
+				      u32 *last_in, u32 *id, u32 *total)
 {
 	struct guc_engine_usage_record *rec = intel_guc_engine_usage(engine);
+	int i = 0;
+
+	do {
+		*last_in = READ_ONCE(rec->last_switch_in_stamp);
+		*id = READ_ONCE(rec->current_context_index);
+		*total = READ_ONCE(rec->total_runtime);
+
+		if (READ_ONCE(rec->last_switch_in_stamp) == *last_in &&
+		    READ_ONCE(rec->current_context_index) == *id &&
+		    READ_ONCE(rec->total_runtime) == *total)
+			break;
+	} while (++i < 6);
+}
+
+static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
+{
 	struct intel_engine_guc_stats *stats = &engine->stats.guc;
 	struct intel_guc *guc = &engine->gt->uc.guc;
-	u32 last_switch = rec->last_switch_in_stamp;
-	u32 ctx_id = rec->current_context_index;
-	u32 total = rec->total_runtime;
+	u32 last_switch, ctx_id, total;
 
 	lockdep_assert_held(&guc->timestamp.lock);
 
+	__get_engine_usage_record(engine, &last_switch, &ctx_id, &total);
+
 	stats->running = ctx_id != ~0U && last_switch;
 	if (stats->running)
 		__extend_last_switch(guc, &stats->start_gt_clk, last_switch);
@@ -1237,6 +1278,10 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
 	if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
 		stats_saved = *stats;
 		gt_stamp_saved = guc->timestamp.gt_stamp;
+		/*
+		 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
+		 * start_gt_clk' calculation below for active engines.
+		 */
 		guc_update_engine_gt_clks(engine);
 		guc_update_pm_timestamp(guc, now);
 		intel_gt_pm_put_async(gt);
@@ -1365,10 +1410,15 @@ void intel_guc_busyness_park(struct intel_gt *gt)
 void intel_guc_busyness_unpark(struct intel_gt *gt)
 {
 	struct intel_guc *guc = &gt->uc.guc;
+	unsigned long flags;
+	ktime_t unused;
 
 	if (!guc_submission_initialized(guc))
 		return;
 
+	spin_lock_irqsave(&guc->timestamp.lock, flags);
+	guc_update_pm_timestamp(guc, &unused);
+	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
 	mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
 			 guc->timestamp.ping_delay);
 }
-- 
2.33.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Use existing uncore helper to read gpm_timestamp
  2022-01-27  2:00 [Intel-gfx] [PATCH 1/2] drm/i915/pmu: Fix KMD and GuC race on accessing busyness Umesh Nerlige Ramappa
@ 2022-01-27  2:00 ` Umesh Nerlige Ramappa
  2022-01-27  2:52 ` [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915/pmu: Fix KMD and GuC race on accessing busyness Patchwork
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 9+ messages in thread
From: Umesh Nerlige Ramappa @ 2022-01-27  2:00 UTC (permalink / raw)
  To: intel-gfx

Use intel_uncore_read64_2x32 to read upper and lower fields of the GPM
timestamp.

v2: Fix compile error

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c   | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 4e9154cacc58..6052148068d7 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1205,20 +1205,6 @@ static u32 gpm_timestamp_shift(struct intel_gt *gt)
 	return 3 - shift;
 }
 
-static u64 gpm_timestamp(struct intel_gt *gt)
-{
-	u32 lo, hi, old_hi, loop = 0;
-
-	hi = intel_uncore_read(gt->uncore, MISC_STATUS1);
-	do {
-		lo = intel_uncore_read(gt->uncore, MISC_STATUS0);
-		old_hi = hi;
-		hi = intel_uncore_read(gt->uncore, MISC_STATUS1);
-	} while (old_hi != hi && loop++ < 2);
-
-	return ((u64)hi << 32) | lo;
-}
-
 static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
 {
 	struct intel_gt *gt = guc_to_gt(guc);
@@ -1228,7 +1214,8 @@ static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
 	lockdep_assert_held(&guc->timestamp.lock);
 
 	gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
-	gpm_ts = gpm_timestamp(gt) >> guc->timestamp.shift;
+	gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0,
+					  MISC_STATUS1) >> guc->timestamp.shift;
 	gt_stamp_lo = lower_32_bits(gpm_ts);
 	*now = ktime_get();
 
-- 
2.33.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915/pmu: Fix KMD and GuC race on accessing busyness
  2022-01-27  2:00 [Intel-gfx] [PATCH 1/2] drm/i915/pmu: Fix KMD and GuC race on accessing busyness Umesh Nerlige Ramappa
  2022-01-27  2:00 ` [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Use existing uncore helper to read gpm_timestamp Umesh Nerlige Ramappa
@ 2022-01-27  2:52 ` Patchwork
  2022-01-27  8:45 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
  2022-01-28  9:34 ` [Intel-gfx] [PATCH 1/2] " Tvrtko Ursulin
  3 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2022-01-27  2:52 UTC (permalink / raw)
  To: Umesh Nerlige Ramappa; +Cc: intel-gfx

[-- Attachment #1: Type: text/plain, Size: 8558 bytes --]

== Series Details ==

Series: series starting with [1/2] drm/i915/pmu: Fix KMD and GuC race on accessing busyness
URL   : https://patchwork.freedesktop.org/series/99388/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_11148 -> Patchwork_22119
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/index.html

Participating hosts (43 -> 42)
------------------------------

  Additional (3): fi-kbl-soraka fi-icl-u2 fi-pnv-d510 
  Missing    (4): fi-ctg-p8600 fi-bsw-cyan fi-bdw-samus fi-hsw-4200u 

Known issues
------------

  Here are the changes found in Patchwork_22119 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@amdgpu/amd_basic@query-info:
    - fi-bsw-kefka:       NOTRUN -> [SKIP][1] ([fdo#109271]) +17 similar issues
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/fi-bsw-kefka/igt@amdgpu/amd_basic@query-info.html

  * igt@amdgpu/amd_cs_nop@fork-gfx0:
    - fi-icl-u2:          NOTRUN -> [SKIP][2] ([fdo#109315]) +17 similar issues
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/fi-icl-u2/igt@amdgpu/amd_cs_nop@fork-gfx0.html

  * igt@amdgpu/amd_cs_nop@sync-gfx0:
    - fi-bsw-n3050:       NOTRUN -> [SKIP][3] ([fdo#109271]) +17 similar issues
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/fi-bsw-n3050/igt@amdgpu/amd_cs_nop@sync-gfx0.html

  * igt@gem_exec_fence@basic-busy@bcs0:
    - fi-kbl-soraka:      NOTRUN -> [SKIP][4] ([fdo#109271]) +8 similar issues
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/fi-kbl-soraka/igt@gem_exec_fence@basic-busy@bcs0.html

  * igt@gem_huc_copy@huc-copy:
    - fi-kbl-soraka:      NOTRUN -> [SKIP][5] ([fdo#109271] / [i915#2190])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/fi-kbl-soraka/igt@gem_huc_copy@huc-copy.html
    - fi-icl-u2:          NOTRUN -> [SKIP][6] ([i915#2190])
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/fi-icl-u2/igt@gem_huc_copy@huc-copy.html

  * igt@gem_lmem_swapping@basic:
    - fi-kbl-soraka:      NOTRUN -> [SKIP][7] ([fdo#109271] / [i915#4613]) +3 similar issues
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/fi-kbl-soraka/igt@gem_lmem_swapping@basic.html

  * igt@gem_lmem_swapping@parallel-random-engines:
    - fi-icl-u2:          NOTRUN -> [SKIP][8] ([i915#4613]) +3 similar issues
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/fi-icl-u2/igt@gem_lmem_swapping@parallel-random-engines.html

  * igt@i915_selftest@live@gt_pm:
    - fi-kbl-soraka:      NOTRUN -> [DMESG-FAIL][9] ([i915#1886] / [i915#2291])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/fi-kbl-soraka/igt@i915_selftest@live@gt_pm.html

  * igt@i915_selftest@live@hangcheck:
    - bat-dg1-6:          [PASS][10] -> [DMESG-FAIL][11] ([i915#4957])
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/bat-dg1-6/igt@i915_selftest@live@hangcheck.html
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/bat-dg1-6/igt@i915_selftest@live@hangcheck.html

  * igt@kms_chamelium@common-hpd-after-suspend:
    - fi-kbl-soraka:      NOTRUN -> [SKIP][12] ([fdo#109271] / [fdo#111827]) +8 similar issues
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/fi-kbl-soraka/igt@kms_chamelium@common-hpd-after-suspend.html

  * igt@kms_chamelium@hdmi-hpd-fast:
    - fi-icl-u2:          NOTRUN -> [SKIP][13] ([fdo#111827]) +8 similar issues
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/fi-icl-u2/igt@kms_chamelium@hdmi-hpd-fast.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy:
    - fi-icl-u2:          NOTRUN -> [SKIP][14] ([fdo#109278]) +2 similar issues
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/fi-icl-u2/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy.html

  * igt@kms_force_connector_basic@force-load-detect:
    - fi-icl-u2:          NOTRUN -> [SKIP][15] ([fdo#109285])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/fi-icl-u2/igt@kms_force_connector_basic@force-load-detect.html

  * igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d:
    - fi-kbl-soraka:      NOTRUN -> [SKIP][16] ([fdo#109271] / [i915#533])
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/fi-kbl-soraka/igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d.html

  * igt@prime_vgem@basic-userptr:
    - fi-pnv-d510:        NOTRUN -> [SKIP][17] ([fdo#109271]) +57 similar issues
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/fi-pnv-d510/igt@prime_vgem@basic-userptr.html
    - fi-icl-u2:          NOTRUN -> [SKIP][18] ([i915#3301])
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/fi-icl-u2/igt@prime_vgem@basic-userptr.html

  
#### Possible fixes ####

  * igt@i915_selftest@live@execlists:
    - fi-bsw-kefka:       [INCOMPLETE][19] ([i915#2940]) -> [PASS][20]
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/fi-bsw-kefka/igt@i915_selftest@live@execlists.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/fi-bsw-kefka/igt@i915_selftest@live@execlists.html

  * igt@i915_selftest@live@late_gt_pm:
    - fi-bsw-n3050:       [DMESG-FAIL][21] ([i915#2927] / [i915#3428]) -> [PASS][22]
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/fi-bsw-n3050/igt@i915_selftest@live@late_gt_pm.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/fi-bsw-n3050/igt@i915_selftest@live@late_gt_pm.html

  
#### Warnings ####

  * igt@i915_selftest@live@hangcheck:
    - bat-dg1-5:          [DMESG-FAIL][23] ([i915#4494] / [i915#4957]) -> [DMESG-FAIL][24] ([i915#4494])
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/bat-dg1-5/igt@i915_selftest@live@hangcheck.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/bat-dg1-5/igt@i915_selftest@live@hangcheck.html

  * igt@kms_psr@primary_page_flip:
    - fi-skl-6600u:       [INCOMPLETE][25] ([i915#4547] / [i915#4838]) -> [INCOMPLETE][26] ([i915#4838])
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/fi-skl-6600u/igt@kms_psr@primary_page_flip.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/fi-skl-6600u/igt@kms_psr@primary_page_flip.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278
  [fdo#109285]: https://bugs.freedesktop.org/show_bug.cgi?id=109285
  [fdo#109315]: https://bugs.freedesktop.org/show_bug.cgi?id=109315
  [fdo#111827]: https://bugs.freedesktop.org/show_bug.cgi?id=111827
  [i915#1886]: https://gitlab.freedesktop.org/drm/intel/issues/1886
  [i915#2190]: https://gitlab.freedesktop.org/drm/intel/issues/2190
  [i915#2291]: https://gitlab.freedesktop.org/drm/intel/issues/2291
  [i915#2927]: https://gitlab.freedesktop.org/drm/intel/issues/2927
  [i915#2940]: https://gitlab.freedesktop.org/drm/intel/issues/2940
  [i915#3301]: https://gitlab.freedesktop.org/drm/intel/issues/3301
  [i915#3428]: https://gitlab.freedesktop.org/drm/intel/issues/3428
  [i915#4312]: https://gitlab.freedesktop.org/drm/intel/issues/4312
  [i915#4494]: https://gitlab.freedesktop.org/drm/intel/issues/4494
  [i915#4547]: https://gitlab.freedesktop.org/drm/intel/issues/4547
  [i915#4613]: https://gitlab.freedesktop.org/drm/intel/issues/4613
  [i915#4838]: https://gitlab.freedesktop.org/drm/intel/issues/4838
  [i915#4897]: https://gitlab.freedesktop.org/drm/intel/issues/4897
  [i915#4957]: https://gitlab.freedesktop.org/drm/intel/issues/4957
  [i915#533]: https://gitlab.freedesktop.org/drm/intel/issues/533


Build changes
-------------

  * Linux: CI_DRM_11148 -> Patchwork_22119

  CI-20190529: 20190529
  CI_DRM_11148: a8cba5ef0c0735a1f4867ca4c19909fd3edc388a @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_6335: 2b30115edd692b60d16cb10375730a87f51f0e37 @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
  Patchwork_22119: a71addb68faa6dd3a45c0bb5f395cf04f48ba44d @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

a71addb68faa drm/i915/pmu: Use existing uncore helper to read gpm_timestamp
8ce71f3d696c drm/i915/pmu: Fix KMD and GuC race on accessing busyness

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/index.html

[-- Attachment #2: Type: text/html, Size: 10418 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Intel-gfx] ✗ Fi.CI.IGT: failure for series starting with [1/2] drm/i915/pmu: Fix KMD and GuC race on accessing busyness
  2022-01-27  2:00 [Intel-gfx] [PATCH 1/2] drm/i915/pmu: Fix KMD and GuC race on accessing busyness Umesh Nerlige Ramappa
  2022-01-27  2:00 ` [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Use existing uncore helper to read gpm_timestamp Umesh Nerlige Ramappa
  2022-01-27  2:52 ` [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915/pmu: Fix KMD and GuC race on accessing busyness Patchwork
@ 2022-01-27  8:45 ` Patchwork
  2022-01-28  9:34 ` [Intel-gfx] [PATCH 1/2] " Tvrtko Ursulin
  3 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2022-01-27  8:45 UTC (permalink / raw)
  To: Umesh Nerlige Ramappa; +Cc: intel-gfx

[-- Attachment #1: Type: text/plain, Size: 30306 bytes --]

== Series Details ==

Series: series starting with [1/2] drm/i915/pmu: Fix KMD and GuC race on accessing busyness
URL   : https://patchwork.freedesktop.org/series/99388/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_11148_full -> Patchwork_22119_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_22119_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_22119_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Participating hosts (10 -> 10)
------------------------------

  No changes in participating hosts

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_22119_full:

### IGT changes ###

#### Possible regressions ####

  * igt@kms_vblank@pipe-a-ts-continuation-dpms-suspend:
    - shard-snb:          [PASS][1] -> [INCOMPLETE][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-snb7/igt@kms_vblank@pipe-a-ts-continuation-dpms-suspend.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-snb4/igt@kms_vblank@pipe-a-ts-continuation-dpms-suspend.html

  
Known issues
------------

  Here are the changes found in Patchwork_22119_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_eio@in-flight-suspend:
    - shard-skl:          [PASS][3] -> [INCOMPLETE][4] ([i915#4843])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-skl4/igt@gem_eio@in-flight-suspend.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-skl8/igt@gem_eio@in-flight-suspend.html

  * igt@gem_exec_balancer@parallel-balancer:
    - shard-iclb:         [PASS][5] -> [SKIP][6] ([i915#4525]) +1 similar issue
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-iclb1/igt@gem_exec_balancer@parallel-balancer.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-iclb7/igt@gem_exec_balancer@parallel-balancer.html

  * igt@gem_exec_capture@pi@vcs0:
    - shard-skl:          NOTRUN -> [INCOMPLETE][7] ([i915#4547])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-skl9/igt@gem_exec_capture@pi@vcs0.html

  * igt@gem_exec_fair@basic-pace-solo@rcs0:
    - shard-glk:          [PASS][8] -> [FAIL][9] ([i915#2842])
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-glk9/igt@gem_exec_fair@basic-pace-solo@rcs0.html
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-glk3/igt@gem_exec_fair@basic-pace-solo@rcs0.html
    - shard-tglb:         NOTRUN -> [FAIL][10] ([i915#2842])
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-tglb2/igt@gem_exec_fair@basic-pace-solo@rcs0.html

  * igt@gem_exec_fair@basic-pace@vcs1:
    - shard-kbl:          [PASS][11] -> [FAIL][12] ([i915#2842]) +4 similar issues
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-kbl6/igt@gem_exec_fair@basic-pace@vcs1.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-kbl6/igt@gem_exec_fair@basic-pace@vcs1.html

  * igt@gem_exec_whisper@basic-normal-all:
    - shard-glk:          [PASS][13] -> [DMESG-WARN][14] ([i915#118])
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-glk2/igt@gem_exec_whisper@basic-normal-all.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-glk2/igt@gem_exec_whisper@basic-normal-all.html

  * igt@gem_lmem_swapping@parallel-random-verify:
    - shard-kbl:          NOTRUN -> [SKIP][15] ([fdo#109271] / [i915#4613]) +1 similar issue
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-kbl4/igt@gem_lmem_swapping@parallel-random-verify.html

  * igt@gem_lmem_swapping@random-engines:
    - shard-skl:          NOTRUN -> [SKIP][16] ([fdo#109271] / [i915#4613])
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-skl1/igt@gem_lmem_swapping@random-engines.html

  * igt@gem_lmem_swapping@verify-random:
    - shard-apl:          NOTRUN -> [SKIP][17] ([fdo#109271] / [i915#4613])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-apl1/igt@gem_lmem_swapping@verify-random.html

  * igt@gem_render_copy@y-tiled-mc-ccs-to-vebox-y-tiled:
    - shard-snb:          NOTRUN -> [SKIP][18] ([fdo#109271]) +68 similar issues
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-snb6/igt@gem_render_copy@y-tiled-mc-ccs-to-vebox-y-tiled.html

  * igt@gem_userptr_blits@dmabuf-sync:
    - shard-kbl:          NOTRUN -> [SKIP][19] ([fdo#109271] / [i915#3323])
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-kbl4/igt@gem_userptr_blits@dmabuf-sync.html

  * igt@gem_userptr_blits@input-checking:
    - shard-skl:          NOTRUN -> [DMESG-WARN][20] ([i915#4990])
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-skl1/igt@gem_userptr_blits@input-checking.html

  * igt@gem_workarounds@suspend-resume-context:
    - shard-skl:          [PASS][21] -> [INCOMPLETE][22] ([i915#4939])
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-skl1/igt@gem_workarounds@suspend-resume-context.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-skl7/igt@gem_workarounds@suspend-resume-context.html

  * igt@i915_pm_rpm@system-suspend:
    - shard-skl:          [PASS][23] -> [INCOMPLETE][24] ([i915#151])
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-skl8/igt@i915_pm_rpm@system-suspend.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-skl8/igt@i915_pm_rpm@system-suspend.html

  * igt@kms_big_fb@y-tiled-max-hw-stride-32bpp-rotate-0-hflip:
    - shard-apl:          NOTRUN -> [SKIP][25] ([fdo#109271] / [i915#3777])
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-apl1/igt@kms_big_fb@y-tiled-max-hw-stride-32bpp-rotate-0-hflip.html

  * igt@kms_big_fb@yf-tiled-32bpp-rotate-180:
    - shard-tglb:         NOTRUN -> [SKIP][26] ([fdo#111615])
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-tglb5/igt@kms_big_fb@yf-tiled-32bpp-rotate-180.html

  * igt@kms_big_fb@yf-tiled-max-hw-stride-32bpp-rotate-0-hflip:
    - shard-kbl:          NOTRUN -> [SKIP][27] ([fdo#109271] / [i915#3777])
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-kbl4/igt@kms_big_fb@yf-tiled-max-hw-stride-32bpp-rotate-0-hflip.html

  * igt@kms_ccs@pipe-a-crc-sprite-planes-basic-y_tiled_gen12_mc_ccs:
    - shard-apl:          NOTRUN -> [SKIP][28] ([fdo#109271] / [i915#3886]) +4 similar issues
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-apl6/igt@kms_ccs@pipe-a-crc-sprite-planes-basic-y_tiled_gen12_mc_ccs.html

  * igt@kms_ccs@pipe-a-missing-ccs-buffer-y_tiled_gen12_mc_ccs:
    - shard-kbl:          NOTRUN -> [SKIP][29] ([fdo#109271] / [i915#3886]) +7 similar issues
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-kbl4/igt@kms_ccs@pipe-a-missing-ccs-buffer-y_tiled_gen12_mc_ccs.html

  * igt@kms_ccs@pipe-b-ccs-on-another-bo-y_tiled_gen12_mc_ccs:
    - shard-skl:          NOTRUN -> [SKIP][30] ([fdo#109271] / [i915#3886])
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-skl6/igt@kms_ccs@pipe-b-ccs-on-another-bo-y_tiled_gen12_mc_ccs.html
    - shard-tglb:         NOTRUN -> [SKIP][31] ([i915#3689] / [i915#3886])
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-tglb5/igt@kms_ccs@pipe-b-ccs-on-another-bo-y_tiled_gen12_mc_ccs.html

  * igt@kms_ccs@pipe-d-missing-ccs-buffer-y_tiled_gen12_mc_ccs:
    - shard-tglb:         NOTRUN -> [SKIP][32] ([i915#3689])
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-tglb5/igt@kms_ccs@pipe-d-missing-ccs-buffer-y_tiled_gen12_mc_ccs.html

  * igt@kms_chamelium@vga-hpd-for-each-pipe:
    - shard-kbl:          NOTRUN -> [SKIP][33] ([fdo#109271] / [fdo#111827]) +10 similar issues
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-kbl4/igt@kms_chamelium@vga-hpd-for-each-pipe.html

  * igt@kms_chamelium@vga-hpd-with-enabled-mode:
    - shard-skl:          NOTRUN -> [SKIP][34] ([fdo#109271] / [fdo#111827])
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-skl1/igt@kms_chamelium@vga-hpd-with-enabled-mode.html

  * igt@kms_chamelium@vga-hpd-without-ddc:
    - shard-snb:          NOTRUN -> [SKIP][35] ([fdo#109271] / [fdo#111827]) +4 similar issues
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-snb6/igt@kms_chamelium@vga-hpd-without-ddc.html

  * igt@kms_color_chamelium@pipe-b-gamma:
    - shard-apl:          NOTRUN -> [SKIP][36] ([fdo#109271] / [fdo#111827]) +6 similar issues
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-apl1/igt@kms_color_chamelium@pipe-b-gamma.html

  * igt@kms_color_chamelium@pipe-c-degamma:
    - shard-tglb:         NOTRUN -> [SKIP][37] ([fdo#109284] / [fdo#111827]) +2 similar issues
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-tglb5/igt@kms_color_chamelium@pipe-c-degamma.html

  * igt@kms_content_protection@legacy:
    - shard-kbl:          NOTRUN -> [TIMEOUT][38] ([i915#1319]) +1 similar issue
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-kbl4/igt@kms_content_protection@legacy.html

  * igt@kms_content_protection@srm:
    - shard-apl:          NOTRUN -> [TIMEOUT][39] ([i915#1319])
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-apl6/igt@kms_content_protection@srm.html

  * igt@kms_cursor_crc@pipe-b-cursor-512x512-offscreen:
    - shard-tglb:         NOTRUN -> [SKIP][40] ([fdo#109279] / [i915#3359])
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-tglb5/igt@kms_cursor_crc@pipe-b-cursor-512x512-offscreen.html

  * igt@kms_cursor_legacy@2x-nonblocking-modeset-vs-cursor-atomic:
    - shard-tglb:         NOTRUN -> [SKIP][41] ([fdo#109274] / [fdo#111825]) +1 similar issue
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-tglb5/igt@kms_cursor_legacy@2x-nonblocking-modeset-vs-cursor-atomic.html

  * igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions-varying-size:
    - shard-skl:          [PASS][42] -> [FAIL][43] ([i915#2346] / [i915#533])
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-skl1/igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions-varying-size.html
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-skl6/igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions-varying-size.html

  * igt@kms_flip@flip-vs-suspend-interruptible@a-dp1:
    - shard-kbl:          [PASS][44] -> [DMESG-WARN][45] ([i915#180]) +6 similar issues
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-kbl1/igt@kms_flip@flip-vs-suspend-interruptible@a-dp1.html
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-kbl4/igt@kms_flip@flip-vs-suspend-interruptible@a-dp1.html

  * igt@kms_flip@flip-vs-suspend-interruptible@c-dp1:
    - shard-apl:          [PASS][46] -> [DMESG-WARN][47] ([i915#180]) +5 similar issues
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-apl6/igt@kms_flip@flip-vs-suspend-interruptible@c-dp1.html
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-apl8/igt@kms_flip@flip-vs-suspend-interruptible@c-dp1.html

  * igt@kms_flip_scaled_crc@flip-32bpp-ytileccs-to-64bpp-ytile-upscaling:
    - shard-glk:          [PASS][48] -> [FAIL][49] ([i915#4911])
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-glk3/igt@kms_flip_scaled_crc@flip-32bpp-ytileccs-to-64bpp-ytile-upscaling.html
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-glk8/igt@kms_flip_scaled_crc@flip-32bpp-ytileccs-to-64bpp-ytile-upscaling.html

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-spr-indfb-draw-blt:
    - shard-kbl:          NOTRUN -> [SKIP][50] ([fdo#109271]) +120 similar issues
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-kbl3/igt@kms_frontbuffer_tracking@fbc-2p-primscrn-spr-indfb-draw-blt.html

  * igt@kms_frontbuffer_tracking@fbcpsr-2p-primscrn-pri-indfb-draw-mmap-gtt:
    - shard-tglb:         NOTRUN -> [SKIP][51] ([fdo#109280] / [fdo#111825]) +2 similar issues
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-tglb5/igt@kms_frontbuffer_tracking@fbcpsr-2p-primscrn-pri-indfb-draw-mmap-gtt.html

  * igt@kms_frontbuffer_tracking@fbcpsr-2p-primscrn-spr-indfb-draw-mmap-gtt:
    - shard-skl:          NOTRUN -> [SKIP][52] ([fdo#109271]) +23 similar issues
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-skl1/igt@kms_frontbuffer_tracking@fbcpsr-2p-primscrn-spr-indfb-draw-mmap-gtt.html

  * igt@kms_frontbuffer_tracking@psr-rgb565-draw-render:
    - shard-apl:          NOTRUN -> [SKIP][53] ([fdo#109271]) +61 similar issues
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-apl8/igt@kms_frontbuffer_tracking@psr-rgb565-draw-render.html

  * igt@kms_pipe_crc_basic@hang-read-crc-pipe-d:
    - shard-apl:          NOTRUN -> [SKIP][54] ([fdo#109271] / [i915#533])
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-apl1/igt@kms_pipe_crc_basic@hang-read-crc-pipe-d.html

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
    - shard-snb:          [PASS][55] -> [FAIL][56] ([fdo#103375])
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-snb2/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-snb2/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html

  * igt@kms_plane_alpha_blend@pipe-a-constant-alpha-max:
    - shard-apl:          NOTRUN -> [FAIL][57] ([fdo#108145] / [i915#265])
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-apl1/igt@kms_plane_alpha_blend@pipe-a-constant-alpha-max.html

  * igt@kms_plane_alpha_blend@pipe-a-coverage-7efc:
    - shard-skl:          [PASS][58] -> [FAIL][59] ([fdo#108145] / [i915#265])
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-skl9/igt@kms_plane_alpha_blend@pipe-a-coverage-7efc.html
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-skl10/igt@kms_plane_alpha_blend@pipe-a-coverage-7efc.html

  * igt@kms_plane_alpha_blend@pipe-b-alpha-transparent-fb:
    - shard-apl:          NOTRUN -> [FAIL][60] ([i915#265])
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-apl6/igt@kms_plane_alpha_blend@pipe-b-alpha-transparent-fb.html
    - shard-kbl:          NOTRUN -> [FAIL][61] ([i915#265])
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-kbl3/igt@kms_plane_alpha_blend@pipe-b-alpha-transparent-fb.html

  * igt@kms_plane_alpha_blend@pipe-b-constant-alpha-max:
    - shard-kbl:          NOTRUN -> [FAIL][62] ([fdo#108145] / [i915#265])
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-kbl4/igt@kms_plane_alpha_blend@pipe-b-constant-alpha-max.html

  * igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area:
    - shard-apl:          NOTRUN -> [SKIP][63] ([fdo#109271] / [i915#658]) +1 similar issue
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-apl8/igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area.html

  * igt@kms_psr@psr2_primary_page_flip:
    - shard-iclb:         [PASS][64] -> [SKIP][65] ([fdo#109441]) +1 similar issue
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-iclb2/igt@kms_psr@psr2_primary_page_flip.html
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-iclb1/igt@kms_psr@psr2_primary_page_flip.html

  * igt@kms_vblank@pipe-d-ts-continuation-dpms-suspend:
    - shard-iclb:         NOTRUN -> [SKIP][66] ([fdo#109278])
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-iclb1/igt@kms_vblank@pipe-d-ts-continuation-dpms-suspend.html

  * igt@kms_writeback@writeback-fb-id:
    - shard-kbl:          NOTRUN -> [SKIP][67] ([fdo#109271] / [i915#2437])
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-kbl6/igt@kms_writeback@writeback-fb-id.html

  * igt@perf@polling-parameterized:
    - shard-skl:          [PASS][68] -> [FAIL][69] ([i915#1542])
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-skl4/igt@perf@polling-parameterized.html
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-skl10/igt@perf@polling-parameterized.html
    - shard-kbl:          [PASS][70] -> [FAIL][71] ([i915#1542])
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-kbl4/igt@perf@polling-parameterized.html
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-kbl6/igt@perf@polling-parameterized.html

  * igt@prime_nv_api@nv_self_import:
    - shard-iclb:         NOTRUN -> [SKIP][72] ([fdo#109291])
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-iclb1/igt@prime_nv_api@nv_self_import.html

  * igt@sysfs_clients@fair-0:
    - shard-apl:          NOTRUN -> [SKIP][73] ([fdo#109271] / [i915#2994])
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-apl8/igt@sysfs_clients@fair-0.html

  
#### Possible fixes ####

  * igt@gem_ctx_isolation@preservation-s3@rcs0:
    - shard-skl:          [INCOMPLETE][74] ([i915#4793]) -> [PASS][75]
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-skl2/igt@gem_ctx_isolation@preservation-s3@rcs0.html
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-skl1/igt@gem_ctx_isolation@preservation-s3@rcs0.html

  * igt@gem_eio@in-flight-contexts-10ms:
    - shard-tglb:         [TIMEOUT][76] ([i915#3063]) -> [PASS][77] +1 similar issue
   [76]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-tglb1/igt@gem_eio@in-flight-contexts-10ms.html
   [77]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-tglb7/igt@gem_eio@in-flight-contexts-10ms.html

  * igt@gem_exec_capture@pi@rcs0:
    - shard-skl:          [INCOMPLETE][78] ([i915#4547]) -> [PASS][79]
   [78]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-skl10/igt@gem_exec_capture@pi@rcs0.html
   [79]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-skl9/igt@gem_exec_capture@pi@rcs0.html

  * igt@gem_exec_fair@basic-deadline:
    - shard-glk:          [FAIL][80] ([i915#2846]) -> [PASS][81]
   [80]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-glk1/igt@gem_exec_fair@basic-deadline.html
   [81]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-glk3/igt@gem_exec_fair@basic-deadline.html

  * igt@gem_exec_fair@basic-pace-solo@rcs0:
    - shard-iclb:         [FAIL][82] ([i915#2842]) -> [PASS][83]
   [82]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-iclb5/igt@gem_exec_fair@basic-pace-solo@rcs0.html
   [83]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-iclb2/igt@gem_exec_fair@basic-pace-solo@rcs0.html

  * igt@gem_exec_fair@basic-pace@rcs0:
    - shard-kbl:          [FAIL][84] ([i915#2842]) -> [PASS][85]
   [84]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-kbl6/igt@gem_exec_fair@basic-pace@rcs0.html
   [85]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-kbl6/igt@gem_exec_fair@basic-pace@rcs0.html

  * igt@gem_exec_whisper@basic-queues:
    - shard-glk:          [DMESG-WARN][86] ([i915#118]) -> [PASS][87] +1 similar issue
   [86]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-glk1/igt@gem_exec_whisper@basic-queues.html
   [87]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-glk7/igt@gem_exec_whisper@basic-queues.html

  * igt@gem_workarounds@suspend-resume-context:
    - shard-apl:          [DMESG-WARN][88] ([i915#180]) -> [PASS][89] +5 similar issues
   [88]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-apl2/igt@gem_workarounds@suspend-resume-context.html
   [89]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-apl1/igt@gem_workarounds@suspend-resume-context.html

  * igt@gen9_exec_parse@allowed-all:
    - shard-kbl:          [DMESG-WARN][90] ([i915#1436] / [i915#716]) -> [PASS][91]
   [90]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-kbl4/igt@gen9_exec_parse@allowed-all.html
   [91]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-kbl4/igt@gen9_exec_parse@allowed-all.html

  * igt@i915_module_load@reload-no-display:
    - shard-iclb:         [DMESG-WARN][92] ([i915#2867]) -> [PASS][93] +1 similar issue
   [92]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-iclb5/igt@i915_module_load@reload-no-display.html
   [93]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-iclb6/igt@i915_module_load@reload-no-display.html

  * igt@i915_suspend@forcewake:
    - shard-snb:          [INCOMPLETE][94] -> [PASS][95]
   [94]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-snb7/igt@i915_suspend@forcewake.html
   [95]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-snb6/igt@i915_suspend@forcewake.html

  * igt@i915_suspend@sysfs-reader:
    - shard-kbl:          [INCOMPLETE][96] ([i915#3614]) -> [PASS][97]
   [96]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-kbl4/igt@i915_suspend@sysfs-reader.html
   [97]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-kbl3/igt@i915_suspend@sysfs-reader.html

  * igt@kms_cursor_legacy@flip-vs-cursor-atomic:
    - shard-skl:          [FAIL][98] ([i915#2346]) -> [PASS][99]
   [98]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-skl2/igt@kms_cursor_legacy@flip-vs-cursor-atomic.html
   [99]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-skl10/igt@kms_cursor_legacy@flip-vs-cursor-atomic.html

  * igt@kms_hdr@bpc-switch:
    - shard-skl:          [FAIL][100] ([i915#1188]) -> [PASS][101]
   [100]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-skl2/igt@kms_hdr@bpc-switch.html
   [101]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-skl10/igt@kms_hdr@bpc-switch.html

  * igt@kms_hdr@bpc-switch-suspend:
    - shard-kbl:          [DMESG-WARN][102] ([i915#180]) -> [PASS][103] +2 similar issues
   [102]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-kbl4/igt@kms_hdr@bpc-switch-suspend.html
   [103]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-kbl6/igt@kms_hdr@bpc-switch-suspend.html

  * igt@kms_psr@psr2_cursor_blt:
    - shard-iclb:         [SKIP][104] ([fdo#109441]) -> [PASS][105]
   [104]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-iclb5/igt@kms_psr@psr2_cursor_blt.html
   [105]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-iclb2/igt@kms_psr@psr2_cursor_blt.html

  * igt@kms_setmode@basic:
    - shard-glk:          [FAIL][106] ([i915#31]) -> [PASS][107]
   [106]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-glk8/igt@kms_setmode@basic.html
   [107]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-glk1/igt@kms_setmode@basic.html

  
#### Warnings ####

  * igt@gem_eio@unwedge-stress:
    - shard-tglb:         [FAIL][108] ([i915#232]) -> [TIMEOUT][109] ([i915#3063] / [i915#3648])
   [108]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-tglb5/igt@gem_eio@unwedge-stress.html
   [109]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-tglb6/igt@gem_eio@unwedge-stress.html

  * igt@gem_exec_balancer@parallel-ordering:
    - shard-iclb:         [FAIL][110] ([i915#4916]) -> [SKIP][111] ([i915#4525])
   [110]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-iclb4/igt@gem_exec_balancer@parallel-ordering.html
   [111]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-iclb7/igt@gem_exec_balancer@parallel-ordering.html

  * igt@i915_pm_dc@dc3co-vpb-simulation:
    - shard-iclb:         [SKIP][112] ([i915#658]) -> [SKIP][113] ([i915#588])
   [112]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-iclb1/igt@i915_pm_dc@dc3co-vpb-simulation.html
   [113]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-iclb2/igt@i915_pm_dc@dc3co-vpb-simulation.html

  * igt@kms_psr2_su@page_flip-xrgb8888:
    - shard-iclb:         [FAIL][114] ([i915#4148]) -> [SKIP][115] ([fdo#109642] / [fdo#111068] / [i915#658])
   [114]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-iclb2/igt@kms_psr2_su@page_flip-xrgb8888.html
   [115]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-iclb7/igt@kms_psr2_su@page_flip-xrgb8888.html

  * igt@runner@aborted:
    - shard-kbl:          ([FAIL][116], [FAIL][117], [FAIL][118], [FAIL][119], [FAIL][120], [FAIL][121]) ([fdo#109271] / [i915#1436] / [i915#1814] / [i915#3002] / [i915#4312] / [i915#716]) -> ([FAIL][122], [FAIL][123], [FAIL][124], [FAIL][125]) ([i915#1436] / [i915#180] / [i915#1814] / [i915#3002] / [i915#4312])
   [116]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-kbl4/igt@runner@aborted.html
   [117]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-kbl7/igt@runner@aborted.html
   [118]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-kbl3/igt@runner@aborted.html
   [119]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-kbl4/igt@runner@aborted.html
   [120]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-kbl4/igt@runner@aborted.html
   [121]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-kbl4/igt@runner@aborted.html
   [122]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-kbl1/igt@runner@aborted.html
   [123]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-kbl4/igt@runner@aborted.html
   [124]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-kbl4/igt@runner@aborted.html
   [125]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-kbl4/igt@runner@aborted.html
    - shard-apl:          ([FAIL][126], [FAIL][127], [FAIL][128], [FAIL][129], [FAIL][130], [FAIL][131], [FAIL][132], [FAIL][133]) ([fdo#109271] / [i915#180] / [i915#1814] / [i915#3002] / [i915#4312]) -> ([FAIL][134], [FAIL][135], [FAIL][136], [FAIL][137], [FAIL][138], [FAIL][139], [FAIL][140]) ([i915#180] / [i915#1814] / [i915#3002] / [i915#4312])
   [126]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-apl7/igt@runner@aborted.html
   [127]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-apl2/igt@runner@aborted.html
   [128]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-apl3/igt@runner@aborted.html
   [129]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-apl6/igt@runner@aborted.html
   [130]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-apl8/igt@runner@aborted.html
   [131]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-apl6/igt@runner@aborted.html
   [132]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-apl3/igt@runner@aborted.html
   [133]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-apl2/igt@runner@aborted.html
   [134]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-apl8/igt@runner@aborted.html
   [135]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-apl7/igt@runner@aborted.html
   [136]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-apl4/igt@runner@aborted.html
   [137]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-apl4/igt@runner@aborted.html
   [138]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-apl2/igt@runner@aborted.html
   [139]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-apl2/igt@runner@aborted.html
   [140]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-apl8/igt@runner@aborted.html
    - shard-skl:          ([FAIL][141], [FAIL][142], [FAIL][143]) ([i915#2029] / [i915#3002] / [i915#4312]) -> ([FAIL][144], [FAIL][145], [FAIL][146], [FAIL][147]) ([i915#1814] / [i915#2029] / [i915#3002] / [i915#4312])
   [141]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-skl10/igt@runner@aborted.html
   [142]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-skl5/igt@runner@aborted.html
   [143]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11148/shard-skl8/igt@runner@aborted.html
   [144]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-skl2/igt@runner@aborted.html
   [145]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-skl9/igt@runner@aborted.html
   [146]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-skl1/igt@runner@aborted.html
   [147]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/shard-skl5/igt@runner@aborted.html

  
  [fdo#103375]: https://bugs.freedesktop.org/show_bug.cgi?id=103375
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109274]: https://bugs.freedesktop.org/show_bug.cgi?id=109274
  [fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278
  [fdo#109279]: https://bugs.freedesktop.org/show_bug.cgi?id=109279
  [fdo#109280]: https://bugs.freedesktop.org/show_bug.cgi?id=109280
  [fdo#109284]: https://bugs.freedesktop.org/show_bug.cgi?id=109284
  [fdo#109291]: https://bugs.freedesktop.org/show_bug.cgi?id=109291
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#109642]: https://bugs.freedesktop.org/show_bug.cgi?id=109642
  [fdo#111068]: https://bugs.freedesktop.org/show_bug.cgi?id=111068
  [fdo#111615]: https://bugs.freedesktop.org/show_bug.cgi?id=111615
  [fdo#111825]: https://bugs.freedesktop.org/show_bug.cgi?id=111825
  [fdo#111827]: https://bugs.freedesktop.org/show_bug.cgi?id=111827
  [i915#118]: https://gitlab.freedesktop.org/drm/intel/issues/118
  [i915#1188]: https://gitlab.freedesktop.org/drm/intel/issues/1188
  [i915#1319]: https://gitlab.freedesktop.org/drm/intel/issues/1319
  [i915#1436]: https://gitlab.freedesktop.org/drm/intel/issues/1436
  [i915#151]: https://gitlab.freedesktop.org/drm/intel/issues/151
  [i915#1542]: htt

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22119/index.html

[-- Attachment #2: Type: text/html, Size: 35622 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Intel-gfx] [PATCH 1/2] drm/i915/pmu: Fix KMD and GuC race on accessing busyness
  2022-01-27  2:00 [Intel-gfx] [PATCH 1/2] drm/i915/pmu: Fix KMD and GuC race on accessing busyness Umesh Nerlige Ramappa
                   ` (2 preceding siblings ...)
  2022-01-27  8:45 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
@ 2022-01-28  9:34 ` Tvrtko Ursulin
  2022-01-28 17:31   ` Umesh Nerlige Ramappa
  2022-01-28 21:53   ` John Harrison
  3 siblings, 2 replies; 9+ messages in thread
From: Tvrtko Ursulin @ 2022-01-28  9:34 UTC (permalink / raw)
  To: intel-gfx, John Harrison


John,

What CI results were used to merge this particular single patch? Unless 
I am not seeing it, it was always set in pair with something else.

First with "drm/i915/pmu: Use PM timestamp instead of RING TIMESTAMP for 
reference", which was merged earlier in the week and it had a standalone 
CI results so that is fine.

Other postings I can see it was in tandem with "drm/i915/pmu: Use 
existing uncore helper to read gpm_timestamp", which wasn't reviewed or 
merged.

So it looks to me, again unless I am not seeing anything, that it got 
merged without ever having a standalone CI results. This is therefore a 
reminder that BKM should be to always have CI results for the exact 
series/patch being merged.

If there is a situation where a subset of a series is conceptually ready 
before the rest, in the past what we used to do is send the reviewed 
portion as "--subject-prefix=CI" and then "--suppress-cc=all" and then 
merge when CI gives all green.

Regards,

Tvrtko

On 27/01/2022 02:00, Umesh Nerlige Ramappa wrote:
> GuC updates shared memory and KMD reads it. Since this is not
> synchronized, we run into a race where the value read is inconsistent.
> Sometimes the inconsistency is in reading the upper MSB bytes of the
> last_switch_in value. 2 types of cases are seen - upper 8 bits are zero
> and upper 24 bits are zero. Since these are non-zero values, it is
> not trivial to determine validity of these values. Instead we read the
> values multiple times until they are consistent. In test runs, 3
> attempts results in consistent values. The upper bound is set to 6
> attempts and may need to be tuned as per any new occurences.
> 
> Since the duration that gt is parked can vary, the patch also updates
> the gt timestamp on unpark before starting the worker.
> 
> v2:
> - Initialize i
> - Use READ_ONCE to access engine record
> 
> Fixes: 77cdd054dd2c ("drm/i915/pmu: Connect engine busyness stats from GuC to pmu")
> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
> Reviewed-by: Alan Previn <alan.previn.teres.alexis@intel.com>
> ---
>   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 58 +++++++++++++++++--
>   1 file changed, 54 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index db9615dcb0ec..4e9154cacc58 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -1114,6 +1114,19 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
>   	if (new_start == lower_32_bits(*prev_start))
>   		return;
>   
> +	/*
> +	 * When gt is unparked, we update the gt timestamp and start the ping
> +	 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
> +	 * is unparked, all switched in contexts will have a start time that is
> +	 * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
> +	 *
> +	 * If neither gt_stamp nor new_start has rolled over, then the
> +	 * gt_stamp_hi does not need to be adjusted, however if one of them has
> +	 * rolled over, we need to adjust gt_stamp_hi accordingly.
> +	 *
> +	 * The below conditions address the cases of new_start rollover and
> +	 * gt_stamp_last rollover respectively.
> +	 */
>   	if (new_start < gt_stamp_last &&
>   	    (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
>   		gt_stamp_hi++;
> @@ -1125,17 +1138,45 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
>   	*prev_start = ((u64)gt_stamp_hi << 32) | new_start;
>   }
>   
> -static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
> +/*
> + * GuC updates shared memory and KMD reads it. Since this is not synchronized,
> + * we run into a race where the value read is inconsistent. Sometimes the
> + * inconsistency is in reading the upper MSB bytes of the last_in value when
> + * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
> + * 24 bits are zero. Since these are non-zero values, it is non-trivial to
> + * determine validity of these values. Instead we read the values multiple times
> + * until they are consistent. In test runs, 3 attempts results in consistent
> + * values. The upper bound is set to 6 attempts and may need to be tuned as per
> + * any new occurences.
> + */
> +static void __get_engine_usage_record(struct intel_engine_cs *engine,
> +				      u32 *last_in, u32 *id, u32 *total)
>   {
>   	struct guc_engine_usage_record *rec = intel_guc_engine_usage(engine);
> +	int i = 0;
> +
> +	do {
> +		*last_in = READ_ONCE(rec->last_switch_in_stamp);
> +		*id = READ_ONCE(rec->current_context_index);
> +		*total = READ_ONCE(rec->total_runtime);
> +
> +		if (READ_ONCE(rec->last_switch_in_stamp) == *last_in &&
> +		    READ_ONCE(rec->current_context_index) == *id &&
> +		    READ_ONCE(rec->total_runtime) == *total)
> +			break;
> +	} while (++i < 6);
> +}
> +
> +static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
> +{
>   	struct intel_engine_guc_stats *stats = &engine->stats.guc;
>   	struct intel_guc *guc = &engine->gt->uc.guc;
> -	u32 last_switch = rec->last_switch_in_stamp;
> -	u32 ctx_id = rec->current_context_index;
> -	u32 total = rec->total_runtime;
> +	u32 last_switch, ctx_id, total;
>   
>   	lockdep_assert_held(&guc->timestamp.lock);
>   
> +	__get_engine_usage_record(engine, &last_switch, &ctx_id, &total);
> +
>   	stats->running = ctx_id != ~0U && last_switch;
>   	if (stats->running)
>   		__extend_last_switch(guc, &stats->start_gt_clk, last_switch);
> @@ -1237,6 +1278,10 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
>   	if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
>   		stats_saved = *stats;
>   		gt_stamp_saved = guc->timestamp.gt_stamp;
> +		/*
> +		 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
> +		 * start_gt_clk' calculation below for active engines.
> +		 */
>   		guc_update_engine_gt_clks(engine);
>   		guc_update_pm_timestamp(guc, now);
>   		intel_gt_pm_put_async(gt);
> @@ -1365,10 +1410,15 @@ void intel_guc_busyness_park(struct intel_gt *gt)
>   void intel_guc_busyness_unpark(struct intel_gt *gt)
>   {
>   	struct intel_guc *guc = &gt->uc.guc;
> +	unsigned long flags;
> +	ktime_t unused;
>   
>   	if (!guc_submission_initialized(guc))
>   		return;
>   
> +	spin_lock_irqsave(&guc->timestamp.lock, flags);
> +	guc_update_pm_timestamp(guc, &unused);
> +	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
>   	mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
>   			 guc->timestamp.ping_delay);
>   }
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Intel-gfx] [PATCH 1/2] drm/i915/pmu: Fix KMD and GuC race on accessing busyness
  2022-01-28  9:34 ` [Intel-gfx] [PATCH 1/2] " Tvrtko Ursulin
@ 2022-01-28 17:31   ` Umesh Nerlige Ramappa
  2022-01-28 21:53   ` John Harrison
  1 sibling, 0 replies; 9+ messages in thread
From: Umesh Nerlige Ramappa @ 2022-01-28 17:31 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Fri, Jan 28, 2022 at 09:34:28AM +0000, Tvrtko Ursulin wrote:
>
>John,
>
>What CI results were used to merge this particular single patch? 
>Unless I am not seeing it, it was always set in pair with something 
>else.
>
>First with "drm/i915/pmu: Use PM timestamp instead of RING TIMESTAMP 
>for reference", which was merged earlier in the week and it had a 
>standalone CI results so that is fine.
>
>Other postings I can see it was in tandem with "drm/i915/pmu: Use 
>existing uncore helper to read gpm_timestamp", which wasn't reviewed 
>or merged.

Sorry, the last series posted was my bad. It should have been only the 
helper patch which removed duplicated code.

>
>So it looks to me, again unless I am not seeing anything, that it got 
>merged without ever having a standalone CI results. This is therefore 
>a reminder that BKM should be to always have CI results for the exact 
>series/patch being merged.

https://patchwork.freedesktop.org/series/98714/ was posted for issue 1 -
drm/i915/pmu: Use PM timestamp instead of RING TIMESTAMP for reference

https://patchwork.freedesktop.org/series/99301/ was posted for issue 2 -
drm/i915/pmu: Fix KMD and GuC race on accessing busyness

In the latter case, I had to also include the patch for issue 1 since 
there was a rebase dependency and this patch was not yet merged to 
drm-tip. Both above postings have completed CI runs. Do you mean that 
the CI results from the second posting is not valid because it's not 
posted standalone?

Thanks,
Umesh

>
>If there is a situation where a subset of a series is conceptually 
>ready before the rest, in the past what we used to do is send the 
>reviewed portion as "--subject-prefix=CI" and then "--suppress-cc=all" 
>and then merge when CI gives all green.



>
>Regards,
>
>Tvrtko
>
>On 27/01/2022 02:00, Umesh Nerlige Ramappa wrote:
>>GuC updates shared memory and KMD reads it. Since this is not
>>synchronized, we run into a race where the value read is inconsistent.
>>Sometimes the inconsistency is in reading the upper MSB bytes of the
>>last_switch_in value. 2 types of cases are seen - upper 8 bits are zero
>>and upper 24 bits are zero. Since these are non-zero values, it is
>>not trivial to determine validity of these values. Instead we read the
>>values multiple times until they are consistent. In test runs, 3
>>attempts results in consistent values. The upper bound is set to 6
>>attempts and may need to be tuned as per any new occurences.
>>
>>Since the duration that gt is parked can vary, the patch also updates
>>the gt timestamp on unpark before starting the worker.
>>
>>v2:
>>- Initialize i
>>- Use READ_ONCE to access engine record
>>
>>Fixes: 77cdd054dd2c ("drm/i915/pmu: Connect engine busyness stats from GuC to pmu")
>>Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>>Reviewed-by: Alan Previn <alan.previn.teres.alexis@intel.com>
>>---
>>  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 58 +++++++++++++++++--
>>  1 file changed, 54 insertions(+), 4 deletions(-)
>>
>>diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>>index db9615dcb0ec..4e9154cacc58 100644
>>--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>>+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>>@@ -1114,6 +1114,19 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
>>  	if (new_start == lower_32_bits(*prev_start))
>>  		return;
>>+	/*
>>+	 * When gt is unparked, we update the gt timestamp and start the ping
>>+	 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
>>+	 * is unparked, all switched in contexts will have a start time that is
>>+	 * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
>>+	 *
>>+	 * If neither gt_stamp nor new_start has rolled over, then the
>>+	 * gt_stamp_hi does not need to be adjusted, however if one of them has
>>+	 * rolled over, we need to adjust gt_stamp_hi accordingly.
>>+	 *
>>+	 * The below conditions address the cases of new_start rollover and
>>+	 * gt_stamp_last rollover respectively.
>>+	 */
>>  	if (new_start < gt_stamp_last &&
>>  	    (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
>>  		gt_stamp_hi++;
>>@@ -1125,17 +1138,45 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
>>  	*prev_start = ((u64)gt_stamp_hi << 32) | new_start;
>>  }
>>-static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
>>+/*
>>+ * GuC updates shared memory and KMD reads it. Since this is not synchronized,
>>+ * we run into a race where the value read is inconsistent. Sometimes the
>>+ * inconsistency is in reading the upper MSB bytes of the last_in value when
>>+ * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
>>+ * 24 bits are zero. Since these are non-zero values, it is non-trivial to
>>+ * determine validity of these values. Instead we read the values multiple times
>>+ * until they are consistent. In test runs, 3 attempts results in consistent
>>+ * values. The upper bound is set to 6 attempts and may need to be tuned as per
>>+ * any new occurences.
>>+ */
>>+static void __get_engine_usage_record(struct intel_engine_cs *engine,
>>+				      u32 *last_in, u32 *id, u32 *total)
>>  {
>>  	struct guc_engine_usage_record *rec = intel_guc_engine_usage(engine);
>>+	int i = 0;
>>+
>>+	do {
>>+		*last_in = READ_ONCE(rec->last_switch_in_stamp);
>>+		*id = READ_ONCE(rec->current_context_index);
>>+		*total = READ_ONCE(rec->total_runtime);
>>+
>>+		if (READ_ONCE(rec->last_switch_in_stamp) == *last_in &&
>>+		    READ_ONCE(rec->current_context_index) == *id &&
>>+		    READ_ONCE(rec->total_runtime) == *total)
>>+			break;
>>+	} while (++i < 6);
>>+}
>>+
>>+static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
>>+{
>>  	struct intel_engine_guc_stats *stats = &engine->stats.guc;
>>  	struct intel_guc *guc = &engine->gt->uc.guc;
>>-	u32 last_switch = rec->last_switch_in_stamp;
>>-	u32 ctx_id = rec->current_context_index;
>>-	u32 total = rec->total_runtime;
>>+	u32 last_switch, ctx_id, total;
>>  	lockdep_assert_held(&guc->timestamp.lock);
>>+	__get_engine_usage_record(engine, &last_switch, &ctx_id, &total);
>>+
>>  	stats->running = ctx_id != ~0U && last_switch;
>>  	if (stats->running)
>>  		__extend_last_switch(guc, &stats->start_gt_clk, last_switch);
>>@@ -1237,6 +1278,10 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
>>  	if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
>>  		stats_saved = *stats;
>>  		gt_stamp_saved = guc->timestamp.gt_stamp;
>>+		/*
>>+		 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
>>+		 * start_gt_clk' calculation below for active engines.
>>+		 */
>>  		guc_update_engine_gt_clks(engine);
>>  		guc_update_pm_timestamp(guc, now);
>>  		intel_gt_pm_put_async(gt);
>>@@ -1365,10 +1410,15 @@ void intel_guc_busyness_park(struct intel_gt *gt)
>>  void intel_guc_busyness_unpark(struct intel_gt *gt)
>>  {
>>  	struct intel_guc *guc = &gt->uc.guc;
>>+	unsigned long flags;
>>+	ktime_t unused;
>>  	if (!guc_submission_initialized(guc))
>>  		return;
>>+	spin_lock_irqsave(&guc->timestamp.lock, flags);
>>+	guc_update_pm_timestamp(guc, &unused);
>>+	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
>>  	mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
>>  			 guc->timestamp.ping_delay);
>>  }
>>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Intel-gfx] [PATCH 1/2] drm/i915/pmu: Fix KMD and GuC race on accessing busyness
  2022-01-28  9:34 ` [Intel-gfx] [PATCH 1/2] " Tvrtko Ursulin
  2022-01-28 17:31   ` Umesh Nerlige Ramappa
@ 2022-01-28 21:53   ` John Harrison
  2022-01-28 22:46     ` Tvrtko Ursulin
  1 sibling, 1 reply; 9+ messages in thread
From: John Harrison @ 2022-01-28 21:53 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

On 1/28/2022 01:34, Tvrtko Ursulin wrote:
>
> John,
>
> What CI results were used to merge this particular single patch? 
> Unless I am not seeing it, it was always set in pair with something else.
>
> First with "drm/i915/pmu: Use PM timestamp instead of RING TIMESTAMP 
> for reference", which was merged earlier in the week and it had a 
> standalone CI results so that is fine.

It seemed plausible to say that it had a green CI run when included with 
a dependent patch given that the dependent patch has already been merged.

We don't normally require a brand new CI run any time the tree has moved 
on since the previous CI was run. You would never get anything merged if 
that was necessary!

So to me this just counts as pushing a tested patch set to a slightly 
newer tree where the rebase happened to mean one patch vanished (because 
exactly the same patch was already merged).

John.


>
> Other postings I can see it was in tandem with "drm/i915/pmu: Use 
> existing uncore helper to read gpm_timestamp", which wasn't reviewed 
> or merged.
>
> So it looks to me, again unless I am not seeing anything, that it got 
> merged without ever having a standalone CI results. This is therefore 
> a reminder that BKM should be to always have CI results for the exact 
> series/patch being merged.
>
> If there is a situation where a subset of a series is conceptually 
> ready before the rest, in the past what we used to do is send the 
> reviewed portion as "--subject-prefix=CI" and then "--suppress-cc=all" 
> and then merge when CI gives all green.
>
> Regards,
>
> Tvrtko
>
> On 27/01/2022 02:00, Umesh Nerlige Ramappa wrote:
>> GuC updates shared memory and KMD reads it. Since this is not
>> synchronized, we run into a race where the value read is inconsistent.
>> Sometimes the inconsistency is in reading the upper MSB bytes of the
>> last_switch_in value. 2 types of cases are seen - upper 8 bits are zero
>> and upper 24 bits are zero. Since these are non-zero values, it is
>> not trivial to determine validity of these values. Instead we read the
>> values multiple times until they are consistent. In test runs, 3
>> attempts results in consistent values. The upper bound is set to 6
>> attempts and may need to be tuned as per any new occurences.
>>
>> Since the duration that gt is parked can vary, the patch also updates
>> the gt timestamp on unpark before starting the worker.
>>
>> v2:
>> - Initialize i
>> - Use READ_ONCE to access engine record
>>
>> Fixes: 77cdd054dd2c ("drm/i915/pmu: Connect engine busyness stats 
>> from GuC to pmu")
>> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>> Reviewed-by: Alan Previn <alan.previn.teres.alexis@intel.com>
>> ---
>>   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 58 +++++++++++++++++--
>>   1 file changed, 54 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>> index db9615dcb0ec..4e9154cacc58 100644
>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>> @@ -1114,6 +1114,19 @@ __extend_last_switch(struct intel_guc *guc, 
>> u64 *prev_start, u32 new_start)
>>       if (new_start == lower_32_bits(*prev_start))
>>           return;
>>   +    /*
>> +     * When gt is unparked, we update the gt timestamp and start the 
>> ping
>> +     * worker that updates the gt_stamp every POLL_TIME_CLKS. As 
>> long as gt
>> +     * is unparked, all switched in contexts will have a start time 
>> that is
>> +     * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
>> +     *
>> +     * If neither gt_stamp nor new_start has rolled over, then the
>> +     * gt_stamp_hi does not need to be adjusted, however if one of 
>> them has
>> +     * rolled over, we need to adjust gt_stamp_hi accordingly.
>> +     *
>> +     * The below conditions address the cases of new_start rollover and
>> +     * gt_stamp_last rollover respectively.
>> +     */
>>       if (new_start < gt_stamp_last &&
>>           (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
>>           gt_stamp_hi++;
>> @@ -1125,17 +1138,45 @@ __extend_last_switch(struct intel_guc *guc, 
>> u64 *prev_start, u32 new_start)
>>       *prev_start = ((u64)gt_stamp_hi << 32) | new_start;
>>   }
>>   -static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
>> +/*
>> + * GuC updates shared memory and KMD reads it. Since this is not 
>> synchronized,
>> + * we run into a race where the value read is inconsistent. 
>> Sometimes the
>> + * inconsistency is in reading the upper MSB bytes of the last_in 
>> value when
>> + * this race occurs. 2 types of cases are seen - upper 8 bits are 
>> zero and upper
>> + * 24 bits are zero. Since these are non-zero values, it is 
>> non-trivial to
>> + * determine validity of these values. Instead we read the values 
>> multiple times
>> + * until they are consistent. In test runs, 3 attempts results in 
>> consistent
>> + * values. The upper bound is set to 6 attempts and may need to be 
>> tuned as per
>> + * any new occurences.
>> + */
>> +static void __get_engine_usage_record(struct intel_engine_cs *engine,
>> +                      u32 *last_in, u32 *id, u32 *total)
>>   {
>>       struct guc_engine_usage_record *rec = 
>> intel_guc_engine_usage(engine);
>> +    int i = 0;
>> +
>> +    do {
>> +        *last_in = READ_ONCE(rec->last_switch_in_stamp);
>> +        *id = READ_ONCE(rec->current_context_index);
>> +        *total = READ_ONCE(rec->total_runtime);
>> +
>> +        if (READ_ONCE(rec->last_switch_in_stamp) == *last_in &&
>> +            READ_ONCE(rec->current_context_index) == *id &&
>> +            READ_ONCE(rec->total_runtime) == *total)
>> +            break;
>> +    } while (++i < 6);
>> +}
>> +
>> +static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
>> +{
>>       struct intel_engine_guc_stats *stats = &engine->stats.guc;
>>       struct intel_guc *guc = &engine->gt->uc.guc;
>> -    u32 last_switch = rec->last_switch_in_stamp;
>> -    u32 ctx_id = rec->current_context_index;
>> -    u32 total = rec->total_runtime;
>> +    u32 last_switch, ctx_id, total;
>>         lockdep_assert_held(&guc->timestamp.lock);
>>   +    __get_engine_usage_record(engine, &last_switch, &ctx_id, &total);
>> +
>>       stats->running = ctx_id != ~0U && last_switch;
>>       if (stats->running)
>>           __extend_last_switch(guc, &stats->start_gt_clk, last_switch);
>> @@ -1237,6 +1278,10 @@ static ktime_t guc_engine_busyness(struct 
>> intel_engine_cs *engine, ktime_t *now)
>>       if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
>>           stats_saved = *stats;
>>           gt_stamp_saved = guc->timestamp.gt_stamp;
>> +        /*
>> +         * Update gt_clks, then gt timestamp to simplify the 
>> 'gt_stamp -
>> +         * start_gt_clk' calculation below for active engines.
>> +         */
>>           guc_update_engine_gt_clks(engine);
>>           guc_update_pm_timestamp(guc, now);
>>           intel_gt_pm_put_async(gt);
>> @@ -1365,10 +1410,15 @@ void intel_guc_busyness_park(struct intel_gt 
>> *gt)
>>   void intel_guc_busyness_unpark(struct intel_gt *gt)
>>   {
>>       struct intel_guc *guc = &gt->uc.guc;
>> +    unsigned long flags;
>> +    ktime_t unused;
>>         if (!guc_submission_initialized(guc))
>>           return;
>>   +    spin_lock_irqsave(&guc->timestamp.lock, flags);
>> +    guc_update_pm_timestamp(guc, &unused);
>> +    spin_unlock_irqrestore(&guc->timestamp.lock, flags);
>>       mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
>>                guc->timestamp.ping_delay);
>>   }
>>


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Intel-gfx] [PATCH 1/2] drm/i915/pmu: Fix KMD and GuC race on accessing busyness
  2022-01-28 21:53   ` John Harrison
@ 2022-01-28 22:46     ` Tvrtko Ursulin
  0 siblings, 0 replies; 9+ messages in thread
From: Tvrtko Ursulin @ 2022-01-28 22:46 UTC (permalink / raw)
  To: John Harrison, intel-gfx


On 28/01/2022 21:53, John Harrison wrote:
> On 1/28/2022 01:34, Tvrtko Ursulin wrote:
>>
>> John,
>>
>> What CI results were used to merge this particular single patch? 
>> Unless I am not seeing it, it was always set in pair with something else.
>>
>> First with "drm/i915/pmu: Use PM timestamp instead of RING TIMESTAMP 
>> for reference", which was merged earlier in the week and it had a 
>> standalone CI results so that is fine.
> 
> It seemed plausible to say that it had a green CI run when included with 
> a dependent patch given that the dependent patch has already been merged.

Given it was later sent in a series with a different patch that was 
quite confusing.

> We don't normally require a brand new CI run any time the tree has moved 
> on since the previous CI was run. You would never get anything merged if 
> that was necessary!

Not when the tree has moved, but when series gain or lose patches.

Here we had these CI runs:

1. Patch A + Patch B
2. Patch A
3. Patch B + Patch C

Patch A got merged based on 2 one day. Patch B merged alone, based on 
what you say, the next day...

> So to me this just counts as pushing a tested patch set to a slightly 
> newer tree where the rebase happened to mean one patch vanished (because 
> exactly the same patch was already merged).

.. I say please do a CI run for B standalone. That way the Link: in the 
merged patch points to a series where maintainer can click on it and see 
it was green and all is good. These situations are so rare that there 
are no concerns about CI time.

If the link leads me somewhere else, which does not make sense, then it 
is extra work to untangle the story. I don't want to hunt on the mailing 
list between multiple postings of the patch with the same name and check 
whether it is the same patch, or just the same title where someone 
forgot to add a change log or something. Keeping it simple is the 
cheapest option here.

Regards,

Tvrtko

> 
> John.
> 
> 
>>
>> Other postings I can see it was in tandem with "drm/i915/pmu: Use 
>> existing uncore helper to read gpm_timestamp", which wasn't reviewed 
>> or merged.
>>
>> So it looks to me, again unless I am not seeing anything, that it got 
>> merged without ever having a standalone CI results. This is therefore 
>> a reminder that BKM should be to always have CI results for the exact 
>> series/patch being merged.
>>
>> If there is a situation where a subset of a series is conceptually 
>> ready before the rest, in the past what we used to do is send the 
>> reviewed portion as "--subject-prefix=CI" and then "--suppress-cc=all" 
>> and then merge when CI gives all green.
>>
>> Regards,
>>
>> Tvrtko
>>
>> On 27/01/2022 02:00, Umesh Nerlige Ramappa wrote:
>>> GuC updates shared memory and KMD reads it. Since this is not
>>> synchronized, we run into a race where the value read is inconsistent.
>>> Sometimes the inconsistency is in reading the upper MSB bytes of the
>>> last_switch_in value. 2 types of cases are seen - upper 8 bits are zero
>>> and upper 24 bits are zero. Since these are non-zero values, it is
>>> not trivial to determine validity of these values. Instead we read the
>>> values multiple times until they are consistent. In test runs, 3
>>> attempts results in consistent values. The upper bound is set to 6
>>> attempts and may need to be tuned as per any new occurences.
>>>
>>> Since the duration that gt is parked can vary, the patch also updates
>>> the gt timestamp on unpark before starting the worker.
>>>
>>> v2:
>>> - Initialize i
>>> - Use READ_ONCE to access engine record
>>>
>>> Fixes: 77cdd054dd2c ("drm/i915/pmu: Connect engine busyness stats 
>>> from GuC to pmu")
>>> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>>> Reviewed-by: Alan Previn <alan.previn.teres.alexis@intel.com>
>>> ---
>>>   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 58 +++++++++++++++++--
>>>   1 file changed, 54 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
>>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>>> index db9615dcb0ec..4e9154cacc58 100644
>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>>> @@ -1114,6 +1114,19 @@ __extend_last_switch(struct intel_guc *guc, 
>>> u64 *prev_start, u32 new_start)
>>>       if (new_start == lower_32_bits(*prev_start))
>>>           return;
>>>   +    /*
>>> +     * When gt is unparked, we update the gt timestamp and start the 
>>> ping
>>> +     * worker that updates the gt_stamp every POLL_TIME_CLKS. As 
>>> long as gt
>>> +     * is unparked, all switched in contexts will have a start time 
>>> that is
>>> +     * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
>>> +     *
>>> +     * If neither gt_stamp nor new_start has rolled over, then the
>>> +     * gt_stamp_hi does not need to be adjusted, however if one of 
>>> them has
>>> +     * rolled over, we need to adjust gt_stamp_hi accordingly.
>>> +     *
>>> +     * The below conditions address the cases of new_start rollover and
>>> +     * gt_stamp_last rollover respectively.
>>> +     */
>>>       if (new_start < gt_stamp_last &&
>>>           (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
>>>           gt_stamp_hi++;
>>> @@ -1125,17 +1138,45 @@ __extend_last_switch(struct intel_guc *guc, 
>>> u64 *prev_start, u32 new_start)
>>>       *prev_start = ((u64)gt_stamp_hi << 32) | new_start;
>>>   }
>>>   -static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
>>> +/*
>>> + * GuC updates shared memory and KMD reads it. Since this is not 
>>> synchronized,
>>> + * we run into a race where the value read is inconsistent. 
>>> Sometimes the
>>> + * inconsistency is in reading the upper MSB bytes of the last_in 
>>> value when
>>> + * this race occurs. 2 types of cases are seen - upper 8 bits are 
>>> zero and upper
>>> + * 24 bits are zero. Since these are non-zero values, it is 
>>> non-trivial to
>>> + * determine validity of these values. Instead we read the values 
>>> multiple times
>>> + * until they are consistent. In test runs, 3 attempts results in 
>>> consistent
>>> + * values. The upper bound is set to 6 attempts and may need to be 
>>> tuned as per
>>> + * any new occurences.
>>> + */
>>> +static void __get_engine_usage_record(struct intel_engine_cs *engine,
>>> +                      u32 *last_in, u32 *id, u32 *total)
>>>   {
>>>       struct guc_engine_usage_record *rec = 
>>> intel_guc_engine_usage(engine);
>>> +    int i = 0;
>>> +
>>> +    do {
>>> +        *last_in = READ_ONCE(rec->last_switch_in_stamp);
>>> +        *id = READ_ONCE(rec->current_context_index);
>>> +        *total = READ_ONCE(rec->total_runtime);
>>> +
>>> +        if (READ_ONCE(rec->last_switch_in_stamp) == *last_in &&
>>> +            READ_ONCE(rec->current_context_index) == *id &&
>>> +            READ_ONCE(rec->total_runtime) == *total)
>>> +            break;
>>> +    } while (++i < 6);
>>> +}
>>> +
>>> +static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
>>> +{
>>>       struct intel_engine_guc_stats *stats = &engine->stats.guc;
>>>       struct intel_guc *guc = &engine->gt->uc.guc;
>>> -    u32 last_switch = rec->last_switch_in_stamp;
>>> -    u32 ctx_id = rec->current_context_index;
>>> -    u32 total = rec->total_runtime;
>>> +    u32 last_switch, ctx_id, total;
>>>         lockdep_assert_held(&guc->timestamp.lock);
>>>   +    __get_engine_usage_record(engine, &last_switch, &ctx_id, &total);
>>> +
>>>       stats->running = ctx_id != ~0U && last_switch;
>>>       if (stats->running)
>>>           __extend_last_switch(guc, &stats->start_gt_clk, last_switch);
>>> @@ -1237,6 +1278,10 @@ static ktime_t guc_engine_busyness(struct 
>>> intel_engine_cs *engine, ktime_t *now)
>>>       if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
>>>           stats_saved = *stats;
>>>           gt_stamp_saved = guc->timestamp.gt_stamp;
>>> +        /*
>>> +         * Update gt_clks, then gt timestamp to simplify the 
>>> 'gt_stamp -
>>> +         * start_gt_clk' calculation below for active engines.
>>> +         */
>>>           guc_update_engine_gt_clks(engine);
>>>           guc_update_pm_timestamp(guc, now);
>>>           intel_gt_pm_put_async(gt);
>>> @@ -1365,10 +1410,15 @@ void intel_guc_busyness_park(struct intel_gt 
>>> *gt)
>>>   void intel_guc_busyness_unpark(struct intel_gt *gt)
>>>   {
>>>       struct intel_guc *guc = &gt->uc.guc;
>>> +    unsigned long flags;
>>> +    ktime_t unused;
>>>         if (!guc_submission_initialized(guc))
>>>           return;
>>>   +    spin_lock_irqsave(&guc->timestamp.lock, flags);
>>> +    guc_update_pm_timestamp(guc, &unused);
>>> +    spin_unlock_irqrestore(&guc->timestamp.lock, flags);
>>>       mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
>>>                guc->timestamp.ping_delay);
>>>   }
>>>
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Intel-gfx] [PATCH 1/2] drm/i915/pmu: Fix KMD and GuC race on accessing busyness
@ 2022-01-27  0:15 Umesh Nerlige Ramappa
  0 siblings, 0 replies; 9+ messages in thread
From: Umesh Nerlige Ramappa @ 2022-01-27  0:15 UTC (permalink / raw)
  To: intel-gfx

GuC updates shared memory and KMD reads it. Since this is not
synchronized, we run into a race where the value read is inconsistent.
Sometimes the inconsistency is in reading the upper MSB bytes of the
last_switch_in value. 2 types of cases are seen - upper 8 bits are zero
and upper 24 bits are zero. Since these are non-zero values, it is
not trivial to determine validity of these values. Instead we read the
values multiple times until they are consistent. In test runs, 3
attempts results in consistent values. The upper bound is set to 6
attempts and may need to be tuned as per any new occurences.

Since the duration that gt is parked can vary, the patch also updates
the gt timestamp on unpark before starting the worker.

v2:
- Initialize i
- Use READ_ONCE to access engine record

Fixes: 77cdd054dd2c ("drm/i915/pmu: Connect engine busyness stats from GuC to pmu")
Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Reviewed-by: Alan Previn <alan.previn.teres.alexis@intel.com>
---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 58 +++++++++++++++++--
 1 file changed, 54 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index db9615dcb0ec..4e9154cacc58 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1114,6 +1114,19 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
 	if (new_start == lower_32_bits(*prev_start))
 		return;
 
+	/*
+	 * When gt is unparked, we update the gt timestamp and start the ping
+	 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
+	 * is unparked, all switched in contexts will have a start time that is
+	 * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
+	 *
+	 * If neither gt_stamp nor new_start has rolled over, then the
+	 * gt_stamp_hi does not need to be adjusted, however if one of them has
+	 * rolled over, we need to adjust gt_stamp_hi accordingly.
+	 *
+	 * The below conditions address the cases of new_start rollover and
+	 * gt_stamp_last rollover respectively.
+	 */
 	if (new_start < gt_stamp_last &&
 	    (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
 		gt_stamp_hi++;
@@ -1125,17 +1138,45 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
 	*prev_start = ((u64)gt_stamp_hi << 32) | new_start;
 }
 
-static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
+/*
+ * GuC updates shared memory and KMD reads it. Since this is not synchronized,
+ * we run into a race where the value read is inconsistent. Sometimes the
+ * inconsistency is in reading the upper MSB bytes of the last_in value when
+ * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
+ * 24 bits are zero. Since these are non-zero values, it is non-trivial to
+ * determine validity of these values. Instead we read the values multiple times
+ * until they are consistent. In test runs, 3 attempts results in consistent
+ * values. The upper bound is set to 6 attempts and may need to be tuned as per
+ * any new occurences.
+ */
+static void __get_engine_usage_record(struct intel_engine_cs *engine,
+				      u32 *last_in, u32 *id, u32 *total)
 {
 	struct guc_engine_usage_record *rec = intel_guc_engine_usage(engine);
+	int i = 0;
+
+	do {
+		*last_in = READ_ONCE(rec->last_switch_in_stamp);
+		*id = READ_ONCE(rec->current_context_index);
+		*total = READ_ONCE(rec->total_runtime);
+
+		if (READ_ONCE(rec->last_switch_in_stamp) == *last_in &&
+		    READ_ONCE(rec->current_context_index) == *id &&
+		    READ_ONCE(rec->total_runtime) == *total)
+			break;
+	} while (++i < 6);
+}
+
+static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
+{
 	struct intel_engine_guc_stats *stats = &engine->stats.guc;
 	struct intel_guc *guc = &engine->gt->uc.guc;
-	u32 last_switch = rec->last_switch_in_stamp;
-	u32 ctx_id = rec->current_context_index;
-	u32 total = rec->total_runtime;
+	u32 last_switch, ctx_id, total;
 
 	lockdep_assert_held(&guc->timestamp.lock);
 
+	__get_engine_usage_record(engine, &last_switch, &ctx_id, &total);
+
 	stats->running = ctx_id != ~0U && last_switch;
 	if (stats->running)
 		__extend_last_switch(guc, &stats->start_gt_clk, last_switch);
@@ -1237,6 +1278,10 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
 	if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
 		stats_saved = *stats;
 		gt_stamp_saved = guc->timestamp.gt_stamp;
+		/*
+		 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
+		 * start_gt_clk' calculation below for active engines.
+		 */
 		guc_update_engine_gt_clks(engine);
 		guc_update_pm_timestamp(guc, now);
 		intel_gt_pm_put_async(gt);
@@ -1365,10 +1410,15 @@ void intel_guc_busyness_park(struct intel_gt *gt)
 void intel_guc_busyness_unpark(struct intel_gt *gt)
 {
 	struct intel_guc *guc = &gt->uc.guc;
+	unsigned long flags;
+	ktime_t unused;
 
 	if (!guc_submission_initialized(guc))
 		return;
 
+	spin_lock_irqsave(&guc->timestamp.lock, flags);
+	guc_update_pm_timestamp(guc, &unused);
+	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
 	mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
 			 guc->timestamp.ping_delay);
 }
-- 
2.33.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2022-01-28 22:46 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-27  2:00 [Intel-gfx] [PATCH 1/2] drm/i915/pmu: Fix KMD and GuC race on accessing busyness Umesh Nerlige Ramappa
2022-01-27  2:00 ` [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Use existing uncore helper to read gpm_timestamp Umesh Nerlige Ramappa
2022-01-27  2:52 ` [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915/pmu: Fix KMD and GuC race on accessing busyness Patchwork
2022-01-27  8:45 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
2022-01-28  9:34 ` [Intel-gfx] [PATCH 1/2] " Tvrtko Ursulin
2022-01-28 17:31   ` Umesh Nerlige Ramappa
2022-01-28 21:53   ` John Harrison
2022-01-28 22:46     ` Tvrtko Ursulin
  -- strict thread matches above, loose matches on Subject: below --
2022-01-27  0:15 Umesh Nerlige Ramappa

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.