All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost
@ 2022-05-05  5:40 ` Vinay Belgaumkar
  0 siblings, 0 replies; 26+ messages in thread
From: Vinay Belgaumkar @ 2022-05-05  5:40 UTC (permalink / raw)
  To: intel-gfx, dri-devel; +Cc: Vinay Belgaumkar

SLPC min/max frequency updates require H2G calls. We are seeing
timeouts when the GuC channel is backed up and it is unable to respond
in a timely fashion, causing warnings and affecting CI.

This is seen when waitboosting happens during a stress test.
This patch updates the waitboost path to use a non-blocking
H2G call instead, which returns as soon as the message is
successfully transmitted.

Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 38 ++++++++++++++++-----
 1 file changed, 30 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 1db833da42df..c852f73cf521 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc)
 	return data->header.global_state;
 }
 
+static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 id, u32 value)
+{
+	u32 request[] = {
+		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+		SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
+		id,
+		value,
+	};
+	int ret;
+
+	ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0);
+
+	return ret > 0 ? -EPROTO : ret;
+}
+
+static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, u32 value)
+{
+	struct intel_guc *guc = slpc_to_guc(slpc);
+
+	GEM_BUG_ON(id >= SLPC_MAX_PARAM);
+
+	return guc_action_slpc_set_param_nb(guc, id, value);
+}
+
 static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value)
 {
 	u32 request[] = {
@@ -208,12 +232,10 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
 	 */
 
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
-		ret = slpc_set_param(slpc,
-				     SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
-				     freq);
-		if (ret)
-			i915_probe_error(i915, "Unable to force min freq to %u: %d",
-					 freq, ret);
+		/* Non-blocking request will avoid stalls */
+		ret = slpc_set_param_nb(slpc,
+					SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+					freq);
 	}
 
 	return ret;
@@ -231,8 +253,8 @@ static void slpc_boost_work(struct work_struct *work)
 	 */
 	mutex_lock(&slpc->lock);
 	if (atomic_read(&slpc->num_waiters)) {
-		slpc_force_min_freq(slpc, slpc->boost_freq);
-		slpc->num_boosts++;
+		if (!slpc_force_min_freq(slpc, slpc->boost_freq))
+			slpc->num_boosts++;
 	}
 	mutex_unlock(&slpc->lock);
 }
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost
@ 2022-05-05  5:40 ` Vinay Belgaumkar
  0 siblings, 0 replies; 26+ messages in thread
From: Vinay Belgaumkar @ 2022-05-05  5:40 UTC (permalink / raw)
  To: intel-gfx, dri-devel

SLPC min/max frequency updates require H2G calls. We are seeing
timeouts when the GuC channel is backed up and it is unable to respond
in a timely fashion, causing warnings and affecting CI.

This is seen when waitboosting happens during a stress test.
This patch updates the waitboost path to use a non-blocking
H2G call instead, which returns as soon as the message is
successfully transmitted.

Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 38 ++++++++++++++++-----
 1 file changed, 30 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 1db833da42df..c852f73cf521 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc)
 	return data->header.global_state;
 }
 
+static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 id, u32 value)
+{
+	u32 request[] = {
+		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+		SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
+		id,
+		value,
+	};
+	int ret;
+
+	ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0);
+
+	return ret > 0 ? -EPROTO : ret;
+}
+
+static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, u32 value)
+{
+	struct intel_guc *guc = slpc_to_guc(slpc);
+
+	GEM_BUG_ON(id >= SLPC_MAX_PARAM);
+
+	return guc_action_slpc_set_param_nb(guc, id, value);
+}
+
 static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value)
 {
 	u32 request[] = {
@@ -208,12 +232,10 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
 	 */
 
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
-		ret = slpc_set_param(slpc,
-				     SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
-				     freq);
-		if (ret)
-			i915_probe_error(i915, "Unable to force min freq to %u: %d",
-					 freq, ret);
+		/* Non-blocking request will avoid stalls */
+		ret = slpc_set_param_nb(slpc,
+					SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+					freq);
 	}
 
 	return ret;
@@ -231,8 +253,8 @@ static void slpc_boost_work(struct work_struct *work)
 	 */
 	mutex_lock(&slpc->lock);
 	if (atomic_read(&slpc->num_waiters)) {
-		slpc_force_min_freq(slpc, slpc->boost_freq);
-		slpc->num_boosts++;
+		if (!slpc_force_min_freq(slpc, slpc->boost_freq))
+			slpc->num_boosts++;
 	}
 	mutex_unlock(&slpc->lock);
 }
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [Intel-gfx] ✓ Fi.CI.BAT: success for drm/i915/guc/slpc: Use non-blocking H2G for waitboost
  2022-05-05  5:40 ` [Intel-gfx] " Vinay Belgaumkar
  (?)
@ 2022-05-05  6:37 ` Patchwork
  -1 siblings, 0 replies; 26+ messages in thread
From: Patchwork @ 2022-05-05  6:37 UTC (permalink / raw)
  To: Vinay Belgaumkar; +Cc: intel-gfx

[-- Attachment #1: Type: text/plain, Size: 7454 bytes --]

== Series Details ==

Series: drm/i915/guc/slpc: Use non-blocking H2G for waitboost
URL   : https://patchwork.freedesktop.org/series/103598/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_11607 -> Patchwork_103598v1
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/index.html

Participating hosts (41 -> 41)
------------------------------

  Additional (3): fi-hsw-4770 bat-adlm-1 bat-dg2-9 
  Missing    (3): bat-rpls-1 fi-bsw-cyan bat-dg2-8 

Known issues
------------

  Here are the changes found in Patchwork_103598v1 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_huc_copy@huc-copy:
    - fi-hsw-4770:        NOTRUN -> [SKIP][1] ([fdo#109271]) +9 similar issues
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/fi-hsw-4770/igt@gem_huc_copy@huc-copy.html

  * igt@i915_pm_backlight@basic-brightness:
    - fi-hsw-4770:        NOTRUN -> [SKIP][2] ([fdo#109271] / [i915#3012])
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/fi-hsw-4770/igt@i915_pm_backlight@basic-brightness.html

  * igt@i915_selftest@live@gem_migrate:
    - fi-bdw-5557u:       [PASS][3] -> [INCOMPLETE][4] ([i915#5716])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/fi-bdw-5557u/igt@i915_selftest@live@gem_migrate.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/fi-bdw-5557u/igt@i915_selftest@live@gem_migrate.html

  * igt@i915_selftest@live@hangcheck:
    - fi-hsw-4770:        NOTRUN -> [INCOMPLETE][5] ([i915#4785])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/fi-hsw-4770/igt@i915_selftest@live@hangcheck.html

  * igt@kms_chamelium@common-hpd-after-suspend:
    - fi-hsw-g3258:       NOTRUN -> [SKIP][6] ([fdo#109271] / [fdo#111827])
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/fi-hsw-g3258/igt@kms_chamelium@common-hpd-after-suspend.html
    - fi-snb-2600:        NOTRUN -> [SKIP][7] ([fdo#109271] / [fdo#111827])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/fi-snb-2600/igt@kms_chamelium@common-hpd-after-suspend.html

  * igt@kms_chamelium@dp-crc-fast:
    - fi-hsw-4770:        NOTRUN -> [SKIP][8] ([fdo#109271] / [fdo#111827]) +7 similar issues
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/fi-hsw-4770/igt@kms_chamelium@dp-crc-fast.html

  * igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d:
    - fi-hsw-4770:        NOTRUN -> [SKIP][9] ([fdo#109271] / [i915#533])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/fi-hsw-4770/igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d.html

  * igt@kms_psr@primary_mmap_gtt:
    - fi-hsw-4770:        NOTRUN -> [SKIP][10] ([fdo#109271] / [i915#1072]) +3 similar issues
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/fi-hsw-4770/igt@kms_psr@primary_mmap_gtt.html

  * igt@runner@aborted:
    - fi-hsw-4770:        NOTRUN -> [FAIL][11] ([fdo#109271] / [i915#4312] / [i915#5594])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/fi-hsw-4770/igt@runner@aborted.html

  
#### Possible fixes ####

  * igt@i915_selftest@live@hangcheck:
    - fi-hsw-g3258:       [INCOMPLETE][12] ([i915#3303] / [i915#4785]) -> [PASS][13]
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/fi-hsw-g3258/igt@i915_selftest@live@hangcheck.html
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/fi-hsw-g3258/igt@i915_selftest@live@hangcheck.html
    - {fi-ehl-2}:         [INCOMPLETE][14] ([i915#5134]) -> [PASS][15]
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/fi-ehl-2/igt@i915_selftest@live@hangcheck.html
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/fi-ehl-2/igt@i915_selftest@live@hangcheck.html
    - fi-snb-2600:        [INCOMPLETE][16] ([i915#3921]) -> [PASS][17]
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/fi-snb-2600/igt@i915_selftest@live@hangcheck.html
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/fi-snb-2600/igt@i915_selftest@live@hangcheck.html

  * igt@kms_flip@basic-flip-vs-modeset@b-edp1:
    - {bat-adlp-6}:       [DMESG-WARN][18] ([i915#3576]) -> [PASS][19]
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/bat-adlp-6/igt@kms_flip@basic-flip-vs-modeset@b-edp1.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/bat-adlp-6/igt@kms_flip@basic-flip-vs-modeset@b-edp1.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103375]: https://bugs.freedesktop.org/show_bug.cgi?id=103375
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109285]: https://bugs.freedesktop.org/show_bug.cgi?id=109285
  [fdo#111827]: https://bugs.freedesktop.org/show_bug.cgi?id=111827
  [i915#1072]: https://gitlab.freedesktop.org/drm/intel/issues/1072
  [i915#1155]: https://gitlab.freedesktop.org/drm/intel/issues/1155
  [i915#2575]: https://gitlab.freedesktop.org/drm/intel/issues/2575
  [i915#2582]: https://gitlab.freedesktop.org/drm/intel/issues/2582
  [i915#3012]: https://gitlab.freedesktop.org/drm/intel/issues/3012
  [i915#3282]: https://gitlab.freedesktop.org/drm/intel/issues/3282
  [i915#3303]: https://gitlab.freedesktop.org/drm/intel/issues/3303
  [i915#3555]: https://gitlab.freedesktop.org/drm/intel/issues/3555
  [i915#3576]: https://gitlab.freedesktop.org/drm/intel/issues/3576
  [i915#3708]: https://gitlab.freedesktop.org/drm/intel/issues/3708
  [i915#3921]: https://gitlab.freedesktop.org/drm/intel/issues/3921
  [i915#4103]: https://gitlab.freedesktop.org/drm/intel/issues/4103
  [i915#4312]: https://gitlab.freedesktop.org/drm/intel/issues/4312
  [i915#4613]: https://gitlab.freedesktop.org/drm/intel/issues/4613
  [i915#4785]: https://gitlab.freedesktop.org/drm/intel/issues/4785
  [i915#5134]: https://gitlab.freedesktop.org/drm/intel/issues/5134
  [i915#5171]: https://gitlab.freedesktop.org/drm/intel/issues/5171
  [i915#5174]: https://gitlab.freedesktop.org/drm/intel/issues/5174
  [i915#5181]: https://gitlab.freedesktop.org/drm/intel/issues/5181
  [i915#5190]: https://gitlab.freedesktop.org/drm/intel/issues/5190
  [i915#533]: https://gitlab.freedesktop.org/drm/intel/issues/533
  [i915#5594]: https://gitlab.freedesktop.org/drm/intel/issues/5594
  [i915#5606]: https://gitlab.freedesktop.org/drm/intel/issues/5606
  [i915#5703]: https://gitlab.freedesktop.org/drm/intel/issues/5703
  [i915#5716]: https://gitlab.freedesktop.org/drm/intel/issues/5716
  [i915#5775]: https://gitlab.freedesktop.org/drm/intel/issues/5775
  [i915#5801]: https://gitlab.freedesktop.org/drm/intel/issues/5801


Build changes
-------------

  * Linux: CI_DRM_11607 -> Patchwork_103598v1

  CI-20190529: 20190529
  CI_DRM_11607: b0f0de5bb000952abb29696adb93f289e49b129c @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_6465: f6bb4399881a806fbff75ce3df89b60286d55917 @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
  Patchwork_103598v1: b0f0de5bb000952abb29696adb93f289e49b129c @ git://anongit.freedesktop.org/gfx-ci/linux


### Linux commits

5aa2893a2b36 drm/i915/guc/slpc: Use non-blocking H2G for waitboost

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/index.html

[-- Attachment #2: Type: text/html, Size: 7820 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [Intel-gfx] ✓ Fi.CI.IGT: success for drm/i915/guc/slpc: Use non-blocking H2G for waitboost
  2022-05-05  5:40 ` [Intel-gfx] " Vinay Belgaumkar
  (?)
  (?)
@ 2022-05-05 11:12 ` Patchwork
  -1 siblings, 0 replies; 26+ messages in thread
From: Patchwork @ 2022-05-05 11:12 UTC (permalink / raw)
  To: Vinay Belgaumkar; +Cc: intel-gfx

[-- Attachment #1: Type: text/plain, Size: 49675 bytes --]

== Series Details ==

Series: drm/i915/guc/slpc: Use non-blocking H2G for waitboost
URL   : https://patchwork.freedesktop.org/series/103598/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_11607_full -> Patchwork_103598v1_full
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  

Participating hosts (12 -> 12)
------------------------------

  No changes in participating hosts

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_103598v1_full:

### IGT changes ###

#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@gem_render_copy_redux@interruptible:
    - {shard-rkl}:        [PASS][1] -> [INCOMPLETE][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-rkl-4/igt@gem_render_copy_redux@interruptible.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-rkl-5/igt@gem_render_copy_redux@interruptible.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-spr-indfb-draw-mmap-wc:
    - {shard-rkl}:        [SKIP][3] ([i915#1849] / [i915#4098]) -> [INCOMPLETE][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-rkl-2/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-spr-indfb-draw-mmap-wc.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-rkl-5/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-spr-indfb-draw-mmap-wc.html

  
Known issues
------------

  Here are the changes found in Patchwork_103598v1_full that come from known issues:

### CI changes ###

#### Possible fixes ####

  * boot:
    - shard-snb:          ([PASS][5], [PASS][6], [PASS][7], [PASS][8], [PASS][9], [PASS][10], [PASS][11], [PASS][12], [PASS][13], [PASS][14], [PASS][15], [PASS][16], [PASS][17], [FAIL][18], [PASS][19], [PASS][20], [PASS][21], [PASS][22], [PASS][23], [PASS][24], [PASS][25], [PASS][26], [PASS][27], [PASS][28], [PASS][29]) ([i915#4338]) -> ([PASS][30], [PASS][31], [PASS][32], [PASS][33], [PASS][34], [PASS][35], [PASS][36], [PASS][37], [PASS][38], [PASS][39], [PASS][40], [PASS][41], [PASS][42], [PASS][43], [PASS][44], [PASS][45], [PASS][46], [PASS][47], [PASS][48], [PASS][49], [PASS][50], [PASS][51], [PASS][52], [PASS][53], [PASS][54])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-snb7/boot.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-snb7/boot.html
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-snb7/boot.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-snb7/boot.html
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-snb7/boot.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-snb6/boot.html
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-snb6/boot.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-snb6/boot.html
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-snb6/boot.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-snb5/boot.html
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-snb5/boot.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-snb5/boot.html
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-snb5/boot.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-snb5/boot.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-snb4/boot.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-snb4/boot.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-snb4/boot.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-snb4/boot.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-snb4/boot.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-snb2/boot.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-snb2/boot.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-snb2/boot.html
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-snb2/boot.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-snb2/boot.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-snb2/boot.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb7/boot.html
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb7/boot.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb7/boot.html
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb7/boot.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb7/boot.html
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb6/boot.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb6/boot.html
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb6/boot.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb6/boot.html
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb6/boot.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb5/boot.html
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb5/boot.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb5/boot.html
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb5/boot.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb5/boot.html
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb5/boot.html
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb4/boot.html
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb4/boot.html
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb4/boot.html
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb4/boot.html
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb4/boot.html
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb2/boot.html
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb2/boot.html
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb2/boot.html
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb2/boot.html

  

### IGT changes ###

#### Issues hit ####

  * igt@gem_ccs@suspend-resume:
    - shard-iclb:         NOTRUN -> [SKIP][55] ([i915#5327]) +1 similar issue
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb5/igt@gem_ccs@suspend-resume.html

  * igt@gem_ctx_param@set-priority-not-supported:
    - shard-iclb:         NOTRUN -> [SKIP][56] ([fdo#109314])
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb7/igt@gem_ctx_param@set-priority-not-supported.html

  * igt@gem_ctx_persistence@legacy-engines-hostile:
    - shard-snb:          NOTRUN -> [SKIP][57] ([fdo#109271] / [i915#1099])
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb2/igt@gem_ctx_persistence@legacy-engines-hostile.html

  * igt@gem_eio@in-flight-immediate:
    - shard-skl:          [PASS][58] -> [TIMEOUT][59] ([i915#3063])
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-skl1/igt@gem_eio@in-flight-immediate.html
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-skl1/igt@gem_eio@in-flight-immediate.html

  * igt@gem_eio@unwedge-stress:
    - shard-tglb:         NOTRUN -> [FAIL][60] ([i915#5784])
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-tglb3/igt@gem_eio@unwedge-stress.html

  * igt@gem_exec_balancer@parallel-bb-first:
    - shard-tglb:         NOTRUN -> [DMESG-WARN][61] ([i915#5076] / [i915#5614])
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-tglb3/igt@gem_exec_balancer@parallel-bb-first.html

  * igt@gem_exec_fair@basic-deadline:
    - shard-skl:          NOTRUN -> [FAIL][62] ([i915#2846])
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-skl4/igt@gem_exec_fair@basic-deadline.html

  * igt@gem_exec_fair@basic-flow@rcs0:
    - shard-tglb:         [PASS][63] -> [FAIL][64] ([i915#2842]) +1 similar issue
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-tglb8/igt@gem_exec_fair@basic-flow@rcs0.html
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-tglb2/igt@gem_exec_fair@basic-flow@rcs0.html

  * igt@gem_exec_fair@basic-none-solo@rcs0:
    - shard-apl:          [PASS][65] -> [FAIL][66] ([i915#2842]) +2 similar issues
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-apl2/igt@gem_exec_fair@basic-none-solo@rcs0.html
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-apl1/igt@gem_exec_fair@basic-none-solo@rcs0.html

  * igt@gem_exec_fair@basic-none-vip@rcs0:
    - shard-tglb:         NOTRUN -> [FAIL][67] ([i915#2842])
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-tglb3/igt@gem_exec_fair@basic-none-vip@rcs0.html

  * igt@gem_exec_fair@basic-pace@vcs0:
    - shard-iclb:         NOTRUN -> [FAIL][68] ([i915#2842]) +3 similar issues
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb7/igt@gem_exec_fair@basic-pace@vcs0.html

  * igt@gem_exec_flush@basic-uc-set-default:
    - shard-snb:          [PASS][69] -> [SKIP][70] ([fdo#109271]) +1 similar issue
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-snb5/igt@gem_exec_flush@basic-uc-set-default.html
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb6/igt@gem_exec_flush@basic-uc-set-default.html

  * igt@gem_lmem_swapping@heavy-random:
    - shard-skl:          NOTRUN -> [SKIP][71] ([fdo#109271] / [i915#4613]) +1 similar issue
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-skl4/igt@gem_lmem_swapping@heavy-random.html

  * igt@gem_lmem_swapping@random:
    - shard-iclb:         NOTRUN -> [SKIP][72] ([i915#4613])
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb5/igt@gem_lmem_swapping@random.html

  * igt@gem_lmem_swapping@verify-random:
    - shard-apl:          NOTRUN -> [SKIP][73] ([fdo#109271] / [i915#4613]) +2 similar issues
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-apl7/igt@gem_lmem_swapping@verify-random.html

  * igt@gem_media_vme:
    - shard-tglb:         NOTRUN -> [SKIP][74] ([i915#284])
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-tglb3/igt@gem_media_vme.html

  * igt@gem_pxp@regular-baseline-src-copy-readible:
    - shard-tglb:         NOTRUN -> [SKIP][75] ([i915#4270])
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-tglb3/igt@gem_pxp@regular-baseline-src-copy-readible.html

  * igt@gem_render_copy@linear-to-vebox-y-tiled:
    - shard-iclb:         NOTRUN -> [SKIP][76] ([i915#768]) +1 similar issue
   [76]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb5/igt@gem_render_copy@linear-to-vebox-y-tiled.html

  * igt@gem_userptr_blits@coherency-unsync:
    - shard-iclb:         NOTRUN -> [SKIP][77] ([i915#3297])
   [77]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb5/igt@gem_userptr_blits@coherency-unsync.html

  * igt@gen7_exec_parse@basic-offset:
    - shard-iclb:         NOTRUN -> [SKIP][78] ([fdo#109289])
   [78]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb5/igt@gen7_exec_parse@basic-offset.html

  * igt@gen9_exec_parse@shadow-peek:
    - shard-tglb:         NOTRUN -> [SKIP][79] ([i915#2527] / [i915#2856])
   [79]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-tglb3/igt@gen9_exec_parse@shadow-peek.html

  * igt@gen9_exec_parse@valid-registers:
    - shard-iclb:         NOTRUN -> [SKIP][80] ([i915#2856]) +1 similar issue
   [80]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb5/igt@gen9_exec_parse@valid-registers.html

  * igt@i915_hangman@engine-engine-hang:
    - shard-snb:          NOTRUN -> [SKIP][81] ([fdo#109271]) +79 similar issues
   [81]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb2/igt@i915_hangman@engine-engine-hang.html

  * igt@i915_pm_dc@dc6-dpms:
    - shard-skl:          NOTRUN -> [FAIL][82] ([i915#454])
   [82]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-skl4/igt@i915_pm_dc@dc6-dpms.html

  * igt@i915_pm_rpm@modeset-non-lpsp-stress-no-wait:
    - shard-tglb:         NOTRUN -> [SKIP][83] ([fdo#111644] / [i915#1397] / [i915#2411])
   [83]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-tglb2/igt@i915_pm_rpm@modeset-non-lpsp-stress-no-wait.html

  * igt@i915_pm_rpm@modeset-pc8-residency-stress:
    - shard-iclb:         NOTRUN -> [SKIP][84] ([fdo#109293] / [fdo#109506])
   [84]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb7/igt@i915_pm_rpm@modeset-pc8-residency-stress.html

  * igt@i915_selftest@live@hangcheck:
    - shard-snb:          [PASS][85] -> [INCOMPLETE][86] ([i915#3921])
   [85]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-snb4/igt@i915_selftest@live@hangcheck.html
   [86]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb6/igt@i915_selftest@live@hangcheck.html

  * igt@i915_suspend@debugfs-reader:
    - shard-apl:          NOTRUN -> [DMESG-WARN][87] ([i915#180])
   [87]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-apl4/igt@i915_suspend@debugfs-reader.html

  * igt@kms_async_flips@alternate-sync-async-flip:
    - shard-skl:          [PASS][88] -> [FAIL][89] ([i915#2521])
   [88]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-skl6/igt@kms_async_flips@alternate-sync-async-flip.html
   [89]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-skl3/igt@kms_async_flips@alternate-sync-async-flip.html

  * igt@kms_big_fb@4-tiled-64bpp-rotate-0:
    - shard-tglb:         NOTRUN -> [SKIP][90] ([i915#5286]) +2 similar issues
   [90]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-tglb3/igt@kms_big_fb@4-tiled-64bpp-rotate-0.html

  * igt@kms_big_fb@4-tiled-max-hw-stride-64bpp-rotate-0-async-flip:
    - shard-iclb:         NOTRUN -> [SKIP][91] ([i915#5286]) +2 similar issues
   [91]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb5/igt@kms_big_fb@4-tiled-max-hw-stride-64bpp-rotate-0-async-flip.html

  * igt@kms_big_fb@linear-16bpp-rotate-270:
    - shard-tglb:         NOTRUN -> [SKIP][92] ([fdo#111614])
   [92]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-tglb2/igt@kms_big_fb@linear-16bpp-rotate-270.html

  * igt@kms_big_fb@linear-16bpp-rotate-90:
    - shard-apl:          NOTRUN -> [SKIP][93] ([fdo#109271]) +119 similar issues
   [93]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-apl7/igt@kms_big_fb@linear-16bpp-rotate-90.html

  * igt@kms_big_fb@linear-32bpp-rotate-90:
    - shard-iclb:         NOTRUN -> [SKIP][94] ([fdo#110725] / [fdo#111614]) +3 similar issues
   [94]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb5/igt@kms_big_fb@linear-32bpp-rotate-90.html

  * igt@kms_big_fb@yf-tiled-8bpp-rotate-0:
    - shard-iclb:         NOTRUN -> [SKIP][95] ([fdo#110723]) +1 similar issue
   [95]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb5/igt@kms_big_fb@yf-tiled-8bpp-rotate-0.html

  * igt@kms_ccs@pipe-a-ccs-on-another-bo-yf_tiled_ccs:
    - shard-tglb:         NOTRUN -> [SKIP][96] ([fdo#111615] / [i915#3689])
   [96]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-tglb3/igt@kms_ccs@pipe-a-ccs-on-another-bo-yf_tiled_ccs.html

  * igt@kms_ccs@pipe-a-random-ccs-data-y_tiled_gen12_mc_ccs:
    - shard-iclb:         NOTRUN -> [SKIP][97] ([fdo#109278] / [i915#3886]) +4 similar issues
   [97]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb5/igt@kms_ccs@pipe-a-random-ccs-data-y_tiled_gen12_mc_ccs.html

  * igt@kms_ccs@pipe-b-bad-rotation-90-y_tiled_ccs:
    - shard-tglb:         NOTRUN -> [SKIP][98] ([i915#3689])
   [98]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-tglb3/igt@kms_ccs@pipe-b-bad-rotation-90-y_tiled_ccs.html

  * igt@kms_ccs@pipe-c-ccs-on-another-bo-y_tiled_gen12_rc_ccs_cc:
    - shard-apl:          NOTRUN -> [SKIP][99] ([fdo#109271] / [i915#3886]) +4 similar issues
   [99]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-apl7/igt@kms_ccs@pipe-c-ccs-on-another-bo-y_tiled_gen12_rc_ccs_cc.html

  * igt@kms_ccs@pipe-c-missing-ccs-buffer-y_tiled_gen12_mc_ccs:
    - shard-skl:          NOTRUN -> [SKIP][100] ([fdo#109271] / [i915#3886]) +5 similar issues
   [100]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-skl1/igt@kms_ccs@pipe-c-missing-ccs-buffer-y_tiled_gen12_mc_ccs.html

  * igt@kms_ccs@pipe-c-random-ccs-data-y_tiled_gen12_mc_ccs:
    - shard-tglb:         NOTRUN -> [SKIP][101] ([i915#3689] / [i915#3886]) +2 similar issues
   [101]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-tglb3/igt@kms_ccs@pipe-c-random-ccs-data-y_tiled_gen12_mc_ccs.html

  * igt@kms_cdclk@mode-transition:
    - shard-tglb:         NOTRUN -> [SKIP][102] ([i915#3742])
   [102]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-tglb3/igt@kms_cdclk@mode-transition.html

  * igt@kms_chamelium@dp-audio:
    - shard-snb:          NOTRUN -> [SKIP][103] ([fdo#109271] / [fdo#111827]) +3 similar issues
   [103]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-snb2/igt@kms_chamelium@dp-audio.html

  * igt@kms_chamelium@dp-hpd-storm-disable:
    - shard-tglb:         NOTRUN -> [SKIP][104] ([fdo#109284] / [fdo#111827]) +3 similar issues
   [104]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-tglb3/igt@kms_chamelium@dp-hpd-storm-disable.html

  * igt@kms_chamelium@hdmi-aspect-ratio:
    - shard-skl:          NOTRUN -> [SKIP][105] ([fdo#109271] / [fdo#111827]) +9 similar issues
   [105]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-skl4/igt@kms_chamelium@hdmi-aspect-ratio.html

  * igt@kms_color@pipe-a-deep-color:
    - shard-tglb:         NOTRUN -> [SKIP][106] ([i915#3555])
   [106]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-tglb3/igt@kms_color@pipe-a-deep-color.html

  * igt@kms_color_chamelium@pipe-c-ctm-0-25:
    - shard-iclb:         NOTRUN -> [SKIP][107] ([fdo#109284] / [fdo#111827]) +4 similar issues
   [107]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb5/igt@kms_color_chamelium@pipe-c-ctm-0-25.html

  * igt@kms_color_chamelium@pipe-c-ctm-0-5:
    - shard-apl:          NOTRUN -> [SKIP][108] ([fdo#109271] / [fdo#111827]) +9 similar issues
   [108]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-apl7/igt@kms_color_chamelium@pipe-c-ctm-0-5.html

  * igt@kms_color_chamelium@pipe-d-ctm-green-to-red:
    - shard-iclb:         NOTRUN -> [SKIP][109] ([fdo#109278] / [fdo#109284] / [fdo#111827])
   [109]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb7/igt@kms_color_chamelium@pipe-d-ctm-green-to-red.html

  * igt@kms_content_protection@dp-mst-type-0:
    - shard-iclb:         NOTRUN -> [SKIP][110] ([i915#3116])
   [110]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb7/igt@kms_content_protection@dp-mst-type-0.html

  * igt@kms_content_protection@dp-mst-type-1:
    - shard-tglb:         NOTRUN -> [SKIP][111] ([i915#3116] / [i915#3299])
   [111]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-tglb3/igt@kms_content_protection@dp-mst-type-1.html

  * igt@kms_cursor_crc@pipe-a-cursor-suspend:
    - shard-kbl:          [PASS][112] -> [DMESG-WARN][113] ([i915#180]) +5 similar issues
   [112]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-kbl6/igt@kms_cursor_crc@pipe-a-cursor-suspend.html
   [113]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-kbl1/igt@kms_cursor_crc@pipe-a-cursor-suspend.html

  * igt@kms_cursor_crc@pipe-b-cursor-512x512-rapid-movement:
    - shard-iclb:         NOTRUN -> [SKIP][114] ([fdo#109278] / [fdo#109279])
   [114]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb7/igt@kms_cursor_crc@pipe-b-cursor-512x512-rapid-movement.html

  * igt@kms_cursor_crc@pipe-c-cursor-32x10-sliding:
    - shard-tglb:         NOTRUN -> [SKIP][115] ([i915#3359])
   [115]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-tglb3/igt@kms_cursor_crc@pipe-c-cursor-32x10-sliding.html

  * igt@kms_cursor_crc@pipe-d-cursor-32x32-rapid-movement:
    - shard-iclb:         NOTRUN -> [SKIP][116] ([fdo#109278]) +19 similar issues
   [116]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb7/igt@kms_cursor_crc@pipe-d-cursor-32x32-rapid-movement.html

  * igt@kms_cursor_legacy@cursora-vs-flipb-atomic-transitions-varying-size:
    - shard-iclb:         NOTRUN -> [SKIP][117] ([fdo#109274] / [fdo#109278]) +4 similar issues
   [117]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb7/igt@kms_cursor_legacy@cursora-vs-flipb-atomic-transitions-varying-size.html

  * igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions:
    - shard-glk:          [PASS][118] -> [FAIL][119] ([i915#2346])
   [118]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-glk8/igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions.html
   [119]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-glk7/igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions.html

  * igt@kms_cursor_legacy@pipe-d-torture-bo:
    - shard-skl:          NOTRUN -> [SKIP][120] ([fdo#109271] / [i915#533])
   [120]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-skl4/igt@kms_cursor_legacy@pipe-d-torture-bo.html

  * igt@kms_draw_crc@draw-method-xrgb8888-mmap-cpu-4tiled:
    - shard-iclb:         NOTRUN -> [SKIP][121] ([i915#5287])
   [121]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb5/igt@kms_draw_crc@draw-method-xrgb8888-mmap-cpu-4tiled.html

  * igt@kms_fbcon_fbt@psr-suspend:
    - shard-skl:          NOTRUN -> [FAIL][122] ([i915#4767])
   [122]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-skl4/igt@kms_fbcon_fbt@psr-suspend.html

  * igt@kms_flip@2x-dpms-vs-vblank-race:
    - shard-tglb:         NOTRUN -> [SKIP][123] ([fdo#109274] / [fdo#111825]) +1 similar issue
   [123]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-tglb3/igt@kms_flip@2x-dpms-vs-vblank-race.html

  * igt@kms_flip@2x-flip-vs-dpms-off-vs-modeset-interruptible:
    - shard-iclb:         NOTRUN -> [SKIP][124] ([fdo#109274]) +2 similar issues
   [124]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb5/igt@kms_flip@2x-flip-vs-dpms-off-vs-modeset-interruptible.html

  * igt@kms_flip@flip-vs-expired-vblank-interruptible@a-edp1:
    - shard-skl:          [PASS][125] -> [FAIL][126] ([i915#79]) +1 similar issue
   [125]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-skl6/igt@kms_flip@flip-vs-expired-vblank-interruptible@a-edp1.html
   [126]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-skl3/igt@kms_flip@flip-vs-expired-vblank-interruptible@a-edp1.html

  * igt@kms_flip@flip-vs-expired-vblank-interruptible@b-edp1:
    - shard-skl:          [PASS][127] -> [FAIL][128] ([i915#2122])
   [127]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-skl6/igt@kms_flip@flip-vs-expired-vblank-interruptible@b-edp1.html
   [128]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-skl3/igt@kms_flip@flip-vs-expired-vblank-interruptible@b-edp1.html

  * igt@kms_flip@flip-vs-suspend-interruptible@c-dp1:
    - shard-apl:          [PASS][129] -> [DMESG-WARN][130] ([i915#180]) +8 similar issues
   [129]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-apl3/igt@kms_flip@flip-vs-suspend-interruptible@c-dp1.html
   [130]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-apl8/igt@kms_flip@flip-vs-suspend-interruptible@c-dp1.html

  * igt@kms_flip_scaled_crc@flip-32bpp-ytileccs-to-64bpp-ytile-downscaling:
    - shard-iclb:         [PASS][131] -> [SKIP][132] ([i915#3701])
   [131]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-iclb8/igt@kms_flip_scaled_crc@flip-32bpp-ytileccs-to-64bpp-ytile-downscaling.html
   [132]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb2/igt@kms_flip_scaled_crc@flip-32bpp-ytileccs-to-64bpp-ytile-downscaling.html

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-pri-shrfb-draw-render:
    - shard-tglb:         NOTRUN -> [SKIP][133] ([fdo#109280] / [fdo#111825]) +7 similar issues
   [133]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-tglb3/igt@kms_frontbuffer_tracking@fbc-2p-primscrn-pri-shrfb-draw-render.html

  * igt@kms_frontbuffer_tracking@fbcpsr-2p-primscrn-spr-indfb-draw-mmap-cpu:
    - shard-iclb:         NOTRUN -> [SKIP][134] ([fdo#109280]) +17 similar issues
   [134]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb5/igt@kms_frontbuffer_tracking@fbcpsr-2p-primscrn-spr-indfb-draw-mmap-cpu.html

  * igt@kms_hdr@bpc-switch@bpc-switch-edp-1-pipe-a:
    - shard-skl:          NOTRUN -> [FAIL][135] ([i915#1188])
   [135]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-skl4/igt@kms_hdr@bpc-switch@bpc-switch-edp-1-pipe-a.html

  * igt@kms_pipe_b_c_ivb@pipe-b-double-modeset-then-modeset-pipe-c:
    - shard-tglb:         NOTRUN -> [SKIP][136] ([fdo#109289]) +1 similar issue
   [136]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-tglb3/igt@kms_pipe_b_c_ivb@pipe-b-double-modeset-then-modeset-pipe-c.html

  * igt@kms_plane_alpha_blend@pipe-a-constant-alpha-min:
    - shard-skl:          NOTRUN -> [FAIL][137] ([fdo#108145] / [i915#265]) +1 similar issue
   [137]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-skl4/igt@kms_plane_alpha_blend@pipe-a-constant-alpha-min.html

  * igt@kms_plane_alpha_blend@pipe-b-alpha-transparent-fb:
    - shard-skl:          NOTRUN -> [FAIL][138] ([i915#265])
   [138]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-skl4/igt@kms_plane_alpha_blend@pipe-b-alpha-transparent-fb.html

  * igt@kms_plane_alpha_blend@pipe-c-coverage-7efc:
    - shard-skl:          [PASS][139] -> [FAIL][140] ([fdo#108145] / [i915#265])
   [139]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-skl6/igt@kms_plane_alpha_blend@pipe-c-coverage-7efc.html
   [140]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-skl3/igt@kms_plane_alpha_blend@pipe-c-coverage-7efc.html

  * igt@kms_plane_lowres@pipe-b-tiling-y:
    - shard-iclb:         NOTRUN -> [SKIP][141] ([i915#3536]) +1 similar issue
   [141]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb7/igt@kms_plane_lowres@pipe-b-tiling-y.html

  * igt@kms_plane_lowres@pipe-d-tiling-yf:
    - shard-tglb:         NOTRUN -> [SKIP][142] ([fdo#111615] / [fdo#112054]) +1 similar issue
   [142]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-tglb3/igt@kms_plane_lowres@pipe-d-tiling-yf.html

  * igt@kms_plane_scaling@downscale-with-rotation-factor-0-25@pipe-a-edp-1-downscale-with-rotation:
    - shard-iclb:         NOTRUN -> [SKIP][143] ([i915#5176]) +2 similar issues
   [143]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb7/igt@kms_plane_scaling@downscale-with-rotation-factor-0-25@pipe-a-edp-1-downscale-with-rotation.html

  * igt@kms_plane_scaling@invalid-num-scalers@pipe-a-edp-1-invalid-num-scalers:
    - shard-skl:          NOTRUN -> [SKIP][144] ([fdo#109271] / [i915#5776]) +2 similar issues
   [144]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-skl4/igt@kms_plane_scaling@invalid-num-scalers@pipe-a-edp-1-invalid-num-scalers.html

  * igt@kms_prime@basic-crc@first-to-second:
    - shard-iclb:         NOTRUN -> [SKIP][145] ([i915#1836])
   [145]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb5/igt@kms_prime@basic-crc@first-to-second.html

  * igt@kms_psr2_sf@overlay-plane-move-continuous-sf:
    - shard-apl:          NOTRUN -> [SKIP][146] ([fdo#109271] / [i915#658])
   [146]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-apl1/igt@kms_psr2_sf@overlay-plane-move-continuous-sf.html

  * igt@kms_psr2_sf@overlay-plane-update-continuous-sf:
    - shard-iclb:         NOTRUN -> [SKIP][147] ([fdo#111068] / [i915#658]) +1 similar issue
   [147]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb7/igt@kms_psr2_sf@overlay-plane-update-continuous-sf.html

  * igt@kms_psr2_su@frontbuffer-xrgb8888:
    - shard-iclb:         [PASS][148] -> [SKIP][149] ([fdo#109642] / [fdo#111068] / [i915#658])
   [148]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-iclb2/igt@kms_psr2_su@frontbuffer-xrgb8888.html
   [149]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb4/igt@kms_psr2_su@frontbuffer-xrgb8888.html

  * igt@kms_psr2_su@page_flip-xrgb8888:
    - shard-skl:          NOTRUN -> [SKIP][150] ([fdo#109271] / [i915#658]) +1 similar issue
   [150]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-skl1/igt@kms_psr2_su@page_flip-xrgb8888.html

  * igt@kms_psr@psr2_no_drrs:
    - shard-tglb:         NOTRUN -> [FAIL][151] ([i915#132] / [i915#3467])
   [151]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-tglb3/igt@kms_psr@psr2_no_drrs.html

  * igt@kms_psr@psr2_primary_mmap_cpu:
    - shard-iclb:         [PASS][152] -> [SKIP][153] ([fdo#109441]) +3 similar issues
   [152]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-iclb2/igt@kms_psr@psr2_primary_mmap_cpu.html
   [153]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb4/igt@kms_psr@psr2_primary_mmap_cpu.html

  * igt@kms_psr@psr2_sprite_mmap_cpu:
    - shard-iclb:         NOTRUN -> [SKIP][154] ([fdo#109441])
   [154]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb5/igt@kms_psr@psr2_sprite_mmap_cpu.html

  * igt@kms_scaling_modes@scaling-mode-none@edp-1-pipe-a:
    - shard-skl:          NOTRUN -> [SKIP][155] ([fdo#109271]) +162 similar issues
   [155]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-skl4/igt@kms_scaling_modes@scaling-mode-none@edp-1-pipe-a.html

  * igt@kms_setmode@basic-clone-single-crtc:
    - shard-iclb:         NOTRUN -> [SKIP][156] ([i915#3555]) +1 similar issue
   [156]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb7/igt@kms_setmode@basic-clone-single-crtc.html

  * igt@nouveau_crc@ctx-flip-threshold-reset-after-capture:
    - shard-iclb:         NOTRUN -> [SKIP][157] ([i915#2530]) +1 similar issue
   [157]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb7/igt@nouveau_crc@ctx-flip-threshold-reset-after-capture.html

  * igt@nouveau_crc@pipe-b-source-outp-inactive:
    - shard-tglb:         NOTRUN -> [SKIP][158] ([i915#2530])
   [158]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-tglb3/igt@nouveau_crc@pipe-b-source-outp-inactive.html

  * igt@perf@polling-parameterized:
    - shard-skl:          [PASS][159] -> [FAIL][160] ([i915#5639])
   [159]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-skl4/igt@perf@polling-parameterized.html
   [160]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-skl1/igt@perf@polling-parameterized.html

  * igt@perf@polling-small-buf:
    - shard-skl:          [PASS][161] -> [FAIL][162] ([i915#1722])
   [161]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-skl9/igt@perf@polling-small-buf.html
   [162]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-skl7/igt@perf@polling-small-buf.html

  * igt@prime_nv_pcopy@test1_micro:
    - shard-tglb:         NOTRUN -> [SKIP][163] ([fdo#109291])
   [163]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-tglb3/igt@prime_nv_pcopy@test1_micro.html

  * igt@prime_nv_pcopy@test3_4:
    - shard-iclb:         NOTRUN -> [SKIP][164] ([fdo#109291]) +1 similar issue
   [164]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb7/igt@prime_nv_pcopy@test3_4.html

  * igt@syncobj_timeline@transfer-timeline-point:
    - shard-tglb:         NOTRUN -> [DMESG-FAIL][165] ([i915#5098])
   [165]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-tglb2/igt@syncobj_timeline@transfer-timeline-point.html

  * igt@sysfs_clients@recycle-many:
    - shard-apl:          NOTRUN -> [SKIP][166] ([fdo#109271] / [i915#2994]) +2 similar issues
   [166]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-apl6/igt@sysfs_clients@recycle-many.html

  * igt@sysfs_clients@sema-50:
    - shard-iclb:         NOTRUN -> [SKIP][167] ([i915#2994]) +1 similar issue
   [167]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb5/igt@sysfs_clients@sema-50.html

  * igt@sysfs_clients@split-10:
    - shard-skl:          NOTRUN -> [SKIP][168] ([fdo#109271] / [i915#2994]) +1 similar issue
   [168]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-skl4/igt@sysfs_clients@split-10.html

  
#### Possible fixes ####

  * igt@drm_import_export@import-close-race-prime:
    - {shard-rkl}:        [INCOMPLETE][169] -> [PASS][170]
   [169]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-rkl-5/igt@drm_import_export@import-close-race-prime.html
   [170]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-rkl-5/igt@drm_import_export@import-close-race-prime.html

  * igt@gem_exec_balancer@nop:
    - {shard-rkl}:        [INCOMPLETE][171] ([i915#5080]) -> [PASS][172]
   [171]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-rkl-5/igt@gem_exec_balancer@nop.html
   [172]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-rkl-5/igt@gem_exec_balancer@nop.html

  * igt@gem_exec_balancer@parallel-balancer:
    - shard-iclb:         [SKIP][173] ([i915#4525]) -> [PASS][174]
   [173]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-iclb8/igt@gem_exec_balancer@parallel-balancer.html
   [174]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb4/igt@gem_exec_balancer@parallel-balancer.html

  * igt@gem_exec_fair@basic-pace-solo@rcs0:
    - shard-glk:          [FAIL][175] ([i915#2842]) -> [PASS][176]
   [175]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-glk2/igt@gem_exec_fair@basic-pace-solo@rcs0.html
   [176]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-glk5/igt@gem_exec_fair@basic-pace-solo@rcs0.html

  * igt@gem_workarounds@suspend-resume-context:
    - shard-apl:          [DMESG-WARN][177] ([i915#180]) -> [PASS][178] +3 similar issues
   [177]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-apl2/igt@gem_workarounds@suspend-resume-context.html
   [178]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-apl2/igt@gem_workarounds@suspend-resume-context.html

  * igt@gen9_exec_parse@allowed-all:
    - shard-apl:          [DMESG-WARN][179] ([i915#5566] / [i915#716]) -> [PASS][180]
   [179]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-apl3/igt@gen9_exec_parse@allowed-all.html
   [180]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-apl4/igt@gen9_exec_parse@allowed-all.html

  * igt@kms_cursor_legacy@flip-vs-cursor-legacy:
    - shard-skl:          [FAIL][181] -> [PASS][182]
   [181]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-skl3/igt@kms_cursor_legacy@flip-vs-cursor-legacy.html
   [182]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-skl9/igt@kms_cursor_legacy@flip-vs-cursor-legacy.html

  * igt@kms_flip@flip-vs-expired-vblank@a-edp1:
    - shard-skl:          [FAIL][183] ([i915#79]) -> [PASS][184]
   [183]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-skl2/igt@kms_flip@flip-vs-expired-vblank@a-edp1.html
   [184]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-skl6/igt@kms_flip@flip-vs-expired-vblank@a-edp1.html

  * igt@kms_flip@wf_vblank-ts-check-interruptible@c-edp1:
    - shard-skl:          [FAIL][185] ([i915#2122]) -> [PASS][186] +2 similar issues
   [185]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-skl5/igt@kms_flip@wf_vblank-ts-check-interruptible@c-edp1.html
   [186]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-skl2/igt@kms_flip@wf_vblank-ts-check-interruptible@c-edp1.html

  * igt@kms_plane_scaling@planes-upscale-factor-0-25-downscale-factor-0-5@pipe-c-edp-1-planes-upscale-downscale:
    - shard-iclb:         [SKIP][187] ([i915#5235]) -> [PASS][188] +2 similar issues
   [187]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-iclb2/igt@kms_plane_scaling@planes-upscale-factor-0-25-downscale-factor-0-5@pipe-c-edp-1-planes-upscale-downscale.html
   [188]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb6/igt@kms_plane_scaling@planes-upscale-factor-0-25-downscale-factor-0-5@pipe-c-edp-1-planes-upscale-downscale.html

  * igt@kms_psr@psr2_primary_page_flip:
    - shard-iclb:         [SKIP][189] ([fdo#109441]) -> [PASS][190]
   [189]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-iclb8/igt@kms_psr@psr2_primary_page_flip.html
   [190]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb2/igt@kms_psr@psr2_primary_page_flip.html

  * igt@kms_vblank@pipe-d-query-idle-hang:
    - shard-tglb:         [INCOMPLETE][191] -> [PASS][192]
   [191]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-tglb8/igt@kms_vblank@pipe-d-query-idle-hang.html
   [192]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-tglb3/igt@kms_vblank@pipe-d-query-idle-hang.html

  * igt@sysfs_heartbeat_interval@mixed@bcs0:
    - shard-skl:          [WARN][193] ([i915#4055]) -> [PASS][194]
   [193]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-skl2/igt@sysfs_heartbeat_interval@mixed@bcs0.html
   [194]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-skl6/igt@sysfs_heartbeat_interval@mixed@bcs0.html

  * igt@sysfs_heartbeat_interval@mixed@vcs0:
    - shard-skl:          [FAIL][195] ([i915#1731]) -> [PASS][196] +1 similar issue
   [195]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-skl2/igt@sysfs_heartbeat_interval@mixed@vcs0.html
   [196]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-skl6/igt@sysfs_heartbeat_interval@mixed@vcs0.html

  
#### Warnings ####

  * igt@gem_exec_balancer@parallel-ordering:
    - shard-iclb:         [DMESG-FAIL][197] ([i915#5614]) -> [SKIP][198] ([i915#4525])
   [197]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-iclb1/igt@gem_exec_balancer@parallel-ordering.html
   [198]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb7/igt@gem_exec_balancer@parallel-ordering.html

  * igt@gem_exec_balancer@parallel-out-fence:
    - shard-iclb:         [SKIP][199] ([i915#4525]) -> [DMESG-WARN][200] ([i915#5614]) +3 similar issues
   [199]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-iclb8/igt@gem_exec_balancer@parallel-out-fence.html
   [200]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb2/igt@gem_exec_balancer@parallel-out-fence.html

  * igt@gem_exec_fair@basic-none-rrul@rcs0:
    - shard-iclb:         [FAIL][201] ([i915#2852]) -> [FAIL][202] ([i915#2842])
   [201]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-iclb7/igt@gem_exec_fair@basic-none-rrul@rcs0.html
   [202]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb5/igt@gem_exec_fair@basic-none-rrul@rcs0.html

  * igt@gem_pxp@reject-modify-context-protection-off-3:
    - shard-skl:          [SKIP][203] ([fdo#109271]) -> [SKIP][204] ([fdo#109271] / [i915#1888])
   [203]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-skl5/igt@gem_pxp@reject-modify-context-protection-off-3.html
   [204]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-skl2/igt@gem_pxp@reject-modify-context-protection-off-3.html

  * igt@i915_pm_dc@dc3co-vpb-simulation:
    - shard-iclb:         [SKIP][205] ([i915#588]) -> [SKIP][206] ([i915#658])
   [205]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-iclb2/igt@i915_pm_dc@dc3co-vpb-simulation.html
   [206]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb4/igt@i915_pm_dc@dc3co-vpb-simulation.html

  * igt@kms_psr2_sf@cursor-plane-update-sf:
    - shard-iclb:         [SKIP][207] ([fdo#111068] / [i915#658]) -> [SKIP][208] ([i915#2920])
   [207]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11607/shard-iclb8/igt@kms_psr2_sf@cursor-plane-update-sf.html
   [208]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/shard-iclb2/igt@kms_psr2_sf@cursor-plane-update-sf.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103375]: https://bugs.freedesktop.org/show_bug.cgi?id=103375
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109274]: https://bugs.freedesktop.org/show_bug.cgi?id=109274
  [fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278
  [fdo#109279]: https://bugs.freedesktop.org/show_bug.cgi?id=109279
  [fdo#109280]: https://bugs.freedesktop.org/show_bug.cgi?id=109280
  [fdo#109284]: https://bugs.freedesktop.org/show_bug.cgi?id=109284
  [fdo#109289]: https://bugs.freedesktop.org/show_bug.cgi?id=109289
  [fdo#109291]: https://bugs.freedesktop.org/show_bug.cgi?id=109291
  [fdo#109293]: https://bugs.freedesktop.org/show_bug.cgi?id=109293
  [fdo#109300]: https://bugs.freedesktop.org/show_bug.cgi?id=109300
  [fdo#109314]: https://bugs.freedesktop.org/show_bug.cgi?id=109314
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#109506]: https://bugs.freedesktop.org/show_bug.cgi?id=109506
  [fdo#109642]: https://bugs.freedesktop.org/show_bug.cgi?id=109642
  [fdo#110723]: https://bugs.freedesktop.org/show_bug.cgi?id=110723
  [fdo#110725]: https://bugs.freedesktop.org/show_bug.cgi?id=110725
  [fdo#111068]: https://bugs.freedesktop.org/show_bug.cgi?id=111068
  [fdo#111314]: https://bugs.freedesktop.org/show_bug.cgi?id=111314
  [fdo#111614]: https://bugs.freedesktop.org/show_bug.cgi?id=111614
  [fdo#111615]: https://bugs.freedesktop.org/show_bug.cgi?id=111615
  [fdo#111644]: https://bugs.freedesktop.org/show_bug.cgi?id=111644
  [fdo#111825]: https://bugs.freedesktop.org/show_bug.cgi?id=111825
  [fdo#111827]: https://bugs.freedesktop.org/show_bug.cgi?id=111827
  [fdo#112022]: https://bugs.freedesktop.org/show_bug.cgi?id=112022
  [fdo#112054]: https://bugs.freedesktop.org/show_bug.cgi?id=112054
  [i915#1072]: https://gitlab.freedesktop.org/drm/intel/issues/1072
  [i915#1099]: https://gitlab.freedesktop.org/drm/intel/issues/1099
  [i915#1149]: https://gitlab.freedesktop.org/drm/intel/issues/1149
  [i915#1188]: https://gitlab.freedesktop.org/drm/intel/issues/1188
  [i915#132]: https://gitlab.freedesktop.org/drm/intel/issues/132
  [i915#1397]: https://gitlab.freedesktop.org/drm/intel/issues/1397
  [i915#1722]: https://gitlab.freedesktop.org/drm/intel/issues/1722
  [i915#1731]: https://gitlab.freedesktop.org/drm/intel/issues/1731
  [i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180
  [i915#1825]: https://gitlab.freedesktop.org/drm/intel/issues/1825
  [i915#1836]: https://gitlab.freedesktop.org/drm/intel/issues/1836
  [i915#1845]: https://gitlab.freedesktop.org/drm/intel/issues/1845
  [i915#1849]: https://gitlab.freedesktop.org/drm/intel/issues/1849
  [i915#1888]: https://gitlab.freedesktop.org/drm/intel/issues/1888
  [i915#1911]: https://gitlab.freedesktop.org/drm/intel/issues/1911
  [i915#2122]: https://gitlab.freedesktop.org/drm/intel/issues/2122
  [i915#2346]: https://gitlab.freedesktop.org/drm/intel/issues/2346
  [i915#2410]: https://gitlab.freedesktop.org/drm/intel/issues/2410
  [i915#2411]: https://gitlab.freedesktop.org/drm/intel/issues/2411
  [i915#2521]: https://gitlab.freedesktop.org/drm/intel/issues/2521
  [i915#2527]: https://gitlab.freedesktop.org/drm/intel/issues/2527
  [i915#2530]: https://gitlab.freedesktop.org/drm/intel/issues/2530
  [i915#265]: https://gitlab.freedesktop.org/drm/intel/issues/265
  [i915#2672]: https://gitlab.freedesktop.org/drm/intel/issues/2672
  [i915#2681]: https://gitlab.freedesktop.org/drm/intel/issues/2681
  [i915#284]: https://gitlab.freedesktop.org/drm/intel/issues/284
  [i915#2842]: https://gitlab.freedesktop.org/drm/intel/issues/2842
  [i915#2846]: https://gitlab.freedesktop.org/drm/intel/issues/2846
  [i915#2849]: https://gitlab.freedesktop.org/drm/intel/issues/2849
  [i915#2852]: https://gitlab.freedesktop.org/drm/intel/issues/2852
  [i915#2856]: https://gitlab.freedesktop.org/drm/intel/issues/2856
  [i915#2920]: https://gitlab.freedesktop.org/drm/intel/issues/2920
  [i915#2994]: https://gitlab.freedesktop.org/drm/intel/issues/2994
  [i915#3063]: https://gitlab.freedesktop.org/drm/intel/issues/3063
  [i915#3116]: https://gitlab.freedesktop.org/drm/intel/issues/3116
  [i915#3282]: https://gitlab.freedesktop.org/drm/intel/issues/3282
  [i915#3297]: https://gitlab.freedesktop.org/drm/intel/issues/3297
  [i915#3299]: https://gitlab.freedesktop.org/drm/intel/issues/3299
  [i915#3319]: https://gitlab.freedesktop.org/drm/intel/issues/3319
  [i915#3359]: https://gitlab.freedesktop.org/drm/intel/issues/3359
  [i915#3467]: https://gitlab.freedesktop.org/drm/intel/issues/3467
  [i915#3536]: https://gitlab.freedesktop.org/drm/intel/issues/3536
  [i915#3555]: https://gitlab.freedesktop.org/drm/intel/issues/3555
  [i915#3558]: https://gitlab.freedesktop.org/drm/intel/issues/3558
  [i915#3591]: https://gitlab.freedesktop.org/drm/intel/issues/3591
  [i915#3637]: https://gitlab.freedesktop.org/drm/intel/issues/3637
  [i915#3638]: https://gitlab.freedesktop.org/drm/intel/issues/3638
  [i915#3689]: https://gitlab.freedesktop.org/drm/intel/issues/3689
  [i915#3701]: https://gitlab.freedesktop.org/drm/intel/issues/3701
  [i915#3734]: https://gitlab.freedesktop.org/drm/intel/issues/3734
  [i915#3742]: https://gitlab.freedesktop.org/drm/intel/issues/3742
  [i915#3886]: https://gitlab.freedesktop.org/drm/intel/issues/3886
  [i915#3921]: https://gitlab.freedesktop.org/drm/intel/issues/3921
  [i915#4055]: https://gitlab.freedesktop.org/drm/intel/issues/4055
  [i915#4070]: https://gitlab.freedesktop.org/drm/intel/issues/4070
  [i915#4098]: https://gitlab.freedesktop.org/drm/intel/issues/4098
  [i915#4270]: https://gitlab.freedesktop.org/drm/intel/issues/4270
  [i915#4278]: https://gitlab.freedesktop.org/drm/intel/issues/4278
  [i915#4338]: https://gitlab.freedesktop.org/drm/intel/issues/4338
  [i915#4369]: https://gitlab.freedesktop.org/drm/intel/issues/4369
  [i915#4525]: https://gitlab.freedesktop.org/drm/intel/issues/4525
  [i915#454]: https://gitlab.freedesktop.org/drm/intel/issues/454
  [i915#4613]: https://gitlab.freedesktop.org/drm/intel/issues/4613
  [i915#4767]: https://gitlab.freedesktop.org/drm/intel/issues/4767
  [i915#5076]: https://gitlab.freedesktop.org/drm/intel/issues/5076
  [i915#5080]: https://gitlab.freedesktop.org/drm/intel/issues/5080
  [i915#5098]: https://gitlab.freedesktop.org/drm/intel/issues/5098
  [i915#5176]: https://gitlab.freedesktop.org/drm/intel/issues/5176
  [i915#5235]: https://gitlab.freedesktop.org/drm/intel/issues/5235
  [i915#5286]: https://gitlab.freedesktop.org/drm/intel/issues/5286
  [i915#5287]: https://gitlab.freedesktop.org/drm/intel/issues/5287
  [i915#5327]: https://gitlab.freedesktop.org/drm/intel/issues/5327
  [i915#533]: https://gitlab.freedesktop.org/drm/intel/issues/533
  [i915#5566]: https://gitlab.freedesktop.org/drm/intel/issues/5566
  [i915#5614]: https://gitlab.freedesktop.org/drm/intel/issues/5614
  [i915#5639]: https://gitlab.freedesktop.org/drm/intel/issues/5639
  [i915#5691]: https://gitlab.freedesktop.org/drm/intel/issues/5691
  [i915#5776]: https://gitlab.freedesktop.org/drm/intel/issues/5776
  [i915#5784]: https://gitlab.freedesktop.org/drm/intel/issues/5784
  [i915#588]: https://gitlab.freedesktop.org/drm/intel/issues/588
  [i915#658]: https://gitlab.freedesktop.org/drm/intel/issues/658
  [i915#716]: https://gitlab.freedesktop.org/drm/intel/issues/716
  [i915#768]: https://gitlab.freedesktop.org/drm/intel/issues/768
  [i915#79]: https://gitlab.freedesktop.org/drm/intel/issues/79


Build changes
-------------

  * Linux: CI_DRM_11607 -> Patchwork_103598v1

  CI-20190529: 20190529
  CI_DRM_11607: b0f0de5bb000952abb29696adb93f289e49b129c @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_6465: f6bb4399881a806fbff75ce3df89b60286d55917 @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
  Patchwork_103598v1: b0f0de5bb000952abb29696adb93f289e49b129c @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103598v1/index.html

[-- Attachment #2: Type: text/html, Size: 57354 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost
  2022-05-05  5:40 ` [Intel-gfx] " Vinay Belgaumkar
                   ` (2 preceding siblings ...)
  (?)
@ 2022-05-05 12:13 ` Tvrtko Ursulin
  2022-05-05 17:21   ` Belgaumkar, Vinay
  -1 siblings, 1 reply; 26+ messages in thread
From: Tvrtko Ursulin @ 2022-05-05 12:13 UTC (permalink / raw)
  To: Vinay Belgaumkar, intel-gfx, dri-devel


On 05/05/2022 06:40, Vinay Belgaumkar wrote:
> SLPC min/max frequency updates require H2G calls. We are seeing
> timeouts when GuC channel is backed up and it is unable to respond
> in a timely fashion causing warnings and affecting CI.

Is it the "Unable to force min freq" error? Do you have a link to the 
GitLab issue to add to commit message?

> This is seen when waitboosting happens during a stress test.
> this patch updates the waitboost path to use a non-blocking
> H2G call instead, which returns as soon as the message is
> successfully transmitted.

AFAIU with this approach, when CT channel is congested, you instead 
achieve silent dropping of the waitboost request, right?

It sounds like a potentially important feedback from the field to lose 
so easily. How about adding drm_notice to the worker when it fails?

Or simply a "one line patch" to replace i915_probe_error (!?) with 
drm_notice and keep the blocking behavior. (I have no idea what is the 
typical time to drain the CT buffer, and so to decide whether waiting or 
dropping makes more sense for effectiveness of waitboosting.)

Or since the congestion /should not/ happen in production, then the 
argument is why complicate with more code, in which case going with one 
line patch is an easy way forward?

Regards,

Tvrtko

> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 38 ++++++++++++++++-----
>   1 file changed, 30 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
> index 1db833da42df..c852f73cf521 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
> @@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc)
>   	return data->header.global_state;
>   }
>   
> +static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 id, u32 value)
> +{
> +	u32 request[] = {
> +		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
> +		SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
> +		id,
> +		value,
> +	};
> +	int ret;
> +
> +	ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0);
> +
> +	return ret > 0 ? -EPROTO : ret;
> +}
> +
> +static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, u32 value)
> +{
> +	struct intel_guc *guc = slpc_to_guc(slpc);
> +
> +	GEM_BUG_ON(id >= SLPC_MAX_PARAM);
> +
> +	return guc_action_slpc_set_param_nb(guc, id, value);
> +}
> +
>   static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value)
>   {
>   	u32 request[] = {
> @@ -208,12 +232,10 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
>   	 */
>   
>   	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
> -		ret = slpc_set_param(slpc,
> -				     SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
> -				     freq);
> -		if (ret)
> -			i915_probe_error(i915, "Unable to force min freq to %u: %d",
> -					 freq, ret);
> +		/* Non-blocking request will avoid stalls */
> +		ret = slpc_set_param_nb(slpc,
> +					SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
> +					freq);
>   	}
>   
>   	return ret;
> @@ -231,8 +253,8 @@ static void slpc_boost_work(struct work_struct *work)
>   	 */
>   	mutex_lock(&slpc->lock);
>   	if (atomic_read(&slpc->num_waiters)) {
> -		slpc_force_min_freq(slpc, slpc->boost_freq);
> -		slpc->num_boosts++;
> +		if (!slpc_force_min_freq(slpc, slpc->boost_freq))
> +			slpc->num_boosts++;
>   	}
>   	mutex_unlock(&slpc->lock);
>   }

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost
  2022-05-05 12:13 ` [Intel-gfx] [PATCH] " Tvrtko Ursulin
@ 2022-05-05 17:21   ` Belgaumkar, Vinay
  2022-05-05 18:36     ` John Harrison
  0 siblings, 1 reply; 26+ messages in thread
From: Belgaumkar, Vinay @ 2022-05-05 17:21 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx, dri-devel


On 5/5/2022 5:13 AM, Tvrtko Ursulin wrote:
>
> On 05/05/2022 06:40, Vinay Belgaumkar wrote:
>> SLPC min/max frequency updates require H2G calls. We are seeing
>> timeouts when GuC channel is backed up and it is unable to respond
>> in a timely fashion causing warnings and affecting CI.
>
> Is it the "Unable to force min freq" error? Do you have a link to the 
> GitLab issue to add to commit message?
We don't have a specific error for this one, but have seen similar 
issues with other H2G which are blocking.
>
>> This is seen when waitboosting happens during a stress test.
>> this patch updates the waitboost path to use a non-blocking
>> H2G call instead, which returns as soon as the message is
>> successfully transmitted.
>
> AFAIU with this approach, when CT channel is congested, you instead 
> achieve silent dropping of the waitboost request, right?
We are hoping it makes it, but just not waiting for it to complete.
>
> It sounds like a potentially important feedback from the field to lose 
> so easily. How about you added drm_notice to the worker when it fails?
>
> Or simply a "one line patch" to replace i915_probe_error (!?) with 
> drm_notice and keep the blocking behavior. (I have no idea what is the 
> typical time to drain the CT buffer, and so to decide whether waiting 
> or dropping makes more sense for effectiveness of waitboosting.)
>
> Or since the congestion /should not/ happen in production, then the 
> argument is why complicate with more code, in which case going with 
> one line patch is an easy way forward?

Even if we soften the blow here, the actual timeout error occurs in the 
intel_guc_ct.c code, so we cannot hide that error anyway. Making this 
call non-blocking will achieve both things.

Thanks,

Vinay.

>
> Regards,
>
> Tvrtko
>
>> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
>> ---
>>   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 38 ++++++++++++++++-----
>>   1 file changed, 30 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>> index 1db833da42df..c852f73cf521 100644
>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>> @@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc 
>> *slpc)
>>       return data->header.global_state;
>>   }
>>   +static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 
>> id, u32 value)
>> +{
>> +    u32 request[] = {
>> +        GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
>> +        SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
>> +        id,
>> +        value,
>> +    };
>> +    int ret;
>> +
>> +    ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0);
>> +
>> +    return ret > 0 ? -EPROTO : ret;
>> +}
>> +
>> +static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, u32 
>> value)
>> +{
>> +    struct intel_guc *guc = slpc_to_guc(slpc);
>> +
>> +    GEM_BUG_ON(id >= SLPC_MAX_PARAM);
>> +
>> +    return guc_action_slpc_set_param_nb(guc, id, value);
>> +}
>> +
>>   static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, 
>> u32 value)
>>   {
>>       u32 request[] = {
>> @@ -208,12 +232,10 @@ static int slpc_force_min_freq(struct 
>> intel_guc_slpc *slpc, u32 freq)
>>        */
>>         with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
>> -        ret = slpc_set_param(slpc,
>> -                     SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
>> -                     freq);
>> -        if (ret)
>> -            i915_probe_error(i915, "Unable to force min freq to %u: 
>> %d",
>> -                     freq, ret);
>> +        /* Non-blocking request will avoid stalls */
>> +        ret = slpc_set_param_nb(slpc,
>> +                    SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
>> +                    freq);
>>       }
>>         return ret;
>> @@ -231,8 +253,8 @@ static void slpc_boost_work(struct work_struct 
>> *work)
>>        */
>>       mutex_lock(&slpc->lock);
>>       if (atomic_read(&slpc->num_waiters)) {
>> -        slpc_force_min_freq(slpc, slpc->boost_freq);
>> -        slpc->num_boosts++;
>> +        if (!slpc_force_min_freq(slpc, slpc->boost_freq))
>> +            slpc->num_boosts++;
>>       }
>>       mutex_unlock(&slpc->lock);
>>   }

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost
  2022-05-05 17:21   ` Belgaumkar, Vinay
@ 2022-05-05 18:36     ` John Harrison
  2022-05-06  7:18       ` Tvrtko Ursulin
  0 siblings, 1 reply; 26+ messages in thread
From: John Harrison @ 2022-05-05 18:36 UTC (permalink / raw)
  To: Belgaumkar, Vinay, Tvrtko Ursulin, intel-gfx, dri-devel

On 5/5/2022 10:21, Belgaumkar, Vinay wrote:
> On 5/5/2022 5:13 AM, Tvrtko Ursulin wrote:
>> On 05/05/2022 06:40, Vinay Belgaumkar wrote:
>>> SLPC min/max frequency updates require H2G calls. We are seeing
>>> timeouts when GuC channel is backed up and it is unable to respond
>>> in a timely fashion causing warnings and affecting CI.
>>
>> Is it the "Unable to force min freq" error? Do you have a link to the 
>> GitLab issue to add to commit message?
> We don't have a specific error for this one, but have seen similar 
> issues with other H2G which are blocking.
>>
>>> This is seen when waitboosting happens during a stress test.
>>> this patch updates the waitboost path to use a non-blocking
>>> H2G call instead, which returns as soon as the message is
>>> successfully transmitted.
>>
>> AFAIU with this approach, when CT channel is congested, you instead 
>> achieve silent dropping of the waitboost request, right?
> We are hoping it makes it, but just not waiting for it to complete.
We are not 'hoping it makes it'. We know for a fact that it will make 
it. We just don't know when. The issue is not about whether the 
waitboost request itself gets dropped/lost it is about the ack that 
comes back. The GuC will process the message and it will send an ack. 
It's just a question of whether the i915 driver has given up waiting for 
it yet. And if it has, then you get the initial 'timed out waiting for 
ack' followed by a later 'got unexpected ack' message.

Whereas, if we make the call asynchronous, there is no ack. i915 doesn't 
bother waiting and it won't get surprised later.

Also, note that this is only an issue when GuC itself is backed up. 
Normally that requires the creation/destruction of large numbers of 
contexts in rapid succession (context management is about the slowest 
thing we do with GuC). Some of the IGTs and selftests do that with 
thousands of contexts all at once. Those are generally where we see this 
kind of problem. It would be highly unlikely (but not impossible) to hit 
it in real world usage.

The general design philosophy of H2G messages is that asynchronous mode 
should be used for everything if at all possible. It is fire and forget 
and will all get processed in the order sent (same as batch buffer 
execution, really). Synchronous messages should only be used when an 
ack/status is absolutely required. E.g. start of day initialisation or 
things like TLB invalidation where we need to know that a cache has been 
cleared/flushed before updating memory from the CPU.

John.


>>
>> It sounds like a potentially important feedback from the field to 
>> lose so easily. How about you added drm_notice to the worker when it 
>> fails?
>>
>> Or simply a "one line patch" to replace i915_probe_error (!?) with 
>> drm_notice and keep the blocking behavior. (I have no idea what is 
>> the typical time to drain the CT buffer, and so to decide whether 
>> waiting or dropping makes more sense for effectiveness of waitboosting.)
>>
>> Or since the congestion /should not/ happen in production, then the 
>> argument is why complicate with more code, in which case going with 
>> one line patch is an easy way forward?
>
> Even if we soften the blow here, the actual timeout error occurs in 
> the intel_guc_ct.c code, so we cannot hide that error anyways. Making 
> this call non-blocking will achieve both things.
>
> Thanks,
>
> Vinay.
>
>>
>> Regards,
>>
>> Tvrtko
>>
>>> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 38 
>>> ++++++++++++++++-----
>>>   1 file changed, 30 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
>>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>>> index 1db833da42df..c852f73cf521 100644
>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>>> @@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc 
>>> *slpc)
>>>       return data->header.global_state;
>>>   }
>>>   +static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 
>>> id, u32 value)
>>> +{
>>> +    u32 request[] = {
>>> +        GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
>>> +        SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
>>> +        id,
>>> +        value,
>>> +    };
>>> +    int ret;
>>> +
>>> +    ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0);
>>> +
>>> +    return ret > 0 ? -EPROTO : ret;
>>> +}
>>> +
>>> +static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, 
>>> u32 value)
>>> +{
>>> +    struct intel_guc *guc = slpc_to_guc(slpc);
>>> +
>>> +    GEM_BUG_ON(id >= SLPC_MAX_PARAM);
>>> +
>>> +    return guc_action_slpc_set_param_nb(guc, id, value);
>>> +}
>>> +
>>>   static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, 
>>> u32 value)
>>>   {
>>>       u32 request[] = {
>>> @@ -208,12 +232,10 @@ static int slpc_force_min_freq(struct 
>>> intel_guc_slpc *slpc, u32 freq)
>>>        */
>>>         with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
>>> -        ret = slpc_set_param(slpc,
>>> - SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
>>> -                     freq);
>>> -        if (ret)
>>> -            i915_probe_error(i915, "Unable to force min freq to %u: 
>>> %d",
>>> -                     freq, ret);
>>> +        /* Non-blocking request will avoid stalls */
>>> +        ret = slpc_set_param_nb(slpc,
>>> + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
>>> +                    freq);
>>>       }
>>>         return ret;
>>> @@ -231,8 +253,8 @@ static void slpc_boost_work(struct work_struct 
>>> *work)
>>>        */
>>>       mutex_lock(&slpc->lock);
>>>       if (atomic_read(&slpc->num_waiters)) {
>>> -        slpc_force_min_freq(slpc, slpc->boost_freq);
>>> -        slpc->num_boosts++;
>>> +        if (!slpc_force_min_freq(slpc, slpc->boost_freq))
>>> +            slpc->num_boosts++;
>>>       }
>>>       mutex_unlock(&slpc->lock);
>>>   }


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost
  2022-05-05 18:36     ` John Harrison
@ 2022-05-06  7:18       ` Tvrtko Ursulin
  2022-05-06 16:21         ` Belgaumkar, Vinay
  2022-05-06 16:43         ` John Harrison
  0 siblings, 2 replies; 26+ messages in thread
From: Tvrtko Ursulin @ 2022-05-06  7:18 UTC (permalink / raw)
  To: John Harrison, Belgaumkar, Vinay, intel-gfx, dri-devel


On 05/05/2022 19:36, John Harrison wrote:
> On 5/5/2022 10:21, Belgaumkar, Vinay wrote:
>> On 5/5/2022 5:13 AM, Tvrtko Ursulin wrote:
>>> On 05/05/2022 06:40, Vinay Belgaumkar wrote:
>>>> SLPC min/max frequency updates require H2G calls. We are seeing
>>>> timeouts when GuC channel is backed up and it is unable to respond
>>>> in a timely fashion causing warnings and affecting CI.
>>>
>>> Is it the "Unable to force min freq" error? Do you have a link to the 
>>> GitLab issue to add to commit message?
>> We don't have a specific error for this one, but have seen similar 
>> issues with other H2G which are blocking.
>>>
>>>> This is seen when waitboosting happens during a stress test.
>>>> this patch updates the waitboost path to use a non-blocking
>>>> H2G call instead, which returns as soon as the message is
>>>> successfully transmitted.
>>>
>>> AFAIU with this approach, when CT channel is congested, you instead 
>>> achieve silent dropping of the waitboost request, right?
>> We are hoping it makes it, but just not waiting for it to complete.
> We are not 'hoping it makes it'. We know for a fact that it will make 
> it. We just don't know when. The issue is not about whether the 
> waitboost request itself gets dropped/lost it is about the ack that 
> comes back. The GuC will process the message and it will send an ack. 
> It's just a question of whether the i915 driver has given up waiting for 
> it yet. And if it has, then you get the initial 'timed out waiting for 
> ack' followed by a later 'got unexpected ack' message.
> 
> Whereas, if we make the call asynchronous, there is no ack. i915 doesn't 
> bother waiting and it won't get surprised later.
> 
> Also, note that this is only an issue when GuC itself is backed up. 
> Normally that requires the creation/destruction of large numbers of 
> contexts in rapid succession (context management is about the slowest 
> thing we do with GuC). Some of the IGTs and selftests do that with 
> thousands of contexts all at once. Those are generally where we see this 
> kind of problem. It would be highly unlikely (but not impossible) to hit 
> it in real world usage.

Goto ->

> The general design philosophy of H2G messages is that asynchronous mode 
> should be used for everything if at all possible. It is fire and forget 
> and will all get processed in the order sent (same as batch buffer 
> execution, really). Synchronous messages should only be used when an 
> ack/status is absolutely required. E.g. start of day initialisation or 
> things like TLB invalidation where we need to know that a cache has been 
> cleared/flushed before updating memory from the CPU.
> 
> John.
> 
> 
>>>
>>> It sounds like a potentially important feedback from the field to 
>>> lose so easily. How about you added drm_notice to the worker when it 
>>> fails?
>>>
>>> Or simply a "one line patch" to replace i915_probe_error (!?) with 
>>> drm_notice and keep the blocking behavior. (I have no idea what is 
>>> the typical time to drain the CT buffer, and so to decide whether 
>>> waiting or dropping makes more sense for effectiveness of waitboosting.)
>>>
>>> Or since the congestion /should not/ happen in production, then the 
>>> argument is why complicate with more code, in which case going with 
>>> one line patch is an easy way forward?

Here. Where I did hint I understood the "should not happen in production" 
angle.

So statement is GuC is congested in processing requests, but the h2g 
buffer is not congested so no chance intel_guc_send_nb() will fail with 
no space in that buffer? Sounds a bit un-intuitive.

Anyway, it sounds okay to me to use the non-blocking, but I would like 
to see some logging if the unexpected does happen. Hence I was 
suggesting the option of adding drm_notice logging if the send fails 
from the worker. (Because I think other callers would already propagate 
the error, like sysfs.)

   err = slpc_force_min_freq(slpc, slpc->boost_freq);
   if (!err)
        slpc->num_boosts++;
   else
        drm_notice(... "Failed to send waitboost request (%d)", err);

Something like that.

Regards,

Tvrtko


>> Even if we soften the blow here, the actual timeout error occurs in 
>> the intel_guc_ct.c code, so we cannot hide that error anyways. Making 
>> this call non-blocking will achieve both things.
>>
>> Thanks,
>>
>> Vinay.
>>
>>>
>>> Regards,
>>>
>>> Tvrtko
>>>
>>>> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
>>>> ---
>>>>   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 38 
>>>> ++++++++++++++++-----
>>>>   1 file changed, 30 insertions(+), 8 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
>>>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>>>> index 1db833da42df..c852f73cf521 100644
>>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>>>> @@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc 
>>>> *slpc)
>>>>       return data->header.global_state;
>>>>   }
>>>>   +static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 
>>>> id, u32 value)
>>>> +{
>>>> +    u32 request[] = {
>>>> +        GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
>>>> +        SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
>>>> +        id,
>>>> +        value,
>>>> +    };
>>>> +    int ret;
>>>> +
>>>> +    ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0);
>>>> +
>>>> +    return ret > 0 ? -EPROTO : ret;
>>>> +}
>>>> +
>>>> +static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, 
>>>> u32 value)
>>>> +{
>>>> +    struct intel_guc *guc = slpc_to_guc(slpc);
>>>> +
>>>> +    GEM_BUG_ON(id >= SLPC_MAX_PARAM);
>>>> +
>>>> +    return guc_action_slpc_set_param_nb(guc, id, value);
>>>> +}
>>>> +
>>>>   static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, 
>>>> u32 value)
>>>>   {
>>>>       u32 request[] = {
>>>> @@ -208,12 +232,10 @@ static int slpc_force_min_freq(struct 
>>>> intel_guc_slpc *slpc, u32 freq)
>>>>        */
>>>>         with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
>>>> -        ret = slpc_set_param(slpc,
>>>> - SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
>>>> -                     freq);
>>>> -        if (ret)
>>>> -            i915_probe_error(i915, "Unable to force min freq to %u: 
>>>> %d",
>>>> -                     freq, ret);
>>>> +        /* Non-blocking request will avoid stalls */
>>>> +        ret = slpc_set_param_nb(slpc,
>>>> + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
>>>> +                    freq);
>>>>       }
>>>>         return ret;
>>>> @@ -231,8 +253,8 @@ static void slpc_boost_work(struct work_struct 
>>>> *work)
>>>>        */
>>>>       mutex_lock(&slpc->lock);
>>>>       if (atomic_read(&slpc->num_waiters)) {
>>>> -        slpc_force_min_freq(slpc, slpc->boost_freq);
>>>> -        slpc->num_boosts++;
>>>> +        if (!slpc_force_min_freq(slpc, slpc->boost_freq))
>>>> +            slpc->num_boosts++;
>>>>       }
>>>>       mutex_unlock(&slpc->lock);
>>>>   }
> 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost
  2022-05-06  7:18       ` Tvrtko Ursulin
@ 2022-05-06 16:21         ` Belgaumkar, Vinay
  2022-05-06 16:43         ` John Harrison
  1 sibling, 0 replies; 26+ messages in thread
From: Belgaumkar, Vinay @ 2022-05-06 16:21 UTC (permalink / raw)
  To: Tvrtko Ursulin, John Harrison, intel-gfx, dri-devel


On 5/6/2022 12:18 AM, Tvrtko Ursulin wrote:
>
> On 05/05/2022 19:36, John Harrison wrote:
>> On 5/5/2022 10:21, Belgaumkar, Vinay wrote:
>>> On 5/5/2022 5:13 AM, Tvrtko Ursulin wrote:
>>>> On 05/05/2022 06:40, Vinay Belgaumkar wrote:
>>>>> SLPC min/max frequency updates require H2G calls. We are seeing
>>>>> timeouts when GuC channel is backed up and it is unable to respond
>>>>> in a timely fashion causing warnings and affecting CI.
>>>>
>>>> Is it the "Unable to force min freq" error? Do you have a link to 
>>>> the GitLab issue to add to commit message?
>>> We don't have a specific error for this one, but have seen similar 
>>> issues with other H2G which are blocking.
>>>>
>>>>> This is seen when waitboosting happens during a stress test.
>>>>> this patch updates the waitboost path to use a non-blocking
>>>>> H2G call instead, which returns as soon as the message is
>>>>> successfully transmitted.
>>>>
>>>> AFAIU with this approach, when CT channel is congested, you instead 
>>>> achieve silent dropping of the waitboost request, right?
>>> We are hoping it makes it, but just not waiting for it to complete.
>> We are not 'hoping it makes it'. We know for a fact that it will make 
>> it. We just don't know when. The issue is not about whether the 
>> waitboost request itself gets dropped/lost it is about the ack that 
>> comes back. The GuC will process the message and it will send an ack. 
>> It's just a question of whether the i915 driver has given up waiting 
>> for it yet. And if it has, then you get the initial 'timed out 
>> waiting for ack' followed by a later 'got unexpected ack' message.
>>
>> Whereas, if we make the call asynchronous, there is no ack. i915 
>> doesn't bother waiting and it won't get surprised later.
>>
>> Also, note that this is only an issue when GuC itself is backed up. 
>> Normally that requires the creation/destruction of large numbers of 
>> contexts in rapid succession (context management is about the slowest 
>> thing we do with GuC). Some of the IGTs and selftests do that with 
>> thousands of contexts all at once. Those are generally where we see 
>> this kind of problem. It would be highly unlikely (but not 
>> impossible) to hit it in real world usage.
>
> Goto ->
>
>> The general design philosophy of H2G messages is that asynchronous 
>> mode should be used for everything if at all possible. It is fire and 
>> forget and will all get processed in the order sent (same as batch 
>> buffer execution, really). Synchronous messages should only be used 
>> when an ack/status is absolutely required. E.g. start of day 
>> initialisation or things like TLB invalidation where we need to know 
>> that a cache has been cleared/flushed before updating memory from the 
>> CPU.
>>
>> John.
>>
>>
>>>>
>>>> It sounds like a potentially important feedback from the field to 
>>>> lose so easily. How about you added drm_notice to the worker when 
>>>> it fails?
>>>>
>>>> Or simply a "one line patch" to replace i915_probe_error (!?) with 
>>>> drm_notice and keep the blocking behavior. (I have no idea what is 
>>>> the typical time to drain the CT buffer, and so to decide whether 
>>>> waiting or dropping makes more sense for effectiveness of 
>>>> waitboosting.)
>>>>
>>>> Or since the congestion /should not/ happen in production, then the 
>>>> argument is why complicate with more code, in which case going with 
>>>> one line patch is an easy way forward?
>
> Here. Where I did hint I understood the "should not happen in 
> production angle".
>
> So statement is GuC is congested in processing requests, but the h2g 
> buffer is not congested so no chance intel_guc_send_nb() will fail 
> with no space in that buffer? Sounds a bit un-intuitive.
>
> Anyway, it sounds okay to me to use the non-blocking, but I would like 
> to see some logging if the unexpected does happen. Hence I was 
> suggesting the option of adding drm_notice logging if the send fails 
> from the worker. (Because I think other callers would already 
> propagate the error, like sysfs.)
>
>   err = slpc_force_min_freq(slpc, slpc->boost_freq);
>   if (!err)
>        slpc->num_boosts++;
>   else
>        drm_notice(... "Failed to send waitboost request (%d)", err);

Ok, makes sense. Will send out another rev with this change.

Thanks,

Vinay.


>
> Something like that.
>
> Regards,
>
> Tvrtko
>
>
>>> Even if we soften the blow here, the actual timeout error occurs in 
>>> the intel_guc_ct.c code, so we cannot hide that error anyways. 
>>> Making this call non-blocking will achieve both things.
>>>
>>> Thanks,
>>>
>>> Vinay.
>>>
>>>>
>>>> Regards,
>>>>
>>>> Tvrtko
>>>>
>>>>> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
>>>>> ---
>>>>>   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 38 
>>>>> ++++++++++++++++-----
>>>>>   1 file changed, 30 insertions(+), 8 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
>>>>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>>>>> index 1db833da42df..c852f73cf521 100644
>>>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>>>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>>>>> @@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc 
>>>>> *slpc)
>>>>>       return data->header.global_state;
>>>>>   }
>>>>>   +static int guc_action_slpc_set_param_nb(struct intel_guc *guc, 
>>>>> u8 id, u32 value)
>>>>> +{
>>>>> +    u32 request[] = {
>>>>> +        GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
>>>>> +        SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
>>>>> +        id,
>>>>> +        value,
>>>>> +    };
>>>>> +    int ret;
>>>>> +
>>>>> +    ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0);
>>>>> +
>>>>> +    return ret > 0 ? -EPROTO : ret;
>>>>> +}
>>>>> +
>>>>> +static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, 
>>>>> u32 value)
>>>>> +{
>>>>> +    struct intel_guc *guc = slpc_to_guc(slpc);
>>>>> +
>>>>> +    GEM_BUG_ON(id >= SLPC_MAX_PARAM);
>>>>> +
>>>>> +    return guc_action_slpc_set_param_nb(guc, id, value);
>>>>> +}
>>>>> +
>>>>>   static int guc_action_slpc_set_param(struct intel_guc *guc, u8 
>>>>> id, u32 value)
>>>>>   {
>>>>>       u32 request[] = {
>>>>> @@ -208,12 +232,10 @@ static int slpc_force_min_freq(struct 
>>>>> intel_guc_slpc *slpc, u32 freq)
>>>>>        */
>>>>>         with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
>>>>> -        ret = slpc_set_param(slpc,
>>>>> - SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
>>>>> -                     freq);
>>>>> -        if (ret)
>>>>> -            i915_probe_error(i915, "Unable to force min freq to 
>>>>> %u: %d",
>>>>> -                     freq, ret);
>>>>> +        /* Non-blocking request will avoid stalls */
>>>>> +        ret = slpc_set_param_nb(slpc,
>>>>> + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
>>>>> +                    freq);
>>>>>       }
>>>>>         return ret;
>>>>> @@ -231,8 +253,8 @@ static void slpc_boost_work(struct work_struct 
>>>>> *work)
>>>>>        */
>>>>>       mutex_lock(&slpc->lock);
>>>>>       if (atomic_read(&slpc->num_waiters)) {
>>>>> -        slpc_force_min_freq(slpc, slpc->boost_freq);
>>>>> -        slpc->num_boosts++;
>>>>> +        if (!slpc_force_min_freq(slpc, slpc->boost_freq))
>>>>> +            slpc->num_boosts++;
>>>>>       }
>>>>>       mutex_unlock(&slpc->lock);
>>>>>   }
>>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost
  2022-05-06  7:18       ` Tvrtko Ursulin
  2022-05-06 16:21         ` Belgaumkar, Vinay
@ 2022-05-06 16:43         ` John Harrison
  2022-05-15  5:46           ` Belgaumkar, Vinay
  1 sibling, 1 reply; 26+ messages in thread
From: John Harrison @ 2022-05-06 16:43 UTC (permalink / raw)
  To: Tvrtko Ursulin, Belgaumkar, Vinay, intel-gfx, dri-devel

On 5/6/2022 00:18, Tvrtko Ursulin wrote:
> On 05/05/2022 19:36, John Harrison wrote:
>> On 5/5/2022 10:21, Belgaumkar, Vinay wrote:
>>> On 5/5/2022 5:13 AM, Tvrtko Ursulin wrote:
>>>> On 05/05/2022 06:40, Vinay Belgaumkar wrote:
>>>>> SLPC min/max frequency updates require H2G calls. We are seeing
>>>>> timeouts when GuC channel is backed up and it is unable to respond
>>>>> in a timely fashion causing warnings and affecting CI.
>>>>
>>>> Is it the "Unable to force min freq" error? Do you have a link to 
>>>> the GitLab issue to add to commit message?
>>> We don't have a specific error for this one, but have seen similar 
>>> issues with other H2G which are blocking.
>>>>
>>>>> This is seen when waitboosting happens during a stress test.
>>>>> this patch updates the waitboost path to use a non-blocking
>>>>> H2G call instead, which returns as soon as the message is
>>>>> successfully transmitted.
>>>>
>>>> AFAIU with this approach, when CT channel is congested, you instead 
>>>> achieve silent dropping of the waitboost request, right?
>>> We are hoping it makes it, but just not waiting for it to complete.
>> We are not 'hoping it makes it'. We know for a fact that it will make 
>> it. We just don't know when. The issue is not about whether the 
>> waitboost request itself gets dropped/lost it is about the ack that 
>> comes back. The GuC will process the message and it will send an ack. 
>> It's just a question of whether the i915 driver has given up waiting 
>> for it yet. And if it has, then you get the initial 'timed out 
>> waiting for ack' followed by a later 'got unexpected ack' message.
>>
>> Whereas, if we make the call asynchronous, there is no ack. i915 
>> doesn't bother waiting and it won't get surprised later.
>>
>> Also, note that this is only an issue when GuC itself is backed up. 
>> Normally that requires the creation/destruction of large numbers of 
>> contexts in rapid succession (context management is about the slowest 
>> thing we do with GuC). Some of the IGTs and selftests do that with 
>> thousands of contexts all at once. Those are generally where we see 
>> this kind of problem. It would be highly unlikely (but not 
>> impossible) to hit it in real world usage.
>
> Goto ->
>
>> The general design philosophy of H2G messages is that asynchronous 
>> mode should be used for everything if at all possible. It is fire and 
>> forget and will all get processed in the order sent (same as batch 
>> buffer execution, really). Synchronous messages should only be used 
>> when an ack/status is absolutely required. E.g. start of day 
>> initialisation or things like TLB invalidation where we need to know 
>> that a cache has been cleared/flushed before updating memory from the 
>> CPU.
>>
>> John.
>>
>>
>>>>
>>>> It sounds like a potentially important feedback from the field to 
>>>> lose so easily. How about you added drm_notice to the worker when 
>>>> it fails?
>>>>
>>>> Or simply a "one line patch" to replace i915_probe_error (!?) with 
>>>> drm_notice and keep the blocking behavior. (I have no idea what is 
>>>> the typical time to drain the CT buffer, and so to decide whether 
>>>> waiting or dropping makes more sense for effectiveness of 
>>>> waitboosting.)
>>>>
>>>> Or since the congestion /should not/ happen in production, then the 
>>>> argument is why complicate with more code, in which case going with 
>>>> one line patch is an easy way forward?
>
> Here. Where I did hint I understood the "should not happen in 
> production angle".
>
> So statement is GuC is congested in processing requests, but the h2g 
> buffer is not congested so no chance intel_guc_send_nb() will fail 
> with no space in that buffer? Sounds a bit un-intuitive.
That's two different things. The problem of no space in the H2G buffer 
is the same whether the call is sent blocking or non-blocking. The 
wait-for-space version is intel_guc_send_busy_loop() rather than 
intel_guc_send_nb(). NB: _busy_loop is a wrapper around _nb, so the 
wait-for-space version is also non-blocking ;). If a non-looping version 
is used (blocking or otherwise) it will return -EBUSY if there is no 
space. So both the original SLPC call and this non-blocking version will 
still get an immediate EBUSY return code if the H2G channel is backed up 
completely.

Whether the code should be handling EBUSY or not is another matter. 
Vinay, does anything higher up do a loop on EBUSY? If not, maybe it 
should be using the _busy_loop() call instead?

The blocking vs non-blocking is about waiting for a response if the 
command is successfully sent. The blocking case will sit and spin for a 
reply, the non-blocking assumes success and expects an asynchronous 
error report on failure. The assumption being that the call can't fail 
unless something is already broken - i915 sending invalid data to GuC 
for example. And thus any failure is in the BUG_ON category rather than 
the try again with a different approach and/or try again later category.

This is the point of the change. We are currently getting timeout errors 
when the H2G channel has space so the command can be sent, but the 
channel already contains a lot of slow operations. The command has been 
sent and will be processed successfully, it just takes longer than the 
i915 timeout. Given that we don't actually care about the completion 
response for this command, there is no point in either a) sitting in a 
loop waiting for it or b) complaining that it doesn't happen in a timely 
fashion. Hence the plan to make it non-blocking.

>
> Anyway, it sounds okay to me to use the non-blocking, but I would like 
> to see some logging if the unexpected does happen. Hence I was 
> suggesting the option of adding drm_notice logging if the send fails 
> from the worker. (Because I think other callers would already 
> propagate the error, like sysfs.)
>
>   err = slpc_force_min_freq(slpc, slpc->boost_freq);
>   if (!err)
>        slpc->num_boosts++;
>   else
>        drm_notice(... "Failed to send waitboost request (%d)", err);
The only error this should ever report would be EBUSY when the H2G 
channel is full. Anything else (ENODEV, EPIPE, etc.) means the system is 
already toast and bigger errors will likely have already have been reported.

As above, maybe this should be looping on the EBUSY case. Presumably it 
is safe to do so if it was already looping waiting for the response. And 
then printing a notice level warning on more catastrophic errors seems 
reasonable.

John.

>
> Something like that.
>
> Regards,
>
> Tvrtko
>
>
>>> Even if we soften the blow here, the actual timeout error occurs in 
>>> the intel_guc_ct.c code, so we cannot hide that error anyways. 
>>> Making this call non-blocking will achieve both things.
>>>
>>> Thanks,
>>>
>>> Vinay.
>>>
>>>>
>>>> Regards,
>>>>
>>>> Tvrtko
>>>>
>>>>> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
>>>>> ---
>>>>>   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 38 
>>>>> ++++++++++++++++-----
>>>>>   1 file changed, 30 insertions(+), 8 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
>>>>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>>>>> index 1db833da42df..c852f73cf521 100644
>>>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>>>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>>>>> @@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc 
>>>>> *slpc)
>>>>>       return data->header.global_state;
>>>>>   }
>>>>>   +static int guc_action_slpc_set_param_nb(struct intel_guc *guc, 
>>>>> u8 id, u32 value)
>>>>> +{
>>>>> +    u32 request[] = {
>>>>> +        GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
>>>>> +        SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
>>>>> +        id,
>>>>> +        value,
>>>>> +    };
>>>>> +    int ret;
>>>>> +
>>>>> +    ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0);
>>>>> +
>>>>> +    return ret > 0 ? -EPROTO : ret;
>>>>> +}
>>>>> +
>>>>> +static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, 
>>>>> u32 value)
>>>>> +{
>>>>> +    struct intel_guc *guc = slpc_to_guc(slpc);
>>>>> +
>>>>> +    GEM_BUG_ON(id >= SLPC_MAX_PARAM);
>>>>> +
>>>>> +    return guc_action_slpc_set_param_nb(guc, id, value);
>>>>> +}
>>>>> +
>>>>>   static int guc_action_slpc_set_param(struct intel_guc *guc, u8 
>>>>> id, u32 value)
>>>>>   {
>>>>>       u32 request[] = {
>>>>> @@ -208,12 +232,10 @@ static int slpc_force_min_freq(struct 
>>>>> intel_guc_slpc *slpc, u32 freq)
>>>>>        */
>>>>>         with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
>>>>> -        ret = slpc_set_param(slpc,
>>>>> - SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
>>>>> -                     freq);
>>>>> -        if (ret)
>>>>> -            i915_probe_error(i915, "Unable to force min freq to 
>>>>> %u: %d",
>>>>> -                     freq, ret);
>>>>> +        /* Non-blocking request will avoid stalls */
>>>>> +        ret = slpc_set_param_nb(slpc,
>>>>> + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
>>>>> +                    freq);
>>>>>       }
>>>>>         return ret;
>>>>> @@ -231,8 +253,8 @@ static void slpc_boost_work(struct work_struct 
>>>>> *work)
>>>>>        */
>>>>>       mutex_lock(&slpc->lock);
>>>>>       if (atomic_read(&slpc->num_waiters)) {
>>>>> -        slpc_force_min_freq(slpc, slpc->boost_freq);
>>>>> -        slpc->num_boosts++;
>>>>> +        if (!slpc_force_min_freq(slpc, slpc->boost_freq))
>>>>> +            slpc->num_boosts++;
>>>>>       }
>>>>>       mutex_unlock(&slpc->lock);
>>>>>   }
>>


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost
  2022-05-06 16:43         ` John Harrison
@ 2022-05-15  5:46           ` Belgaumkar, Vinay
  0 siblings, 0 replies; 26+ messages in thread
From: Belgaumkar, Vinay @ 2022-05-15  5:46 UTC (permalink / raw)
  To: John Harrison, Tvrtko Ursulin, intel-gfx, dri-devel


On 5/6/2022 9:43 AM, John Harrison wrote:
> On 5/6/2022 00:18, Tvrtko Ursulin wrote:
>> On 05/05/2022 19:36, John Harrison wrote:
>>> On 5/5/2022 10:21, Belgaumkar, Vinay wrote:
>>>> On 5/5/2022 5:13 AM, Tvrtko Ursulin wrote:
>>>>> On 05/05/2022 06:40, Vinay Belgaumkar wrote:
>>>>>> SLPC min/max frequency updates require H2G calls. We are seeing
>>>>>> timeouts when GuC channel is backed up and it is unable to respond
>>>>>> in a timely fashion causing warnings and affecting CI.
>>>>>
>>>>> Is it the "Unable to force min freq" error? Do you have a link to 
>>>>> the GitLab issue to add to commit message?
>>>> We don't have a specific error for this one, but have seen similar 
>>>> issues with other H2G which are blocking.
>>>>>
>>>>>> This is seen when waitboosting happens during a stress test.
>>>>>> this patch updates the waitboost path to use a non-blocking
>>>>>> H2G call instead, which returns as soon as the message is
>>>>>> successfully transmitted.
>>>>>
>>>>> AFAIU with this approach, when CT channel is congested, you 
>>>>> instead achieve silent dropping of the waitboost request, right?
>>>> We are hoping it makes it, but just not waiting for it to complete.
>>> We are not 'hoping it makes it'. We know for a fact that it will 
>>> make it. We just don't know when. The issue is not about whether the 
>>> waitboost request itself gets dropped/lost it is about the ack that 
>>> comes back. The GuC will process the message and it will send an 
>>> ack. It's just a question of whether the i915 driver has given up 
>>> waiting for it yet. And if it has, then you get the initial 'timed 
>>> out waiting for ack' followed by a later 'got unexpected ack' message.
>>>
>>> Whereas, if we make the call asynchronous, there is no ack. i915 
>>> doesn't bother waiting and it won't get surprised later.
>>>
>>> Also, note that this is only an issue when GuC itself is backed up. 
>>> Normally that requires the creation/destruction of large numbers of 
>>> contexts in rapid succession (context management is about the 
>>> slowest thing we do with GuC). Some of the IGTs and selftests do 
>>> that with thousands of contexts all at once. Those are generally 
>>> where we see this kind of problem. It would be highly unlikely (but 
>>> not impossible) to hit it in real world usage.
>>
>> Goto ->
>>
>>> The general design philosophy of H2G messages is that asynchronous 
>>> mode should be used for everything if at all possible. It is fire 
>>> and forget and will all get processed in the order sent (same as 
>>> batch buffer execution, really). Synchronous messages should only be 
>>> used when an ack/status is absolutely required. E.g. start of day 
>>> initialisation or things like TLB invalidation where we need to know 
>>> that a cache has been cleared/flushed before updating memory from 
>>> the CPU.
>>>
>>> John.
>>>
>>>
>>>>>
>>>>> It sounds like a potentially important feedback from the field to 
>>>>> lose so easily. How about you added drm_notice to the worker when 
>>>>> it fails?
>>>>>
>>>>> Or simply a "one line patch" to replace i915_probe_error (!?) with 
>>>>> drm_notice and keep the blocking behavior. (I have no idea what is 
>>>>> the typical time to drain the CT buffer, and so to decide whether 
>>>>> waiting or dropping makes more sense for effectiveness of 
>>>>> waitboosting.)
>>>>>
>>>>> Or since the congestion /should not/ happen in production, then 
>>>>> the argument is why complicate with more code, in which case going 
>>>>> with one line patch is an easy way forward?
>>
>> Here. Where I did hint I understood the "should not happen in 
>> production angle".
>>
>> So statement is GuC is congested in processing requests, but the h2g 
>> buffer is not congested so no chance intel_guc_send_nb() will fail 
>> with no space in that buffer? Sounds a bit un-intuitive.
> That's two different things. The problem of no space in the H2G buffer 
> is the same whether the call is sent blocking or non-blocking. The 
> wait-for-space version is intel_guc_send_busy_loop() rather than 
> intel_guc_send_nb(). NB: _busy_loop is a wrapper around _nb, so the 
> wait-for-space version is also non-blocking ;). If a non-looping 
> version is used (blocking or otherwise) it will return -EBUSY if there 
> is no space. So both the original SLPC call and this non-blocking 
> version will still get an immediate EBUSY return code if the H2G 
> channel is backed up completely.
>
> Whether the code should be handling EBUSY or not is another matter. 
> Vinay, does anything higher up do a loop on EBUSY? If not, maybe it 
> should be using the _busy_loop() call instead?
>
> The blocking vs non-blocking is about waiting for a response if the 
> command is successfully sent. The blocking case will sit and spin for 
> a reply, the non-blocking assumes success and expects an asynchronous 
> error report on failure. The assumption being that the call can't fail 
> unless something is already broken - i915 sending invalid data to GuC 
> for example. And thus any failure is in the BUG_ON category rather 
> than the try again with a different approach and/or try again later 
> category.
>
> This is the point of the change. We are currently getting timeout 
> errors when the H2G channel has space so the command can be sent, but 
> the channel already contains a lot of slow operations. The command has 
> been sent and will be processed successfully, it just takes longer 
> than the i915 timeout. Given that we don't actually care about the 
> completion response for this command, there is no point in either a) 
> sitting in a loop waiting for it or b) complaining that it doesn't 
> happen in a timely fashion. Hence the plan to make it non-blocking.
>
>>
>> Anyway, it sounds okay to me to use the non-blocking, but I would 
>> like to see some logging if the unexpected does happen. Hence I was 
>> suggesting the option of adding drm_notice logging if the send fails 
>> from the worker. (Because I think other callers would already 
>> propagate the error, like sysfs.)
>>
>>   err = slpc_force_min_freq(slpc, slpc->boost_freq);
>>   if (!err)
>>        slpc->num_boosts++;
>>   else
>>        drm_notice(... "Failed to send waitboost request (%d)", err);
> The only error this should ever report would be EBUSY when the H2G 
> channel is full. Anything else (ENODEV, EPIPE, etc.) means the system 
> is already toast and bigger errors will likely have already have been 
> reported.
>
> As above, maybe this should be looping on the EBUSY case. Presumably 
> it is safe to do so if it was already looping waiting for the 
> response. And then printing a notice level warning on more 
> catastrophic errors seems reasonable.

Not sure if we need an extensive busy loop here. All we are trying to do 
(through a tasklet) is bump the min freq. If that fails due to EBUSY, we 
will just end up retrying next time around.

Thanks,

Vinay.

>
> John.
>
>>
>> Something like that.
>>
>> Regards,
>>
>> Tvrtko
>>
>>
>>>> Even if we soften the blow here, the actual timeout error occurs in 
>>>> the intel_guc_ct.c code, so we cannot hide that error anyways. 
>>>> Making this call non-blocking will achieve both things.
>>>>
>>>> Thanks,
>>>>
>>>> Vinay.
>>>>
>>>>>
>>>>> Regards,
>>>>>
>>>>> Tvrtko
>>>>>
>>>>>> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
>>>>>> ---
>>>>>>   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 38 
>>>>>> ++++++++++++++++-----
>>>>>>   1 file changed, 30 insertions(+), 8 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
>>>>>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>>>>>> index 1db833da42df..c852f73cf521 100644
>>>>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>>>>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>>>>>> @@ -98,6 +98,30 @@ static u32 slpc_get_state(struct 
>>>>>> intel_guc_slpc *slpc)
>>>>>>       return data->header.global_state;
>>>>>>   }
>>>>>>   +static int guc_action_slpc_set_param_nb(struct intel_guc *guc, 
>>>>>> u8 id, u32 value)
>>>>>> +{
>>>>>> +    u32 request[] = {
>>>>>> +        GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
>>>>>> +        SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
>>>>>> +        id,
>>>>>> +        value,
>>>>>> +    };
>>>>>> +    int ret;
>>>>>> +
>>>>>> +    ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0);
>>>>>> +
>>>>>> +    return ret > 0 ? -EPROTO : ret;
>>>>>> +}
>>>>>> +
>>>>>> +static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, 
>>>>>> u32 value)
>>>>>> +{
>>>>>> +    struct intel_guc *guc = slpc_to_guc(slpc);
>>>>>> +
>>>>>> +    GEM_BUG_ON(id >= SLPC_MAX_PARAM);
>>>>>> +
>>>>>> +    return guc_action_slpc_set_param_nb(guc, id, value);
>>>>>> +}
>>>>>> +
>>>>>>   static int guc_action_slpc_set_param(struct intel_guc *guc, u8 
>>>>>> id, u32 value)
>>>>>>   {
>>>>>>       u32 request[] = {
>>>>>> @@ -208,12 +232,10 @@ static int slpc_force_min_freq(struct 
>>>>>> intel_guc_slpc *slpc, u32 freq)
>>>>>>        */
>>>>>>         with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
>>>>>> -        ret = slpc_set_param(slpc,
>>>>>> - SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
>>>>>> -                     freq);
>>>>>> -        if (ret)
>>>>>> -            i915_probe_error(i915, "Unable to force min freq to 
>>>>>> %u: %d",
>>>>>> -                     freq, ret);
>>>>>> +        /* Non-blocking request will avoid stalls */
>>>>>> +        ret = slpc_set_param_nb(slpc,
>>>>>> + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
>>>>>> +                    freq);
>>>>>>       }
>>>>>>         return ret;
>>>>>> @@ -231,8 +253,8 @@ static void slpc_boost_work(struct 
>>>>>> work_struct *work)
>>>>>>        */
>>>>>>       mutex_lock(&slpc->lock);
>>>>>>       if (atomic_read(&slpc->num_waiters)) {
>>>>>> -        slpc_force_min_freq(slpc, slpc->boost_freq);
>>>>>> -        slpc->num_boosts++;
>>>>>> +        if (!slpc_force_min_freq(slpc, slpc->boost_freq))
>>>>>> +            slpc->num_boosts++;
>>>>>>       }
>>>>>>       mutex_unlock(&slpc->lock);
>>>>>>   }
>>>
>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost
  2022-06-22 21:28     ` Dixit, Ashutosh
@ 2022-06-23  8:12       ` Tvrtko Ursulin
  0 siblings, 0 replies; 26+ messages in thread
From: Tvrtko Ursulin @ 2022-06-23  8:12 UTC (permalink / raw)
  To: Dixit, Ashutosh, Belgaumkar, Vinay; +Cc: intel-gfx, dri-devel


On 22/06/2022 22:28, Dixit, Ashutosh wrote:
> On Wed, 22 Jun 2022 13:30:23 -0700, Belgaumkar, Vinay wrote:
>> On 6/21/2022 5:26 PM, Dixit, Ashutosh wrote:
>>> On Sat, 14 May 2022 23:05:06 -0700, Vinay Belgaumkar wrote:
>>> The issue I have is what happens when we de-boost (restore min freq to its
>>> previous value in intel_guc_slpc_dec_waiters()). It would seem that that
>>> call is fairly important to get the min freq down when there are no pending
>>> requests. Therefore what do we do in that case?
>>>
>>> This is the function:
>>>
>>> void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc)
>>> {
>>>           mutex_lock(&slpc->lock);
>>>           if (atomic_dec_and_test(&slpc->num_waiters))
>>>                   slpc_force_min_freq(slpc, slpc->min_freq_softlimit);
>>>           mutex_unlock(&slpc->lock);
>>> }
>>>
>>>
>>> 1. First it would seem that at the minimum we need a similar drm_notice()
>>>      in intel_guc_slpc_dec_waiters(). That would mean we need to put the
>>>      drm_notice() back in slpc_force_min_freq() (replacing
>>>      i915_probe_error()) rather than in slpc_boost_work() above?
>> Sure.
>>>
>>> 2. Further, if de-boosting is important then maybe as was being discussed
>>>      in v1 of this patch (see the bottom of
>>>      https://patchwork.freedesktop.org/patch/485004/?series=103598&rev=1) do
>>>      we need to use intel_guc_send_busy_loop() in the
>>>      intel_guc_slpc_dec_waiters() code path?
>>
>> Using a busy_loop here would essentially be the same as blocking, right?
> 
> Well blocking waits for a response from GuC (so all previous requests need
> to be processed by GuC) whereas busy_loop() just waits for space to be
> available at the back of the queue (so just a few, or maybe just one,
> request have to be processed by GuC).
> 
>> And it could still fail/timeout with blocking as well (which is the problem
>> we are trying to solve here).
> 
> intel_guc_send_busy_loop() has an infinite wait without a drm_err()!! :)
> 
>> De-boosting is important, but in the worst case scenario, lets say this
>> request was not processed by GuC. This would happen only if the system
>> were really busy, which would mean there is a high likelihood we would
>> boost/de-boost again anyways and it would probably go through at that
>> point.
> 
> Not sure of this. The system was busy but now might have gone idle which is
> why we are trying to de-boost. But GuC queue might still be full so we may
> drop the de-boost request. Or if the system has gone really idle there will
> be space in the GuC queue.
> 
> Also the problem with intel_guc_send_busy_loop() is that it just has a
> sleep in it, so others might be adding requests in the GuC queue while
> busy_loop() was sleeping (to avoid such situations we'd need a SW queue in
> front of the real GuC queue).
> 
> So I am ok if we don't want to add intel_guc_send_busy_loop() for now and
> "wait and watch". Unless John suggests otherwise since I don't have any
> idea how likely this is to happen. If we change drm_notice to drm_err, 
> the CI will quickly tell us if this is happening.
> 
> Anyway, so at least let's move drm_notice (or drm_err) into
> slpc_force_min_freq() and I can ok the patch. Thanks.

I got a bit lost but I thought I suggested notice level? Is it the same 
log message you are discussing here? If so, I don't think it is an error 
strictly speaking but just an unexpected condition which should be noted 
(claim being it should never ever happen outside IGT). Maybe warning if 
you think notice is too low level?

Regards,

Tvrtko

> 
>>> At least we need to do 1. But for 2. we might as well just put
>>> intel_guc_send_busy_loop() in guc_action_slpc_set_param_nb()? In both cases
>>> (boost and de-boost) intel_guc_send_busy_loop() would be called from a work
>>> item so looks doable (the way we were previously doing the blocking call
>>> from the two places). Thoughts?
>>>
>>> Thanks.
>>> --
>>> Ashutosh

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost
  2022-06-23  0:32 Vinay Belgaumkar
@ 2022-06-23  0:53 ` Dixit, Ashutosh
  0 siblings, 0 replies; 26+ messages in thread
From: Dixit, Ashutosh @ 2022-06-23  0:53 UTC (permalink / raw)
  To: Vinay Belgaumkar; +Cc: intel-gfx, dri-devel

On Wed, 22 Jun 2022 17:32:25 -0700, Vinay Belgaumkar wrote:
>
> @@ -208,12 +232,14 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
>	 */
>
>	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
> -		ret = slpc_set_param(slpc,
> -				     SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
> -				     freq);
> +		/* Non-blocking request will avoid stalls */
> +		ret = slpc_set_param_nb(slpc,
> +					SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
> +					freq);
>		if (ret)
> -			i915_probe_error(i915, "Unable to force min freq to %u: %d",
> -					 freq, ret);
> +			drm_notice(&i915->drm,
> +				   "Failed to send set_param for min freq(%d): (%d)\n",
> +				   freq, ret);

I am still thinking if we should replace drm_notice() by i915_probe_error()
since drm_notice() will basically hide any issues of boost/de-boost's
getting dropped.

Another idea here might be to maintain a counter, say "slpc->failed_boosts"
which we increment each time slpc_set_param_nb() fails and dump that
counter via intel_guc_slpc_print_info().

Anyway for now this is:

Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost
@ 2022-06-23  0:32 Vinay Belgaumkar
  2022-06-23  0:53 ` Dixit, Ashutosh
  0 siblings, 1 reply; 26+ messages in thread
From: Vinay Belgaumkar @ 2022-06-23  0:32 UTC (permalink / raw)
  To: intel-gfx, dri-devel

SLPC min/max frequency updates require H2G calls. We are seeing
timeouts when GuC channel is backed up and it is unable to respond
in a timely fashion causing warnings and affecting CI.

This is seen when waitboosting happens during a stress test.
This patch updates the waitboost path to use a non-blocking
H2G call instead, which returns as soon as the message is
successfully transmitted.

v2: Use drm_notice to report any errors that might occur while
sending the waitboost H2G request (Tvrtko)
v3: Add drm_notice inside force_min_freq (Ashutosh)

Cc: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 42 +++++++++++++++++----
 1 file changed, 35 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 2df31af70d63..ec9c4ca0f615 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc)
 	return data->header.global_state;
 }
 
+static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 id, u32 value)
+{
+	u32 request[] = {
+		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+		SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
+		id,
+		value,
+	};
+	int ret;
+
+	ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0);
+
+	return ret > 0 ? -EPROTO : ret;
+}
+
+static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, u32 value)
+{
+	struct intel_guc *guc = slpc_to_guc(slpc);
+
+	GEM_BUG_ON(id >= SLPC_MAX_PARAM);
+
+	return guc_action_slpc_set_param_nb(guc, id, value);
+}
+
 static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value)
 {
 	u32 request[] = {
@@ -208,12 +232,14 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
 	 */
 
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
-		ret = slpc_set_param(slpc,
-				     SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
-				     freq);
+		/* Non-blocking request will avoid stalls */
+		ret = slpc_set_param_nb(slpc,
+					SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+					freq);
 		if (ret)
-			i915_probe_error(i915, "Unable to force min freq to %u: %d",
-					 freq, ret);
+			drm_notice(&i915->drm,
+				   "Failed to send set_param for min freq(%d): (%d)\n",
+				   freq, ret);
 	}
 
 	return ret;
@@ -222,6 +248,7 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
 static void slpc_boost_work(struct work_struct *work)
 {
 	struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work);
+	int err;
 
 	/*
 	 * Raise min freq to boost. It's possible that
@@ -231,8 +258,9 @@ static void slpc_boost_work(struct work_struct *work)
 	 */
 	mutex_lock(&slpc->lock);
 	if (atomic_read(&slpc->num_waiters)) {
-		slpc_force_min_freq(slpc, slpc->boost_freq);
-		slpc->num_boosts++;
+		err = slpc_force_min_freq(slpc, slpc->boost_freq);
+		if (!err)
+			slpc->num_boosts++;
 	}
 	mutex_unlock(&slpc->lock);
 }
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost
  2022-06-22 20:30   ` Belgaumkar, Vinay
@ 2022-06-22 21:28     ` Dixit, Ashutosh
  2022-06-23  8:12       ` Tvrtko Ursulin
  0 siblings, 1 reply; 26+ messages in thread
From: Dixit, Ashutosh @ 2022-06-22 21:28 UTC (permalink / raw)
  To: Belgaumkar, Vinay; +Cc: intel-gfx, dri-devel

On Wed, 22 Jun 2022 13:30:23 -0700, Belgaumkar, Vinay wrote:
> On 6/21/2022 5:26 PM, Dixit, Ashutosh wrote:
> > On Sat, 14 May 2022 23:05:06 -0700, Vinay Belgaumkar wrote:
> > The issue I have is what happens when we de-boost (restore min freq to its
> > previous value in intel_guc_slpc_dec_waiters()). It would seem that that
> > call is fairly important to get the min freq down when there are no pending
> > requests. Therefore what do we do in that case?
> >
> > This is the function:
> >
> > void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc)
> > {
> >          mutex_lock(&slpc->lock);
> >          if (atomic_dec_and_test(&slpc->num_waiters))
> >                  slpc_force_min_freq(slpc, slpc->min_freq_softlimit);
> >          mutex_unlock(&slpc->lock);
> > }
> >
> >
> > 1. First it would seem that at the minimum we need a similar drm_notice()
> >     in intel_guc_slpc_dec_waiters(). That would mean we need to put the
> >     drm_notice() back in slpc_force_min_freq() (replacing
> >     i915_probe_error()) rather than in slpc_boost_work() above?
> Sure.
> >
> > 2. Further, if de-boosting is important then maybe as was being discussed
> >     in v1 of this patch (see the bottom of
> >     https://patchwork.freedesktop.org/patch/485004/?series=103598&rev=1) do
> >     we need to use intel_guc_send_busy_loop() in the
> >     intel_guc_slpc_dec_waiters() code path?
>
> Using a busy_loop here would essentially be the same as blocking, right?

Well blocking waits for a response from GuC (so all previous requests need
to be processed by GuC) whereas busy_loop() just waits for space to be
available at the back of the queue (so just a few, or maybe just one,
request have to be processed by GuC).

> And it could still fail/timeout with blocking as well (which is the problem
> we are trying to solve here).

intel_guc_send_busy_loop() has an infinite wait without a drm_err()!! :)

> De-boosting is important, but in the worst case scenario, lets say this
> request was not processed by GuC. This would happen only if the system
> were really busy, which would mean there is a high likelihood we would
> boost/de-boost again anyways and it would probably go through at that
> point.

Not sure of this. The system was busy but now might have gone idle which is
why we are trying to de-boost. But GuC queue might still be full so we may
drop the de-boost request. Or if the system has gone really idle there will
be space in the GuC queue.

Also the problem with intel_guc_send_busy_loop() is that it just has a
sleep in it, so others might be adding requests in the GuC queue while
busy_loop() was sleeping (to avoid such situations we'd need a SW queue in
front of the real GuC queue).

So I am ok if we don't want to add intel_guc_send_busy_loop() for now and
"wait and watch". Unless John suggests otherwise since I don't have any
idea how likely this is to happen. If we change drm_notice to drm_err, the
CI will quickly tell us if this is happening.

Anyway, so at least let's move drm_notice (or drm_err) into
slpc_force_min_freq() and I can ok the patch. Thanks.

> > At least we need to do 1. But for 2. we might as well just put
> > intel_guc_send_busy_loop() in guc_action_slpc_set_param_nb()? In both cases
> > (boost and de-boost) intel_guc_send_busy_loop() would be called from a work
> > item so looks doable (the way we were previously doing the blocking call
> > from the two places). Thoughts?
> >
> > Thanks.
> > --
> > Ashutosh

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost
  2022-06-22  0:26 ` Dixit, Ashutosh
@ 2022-06-22 20:30   ` Belgaumkar, Vinay
  2022-06-22 21:28     ` Dixit, Ashutosh
  0 siblings, 1 reply; 26+ messages in thread
From: Belgaumkar, Vinay @ 2022-06-22 20:30 UTC (permalink / raw)
  To: Dixit, Ashutosh; +Cc: intel-gfx, dri-devel


On 6/21/2022 5:26 PM, Dixit, Ashutosh wrote:
> On Sat, 14 May 2022 23:05:06 -0700, Vinay Belgaumkar wrote:
>> SLPC min/max frequency updates require H2G calls. We are seeing
>> timeouts when GuC channel is backed up and it is unable to respond
>> in a timely fashion causing warnings and affecting CI.
>>
>> This is seen when waitboosting happens during a stress test.
>> this patch updates the waitboost path to use a non-blocking
>> H2G call instead, which returns as soon as the message is
>> successfully transmitted.
> Overall I am ok moving waitboost to use the non-blocking H2G. We can
> consider increasing the timeout in wait_for_ct_request_update() to be a
> separate issue for blocking cases and we can handle that separately.
>
> Still there a couple of issues with this patch mentioned below.
>
>> v2: Use drm_notice to report any errors that might occur while
>> sending the waitboost H2G request (Tvrtko)
>>
>> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
>> ---
>>   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 44 +++++++++++++++++----
>>   1 file changed, 36 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>> index 1db833da42df..e5e869c96262 100644
>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>> @@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc)
>> 	return data->header.global_state;
>>   }
>>
>> +static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 id, u32 value)
>> +{
>> +	u32 request[] = {
>> +		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
>> +		SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
>> +		id,
>> +		value,
>> +	};
>> +	int ret;
>> +
>> +	ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0);
>> +
>> +	return ret > 0 ? -EPROTO : ret;
>> +}
>> +
>> +static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, u32 value)
>> +{
>> +	struct intel_guc *guc = slpc_to_guc(slpc);
>> +
>> +	GEM_BUG_ON(id >= SLPC_MAX_PARAM);
>> +
>> +	return guc_action_slpc_set_param_nb(guc, id, value);
>> +}
>> +
>>   static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value)
>>   {
>> 	u32 request[] = {
>> @@ -208,12 +232,10 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
>> 	 */
>>
>> 	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
>> -		ret = slpc_set_param(slpc,
>> -				     SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
>> -				     freq);
>> -		if (ret)
>> -			i915_probe_error(i915, "Unable to force min freq to %u: %d",
>> -					 freq, ret);
>> +		/* Non-blocking request will avoid stalls */
>> +		ret = slpc_set_param_nb(slpc,
>> +					SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
>> +					freq);
>> 	}
>>
>> 	return ret;
>> @@ -222,6 +244,8 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
>>   static void slpc_boost_work(struct work_struct *work)
>>   {
>> 	struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work);
>> +	struct drm_i915_private *i915 = slpc_to_i915(slpc);
>> +	int err;
>>
>> 	/*
>> 	 * Raise min freq to boost. It's possible that
>> @@ -231,8 +255,12 @@ static void slpc_boost_work(struct work_struct *work)
>> 	 */
>> 	mutex_lock(&slpc->lock);
>> 	if (atomic_read(&slpc->num_waiters)) {
>> -		slpc_force_min_freq(slpc, slpc->boost_freq);
>> -		slpc->num_boosts++;
>> +		err = slpc_force_min_freq(slpc, slpc->boost_freq);
>> +		if (!err)
>> +			slpc->num_boosts++;
>> +		else
>> +			drm_notice(&i915->drm, "Failed to send waitboost request (%d)\n",
>> +				   err);
> The issue I have is what happens when we de-boost (restore min freq to its
> previous value in intel_guc_slpc_dec_waiters()). It would seem that that
> call is fairly important to get the min freq down when there are no pending
> requests. Therefore what do we do in that case?
>
> This is the function:
>
> void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc)
> {
>          mutex_lock(&slpc->lock);
>          if (atomic_dec_and_test(&slpc->num_waiters))
>                  slpc_force_min_freq(slpc, slpc->min_freq_softlimit);
>          mutex_unlock(&slpc->lock);
> }
>
>
> 1. First it would seem that at the minimum we need a similar drm_notice()
>     in intel_guc_slpc_dec_waiters(). That would mean we need to put the
>     drm_notice() back in slpc_force_min_freq() (replacing
>     i915_probe_error()) rather than in slpc_boost_work() above?
Sure.
>
> 2. Further, if de-boosting is important then maybe as was being discussed
>     in v1 of this patch (see the bottom of
>     https://patchwork.freedesktop.org/patch/485004/?series=103598&rev=1) do
>     we need to use intel_guc_send_busy_loop() in the
>     intel_guc_slpc_dec_waiters() code path?

Using a busy_loop here would essentially be the same as blocking, right? 
And it could still fail/timeout with blocking as well (which is the 
problem we are trying to solve here). De-boosting is important, but in 
the worst case scenario, let's say this request was not processed by GuC. 
This would happen only if the system were really busy, which would mean 
there is a high likelihood we would boost/de-boost again anyways and it 
would probably go through at that point.

Thanks,

Vinay.

>
> At least we need to do 1. But for 2. we might as well just put
> intel_guc_send_busy_loop() in guc_action_slpc_set_param_nb()? In both cases
> (boost and de-boost) intel_guc_send_busy_loop() would be called from a work
> item so looks doable (the way we were previously doing the blocking call
> from the two places). Thoughts?
>
> Thanks.
> --
> Ashutosh

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost
  2022-05-15  6:05 Vinay Belgaumkar
  2022-05-16  7:59 ` Jani Nikula
  2022-06-07 22:29 ` Dixit, Ashutosh
@ 2022-06-22  0:26 ` Dixit, Ashutosh
  2022-06-22 20:30   ` Belgaumkar, Vinay
  2 siblings, 1 reply; 26+ messages in thread
From: Dixit, Ashutosh @ 2022-06-22  0:26 UTC (permalink / raw)
  To: Vinay Belgaumkar; +Cc: intel-gfx, dri-devel

On Sat, 14 May 2022 23:05:06 -0700, Vinay Belgaumkar wrote:
>
> SLPC min/max frequency updates require H2G calls. We are seeing
> timeouts when GuC channel is backed up and it is unable to respond
> in a timely fashion causing warnings and affecting CI.
>
> This is seen when waitboosting happens during a stress test.
> this patch updates the waitboost path to use a non-blocking
> H2G call instead, which returns as soon as the message is
> successfully transmitted.

Overall I am ok moving waitboost to use the non-blocking H2G. We can
consider increasing the timeout in wait_for_ct_request_update() to be a
separate issue for blocking cases and we can handle that separately.

Still there a couple of issues with this patch mentioned below.

> v2: Use drm_notice to report any errors that might occur while
> sending the waitboost H2G request (Tvrtko)
>
> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 44 +++++++++++++++++----
>  1 file changed, 36 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
> index 1db833da42df..e5e869c96262 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
> @@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc)
>	return data->header.global_state;
>  }
>
> +static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 id, u32 value)
> +{
> +	u32 request[] = {
> +		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
> +		SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
> +		id,
> +		value,
> +	};
> +	int ret;
> +
> +	ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0);
> +
> +	return ret > 0 ? -EPROTO : ret;
> +}
> +
> +static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, u32 value)
> +{
> +	struct intel_guc *guc = slpc_to_guc(slpc);
> +
> +	GEM_BUG_ON(id >= SLPC_MAX_PARAM);
> +
> +	return guc_action_slpc_set_param_nb(guc, id, value);
> +}
> +
>  static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value)
>  {
>	u32 request[] = {
> @@ -208,12 +232,10 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
>	 */
>
>	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
> -		ret = slpc_set_param(slpc,
> -				     SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
> -				     freq);
> -		if (ret)
> -			i915_probe_error(i915, "Unable to force min freq to %u: %d",
> -					 freq, ret);
> +		/* Non-blocking request will avoid stalls */
> +		ret = slpc_set_param_nb(slpc,
> +					SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
> +					freq);
>	}
>
>	return ret;
> @@ -222,6 +244,8 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
>  static void slpc_boost_work(struct work_struct *work)
>  {
>	struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work);
> +	struct drm_i915_private *i915 = slpc_to_i915(slpc);
> +	int err;
>
>	/*
>	 * Raise min freq to boost. It's possible that
> @@ -231,8 +255,12 @@ static void slpc_boost_work(struct work_struct *work)
>	 */
>	mutex_lock(&slpc->lock);
>	if (atomic_read(&slpc->num_waiters)) {
> -		slpc_force_min_freq(slpc, slpc->boost_freq);
> -		slpc->num_boosts++;
> +		err = slpc_force_min_freq(slpc, slpc->boost_freq);
> +		if (!err)
> +			slpc->num_boosts++;
> +		else
> +			drm_notice(&i915->drm, "Failed to send waitboost request (%d)\n",
> +				   err);

The issue I have is what happens when we de-boost (restore min freq to its
previous value in intel_guc_slpc_dec_waiters()). It would seem that that
call is fairly important to get the min freq down when there are no pending
requests. Therefore what do we do in that case?

This is the function:

void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc)
{
        mutex_lock(&slpc->lock);
        if (atomic_dec_and_test(&slpc->num_waiters))
                slpc_force_min_freq(slpc, slpc->min_freq_softlimit);
        mutex_unlock(&slpc->lock);
}


1. First it would seem that at the minimum we need a similar drm_notice()
   in intel_guc_slpc_dec_waiters(). That would mean we need to put the
   drm_notice() back in slpc_force_min_freq() (replacing
   i915_probe_error()) rather than in slpc_boost_work() above?

2. Further, if de-boosting is important then maybe as was being discussed
   in v1 of this patch (see the bottom of
   https://patchwork.freedesktop.org/patch/485004/?series=103598&rev=1) do
   we need to use intel_guc_send_busy_loop() in the
   intel_guc_slpc_dec_waiters() code path?

At least we need to do 1. But for 2. we might as well just put
intel_guc_send_busy_loop() in guc_action_slpc_set_param_nb()? In both cases
(boost and de-boost) intel_guc_send_busy_loop() would be called from a work
item so looks doable (the way we were previously doing the blocking call
from the two places). Thoughts?

Thanks.
--
Ashutosh

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost
  2022-06-07 23:15   ` John Harrison
@ 2022-06-08 17:39     ` Dixit, Ashutosh
  0 siblings, 0 replies; 26+ messages in thread
From: Dixit, Ashutosh @ 2022-06-08 17:39 UTC (permalink / raw)
  To: John Harrison; +Cc: jeff.mcgee, intel-gfx, dri-devel

On Tue, 07 Jun 2022 16:15:19 -0700, John Harrison wrote:
>
> On 6/7/2022 15:29, Dixit, Ashutosh wrote:
> > On Sat, 14 May 2022 23:05:06 -0700, Vinay Belgaumkar wrote:
> >> SLPC min/max frequency updates require H2G calls. We are seeing
> >> timeouts when GuC channel is backed up and it is unable to respond
> >> in a timely fashion causing warnings and affecting CI.
> >>
> >> This is seen when waitboosting happens during a stress test.
> >> this patch updates the waitboost path to use a non-blocking
> >> H2G call instead, which returns as soon as the message is
> >> successfully transmitted.
> > Overall I think this patch is trying to paper over problems in the blocking
> > H2G CT interface (specifically the 1 second timeout in
> > wait_for_ct_request_update()). So I think we should address that problem in
> > the interface directly rather than having each client (SLPC and any future
> > client) work around the problem. Following points:
> >
> > 1. This patch seems to assume that it is 'ok' to ignore the return code
> >     from FW for a waitboost request (arguing waitboost is best effort so
> >     it's ok to 'fire and forget'). But the return code is still useful
> >     e.g. in cases where we see performance issues and want to go back and
> >     investigate if FW rejected any waitboost requests.
>
> You still get errors reported in the GuC log. Indeed, some errors (or at
> least error reasons) are only visible in the log not in the return code.

OK, so we at least have this method for debug available.

> > 2. We are already seeing that a 1 second timeout is not sufficient. So why
> >     not simply increase that timeout?
> >
> > 3. In fact if we are saying that the CT interface is a "reliable" interface
> >     (implying no message loss), to ensure reliability that timeout should
> >     not simply be increased, it should be made "infinite" (in quotes).
> >
> > 4. Maybe it would have been best to not have a "blocking" H2G interface at
> >     all (with the wait in wait_for_ct_request_update()). Just have an
> >     asynchronous interface (which mirrors the actual interface between FW
> >     and i915) in which clients register callbacks which are invoked when FW
> >     responds. If this is too big a change we can probably continue with the
> >     current blocking interface after increasing the timeout as mentioned
> >     above.
> >
> > 5. Finally, the waitboost request is just the most likely to get stuck at
> >     the back of a full CT queue since it happens during normal
> >     operation. Actually any request, say one initiated from sysfs, can also
> >     get similarly stuck at the back of a full queue. So any solution should
> >     also address that situation (where the return code is needed and
> >     similarly for a future client of the "blocking" (REQUEST/RESPONSE)
> >     interface).
> The blocking interface is only intended for init time operations, not
> runtime.

In that case we should probably have code to enforce this in i915.

> Stuff where the operation is meant to be synchronous and the KMD
> should not proceed until it has an ack back from the GuC that the update
> has taken place. All runtime operations are expected to be asynchronous. If
> a response is required, then it should be sent via an async
> callback. E.g. context de-registration is a 'fire and forget' H2G call but
> gets a 'deregistration complete' G2H notification when it is safe for the
> KMD to free up the associated storage.

At present all GuC interactions in intel_guc_slpc.c (in i915) do *not*
follow this. They use the REQUEST/RESPONSE FW interface which is pushed
through the blocking H2G CT interface in i915. If we are serious about this
this needs a GuC FW change to use bi-directional EVENT's used in the
asynchronous interface (with corresponding changes in intel_guc_slpc.c).

> There is an 'errors only' H2G mechanism. That will not send an ack back in
> the case of a successful H2G but will send back an error notification in
> the case of a failure. All async H2Gs should really be using that
> mechanism. I think Michal W did post a patch for it and I was meant to be
> reviewing it but it dropped of my radar due to other higher priorities.

These I believe are referred to as FAST_REQUEST's in GuC FW. That success
is not communicated back to the KMD might be an issue in cases where KMD
needs to know whether a particular operation was successful (such as
for operations initiated via sysfs).

Thanks.
--
Ashutosh

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost
  2022-06-07 23:04     ` John Harrison
@ 2022-06-08  7:58       ` Jani Nikula
  0 siblings, 0 replies; 26+ messages in thread
From: Jani Nikula @ 2022-06-08  7:58 UTC (permalink / raw)
  To: John Harrison, Vinay Belgaumkar, intel-gfx, dri-devel

On Tue, 07 Jun 2022, John Harrison <john.c.harrison@intel.com> wrote:
> Oops. Just saw your follow up message. No worries!

Again, sorry for the noise, and for wasting your time!

BR,
Jani.

-- 
Jani Nikula, Intel Open Source Graphics Center

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost
  2022-06-07 22:29 ` Dixit, Ashutosh
@ 2022-06-07 23:15   ` John Harrison
  2022-06-08 17:39     ` Dixit, Ashutosh
  0 siblings, 1 reply; 26+ messages in thread
From: John Harrison @ 2022-06-07 23:15 UTC (permalink / raw)
  To: Dixit, Ashutosh, Vinay Belgaumkar; +Cc: intel-gfx, dri-devel

On 6/7/2022 15:29, Dixit, Ashutosh wrote:
> On Sat, 14 May 2022 23:05:06 -0700, Vinay Belgaumkar wrote:
>> SLPC min/max frequency updates require H2G calls. We are seeing
>> timeouts when GuC channel is backed up and it is unable to respond
>> in a timely fashion causing warnings and affecting CI.
>>
>> This is seen when waitboosting happens during a stress test.
>> this patch updates the waitboost path to use a non-blocking
>> H2G call instead, which returns as soon as the message is
>> successfully transmitted.
> Overall I think this patch is trying to paper over problems in the blocking
> H2G CT interface (specifically the 1 second timeout in
> wait_for_ct_request_update()). So I think we should address that problem in
> the interface directly rather than having each client (SLPC and any future
> client) work around the problem. Following points:
>
> 1. This patch seems to assume that it is 'ok' to ignore the return code
>     from FW for a waitboost request (arguing waitboost is best effort so
>     it's ok to 'fire and forget'). But the return code is still useful
>     e.g. in cases where we see performance issues and want to go back and
>     investigate if FW rejected any waitboost requests.
You still get errors reported in the GuC log. Indeed, some errors (or at 
least error reasons) are only visible in the log not in the return code.

>
> 2. We are already seeing that a 1 second timeout is not sufficient. So why
>     not simply increase that timeout?
>
> 3. In fact if we are saying that the CT interface is a "reliable" interface
>     (implying no message loss), to ensure reliability that timeout should
>     not simply be increased, it should be made "infinite" (in quotes).
>
> 4. Maybe it would have been best to not have a "blocking" H2G interface at
>     all (with the wait in wait_for_ct_request_update()). Just have an
>     asynchronous interface (which mirrors the actual interface between FW
>     and i915) in which clients register callbacks which are invoked when FW
>     responds. If this is too big a change we can probably continue with the
>     current blocking interface after increasing the timeout as mentioned
>     above.
>
> 5. Finally, the waitboost request is just the most likely to get stuck at
>     the back of a full CT queue since it happens during normal
>     operation. Actually any request, say one initiated from sysfs, can also
>     get similarly stuck at the back of a full queue. So any solution should
>     also address that situation (where the return code is needed and
>     similarly for a future client of the "blocking" (REQUEST/RESPONSE)
>     interface).
The blocking interface is only intended for init time operations, not 
runtime. Stuff where the operation is meant to be synchronous and the 
KMD should not proceed until it has an ack back from the GuC that the 
update has taken place. All runtime operations are expected to be 
asynchronous. If a response is required, then it should be sent via an 
async callback. E.g. context de-registration is a 'fire and forget' H2G 
call but gets a 'deregistration complete' G2H notification when it is 
safe for the KMD to free up the associated storage.

There is an 'errors only' H2G mechanism. That will not send an ack back 
in the case of a successful H2G but will send back an error notification 
in the case of a failure. All async H2Gs should really be using that 
mechanism. I think Michal W did post a patch for it and I was meant to 
be reviewing it but it dropped off my radar due to other higher priorities.

John.

>
> Thanks.
> --
> Ashutosh


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost
  2022-06-07 23:02   ` John Harrison
@ 2022-06-07 23:04     ` John Harrison
  2022-06-08  7:58       ` Jani Nikula
  0 siblings, 1 reply; 26+ messages in thread
From: John Harrison @ 2022-06-07 23:04 UTC (permalink / raw)
  To: Jani Nikula, Vinay Belgaumkar, intel-gfx, dri-devel

On 6/7/2022 16:02, John Harrison wrote:
> On 5/16/2022 00:59, Jani Nikula wrote:
>> On Sat, 14 May 2022, Vinay Belgaumkar<vinay.belgaumkar@intel.com>  wrote:
>>> SLPC min/max frequency updates require H2G calls. We are seeing
>>> timeouts when GuC channel is backed up and it is unable to respond
>>> in a timely fashion causing warnings and affecting CI.
>>>
>>> This is seen when waitboosting happens during a stress test.
>>> this patch updates the waitboost path to use a non-blocking
>>> H2G call instead, which returns as soon as the message is
>>> successfully transmitted.
>>>
>>> v2: Use drm_notice to report any errors that might occur while
>>> sending the waitboost H2G request (Tvrtko)
>>>
>>> Signed-off-by: Vinay Belgaumkar<vinay.belgaumkar@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 44 +++++++++++++++++----
>>>   1 file changed, 36 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>>> index 1db833da42df..e5e869c96262 100644
>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>>> @@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc)
>>>   	return data->header.global_state;
>>>   }
>>>   
>>> +static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 id, u32 value)
>>> +{
>>> +	u32 request[] = {
>> static const
>>
>>> +		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
>>> +		SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
>>> +		id,
>>> +		value,
>>> +	};
>>> +	int ret;
>>> +
>>> +	ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0);
>>> +
>>> +	return ret > 0 ? -EPROTO : ret;
>>> +}
>>> +
>>> +static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, u32 value)
>>> +{
>>> +	struct intel_guc *guc = slpc_to_guc(slpc);
>>> +
>>> +	GEM_BUG_ON(id >= SLPC_MAX_PARAM);
>>> +
>>> +	return guc_action_slpc_set_param_nb(guc, id, value);
>>> +}
>>> +
>>>   static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value)
>>>   {
>>>   	u32 request[] = {
>> Ditto here, and the whole gt/uc directory seems to have tons of these
>> u32 action/request array variables on stack, with the required
>> initialization, that could be in rodata.
>>
>> Please fix all of them.
>>
>> BR,
>> Jani.
> But the only constant is the action code. Everything else is 
> parameters and will be different on each call.
> ...

Oops. Just saw your follow up message. No worries!

John.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost
  2022-05-16  7:59 ` Jani Nikula
  2022-05-16  8:00   ` Jani Nikula
@ 2022-06-07 23:02   ` John Harrison
  2022-06-07 23:04     ` John Harrison
  1 sibling, 1 reply; 26+ messages in thread
From: John Harrison @ 2022-06-07 23:02 UTC (permalink / raw)
  To: Jani Nikula, Vinay Belgaumkar, intel-gfx, dri-devel

[-- Attachment #1: Type: text/plain, Size: 4519 bytes --]

On 5/16/2022 00:59, Jani Nikula wrote:
> On Sat, 14 May 2022, Vinay Belgaumkar<vinay.belgaumkar@intel.com>  wrote:
>> SLPC min/max frequency updates require H2G calls. We are seeing
>> timeouts when GuC channel is backed up and it is unable to respond
>> in a timely fashion causing warnings and affecting CI.
>>
>> This is seen when waitboosting happens during a stress test.
>> this patch updates the waitboost path to use a non-blocking
>> H2G call instead, which returns as soon as the message is
>> successfully transmitted.
>>
>> v2: Use drm_notice to report any errors that might occur while
>> sending the waitboost H2G request (Tvrtko)
>>
>> Signed-off-by: Vinay Belgaumkar<vinay.belgaumkar@intel.com>
>> ---
>>   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 44 +++++++++++++++++----
>>   1 file changed, 36 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>> index 1db833da42df..e5e869c96262 100644
>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>> @@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc)
>>   	return data->header.global_state;
>>   }
>>   
>> +static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 id, u32 value)
>> +{
>> +	u32 request[] = {
> static const
>
>> +		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
>> +		SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
>> +		id,
>> +		value,
>> +	};
>> +	int ret;
>> +
>> +	ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0);
>> +
>> +	return ret > 0 ? -EPROTO : ret;
>> +}
>> +
>> +static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, u32 value)
>> +{
>> +	struct intel_guc *guc = slpc_to_guc(slpc);
>> +
>> +	GEM_BUG_ON(id >= SLPC_MAX_PARAM);
>> +
>> +	return guc_action_slpc_set_param_nb(guc, id, value);
>> +}
>> +
>>   static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value)
>>   {
>>   	u32 request[] = {
> Ditto here, and the whole gt/uc directory seems to have tons of these
> u32 action/request array variables on stack, with the required
> initialization, that could be in rodata.
>
> Please fix all of them.
>
> BR,
> Jani.
But the only constant is the action code. Everything else is parameters 
and will be different on each call.

You mean something like this?

    static const u32 template[] = {
         action,
    };
    u32 *request = kmalloc_array(sizeof(*request), 4);
    memcpy(request, template, sizeof(*request) * 1);
    request[1] = param0;
    request[2] = param1;
    request[3] = param2;
    ret = send(request);
    kfree(request);
    return ret;


Not seeing how that would be an improvement. It's a lot more code, a lot 
less readable, more prone to bugs due to incorrect structure sizes 
and/or params in the wrong place. The current version is easy to read 
and therefore to maintain, almost impossible to get wrong, and only puts 
a few words on the stack. I think the largest request is in the region of 15 
words? I'm not seeing what the problem is.

John.


>> @@ -208,12 +232,10 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
>>   	 */
>>   
>>   	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
>> -		ret = slpc_set_param(slpc,
>> -				     SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
>> -				     freq);
>> -		if (ret)
>> -			i915_probe_error(i915, "Unable to force min freq to %u: %d",
>> -					 freq, ret);
>> +		/* Non-blocking request will avoid stalls */
>> +		ret = slpc_set_param_nb(slpc,
>> +					SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
>> +					freq);
>>   	}
>>   
>>   	return ret;
>> @@ -222,6 +244,8 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
>>   static void slpc_boost_work(struct work_struct *work)
>>   {
>>   	struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work);
>> +	struct drm_i915_private *i915 = slpc_to_i915(slpc);
>> +	int err;
>>   
>>   	/*
>>   	 * Raise min freq to boost. It's possible that
>> @@ -231,8 +255,12 @@ static void slpc_boost_work(struct work_struct *work)
>>   	 */
>>   	mutex_lock(&slpc->lock);
>>   	if (atomic_read(&slpc->num_waiters)) {
>> -		slpc_force_min_freq(slpc, slpc->boost_freq);
>> -		slpc->num_boosts++;
>> +		err = slpc_force_min_freq(slpc, slpc->boost_freq);
>> +		if (!err)
>> +			slpc->num_boosts++;
>> +		else
>> +			drm_notice(&i915->drm, "Failed to send waitboost request (%d)\n",
>> +				   err);
>>   	}
>>   	mutex_unlock(&slpc->lock);
>>   }

[-- Attachment #2: Type: text/html, Size: 5486 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost
  2022-05-15  6:05 Vinay Belgaumkar
  2022-05-16  7:59 ` Jani Nikula
@ 2022-06-07 22:29 ` Dixit, Ashutosh
  2022-06-07 23:15   ` John Harrison
  2022-06-22  0:26 ` Dixit, Ashutosh
  2 siblings, 1 reply; 26+ messages in thread
From: Dixit, Ashutosh @ 2022-06-07 22:29 UTC (permalink / raw)
  To: Vinay Belgaumkar; +Cc: intel-gfx, dri-devel

On Sat, 14 May 2022 23:05:06 -0700, Vinay Belgaumkar wrote:
>
> SLPC min/max frequency updates require H2G calls. We are seeing
> timeouts when GuC channel is backed up and it is unable to respond
> in a timely fashion causing warnings and affecting CI.
>
> This is seen when waitboosting happens during a stress test.
> this patch updates the waitboost path to use a non-blocking
> H2G call instead, which returns as soon as the message is
> successfully transmitted.

Overall I think this patch is trying to paper over problems in the blocking
H2G CT interface (specifically the 1 second timeout in
wait_for_ct_request_update()). So I think we should address that problem in
the interface directly rather than having each client (SLPC and any future
client) work around the problem. Following points:

1. This patch seems to assume that it is 'ok' to ignore the return code
   from FW for a waitboost request (arguing waitboost is best effort so
   it's ok to 'fire and forget'). But the return code is still useful
   e.g. in cases where we see performance issues and want to go back and
   investigate if FW rejected any waitboost requests.

2. We are already seeing that a 1 second timeout is not sufficient. So why
   not simply increase that timeout?

3. In fact if we are saying that the CT interface is a "reliable" interface
   (implying no message loss), to ensure reliability that timeout should
   not simply be increased, it should be made "infinite" (in quotes).

4. Maybe it would have been best to not have a "blocking" H2G interface at
   all (with the wait in wait_for_ct_request_update()). Just have an
   asynchronous interface (which mirrors the actual interface between FW
   and i915) in which clients register callbacks which are invoked when FW
   responds. If this is too big a change we can probably continue with the
   current blocking interface after increasing the timeout as mentioned
   above.

5. Finally, the waitboost request is just the most likely to get stuck at
   the back of a full CT queue since it happens during normal
   operation. Actually any request, say one initiated from sysfs, can also
   get similarly stuck at the back of a full queue. So any solution should
   also address that situation (where the return code is needed and
   similarly for a future client of the "blocking" (REQUEST/RESPONSE)
   interface).

Thanks.
--
Ashutosh

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost
  2022-05-16  7:59 ` Jani Nikula
@ 2022-05-16  8:00   ` Jani Nikula
  2022-06-07 23:02   ` John Harrison
  1 sibling, 0 replies; 26+ messages in thread
From: Jani Nikula @ 2022-05-16  8:00 UTC (permalink / raw)
  To: Vinay Belgaumkar, intel-gfx, dri-devel

On Mon, 16 May 2022, Jani Nikula <jani.nikula@linux.intel.com> wrote:
> On Sat, 14 May 2022, Vinay Belgaumkar <vinay.belgaumkar@intel.com> wrote:
>> SLPC min/max frequency updates require H2G calls. We are seeing
>> timeouts when GuC channel is backed up and it is unable to respond
>> in a timely fashion causing warnings and affecting CI.
>>
>> This is seen when waitboosting happens during a stress test.
>> This patch updates the waitboost path to use a non-blocking
>> H2G call instead, which returns as soon as the message is
>> successfully transmitted.
>>
>> v2: Use drm_notice to report any errors that might occur while
>> sending the waitboost H2G request (Tvrtko)
>>
>> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
>> ---
>>  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 44 +++++++++++++++++----
>>  1 file changed, 36 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>> index 1db833da42df..e5e869c96262 100644
>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
>> @@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc)
>>  	return data->header.global_state;
>>  }
>>  
>> +static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 id, u32 value)
>> +{
>> +	u32 request[] = {
>
> static const

*sigh*

-ENOCOFFEE, please ignore the mail.

BR,
Jani.


>
>> +		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
>> +		SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
>> +		id,
>> +		value,
>> +	};
>> +	int ret;
>> +
>> +	ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0);
>> +
>> +	return ret > 0 ? -EPROTO : ret;
>> +}
>> +
>> +static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, u32 value)
>> +{
>> +	struct intel_guc *guc = slpc_to_guc(slpc);
>> +
>> +	GEM_BUG_ON(id >= SLPC_MAX_PARAM);
>> +
>> +	return guc_action_slpc_set_param_nb(guc, id, value);
>> +}
>> +
>>  static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value)
>>  {
>>  	u32 request[] = {
>
> Ditto here, and the whole gt/uc directory seems to have tons of these
> u32 action/request array variables on stack, with the required
> initialization, that could be in rodata.
>
> Please fix all of them.
>
> BR,
> Jani.
>
>> @@ -208,12 +232,10 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
>>  	 */
>>  
>>  	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
>> -		ret = slpc_set_param(slpc,
>> -				     SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
>> -				     freq);
>> -		if (ret)
>> -			i915_probe_error(i915, "Unable to force min freq to %u: %d",
>> -					 freq, ret);
>> +		/* Non-blocking request will avoid stalls */
>> +		ret = slpc_set_param_nb(slpc,
>> +					SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
>> +					freq);
>>  	}
>>  
>>  	return ret;
>> @@ -222,6 +244,8 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
>>  static void slpc_boost_work(struct work_struct *work)
>>  {
>>  	struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work);
>> +	struct drm_i915_private *i915 = slpc_to_i915(slpc);
>> +	int err;
>>  
>>  	/*
>>  	 * Raise min freq to boost. It's possible that
>> @@ -231,8 +255,12 @@ static void slpc_boost_work(struct work_struct *work)
>>  	 */
>>  	mutex_lock(&slpc->lock);
>>  	if (atomic_read(&slpc->num_waiters)) {
>> -		slpc_force_min_freq(slpc, slpc->boost_freq);
>> -		slpc->num_boosts++;
>> +		err = slpc_force_min_freq(slpc, slpc->boost_freq);
>> +		if (!err)
>> +			slpc->num_boosts++;
>> +		else
>> +			drm_notice(&i915->drm, "Failed to send waitboost request (%d)\n",
>> +				   err);
>>  	}
>>  	mutex_unlock(&slpc->lock);
>>  }

-- 
Jani Nikula, Intel Open Source Graphics Center

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost
  2022-05-15  6:05 Vinay Belgaumkar
@ 2022-05-16  7:59 ` Jani Nikula
  2022-05-16  8:00   ` Jani Nikula
  2022-06-07 23:02   ` John Harrison
  2022-06-07 22:29 ` Dixit, Ashutosh
  2022-06-22  0:26 ` Dixit, Ashutosh
  2 siblings, 2 replies; 26+ messages in thread
From: Jani Nikula @ 2022-05-16  7:59 UTC (permalink / raw)
  To: Vinay Belgaumkar, intel-gfx, dri-devel

On Sat, 14 May 2022, Vinay Belgaumkar <vinay.belgaumkar@intel.com> wrote:
> SLPC min/max frequency updates require H2G calls. We are seeing
> timeouts when GuC channel is backed up and it is unable to respond
> in a timely fashion causing warnings and affecting CI.
>
> This is seen when waitboosting happens during a stress test.
> This patch updates the waitboost path to use a non-blocking
> H2G call instead, which returns as soon as the message is
> successfully transmitted.
>
> v2: Use drm_notice to report any errors that might occur while
> sending the waitboost H2G request (Tvrtko)
>
> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 44 +++++++++++++++++----
>  1 file changed, 36 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
> index 1db833da42df..e5e869c96262 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
> @@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc)
>  	return data->header.global_state;
>  }
>  
> +static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 id, u32 value)
> +{
> +	u32 request[] = {

static const

> +		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
> +		SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
> +		id,
> +		value,
> +	};
> +	int ret;
> +
> +	ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0);
> +
> +	return ret > 0 ? -EPROTO : ret;
> +}
> +
> +static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, u32 value)
> +{
> +	struct intel_guc *guc = slpc_to_guc(slpc);
> +
> +	GEM_BUG_ON(id >= SLPC_MAX_PARAM);
> +
> +	return guc_action_slpc_set_param_nb(guc, id, value);
> +}
> +
>  static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value)
>  {
>  	u32 request[] = {

Ditto here, and the whole gt/uc directory seems to have tons of these
u32 action/request array variables on stack, with the required
initialization, that could be in rodata.

Please fix all of them.

BR,
Jani.

> @@ -208,12 +232,10 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
>  	 */
>  
>  	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
> -		ret = slpc_set_param(slpc,
> -				     SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
> -				     freq);
> -		if (ret)
> -			i915_probe_error(i915, "Unable to force min freq to %u: %d",
> -					 freq, ret);
> +		/* Non-blocking request will avoid stalls */
> +		ret = slpc_set_param_nb(slpc,
> +					SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
> +					freq);
>  	}
>  
>  	return ret;
> @@ -222,6 +244,8 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
>  static void slpc_boost_work(struct work_struct *work)
>  {
>  	struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work);
> +	struct drm_i915_private *i915 = slpc_to_i915(slpc);
> +	int err;
>  
>  	/*
>  	 * Raise min freq to boost. It's possible that
> @@ -231,8 +255,12 @@ static void slpc_boost_work(struct work_struct *work)
>  	 */
>  	mutex_lock(&slpc->lock);
>  	if (atomic_read(&slpc->num_waiters)) {
> -		slpc_force_min_freq(slpc, slpc->boost_freq);
> -		slpc->num_boosts++;
> +		err = slpc_force_min_freq(slpc, slpc->boost_freq);
> +		if (!err)
> +			slpc->num_boosts++;
> +		else
> +			drm_notice(&i915->drm, "Failed to send waitboost request (%d)\n",
> +				   err);
>  	}
>  	mutex_unlock(&slpc->lock);
>  }

-- 
Jani Nikula, Intel Open Source Graphics Center

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost
@ 2022-05-15  6:05 Vinay Belgaumkar
  2022-05-16  7:59 ` Jani Nikula
                   ` (2 more replies)
  0 siblings, 3 replies; 26+ messages in thread
From: Vinay Belgaumkar @ 2022-05-15  6:05 UTC (permalink / raw)
  To: intel-gfx, dri-devel

SLPC min/max frequency updates require H2G calls. We are seeing
timeouts when GuC channel is backed up and it is unable to respond
in a timely fashion causing warnings and affecting CI.

This is seen when waitboosting happens during a stress test.
This patch updates the waitboost path to use a non-blocking
H2G call instead, which returns as soon as the message is
successfully transmitted.

v2: Use drm_notice to report any errors that might occur while
sending the waitboost H2G request (Tvrtko)

Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 44 +++++++++++++++++----
 1 file changed, 36 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 1db833da42df..e5e869c96262 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc)
 	return data->header.global_state;
 }
 
+static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 id, u32 value)
+{
+	u32 request[] = {
+		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+		SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
+		id,
+		value,
+	};
+	int ret;
+
+	ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0);
+
+	return ret > 0 ? -EPROTO : ret;
+}
+
+static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, u32 value)
+{
+	struct intel_guc *guc = slpc_to_guc(slpc);
+
+	GEM_BUG_ON(id >= SLPC_MAX_PARAM);
+
+	return guc_action_slpc_set_param_nb(guc, id, value);
+}
+
 static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value)
 {
 	u32 request[] = {
@@ -208,12 +232,10 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
 	 */
 
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
-		ret = slpc_set_param(slpc,
-				     SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
-				     freq);
-		if (ret)
-			i915_probe_error(i915, "Unable to force min freq to %u: %d",
-					 freq, ret);
+		/* Non-blocking request will avoid stalls */
+		ret = slpc_set_param_nb(slpc,
+					SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+					freq);
 	}
 
 	return ret;
@@ -222,6 +244,8 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
 static void slpc_boost_work(struct work_struct *work)
 {
 	struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work);
+	struct drm_i915_private *i915 = slpc_to_i915(slpc);
+	int err;
 
 	/*
 	 * Raise min freq to boost. It's possible that
@@ -231,8 +255,12 @@ static void slpc_boost_work(struct work_struct *work)
 	 */
 	mutex_lock(&slpc->lock);
 	if (atomic_read(&slpc->num_waiters)) {
-		slpc_force_min_freq(slpc, slpc->boost_freq);
-		slpc->num_boosts++;
+		err = slpc_force_min_freq(slpc, slpc->boost_freq);
+		if (!err)
+			slpc->num_boosts++;
+		else
+			drm_notice(&i915->drm, "Failed to send waitboost request (%d)\n",
+				   err);
 	}
 	mutex_unlock(&slpc->lock);
 }
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2022-06-23  8:12 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-05  5:40 [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost Vinay Belgaumkar
2022-05-05  5:40 ` [Intel-gfx] " Vinay Belgaumkar
2022-05-05  6:37 ` [Intel-gfx] ✓ Fi.CI.BAT: success for " Patchwork
2022-05-05 11:12 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
2022-05-05 12:13 ` [Intel-gfx] [PATCH] " Tvrtko Ursulin
2022-05-05 17:21   ` Belgaumkar, Vinay
2022-05-05 18:36     ` John Harrison
2022-05-06  7:18       ` Tvrtko Ursulin
2022-05-06 16:21         ` Belgaumkar, Vinay
2022-05-06 16:43         ` John Harrison
2022-05-15  5:46           ` Belgaumkar, Vinay
2022-05-15  6:05 Vinay Belgaumkar
2022-05-16  7:59 ` Jani Nikula
2022-05-16  8:00   ` Jani Nikula
2022-06-07 23:02   ` John Harrison
2022-06-07 23:04     ` John Harrison
2022-06-08  7:58       ` Jani Nikula
2022-06-07 22:29 ` Dixit, Ashutosh
2022-06-07 23:15   ` John Harrison
2022-06-08 17:39     ` Dixit, Ashutosh
2022-06-22  0:26 ` Dixit, Ashutosh
2022-06-22 20:30   ` Belgaumkar, Vinay
2022-06-22 21:28     ` Dixit, Ashutosh
2022-06-23  8:12       ` Tvrtko Ursulin
2022-06-23  0:32 Vinay Belgaumkar
2022-06-23  0:53 ` Dixit, Ashutosh

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.