All of lore.kernel.org
 help / color / mirror / Atom feed
* [Intel-gfx] [PATCH] drm/i915: Change semantics of context isolation reporting to UM
@ 2022-04-29 15:11 Adrian Larumbe
  2022-04-29 17:19 ` [Intel-gfx] ✗ Fi.CI.SPARSE: warning for drm/i915: Change semantics of context isolation reporting to UM (rev2) Patchwork
                   ` (3 more replies)
  0 siblings, 4 replies; 15+ messages in thread
From: Adrian Larumbe @ 2022-04-29 15:11 UTC (permalink / raw)
  To: daniel, intel-gfx; +Cc: adrian.larumbe

I915_PARAM_HAS_CONTEXT_ISOLATION was already being used as a boolean by
both Iris and Vulkan , and stood for the guarantee that, when creating a
new context, all state set by it will not leak to any other context.

However the actual return value was a bitmask where every bit stood for an
initialised engine, and IGT test gem_ctx_isolation makes use of this mask
for deciding on the actual context engine isolation status.

However, we do not provide UAPI for IGT tests, so the value returned by the
PARAM ioctl has to reflect Mesa usage as a boolean.

This change only made sense after compute engine support was added to the
driver in commit 944823c9463916dd53f3 ("drm/i915/xehp: Define compute class
and engine") because no context isolation can be assumed on any device with
both RCS annd CCS engines.

Signed-off-by: Adrian Larumbe <adrian.larumbe@collabora.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_user.c | 13 ++++++++++++-
 drivers/gpu/drm/i915/gt/intel_engine_user.h |  1 +
 drivers/gpu/drm/i915/i915_drm_client.h      |  2 +-
 drivers/gpu/drm/i915/i915_getparam.c        |  2 +-
 include/uapi/drm/i915_drm.h                 | 14 +++-----------
 5 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 0f6cd96b459f..2d6bd36d6150 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -47,7 +47,7 @@ static const u8 uabi_classes[] = {
 	[COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY,
 	[VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO,
 	[VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE,
-	/* TODO: Add COMPUTE_CLASS mapping once ABI is available */
+	[COMPUTE_CLASS] = I915_ENGINE_CLASS_COMPUTE,
 };
 
 static int engine_cmp(void *priv, const struct list_head *A,
@@ -306,3 +306,14 @@ unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915)
 
 	return which;
 }
+
+bool intel_cross_engine_isolated(struct drm_i915_private *i915)
+{
+	unsigned int which = intel_engines_has_context_isolation(i915);
+
+	if ((which & BIT(I915_ENGINE_CLASS_RENDER)) &&
+	    (which & BIT(I915_ENGINE_CLASS_COMPUTE)))
+		return false;
+
+	return !!which;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.h b/drivers/gpu/drm/i915/gt/intel_engine_user.h
index 3dc7e8ab9fbc..ff21349db4d4 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.h
@@ -15,6 +15,7 @@ struct intel_engine_cs *
 intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
 
 unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
+bool intel_cross_engine_isolated(struct drm_i915_private *i915);
 
 void intel_engine_add_user(struct intel_engine_cs *engine);
 void intel_engines_driver_register(struct drm_i915_private *i915);
diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
index 5f5b02b01ba0..f796c5e8e060 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.h
+++ b/drivers/gpu/drm/i915/i915_drm_client.h
@@ -13,7 +13,7 @@
 
 #include "gt/intel_engine_types.h"
 
-#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_VIDEO_ENHANCE
+#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
 
 struct drm_i915_private;
 
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index c12a0adefda5..3d5120d2d78a 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -145,7 +145,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
 		value = 1;
 		break;
 	case I915_PARAM_HAS_CONTEXT_ISOLATION:
-		value = intel_engines_has_context_isolation(i915);
+		value = intel_cross_engine_isolated(i915);
 		break;
 	case I915_PARAM_SLICE_MASK:
 		value = sseu->slice_mask;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 35ca528803fd..84c0af77cc1f 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -166,6 +166,7 @@ enum drm_i915_gem_engine_class {
 	I915_ENGINE_CLASS_COPY		= 1,
 	I915_ENGINE_CLASS_VIDEO		= 2,
 	I915_ENGINE_CLASS_VIDEO_ENHANCE	= 3,
+	I915_ENGINE_CLASS_COMPUTE	= 4,
 
 	/* should be kept compact */
 
@@ -635,17 +636,8 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
 
 /*
- * Query whether every context (both per-file default and user created) is
- * isolated (insofar as HW supports). If this parameter is not true, then
- * freshly created contexts may inherit values from an existing context,
- * rather than default HW values. If true, it also ensures (insofar as HW
- * supports) that all state set by this context will not leak to any other
- * context.
- *
- * As not every engine across every gen support contexts, the returned
- * value reports the support of context isolation for individual engines by
- * returning a bitmask of each engine class set to true if that class supports
- * isolation.
+ * Query whether the device can make cross-engine isolation guarantees for
+ * all the engines whose default state has been initialised.
  */
 #define I915_PARAM_HAS_CONTEXT_ISOLATION 50
 
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Intel-gfx] ✗ Fi.CI.SPARSE: warning for drm/i915: Change semantics of context isolation reporting to UM (rev2)
  2022-04-29 15:11 [Intel-gfx] [PATCH] drm/i915: Change semantics of context isolation reporting to UM Adrian Larumbe
@ 2022-04-29 17:19 ` Patchwork
  2022-04-29 17:51 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2022-04-29 17:19 UTC (permalink / raw)
  To: Adrian Larumbe; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Change semantics of context isolation reporting to UM (rev2)
URL   : https://patchwork.freedesktop.org/series/103343/
State : warning

== Summary ==

Error: dim sparse failed
Sparse version: v0.6.2
Fast mode used, each commit won't be checked separately.



^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Intel-gfx] ✗ Fi.CI.BAT: failure for drm/i915: Change semantics of context isolation reporting to UM (rev2)
  2022-04-29 15:11 [Intel-gfx] [PATCH] drm/i915: Change semantics of context isolation reporting to UM Adrian Larumbe
  2022-04-29 17:19 ` [Intel-gfx] ✗ Fi.CI.SPARSE: warning for drm/i915: Change semantics of context isolation reporting to UM (rev2) Patchwork
@ 2022-04-29 17:51 ` Patchwork
  2022-05-03 14:44 ` [Intel-gfx] [PATCH] drm/i915: Change semantics of context isolation reporting to UM Tvrtko Ursulin
  2022-05-04 12:24   ` [Intel-gfx] " Daniel Vetter
  3 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2022-04-29 17:51 UTC (permalink / raw)
  To: Adrian Larumbe; +Cc: intel-gfx

[-- Attachment #1: Type: text/plain, Size: 8949 bytes --]

== Series Details ==

Series: drm/i915: Change semantics of context isolation reporting to UM (rev2)
URL   : https://patchwork.freedesktop.org/series/103343/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_11582 -> Patchwork_103343v2
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_103343v2 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_103343v2, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103343v2/index.html

Participating hosts (43 -> 44)
------------------------------

  Additional (2): bat-rpls-1 fi-rkl-11600 
  Missing    (1): fi-bsw-cyan 

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_103343v2:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_selftest@live@gt_lrc:
    - fi-tgl-1115g4:      [PASS][1] -> [DMESG-FAIL][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11582/fi-tgl-1115g4/igt@i915_selftest@live@gt_lrc.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103343v2/fi-tgl-1115g4/igt@i915_selftest@live@gt_lrc.html

  
Known issues
------------

  Here are the changes found in Patchwork_103343v2 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_huc_copy@huc-copy:
    - fi-rkl-11600:       NOTRUN -> [SKIP][3] ([i915#2190])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103343v2/fi-rkl-11600/igt@gem_huc_copy@huc-copy.html

  * igt@gem_lmem_swapping@basic:
    - fi-rkl-11600:       NOTRUN -> [SKIP][4] ([i915#4613]) +3 similar issues
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103343v2/fi-rkl-11600/igt@gem_lmem_swapping@basic.html

  * igt@gem_tiled_pread_basic:
    - fi-rkl-11600:       NOTRUN -> [SKIP][5] ([i915#3282])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103343v2/fi-rkl-11600/igt@gem_tiled_pread_basic.html

  * igt@i915_pm_backlight@basic-brightness:
    - fi-rkl-11600:       NOTRUN -> [SKIP][6] ([i915#3012])
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103343v2/fi-rkl-11600/igt@i915_pm_backlight@basic-brightness.html

  * igt@i915_selftest@live@gt_lrc:
    - fi-rkl-guc:         [PASS][7] -> [INCOMPLETE][8] ([i915#4983])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11582/fi-rkl-guc/igt@i915_selftest@live@gt_lrc.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103343v2/fi-rkl-guc/igt@i915_selftest@live@gt_lrc.html

  * igt@i915_selftest@live@hangcheck:
    - fi-hsw-4770:        [PASS][9] -> [INCOMPLETE][10] ([i915#4785])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11582/fi-hsw-4770/igt@i915_selftest@live@hangcheck.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103343v2/fi-hsw-4770/igt@i915_selftest@live@hangcheck.html
    - fi-bdw-5557u:       NOTRUN -> [INCOMPLETE][11] ([i915#3921])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103343v2/fi-bdw-5557u/igt@i915_selftest@live@hangcheck.html

  * igt@i915_selftest@live@late_gt_pm:
    - fi-bsw-n3050:       [PASS][12] -> [DMESG-FAIL][13] ([i915#3428])
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11582/fi-bsw-n3050/igt@i915_selftest@live@late_gt_pm.html
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103343v2/fi-bsw-n3050/igt@i915_selftest@live@late_gt_pm.html

  * igt@kms_chamelium@dp-crc-fast:
    - fi-rkl-11600:       NOTRUN -> [SKIP][14] ([fdo#111827]) +8 similar issues
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103343v2/fi-rkl-11600/igt@kms_chamelium@dp-crc-fast.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic:
    - fi-rkl-11600:       NOTRUN -> [SKIP][15] ([i915#4070] / [i915#4103]) +1 similar issue
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103343v2/fi-rkl-11600/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html

  * igt@kms_force_connector_basic@force-load-detect:
    - fi-rkl-11600:       NOTRUN -> [SKIP][16] ([fdo#109285] / [i915#4098])
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103343v2/fi-rkl-11600/igt@kms_force_connector_basic@force-load-detect.html

  * igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d:
    - fi-rkl-11600:       NOTRUN -> [SKIP][17] ([i915#4070] / [i915#533])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103343v2/fi-rkl-11600/igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d.html

  * igt@kms_psr@primary_mmap_gtt:
    - fi-rkl-11600:       NOTRUN -> [SKIP][18] ([i915#1072]) +3 similar issues
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103343v2/fi-rkl-11600/igt@kms_psr@primary_mmap_gtt.html

  * igt@kms_setmode@basic-clone-single-crtc:
    - fi-rkl-11600:       NOTRUN -> [SKIP][19] ([i915#3555] / [i915#4098])
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103343v2/fi-rkl-11600/igt@kms_setmode@basic-clone-single-crtc.html

  * igt@prime_vgem@basic-userptr:
    - fi-rkl-11600:       NOTRUN -> [SKIP][20] ([i915#3301] / [i915#3708])
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103343v2/fi-rkl-11600/igt@prime_vgem@basic-userptr.html

  * igt@prime_vgem@basic-write:
    - fi-rkl-11600:       NOTRUN -> [SKIP][21] ([i915#3291] / [i915#3708]) +2 similar issues
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103343v2/fi-rkl-11600/igt@prime_vgem@basic-write.html

  * igt@runner@aborted:
    - fi-hsw-4770:        NOTRUN -> [FAIL][22] ([fdo#109271] / [i915#4312] / [i915#5594])
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103343v2/fi-hsw-4770/igt@runner@aborted.html
    - fi-bsw-n3050:       NOTRUN -> [FAIL][23] ([fdo#109271] / [i915#3428] / [i915#4312])
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103343v2/fi-bsw-n3050/igt@runner@aborted.html

  
#### Possible fixes ####

  * igt@i915_selftest@live@mman:
    - fi-bdw-5557u:       [INCOMPLETE][24] ([i915#5704]) -> [PASS][25]
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11582/fi-bdw-5557u/igt@i915_selftest@live@mman.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103343v2/fi-bdw-5557u/igt@i915_selftest@live@mman.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109285]: https://bugs.freedesktop.org/show_bug.cgi?id=109285
  [fdo#111827]: https://bugs.freedesktop.org/show_bug.cgi?id=111827
  [i915#1072]: https://gitlab.freedesktop.org/drm/intel/issues/1072
  [i915#2190]: https://gitlab.freedesktop.org/drm/intel/issues/2190
  [i915#3012]: https://gitlab.freedesktop.org/drm/intel/issues/3012
  [i915#3282]: https://gitlab.freedesktop.org/drm/intel/issues/3282
  [i915#3291]: https://gitlab.freedesktop.org/drm/intel/issues/3291
  [i915#3301]: https://gitlab.freedesktop.org/drm/intel/issues/3301
  [i915#3428]: https://gitlab.freedesktop.org/drm/intel/issues/3428
  [i915#3555]: https://gitlab.freedesktop.org/drm/intel/issues/3555
  [i915#3576]: https://gitlab.freedesktop.org/drm/intel/issues/3576
  [i915#3708]: https://gitlab.freedesktop.org/drm/intel/issues/3708
  [i915#3921]: https://gitlab.freedesktop.org/drm/intel/issues/3921
  [i915#4070]: https://gitlab.freedesktop.org/drm/intel/issues/4070
  [i915#4098]: https://gitlab.freedesktop.org/drm/intel/issues/4098
  [i915#4103]: https://gitlab.freedesktop.org/drm/intel/issues/4103
  [i915#4312]: https://gitlab.freedesktop.org/drm/intel/issues/4312
  [i915#4613]: https://gitlab.freedesktop.org/drm/intel/issues/4613
  [i915#4785]: https://gitlab.freedesktop.org/drm/intel/issues/4785
  [i915#4983]: https://gitlab.freedesktop.org/drm/intel/issues/4983
  [i915#5270]: https://gitlab.freedesktop.org/drm/intel/issues/5270
  [i915#533]: https://gitlab.freedesktop.org/drm/intel/issues/533
  [i915#5356]: https://gitlab.freedesktop.org/drm/intel/issues/5356
  [i915#5594]: https://gitlab.freedesktop.org/drm/intel/issues/5594
  [i915#5704]: https://gitlab.freedesktop.org/drm/intel/issues/5704


Build changes
-------------

  * Linux: CI_DRM_11582 -> Patchwork_103343v2

  CI-20190529: 20190529
  CI_DRM_11582: 4c61a333113eb2bfd41c9e735fc4307f5e14646b @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_6463: 0793746970d24ee41d5cfe9905e7532ea1530721 @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
  Patchwork_103343v2: 4c61a333113eb2bfd41c9e735fc4307f5e14646b @ git://anongit.freedesktop.org/gfx-ci/linux


### Linux commits

ca506db7d47a drm/i915: Change semantics of context isolation reporting to UM

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103343v2/index.html

[-- Attachment #2: Type: text/html, Size: 10241 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Change semantics of context isolation reporting to UM
  2022-04-29 15:11 [Intel-gfx] [PATCH] drm/i915: Change semantics of context isolation reporting to UM Adrian Larumbe
  2022-04-29 17:19 ` [Intel-gfx] ✗ Fi.CI.SPARSE: warning for drm/i915: Change semantics of context isolation reporting to UM (rev2) Patchwork
  2022-04-29 17:51 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
@ 2022-05-03 14:44 ` Tvrtko Ursulin
  2022-05-04 11:50   ` Adrian Larumbe
  2022-05-04 12:24   ` [Intel-gfx] " Daniel Vetter
  3 siblings, 1 reply; 15+ messages in thread
From: Tvrtko Ursulin @ 2022-05-03 14:44 UTC (permalink / raw)
  To: Adrian Larumbe, daniel, intel-gfx


Hi,

On 29/04/2022 16:11, Adrian Larumbe wrote:
> I915_PARAM_HAS_CONTEXT_ISOLATION was already being used as a boolean by
> both Iris and Vulkan , and stood for the guarantee that, when creating a
> new context, all state set by it will not leak to any other context.
> 
> However the actual return value was a bitmask where every bit stood for an
> initialised engine, and IGT test gem_ctx_isolation makes use of this mask
> for deciding on the actual context engine isolation status.
> 
> However, we do not provide UAPI for IGT tests, so the value returned by the
> PARAM ioctl has to reflect Mesa usage as a boolean.

a)
I suggest splitting into two patches. One changes the semantics to boolean, second one changes it for RCS+CCS.

b)
What about test coverage - both for platforms with RCS+CSS (media and blitter stop being tested - all coverage is gone basically) and also for pre Gen8 platforms, are there failures expected there? (Test will try to run on some engines which do not support isolation now, no?)

> This change only made sense after compute engine support was added to the
> driver in commit 944823c9463916dd53f3 ("drm/i915/xehp: Define compute class
> and engine") because no context isolation can be assumed on any device with
> both RCS annd CCS engines.

Isn't it an arbitrary decision when thinking about other engine classes (media, blitter) on those platforms? Commit message should be clear in that respect.

Also, looking at iris:

    if (iris_getparam_integer(fd, I915_PARAM_HAS_CONTEXT_ISOLATION) <= 0) {
       debug_error("Kernel is too old for Iris. Consider upgrading to kernel v4.16.\n");
       return NULL;
    }

Won't this make Iris fail on RCS+CCS platforms - or I need to look at a newer branch/pull request? What is the plan there?

> Signed-off-by: Adrian Larumbe <adrian.larumbe@collabora.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine_user.c | 13 ++++++++++++-
>   drivers/gpu/drm/i915/gt/intel_engine_user.h |  1 +
>   drivers/gpu/drm/i915/i915_drm_client.h      |  2 +-
>   drivers/gpu/drm/i915/i915_getparam.c        |  2 +-
>   include/uapi/drm/i915_drm.h                 | 14 +++-----------
>   5 files changed, 18 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> index 0f6cd96b459f..2d6bd36d6150 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> @@ -47,7 +47,7 @@ static const u8 uabi_classes[] = {
>   	[COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY,
>   	[VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO,
>   	[VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE,
> -	/* TODO: Add COMPUTE_CLASS mapping once ABI is available */
> +	[COMPUTE_CLASS] = I915_ENGINE_CLASS_COMPUTE,

I think this landed so you will need to rebase.

>   };
>   
>   static int engine_cmp(void *priv, const struct list_head *A,
> @@ -306,3 +306,14 @@ unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915)
>   
>   	return which;
>   }
> +
> +bool intel_cross_engine_isolated(struct drm_i915_private *i915)

Naming feels lackluster (Intel what?). Do you expect other callers or could just implement the check in i915_getparam.c, inside the switch statement?

> +{
> +	unsigned int which = intel_engines_has_context_isolation(i915);
> +
> +	if ((which & BIT(I915_ENGINE_CLASS_RENDER)) &&
> +	    (which & BIT(I915_ENGINE_CLASS_COMPUTE)))
> +		return false;
> +
> +	return !!which;

You could first just check if there are any RCS and CCS engines (for instance i915->engine_uabi_class_count[], or RCS/CCS_MASK()).

   /* Comment here to explain the decision. */
   if (RCS_MASK(&i915->gt) | CCS_MASK(&i915->gt))
	value = 0;
   else
         value = !!intel_engines_has_context_isolation(i915);

?

There also may be little point in even calling intel_engines_has_context_isolation, when the desired output is a boolean and could just make it feature or Gen based. Decision for later though, after some clarifications.

> +}
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.h b/drivers/gpu/drm/i915/gt/intel_engine_user.h
> index 3dc7e8ab9fbc..ff21349db4d4 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_user.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.h
> @@ -15,6 +15,7 @@ struct intel_engine_cs *
>   intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
>   
>   unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
> +bool intel_cross_engine_isolated(struct drm_i915_private *i915);
>   
>   void intel_engine_add_user(struct intel_engine_cs *engine);
>   void intel_engines_driver_register(struct drm_i915_private *i915);
> diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
> index 5f5b02b01ba0..f796c5e8e060 100644
> --- a/drivers/gpu/drm/i915/i915_drm_client.h
> +++ b/drivers/gpu/drm/i915/i915_drm_client.h
> @@ -13,7 +13,7 @@
>   
>   #include "gt/intel_engine_types.h"
>   
> -#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_VIDEO_ENHANCE
> +#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
>   
>   struct drm_i915_private;
>   
> diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
> index c12a0adefda5..3d5120d2d78a 100644
> --- a/drivers/gpu/drm/i915/i915_getparam.c
> +++ b/drivers/gpu/drm/i915/i915_getparam.c
> @@ -145,7 +145,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
>   		value = 1;
>   		break;
>   	case I915_PARAM_HAS_CONTEXT_ISOLATION:
> -		value = intel_engines_has_context_isolation(i915);
> +		value = intel_cross_engine_isolated(i915);
>   		break;
>   	case I915_PARAM_SLICE_MASK:
>   		value = sseu->slice_mask;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 35ca528803fd..84c0af77cc1f 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -166,6 +166,7 @@ enum drm_i915_gem_engine_class {
>   	I915_ENGINE_CLASS_COPY		= 1,
>   	I915_ENGINE_CLASS_VIDEO		= 2,
>   	I915_ENGINE_CLASS_VIDEO_ENHANCE	= 3,
> +	I915_ENGINE_CLASS_COMPUTE	= 4,
>   
>   	/* should be kept compact */
>   
> @@ -635,17 +636,8 @@ typedef struct drm_i915_irq_wait {
>   #define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
>   
>   /*
> - * Query whether every context (both per-file default and user created) is
> - * isolated (insofar as HW supports). If this parameter is not true, then
> - * freshly created contexts may inherit values from an existing context,
> - * rather than default HW values. If true, it also ensures (insofar as HW
> - * supports) that all state set by this context will not leak to any other
> - * context.
> - *
> - * As not every engine across every gen support contexts, the returned
> - * value reports the support of context isolation for individual engines by
> - * returning a bitmask of each engine class set to true if that class supports
> - * isolation.
> + * Query whether the device can make cross-engine isolation guarantees for
> + * all the engines whose default state has been initialised.

"Engine whose default state has been initialised" does not sound very helpful for userspace developers. Like what determines if that has happened or not, and the fact userspace is more about context state rather than engine state.

Existing text which talked about engines supporting contexts and not leaking state sounded better TBF. So overall I think you deleted too much of the text. Can't you just change the ending, from the point where it goes into individual engines and bitmap, replacing with the new explanation?

Regards,

Tvrtko

>    */
>   #define I915_PARAM_HAS_CONTEXT_ISOLATION 50
>   

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Change semantics of context isolation reporting to UM
  2022-05-03 14:44 ` [Intel-gfx] [PATCH] drm/i915: Change semantics of context isolation reporting to UM Tvrtko Ursulin
@ 2022-05-04 11:50   ` Adrian Larumbe
  0 siblings, 0 replies; 15+ messages in thread
From: Adrian Larumbe @ 2022-05-04 11:50 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

Hi, Tvrtko

On 03.05.2022 15:44, Tvrtko Ursulin wrote:
> 
> Hi,
> 
> On 29/04/2022 16:11, Adrian Larumbe wrote:
> > I915_PARAM_HAS_CONTEXT_ISOLATION was already being used as a boolean by
> > both Iris and Vulkan , and stood for the guarantee that, when creating a
> > new context, all state set by it will not leak to any other context.
> > 
> > However the actual return value was a bitmask where every bit stood for an
> > initialised engine, and IGT test gem_ctx_isolation makes use of this mask
> > for deciding on the actual context engine isolation status.
> > 
> > However, we do not provide UAPI for IGT tests, so the value returned by the
> > PARAM ioctl has to reflect Mesa usage as a boolean.
> 
> a)
> I suggest splitting into two patches. One changes the semantics to boolean, second one changes it for RCS+CCS.
> 
> b)
> What about test coverage - both for platforms with RCS+CSS (media and blitter stop being tested - all coverage is gone basically) and also for pre Gen8 platforms, are there failures expected there? (Test will try to run on some engines which do not support isolation now, no?)

Do you mean IGT tests? I think gem_ctx_isolation.c has to be rewritten so that
the engine isolation status of the different devices is somehow hard-coded
into the test, perhaps something like the intel_device_info struct in the
kernel.

> This change only made sense after compute engine support was added to the
> driver in commit 944823c9463916dd53f3 ("drm/i915/xehp: Define compute class
> and engine") because no context isolation can be assumed on any device with
> both RCS annd CCS engines.

>Isn't it an arbitrary decision when thinking about other engine classes (media, blitter) on those platforms? Commit message should be clear in that respect.

I think the change was required because the addition of a CCS engine broke
pre-existing assumptions about engine context isolation, but only when
coexisting with an RCS engine in the same device. I wouldn't know about other
engines, but I'll ask around.

> Also, looking at iris:
> 
>    if (iris_getparam_integer(fd, I915_PARAM_HAS_CONTEXT_ISOLATION) <= 0) {
>       debug_error("Kernel is too old for Iris. Consider upgrading to kernel v4.16.\n");
>       return NULL;
>    }
> 
> Won't this make Iris fail on RCS+CCS platforms - or I need to look at a newer branch/pull request? What is the plan there?

Yes, I think I misread this code and didn't realise, when there isn't context
isolation, this snippet will fail. However, given the semantics of it, which I
glean from the error message between the brackets, I'd say context isolation not
being present shouldn't cause it to fail. So I guess it could be rewritten as

    if (iris_getparam_integer(fd, I915_PARAM_HAS_CONTEXT_ISOLATION) < 0) {

> Signed-off-by: Adrian Larumbe <adrian.larumbe@collabora.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine_user.c | 13 ++++++++++++-
>   drivers/gpu/drm/i915/gt/intel_engine_user.h |  1 +
>   drivers/gpu/drm/i915/i915_drm_client.h      |  2 +-
>   drivers/gpu/drm/i915/i915_getparam.c        |  2 +-
>   include/uapi/drm/i915_drm.h                 | 14 +++-----------
>   5 files changed, 18 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> index 0f6cd96b459f..2d6bd36d6150 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> @@ -47,7 +47,7 @@ static const u8 uabi_classes[] = {
>   	[COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY,
>   	[VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO,
>   	[VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE,
> -	/* TODO: Add COMPUTE_CLASS mapping once ABI is available */
> +	[COMPUTE_CLASS] = I915_ENGINE_CLASS_COMPUTE,

> > I think this landed so you will need to rebase.
> 
> >   };
> >   static int engine_cmp(void *priv, const struct list_head *A,
> > @@ -306,3 +306,14 @@ unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915)
> >   	return which;
> >   }
> > +
> > +bool intel_cross_engine_isolated(struct drm_i915_private *i915)
> 
> Naming feels lackluster (Intel what?). Do you expect other callers or could just implement the check in i915_getparam.c, inside the switch statement?

No other callers, so I guess I should do like you suggested below, handle the logic straightaway from i915_getparam_ioctl.

> +{
> +	unsigned int which = intel_engines_has_context_isolation(i915);
> +
> +	if ((which & BIT(I915_ENGINE_CLASS_RENDER)) &&
> +	    (which & BIT(I915_ENGINE_CLASS_COMPUTE)))
> +		return false;
> +
> +	return !!which;

> You could first just check if there are any RCS and CCS engines (for instance i915->engine_uabi_class_count[], or RCS/CCS_MASK()).
> 
>   /* Comment here to explain the decision. */
>   if (RCS_MASK(&i915->gt) | CCS_MASK(&i915->gt))
> 	value = 0;
>   else
>         value = !!intel_engines_has_context_isolation(i915);
> 
> ?
> 
> There also may be little point in even calling intel_engines_has_context_isolation, when the desired output is a boolean and could just make it feature or Gen based. Decision for later though, after some clarifications.

> +}
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.h b/drivers/gpu/drm/i915/gt/intel_engine_user.h
> index 3dc7e8ab9fbc..ff21349db4d4 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_user.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.h
> @@ -15,6 +15,7 @@ struct intel_engine_cs *
>   intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
>   unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
> +bool intel_cross_engine_isolated(struct drm_i915_private *i915);
>   void intel_engine_add_user(struct intel_engine_cs *engine);
>   void intel_engines_driver_register(struct drm_i915_private *i915);
> diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
> index 5f5b02b01ba0..f796c5e8e060 100644
> --- a/drivers/gpu/drm/i915/i915_drm_client.h
> +++ b/drivers/gpu/drm/i915/i915_drm_client.h
> @@ -13,7 +13,7 @@
>   #include "gt/intel_engine_types.h"
> -#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_VIDEO_ENHANCE
> +#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
>   struct drm_i915_private;
> diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
> index c12a0adefda5..3d5120d2d78a 100644
> --- a/drivers/gpu/drm/i915/i915_getparam.c
> +++ b/drivers/gpu/drm/i915/i915_getparam.c
> @@ -145,7 +145,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
>   		value = 1;
>   		break;
>   	case I915_PARAM_HAS_CONTEXT_ISOLATION:
> -		value = intel_engines_has_context_isolation(i915);
> +		value = intel_cross_engine_isolated(i915);
>   		break;
>   	case I915_PARAM_SLICE_MASK:
>   		value = sseu->slice_mask;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 35ca528803fd..84c0af77cc1f 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -166,6 +166,7 @@ enum drm_i915_gem_engine_class {
>   	I915_ENGINE_CLASS_COPY		= 1,
>   	I915_ENGINE_CLASS_VIDEO		= 2,
>   	I915_ENGINE_CLASS_VIDEO_ENHANCE	= 3,
> +	I915_ENGINE_CLASS_COMPUTE	= 4,
>   	/* should be kept compact */
> @@ -635,17 +636,8 @@ typedef struct drm_i915_irq_wait {
>   #define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
>   /*
> - * Query whether every context (both per-file default and user created) is
> - * isolated (insofar as HW supports). If this parameter is not true, then
> - * freshly created contexts may inherit values from an existing context,
> - * rather than default HW values. If true, it also ensures (insofar as HW
> - * supports) that all state set by this context will not leak to any other
> - * context.
> - *
> - * As not every engine across every gen support contexts, the returned
> - * value reports the support of context isolation for individual engines by
> - * returning a bitmask of each engine class set to true if that class supports
> - * isolation.
> + * Query whether the device can make cross-engine isolation guarantees for
> + * all the engines whose default state has been initialised.

> "Engine whose default state has been initialised" does not sound very helpful for userspace developers. Like what determines if that has happened or not, and the fact userspace is more about context state rather than engine state.
> 
> Existing text which talked about engines supporting contexts and not leaking state sounded better TBF. So overall I think you deleted too much of the text. Can't you just change the ending, from the point where it goes into individual engines and bitmap, replacing with the new explanation?

I made a mistake here. I assumed mentioning this would be relevant because
engine isolation is only checked for engines whose default state had been
assigned in __engines_record_defaults, but like you said this is a KM detail
userspace developers shouldn't concern themselves with, so I'll restore that
part of the previous text.

> Regards,
> 
> Tvrtko

>    */
>   #define I915_PARAM_HAS_CONTEXT_ISOLATION 50

Kind Regards,
Adrian Larumbe

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] drm/i915: Change semantics of context isolation reporting to UM
  2022-04-29 15:11 [Intel-gfx] [PATCH] drm/i915: Change semantics of context isolation reporting to UM Adrian Larumbe
@ 2022-05-04 12:24   ` Daniel Vetter
  2022-04-29 17:51 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 15+ messages in thread
From: Daniel Vetter @ 2022-05-04 12:24 UTC (permalink / raw)
  To: Adrian Larumbe, Matthew D Roper
  Cc: bob.beckett, Tvrtko Ursulin, intel-gfx, dri-devel

On Fri, 29 Apr 2022 at 17:11, Adrian Larumbe
<adrian.larumbe@collabora.com> wrote:
> I915_PARAM_HAS_CONTEXT_ISOLATION was already being used as a boolean by
> both Iris and Vulkan , and stood for the guarantee that, when creating a
> new context, all state set by it will not leak to any other context.
>
> However the actual return value was a bitmask where every bit stood for an
> initialised engine, and IGT test gem_ctx_isolation makes use of this mask
> for deciding on the actual context engine isolation status.
>
> However, we do not provide UAPI for IGT tests, so the value returned by the
> PARAM ioctl has to reflect Mesa usage as a boolean.
>
> This change only made sense after compute engine support was added to the
> driver in commit 944823c9463916dd53f3 ("drm/i915/xehp: Define compute class
> and engine") because no context isolation can be assumed on any device with
> both RCS annd CCS engines.
>
> Signed-off-by: Adrian Larumbe <adrian.larumbe@collabora.com>

Top level post and adding Matt Roper and dri-devel.

This was meant as a simple cleanup after CCS enabling in upstream, but
that CCS enabling seems to have gone wrong.

What I thought we should be done for CCS enabling is the following:
- actually have some igt-side hardcoded assumption about how much
engines are isolated from each another, which is a hw property. I
think some of that landed, but it's very incomplete

- convert all igt tests over to that. At least gem_ctx_isolation.c is
not converted over, as Adrian pointed out.

- once igt stopped using this context isolation getparam (we do not,
ever, create uapi just for testcases), fix up the uapi to what iris
actually needs, which is _only_ a boolean which indicates whether the
kernel's context setup code leaks register state from existing
contexts to newly created ones. Which is the bug iris works around
here, where using iris caused gpu hangs in libva. Iow, the kernel
should always and unconditionally return true here. Check out iris
history for details please, actual iris usage has nothing to do with
any other cross-context or cross-engine isolation guarantee we're
making, it's purely about whether our hw ctx code is buggy or not and
leaks state between clients, because we accidentally used the
currently running ctx as template instead of a fixed one created once
at driver load.

Matt, since the CCS enabling on the igt validation side looks very
incomplete I'm leaning very much towards "pls revert, try again".

Cheers, Daniel

> ---
>  drivers/gpu/drm/i915/gt/intel_engine_user.c | 13 ++++++++++++-
>  drivers/gpu/drm/i915/gt/intel_engine_user.h |  1 +
>  drivers/gpu/drm/i915/i915_drm_client.h      |  2 +-
>  drivers/gpu/drm/i915/i915_getparam.c        |  2 +-
>  include/uapi/drm/i915_drm.h                 | 14 +++-----------
>  5 files changed, 18 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> index 0f6cd96b459f..2d6bd36d6150 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> @@ -47,7 +47,7 @@ static const u8 uabi_classes[] = {
>         [COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY,
>         [VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO,
>         [VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE,
> -       /* TODO: Add COMPUTE_CLASS mapping once ABI is available */
> +       [COMPUTE_CLASS] = I915_ENGINE_CLASS_COMPUTE,
>  };
>
>  static int engine_cmp(void *priv, const struct list_head *A,
> @@ -306,3 +306,14 @@ unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915)
>
>         return which;
>  }
> +
> +bool intel_cross_engine_isolated(struct drm_i915_private *i915)
> +{
> +       unsigned int which = intel_engines_has_context_isolation(i915);
> +
> +       if ((which & BIT(I915_ENGINE_CLASS_RENDER)) &&
> +           (which & BIT(I915_ENGINE_CLASS_COMPUTE)))
> +               return false;
> +
> +       return !!which;
> +}
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.h b/drivers/gpu/drm/i915/gt/intel_engine_user.h
> index 3dc7e8ab9fbc..ff21349db4d4 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_user.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.h
> @@ -15,6 +15,7 @@ struct intel_engine_cs *
>  intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
>
>  unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
> +bool intel_cross_engine_isolated(struct drm_i915_private *i915);
>
>  void intel_engine_add_user(struct intel_engine_cs *engine);
>  void intel_engines_driver_register(struct drm_i915_private *i915);
> diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
> index 5f5b02b01ba0..f796c5e8e060 100644
> --- a/drivers/gpu/drm/i915/i915_drm_client.h
> +++ b/drivers/gpu/drm/i915/i915_drm_client.h
> @@ -13,7 +13,7 @@
>
>  #include "gt/intel_engine_types.h"
>
> -#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_VIDEO_ENHANCE
> +#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
>
>  struct drm_i915_private;
>
> diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
> index c12a0adefda5..3d5120d2d78a 100644
> --- a/drivers/gpu/drm/i915/i915_getparam.c
> +++ b/drivers/gpu/drm/i915/i915_getparam.c
> @@ -145,7 +145,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
>                 value = 1;
>                 break;
>         case I915_PARAM_HAS_CONTEXT_ISOLATION:
> -               value = intel_engines_has_context_isolation(i915);
> +               value = intel_cross_engine_isolated(i915);
>                 break;
>         case I915_PARAM_SLICE_MASK:
>                 value = sseu->slice_mask;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 35ca528803fd..84c0af77cc1f 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -166,6 +166,7 @@ enum drm_i915_gem_engine_class {
>         I915_ENGINE_CLASS_COPY          = 1,
>         I915_ENGINE_CLASS_VIDEO         = 2,
>         I915_ENGINE_CLASS_VIDEO_ENHANCE = 3,
> +       I915_ENGINE_CLASS_COMPUTE       = 4,
>
>         /* should be kept compact */
>
> @@ -635,17 +636,8 @@ typedef struct drm_i915_irq_wait {
>  #define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
>
>  /*
> - * Query whether every context (both per-file default and user created) is
> - * isolated (insofar as HW supports). If this parameter is not true, then
> - * freshly created contexts may inherit values from an existing context,
> - * rather than default HW values. If true, it also ensures (insofar as HW
> - * supports) that all state set by this context will not leak to any other
> - * context.
> - *
> - * As not every engine across every gen support contexts, the returned
> - * value reports the support of context isolation for individual engines by
> - * returning a bitmask of each engine class set to true if that class supports
> - * isolation.
> + * Query whether the device can make cross-engine isolation guarantees for
> + * all the engines whose default state has been initialised.
>   */
>  #define I915_PARAM_HAS_CONTEXT_ISOLATION 50
>
> --
> 2.35.1
>


-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Change semantics of context isolation reporting to UM
@ 2022-05-04 12:24   ` Daniel Vetter
  0 siblings, 0 replies; 15+ messages in thread
From: Daniel Vetter @ 2022-05-04 12:24 UTC (permalink / raw)
  To: Adrian Larumbe, Matthew D Roper; +Cc: intel-gfx, airlied, dri-devel

On Fri, 29 Apr 2022 at 17:11, Adrian Larumbe
<adrian.larumbe@collabora.com> wrote:
> I915_PARAM_HAS_CONTEXT_ISOLATION was already being used as a boolean by
> both Iris and Vulkan , and stood for the guarantee that, when creating a
> new context, all state set by it will not leak to any other context.
>
> However the actual return value was a bitmask where every bit stood for an
> initialised engine, and IGT test gem_ctx_isolation makes use of this mask
> for deciding on the actual context engine isolation status.
>
> However, we do not provide UAPI for IGT tests, so the value returned by the
> PARAM ioctl has to reflect Mesa usage as a boolean.
>
> This change only made sense after compute engine support was added to the
> driver in commit 944823c9463916dd53f3 ("drm/i915/xehp: Define compute class
> and engine") because no context isolation can be assumed on any device with
> both RCS annd CCS engines.
>
> Signed-off-by: Adrian Larumbe <adrian.larumbe@collabora.com>

Top level post and adding Matt Roper and dri-devel.

This was meant as a simple cleanup after CCS enabling in upstream, but
that CCS enabling seems to have gone wrong.

What I thought we should be done for CCS enabling is the following:
- actually have some igt-side hardcoded assumption about how much
engines are isolated from each another, which is a hw property. I
think some of that landed, but it's very incomplete

- convert all igt tests over to that. At least gem_ctx_isolation.c is
not converted over, as Adrian pointed out.

- once igt stopped using this context isolation getparam (we do not,
ever, create uapi just for testcases), fix up the uapi to what iris
actually needs, which is _only_ a boolean which indicates whether the
kernel's context setup code leaks register state from existing
contexts to newly created ones. Which is the bug iris works around
here, where using iris caused gpu hangs in libva. Iow, the kernel
should always and unconditionally return true here. Check out iris
history for details please, actual iris usage has nothing to do with
any other cross-context or cross-engine isolation guarantee we're
making, it's purely about whether our hw ctx code is buggy or not and
leaks state between clients, because we accidentally used the
currently running ctx as template instead of a fixed one created once
at driver load.

Matt, since the CCS enabling on the igt validation side looks very
incomplete I'm leaning very much towards "pls revert, try again".

Cheers, Daniel

> ---
>  drivers/gpu/drm/i915/gt/intel_engine_user.c | 13 ++++++++++++-
>  drivers/gpu/drm/i915/gt/intel_engine_user.h |  1 +
>  drivers/gpu/drm/i915/i915_drm_client.h      |  2 +-
>  drivers/gpu/drm/i915/i915_getparam.c        |  2 +-
>  include/uapi/drm/i915_drm.h                 | 14 +++-----------
>  5 files changed, 18 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> index 0f6cd96b459f..2d6bd36d6150 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> @@ -47,7 +47,7 @@ static const u8 uabi_classes[] = {
>         [COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY,
>         [VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO,
>         [VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE,
> -       /* TODO: Add COMPUTE_CLASS mapping once ABI is available */
> +       [COMPUTE_CLASS] = I915_ENGINE_CLASS_COMPUTE,
>  };
>
>  static int engine_cmp(void *priv, const struct list_head *A,
> @@ -306,3 +306,14 @@ unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915)
>
>         return which;
>  }
> +
> +bool intel_cross_engine_isolated(struct drm_i915_private *i915)
> +{
> +       unsigned int which = intel_engines_has_context_isolation(i915);
> +
> +       if ((which & BIT(I915_ENGINE_CLASS_RENDER)) &&
> +           (which & BIT(I915_ENGINE_CLASS_COMPUTE)))
> +               return false;
> +
> +       return !!which;
> +}
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.h b/drivers/gpu/drm/i915/gt/intel_engine_user.h
> index 3dc7e8ab9fbc..ff21349db4d4 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_user.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.h
> @@ -15,6 +15,7 @@ struct intel_engine_cs *
>  intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
>
>  unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
> +bool intel_cross_engine_isolated(struct drm_i915_private *i915);
>
>  void intel_engine_add_user(struct intel_engine_cs *engine);
>  void intel_engines_driver_register(struct drm_i915_private *i915);
> diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
> index 5f5b02b01ba0..f796c5e8e060 100644
> --- a/drivers/gpu/drm/i915/i915_drm_client.h
> +++ b/drivers/gpu/drm/i915/i915_drm_client.h
> @@ -13,7 +13,7 @@
>
>  #include "gt/intel_engine_types.h"
>
> -#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_VIDEO_ENHANCE
> +#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
>
>  struct drm_i915_private;
>
> diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
> index c12a0adefda5..3d5120d2d78a 100644
> --- a/drivers/gpu/drm/i915/i915_getparam.c
> +++ b/drivers/gpu/drm/i915/i915_getparam.c
> @@ -145,7 +145,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
>                 value = 1;
>                 break;
>         case I915_PARAM_HAS_CONTEXT_ISOLATION:
> -               value = intel_engines_has_context_isolation(i915);
> +               value = intel_cross_engine_isolated(i915);
>                 break;
>         case I915_PARAM_SLICE_MASK:
>                 value = sseu->slice_mask;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 35ca528803fd..84c0af77cc1f 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -166,6 +166,7 @@ enum drm_i915_gem_engine_class {
>         I915_ENGINE_CLASS_COPY          = 1,
>         I915_ENGINE_CLASS_VIDEO         = 2,
>         I915_ENGINE_CLASS_VIDEO_ENHANCE = 3,
> +       I915_ENGINE_CLASS_COMPUTE       = 4,
>
>         /* should be kept compact */
>
> @@ -635,17 +636,8 @@ typedef struct drm_i915_irq_wait {
>  #define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
>
>  /*
> - * Query whether every context (both per-file default and user created) is
> - * isolated (insofar as HW supports). If this parameter is not true, then
> - * freshly created contexts may inherit values from an existing context,
> - * rather than default HW values. If true, it also ensures (insofar as HW
> - * supports) that all state set by this context will not leak to any other
> - * context.
> - *
> - * As not every engine across every gen support contexts, the returned
> - * value reports the support of context isolation for individual engines by
> - * returning a bitmask of each engine class set to true if that class supports
> - * isolation.
> + * Query whether the device can make cross-engine isolation guarantees for
> + * all the engines whose default state has been initialised.
>   */
>  #define I915_PARAM_HAS_CONTEXT_ISOLATION 50
>
> --
> 2.35.1
>


-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] drm/i915: Change semantics of context isolation reporting to UM
  2022-05-04 12:24   ` [Intel-gfx] " Daniel Vetter
@ 2022-05-04 14:59     ` Matt Roper
  -1 siblings, 0 replies; 15+ messages in thread
From: Matt Roper @ 2022-05-04 14:59 UTC (permalink / raw)
  To: Daniel Vetter
  Cc: bob.beckett, Tvrtko Ursulin, intel-gfx, Adrian Larumbe, dri-devel

On Wed, May 04, 2022 at 02:24:07PM +0200, Daniel Vetter wrote:
> On Fri, 29 Apr 2022 at 17:11, Adrian Larumbe
> <adrian.larumbe@collabora.com> wrote:
> > I915_PARAM_HAS_CONTEXT_ISOLATION was already being used as a boolean by
> > both Iris and Vulkan , and stood for the guarantee that, when creating a
> > new context, all state set by it will not leak to any other context.
> >
> > However the actual return value was a bitmask where every bit stood for an
> > initialised engine, and IGT test gem_ctx_isolation makes use of this mask
> > for deciding on the actual context engine isolation status.
> >
> > However, we do not provide UAPI for IGT tests, so the value returned by the
> > PARAM ioctl has to reflect Mesa usage as a boolean.
> >
> > This change only made sense after compute engine support was added to the
> > driver in commit 944823c9463916dd53f3 ("drm/i915/xehp: Define compute class
> > and engine") because no context isolation can be assumed on any device with
> > both RCS annd CCS engines.
> >
> > Signed-off-by: Adrian Larumbe <adrian.larumbe@collabora.com>
> 
> Top level post and adding Matt Roper and dri-devel.
> 
> This was meant as a simple cleanup after CCS enabling in upstream, but
> that CCS enabling seems to have gone wrong.
> 
> What I thought we should be done for CCS enabling is the following:
> - actually have some igt-side hardcoded assumption about how much
> engines are isolated from each another, which is a hw property. I
> think some of that landed, but it's very incomplete
> 
> - convert all igt tests over to that. At least gem_ctx_isolation.c is
> not converted over, as Adrian pointed out.

I pointed that out last week in one of our offline syncs and that's what
got the ball rolling on that test again.  But you specifically told us
that the uapi cleanup for context isolation shouldn't block the CCS
patches from landing since that was still happening in parallel:

    "...I do see the uapi cleanup as part of this multi engine/CCS
    enabling, but it's not a blocker to land the patches..."

Did we misunderstand what you were trying to say in that email or was
there a change of direction here?


Matt

> 
> - once igt stopped using this context isolation getparam (we do not,
> ever, create uapi just for testcases), fix up the uapi to what iris
> actually needs, which is _only_ a boolean which indicates whether the
> kernel's context setup code leaks register state from existing
> contexts to newly created ones. Which is the bug iris works around
> here, where using iris caused gpu hangs in libva. Iow, the kernel
> should always and unconditionally return true here. Check out iris
> history for details please, actual iris usage has nothing to do with
> any other cross-context or cross-engine isolation guarantee we're
> making, it's purely about whether our hw ctx code is buggy or not and
> leaks state between clients, because we accidentally used the
> currently running ctx as template instead of a fixed one created once
> at driver load.
> 
> Matt, since the CCS enabling on the igt validation side looks very
> incomplete I'm leaning very much towards "pls revert, try again".
> 
> Cheers, Daniel
> 
> > ---
> >  drivers/gpu/drm/i915/gt/intel_engine_user.c | 13 ++++++++++++-
> >  drivers/gpu/drm/i915/gt/intel_engine_user.h |  1 +
> >  drivers/gpu/drm/i915/i915_drm_client.h      |  2 +-
> >  drivers/gpu/drm/i915/i915_getparam.c        |  2 +-
> >  include/uapi/drm/i915_drm.h                 | 14 +++-----------
> >  5 files changed, 18 insertions(+), 14 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > index 0f6cd96b459f..2d6bd36d6150 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > @@ -47,7 +47,7 @@ static const u8 uabi_classes[] = {
> >         [COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY,
> >         [VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO,
> >         [VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE,
> > -       /* TODO: Add COMPUTE_CLASS mapping once ABI is available */
> > +       [COMPUTE_CLASS] = I915_ENGINE_CLASS_COMPUTE,
> >  };
> >
> >  static int engine_cmp(void *priv, const struct list_head *A,
> > @@ -306,3 +306,14 @@ unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915)
> >
> >         return which;
> >  }
> > +
> > +bool intel_cross_engine_isolated(struct drm_i915_private *i915)
> > +{
> > +       unsigned int which = intel_engines_has_context_isolation(i915);
> > +
> > +       if ((which & BIT(I915_ENGINE_CLASS_RENDER)) &&
> > +           (which & BIT(I915_ENGINE_CLASS_COMPUTE)))
> > +               return false;
> > +
> > +       return !!which;
> > +}
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.h b/drivers/gpu/drm/i915/gt/intel_engine_user.h
> > index 3dc7e8ab9fbc..ff21349db4d4 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_user.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.h
> > @@ -15,6 +15,7 @@ struct intel_engine_cs *
> >  intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
> >
> >  unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
> > +bool intel_cross_engine_isolated(struct drm_i915_private *i915);
> >
> >  void intel_engine_add_user(struct intel_engine_cs *engine);
> >  void intel_engines_driver_register(struct drm_i915_private *i915);
> > diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
> > index 5f5b02b01ba0..f796c5e8e060 100644
> > --- a/drivers/gpu/drm/i915/i915_drm_client.h
> > +++ b/drivers/gpu/drm/i915/i915_drm_client.h
> > @@ -13,7 +13,7 @@
> >
> >  #include "gt/intel_engine_types.h"
> >
> > -#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_VIDEO_ENHANCE
> > +#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
> >
> >  struct drm_i915_private;
> >
> > diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
> > index c12a0adefda5..3d5120d2d78a 100644
> > --- a/drivers/gpu/drm/i915/i915_getparam.c
> > +++ b/drivers/gpu/drm/i915/i915_getparam.c
> > @@ -145,7 +145,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
> >                 value = 1;
> >                 break;
> >         case I915_PARAM_HAS_CONTEXT_ISOLATION:
> > -               value = intel_engines_has_context_isolation(i915);
> > +               value = intel_cross_engine_isolated(i915);
> >                 break;
> >         case I915_PARAM_SLICE_MASK:
> >                 value = sseu->slice_mask;
> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > index 35ca528803fd..84c0af77cc1f 100644
> > --- a/include/uapi/drm/i915_drm.h
> > +++ b/include/uapi/drm/i915_drm.h
> > @@ -166,6 +166,7 @@ enum drm_i915_gem_engine_class {
> >         I915_ENGINE_CLASS_COPY          = 1,
> >         I915_ENGINE_CLASS_VIDEO         = 2,
> >         I915_ENGINE_CLASS_VIDEO_ENHANCE = 3,
> > +       I915_ENGINE_CLASS_COMPUTE       = 4,
> >
> >         /* should be kept compact */
> >
> > @@ -635,17 +636,8 @@ typedef struct drm_i915_irq_wait {
> >  #define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
> >
> >  /*
> > - * Query whether every context (both per-file default and user created) is
> > - * isolated (insofar as HW supports). If this parameter is not true, then
> > - * freshly created contexts may inherit values from an existing context,
> > - * rather than default HW values. If true, it also ensures (insofar as HW
> > - * supports) that all state set by this context will not leak to any other
> > - * context.
> > - *
> > - * As not every engine across every gen support contexts, the returned
> > - * value reports the support of context isolation for individual engines by
> > - * returning a bitmask of each engine class set to true if that class supports
> > - * isolation.
> > + * Query whether the device can make cross-engine isolation guarantees for
> > + * all the engines whose default state has been initialised.
> >   */
> >  #define I915_PARAM_HAS_CONTEXT_ISOLATION 50
> >
> > --
> > 2.35.1
> >
> 
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation
(916) 356-2795

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Change semantics of context isolation reporting to UM
@ 2022-05-04 14:59     ` Matt Roper
  0 siblings, 0 replies; 15+ messages in thread
From: Matt Roper @ 2022-05-04 14:59 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx, Adrian Larumbe, dri-devel, airlied

On Wed, May 04, 2022 at 02:24:07PM +0200, Daniel Vetter wrote:
> On Fri, 29 Apr 2022 at 17:11, Adrian Larumbe
> <adrian.larumbe@collabora.com> wrote:
> > I915_PARAM_HAS_CONTEXT_ISOLATION was already being used as a boolean by
> > both Iris and Vulkan , and stood for the guarantee that, when creating a
> > new context, all state set by it will not leak to any other context.
> >
> > However the actual return value was a bitmask where every bit stood for an
> > initialised engine, and IGT test gem_ctx_isolation makes use of this mask
> > for deciding on the actual context engine isolation status.
> >
> > However, we do not provide UAPI for IGT tests, so the value returned by the
> > PARAM ioctl has to reflect Mesa usage as a boolean.
> >
> > This change only made sense after compute engine support was added to the
> > driver in commit 944823c9463916dd53f3 ("drm/i915/xehp: Define compute class
> > and engine") because no context isolation can be assumed on any device with
> > both RCS annd CCS engines.
> >
> > Signed-off-by: Adrian Larumbe <adrian.larumbe@collabora.com>
> 
> Top level post and adding Matt Roper and dri-devel.
> 
> This was meant as a simple cleanup after CCS enabling in upstream, but
> that CCS enabling seems to have gone wrong.
> 
> What I thought we should be done for CCS enabling is the following:
> - actually have some igt-side hardcoded assumption about how much
> engines are isolated from each another, which is a hw property. I
> think some of that landed, but it's very incomplete
> 
> - convert all igt tests over to that. At least gem_ctx_isolation.c is
> not converted over, as Adrian pointed out.

I pointed that out last week in one of our offline syncs and that's what
got the ball rolling on that test again.  But you specifically told us
that the uapi cleanup for context isolation shouldn't block the CCS
patches from landing since that was still happening in parallel:

    "...I do see the uapi cleanup as part of this multi engine/CCS
    enabling, but it's not a blocker to land the patches..."

Did we misunderstand what you were trying to say in that email or was
there a change of direction here?


Matt

> 
> - once igt stopped using this context isolation getparam (we do not,
> ever, create uapi just for testcases), fix up the uapi to what iris
> actually needs, which is _only_ a boolean which indicates whether the
> kernel's context setup code leaks register state from existing
> contexts to newly created ones. Which is the bug iris works around
> here, where using iris caused gpu hangs in libva. Iow, the kernel
> should always and unconditionally return true here. Check out iris
> history for details please, actual iris usage has nothing to do with
> any other cross-context or cross-engine isolation guarantee we're
> making, it's purely about whether our hw ctx code is buggy or not and
> leaks state between clients, because we accidentally used the
> currently running ctx as template instead of a fixed one created once
> at driver load.
> 
> Matt, since the CCS enabling on the igt validation side looks very
> incomplete I'm leaning very much towards "pls revert, try again".
> 
> Cheers, Daniel
> 
> > ---
> >  drivers/gpu/drm/i915/gt/intel_engine_user.c | 13 ++++++++++++-
> >  drivers/gpu/drm/i915/gt/intel_engine_user.h |  1 +
> >  drivers/gpu/drm/i915/i915_drm_client.h      |  2 +-
> >  drivers/gpu/drm/i915/i915_getparam.c        |  2 +-
> >  include/uapi/drm/i915_drm.h                 | 14 +++-----------
> >  5 files changed, 18 insertions(+), 14 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > index 0f6cd96b459f..2d6bd36d6150 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > @@ -47,7 +47,7 @@ static const u8 uabi_classes[] = {
> >         [COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY,
> >         [VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO,
> >         [VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE,
> > -       /* TODO: Add COMPUTE_CLASS mapping once ABI is available */
> > +       [COMPUTE_CLASS] = I915_ENGINE_CLASS_COMPUTE,
> >  };
> >
> >  static int engine_cmp(void *priv, const struct list_head *A,
> > @@ -306,3 +306,14 @@ unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915)
> >
> >         return which;
> >  }
> > +
> > +bool intel_cross_engine_isolated(struct drm_i915_private *i915)
> > +{
> > +       unsigned int which = intel_engines_has_context_isolation(i915);
> > +
> > +       if ((which & BIT(I915_ENGINE_CLASS_RENDER)) &&
> > +           (which & BIT(I915_ENGINE_CLASS_COMPUTE)))
> > +               return false;
> > +
> > +       return !!which;
> > +}
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.h b/drivers/gpu/drm/i915/gt/intel_engine_user.h
> > index 3dc7e8ab9fbc..ff21349db4d4 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_user.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.h
> > @@ -15,6 +15,7 @@ struct intel_engine_cs *
> >  intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
> >
> >  unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
> > +bool intel_cross_engine_isolated(struct drm_i915_private *i915);
> >
> >  void intel_engine_add_user(struct intel_engine_cs *engine);
> >  void intel_engines_driver_register(struct drm_i915_private *i915);
> > diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
> > index 5f5b02b01ba0..f796c5e8e060 100644
> > --- a/drivers/gpu/drm/i915/i915_drm_client.h
> > +++ b/drivers/gpu/drm/i915/i915_drm_client.h
> > @@ -13,7 +13,7 @@
> >
> >  #include "gt/intel_engine_types.h"
> >
> > -#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_VIDEO_ENHANCE
> > +#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
> >
> >  struct drm_i915_private;
> >
> > diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
> > index c12a0adefda5..3d5120d2d78a 100644
> > --- a/drivers/gpu/drm/i915/i915_getparam.c
> > +++ b/drivers/gpu/drm/i915/i915_getparam.c
> > @@ -145,7 +145,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
> >                 value = 1;
> >                 break;
> >         case I915_PARAM_HAS_CONTEXT_ISOLATION:
> > -               value = intel_engines_has_context_isolation(i915);
> > +               value = intel_cross_engine_isolated(i915);
> >                 break;
> >         case I915_PARAM_SLICE_MASK:
> >                 value = sseu->slice_mask;
> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > index 35ca528803fd..84c0af77cc1f 100644
> > --- a/include/uapi/drm/i915_drm.h
> > +++ b/include/uapi/drm/i915_drm.h
> > @@ -166,6 +166,7 @@ enum drm_i915_gem_engine_class {
> >         I915_ENGINE_CLASS_COPY          = 1,
> >         I915_ENGINE_CLASS_VIDEO         = 2,
> >         I915_ENGINE_CLASS_VIDEO_ENHANCE = 3,
> > +       I915_ENGINE_CLASS_COMPUTE       = 4,
> >
> >         /* should be kept compact */
> >
> > @@ -635,17 +636,8 @@ typedef struct drm_i915_irq_wait {
> >  #define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
> >
> >  /*
> > - * Query whether every context (both per-file default and user created) is
> > - * isolated (insofar as HW supports). If this parameter is not true, then
> > - * freshly created contexts may inherit values from an existing context,
> > - * rather than default HW values. If true, it also ensures (insofar as HW
> > - * supports) that all state set by this context will not leak to any other
> > - * context.
> > - *
> > - * As not every engine across every gen support contexts, the returned
> > - * value reports the support of context isolation for individual engines by
> > - * returning a bitmask of each engine class set to true if that class supports
> > - * isolation.
> > + * Query whether the device can make cross-engine isolation guarantees for
> > + * all the engines whose default state has been initialised.
> >   */
> >  #define I915_PARAM_HAS_CONTEXT_ISOLATION 50
> >
> > --
> > 2.35.1
> >
> 
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation
(916) 356-2795

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] drm/i915: Change semantics of context isolation reporting to UM
  2022-05-04 14:59     ` [Intel-gfx] " Matt Roper
@ 2022-05-04 16:42       ` Daniel Vetter
  -1 siblings, 0 replies; 15+ messages in thread
From: Daniel Vetter @ 2022-05-04 16:42 UTC (permalink / raw)
  To: Matt Roper
  Cc: bob.beckett, Tvrtko Ursulin, intel-gfx, Adrian Larumbe, dri-devel

On Wed, May 04, 2022 at 07:59:27AM -0700, Matt Roper wrote:
> On Wed, May 04, 2022 at 02:24:07PM +0200, Daniel Vetter wrote:
> > On Fri, 29 Apr 2022 at 17:11, Adrian Larumbe
> > <adrian.larumbe@collabora.com> wrote:
> > > I915_PARAM_HAS_CONTEXT_ISOLATION was already being used as a boolean by
> > > both Iris and Vulkan , and stood for the guarantee that, when creating a
> > > new context, all state set by it will not leak to any other context.
> > >
> > > However the actual return value was a bitmask where every bit stood for an
> > > initialised engine, and IGT test gem_ctx_isolation makes use of this mask
> > > for deciding on the actual context engine isolation status.
> > >
> > > However, we do not provide UAPI for IGT tests, so the value returned by the
> > > PARAM ioctl has to reflect Mesa usage as a boolean.
> > >
> > > This change only made sense after compute engine support was added to the
> > > driver in commit 944823c9463916dd53f3 ("drm/i915/xehp: Define compute class
> > > and engine") because no context isolation can be assumed on any device with
> > > both RCS annd CCS engines.
> > >
> > > Signed-off-by: Adrian Larumbe <adrian.larumbe@collabora.com>
> > 
> > Top level post and adding Matt Roper and dri-devel.
> > 
> > This was meant as a simple cleanup after CCS enabling in upstream, but
> > that CCS enabling seems to have gone wrong.
> > 
> > What I thought we should be done for CCS enabling is the following:
> > - actually have some igt-side hardcoded assumption about how much
> > engines are isolated from each another, which is a hw property. I
> > think some of that landed, but it's very incomplete
> > 
> > - convert all igt tests over to that. At least gem_ctx_isolation.c is
> > not converted over, as Adrian pointed out.
> 
> I pointed that out last week in one of our offline syncs and that's what
> got the ball rolling on that test again.  But you specifically told us
> that the uapi cleanup for context isolation shouldn't block the CCS
> patches from landing since that was still happening in parallel:
> 
>     "...I do see the uapi cleanup as part of this multi engine/CCS
>     enabling, but it's not a blocker to land the patches..."
> 
> Did we misunderstand what you were trying to say in that email or was
> there a change of direction here?

The cleanup (which Adrian is now working on, but there's confusion) is
totally fine to do later. What looks really iffy is the test coverage, and
at least from me looking around gem_ctx_isolation wasn't touched or
updated for CCS engines, and that looks like it's not enough. Either those
tests are bogus or not actually testing a lot, and then we should delete
them. Or there's probably going to be some impact on how much exactly the
engines/contexts are isolated against each another.

That's the part that I think should be done before we call CCS support
done and ready for merging. And if that's done properly it should also
take care of the "igt uses HAS_CONTEXT_ISOLATION getparam" issue, since
you need something more fancy anyway.
-Daniel


> 
> 
> Matt
> 
> > 
> > - once igt stopped using this context isolation getparam (we do not,
> > ever, create uapi just for testcases), fix up the uapi to what iris
> > actually needs, which is _only_ a boolean which indicates whether the
> > kernel's context setup code leaks register state from existing
> > contexts to newly created ones. Which is the bug iris works around
> > here, where using iris caused gpu hangs in libva. Iow, the kernel
> > should always and unconditionally return true here. Check out iris
> > history for details please, actual iris usage has nothing to do with
> > any other cross-context or cross-engine isolation guarantee we're
> > making, it's purely about whether our hw ctx code is buggy or not and
> > leaks state between clients, because we accidentally used the
> > currently running ctx as template instead of a fixed one created once
> > at driver load.
> > 
> > Matt, since the CCS enabling on the igt validation side looks very
> > incomplete I'm leaning very much towards "pls revert, try again".
> > 
> > Cheers, Daniel
> > 
> > > ---
> > >  drivers/gpu/drm/i915/gt/intel_engine_user.c | 13 ++++++++++++-
> > >  drivers/gpu/drm/i915/gt/intel_engine_user.h |  1 +
> > >  drivers/gpu/drm/i915/i915_drm_client.h      |  2 +-
> > >  drivers/gpu/drm/i915/i915_getparam.c        |  2 +-
> > >  include/uapi/drm/i915_drm.h                 | 14 +++-----------
> > >  5 files changed, 18 insertions(+), 14 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > > index 0f6cd96b459f..2d6bd36d6150 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > > @@ -47,7 +47,7 @@ static const u8 uabi_classes[] = {
> > >         [COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY,
> > >         [VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO,
> > >         [VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE,
> > > -       /* TODO: Add COMPUTE_CLASS mapping once ABI is available */
> > > +       [COMPUTE_CLASS] = I915_ENGINE_CLASS_COMPUTE,
> > >  };
> > >
> > >  static int engine_cmp(void *priv, const struct list_head *A,
> > > @@ -306,3 +306,14 @@ unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915)
> > >
> > >         return which;
> > >  }
> > > +
> > > +bool intel_cross_engine_isolated(struct drm_i915_private *i915)
> > > +{
> > > +       unsigned int which = intel_engines_has_context_isolation(i915);
> > > +
> > > +       if ((which & BIT(I915_ENGINE_CLASS_RENDER)) &&
> > > +           (which & BIT(I915_ENGINE_CLASS_COMPUTE)))
> > > +               return false;
> > > +
> > > +       return !!which;
> > > +}
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.h b/drivers/gpu/drm/i915/gt/intel_engine_user.h
> > > index 3dc7e8ab9fbc..ff21349db4d4 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_engine_user.h
> > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.h
> > > @@ -15,6 +15,7 @@ struct intel_engine_cs *
> > >  intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
> > >
> > >  unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
> > > +bool intel_cross_engine_isolated(struct drm_i915_private *i915);
> > >
> > >  void intel_engine_add_user(struct intel_engine_cs *engine);
> > >  void intel_engines_driver_register(struct drm_i915_private *i915);
> > > diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
> > > index 5f5b02b01ba0..f796c5e8e060 100644
> > > --- a/drivers/gpu/drm/i915/i915_drm_client.h
> > > +++ b/drivers/gpu/drm/i915/i915_drm_client.h
> > > @@ -13,7 +13,7 @@
> > >
> > >  #include "gt/intel_engine_types.h"
> > >
> > > -#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_VIDEO_ENHANCE
> > > +#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
> > >
> > >  struct drm_i915_private;
> > >
> > > diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
> > > index c12a0adefda5..3d5120d2d78a 100644
> > > --- a/drivers/gpu/drm/i915/i915_getparam.c
> > > +++ b/drivers/gpu/drm/i915/i915_getparam.c
> > > @@ -145,7 +145,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
> > >                 value = 1;
> > >                 break;
> > >         case I915_PARAM_HAS_CONTEXT_ISOLATION:
> > > -               value = intel_engines_has_context_isolation(i915);
> > > +               value = intel_cross_engine_isolated(i915);
> > >                 break;
> > >         case I915_PARAM_SLICE_MASK:
> > >                 value = sseu->slice_mask;
> > > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > > index 35ca528803fd..84c0af77cc1f 100644
> > > --- a/include/uapi/drm/i915_drm.h
> > > +++ b/include/uapi/drm/i915_drm.h
> > > @@ -166,6 +166,7 @@ enum drm_i915_gem_engine_class {
> > >         I915_ENGINE_CLASS_COPY          = 1,
> > >         I915_ENGINE_CLASS_VIDEO         = 2,
> > >         I915_ENGINE_CLASS_VIDEO_ENHANCE = 3,
> > > +       I915_ENGINE_CLASS_COMPUTE       = 4,
> > >
> > >         /* should be kept compact */
> > >
> > > @@ -635,17 +636,8 @@ typedef struct drm_i915_irq_wait {
> > >  #define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
> > >
> > >  /*
> > > - * Query whether every context (both per-file default and user created) is
> > > - * isolated (insofar as HW supports). If this parameter is not true, then
> > > - * freshly created contexts may inherit values from an existing context,
> > > - * rather than default HW values. If true, it also ensures (insofar as HW
> > > - * supports) that all state set by this context will not leak to any other
> > > - * context.
> > > - *
> > > - * As not every engine across every gen support contexts, the returned
> > > - * value reports the support of context isolation for individual engines by
> > > - * returning a bitmask of each engine class set to true if that class supports
> > > - * isolation.
> > > + * Query whether the device can make cross-engine isolation guarantees for
> > > + * all the engines whose default state has been initialised.
> > >   */
> > >  #define I915_PARAM_HAS_CONTEXT_ISOLATION 50
> > >
> > > --
> > > 2.35.1
> > >
> > 
> > 
> > -- 
> > Daniel Vetter
> > Software Engineer, Intel Corporation
> > http://blog.ffwll.ch
> 
> -- 
> Matt Roper
> Graphics Software Engineer
> VTT-OSGC Platform Enablement
> Intel Corporation
> (916) 356-2795

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Change semantics of context isolation reporting to UM
@ 2022-05-04 16:42       ` Daniel Vetter
  0 siblings, 0 replies; 15+ messages in thread
From: Daniel Vetter @ 2022-05-04 16:42 UTC (permalink / raw)
  To: Matt Roper; +Cc: intel-gfx, Adrian Larumbe, dri-devel, airlied

On Wed, May 04, 2022 at 07:59:27AM -0700, Matt Roper wrote:
> On Wed, May 04, 2022 at 02:24:07PM +0200, Daniel Vetter wrote:
> > On Fri, 29 Apr 2022 at 17:11, Adrian Larumbe
> > <adrian.larumbe@collabora.com> wrote:
> > > I915_PARAM_HAS_CONTEXT_ISOLATION was already being used as a boolean by
> > > both Iris and Vulkan , and stood for the guarantee that, when creating a
> > > new context, all state set by it will not leak to any other context.
> > >
> > > However the actual return value was a bitmask where every bit stood for an
> > > initialised engine, and IGT test gem_ctx_isolation makes use of this mask
> > > for deciding on the actual context engine isolation status.
> > >
> > > However, we do not provide UAPI for IGT tests, so the value returned by the
> > > PARAM ioctl has to reflect Mesa usage as a boolean.
> > >
> > > This change only made sense after compute engine support was added to the
> > > driver in commit 944823c9463916dd53f3 ("drm/i915/xehp: Define compute class
> > > and engine") because no context isolation can be assumed on any device with
> > > both RCS annd CCS engines.
> > >
> > > Signed-off-by: Adrian Larumbe <adrian.larumbe@collabora.com>
> > 
> > Top level post and adding Matt Roper and dri-devel.
> > 
> > This was meant as a simple cleanup after CCS enabling in upstream, but
> > that CCS enabling seems to have gone wrong.
> > 
> > What I thought we should be done for CCS enabling is the following:
> > - actually have some igt-side hardcoded assumption about how much
> > engines are isolated from each another, which is a hw property. I
> > think some of that landed, but it's very incomplete
> > 
> > - convert all igt tests over to that. At least gem_ctx_isolation.c is
> > not converted over, as Adrian pointed out.
> 
> I pointed that out last week in one of our offline syncs and that's what
> got the ball rolling on that test again.  But you specifically told us
> that the uapi cleanup for context isolation shouldn't block the CCS
> patches from landing since that was still happening in parallel:
> 
>     "...I do see the uapi cleanup as part of this multi engine/CCS
>     enabling, but it's not a blocker to land the patches..."
> 
> Did we misunderstand what you were trying to say in that email or was
> there a change of direction here?

The cleanup (which Adrian is now working on, but there's confusion) is
totally fine to do later. What looks really iffy is the test coverage, and
at least from me looking around gem_ctx_isolation wasn't touched or
updated for CCS engines, and that looks like it's not enough. Either those
tests are bogus or not actually testing a lot, and then we should delete
them. Or there's probably going to be some impact on how much exactly the
engines/contexts are isolated against each another.

That's the part that I think should be done before we call CCS support
done and ready for merging. And if that's done properly it should also
take care of the "igt uses HAS_CONTEXT_ISOLATION getparam" issue, since
you need something more fancy anyway.
-Daniel


> 
> 
> Matt
> 
> > 
> > - once igt stopped using this context isolation getparam (we do not,
> > ever, create uapi just for testcases), fix up the uapi to what iris
> > actually needs, which is _only_ a boolean which indicates whether the
> > kernel's context setup code leaks register state from existing
> > contexts to newly created ones. Which is the bug iris works around
> > here, where using iris caused gpu hangs in libva. Iow, the kernel
> > should always and unconditionally return true here. Check out iris
> > history for details please, actual iris usage has nothing to do with
> > any other cross-context or cross-engine isolation guarantee we're
> > making, it's purely about whether our hw ctx code is buggy or not and
> > leaks state between clients, because we accidentally used the
> > currently running ctx as template instead of a fixed one created once
> > at driver load.
> > 
> > Matt, since the CCS enabling on the igt validation side looks very
> > incomplete I'm leaning very much towards "pls revert, try again".
> > 
> > Cheers, Daniel
> > 
> > > ---
> > >  drivers/gpu/drm/i915/gt/intel_engine_user.c | 13 ++++++++++++-
> > >  drivers/gpu/drm/i915/gt/intel_engine_user.h |  1 +
> > >  drivers/gpu/drm/i915/i915_drm_client.h      |  2 +-
> > >  drivers/gpu/drm/i915/i915_getparam.c        |  2 +-
> > >  include/uapi/drm/i915_drm.h                 | 14 +++-----------
> > >  5 files changed, 18 insertions(+), 14 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > > index 0f6cd96b459f..2d6bd36d6150 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > > @@ -47,7 +47,7 @@ static const u8 uabi_classes[] = {
> > >         [COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY,
> > >         [VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO,
> > >         [VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE,
> > > -       /* TODO: Add COMPUTE_CLASS mapping once ABI is available */
> > > +       [COMPUTE_CLASS] = I915_ENGINE_CLASS_COMPUTE,
> > >  };
> > >
> > >  static int engine_cmp(void *priv, const struct list_head *A,
> > > @@ -306,3 +306,14 @@ unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915)
> > >
> > >         return which;
> > >  }
> > > +
> > > +bool intel_cross_engine_isolated(struct drm_i915_private *i915)
> > > +{
> > > +       unsigned int which = intel_engines_has_context_isolation(i915);
> > > +
> > > +       if ((which & BIT(I915_ENGINE_CLASS_RENDER)) &&
> > > +           (which & BIT(I915_ENGINE_CLASS_COMPUTE)))
> > > +               return false;
> > > +
> > > +       return !!which;
> > > +}
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.h b/drivers/gpu/drm/i915/gt/intel_engine_user.h
> > > index 3dc7e8ab9fbc..ff21349db4d4 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_engine_user.h
> > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.h
> > > @@ -15,6 +15,7 @@ struct intel_engine_cs *
> > >  intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
> > >
> > >  unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
> > > +bool intel_cross_engine_isolated(struct drm_i915_private *i915);
> > >
> > >  void intel_engine_add_user(struct intel_engine_cs *engine);
> > >  void intel_engines_driver_register(struct drm_i915_private *i915);
> > > diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
> > > index 5f5b02b01ba0..f796c5e8e060 100644
> > > --- a/drivers/gpu/drm/i915/i915_drm_client.h
> > > +++ b/drivers/gpu/drm/i915/i915_drm_client.h
> > > @@ -13,7 +13,7 @@
> > >
> > >  #include "gt/intel_engine_types.h"
> > >
> > > -#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_VIDEO_ENHANCE
> > > +#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
> > >
> > >  struct drm_i915_private;
> > >
> > > diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
> > > index c12a0adefda5..3d5120d2d78a 100644
> > > --- a/drivers/gpu/drm/i915/i915_getparam.c
> > > +++ b/drivers/gpu/drm/i915/i915_getparam.c
> > > @@ -145,7 +145,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
> > >                 value = 1;
> > >                 break;
> > >         case I915_PARAM_HAS_CONTEXT_ISOLATION:
> > > -               value = intel_engines_has_context_isolation(i915);
> > > +               value = intel_cross_engine_isolated(i915);
> > >                 break;
> > >         case I915_PARAM_SLICE_MASK:
> > >                 value = sseu->slice_mask;
> > > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > > index 35ca528803fd..84c0af77cc1f 100644
> > > --- a/include/uapi/drm/i915_drm.h
> > > +++ b/include/uapi/drm/i915_drm.h
> > > @@ -166,6 +166,7 @@ enum drm_i915_gem_engine_class {
> > >         I915_ENGINE_CLASS_COPY          = 1,
> > >         I915_ENGINE_CLASS_VIDEO         = 2,
> > >         I915_ENGINE_CLASS_VIDEO_ENHANCE = 3,
> > > +       I915_ENGINE_CLASS_COMPUTE       = 4,
> > >
> > >         /* should be kept compact */
> > >
> > > @@ -635,17 +636,8 @@ typedef struct drm_i915_irq_wait {
> > >  #define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
> > >
> > >  /*
> > > - * Query whether every context (both per-file default and user created) is
> > > - * isolated (insofar as HW supports). If this parameter is not true, then
> > > - * freshly created contexts may inherit values from an existing context,
> > > - * rather than default HW values. If true, it also ensures (insofar as HW
> > > - * supports) that all state set by this context will not leak to any other
> > > - * context.
> > > - *
> > > - * As not every engine across every gen support contexts, the returned
> > > - * value reports the support of context isolation for individual engines by
> > > - * returning a bitmask of each engine class set to true if that class supports
> > > - * isolation.
> > > + * Query whether the device can make cross-engine isolation guarantees for
> > > + * all the engines whose default state has been initialised.
> > >   */
> > >  #define I915_PARAM_HAS_CONTEXT_ISOLATION 50
> > >
> > > --
> > > 2.35.1
> > >
> > 
> > 
> > -- 
> > Daniel Vetter
> > Software Engineer, Intel Corporation
> > http://blog.ffwll.ch
> 
> -- 
> Matt Roper
> Graphics Software Engineer
> VTT-OSGC Platform Enablement
> Intel Corporation
> (916) 356-2795

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] drm/i915: Change semantics of context isolation reporting to UM
  2022-05-04 16:42       ` [Intel-gfx] " Daniel Vetter
@ 2022-05-04 18:12         ` Matt Roper
  -1 siblings, 0 replies; 15+ messages in thread
From: Matt Roper @ 2022-05-04 18:12 UTC (permalink / raw)
  To: Daniel Vetter
  Cc: bob.beckett, Tvrtko Ursulin, Umesh Nerlige Ramappa, intel-gfx,
	Adrian Larumbe, dri-devel, John Harrison

On Wed, May 04, 2022 at 06:42:37PM +0200, Daniel Vetter wrote:
> On Wed, May 04, 2022 at 07:59:27AM -0700, Matt Roper wrote:
> > On Wed, May 04, 2022 at 02:24:07PM +0200, Daniel Vetter wrote:
> > > On Fri, 29 Apr 2022 at 17:11, Adrian Larumbe
> > > <adrian.larumbe@collabora.com> wrote:
> > > > I915_PARAM_HAS_CONTEXT_ISOLATION was already being used as a boolean by
> > > > both Iris and Vulkan , and stood for the guarantee that, when creating a
> > > > new context, all state set by it will not leak to any other context.
> > > >
> > > > However the actual return value was a bitmask where every bit stood for an
> > > > initialised engine, and IGT test gem_ctx_isolation makes use of this mask
> > > > for deciding on the actual context engine isolation status.
> > > >
> > > > However, we do not provide UAPI for IGT tests, so the value returned by the
> > > > PARAM ioctl has to reflect Mesa usage as a boolean.
> > > >
> > > > This change only made sense after compute engine support was added to the
> > > > driver in commit 944823c9463916dd53f3 ("drm/i915/xehp: Define compute class
> > > > and engine") because no context isolation can be assumed on any device with
> > > > both RCS annd CCS engines.
> > > >
> > > > Signed-off-by: Adrian Larumbe <adrian.larumbe@collabora.com>
> > > 
> > > Top level post and adding Matt Roper and dri-devel.
> > > 
> > > This was meant as a simple cleanup after CCS enabling in upstream, but
> > > that CCS enabling seems to have gone wrong.
> > > 
> > > What I thought we should be done for CCS enabling is the following:
> > > - actually have some igt-side hardcoded assumption about how much
> > > engines are isolated from each another, which is a hw property. I
> > > think some of that landed, but it's very incomplete
> > > 
> > > - convert all igt tests over to that. At least gem_ctx_isolation.c is
> > > not converted over, as Adrian pointed out.
> > 
> > I pointed that out last week in one of our offline syncs and that's what
> > got the ball rolling on that test again.  But you specifically told us
> > that the uapi cleanup for context isolation shouldn't block the CCS
> > patches from landing since that was still happening in parallel:
> > 
> >     "...I do see the uapi cleanup as part of this multi engine/CCS
> >     enabling, but it's not a blocker to land the patches..."
> > 
> > Did we misunderstand what you were trying to say in that email or was
> > there a change of direction here?
> 
> The cleanup (which Adrian is now working on, but there's confusion) is
> totally fine to do later. What looks really iffy is the test coverage, and
> at least from me looking around gem_ctx_isolation wasn't touched or
> updated for CCS engines, and that looks like it's not enough. Either those
> tests are bogus or not actually testing a lot, and then we should delete
> them. Or there's probably going to be some impact on how much exactly the
> engines/contexts are isolated against each another.

The test automatically picks up any new engines that show up and
includes them in execution.  The test is already running properly on DG2
CCS engines in CI right now.  E.g.,

    https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11599/re-dg2-12/igt@gem_ctx_isolation@dirty-create@ccs0.html

Is the concern just that we haven't added extra registers to the list to
check on the CCS engines?  For that matter, we're missing a bunch of
registers for RCS, BCS, VCS, and VECS for newer platforms too; the CCS
isn't really special there.

My understanding is that most of what gem_ctx_isolation tests is that
context switches really do save/restore registers properly (i.e., the
hardware behavior is sane) and there's really nothing special about CCS
engines regarding general context switching behavior.  Where things get
unusual with CCS engines is the shared reset domain, and that's more the
realm of what tests like i915_hangman cover.  But even there, the
workarounds that are in place right now (which only allow parallelism
between engines if they belong to the same VM) means that in most cases
there actually isn't any userspace-visible impact of the shared resets.

Adding JohnH and Umesh since they're a lot more familiar with all of
this stuff than I.

I can send a revert if you think that's what we need, but from what I'm
hearing we don't really expect many areas where there's
userspace-visible behavior from CCS engines that would need non-standard
IGT handling, and the few places where there are have already been
updated.  But there are so many orphaned IGT tests out there, many of
which have bitrotted away over the years, that it's possible we might
still be missing something.


Matt

> 
> That's the part that I think should be done before we call CCS support
> done and ready for merging. And if that's done properly it should also
> take care of the "igt uses HAS_CONTEXT_ISOLATION getparam" issue, since
> you need something more fancy anyway.
> -Daniel
> 
> 
> > 
> > 
> > Matt
> > 
> > > 
> > > - once igt stopped using this context isolation getparam (we do not,
> > > ever, create uapi just for testcases), fix up the uapi to what iris
> > > actually needs, which is _only_ a boolean which indicates whether the
> > > kernel's context setup code leaks register state from existing
> > > contexts to newly created ones. Which is the bug iris works around
> > > here, where using iris caused gpu hangs in libva. Iow, the kernel
> > > should always and unconditionally return true here. Check out iris
> > > history for details please, actual iris usage has nothing to do with
> > > any other cross-context or cross-engine isolation guarantee we're
> > > making, it's purely about whether our hw ctx code is buggy or not and
> > > leaks state between clients, because we accidentally used the
> > > currently running ctx as template instead of a fixed one created once
> > > at driver load.
> > > 
> > > Matt, since the CCS enabling on the igt validation side looks very
> > > incomplete I'm leaning very much towards "pls revert, try again".
> > > 
> > > Cheers, Daniel
> > > 
> > > > ---
> > > >  drivers/gpu/drm/i915/gt/intel_engine_user.c | 13 ++++++++++++-
> > > >  drivers/gpu/drm/i915/gt/intel_engine_user.h |  1 +
> > > >  drivers/gpu/drm/i915/i915_drm_client.h      |  2 +-
> > > >  drivers/gpu/drm/i915/i915_getparam.c        |  2 +-
> > > >  include/uapi/drm/i915_drm.h                 | 14 +++-----------
> > > >  5 files changed, 18 insertions(+), 14 deletions(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > > > index 0f6cd96b459f..2d6bd36d6150 100644
> > > > --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > > > @@ -47,7 +47,7 @@ static const u8 uabi_classes[] = {
> > > >         [COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY,
> > > >         [VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO,
> > > >         [VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE,
> > > > -       /* TODO: Add COMPUTE_CLASS mapping once ABI is available */
> > > > +       [COMPUTE_CLASS] = I915_ENGINE_CLASS_COMPUTE,
> > > >  };
> > > >
> > > >  static int engine_cmp(void *priv, const struct list_head *A,
> > > > @@ -306,3 +306,14 @@ unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915)
> > > >
> > > >         return which;
> > > >  }
> > > > +
> > > > +bool intel_cross_engine_isolated(struct drm_i915_private *i915)
> > > > +{
> > > > +       unsigned int which = intel_engines_has_context_isolation(i915);
> > > > +
> > > > +       if ((which & BIT(I915_ENGINE_CLASS_RENDER)) &&
> > > > +           (which & BIT(I915_ENGINE_CLASS_COMPUTE)))
> > > > +               return false;
> > > > +
> > > > +       return !!which;
> > > > +}
> > > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.h b/drivers/gpu/drm/i915/gt/intel_engine_user.h
> > > > index 3dc7e8ab9fbc..ff21349db4d4 100644
> > > > --- a/drivers/gpu/drm/i915/gt/intel_engine_user.h
> > > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.h
> > > > @@ -15,6 +15,7 @@ struct intel_engine_cs *
> > > >  intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
> > > >
> > > >  unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
> > > > +bool intel_cross_engine_isolated(struct drm_i915_private *i915);
> > > >
> > > >  void intel_engine_add_user(struct intel_engine_cs *engine);
> > > >  void intel_engines_driver_register(struct drm_i915_private *i915);
> > > > diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
> > > > index 5f5b02b01ba0..f796c5e8e060 100644
> > > > --- a/drivers/gpu/drm/i915/i915_drm_client.h
> > > > +++ b/drivers/gpu/drm/i915/i915_drm_client.h
> > > > @@ -13,7 +13,7 @@
> > > >
> > > >  #include "gt/intel_engine_types.h"
> > > >
> > > > -#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_VIDEO_ENHANCE
> > > > +#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
> > > >
> > > >  struct drm_i915_private;
> > > >
> > > > diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
> > > > index c12a0adefda5..3d5120d2d78a 100644
> > > > --- a/drivers/gpu/drm/i915/i915_getparam.c
> > > > +++ b/drivers/gpu/drm/i915/i915_getparam.c
> > > > @@ -145,7 +145,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
> > > >                 value = 1;
> > > >                 break;
> > > >         case I915_PARAM_HAS_CONTEXT_ISOLATION:
> > > > -               value = intel_engines_has_context_isolation(i915);
> > > > +               value = intel_cross_engine_isolated(i915);
> > > >                 break;
> > > >         case I915_PARAM_SLICE_MASK:
> > > >                 value = sseu->slice_mask;
> > > > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > > > index 35ca528803fd..84c0af77cc1f 100644
> > > > --- a/include/uapi/drm/i915_drm.h
> > > > +++ b/include/uapi/drm/i915_drm.h
> > > > @@ -166,6 +166,7 @@ enum drm_i915_gem_engine_class {
> > > >         I915_ENGINE_CLASS_COPY          = 1,
> > > >         I915_ENGINE_CLASS_VIDEO         = 2,
> > > >         I915_ENGINE_CLASS_VIDEO_ENHANCE = 3,
> > > > +       I915_ENGINE_CLASS_COMPUTE       = 4,
> > > >
> > > >         /* should be kept compact */
> > > >
> > > > @@ -635,17 +636,8 @@ typedef struct drm_i915_irq_wait {
> > > >  #define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
> > > >
> > > >  /*
> > > > - * Query whether every context (both per-file default and user created) is
> > > > - * isolated (insofar as HW supports). If this parameter is not true, then
> > > > - * freshly created contexts may inherit values from an existing context,
> > > > - * rather than default HW values. If true, it also ensures (insofar as HW
> > > > - * supports) that all state set by this context will not leak to any other
> > > > - * context.
> > > > - *
> > > > - * As not every engine across every gen support contexts, the returned
> > > > - * value reports the support of context isolation for individual engines by
> > > > - * returning a bitmask of each engine class set to true if that class supports
> > > > - * isolation.
> > > > + * Query whether the device can make cross-engine isolation guarantees for
> > > > + * all the engines whose default state has been initialised.
> > > >   */
> > > >  #define I915_PARAM_HAS_CONTEXT_ISOLATION 50
> > > >
> > > > --
> > > > 2.35.1
> > > >
> > > 
> > > 
> > > -- 
> > > Daniel Vetter
> > > Software Engineer, Intel Corporation
> > > http://blog.ffwll.ch
> > 
> > -- 
> > Matt Roper
> > Graphics Software Engineer
> > VTT-OSGC Platform Enablement
> > Intel Corporation
> > (916) 356-2795
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation
(916) 356-2795

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Change semantics of context isolation reporting to UM
@ 2022-05-04 18:12         ` Matt Roper
  0 siblings, 0 replies; 15+ messages in thread
From: Matt Roper @ 2022-05-04 18:12 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx, Adrian Larumbe, dri-devel, airlied

On Wed, May 04, 2022 at 06:42:37PM +0200, Daniel Vetter wrote:
> On Wed, May 04, 2022 at 07:59:27AM -0700, Matt Roper wrote:
> > On Wed, May 04, 2022 at 02:24:07PM +0200, Daniel Vetter wrote:
> > > On Fri, 29 Apr 2022 at 17:11, Adrian Larumbe
> > > <adrian.larumbe@collabora.com> wrote:
> > > > I915_PARAM_HAS_CONTEXT_ISOLATION was already being used as a boolean by
> > > > both Iris and Vulkan , and stood for the guarantee that, when creating a
> > > > new context, all state set by it will not leak to any other context.
> > > >
> > > > However the actual return value was a bitmask where every bit stood for an
> > > > initialised engine, and IGT test gem_ctx_isolation makes use of this mask
> > > > for deciding on the actual context engine isolation status.
> > > >
> > > > However, we do not provide UAPI for IGT tests, so the value returned by the
> > > > PARAM ioctl has to reflect Mesa usage as a boolean.
> > > >
> > > > This change only made sense after compute engine support was added to the
> > > > driver in commit 944823c9463916dd53f3 ("drm/i915/xehp: Define compute class
> > > > and engine") because no context isolation can be assumed on any device with
> > > > both RCS annd CCS engines.
> > > >
> > > > Signed-off-by: Adrian Larumbe <adrian.larumbe@collabora.com>
> > > 
> > > Top level post and adding Matt Roper and dri-devel.
> > > 
> > > This was meant as a simple cleanup after CCS enabling in upstream, but
> > > that CCS enabling seems to have gone wrong.
> > > 
> > > What I thought we should be done for CCS enabling is the following:
> > > - actually have some igt-side hardcoded assumption about how much
> > > engines are isolated from each another, which is a hw property. I
> > > think some of that landed, but it's very incomplete
> > > 
> > > - convert all igt tests over to that. At least gem_ctx_isolation.c is
> > > not converted over, as Adrian pointed out.
> > 
> > I pointed that out last week in one of our offline syncs and that's what
> > got the ball rolling on that test again.  But you specifically told us
> > that the uapi cleanup for context isolation shouldn't block the CCS
> > patches from landing since that was still happening in parallel:
> > 
> >     "...I do see the uapi cleanup as part of this multi engine/CCS
> >     enabling, but it's not a blocker to land the patches..."
> > 
> > Did we misunderstand what you were trying to say in that email or was
> > there a change of direction here?
> 
> The cleanup (which Adrian is now working on, but there's confusion) is
> totally fine to do later. What looks really iffy is the test coverage, and
> at least from me looking around gem_ctx_isolation wasn't touched or
> updated for CCS engines, and that looks like it's not enough. Either those
> tests are bogus or not actually testing a lot, and then we should delete
> them. Or there's probably going to be some impact on how much exactly the
> engines/contexts are isolated against each another.

The test automatically picks up any new engines that show up and
includes them in execution.  The test is already running properly on DG2
CCS engines in CI right now.  E.g.,

    https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11599/re-dg2-12/igt@gem_ctx_isolation@dirty-create@ccs0.html

Is the concern just that we haven't added extra registers to the list to
check on the CCS engines?  For that matter, we're missing a bunch of
registers for RCS, BCS, VCS, and VECS for newer platforms too; the CCS
isn't really special there.

My understanding is that most of what gem_ctx_isolation tests is that
context switches really do save/restore registers properly (i.e., the
hardware behavior is sane) and there's really nothing special about CCS
engines regarding general context switching behavior.  Where things get
unusual with CCS engines is the shared reset domain, and that's more the
realm of what tests like i915_hangman cover.  But even there, the
workarounds that are in place right now (which only allow parallelism
between engines if they belong to the same VM) means that in most cases
there actually isn't any userspace-visible impact of the shared resets.

Adding JohnH and Umesh since they're a lot more familiar with all of
this stuff than I.

I can send a revert if you think that's what we need, but from what I'm
hearing we don't really expect many areas where there's
userspace-visible behavior from CCS engines that would need non-standard
IGT handling, and the few places where there are have already been
updated.  But there are so many orphaned IGT tests out there, many of
which have bitrotted away over the years, that it's possible we might
still be missing something.


Matt

> 
> That's the part that I think should be done before we call CCS support
> done and ready for merging. And if that's done properly it should also
> take care of the "igt uses HAS_CONTEXT_ISOLATION getparam" issue, since
> you need something more fancy anyway.
> -Daniel
> 
> 
> > 
> > 
> > Matt
> > 
> > > 
> > > - once igt stopped using this context isolation getparam (we do not,
> > > ever, create uapi just for testcases), fix up the uapi to what iris
> > > actually needs, which is _only_ a boolean which indicates whether the
> > > kernel's context setup code leaks register state from existing
> > > contexts to newly created ones. Which is the bug iris works around
> > > here, where using iris caused gpu hangs in libva. Iow, the kernel
> > > should always and unconditionally return true here. Check out iris
> > > history for details please, actual iris usage has nothing to do with
> > > any other cross-context or cross-engine isolation guarantee we're
> > > making, it's purely about whether our hw ctx code is buggy or not and
> > > leaks state between clients, because we accidentally used the
> > > currently running ctx as template instead of a fixed one created once
> > > at driver load.
> > > 
> > > Matt, since the CCS enabling on the igt validation side looks very
> > > incomplete I'm leaning very much towards "pls revert, try again".
> > > 
> > > Cheers, Daniel
> > > 
> > > > ---
> > > >  drivers/gpu/drm/i915/gt/intel_engine_user.c | 13 ++++++++++++-
> > > >  drivers/gpu/drm/i915/gt/intel_engine_user.h |  1 +
> > > >  drivers/gpu/drm/i915/i915_drm_client.h      |  2 +-
> > > >  drivers/gpu/drm/i915/i915_getparam.c        |  2 +-
> > > >  include/uapi/drm/i915_drm.h                 | 14 +++-----------
> > > >  5 files changed, 18 insertions(+), 14 deletions(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > > > index 0f6cd96b459f..2d6bd36d6150 100644
> > > > --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > > > @@ -47,7 +47,7 @@ static const u8 uabi_classes[] = {
> > > >         [COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY,
> > > >         [VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO,
> > > >         [VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE,
> > > > -       /* TODO: Add COMPUTE_CLASS mapping once ABI is available */
> > > > +       [COMPUTE_CLASS] = I915_ENGINE_CLASS_COMPUTE,
> > > >  };
> > > >
> > > >  static int engine_cmp(void *priv, const struct list_head *A,
> > > > @@ -306,3 +306,14 @@ unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915)
> > > >
> > > >         return which;
> > > >  }
> > > > +
> > > > +bool intel_cross_engine_isolated(struct drm_i915_private *i915)
> > > > +{
> > > > +       unsigned int which = intel_engines_has_context_isolation(i915);
> > > > +
> > > > +       if ((which & BIT(I915_ENGINE_CLASS_RENDER)) &&
> > > > +           (which & BIT(I915_ENGINE_CLASS_COMPUTE)))
> > > > +               return false;
> > > > +
> > > > +       return !!which;
> > > > +}
> > > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.h b/drivers/gpu/drm/i915/gt/intel_engine_user.h
> > > > index 3dc7e8ab9fbc..ff21349db4d4 100644
> > > > --- a/drivers/gpu/drm/i915/gt/intel_engine_user.h
> > > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.h
> > > > @@ -15,6 +15,7 @@ struct intel_engine_cs *
> > > >  intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
> > > >
> > > >  unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
> > > > +bool intel_cross_engine_isolated(struct drm_i915_private *i915);
> > > >
> > > >  void intel_engine_add_user(struct intel_engine_cs *engine);
> > > >  void intel_engines_driver_register(struct drm_i915_private *i915);
> > > > diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
> > > > index 5f5b02b01ba0..f796c5e8e060 100644
> > > > --- a/drivers/gpu/drm/i915/i915_drm_client.h
> > > > +++ b/drivers/gpu/drm/i915/i915_drm_client.h
> > > > @@ -13,7 +13,7 @@
> > > >
> > > >  #include "gt/intel_engine_types.h"
> > > >
> > > > -#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_VIDEO_ENHANCE
> > > > +#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
> > > >
> > > >  struct drm_i915_private;
> > > >
> > > > diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
> > > > index c12a0adefda5..3d5120d2d78a 100644
> > > > --- a/drivers/gpu/drm/i915/i915_getparam.c
> > > > +++ b/drivers/gpu/drm/i915/i915_getparam.c
> > > > @@ -145,7 +145,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
> > > >                 value = 1;
> > > >                 break;
> > > >         case I915_PARAM_HAS_CONTEXT_ISOLATION:
> > > > -               value = intel_engines_has_context_isolation(i915);
> > > > +               value = intel_cross_engine_isolated(i915);
> > > >                 break;
> > > >         case I915_PARAM_SLICE_MASK:
> > > >                 value = sseu->slice_mask;
> > > > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > > > index 35ca528803fd..84c0af77cc1f 100644
> > > > --- a/include/uapi/drm/i915_drm.h
> > > > +++ b/include/uapi/drm/i915_drm.h
> > > > @@ -166,6 +166,7 @@ enum drm_i915_gem_engine_class {
> > > >         I915_ENGINE_CLASS_COPY          = 1,
> > > >         I915_ENGINE_CLASS_VIDEO         = 2,
> > > >         I915_ENGINE_CLASS_VIDEO_ENHANCE = 3,
> > > > +       I915_ENGINE_CLASS_COMPUTE       = 4,
> > > >
> > > >         /* should be kept compact */
> > > >
> > > > @@ -635,17 +636,8 @@ typedef struct drm_i915_irq_wait {
> > > >  #define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
> > > >
> > > >  /*
> > > > - * Query whether every context (both per-file default and user created) is
> > > > - * isolated (insofar as HW supports). If this parameter is not true, then
> > > > - * freshly created contexts may inherit values from an existing context,
> > > > - * rather than default HW values. If true, it also ensures (insofar as HW
> > > > - * supports) that all state set by this context will not leak to any other
> > > > - * context.
> > > > - *
> > > > - * As not every engine across every gen support contexts, the returned
> > > > - * value reports the support of context isolation for individual engines by
> > > > - * returning a bitmask of each engine class set to true if that class supports
> > > > - * isolation.
> > > > + * Query whether the device can make cross-engine isolation guarantees for
> > > > + * all the engines whose default state has been initialised.
> > > >   */
> > > >  #define I915_PARAM_HAS_CONTEXT_ISOLATION 50
> > > >
> > > > --
> > > > 2.35.1
> > > >
> > > 
> > > 
> > > -- 
> > > Daniel Vetter
> > > Software Engineer, Intel Corporation
> > > http://blog.ffwll.ch
> > 
> > -- 
> > Matt Roper
> > Graphics Software Engineer
> > VTT-OSGC Platform Enablement
> > Intel Corporation
> > (916) 356-2795
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation
(916) 356-2795

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] drm/i915: Change semantics of context isolation reporting to UM
  2022-05-04 18:12         ` [Intel-gfx] " Matt Roper
@ 2022-05-05 12:46           ` Daniel Vetter
  -1 siblings, 0 replies; 15+ messages in thread
From: Daniel Vetter @ 2022-05-05 12:46 UTC (permalink / raw)
  To: Matt Roper
  Cc: bob.beckett, Tvrtko Ursulin, Umesh Nerlige Ramappa, intel-gfx,
	Adrian Larumbe, dri-devel, John Harrison

On Wed, May 04, 2022 at 11:12:26AM -0700, Matt Roper wrote:
> On Wed, May 04, 2022 at 06:42:37PM +0200, Daniel Vetter wrote:
> > On Wed, May 04, 2022 at 07:59:27AM -0700, Matt Roper wrote:
> > > On Wed, May 04, 2022 at 02:24:07PM +0200, Daniel Vetter wrote:
> > > > On Fri, 29 Apr 2022 at 17:11, Adrian Larumbe
> > > > <adrian.larumbe@collabora.com> wrote:
> > > > > I915_PARAM_HAS_CONTEXT_ISOLATION was already being used as a boolean by
> > > > > both Iris and Vulkan , and stood for the guarantee that, when creating a
> > > > > new context, all state set by it will not leak to any other context.
> > > > >
> > > > > However the actual return value was a bitmask where every bit stood for an
> > > > > initialised engine, and IGT test gem_ctx_isolation makes use of this mask
> > > > > for deciding on the actual context engine isolation status.
> > > > >
> > > > > However, we do not provide UAPI for IGT tests, so the value returned by the
> > > > > PARAM ioctl has to reflect Mesa usage as a boolean.
> > > > >
> > > > > This change only made sense after compute engine support was added to the
> > > > > driver in commit 944823c9463916dd53f3 ("drm/i915/xehp: Define compute class
> > > > > and engine") because no context isolation can be assumed on any device with
> > > > > both RCS annd CCS engines.
> > > > >
> > > > > Signed-off-by: Adrian Larumbe <adrian.larumbe@collabora.com>
> > > > 
> > > > Top level post and adding Matt Roper and dri-devel.
> > > > 
> > > > This was meant as a simple cleanup after CCS enabling in upstream, but
> > > > that CCS enabling seems to have gone wrong.
> > > > 
> > > > What I thought we should be done for CCS enabling is the following:
> > > > - actually have some igt-side hardcoded assumption about how much
> > > > engines are isolated from each another, which is a hw property. I
> > > > think some of that landed, but it's very incomplete
> > > > 
> > > > - convert all igt tests over to that. At least gem_ctx_isolation.c is
> > > > not converted over, as Adrian pointed out.
> > > 
> > > I pointed that out last week in one of our offline syncs and that's what
> > > got the ball rolling on that test again.  But you specifically told us
> > > that the uapi cleanup for context isolation shouldn't block the CCS
> > > patches from landing since that was still happening in parallel:
> > > 
> > >     "...I do see the uapi cleanup as part of this multi engine/CCS
> > >     enabling, but it's not a blocker to land the patches..."
> > > 
> > > Did we misunderstand what you were trying to say in that email or was
> > > there a change of direction here?
> > 
> > The cleanup (which Adrian is now working on, but there's confusion) is
> > totally fine to do later. What looks really iffy is the test coverage, and
> > at least from me looking around gem_ctx_isolation wasn't touched or
> > updated for CCS engines, and that looks like it's not enough. Either those
> > tests are bogus or not actually testing a lot, and then we should delete
> > them. Or there's probably going to be some impact on how much exactly the
> > engines/contexts are isolated against each another.
> 
> The test automatically picks up any new engines that show up and
> includes them in execution.  The test is already running properly on DG2
> CCS engines in CI right now.  E.g.,
> 
>     https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11599/re-dg2-12/igt@gem_ctx_isolation@dirty-create@ccs0.html
> 
> Is the concern just that we haven't added extra registers to the list to
> check on the CCS engines?  For that matter, we're missing a bunch of
> registers for RCS, BCS, VCS, and VECS for newer platforms too; the CCS
> isn't really special there.

Hm I guess I'm just confused and this isn't actually the testcase that was
discussed at length.

I think I was mixing up gem_ctx_isolation with gem_ctx_persistence. The
latter also neeed adjustments for CCS enabling, and that patch seems to
have landed. Except it's not done with a shared helper, so now it's still
as hard as ever to find these tests that validate cross-engine
interactions.

> My understanding is that most of what gem_ctx_isolation tests is that
> context switches really do save/restore registers properly (i.e., the
> hardware behavior is sane) and there's really nothing special about CCS
> engines regarding general context switching behavior.  Where things get
> unusual with CCS engines is the shared reset domain, and that's more the
> realm of what tests like i915_hangman cover.  But even there, the
> workarounds that are in place right now (which only allow parallelism
> between engines if they belong to the same VM) means that in most cases
> there actually isn't any userspace-visible impact of the shared resets.
> 
> Adding JohnH and Umesh since they're a lot more familiar with all of
> this stuff than I.
> 
> I can send a revert if you think that's what we need, but from what I'm
> hearing we don't really expect many areas where there's
> userspace-visible behavior from CCS engines that would need non-standard
> IGT handling, and the few places where there are have already been
> updated.  But there are so many orphaned IGT tests out there, many of
> which have bitrotted away over the years, that it's possible we might
> still be missing something.

Nah I think it's ok. I guess I was just hoping that someone would review
the entire area a bit and help to structure things more, instead of
piling a bunch of local additions into various tests without much
coordination and calling it a day. It would have been a great opportunity
to clarify what exactly we guarantee wrt context/engine/reset isolation.

I'm not sure where that leaves the patch from Adrian, since tasking Adrian
with reviewing the entirety of igt test coverage around ctx and engines is
probably not what he signed up for and also doesn't make much sense, since
he didn't handle any of the CCS enabling.

I guess we'll just leave the igt testcase dungeons as is and hope they
don't end up eating us?

Dunno, Daniel


> 
> 
> Matt
> 
> > 
> > That's the part that I think should be done before we call CCS support
> > done and ready for merging. And if that's done properly it should also
> > take care of the "igt uses HAS_CONTEXT_ISOLATION getparam" issue, since
> > you need something more fancy anyway.
> > -Daniel
> > 
> > 
> > > 
> > > 
> > > Matt
> > > 
> > > > 
> > > > - once igt stopped using this context isolation getparam (we do not,
> > > > ever, create uapi just for testcases), fix up the uapi to what iris
> > > > actually needs, which is _only_ a boolean which indicates whether the
> > > > kernel's context setup code leaks register state from existing
> > > > contexts to newly created ones. Which is the bug iris works around
> > > > here, where using iris caused gpu hangs in libva. Iow, the kernel
> > > > should always and unconditionally return true here. Check out iris
> > > > history for details please, actual iris usage has nothing to do with
> > > > any other cross-context or cross-engine isolation guarantee we're
> > > > making, it's purely about whether our hw ctx code is buggy or not and
> > > > leaks state between clients, because we accidentally used the
> > > > currently running ctx as template instead of a fixed one created once
> > > > at driver load.
> > > > 
> > > > Matt, since the CCS enabling on the igt validation side looks very
> > > > incomplete I'm leaning very much towards "pls revert, try again".
> > > > 
> > > > Cheers, Daniel
> > > > 
> > > > > ---
> > > > >  drivers/gpu/drm/i915/gt/intel_engine_user.c | 13 ++++++++++++-
> > > > >  drivers/gpu/drm/i915/gt/intel_engine_user.h |  1 +
> > > > >  drivers/gpu/drm/i915/i915_drm_client.h      |  2 +-
> > > > >  drivers/gpu/drm/i915/i915_getparam.c        |  2 +-
> > > > >  include/uapi/drm/i915_drm.h                 | 14 +++-----------
> > > > >  5 files changed, 18 insertions(+), 14 deletions(-)
> > > > >
> > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > > > > index 0f6cd96b459f..2d6bd36d6150 100644
> > > > > --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > > > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > > > > @@ -47,7 +47,7 @@ static const u8 uabi_classes[] = {
> > > > >         [COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY,
> > > > >         [VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO,
> > > > >         [VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE,
> > > > > -       /* TODO: Add COMPUTE_CLASS mapping once ABI is available */
> > > > > +       [COMPUTE_CLASS] = I915_ENGINE_CLASS_COMPUTE,
> > > > >  };
> > > > >
> > > > >  static int engine_cmp(void *priv, const struct list_head *A,
> > > > > @@ -306,3 +306,14 @@ unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915)
> > > > >
> > > > >         return which;
> > > > >  }
> > > > > +
> > > > > +bool intel_cross_engine_isolated(struct drm_i915_private *i915)
> > > > > +{
> > > > > +       unsigned int which = intel_engines_has_context_isolation(i915);
> > > > > +
> > > > > +       if ((which & BIT(I915_ENGINE_CLASS_RENDER)) &&
> > > > > +           (which & BIT(I915_ENGINE_CLASS_COMPUTE)))
> > > > > +               return false;
> > > > > +
> > > > > +       return !!which;
> > > > > +}
> > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.h b/drivers/gpu/drm/i915/gt/intel_engine_user.h
> > > > > index 3dc7e8ab9fbc..ff21349db4d4 100644
> > > > > --- a/drivers/gpu/drm/i915/gt/intel_engine_user.h
> > > > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.h
> > > > > @@ -15,6 +15,7 @@ struct intel_engine_cs *
> > > > >  intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
> > > > >
> > > > >  unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
> > > > > +bool intel_cross_engine_isolated(struct drm_i915_private *i915);
> > > > >
> > > > >  void intel_engine_add_user(struct intel_engine_cs *engine);
> > > > >  void intel_engines_driver_register(struct drm_i915_private *i915);
> > > > > diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
> > > > > index 5f5b02b01ba0..f796c5e8e060 100644
> > > > > --- a/drivers/gpu/drm/i915/i915_drm_client.h
> > > > > +++ b/drivers/gpu/drm/i915/i915_drm_client.h
> > > > > @@ -13,7 +13,7 @@
> > > > >
> > > > >  #include "gt/intel_engine_types.h"
> > > > >
> > > > > -#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_VIDEO_ENHANCE
> > > > > +#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
> > > > >
> > > > >  struct drm_i915_private;
> > > > >
> > > > > diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
> > > > > index c12a0adefda5..3d5120d2d78a 100644
> > > > > --- a/drivers/gpu/drm/i915/i915_getparam.c
> > > > > +++ b/drivers/gpu/drm/i915/i915_getparam.c
> > > > > @@ -145,7 +145,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
> > > > >                 value = 1;
> > > > >                 break;
> > > > >         case I915_PARAM_HAS_CONTEXT_ISOLATION:
> > > > > -               value = intel_engines_has_context_isolation(i915);
> > > > > +               value = intel_cross_engine_isolated(i915);
> > > > >                 break;
> > > > >         case I915_PARAM_SLICE_MASK:
> > > > >                 value = sseu->slice_mask;
> > > > > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > > > > index 35ca528803fd..84c0af77cc1f 100644
> > > > > --- a/include/uapi/drm/i915_drm.h
> > > > > +++ b/include/uapi/drm/i915_drm.h
> > > > > @@ -166,6 +166,7 @@ enum drm_i915_gem_engine_class {
> > > > >         I915_ENGINE_CLASS_COPY          = 1,
> > > > >         I915_ENGINE_CLASS_VIDEO         = 2,
> > > > >         I915_ENGINE_CLASS_VIDEO_ENHANCE = 3,
> > > > > +       I915_ENGINE_CLASS_COMPUTE       = 4,
> > > > >
> > > > >         /* should be kept compact */
> > > > >
> > > > > @@ -635,17 +636,8 @@ typedef struct drm_i915_irq_wait {
> > > > >  #define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
> > > > >
> > > > >  /*
> > > > > - * Query whether every context (both per-file default and user created) is
> > > > > - * isolated (insofar as HW supports). If this parameter is not true, then
> > > > > - * freshly created contexts may inherit values from an existing context,
> > > > > - * rather than default HW values. If true, it also ensures (insofar as HW
> > > > > - * supports) that all state set by this context will not leak to any other
> > > > > - * context.
> > > > > - *
> > > > > - * As not every engine across every gen support contexts, the returned
> > > > > - * value reports the support of context isolation for individual engines by
> > > > > - * returning a bitmask of each engine class set to true if that class supports
> > > > > - * isolation.
> > > > > + * Query whether the device can make cross-engine isolation guarantees for
> > > > > + * all the engines whose default state has been initialised.
> > > > >   */
> > > > >  #define I915_PARAM_HAS_CONTEXT_ISOLATION 50
> > > > >
> > > > > --
> > > > > 2.35.1
> > > > >
> > > > 
> > > > 
> > > > -- 
> > > > Daniel Vetter
> > > > Software Engineer, Intel Corporation
> > > > http://blog.ffwll.ch
> > > 
> > > -- 
> > > Matt Roper
> > > Graphics Software Engineer
> > > VTT-OSGC Platform Enablement
> > > Intel Corporation
> > > (916) 356-2795
> > 
> > -- 
> > Daniel Vetter
> > Software Engineer, Intel Corporation
> > http://blog.ffwll.ch
> 
> -- 
> Matt Roper
> Graphics Software Engineer
> VTT-OSGC Platform Enablement
> Intel Corporation
> (916) 356-2795

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Change semantics of context isolation reporting to UM
@ 2022-05-05 12:46           ` Daniel Vetter
  0 siblings, 0 replies; 15+ messages in thread
From: Daniel Vetter @ 2022-05-05 12:46 UTC (permalink / raw)
  To: Matt Roper; +Cc: intel-gfx, Adrian Larumbe, dri-devel, airlied

On Wed, May 04, 2022 at 11:12:26AM -0700, Matt Roper wrote:
> On Wed, May 04, 2022 at 06:42:37PM +0200, Daniel Vetter wrote:
> > On Wed, May 04, 2022 at 07:59:27AM -0700, Matt Roper wrote:
> > > On Wed, May 04, 2022 at 02:24:07PM +0200, Daniel Vetter wrote:
> > > > On Fri, 29 Apr 2022 at 17:11, Adrian Larumbe
> > > > <adrian.larumbe@collabora.com> wrote:
> > > > > I915_PARAM_HAS_CONTEXT_ISOLATION was already being used as a boolean by
> > > > > both Iris and Vulkan , and stood for the guarantee that, when creating a
> > > > > new context, all state set by it will not leak to any other context.
> > > > >
> > > > > However the actual return value was a bitmask where every bit stood for an
> > > > > initialised engine, and IGT test gem_ctx_isolation makes use of this mask
> > > > > for deciding on the actual context engine isolation status.
> > > > >
> > > > > However, we do not provide UAPI for IGT tests, so the value returned by the
> > > > > PARAM ioctl has to reflect Mesa usage as a boolean.
> > > > >
> > > > > This change only made sense after compute engine support was added to the
> > > > > driver in commit 944823c9463916dd53f3 ("drm/i915/xehp: Define compute class
> > > > > and engine") because no context isolation can be assumed on any device with
> > > > > both RCS annd CCS engines.
> > > > >
> > > > > Signed-off-by: Adrian Larumbe <adrian.larumbe@collabora.com>
> > > > 
> > > > Top level post and adding Matt Roper and dri-devel.
> > > > 
> > > > This was meant as a simple cleanup after CCS enabling in upstream, but
> > > > that CCS enabling seems to have gone wrong.
> > > > 
> > > > What I thought we should be done for CCS enabling is the following:
> > > > - actually have some igt-side hardcoded assumption about how much
> > > > engines are isolated from each another, which is a hw property. I
> > > > think some of that landed, but it's very incomplete
> > > > 
> > > > - convert all igt tests over to that. At least gem_ctx_isolation.c is
> > > > not converted over, as Adrian pointed out.
> > > 
> > > I pointed that out last week in one of our offline syncs and that's what
> > > got the ball rolling on that test again.  But you specifically told us
> > > that the uapi cleanup for context isolation shouldn't block the CCS
> > > patches from landing since that was still happening in parallel:
> > > 
> > >     "...I do see the uapi cleanup as part of this multi engine/CCS
> > >     enabling, but it's not a blocker to land the patches..."
> > > 
> > > Did we misunderstand what you were trying to say in that email or was
> > > there a change of direction here?
> > 
> > The cleanup (which Adrian is now working on, but there's confusion) is
> > totally fine to do later. What looks really iffy is the test coverage, and
> > at least from me looking around gem_ctx_isolation wasn't touched or
> > updated for CCS engines, and that looks like it's not enough. Either those
> > tests are bogus or not actually testing a lot, and then we should delete
> > them. Or there's probably going to be some impact on how much exactly the
> > engines/contexts are isolated against each another.
> 
> The test automatically picks up any new engines that show up and
> includes them in execution.  The test is already running properly on DG2
> CCS engines in CI right now.  E.g.,
> 
>     https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11599/re-dg2-12/igt@gem_ctx_isolation@dirty-create@ccs0.html
> 
> Is the concern just that we haven't added extra registers to the list to
> check on the CCS engines?  For that matter, we're missing a bunch of
> registers for RCS, BCS, VCS, and VECS for newer platforms too; the CCS
> isn't really special there.

Hm I guess I'm just confused and this isn't actually the testcase that was
discussed at length.

I think I was mixing up gem_ctx_isolation with gem_ctx_persistence. The
latter also neeed adjustments for CCS enabling, and that patch seems to
have landed. Except it's not done with a shared helper, so now it's still
as hard as ever to find these tests that validate cross-engine
interactions.

> My understanding is that most of what gem_ctx_isolation tests is that
> context switches really do save/restore registers properly (i.e., the
> hardware behavior is sane) and there's really nothing special about CCS
> engines regarding general context switching behavior.  Where things get
> unusual with CCS engines is the shared reset domain, and that's more the
> realm of what tests like i915_hangman cover.  But even there, the
> workarounds that are in place right now (which only allow parallelism
> between engines if they belong to the same VM) means that in most cases
> there actually isn't any userspace-visible impact of the shared resets.
> 
> Adding JohnH and Umesh since they're a lot more familiar with all of
> this stuff than I.
> 
> I can send a revert if you think that's what we need, but from what I'm
> hearing we don't really expect many areas where there's
> userspace-visible behavior from CCS engines that would need non-standard
> IGT handling, and the few places where there are have already been
> updated.  But there are so many orphaned IGT tests out there, many of
> which have bitrotted away over the years, that it's possible we might
> still be missing something.

Nah I think it's ok. I guess I was just hoping that someone would review
the entire area a bit and help to structure things more, instead of
piling a bunch of local additions into various tests without much
coordination and calling it a day. It would have been a great opportunity
to clarify what exactly we guarantee wrt context/engine/reset isolation.

I'm not sure where that leaves the patch from Adrian, since tasking Adrian
with reviewing the entirety of igt test coverage around ctx and engines is
probably not what he signed up for and also doesn't make much sense, since
he didn't handle any of the CCS enabling.

I guess we'll just leave the igt testcase dungeons as is and hope they
don't end up eating us?

Dunno, Daniel


> 
> 
> Matt
> 
> > 
> > That's the part that I think should be done before we call CCS support
> > done and ready for merging. And if that's done properly it should also
> > take care of the "igt uses HAS_CONTEXT_ISOLATION getparam" issue, since
> > you need something more fancy anyway.
> > -Daniel
> > 
> > 
> > > 
> > > 
> > > Matt
> > > 
> > > > 
> > > > - once igt stopped using this context isolation getparam (we do not,
> > > > ever, create uapi just for testcases), fix up the uapi to what iris
> > > > actually needs, which is _only_ a boolean which indicates whether the
> > > > kernel's context setup code leaks register state from existing
> > > > contexts to newly created ones. Which is the bug iris works around
> > > > here, where using iris caused gpu hangs in libva. Iow, the kernel
> > > > should always and unconditionally return true here. Check out iris
> > > > history for details please, actual iris usage has nothing to do with
> > > > any other cross-context or cross-engine isolation guarantee we're
> > > > making, it's purely about whether our hw ctx code is buggy or not and
> > > > leaks state between clients, because we accidentally used the
> > > > currently running ctx as template instead of a fixed one created once
> > > > at driver load.
> > > > 
> > > > Matt, since the CCS enabling on the igt validation side looks very
> > > > incomplete I'm leaning very much towards "pls revert, try again".
> > > > 
> > > > Cheers, Daniel
> > > > 
> > > > > ---
> > > > >  drivers/gpu/drm/i915/gt/intel_engine_user.c | 13 ++++++++++++-
> > > > >  drivers/gpu/drm/i915/gt/intel_engine_user.h |  1 +
> > > > >  drivers/gpu/drm/i915/i915_drm_client.h      |  2 +-
> > > > >  drivers/gpu/drm/i915/i915_getparam.c        |  2 +-
> > > > >  include/uapi/drm/i915_drm.h                 | 14 +++-----------
> > > > >  5 files changed, 18 insertions(+), 14 deletions(-)
> > > > >
> > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > > > > index 0f6cd96b459f..2d6bd36d6150 100644
> > > > > --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > > > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > > > > @@ -47,7 +47,7 @@ static const u8 uabi_classes[] = {
> > > > >         [COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY,
> > > > >         [VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO,
> > > > >         [VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE,
> > > > > -       /* TODO: Add COMPUTE_CLASS mapping once ABI is available */
> > > > > +       [COMPUTE_CLASS] = I915_ENGINE_CLASS_COMPUTE,
> > > > >  };
> > > > >
> > > > >  static int engine_cmp(void *priv, const struct list_head *A,
> > > > > @@ -306,3 +306,14 @@ unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915)
> > > > >
> > > > >         return which;
> > > > >  }
> > > > > +
> > > > > +bool intel_cross_engine_isolated(struct drm_i915_private *i915)
> > > > > +{
> > > > > +       unsigned int which = intel_engines_has_context_isolation(i915);
> > > > > +
> > > > > +       if ((which & BIT(I915_ENGINE_CLASS_RENDER)) &&
> > > > > +           (which & BIT(I915_ENGINE_CLASS_COMPUTE)))
> > > > > +               return false;
> > > > > +
> > > > > +       return !!which;
> > > > > +}
> > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.h b/drivers/gpu/drm/i915/gt/intel_engine_user.h
> > > > > index 3dc7e8ab9fbc..ff21349db4d4 100644
> > > > > --- a/drivers/gpu/drm/i915/gt/intel_engine_user.h
> > > > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.h
> > > > > @@ -15,6 +15,7 @@ struct intel_engine_cs *
> > > > >  intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
> > > > >
> > > > >  unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
> > > > > +bool intel_cross_engine_isolated(struct drm_i915_private *i915);
> > > > >
> > > > >  void intel_engine_add_user(struct intel_engine_cs *engine);
> > > > >  void intel_engines_driver_register(struct drm_i915_private *i915);
> > > > > diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
> > > > > index 5f5b02b01ba0..f796c5e8e060 100644
> > > > > --- a/drivers/gpu/drm/i915/i915_drm_client.h
> > > > > +++ b/drivers/gpu/drm/i915/i915_drm_client.h
> > > > > @@ -13,7 +13,7 @@
> > > > >
> > > > >  #include "gt/intel_engine_types.h"
> > > > >
> > > > > -#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_VIDEO_ENHANCE
> > > > > +#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
> > > > >
> > > > >  struct drm_i915_private;
> > > > >
> > > > > diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
> > > > > index c12a0adefda5..3d5120d2d78a 100644
> > > > > --- a/drivers/gpu/drm/i915/i915_getparam.c
> > > > > +++ b/drivers/gpu/drm/i915/i915_getparam.c
> > > > > @@ -145,7 +145,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
> > > > >                 value = 1;
> > > > >                 break;
> > > > >         case I915_PARAM_HAS_CONTEXT_ISOLATION:
> > > > > -               value = intel_engines_has_context_isolation(i915);
> > > > > +               value = intel_cross_engine_isolated(i915);
> > > > >                 break;
> > > > >         case I915_PARAM_SLICE_MASK:
> > > > >                 value = sseu->slice_mask;
> > > > > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > > > > index 35ca528803fd..84c0af77cc1f 100644
> > > > > --- a/include/uapi/drm/i915_drm.h
> > > > > +++ b/include/uapi/drm/i915_drm.h
> > > > > @@ -166,6 +166,7 @@ enum drm_i915_gem_engine_class {
> > > > >         I915_ENGINE_CLASS_COPY          = 1,
> > > > >         I915_ENGINE_CLASS_VIDEO         = 2,
> > > > >         I915_ENGINE_CLASS_VIDEO_ENHANCE = 3,
> > > > > +       I915_ENGINE_CLASS_COMPUTE       = 4,
> > > > >
> > > > >         /* should be kept compact */
> > > > >
> > > > > @@ -635,17 +636,8 @@ typedef struct drm_i915_irq_wait {
> > > > >  #define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
> > > > >
> > > > >  /*
> > > > > - * Query whether every context (both per-file default and user created) is
> > > > > - * isolated (insofar as HW supports). If this parameter is not true, then
> > > > > - * freshly created contexts may inherit values from an existing context,
> > > > > - * rather than default HW values. If true, it also ensures (insofar as HW
> > > > > - * supports) that all state set by this context will not leak to any other
> > > > > - * context.
> > > > > - *
> > > > > - * As not every engine across every gen support contexts, the returned
> > > > > - * value reports the support of context isolation for individual engines by
> > > > > - * returning a bitmask of each engine class set to true if that class supports
> > > > > - * isolation.
> > > > > + * Query whether the device can make cross-engine isolation guarantees for
> > > > > + * all the engines whose default state has been initialised.
> > > > >   */
> > > > >  #define I915_PARAM_HAS_CONTEXT_ISOLATION 50
> > > > >
> > > > > --
> > > > > 2.35.1
> > > > >
> > > > 
> > > > 
> > > > -- 
> > > > Daniel Vetter
> > > > Software Engineer, Intel Corporation
> > > > http://blog.ffwll.ch
> > > 
> > > -- 
> > > Matt Roper
> > > Graphics Software Engineer
> > > VTT-OSGC Platform Enablement
> > > Intel Corporation
> > > (916) 356-2795
> > 
> > -- 
> > Daniel Vetter
> > Software Engineer, Intel Corporation
> > http://blog.ffwll.ch
> 
> -- 
> Matt Roper
> Graphics Software Engineer
> VTT-OSGC Platform Enablement
> Intel Corporation
> (916) 356-2795

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2022-05-05 12:46 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-29 15:11 [Intel-gfx] [PATCH] drm/i915: Change semantics of context isolation reporting to UM Adrian Larumbe
2022-04-29 17:19 ` [Intel-gfx] ✗ Fi.CI.SPARSE: warning for drm/i915: Change semantics of context isolation reporting to UM (rev2) Patchwork
2022-04-29 17:51 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
2022-05-03 14:44 ` [Intel-gfx] [PATCH] drm/i915: Change semantics of context isolation reporting to UM Tvrtko Ursulin
2022-05-04 11:50   ` Adrian Larumbe
2022-05-04 12:24 ` Daniel Vetter
2022-05-04 12:24   ` [Intel-gfx] " Daniel Vetter
2022-05-04 14:59   ` Matt Roper
2022-05-04 14:59     ` [Intel-gfx] " Matt Roper
2022-05-04 16:42     ` Daniel Vetter
2022-05-04 16:42       ` [Intel-gfx] " Daniel Vetter
2022-05-04 18:12       ` Matt Roper
2022-05-04 18:12         ` [Intel-gfx] " Matt Roper
2022-05-05 12:46         ` Daniel Vetter
2022-05-05 12:46           ` [Intel-gfx] " Daniel Vetter

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.