All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/5] drm/i915: document caching related bits
@ 2021-07-13 10:45 ` Matthew Auld
  0 siblings, 0 replies; 37+ messages in thread
From: Matthew Auld @ 2021-07-13 10:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, dri-devel

Try to document the object caching related bits, like cache_coherent and
cache_dirty.

Suggested-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 135 +++++++++++++++++-
 drivers/gpu/drm/i915/i915_drv.h               |   9 --
 2 files changed, 131 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index ef3de2ae9723..02c3529b774c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -92,6 +92,57 @@ struct drm_i915_gem_object_ops {
 	const char *name; /* friendly name for debug, e.g. lockdep classes */
 };
 
+/**
+ * enum i915_cache_level - The supported GTT caching values for system memory
+ * pages.
+ *
+ * These translate to some special GTT PTE bits when binding pages into some
+ * address space. It also determines whether an object, or rather its pages are
+ * coherent with the GPU, when also reading or writing through the CPU cache
+ * with those pages.
+ *
+ * Userspace can also control this through struct drm_i915_gem_caching.
+ */
+enum i915_cache_level {
+	/**
+	 * @I915_CACHE_NONE:
+	 *
+	 * Not coherent with the CPU cache. If the cache is dirty and we need
+	 * the underlying pages to be coherent with some later GPU access then
+	 * we need to manually flush the pages.
+	 *
+	 * Note that on shared-LLC platforms reads through the CPU cache are
+	 * still coherent even with this setting. See also
+	 * I915_BO_CACHE_COHERENT_FOR_READ for more details.
+	 */
+	I915_CACHE_NONE = 0,
+	/**
+	 * @I915_CACHE_LLC:
+	 *
+	 * Coherent with the CPU cache. If the cache is dirty, then the GPU will
+	 * ensure that access remains coherent, when both reading and writing
+	 * through the CPU cache.
+	 *
+	 * Applies to both platforms with shared-LLC(HAS_LLC), and snooping
+	 * based platforms(HAS_SNOOP).
+	 */
+	I915_CACHE_LLC,
+	/**
+	 * @I915_CACHE_L3_LLC:
+	 *
+	 * gen7+, L3 sits between the domain specifc caches, eg sampler/render
+	 * caches, and the large Last-Level-Cache. LLC is coherent with the CPU,
+	 * but L3 is only visible to the GPU.
+	 */
+	I915_CACHE_L3_LLC,
+	/**
+	 * @I915_CACHE_WT:
+	 *
+	 * hsw:gt3e Write-through for scanout buffers.
+	 */
+	I915_CACHE_WT,
+};
+
 enum i915_map_type {
 	I915_MAP_WB = 0,
 	I915_MAP_WC,
@@ -228,14 +279,90 @@ struct drm_i915_gem_object {
 	unsigned int mem_flags;
 #define I915_BO_FLAG_STRUCT_PAGE BIT(0) /* Object backed by struct pages */
 #define I915_BO_FLAG_IOMEM       BIT(1) /* Object backed by IO memory */
-	/*
-	 * Is the object to be mapped as read-only to the GPU
-	 * Only honoured if hardware has relevant pte bit
+	/**
+	 * @cache_level: The desired GTT caching level.
+	 *
+	 * See enum i915_cache_level for possible values, along with what
+	 * each does.
 	 */
 	unsigned int cache_level:3;
-	unsigned int cache_coherent:2;
+	/**
+	 * @cache_coherent:
+	 *
+	 * Track whether the pages are coherent with the GPU if reading or
+	 * writing through the CPU cache.
+	 *
+	 * This largely depends on the @cache_level, for example if the object
+	 * is marked as I915_CACHE_LLC, then GPU access is coherent for both
+	 * reads and writes through the CPU cache.
+	 *
+	 * Note that on platforms with shared-LLC support(HAS_LLC) reads through
+	 * the CPU cache are always coherent, regardless of the @cache_level. On
+	 * snooping based platforms this is not the case, unless the full
+	 * I915_CACHE_LLC or similar setting is used.
+	 *
+	 * As a result of this we need to track coherency separately for reads
+	 * and writes, in order to avoid superfluous flushing on shared-LLC
+	 * platforms, for reads.
+	 *
+	 * I915_BO_CACHE_COHERENT_FOR_READ:
+	 *
+	 * When reading through the CPU cache, the GPU is still coherent. Note
+	 * that no data has actually been modified here, so it might seem
+	 * strange that we care about this.
+	 *
+	 * As an example, if some object is mapped on the CPU with write-back
+	 * caching, and we read some page, then the cache likely now contains
+	 * the data from that read. At this point the cache and main memory
+	 * match up, so all good. But next the GPU needs to write some data to
+	 * that same page. Now if the @cache_level is I915_CACHE_NONE and the
+	 * the platform doesn't have the shared-LLC, then the GPU will
+	 * effectively skip invalidating the cache(or however that works
+	 * internally) when writing the new value.  This is really bad since the
+	 * GPU has just written some new data to main memory, but the CPU cache
+	 * is still valid and now contains stale data. As a result the next time
+	 * we do a cached read with the CPU, we are rewarded with stale data.
+	 * Likewise if the cache is later flushed, we might be rewarded with
+	 * overwriting main memory with stale data.
+	 *
+	 * I915_BO_CACHE_COHERENT_FOR_WRITE:
+	 *
+	 * When writing through the CPU cache, the GPU is still coherent. Note
+	 * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
+	 *
+	 * This is never set when I915_CACHE_NONE is used for @cache_level,
+	 * where instead we have to manually flush the caches after writing
+	 * through the CPU cache. For other cache levels this should be set and
+	 * the object is therefore considered coherent for both reads and writes
+	 * through the CPU cache.
+	 */
 #define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
 #define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
+	unsigned int cache_coherent:2;
+	/**
+	 * @cache_dirty:
+	 *
+	 * Track if the cache might be dirty for the @pages i.e it has yet to be
+	 * written back to main memory. As a result reading directly from main
+	 * memory might yield stale data.
+	 *
+	 * This also ties into whether the kernel is tracking the object as
+	 * coherent with the GPU, as per @cache_coherent, as it determines if
+	 * flushing might be needed at various points.
+	 *
+	 * Another part of @cache_dirty is managing flushing when first
+	 * acquiring the pages for system memory, at this point the pages are
+	 * considered foreign, so the default assumption is that the cache is
+	 * dirty, for example the page zeroing done my the kernel might leave
+	 * writes though the CPU cache, or swapping-in, while the actual data in
+	 * main memory is potentially stale.  Note that this is a potential
+	 * security issue when dealing with userspace objects and zeroing. Now,
+	 * whether we actually need apply the big sledgehammer of flushing all
+	 * the pages on acquire depends on if @cache_coherent is marked as
+	 * I915_BO_CACHE_COHERENT_FOR_WRITE, i.e that the GPU will be coherent
+	 * for both reads and writes though the CPU cache. So pretty much this
+	 * should only be needed for I915_CACHE_NONE objects.
+	 */
 	unsigned int cache_dirty:1;
 
 	/**
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c4747f4407ef..37bb1a3cadd4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -394,15 +394,6 @@ struct drm_i915_display_funcs {
 	void (*read_luts)(struct intel_crtc_state *crtc_state);
 };
 
-enum i915_cache_level {
-	I915_CACHE_NONE = 0,
-	I915_CACHE_LLC, /* also used for snoopable memory on non-LLC */
-	I915_CACHE_L3_LLC, /* gen7+, L3 sits between the domain specifc
-			      caches, eg sampler/render caches, and the
-			      large Last-Level-Cache. LLC is coherent with
-			      the CPU, but L3 is only visible to the GPU. */
-	I915_CACHE_WT, /* hsw:gt3e WriteThrough for scanouts */
-};
 
 #define I915_COLOR_UNEVICTABLE (-1) /* a non-vma sharing the address space */
 
-- 
2.26.3


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits
@ 2021-07-13 10:45 ` Matthew Auld
  0 siblings, 0 replies; 37+ messages in thread
From: Matthew Auld @ 2021-07-13 10:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, dri-devel

Try to document the object caching related bits, like cache_coherent and
cache_dirty.

Suggested-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 135 +++++++++++++++++-
 drivers/gpu/drm/i915/i915_drv.h               |   9 --
 2 files changed, 131 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index ef3de2ae9723..02c3529b774c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -92,6 +92,57 @@ struct drm_i915_gem_object_ops {
 	const char *name; /* friendly name for debug, e.g. lockdep classes */
 };
 
+/**
+ * enum i915_cache_level - The supported GTT caching values for system memory
+ * pages.
+ *
+ * These translate to some special GTT PTE bits when binding pages into some
+ * address space. It also determines whether an object, or rather its pages are
+ * coherent with the GPU, when also reading or writing through the CPU cache
+ * with those pages.
+ *
+ * Userspace can also control this through struct drm_i915_gem_caching.
+ */
+enum i915_cache_level {
+	/**
+	 * @I915_CACHE_NONE:
+	 *
+	 * Not coherent with the CPU cache. If the cache is dirty and we need
+	 * the underlying pages to be coherent with some later GPU access then
+	 * we need to manually flush the pages.
+	 *
+	 * Note that on shared-LLC platforms reads through the CPU cache are
+	 * still coherent even with this setting. See also
+	 * I915_BO_CACHE_COHERENT_FOR_READ for more details.
+	 */
+	I915_CACHE_NONE = 0,
+	/**
+	 * @I915_CACHE_LLC:
+	 *
+	 * Coherent with the CPU cache. If the cache is dirty, then the GPU will
+	 * ensure that access remains coherent, when both reading and writing
+	 * through the CPU cache.
+	 *
+	 * Applies to both platforms with shared-LLC(HAS_LLC), and snooping
+	 * based platforms(HAS_SNOOP).
+	 */
+	I915_CACHE_LLC,
+	/**
+	 * @I915_CACHE_L3_LLC:
+	 *
+	 * gen7+, L3 sits between the domain specifc caches, eg sampler/render
+	 * caches, and the large Last-Level-Cache. LLC is coherent with the CPU,
+	 * but L3 is only visible to the GPU.
+	 */
+	I915_CACHE_L3_LLC,
+	/**
+	 * @I915_CACHE_WT:
+	 *
+	 * hsw:gt3e Write-through for scanout buffers.
+	 */
+	I915_CACHE_WT,
+};
+
 enum i915_map_type {
 	I915_MAP_WB = 0,
 	I915_MAP_WC,
@@ -228,14 +279,90 @@ struct drm_i915_gem_object {
 	unsigned int mem_flags;
 #define I915_BO_FLAG_STRUCT_PAGE BIT(0) /* Object backed by struct pages */
 #define I915_BO_FLAG_IOMEM       BIT(1) /* Object backed by IO memory */
-	/*
-	 * Is the object to be mapped as read-only to the GPU
-	 * Only honoured if hardware has relevant pte bit
+	/**
+	 * @cache_level: The desired GTT caching level.
+	 *
+	 * See enum i915_cache_level for possible values, along with what
+	 * each does.
 	 */
 	unsigned int cache_level:3;
-	unsigned int cache_coherent:2;
+	/**
+	 * @cache_coherent:
+	 *
+	 * Track whether the pages are coherent with the GPU if reading or
+	 * writing through the CPU cache.
+	 *
+	 * This largely depends on the @cache_level, for example if the object
+	 * is marked as I915_CACHE_LLC, then GPU access is coherent for both
+	 * reads and writes through the CPU cache.
+	 *
+	 * Note that on platforms with shared-LLC support(HAS_LLC) reads through
+	 * the CPU cache are always coherent, regardless of the @cache_level. On
+	 * snooping based platforms this is not the case, unless the full
+	 * I915_CACHE_LLC or similar setting is used.
+	 *
+	 * As a result of this we need to track coherency separately for reads
+	 * and writes, in order to avoid superfluous flushing on shared-LLC
+	 * platforms, for reads.
+	 *
+	 * I915_BO_CACHE_COHERENT_FOR_READ:
+	 *
+	 * When reading through the CPU cache, the GPU is still coherent. Note
+	 * that no data has actually been modified here, so it might seem
+	 * strange that we care about this.
+	 *
+	 * As an example, if some object is mapped on the CPU with write-back
+	 * caching, and we read some page, then the cache likely now contains
+	 * the data from that read. At this point the cache and main memory
+	 * match up, so all good. But next the GPU needs to write some data to
+	 * that same page. Now if the @cache_level is I915_CACHE_NONE and the
+	 * the platform doesn't have the shared-LLC, then the GPU will
+	 * effectively skip invalidating the cache(or however that works
+	 * internally) when writing the new value.  This is really bad since the
+	 * GPU has just written some new data to main memory, but the CPU cache
+	 * is still valid and now contains stale data. As a result the next time
+	 * we do a cached read with the CPU, we are rewarded with stale data.
+	 * Likewise if the cache is later flushed, we might be rewarded with
+	 * overwriting main memory with stale data.
+	 *
+	 * I915_BO_CACHE_COHERENT_FOR_WRITE:
+	 *
+	 * When writing through the CPU cache, the GPU is still coherent. Note
+	 * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
+	 *
+	 * This is never set when I915_CACHE_NONE is used for @cache_level,
+	 * where instead we have to manually flush the caches after writing
+	 * through the CPU cache. For other cache levels this should be set and
+	 * the object is therefore considered coherent for both reads and writes
+	 * through the CPU cache.
+	 */
 #define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
 #define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
+	unsigned int cache_coherent:2;
+	/**
+	 * @cache_dirty:
+	 *
+	 * Track if the cache might be dirty for the @pages i.e it has yet to be
+	 * written back to main memory. As a result reading directly from main
+	 * memory might yield stale data.
+	 *
+	 * This also ties into whether the kernel is tracking the object as
+	 * coherent with the GPU, as per @cache_coherent, as it determines if
+	 * flushing might be needed at various points.
+	 *
+	 * Another part of @cache_dirty is managing flushing when first
+	 * acquiring the pages for system memory, at this point the pages are
+	 * considered foreign, so the default assumption is that the cache is
+	 * dirty, for example the page zeroing done my the kernel might leave
+	 * writes though the CPU cache, or swapping-in, while the actual data in
+	 * main memory is potentially stale.  Note that this is a potential
+	 * security issue when dealing with userspace objects and zeroing. Now,
+	 * whether we actually need apply the big sledgehammer of flushing all
+	 * the pages on acquire depends on if @cache_coherent is marked as
+	 * I915_BO_CACHE_COHERENT_FOR_WRITE, i.e that the GPU will be coherent
+	 * for both reads and writes though the CPU cache. So pretty much this
+	 * should only be needed for I915_CACHE_NONE objects.
+	 */
 	unsigned int cache_dirty:1;
 
 	/**
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c4747f4407ef..37bb1a3cadd4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -394,15 +394,6 @@ struct drm_i915_display_funcs {
 	void (*read_luts)(struct intel_crtc_state *crtc_state);
 };
 
-enum i915_cache_level {
-	I915_CACHE_NONE = 0,
-	I915_CACHE_LLC, /* also used for snoopable memory on non-LLC */
-	I915_CACHE_L3_LLC, /* gen7+, L3 sits between the domain specifc
-			      caches, eg sampler/render caches, and the
-			      large Last-Level-Cache. LLC is coherent with
-			      the CPU, but L3 is only visible to the GPU. */
-	I915_CACHE_WT, /* hsw:gt3e WriteThrough for scanouts */
-};
 
 #define I915_COLOR_UNEVICTABLE (-1) /* a non-vma sharing the address space */
 
-- 
2.26.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH 2/5] drm/i915/uapi: convert drm_i915_gem_madvise to kernel-doc
  2021-07-13 10:45 ` [Intel-gfx] " Matthew Auld
@ 2021-07-13 10:45   ` Matthew Auld
  -1 siblings, 0 replies; 37+ messages in thread
From: Matthew Auld @ 2021-07-13 10:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: dri-devel

Add some kernel doc for this. We can then just reference this later when
documenting madv in the kernel.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
---
 include/uapi/drm/i915_drm.h | 50 +++++++++++++++++++++++++++++++------
 1 file changed, 42 insertions(+), 8 deletions(-)

diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index e334a8b14ef2..a839085b6577 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1492,20 +1492,54 @@ struct drm_i915_get_pipe_from_crtc_id {
 	__u32 pipe;
 };
 
-#define I915_MADV_WILLNEED 0
-#define I915_MADV_DONTNEED 1
-#define __I915_MADV_PURGED 2 /* internal state */
-
+/**
+ * struct drm_i915_gem_madvise - Update the madvise hint for the object.
+ *
+ * The kernel uses this to know when it can safely discard the backing pages for
+ * an object, when under memory pressure.
+ */
 struct drm_i915_gem_madvise {
-	/** Handle of the buffer to change the backing store advice */
+	/**
+	 * @handle: Handle of the buffer to change the backing store advice for.
+	 */
 	__u32 handle;
 
-	/* Advice: either the buffer will be needed again in the near future,
-	 *         or wont be and could be discarded under memory pressure.
+	/**
+	 * @madv: The madvise hint to set for the object.
+	 *
+	 * Supported values:
+	 *
+	 * I915_MADV_WILLNEED:
+	 *
+	 * The buffer will be needed again in the near future. By default all
+	 * objects are set as I915_MADV_WILLNEED. Once the pages become
+	 * dirty, the kernel is no longer allowed to simply discard the pages,
+	 * and instead can only resort to swapping the pages out, if under
+	 * memory pressure, where the page contents must persist when swapping
+	 * the pages back in.
+	 *
+	 * I915_MADV_DONTNEED:
+	 *
+	 * The buffer wont be needed. The pages and their contents can be
+	 * discarded under memory pressure.
+	 *
+	 * Note that if the pages were discarded then the kernel updates the
+	 * internal madvise value of the object to __I915_MADV_PURGED, which
+	 * effectively kills the object, since all further requests to allocate
+	 * pages for the object will be rejected. At this point a new object is
+	 * needed. This will be reflected in @retained.
 	 */
+#define I915_MADV_WILLNEED 0
+#define I915_MADV_DONTNEED 1
+#define __I915_MADV_PURGED 2 /* internal state */
 	__u32 madv;
 
-	/** Whether the backing store still exists. */
+	/**
+	 * @retained: Whether the backing store still exists.
+	 *
+	 * Set to false if the kernel purged the object and marked the object as
+	 * __I915_MADV_PURGED.
+	 */
 	__u32 retained;
 };
 
-- 
2.26.3


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [Intel-gfx] [PATCH 2/5] drm/i915/uapi: convert drm_i915_gem_madvise to kernel-doc
@ 2021-07-13 10:45   ` Matthew Auld
  0 siblings, 0 replies; 37+ messages in thread
From: Matthew Auld @ 2021-07-13 10:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: dri-devel

Add some kernel doc for this. We can then just reference this later when
documenting madv in the kernel.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
---
 include/uapi/drm/i915_drm.h | 50 +++++++++++++++++++++++++++++++------
 1 file changed, 42 insertions(+), 8 deletions(-)

diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index e334a8b14ef2..a839085b6577 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1492,20 +1492,54 @@ struct drm_i915_get_pipe_from_crtc_id {
 	__u32 pipe;
 };
 
-#define I915_MADV_WILLNEED 0
-#define I915_MADV_DONTNEED 1
-#define __I915_MADV_PURGED 2 /* internal state */
-
+/**
+ * struct drm_i915_gem_madvise - Update the madvise hint for the object.
+ *
+ * The kernel uses this to know when it can safely discard the backing pages for
+ * an object, when under memory pressure.
+ */
 struct drm_i915_gem_madvise {
-	/** Handle of the buffer to change the backing store advice */
+	/**
+	 * @handle: Handle of the buffer to change the backing store advice for.
+	 */
 	__u32 handle;
 
-	/* Advice: either the buffer will be needed again in the near future,
-	 *         or wont be and could be discarded under memory pressure.
+	/**
+	 * @madv: The madvise hint to set for the object.
+	 *
+	 * Supported values:
+	 *
+	 * I915_MADV_WILLNEED:
+	 *
+	 * The buffer will be needed again in the near future. By default all
+	 * objects are set as I915_MADV_WILLNEED. Once the pages become
+	 * dirty, the kernel is no longer allowed to simply discard the pages,
+	 * and instead can only resort to swapping the pages out, if under
+	 * memory pressure, where the page contents must persist when swapping
+	 * the pages back in.
+	 *
+	 * I915_MADV_DONTNEED:
+	 *
+	 * The buffer wont be needed. The pages and their contents can be
+	 * discarded under memory pressure.
+	 *
+	 * Note that if the pages were discarded then the kernel updates the
+	 * internal madvise value of the object to __I915_MADV_PURGED, which
+	 * effectively kills the object, since all further requests to allocate
+	 * pages for the object will be rejected. At this point a new object is
+	 * needed. This will be reflected in @retained.
 	 */
+#define I915_MADV_WILLNEED 0
+#define I915_MADV_DONTNEED 1
+#define __I915_MADV_PURGED 2 /* internal state */
 	__u32 madv;
 
-	/** Whether the backing store still exists. */
+	/**
+	 * @retained: Whether the backing store still exists.
+	 *
+	 * Set to false if the kernel purged the object and marked the object as
+	 * __I915_MADV_PURGED.
+	 */
 	__u32 retained;
 };
 
-- 
2.26.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH 3/5] drm/i915: convert drm_i915_gem_object to kernel-doc
  2021-07-13 10:45 ` [Intel-gfx] " Matthew Auld
@ 2021-07-13 10:45   ` Matthew Auld
  -1 siblings, 0 replies; 37+ messages in thread
From: Matthew Auld @ 2021-07-13 10:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: dri-devel

Before we can pull in the previous kernel doc for the caching bits, we
first get to add kernel doc for all of drm_i915_gem_object so this
actually builds.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
---
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 422 +++++++++++++++---
 1 file changed, 366 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 02c3529b774c..da2194290436 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -174,24 +174,75 @@ struct i915_gem_object_page_iter {
 	struct mutex lock; /* protects this cache */
 };
 
-struct drm_i915_gem_object {
-	/*
-	 * We might have reason to revisit the below since it wastes
-	 * a lot of space for non-ttm gem objects.
-	 * In any case, always use the accessors for the ttm_buffer_object
-	 * when accessing it.
+/**
+ * struct i915_page_sizes - Track the various pieces we need to
+ * both track and construct huge GTT entries, when binding the
+ * object.
+ */
+struct i915_page_sizes {
+	/**
+	 * @phys:
+	 *
+	 * The sg mask of the pages sg_table. i.e the
+	 * mask of of the lengths for each sg entry.
 	 */
+	unsigned int phys;
+
+	/**
+	 * @sg:
+	 *
+	 * The gtt page sizes we are allowed to use given
+	 * the sg mask and the supported page sizes. This will
+	 * express the smallest unit we can use for the whole
+	 * object, as well as the larger sizes we may be able to
+	 * use opportunistically.
+	 */
+	unsigned int sg;
+
+	/**
+	 * @gtt:
+	 *
+	 * The actual gtt page size usage. Since we can
+	 * have multiple vma associated with this object we need
+	 * to prevent any trampling of state, hence a copy of
+	 * this struct also lives in each vma, therefore the gtt
+	 * value here should only be read/write through the vma.
+	 */
+	unsigned int gtt;
+};
+
+/**
+ * struct drm_i915_gem_object - Our core GEM object which extends the base
+ * struct drm_gem_object behaviour.
+ */
+struct drm_i915_gem_object {
 	union {
+		/** @base: The base DRM GEM object. */
 		struct drm_gem_object base;
+
+		/**
+		 * @__do_not_access:
+		 *
+		 * The base TTM object, if we are using the TTM backend. Note
+		 * that this also embeds its own DRM_GEM base object.
+		 *
+		 * We might have reason to revisit the below since it wastes a
+		 * lot of space for non-ttm gem objects.  In any case, always
+		 * use the accessors for the ttm_buffer_object when accessing
+		 * it.
+		 */
 		struct ttm_buffer_object __do_not_access;
 	};
 
+	/**
+	 * @ops: The struct drm_i915_gem_object_ops interface implemented by the
+	 * object instance.
+	 */
 	const struct drm_i915_gem_object_ops *ops;
 
+	/** @vma: Track all the struct i915_vma instances for this object. */
 	struct {
-		/**
-		 * @vma.lock: protect the list/tree of vmas
-		 */
+		/** @vma.lock: protect the list/tree of vmas */
 		spinlock_t lock;
 
 		/**
@@ -224,7 +275,9 @@ struct drm_i915_gem_object {
 	 * this translation from object to context->handles_vma.
 	 */
 	struct list_head lut_list;
-	spinlock_t lut_lock; /* guards lut_list */
+
+	/** @lut_lock: Guards the lut_list */
+	spinlock_t lut_lock;
 
 	/**
 	 * @obj_link: Link into @i915_gem_ww_ctx.obj_list
@@ -234,29 +287,123 @@ struct drm_i915_gem_object {
 	 * when i915_gem_ww_ctx_backoff() or i915_gem_ww_ctx_fini() are called.
 	 */
 	struct list_head obj_link;
-	/**
-	 * @shared_resv_from: The object shares the resv from this vm.
-	 */
+
+	/** @shares_resv_from: The object shares the resv from this vm. */
 	struct i915_address_space *shares_resv_from;
 
 	union {
+		/** @rcu: Embedded rcu_head */
 		struct rcu_head rcu;
+
+		/**
+		 * @freed:
+		 *
+		 * When objects need to be destroyed we batch them together into
+		 * an llist, for a separate worker thread to then pick up and
+		 * process.
+		 */
 		struct llist_node freed;
 	};
 
 	/**
-	 * Whether the object is currently in the GGTT mmap.
+	 * @userfault_count: Whether the object is currently in the GGTT mmap.
 	 */
 	unsigned int userfault_count;
+	/**
+	 * @userfault_link:
+	 *
+	 * We need to maintain the list of all objects which might have been
+	 * faulted into the GGTT mappable aperture, for easy revocation later.
+	 */
 	struct list_head userfault_link;
 
+	/**
+	 * @mmo: Track the mmap_offset nodes for this object.
+	 */
 	struct {
-		spinlock_t lock; /* Protects access to mmo offsets */
+		/** @lock: Protects access to mmo offsets. */
+		spinlock_t lock;
+
+		/** @offsets: The rb-tree of mmo offsets. */
 		struct rb_root offsets;
 	} mmo;
 
-	I915_SELFTEST_DECLARE(struct list_head st_link);
+	/** @st_link: SELFTEST ONLY */
+	struct list_head st_link;
 
+	/**
+	 * @flags: The object flags
+	 *
+	 * The currently supported I915_BO_ALLOC_FLAGS. Note that these can only
+	 * be set at object creation, after which they should be considered
+	 * immutable. Also some of these largely depend on whether the backend
+	 * supports it.
+	 *
+	 * I915_BO_ALLOC_CONTIGUOUS:
+	 *
+	 * Allocate the physical pages for the object as one contiguous block or
+	 * page. Currently only supported for device local-memory.
+	 *
+	 * I915_BO_ALLOC_VOLATILE:
+	 *
+	 * Volatile here refers to the volatility of the allocated pages when
+	 * unpinned. This effectively just sets the @madv hint to
+	 * I915_MADV_DONTNEED while the pages are pinned/allocated. This way as
+	 * soon as the pages become unpinned the shrinker is free to discard the
+	 * pages if needed.  This is only intended for kernel internal objects
+	 * where they are often short lived anyway, and don't require any kind
+	 * of persistence.
+	 *
+	 * I915_BO_ALLOC_CPU_CLEAR:
+	 *
+	 * After allocating the pages, zero them using a simple memset. This is
+	 * very specialised and is only intended for kernel internal objects
+	 * where we are unable(too early during probe) or prefer not to use a
+	 * normal accelerated blitter clear.
+	 *
+	 * I915_BO_ALLOC_USER:
+	 *
+	 * All normal userspace objects are allocated with this flag. This is
+	 * useful where the kernel needs to know if the object is merely kernel
+	 * internal, or was created by userspace, where slightly different rules
+	 * might be needed.
+	 *
+	 * Other special flags, note that these might be mutable:
+	 *
+	 * I915_BO_READONLY:
+	 *
+	 * Should this object be marked as read-only. This applies to both CPU
+	 * and GPU access, when dealing with userspace objects, at least where
+	 * it can be enforced. From a userspace perspective this only exposed
+	 * for userptr objects.
+	 *
+	 * When dealing with kernel internal objects this *only* applies to GPU
+	 * access, usually where we need to prevent userspace access to some
+	 * security critical object, which might need to share the user visible
+	 * ppGTT address space.
+	 *
+	 * Note that for GPU access the HW needs to actually support the
+	 * read-only bit in the ppGTT PTE field. On some newer hardware this
+	 * support is completely busted. So whether this is actually supported
+	 * depends on the vm. Currently the caller is expected to check this
+	 * first before marking the object as readonly, if they really do need
+	 * it, since it just gets silently ignored when setting up the PTEs,
+	 * during i915_vma_pin().
+	 *
+	 * FIXME: Note that this might be a slight wart in the api. Once idea
+	 * could be to move this to I915_BO_ALLOC_FLAGS, that way it becomes
+	 * immutable, and then we don't have to worry about unbinding and
+	 * rebinding objects on the fly if the object suddenly becomes readonly.
+	 * The final piece is to make i915_vma_pin() fall over if the vm doesn't
+	 * have read-only support, when the object is marked as readonly. The
+	 * callers should then be updated to account for this.
+	 *
+	 * I915_TILING_QUIRK_BIT:
+	 *
+	 * Tiled objects with unknown swizzling need special care. For example,
+	 * we are not allowed to swap the pages out if this is set, otherwise we
+	 * may see corruption.
+	 */
 	unsigned long flags;
 #define I915_BO_ALLOC_CONTIGUOUS BIT(0)
 #define I915_BO_ALLOC_VOLATILE   BIT(1)
@@ -270,15 +417,26 @@ struct drm_i915_gem_object {
 #define I915_TILING_QUIRK_BIT    5 /* unknown swizzling; do not release! */
 
 	/**
-	 * @mem_flags - Mutable placement-related flags
+	 * @mem_flags: Mutable placement-related flags
 	 *
 	 * These are flags that indicate specifics of the memory region
 	 * the object is currently in. As such they are only stable
 	 * either under the object lock or if the object is pinned.
+	 *
+	 * Possible values:
+	 *
+	 * I915_BO_FLAG_STRUCT_PAGE:
+	 *
+	 * Object backed by struct pages, aka system memory
+	 *
+	 * I915_BO_FLAG_IOMEM:
+	 *
+	 * Object backed by device memory, aka local memory
 	 */
 	unsigned int mem_flags;
-#define I915_BO_FLAG_STRUCT_PAGE BIT(0) /* Object backed by struct pages */
-#define I915_BO_FLAG_IOMEM       BIT(1) /* Object backed by IO memory */
+#define I915_BO_FLAG_STRUCT_PAGE BIT(0)
+#define I915_BO_FLAG_IOMEM       BIT(1)
+
 	/**
 	 * @cache_level: The desired GTT caching level.
 	 *
@@ -286,6 +444,7 @@ struct drm_i915_gem_object {
 	 * each does.
 	 */
 	unsigned int cache_level:3;
+
 	/**
 	 * @cache_coherent:
 	 *
@@ -339,6 +498,7 @@ struct drm_i915_gem_object {
 #define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
 #define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
 	unsigned int cache_coherent:2;
+
 	/**
 	 * @cache_dirty:
 	 *
@@ -380,124 +540,274 @@ struct drm_i915_gem_object {
 	 */
 	u16 write_domain;
 
+	/**
+	 * @frontbuffer:
+	 *
+	 * Frontbuffer tracking bits, or NULL if this is just a normal object.
+	 */
 	struct intel_frontbuffer __rcu *frontbuffer;
 
-	/** Current tiling stride for the object, if it's tiled. */
+	/**
+	 * @tiling_and_stride:
+	 *
+	 * Current tiling stride for the object, if it's tiled.
+	 */
 	unsigned int tiling_and_stride;
 #define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */
 #define TILING_MASK (FENCE_MINIMUM_STRIDE - 1)
 #define STRIDE_MASK (~TILING_MASK)
 
+	/** @mm: Manage all the state related to the backing storage */
 	struct {
-		/*
-		 * Protects the pages and their use. Do not use directly, but
-		 * instead go through the pin/unpin interfaces.
+		/**
+		 * @pages_pin_count:
+		 *
+		 * Protects the @pages and their use. Do not use directly, but
+		 * instead go through the i915_gem_object_{pin, unpin}_pages()
+		 * interface.
+		 *
+		 * When the @pages_pin_count reaches zero the pages might be
+		 * discared when under memory pressure, if the @madv is also
+		 * I915_MADV_DONTNEED.
+		 *
+		 * When the final ref for the object is dropped, the object
+		 * destruction code will also zero the @pages_pin_count, and
+		 * free the @pages and related state.
 		 */
 		atomic_t pages_pin_count;
+
+		/**
+		 * @shrink_pin:
+		 *
+		 * While @shrink_pin is non-zero, the object is not visible to
+		 * the shrinker. Usually when the kernel knows the object can't
+		 * be swapped out or discarded, we try to hide it from the
+		 * shrinker so that it doesn't needlessly waste effort on such
+		 * objects.
+		 */
 		atomic_t shrink_pin;
 
 		/**
+		 * @placements:
+		 *
 		 * Priority list of potential placements for this object.
 		 */
 		struct intel_memory_region **placements;
+
+		/**
+		 * @n_placements: Number of elements in @placements.
+		 */
 		int n_placements;
 
 		/**
-		 * Memory region for this object.
+		 * @region: Memory region for this object.
 		 */
 		struct intel_memory_region *region;
 
 		/**
+		 * @res:
+		 *
 		 * Memory manager resource allocated for this object. Only
 		 * needed for the mock region.
 		 */
 		struct ttm_resource *res;
 
 		/**
+		 * @region_link:
+		 *
 		 * Element within memory_region->objects or region->purgeable
 		 * if the object is marked as DONTNEED. Access is protected by
 		 * region->obj_lock.
 		 */
 		struct list_head region_link;
 
+		/**
+		 * @pages:
+		 *
+		 * Only valid while the @pages_pin_count is not zero.
+		 *
+		 * The cached struct sg_table for the backing pages, or NULL if
+		 * the pages have yet to be allocated. We use this when mapping
+		 * the object(or rather the struct i915_vma) through the GTT,
+		 * effectively each GTT PTE is programmed using this table.
+		 *
+		 * If we are using an IOMMU then this will contain the
+		 * respective DMA addresses for the physical pages, when dealing
+		 * with system memory.
+		 *
+		 * We also like to abuse this as a general container for device
+		 * addresses, like for device local memory and stolen memory.
+		 */
 		struct sg_table *pages;
-		void *mapping;
 
-		struct i915_page_sizes {
-			/**
-			 * The sg mask of the pages sg_table. i.e the mask of
-			 * of the lengths for each sg entry.
-			 */
-			unsigned int phys;
-
-			/**
-			 * The gtt page sizes we are allowed to use given the
-			 * sg mask and the supported page sizes. This will
-			 * express the smallest unit we can use for the whole
-			 * object, as well as the larger sizes we may be able
-			 * to use opportunistically.
-			 */
-			unsigned int sg;
+		/*
+		 * @mapping:
+		 *
+		 * Only valid while the @pages_pin_count is not zero.
+		 *
+		 * The cached CPU virtual address for the @pages, or NULL if
+		 * there is no current mapping.
+		 *
+		 * The caching type is encoded in the unused lower bits of the
+		 * address, so this should not be directly accessed. Rather the
+		 * i915_gem_object_pin_map() should be used to obtain the
+		 * address, which also ensures the pages are correctly pinned
+		 * during CPU access of the virtual address.
+		 * i915_gem_object_unpin_map() should be called when done.
+		 */
+		void *mapping;
 
-			/**
-			 * The actual gtt page size usage. Since we can have
-			 * multiple vma associated with this object we need to
-			 * prevent any trampling of state, hence a copy of this
-			 * struct also lives in each vma, therefore the gtt
-			 * value here should only be read/write through the vma.
-			 */
-			unsigned int gtt;
-		} page_sizes;
+		/** @page_sizes: Track the GTT page size related bits */
+		struct i915_page_sizes page_sizes;
 
-		I915_SELFTEST_DECLARE(unsigned int page_mask);
+		/**
+		 * @page_mask: SELFTEST ONLY
+		 */
+		unsigned int page_mask;
 
+		/**
+		 * @get_page:
+		 *
+		 * The cached iterator for looking up struct pages in @pages.
+		 */
 		struct i915_gem_object_page_iter get_page;
+
+		/**
+		 * @get_dma_page:
+		 *
+		 * The cached iterator for looking up device addresses in
+		 * @pages.
+		 */
 		struct i915_gem_object_page_iter get_dma_page;
 
 		/**
+		 * @link:
+		 *
 		 * Element within i915->mm.unbound_list or i915->mm.bound_list,
 		 * locked by i915->mm.obj_lock.
 		 */
 		struct list_head link;
 
 		/**
-		 * Advice: are the backing pages purgeable?
+		 * @madv: The advice hint for the pages.
+		 *
+		 * See &drm_i915_gem_madvise.madv.
 		 */
 		unsigned int madv:2;
 
 		/**
-		 * This is set if the object has been written to since the
-		 * pages were last acquired.
+		 * @dirty:
+		 *
+		 * This is set if the object might have been written to since
+		 * the pages were acquired. Tracking if the object is dirty
+		 * tells us if we can for example simply discard the pages,
+		 * instead of having to persist their contents, assuming the
+		 * object is still marked as I915_MADV_WILLNEED.
 		 */
 		bool dirty:1;
 	} mm;
 
+	/**
+	 * @ttm:
+	 *
+	 * The TTM specific state for this object. Currently for discrete
+	 * only.
+	 */
 	struct {
+		/**
+		 * @cached_io_st:
+		 *
+		 * Some nasty sleight of hand to manage the sg_table for
+		 * discrete, which uses use the TTM backend instead.
+		 */
 		struct sg_table *cached_io_st;
+
+		/**
+		 * @get_io_page: The cached iterator for @cached_io_st
+		 */
 		struct i915_gem_object_page_iter get_io_page;
+
+		/**
+		 * @created:
+		 *
+		 * Some more nasty sleight of hand to manage the object
+		 * destruction differences when the TTM backend is used. Nothing
+		 * to see here.
+		 */
 		bool created:1;
 	} ttm;
 
-	/** Record of address bit 17 of each page at last unbind. */
+	/** @bit_17 : Record of address bit 17 of each page at last unbind. */
 	unsigned long *bit_17;
 
 	union {
 #ifdef CONFIG_MMU_NOTIFIER
-		struct i915_gem_userptr {
+		/**
+		 * @userptr:
+		 *
+		 * Track the userptr specific state if this is a userptr object.
+		 */
+		struct {
+			/**
+			 * @ptr:
+			 *
+			 * The user provided virtual address for the memory.
+			 */
 			uintptr_t ptr;
+
+			/**
+			 * @notifier_seq: The notifier sequence number.
+			 */
 			unsigned long notifier_seq;
 
+			/** @notifier: The struct mmu_interval_notifier */
 			struct mmu_interval_notifier notifier;
+
+			/**
+			 * @pvec:
+			 *
+			 * The array of struct pages, as per the provided @ptr.
+			 */
 			struct page **pvec;
+
+			/**
+			 * @page_ref:
+			 *
+			 * The userptr reference count for the pages.
+			 */
 			int page_ref;
 		} userptr;
 #endif
-
+		/**
+		 * @stolen:
+		 *
+		 * Pointer to the contiguous memory block if this is a stolen
+		 * memory object.
+		 */
 		struct drm_mm_node *stolen;
 
+		/** @scratch: SELFTEST ONLY */
 		unsigned long scratch;
+
+		/**
+		 * @encode:
+		 *
+		 * Cached PTE encoding for this object, i.e it has the PTE_LM,
+		 * caching bits, DMA address etc already built.
+		 *
+		 * Note that this is *only* used for scratch pages, where it's
+		 * an extremely common operation to point the various paging
+		 * structures(PDE, PTE etc) at the respective scratch page, and
+		 * since the scratch page is static the encoding value here
+		 * shouldn't change.
+		 */
 		u64 encode;
 
+		/**
+		 * @gvt_info:
+		 *
+		 * The GVT specific state, assuming GVT is indeed active.
+		 */
 		void *gvt_info;
 	};
 };
-- 
2.26.3


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [Intel-gfx] [PATCH 3/5] drm/i915: convert drm_i915_gem_object to kernel-doc
@ 2021-07-13 10:45   ` Matthew Auld
  0 siblings, 0 replies; 37+ messages in thread
From: Matthew Auld @ 2021-07-13 10:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: dri-devel

Before we can pull in the previous kernel doc for the caching bits, we
first get to add kernel doc for all of drm_i915_gem_object so this
actually builds.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
---
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 422 +++++++++++++++---
 1 file changed, 366 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 02c3529b774c..da2194290436 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -174,24 +174,75 @@ struct i915_gem_object_page_iter {
 	struct mutex lock; /* protects this cache */
 };
 
-struct drm_i915_gem_object {
-	/*
-	 * We might have reason to revisit the below since it wastes
-	 * a lot of space for non-ttm gem objects.
-	 * In any case, always use the accessors for the ttm_buffer_object
-	 * when accessing it.
+/**
+ * struct i915_page_sizes - Track the various pieces we need to
+ * both track and construct huge GTT entries, when binding the
+ * object.
+ */
+struct i915_page_sizes {
+	/**
+	 * @phys:
+	 *
+	 * The sg mask of the pages sg_table. i.e the
+	 * mask of of the lengths for each sg entry.
 	 */
+	unsigned int phys;
+
+	/**
+	 * @sg:
+	 *
+	 * The gtt page sizes we are allowed to use given
+	 * the sg mask and the supported page sizes. This will
+	 * express the smallest unit we can use for the whole
+	 * object, as well as the larger sizes we may be able to
+	 * use opportunistically.
+	 */
+	unsigned int sg;
+
+	/**
+	 * @gtt:
+	 *
+	 * The actual gtt page size usage. Since we can
+	 * have multiple vma associated with this object we need
+	 * to prevent any trampling of state, hence a copy of
+	 * this struct also lives in each vma, therefore the gtt
+	 * value here should only be read/write through the vma.
+	 */
+	unsigned int gtt;
+};
+
+/**
+ * struct drm_i915_gem_object - Our core GEM object which extends the base
+ * struct drm_gem_object behaviour.
+ */
+struct drm_i915_gem_object {
 	union {
+		/** @base: The base DRM GEM object. */
 		struct drm_gem_object base;
+
+		/**
+		 * @__do_not_access:
+		 *
+		 * The base TTM object, if we are using the TTM backend. Note
+		 * that this also embeds its own DRM_GEM base object.
+		 *
+		 * We might have reason to revisit the below since it wastes a
+		 * lot of space for non-ttm gem objects.  In any case, always
+		 * use the accessors for the ttm_buffer_object when accessing
+		 * it.
+		 */
 		struct ttm_buffer_object __do_not_access;
 	};
 
+	/**
+	 * @ops: The struct drm_i915_gem_object_ops interface implemented by the
+	 * object instance.
+	 */
 	const struct drm_i915_gem_object_ops *ops;
 
+	/** @vma: Track all the struct i915_vma instances for this object. */
 	struct {
-		/**
-		 * @vma.lock: protect the list/tree of vmas
-		 */
+		/** @vma.lock: protect the list/tree of vmas */
 		spinlock_t lock;
 
 		/**
@@ -224,7 +275,9 @@ struct drm_i915_gem_object {
 	 * this translation from object to context->handles_vma.
 	 */
 	struct list_head lut_list;
-	spinlock_t lut_lock; /* guards lut_list */
+
+	/** @lut_lock: Guards the lut_list */
+	spinlock_t lut_lock;
 
 	/**
 	 * @obj_link: Link into @i915_gem_ww_ctx.obj_list
@@ -234,29 +287,123 @@ struct drm_i915_gem_object {
 	 * when i915_gem_ww_ctx_backoff() or i915_gem_ww_ctx_fini() are called.
 	 */
 	struct list_head obj_link;
-	/**
-	 * @shared_resv_from: The object shares the resv from this vm.
-	 */
+
+	/** @shares_resv_from: The object shares the resv from this vm. */
 	struct i915_address_space *shares_resv_from;
 
 	union {
+		/** @rcu: Embedded rcu_head */
 		struct rcu_head rcu;
+
+		/**
+		 * @freed:
+		 *
+		 * When objects need to be destroyed we batch them together into
+		 * an llist, for a separate worker thread to then pick up and
+		 * process.
+		 */
 		struct llist_node freed;
 	};
 
 	/**
-	 * Whether the object is currently in the GGTT mmap.
+	 * @userfault_count: Whether the object is currently in the GGTT mmap.
 	 */
 	unsigned int userfault_count;
+	/**
+	 * @userfault_link:
+	 *
+	 * We need to maintain the list of all objects which might have been
+	 * faulted into the GGTT mappable aperture, for easy revocation later.
+	 */
 	struct list_head userfault_link;
 
+	/**
+	 * @mmo: Track the mmap_offset nodes for this object.
+	 */
 	struct {
-		spinlock_t lock; /* Protects access to mmo offsets */
+		/** @lock: Protects access to mmo offsets. */
+		spinlock_t lock;
+
+		/** @offsets: The rb-tree of mmo offsets. */
 		struct rb_root offsets;
 	} mmo;
 
-	I915_SELFTEST_DECLARE(struct list_head st_link);
+	/** @st_link: SELFTEST ONLY */
+	struct list_head st_link;
 
+	/**
+	 * @flags: The object flags
+	 *
+	 * The currently supported I915_BO_ALLOC_FLAGS. Note that these can only
+	 * be set at object creation, after which they should be considered
+	 * immutable. Also some of these largely depend on whether the backend
+	 * supports it.
+	 *
+	 * I915_BO_ALLOC_CONTIGUOUS:
+	 *
+	 * Allocate the physical pages for the object as one contiguous block or
+	 * page. Currently only supported for device local-memory.
+	 *
+	 * I915_BO_ALLOC_VOLATILE:
+	 *
+	 * Volatile here refers to the volatility of the allocated pages when
+	 * unpinned. This effectively just sets the @madv hint to
+	 * I915_MADV_DONTNEED while the pages are pinned/allocated. This way as
+	 * soon as the pages become unpinned the shrinker is free to discard the
+	 * pages if needed.  This is only intended for kernel internal objects
+	 * where they are often short lived anyway, and don't require any kind
+	 * of persistence.
+	 *
+	 * I915_BO_ALLOC_CPU_CLEAR:
+	 *
+	 * After allocating the pages, zero them using a simple memset. This is
+	 * very specialised and is only intended for kernel internal objects
+	 * where we are unable(too early during probe) or prefer not to use a
+	 * normal accelerated blitter clear.
+	 *
+	 * I915_BO_ALLOC_USER:
+	 *
+	 * All normal userspace objects are allocated with this flag. This is
+	 * useful where the kernel needs to know if the object is merely kernel
+	 * internal, or was created by userspace, where slightly different rules
+	 * might be needed.
+	 *
+	 * Other special flags, note that these might be mutable:
+	 *
+	 * I915_BO_READONLY:
+	 *
+	 * Should this object be marked as read-only. This applies to both CPU
+	 * and GPU access, when dealing with userspace objects, at least where
+	 * it can be enforced. From a userspace perspective this only exposed
+	 * for userptr objects.
+	 *
+	 * When dealing with kernel internal objects this *only* applies to GPU
+	 * access, usually where we need to prevent userspace access to some
+	 * security critical object, which might need to share the user visible
+	 * ppGTT address space.
+	 *
+	 * Note that for GPU access the HW needs to actually support the
+	 * read-only bit in the ppGTT PTE field. On some newer hardware this
+	 * support is completely busted. So whether this is actually supported
+	 * depends on the vm. Currently the caller is expected to check this
+	 * first before marking the object as readonly, if they really do need
+	 * it, since it just gets silently ignored when setting up the PTEs,
+	 * during i915_vma_pin().
+	 *
+	 * FIXME: Note that this might be a slight wart in the api. Once idea
+	 * could be to move this to I915_BO_ALLOC_FLAGS, that way it becomes
+	 * immutable, and then we don't have to worry about unbinding and
+	 * rebinding objects on the fly if the object suddenly becomes readonly.
+	 * The final piece is to make i915_vma_pin() fall over if the vm doesn't
+	 * have read-only support, when the object is marked as readonly. The
+	 * callers should then be updated to account for this.
+	 *
+	 * I915_TILING_QUIRK_BIT:
+	 *
+	 * Tiled objects with unknown swizzling need special care. For example,
+	 * we are not allowed to swap the pages out if this is set, otherwise we
+	 * may see corruption.
+	 */
 	unsigned long flags;
 #define I915_BO_ALLOC_CONTIGUOUS BIT(0)
 #define I915_BO_ALLOC_VOLATILE   BIT(1)
@@ -270,15 +417,26 @@ struct drm_i915_gem_object {
 #define I915_TILING_QUIRK_BIT    5 /* unknown swizzling; do not release! */
 
 	/**
-	 * @mem_flags - Mutable placement-related flags
+	 * @mem_flags: Mutable placement-related flags
 	 *
 	 * These are flags that indicate specifics of the memory region
 	 * the object is currently in. As such they are only stable
 	 * either under the object lock or if the object is pinned.
+	 *
+	 * Possible values:
+	 *
+	 * I915_BO_FLAG_STRUCT_PAGE:
+	 *
+	 * Object backed by struct pages, aka system memory
+	 *
+	 * I915_BO_FLAG_IOMEM:
+	 *
+	 * Object backed by device memory, aka local memory
 	 */
 	unsigned int mem_flags;
-#define I915_BO_FLAG_STRUCT_PAGE BIT(0) /* Object backed by struct pages */
-#define I915_BO_FLAG_IOMEM       BIT(1) /* Object backed by IO memory */
+#define I915_BO_FLAG_STRUCT_PAGE BIT(0)
+#define I915_BO_FLAG_IOMEM       BIT(1)
+
 	/**
 	 * @cache_level: The desired GTT caching level.
 	 *
@@ -286,6 +444,7 @@ struct drm_i915_gem_object {
 	 * each does.
 	 */
 	unsigned int cache_level:3;
+
 	/**
 	 * @cache_coherent:
 	 *
@@ -339,6 +498,7 @@ struct drm_i915_gem_object {
 #define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
 #define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
 	unsigned int cache_coherent:2;
+
 	/**
 	 * @cache_dirty:
 	 *
@@ -380,124 +540,274 @@ struct drm_i915_gem_object {
 	 */
 	u16 write_domain;
 
+	/**
+	 * @frontbuffer:
+	 *
+	 * Frontbuffer tracking bits, or NULL if this is just a normal object.
+	 */
 	struct intel_frontbuffer __rcu *frontbuffer;
 
-	/** Current tiling stride for the object, if it's tiled. */
+	/**
+	 * @tiling_and_stride:
+	 *
+	 * Current tiling stride for the object, if it's tiled.
+	 */
 	unsigned int tiling_and_stride;
 #define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */
 #define TILING_MASK (FENCE_MINIMUM_STRIDE - 1)
 #define STRIDE_MASK (~TILING_MASK)
 
+	/** @mm: Manage all the state related to the backing storage */
 	struct {
-		/*
-		 * Protects the pages and their use. Do not use directly, but
-		 * instead go through the pin/unpin interfaces.
+		/**
+		 * @pages_pin_count:
+		 *
+		 * Protects the @pages and their use. Do not use directly, but
+		 * instead go through the i915_gem_object_{pin, unpin}_pages()
+		 * interface.
+		 *
+		 * When the @pages_pin_count reaches zero the pages might be
+		 * discared when under memory pressure, if the @madv is also
+		 * I915_MADV_DONTNEED.
+		 *
+		 * When the final ref for the object is dropped, the object
+		 * destruction code will also zero the @pages_pin_count, and
+		 * free the @pages and related state.
 		 */
 		atomic_t pages_pin_count;
+
+		/**
+		 * @shrink_pin:
+		 *
+		 * While @shrink_pin is non-zero, the object is not visible to
+		 * the shrinker. Usually when the kernel knows the object can't
+		 * be swapped out or discarded, we try to hide it from the
+		 * shrinker so that it doesn't needlessly waste effort on such
+		 * objects.
+		 */
 		atomic_t shrink_pin;
 
 		/**
+		 * @placements:
+		 *
 		 * Priority list of potential placements for this object.
 		 */
 		struct intel_memory_region **placements;
+
+		/**
+		 * @n_placements: Number of elements in @placements.
+		 */
 		int n_placements;
 
 		/**
-		 * Memory region for this object.
+		 * @region: Memory region for this object.
 		 */
 		struct intel_memory_region *region;
 
 		/**
+		 * @res:
+		 *
 		 * Memory manager resource allocated for this object. Only
 		 * needed for the mock region.
 		 */
 		struct ttm_resource *res;
 
 		/**
+		 * @region_link:
+		 *
 		 * Element within memory_region->objects or region->purgeable
 		 * if the object is marked as DONTNEED. Access is protected by
 		 * region->obj_lock.
 		 */
 		struct list_head region_link;
 
+		/**
+		 * @pages:
+		 *
+		 * Only valid while the @pages_pin_count is not zero.
+		 *
+		 * The cached struct sg_table for the backing pages, or NULL if
+		 * the pages have yet to be allocated. We use this when mapping
+		 * the object(or rather the struct i915_vma) through the GTT,
+		 * effectively each GTT PTE is programmed using this table.
+		 *
+		 * If we are using an IOMMU then this will contain the
+		 * respective DMA addresses for the physical pages, when dealing
+		 * with system memory.
+		 *
+		 * We also like to abuse this as a general container for device
+		 * addresses, like for device local memory and stolen memory.
+		 */
 		struct sg_table *pages;
-		void *mapping;
 
-		struct i915_page_sizes {
-			/**
-			 * The sg mask of the pages sg_table. i.e the mask of
-			 * of the lengths for each sg entry.
-			 */
-			unsigned int phys;
-
-			/**
-			 * The gtt page sizes we are allowed to use given the
-			 * sg mask and the supported page sizes. This will
-			 * express the smallest unit we can use for the whole
-			 * object, as well as the larger sizes we may be able
-			 * to use opportunistically.
-			 */
-			unsigned int sg;
+		/*
+		 * @mapping:
+		 *
+		 * Only valid while the @pages_pin_count is not zero.
+		 *
+		 * The cached CPU virtual address for the @pages, or NULL if
+		 * there is no current mapping.
+		 *
+		 * The caching type is encoded in the unused lower bits of the
+		 * address, so this should not be directly accessed. Rather the
+		 * i915_gem_object_pin_map() should be used to obtain the
+		 * address, which also ensures the pages are correctly pinned
+		 * during CPU access of the virtual address.
+		 * i915_gem_object_unpin_map() should be called when done.
+		 */
+		void *mapping;
 
-			/**
-			 * The actual gtt page size usage. Since we can have
-			 * multiple vma associated with this object we need to
-			 * prevent any trampling of state, hence a copy of this
-			 * struct also lives in each vma, therefore the gtt
-			 * value here should only be read/write through the vma.
-			 */
-			unsigned int gtt;
-		} page_sizes;
+		/** @page_sizes: Track the GTT page size related bits */
+		struct i915_page_sizes page_sizes;
 
-		I915_SELFTEST_DECLARE(unsigned int page_mask);
+		/**
+		 * @page_mask: SELFTEST ONLY
+		 */
+		unsigned int page_mask;
 
+		/**
+		 * @get_page:
+		 *
+		 * The cached iterator for looking up struct pages in @pages.
+		 */
 		struct i915_gem_object_page_iter get_page;
+
+		/**
+		 * @get_dma_page:
+		 *
+		 * The cached iterator for looking up device addresses in
+		 * @pages.
+		 */
 		struct i915_gem_object_page_iter get_dma_page;
 
 		/**
+		 * @link:
+		 *
 		 * Element within i915->mm.unbound_list or i915->mm.bound_list,
 		 * locked by i915->mm.obj_lock.
 		 */
 		struct list_head link;
 
 		/**
-		 * Advice: are the backing pages purgeable?
+		 * @madv: The advice hint for the pages.
+		 *
+		 * See &drm_i915_gem_madvise.madv.
 		 */
 		unsigned int madv:2;
 
 		/**
-		 * This is set if the object has been written to since the
-		 * pages were last acquired.
+		 * @dirty:
+		 *
+		 * This is set if the object might have been written to since
+		 * the pages were acquired. Tracking if the object is dirty
+		 * tells us if we can for example simply discard the pages,
+		 * instead of having to persist their contents, assuming the
+		 * object is still marked as I915_MADV_WILLNEED.
 		 */
 		bool dirty:1;
 	} mm;
 
+	/**
+	 * @ttm:
+	 *
+	 * The TTM specific state for this object. Currently for discrete
+	 * only.
+	 */
 	struct {
+		/**
+		 * @cached_io_st:
+		 *
+		 * Some nasty sleight of hand to manage the sg_table for
+		 * discrete, which uses use the TTM backend instead.
+		 */
 		struct sg_table *cached_io_st;
+
+		/**
+		 * @get_io_page: The cached iterator for @cached_io_st
+		 */
 		struct i915_gem_object_page_iter get_io_page;
+
+		/**
+		 * @created:
+		 *
+		 * Some more nasty sleight of hand to manage the object
+		 * destruction differences when the TTM backend is used. Nothing
+		 * to see here.
+		 */
 		bool created:1;
 	} ttm;
 
-	/** Record of address bit 17 of each page at last unbind. */
+	/** @bit_17 : Record of address bit 17 of each page at last unbind. */
 	unsigned long *bit_17;
 
 	union {
 #ifdef CONFIG_MMU_NOTIFIER
-		struct i915_gem_userptr {
+		/**
+		 * @userptr:
+		 *
+		 * Track the userptr specific state if this is a userptr object.
+		 */
+		struct {
+			/**
+			 * @ptr:
+			 *
+			 * The user provided virtual address for the memory.
+			 */
 			uintptr_t ptr;
+
+			/**
+			 * @notifier_seq: The notifier sequence number.
+			 */
 			unsigned long notifier_seq;
 
+			/** @notifier: The struct mmu_interval_notifier */
 			struct mmu_interval_notifier notifier;
+
+			/**
+			 * @pvec:
+			 *
+			 * The array of struct pages, as per the provided @ptr.
+			 */
 			struct page **pvec;
+
+			/**
+			 * @page_ref:
+			 *
+			 * The userptr reference count for the pages.
+			 */
 			int page_ref;
 		} userptr;
 #endif
-
+		/**
+		 * @stolen:
+		 *
+		 * Pointer to the contiguous memory block if this is a stolen
+		 * memory object.
+		 */
 		struct drm_mm_node *stolen;
 
+		/** @scratch: SELFTEST ONLY */
 		unsigned long scratch;
+
+		/**
+		 * @encode:
+		 *
+		 * Cached PTE encoding for this object, i.e it has the PTE_LM,
+		 * caching bits, DMA address etc already built.
+		 *
+		 * Note that this is *only* used for scratch pages, where it's
+		 * an extremely common operation to point the various paging
+		 * structures(PDE, PTE etc) at the respective scratch page, and
+		 * since the scratch page is static the encoding value here
+		 * shouldn't change.
+		 */
 		u64 encode;
 
+		/**
+		 * @gvt_info:
+		 *
+		 * The GVT specific state, assuming GVT is indeed active.
+		 */
 		void *gvt_info;
 	};
 };
-- 
2.26.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH 4/5] drm/i915: pull in some more kernel-doc
  2021-07-13 10:45 ` [Intel-gfx] " Matthew Auld
@ 2021-07-13 10:45   ` Matthew Auld
  -1 siblings, 0 replies; 37+ messages in thread
From: Matthew Auld @ 2021-07-13 10:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, dri-devel

Pull in the kernel-doc for drm_i915_gem_object.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 Documentation/gpu/i915.rst | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst
index 204ebdaadb45..77558084e989 100644
--- a/Documentation/gpu/i915.rst
+++ b/Documentation/gpu/i915.rst
@@ -387,6 +387,13 @@ GEM BO Management Implementation Details
 .. kernel-doc:: drivers/gpu/drm/i915/i915_vma_types.h
    :doc: Virtual Memory Address
 
+GEM Buffer Object
+-----------------
+This section documents our core GEM object, and related bits.
+
+.. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+   :internal:
+
 Buffer Object Eviction
 ----------------------
 
-- 
2.26.3


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [Intel-gfx] [PATCH 4/5] drm/i915: pull in some more kernel-doc
@ 2021-07-13 10:45   ` Matthew Auld
  0 siblings, 0 replies; 37+ messages in thread
From: Matthew Auld @ 2021-07-13 10:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, dri-devel

Pull in the kernel-doc for drm_i915_gem_object.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 Documentation/gpu/i915.rst | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst
index 204ebdaadb45..77558084e989 100644
--- a/Documentation/gpu/i915.rst
+++ b/Documentation/gpu/i915.rst
@@ -387,6 +387,13 @@ GEM BO Management Implementation Details
 .. kernel-doc:: drivers/gpu/drm/i915/i915_vma_types.h
    :doc: Virtual Memory Address
 
+GEM Buffer Object
+-----------------
+This section documents our core GEM object, and related bits.
+
+.. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+   :internal:
+
 Buffer Object Eviction
 ----------------------
 
-- 
2.26.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH 5/5] drm/i915/ehl: unconditionally flush the pages on acquire
  2021-07-13 10:45 ` [Intel-gfx] " Matthew Auld
@ 2021-07-13 10:45   ` Matthew Auld
  -1 siblings, 0 replies; 37+ messages in thread
From: Matthew Auld @ 2021-07-13 10:45 UTC (permalink / raw)
  To: intel-gfx
  Cc: Lucas De Marchi, dri-devel, Jon Bloomfield, Chris Wilson,
	Francisco Jerez, Tejas Upadhyay

EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
possible for userspace to bypass the GTT caching bits set by the kernel,
as per the given object cache_level. This is troublesome since the heavy
flush we apply when first acquiring the pages is skipped if the kernel
thinks the object is coherent with the GPU. As a result it might be
possible to bypass the cache and read the contents of the page directly,
which could be stale data. If it's just a case of userspace shooting
themselves in the foot then so be it, but since i915 takes the stance of
always zeroing memory before handing it to userspace, we need to prevent
this.

v2: this time actually set cache_dirty in put_pages()
v3: move to get_pages() which looks simpler

BSpec: 34007
References: 046091758b50 ("Revert "drm/i915/ehl: Update MOCS table for EHL"")
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Tejas Upadhyay <tejaskumarx.surendrakumar.upadhyay@intel.com>
Cc: Francisco Jerez <francisco.jerez.plata@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Jon Bloomfield <jon.bloomfield@intel.com>
Cc: Chris Wilson <chris.p.wilson@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
---
 .../gpu/drm/i915/gem/i915_gem_object_types.h   |  6 ++++++
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c      | 18 ++++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index da2194290436..7089d1b222c5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -522,6 +522,12 @@ struct drm_i915_gem_object {
 	 * I915_BO_CACHE_COHERENT_FOR_WRITE, i.e that the GPU will be coherent
 	 * for both reads and writes though the CPU cache. So pretty much this
 	 * should only be needed for I915_CACHE_NONE objects.
+	 *
+	 * Update: Some bonkers hardware decided to add the 'Bypass LLC' MOCS
+	 * entry, which defeats our @cache_coherent tracking, since userspace
+	 * can freely bypass the CPU cache when touching the pages with the GPU,
+	 * where the kernel is completely unaware. On such platform we need
+	 * apply the sledgehammer-on-acquire regardless of the @cache_coherent.
 	 */
 	unsigned int cache_dirty:1;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 6a04cce188fc..11f072193f3b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -182,6 +182,24 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
 	if (i915_gem_object_needs_bit17_swizzle(obj))
 		i915_gem_object_do_bit_17_swizzle(obj, st);
 
+	/*
+	 * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
+	 * possible for userspace to bypass the GTT caching bits set by the
+	 * kernel, as per the given object cache_level. This is troublesome
+	 * since the heavy flush we apply when first gathering the pages is
+	 * skipped if the kernel thinks the object is coherent with the GPU. As
+	 * a result it might be possible to bypass the cache and read the
+	 * contents of the page directly, which could be stale data. If it's
+	 * just a case of userspace shooting themselves in the foot then so be
+	 * it, but since i915 takes the stance of always zeroing memory before
+	 * handing it to userspace, we need to prevent this.
+	 *
+	 * By setting cache_dirty here we make the clflush in set_pages
+	 * unconditional on such platforms.
+	 */
+	if (IS_JSL_EHL(i915) && obj->flags & I915_BO_ALLOC_USER)
+		obj->cache_dirty = true;
+
 	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
 
 	return 0;
-- 
2.26.3


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [Intel-gfx] [PATCH 5/5] drm/i915/ehl: unconditionally flush the pages on acquire
@ 2021-07-13 10:45   ` Matthew Auld
  0 siblings, 0 replies; 37+ messages in thread
From: Matthew Auld @ 2021-07-13 10:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: Lucas De Marchi, dri-devel, Chris Wilson, Francisco Jerez

EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
possible for userspace to bypass the GTT caching bits set by the kernel,
as per the given object cache_level. This is troublesome since the heavy
flush we apply when first acquiring the pages is skipped if the kernel
thinks the object is coherent with the GPU. As a result it might be
possible to bypass the cache and read the contents of the page directly,
which could be stale data. If it's just a case of userspace shooting
themselves in the foot then so be it, but since i915 takes the stance of
always zeroing memory before handing it to userspace, we need to prevent
this.

v2: this time actually set cache_dirty in put_pages()
v3: move to get_pages() which looks simpler

BSpec: 34007
References: 046091758b50 ("Revert "drm/i915/ehl: Update MOCS table for EHL"")
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Tejas Upadhyay <tejaskumarx.surendrakumar.upadhyay@intel.com>
Cc: Francisco Jerez <francisco.jerez.plata@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Jon Bloomfield <jon.bloomfield@intel.com>
Cc: Chris Wilson <chris.p.wilson@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
---
 .../gpu/drm/i915/gem/i915_gem_object_types.h   |  6 ++++++
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c      | 18 ++++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index da2194290436..7089d1b222c5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -522,6 +522,12 @@ struct drm_i915_gem_object {
 	 * I915_BO_CACHE_COHERENT_FOR_WRITE, i.e that the GPU will be coherent
 	 * for both reads and writes though the CPU cache. So pretty much this
 	 * should only be needed for I915_CACHE_NONE objects.
+	 *
+	 * Update: Some bonkers hardware decided to add the 'Bypass LLC' MOCS
+	 * entry, which defeats our @cache_coherent tracking, since userspace
+	 * can freely bypass the CPU cache when touching the pages with the GPU,
+	 * where the kernel is completely unaware. On such platform we need
+	 * apply the sledgehammer-on-acquire regardless of the @cache_coherent.
 	 */
 	unsigned int cache_dirty:1;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 6a04cce188fc..11f072193f3b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -182,6 +182,24 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
 	if (i915_gem_object_needs_bit17_swizzle(obj))
 		i915_gem_object_do_bit_17_swizzle(obj, st);
 
+	/*
+	 * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
+	 * possible for userspace to bypass the GTT caching bits set by the
+	 * kernel, as per the given object cache_level. This is troublesome
+	 * since the heavy flush we apply when first gathering the pages is
+	 * skipped if the kernel thinks the object is coherent with the GPU. As
+	 * a result it might be possible to bypass the cache and read the
+	 * contents of the page directly, which could be stale data. If it's
+	 * just a case of userspace shooting themselves in the foot then so be
+	 * it, but since i915 takes the stance of always zeroing memory before
+	 * handing it to userspace, we need to prevent this.
+	 *
+	 * By setting cache_dirty here we make the clflush in set_pages
+	 * unconditional on such platforms.
+	 */
+	if (IS_JSL_EHL(i915) && obj->flags & I915_BO_ALLOC_USER)
+		obj->cache_dirty = true;
+
 	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
 
 	return 0;
-- 
2.26.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/5] drm/i915: document caching related bits
  2021-07-13 10:45 ` [Intel-gfx] " Matthew Auld
                   ` (4 preceding siblings ...)
  (?)
@ 2021-07-13 11:12 ` Patchwork
  -1 siblings, 0 replies; 37+ messages in thread
From: Patchwork @ 2021-07-13 11:12 UTC (permalink / raw)
  To: Matthew Auld; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/5] drm/i915: document caching related bits
URL   : https://patchwork.freedesktop.org/series/92469/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
2d424bc0390d drm/i915: document caching related bits
-:58: WARNING:TYPO_SPELLING: 'specifc' may be misspelled - perhaps 'specific'?
#58: FILE: drivers/gpu/drm/i915/gem/i915_gem_object_types.h:133:
+	 * gen7+, L3 sits between the domain specifc caches, eg sampler/render
 	                                     ^^^^^^^

-:119: WARNING:REPEATED_WORD: Possible repeated word: 'the'
#119: FILE: drivers/gpu/drm/i915/gem/i915_gem_object_types.h:319:
+	 * that same page. Now if the @cache_level is I915_CACHE_NONE and the
+	 * the platform doesn't have the shared-LLC, then the GPU will

total: 0 errors, 2 warnings, 0 checks, 166 lines checked
33fab8144f1c drm/i915/uapi: convert drm_i915_gem_madvise to kernel-doc
-:55: WARNING:TYPO_SPELLING: 'wont' may be misspelled - perhaps 'won't'?
#55: FILE: include/uapi/drm/i915_drm.h:1523:
+	 * The buffer wont be needed. The pages and their contents can be
 	              ^^^^

total: 0 errors, 1 warnings, 0 checks, 62 lines checked
5121ee416eb6 drm/i915: convert drm_i915_gem_object to kernel-doc
-:37: WARNING:REPEATED_WORD: Possible repeated word: 'of'
#37: FILE: drivers/gpu/drm/i915/gem/i915_gem_object_types.h:187:
+	 * mask of of the lengths for each sg entry.

total: 0 errors, 1 warnings, 0 checks, 577 lines checked
c30b089eaa9f drm/i915: pull in some more kernel-doc
d8d89d8aeb49 drm/i915/ehl: unconditionally flush the pages on acquire
-:21: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#21: 
References: 046091758b50 ("Revert "drm/i915/ehl: Update MOCS table for EHL"")

-:21: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 046091758b50 ("Revert "drm/i915/ehl: Update MOCS table for EHL"")'
#21: 
References: 046091758b50 ("Revert "drm/i915/ehl: Update MOCS table for EHL"")

total: 1 errors, 1 warnings, 0 checks, 36 lines checked


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/5] drm/i915: document caching related bits
  2021-07-13 10:45 ` [Intel-gfx] " Matthew Auld
                   ` (5 preceding siblings ...)
  (?)
@ 2021-07-13 11:39 ` Patchwork
  -1 siblings, 0 replies; 37+ messages in thread
From: Patchwork @ 2021-07-13 11:39 UTC (permalink / raw)
  To: Matthew Auld; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 1424 bytes --]

== Series Details ==

Series: series starting with [1/5] drm/i915: document caching related bits
URL   : https://patchwork.freedesktop.org/series/92469/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_10337 -> Patchwork_20585
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/index.html


Changes
-------

  No changes found


Participating hosts (37 -> 36)
------------------------------

  Missing    (1): fi-bdw-samus 


Build changes
-------------

  * Linux: CI_DRM_10337 -> Patchwork_20585

  CI-20190529: 20190529
  CI_DRM_10337: 52d04d593394807e36200b0875a6e91c8d6af770 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_6135: 3bf28f9dffd41b85c262d4e6664ffbdf5b7d9a93 @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
  Patchwork_20585: d8d89d8aeb4931dac25c7d1caff287fdef764b9d @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

d8d89d8aeb49 drm/i915/ehl: unconditionally flush the pages on acquire
c30b089eaa9f drm/i915: pull in some more kernel-doc
5121ee416eb6 drm/i915: convert drm_i915_gem_object to kernel-doc
33fab8144f1c drm/i915/uapi: convert drm_i915_gem_madvise to kernel-doc
2d424bc0390d drm/i915: document caching related bits

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/index.html

[-- Attachment #1.2: Type: text/html, Size: 2019 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits
  2021-07-13 10:45 ` [Intel-gfx] " Matthew Auld
@ 2021-07-13 11:41   ` Mika Kuoppala
  -1 siblings, 0 replies; 37+ messages in thread
From: Mika Kuoppala @ 2021-07-13 11:41 UTC (permalink / raw)
  To: Matthew Auld, intel-gfx; +Cc: Daniel Vetter, dri-devel

Matthew Auld <matthew.auld@intel.com> writes:

> Try to document the object caching related bits, like cache_coherent and
> cache_dirty.
>
> Suggested-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> ---
>  .../gpu/drm/i915/gem/i915_gem_object_types.h  | 135 +++++++++++++++++-
>  drivers/gpu/drm/i915/i915_drv.h               |   9 --
>  2 files changed, 131 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> index ef3de2ae9723..02c3529b774c 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> @@ -92,6 +92,57 @@ struct drm_i915_gem_object_ops {
>  	const char *name; /* friendly name for debug, e.g. lockdep classes */
>  };
>  
> +/**
> + * enum i915_cache_level - The supported GTT caching values for system memory
> + * pages.
> + *
> + * These translate to some special GTT PTE bits when binding pages into some
> + * address space. It also determines whether an object, or rather its pages are
> + * coherent with the GPU, when also reading or writing through the CPU cache
> + * with those pages.
> + *
> + * Userspace can also control this through struct drm_i915_gem_caching.
> + */
> +enum i915_cache_level {
> +	/**
> +	 * @I915_CACHE_NONE:
> +	 *
> +	 * Not coherent with the CPU cache. If the cache is dirty and we need
> +	 * the underlying pages to be coherent with some later GPU access then
> +	 * we need to manually flush the pages.
> +	 *
> +	 * Note that on shared-LLC platforms reads through the CPU cache are
> +	 * still coherent even with this setting. See also
> +	 * I915_BO_CACHE_COHERENT_FOR_READ for more details.
> +	 */
> +	I915_CACHE_NONE = 0,
> +	/**
> +	 * @I915_CACHE_LLC:
> +	 *
> +	 * Coherent with the CPU cache. If the cache is dirty, then the GPU will
> +	 * ensure that access remains coherent, when both reading and writing
> +	 * through the CPU cache.
> +	 *
> +	 * Applies to both platforms with shared-LLC(HAS_LLC), and snooping
> +	 * based platforms(HAS_SNOOP).
> +	 */
> +	I915_CACHE_LLC,
> +	/**
> +	 * @I915_CACHE_L3_LLC:
> +	 *
> +	 * gen7+, L3 sits between the domain specifc caches, eg sampler/render

typo: specifc

> +	 * caches, and the large Last-Level-Cache. LLC is coherent with the CPU,
> +	 * but L3 is only visible to the GPU.
> +	 */

I dont get the difference between this and I915_CACHE_LLC.
Could the diff between LLC and L3_LLC be described here with example?

Thanks,
-Mika

> +	I915_CACHE_L3_LLC,
> +	/**
> +	 * @I915_CACHE_WT:
> +	 *
> +	 * hsw:gt3e Write-through for scanout buffers.
> +	 */
> +	I915_CACHE_WT,
> +};
> +
>  enum i915_map_type {
>  	I915_MAP_WB = 0,
>  	I915_MAP_WC,
> @@ -228,14 +279,90 @@ struct drm_i915_gem_object {
>  	unsigned int mem_flags;
>  #define I915_BO_FLAG_STRUCT_PAGE BIT(0) /* Object backed by struct pages */
>  #define I915_BO_FLAG_IOMEM       BIT(1) /* Object backed by IO memory */
> -	/*
> -	 * Is the object to be mapped as read-only to the GPU
> -	 * Only honoured if hardware has relevant pte bit
> +	/**
> +	 * @cache_level: The desired GTT caching level.
> +	 *
> +	 * See enum i915_cache_level for possible values, along with what
> +	 * each does.
>  	 */
>  	unsigned int cache_level:3;
> -	unsigned int cache_coherent:2;
> +	/**
> +	 * @cache_coherent:
> +	 *
> +	 * Track whether the pages are coherent with the GPU if reading or
> +	 * writing through the CPU cache.
> +	 *
> +	 * This largely depends on the @cache_level, for example if the object
> +	 * is marked as I915_CACHE_LLC, then GPU access is coherent for both
> +	 * reads and writes through the CPU cache.
> +	 *
> +	 * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> +	 * the CPU cache are always coherent, regardless of the @cache_level. On
> +	 * snooping based platforms this is not the case, unless the full
> +	 * I915_CACHE_LLC or similar setting is used.
> +	 *
> +	 * As a result of this we need to track coherency separately for reads
> +	 * and writes, in order to avoid superfluous flushing on shared-LLC
> +	 * platforms, for reads.
> +	 *
> +	 * I915_BO_CACHE_COHERENT_FOR_READ:
> +	 *
> +	 * When reading through the CPU cache, the GPU is still coherent. Note
> +	 * that no data has actually been modified here, so it might seem
> +	 * strange that we care about this.
> +	 *
> +	 * As an example, if some object is mapped on the CPU with write-back
> +	 * caching, and we read some page, then the cache likely now contains
> +	 * the data from that read. At this point the cache and main memory
> +	 * match up, so all good. But next the GPU needs to write some data to
> +	 * that same page. Now if the @cache_level is I915_CACHE_NONE and the
> +	 * the platform doesn't have the shared-LLC, then the GPU will
> +	 * effectively skip invalidating the cache(or however that works
> +	 * internally) when writing the new value.  This is really bad since the
> +	 * GPU has just written some new data to main memory, but the CPU cache
> +	 * is still valid and now contains stale data. As a result the next time
> +	 * we do a cached read with the CPU, we are rewarded with stale data.
> +	 * Likewise if the cache is later flushed, we might be rewarded with
> +	 * overwriting main memory with stale data.
> +	 *
> +	 * I915_BO_CACHE_COHERENT_FOR_WRITE:
> +	 *
> +	 * When writing through the CPU cache, the GPU is still coherent. Note
> +	 * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> +	 *
> +	 * This is never set when I915_CACHE_NONE is used for @cache_level,
> +	 * where instead we have to manually flush the caches after writing
> +	 * through the CPU cache. For other cache levels this should be set and
> +	 * the object is therefore considered coherent for both reads and writes
> +	 * through the CPU cache.
> +	 */
>  #define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
>  #define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
> +	unsigned int cache_coherent:2;
> +	/**
> +	 * @cache_dirty:
> +	 *
> +	 * Track if the cache might be dirty for the @pages i.e it has yet to be
> +	 * written back to main memory. As a result reading directly from main
> +	 * memory might yield stale data.
> +	 *
> +	 * This also ties into whether the kernel is tracking the object as
> +	 * coherent with the GPU, as per @cache_coherent, as it determines if
> +	 * flushing might be needed at various points.
> +	 *
> +	 * Another part of @cache_dirty is managing flushing when first
> +	 * acquiring the pages for system memory, at this point the pages are
> +	 * considered foreign, so the default assumption is that the cache is
> +	 * dirty, for example the page zeroing done my the kernel might leave
> +	 * writes though the CPU cache, or swapping-in, while the actual data in
> +	 * main memory is potentially stale.  Note that this is a potential
> +	 * security issue when dealing with userspace objects and zeroing. Now,
> +	 * whether we actually need apply the big sledgehammer of flushing all
> +	 * the pages on acquire depends on if @cache_coherent is marked as
> +	 * I915_BO_CACHE_COHERENT_FOR_WRITE, i.e that the GPU will be coherent
> +	 * for both reads and writes though the CPU cache. So pretty much this
> +	 * should only be needed for I915_CACHE_NONE objects.
> +	 */
>  	unsigned int cache_dirty:1;
>  
>  	/**
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index c4747f4407ef..37bb1a3cadd4 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -394,15 +394,6 @@ struct drm_i915_display_funcs {
>  	void (*read_luts)(struct intel_crtc_state *crtc_state);
>  };
>  
> -enum i915_cache_level {
> -	I915_CACHE_NONE = 0,
> -	I915_CACHE_LLC, /* also used for snoopable memory on non-LLC */
> -	I915_CACHE_L3_LLC, /* gen7+, L3 sits between the domain specifc
> -			      caches, eg sampler/render caches, and the
> -			      large Last-Level-Cache. LLC is coherent with
> -			      the CPU, but L3 is only visible to the GPU. */
> -	I915_CACHE_WT, /* hsw:gt3e WriteThrough for scanouts */
> -};
>  
>  #define I915_COLOR_UNEVICTABLE (-1) /* a non-vma sharing the address space */
>  
> -- 
> 2.26.3
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits
@ 2021-07-13 11:41   ` Mika Kuoppala
  0 siblings, 0 replies; 37+ messages in thread
From: Mika Kuoppala @ 2021-07-13 11:41 UTC (permalink / raw)
  To: Matthew Auld, intel-gfx; +Cc: Daniel Vetter, dri-devel

Matthew Auld <matthew.auld@intel.com> writes:

> Try to document the object caching related bits, like cache_coherent and
> cache_dirty.
>
> Suggested-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> ---
>  .../gpu/drm/i915/gem/i915_gem_object_types.h  | 135 +++++++++++++++++-
>  drivers/gpu/drm/i915/i915_drv.h               |   9 --
>  2 files changed, 131 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> index ef3de2ae9723..02c3529b774c 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> @@ -92,6 +92,57 @@ struct drm_i915_gem_object_ops {
>  	const char *name; /* friendly name for debug, e.g. lockdep classes */
>  };
>  
> +/**
> + * enum i915_cache_level - The supported GTT caching values for system memory
> + * pages.
> + *
> + * These translate to some special GTT PTE bits when binding pages into some
> + * address space. It also determines whether an object, or rather its pages are
> + * coherent with the GPU, when also reading or writing through the CPU cache
> + * with those pages.
> + *
> + * Userspace can also control this through struct drm_i915_gem_caching.
> + */
> +enum i915_cache_level {
> +	/**
> +	 * @I915_CACHE_NONE:
> +	 *
> +	 * Not coherent with the CPU cache. If the cache is dirty and we need
> +	 * the underlying pages to be coherent with some later GPU access then
> +	 * we need to manually flush the pages.
> +	 *
> +	 * Note that on shared-LLC platforms reads through the CPU cache are
> +	 * still coherent even with this setting. See also
> +	 * I915_BO_CACHE_COHERENT_FOR_READ for more details.
> +	 */
> +	I915_CACHE_NONE = 0,
> +	/**
> +	 * @I915_CACHE_LLC:
> +	 *
> +	 * Coherent with the CPU cache. If the cache is dirty, then the GPU will
> +	 * ensure that access remains coherent, when both reading and writing
> +	 * through the CPU cache.
> +	 *
> +	 * Applies to both platforms with shared-LLC(HAS_LLC), and snooping
> +	 * based platforms(HAS_SNOOP).
> +	 */
> +	I915_CACHE_LLC,
> +	/**
> +	 * @I915_CACHE_L3_LLC:
> +	 *
> +	 * gen7+, L3 sits between the domain specifc caches, eg sampler/render

typo: specifc

> +	 * caches, and the large Last-Level-Cache. LLC is coherent with the CPU,
> +	 * but L3 is only visible to the GPU.
> +	 */

I dont get the difference between this and I915_CACHE_LLC.
Could the diff between LLC and L3_LLC be described here with example?

Thanks,
-Mika

> +	I915_CACHE_L3_LLC,
> +	/**
> +	 * @I915_CACHE_WT:
> +	 *
> +	 * hsw:gt3e Write-through for scanout buffers.
> +	 */
> +	I915_CACHE_WT,
> +};
> +
>  enum i915_map_type {
>  	I915_MAP_WB = 0,
>  	I915_MAP_WC,
> @@ -228,14 +279,90 @@ struct drm_i915_gem_object {
>  	unsigned int mem_flags;
>  #define I915_BO_FLAG_STRUCT_PAGE BIT(0) /* Object backed by struct pages */
>  #define I915_BO_FLAG_IOMEM       BIT(1) /* Object backed by IO memory */
> -	/*
> -	 * Is the object to be mapped as read-only to the GPU
> -	 * Only honoured if hardware has relevant pte bit
> +	/**
> +	 * @cache_level: The desired GTT caching level.
> +	 *
> +	 * See enum i915_cache_level for possible values, along with what
> +	 * each does.
>  	 */
>  	unsigned int cache_level:3;
> -	unsigned int cache_coherent:2;
> +	/**
> +	 * @cache_coherent:
> +	 *
> +	 * Track whether the pages are coherent with the GPU if reading or
> +	 * writing through the CPU cache.
> +	 *
> +	 * This largely depends on the @cache_level, for example if the object
> +	 * is marked as I915_CACHE_LLC, then GPU access is coherent for both
> +	 * reads and writes through the CPU cache.
> +	 *
> +	 * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> +	 * the CPU cache are always coherent, regardless of the @cache_level. On
> +	 * snooping based platforms this is not the case, unless the full
> +	 * I915_CACHE_LLC or similar setting is used.
> +	 *
> +	 * As a result of this we need to track coherency separately for reads
> +	 * and writes, in order to avoid superfluous flushing on shared-LLC
> +	 * platforms, for reads.
> +	 *
> +	 * I915_BO_CACHE_COHERENT_FOR_READ:
> +	 *
> +	 * When reading through the CPU cache, the GPU is still coherent. Note
> +	 * that no data has actually been modified here, so it might seem
> +	 * strange that we care about this.
> +	 *
> +	 * As an example, if some object is mapped on the CPU with write-back
> +	 * caching, and we read some page, then the cache likely now contains
> +	 * the data from that read. At this point the cache and main memory
> +	 * match up, so all good. But next the GPU needs to write some data to
> +	 * that same page. Now if the @cache_level is I915_CACHE_NONE and the
> +	 * the platform doesn't have the shared-LLC, then the GPU will
> +	 * effectively skip invalidating the cache(or however that works
> +	 * internally) when writing the new value.  This is really bad since the
> +	 * GPU has just written some new data to main memory, but the CPU cache
> +	 * is still valid and now contains stale data. As a result the next time
> +	 * we do a cached read with the CPU, we are rewarded with stale data.
> +	 * Likewise if the cache is later flushed, we might be rewarded with
> +	 * overwriting main memory with stale data.
> +	 *
> +	 * I915_BO_CACHE_COHERENT_FOR_WRITE:
> +	 *
> +	 * When writing through the CPU cache, the GPU is still coherent. Note
> +	 * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> +	 *
> +	 * This is never set when I915_CACHE_NONE is used for @cache_level,
> +	 * where instead we have to manually flush the caches after writing
> +	 * through the CPU cache. For other cache levels this should be set and
> +	 * the object is therefore considered coherent for both reads and writes
> +	 * through the CPU cache.
> +	 */
>  #define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
>  #define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
> +	unsigned int cache_coherent:2;
> +	/**
> +	 * @cache_dirty:
> +	 *
> +	 * Track if the cache might be dirty for the @pages i.e it has yet to be
> +	 * written back to main memory. As a result reading directly from main
> +	 * memory might yield stale data.
> +	 *
> +	 * This also ties into whether the kernel is tracking the object as
> +	 * coherent with the GPU, as per @cache_coherent, as it determines if
> +	 * flushing might be needed at various points.
> +	 *
> +	 * Another part of @cache_dirty is managing flushing when first
> +	 * acquiring the pages for system memory, at this point the pages are
> +	 * considered foreign, so the default assumption is that the cache is
> +	 * dirty, for example the page zeroing done my the kernel might leave
> +	 * writes though the CPU cache, or swapping-in, while the actual data in
> +	 * main memory is potentially stale.  Note that this is a potential
> +	 * security issue when dealing with userspace objects and zeroing. Now,
> +	 * whether we actually need apply the big sledgehammer of flushing all
> +	 * the pages on acquire depends on if @cache_coherent is marked as
> +	 * I915_BO_CACHE_COHERENT_FOR_WRITE, i.e that the GPU will be coherent
> +	 * for both reads and writes though the CPU cache. So pretty much this
> +	 * should only be needed for I915_CACHE_NONE objects.
> +	 */
>  	unsigned int cache_dirty:1;
>  
>  	/**
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index c4747f4407ef..37bb1a3cadd4 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -394,15 +394,6 @@ struct drm_i915_display_funcs {
>  	void (*read_luts)(struct intel_crtc_state *crtc_state);
>  };
>  
> -enum i915_cache_level {
> -	I915_CACHE_NONE = 0,
> -	I915_CACHE_LLC, /* also used for snoopable memory on non-LLC */
> -	I915_CACHE_L3_LLC, /* gen7+, L3 sits between the domain specifc
> -			      caches, eg sampler/render caches, and the
> -			      large Last-Level-Cache. LLC is coherent with
> -			      the CPU, but L3 is only visible to the GPU. */
> -	I915_CACHE_WT, /* hsw:gt3e WriteThrough for scanouts */
> -};
>  
>  #define I915_COLOR_UNEVICTABLE (-1) /* a non-vma sharing the address space */
>  
> -- 
> 2.26.3
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [Intel-gfx] ✓ Fi.CI.IGT: success for series starting with [1/5] drm/i915: document caching related bits
  2021-07-13 10:45 ` [Intel-gfx] " Matthew Auld
                   ` (7 preceding siblings ...)
  (?)
@ 2021-07-13 12:59 ` Patchwork
  -1 siblings, 0 replies; 37+ messages in thread
From: Patchwork @ 2021-07-13 12:59 UTC (permalink / raw)
  To: Matthew Auld; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 30289 bytes --]

== Series Details ==

Series: series starting with [1/5] drm/i915: document caching related bits
URL   : https://patchwork.freedesktop.org/series/92469/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_10337_full -> Patchwork_20585_full
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_20585_full:

### IGT changes ###

#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@gem_eio@unwedge-stress:
    - {shard-rkl}:        [TIMEOUT][1] ([i915#3063]) -> [FAIL][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-rkl-6/igt@gem_eio@unwedge-stress.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-rkl-2/igt@gem_eio@unwedge-stress.html

  * igt@kms_ccs@pipe-b-bad-rotation-90-y_tiled_gen12_mc_ccs:
    - {shard-rkl}:        [FAIL][3] ([i915#3678]) -> [SKIP][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-rkl-5/igt@kms_ccs@pipe-b-bad-rotation-90-y_tiled_gen12_mc_ccs.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-rkl-6/igt@kms_ccs@pipe-b-bad-rotation-90-y_tiled_gen12_mc_ccs.html

  * igt@sysfs_preempt_timeout@timeout@vecs0:
    - {shard-rkl}:        [PASS][5] -> [FAIL][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-rkl-2/igt@sysfs_preempt_timeout@timeout@vecs0.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-rkl-2/igt@sysfs_preempt_timeout@timeout@vecs0.html

  
Known issues
------------

  Here are the changes found in Patchwork_20585_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_create@create-massive:
    - shard-apl:          NOTRUN -> [DMESG-WARN][7] ([i915#3002])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-apl7/igt@gem_create@create-massive.html

  * igt@gem_eio@unwedge-stress:
    - shard-tglb:         [PASS][8] -> [TIMEOUT][9] ([i915#2369] / [i915#3063] / [i915#3648])
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-tglb6/igt@gem_eio@unwedge-stress.html
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-tglb7/igt@gem_eio@unwedge-stress.html

  * igt@gem_exec_fair@basic-deadline:
    - shard-glk:          [PASS][10] -> [FAIL][11] ([i915#2846])
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-glk8/igt@gem_exec_fair@basic-deadline.html
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-glk6/igt@gem_exec_fair@basic-deadline.html

  * igt@gem_exec_fair@basic-none-share@rcs0:
    - shard-tglb:         [PASS][12] -> [FAIL][13] ([i915#2842])
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-tglb6/igt@gem_exec_fair@basic-none-share@rcs0.html
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-tglb1/igt@gem_exec_fair@basic-none-share@rcs0.html

  * igt@gem_exec_fair@basic-none@vecs0:
    - shard-glk:          [PASS][14] -> [FAIL][15] ([i915#2842] / [i915#3468])
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-glk9/igt@gem_exec_fair@basic-none@vecs0.html
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-glk9/igt@gem_exec_fair@basic-none@vecs0.html

  * igt@gem_exec_suspend@basic-s3:
    - shard-kbl:          NOTRUN -> [DMESG-WARN][16] ([i915#180])
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-kbl1/igt@gem_exec_suspend@basic-s3.html

  * igt@gem_exec_whisper@basic-queues-forked-all:
    - shard-glk:          [PASS][17] -> [DMESG-WARN][18] ([i915#118] / [i915#95]) +1 similar issue
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-glk8/igt@gem_exec_whisper@basic-queues-forked-all.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-glk4/igt@gem_exec_whisper@basic-queues-forked-all.html

  * igt@gem_userptr_blits@coherency-unsync:
    - shard-iclb:         NOTRUN -> [SKIP][19] ([i915#3297])
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-iclb5/igt@gem_userptr_blits@coherency-unsync.html

  * igt@gen9_exec_parse@cmd-crossing-page:
    - shard-iclb:         NOTRUN -> [SKIP][20] ([fdo#112306])
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-iclb5/igt@gen9_exec_parse@cmd-crossing-page.html

  * igt@kms_big_fb@x-tiled-16bpp-rotate-90:
    - shard-iclb:         NOTRUN -> [SKIP][21] ([fdo#110725] / [fdo#111614])
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-iclb5/igt@kms_big_fb@x-tiled-16bpp-rotate-90.html

  * igt@kms_big_fb@x-tiled-max-hw-stride-32bpp-rotate-180-async-flip:
    - shard-skl:          NOTRUN -> [FAIL][22] ([i915#3722])
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-skl5/igt@kms_big_fb@x-tiled-max-hw-stride-32bpp-rotate-180-async-flip.html

  * igt@kms_ccs@pipe-d-bad-pixel-format-y_tiled_gen12_rc_ccs_cc:
    - shard-iclb:         NOTRUN -> [SKIP][23] ([fdo#109278]) +3 similar issues
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-iclb5/igt@kms_ccs@pipe-d-bad-pixel-format-y_tiled_gen12_rc_ccs_cc.html

  * igt@kms_chamelium@hdmi-crc-multiple:
    - shard-skl:          NOTRUN -> [SKIP][24] ([fdo#109271] / [fdo#111827]) +2 similar issues
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-skl5/igt@kms_chamelium@hdmi-crc-multiple.html

  * igt@kms_chamelium@hdmi-edid-change-during-suspend:
    - shard-apl:          NOTRUN -> [SKIP][25] ([fdo#109271] / [fdo#111827]) +27 similar issues
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-apl6/igt@kms_chamelium@hdmi-edid-change-during-suspend.html

  * igt@kms_chamelium@hdmi-hpd-for-each-pipe:
    - shard-kbl:          NOTRUN -> [SKIP][26] ([fdo#109271] / [fdo#111827]) +8 similar issues
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-kbl6/igt@kms_chamelium@hdmi-hpd-for-each-pipe.html
    - shard-tglb:         NOTRUN -> [SKIP][27] ([fdo#109284] / [fdo#111827])
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-tglb3/igt@kms_chamelium@hdmi-hpd-for-each-pipe.html

  * igt@kms_color@pipe-a-ctm-red-to-blue:
    - shard-skl:          [PASS][28] -> [DMESG-WARN][29] ([i915#1982])
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-skl10/igt@kms_color@pipe-a-ctm-red-to-blue.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-skl2/igt@kms_color@pipe-a-ctm-red-to-blue.html

  * igt@kms_color_chamelium@pipe-b-degamma:
    - shard-iclb:         NOTRUN -> [SKIP][30] ([fdo#109284] / [fdo#111827])
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-iclb5/igt@kms_color_chamelium@pipe-b-degamma.html

  * igt@kms_content_protection@lic:
    - shard-apl:          NOTRUN -> [TIMEOUT][31] ([i915#1319]) +1 similar issue
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-apl2/igt@kms_content_protection@lic.html
    - shard-kbl:          NOTRUN -> [TIMEOUT][32] ([i915#1319])
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-kbl4/igt@kms_content_protection@lic.html

  * igt@kms_content_protection@uevent:
    - shard-apl:          NOTRUN -> [FAIL][33] ([i915#2105])
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-apl6/igt@kms_content_protection@uevent.html
    - shard-iclb:         NOTRUN -> [SKIP][34] ([fdo#109300] / [fdo#111066])
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-iclb5/igt@kms_content_protection@uevent.html

  * igt@kms_cursor_crc@pipe-a-cursor-512x170-random:
    - shard-iclb:         NOTRUN -> [SKIP][35] ([fdo#109278] / [fdo#109279])
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-iclb5/igt@kms_cursor_crc@pipe-a-cursor-512x170-random.html

  * igt@kms_dp_tiled_display@basic-test-pattern-with-chamelium:
    - shard-iclb:         NOTRUN -> [SKIP][36] ([i915#3528])
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-iclb5/igt@kms_dp_tiled_display@basic-test-pattern-with-chamelium.html

  * igt@kms_flip@2x-flip-vs-expired-vblank@ab-hdmi-a1-hdmi-a2:
    - shard-glk:          [PASS][37] -> [FAIL][38] ([i915#2122])
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-glk4/igt@kms_flip@2x-flip-vs-expired-vblank@ab-hdmi-a1-hdmi-a2.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-glk9/igt@kms_flip@2x-flip-vs-expired-vblank@ab-hdmi-a1-hdmi-a2.html

  * igt@kms_flip@flip-vs-suspend-interruptible@a-dp1:
    - shard-kbl:          [PASS][39] -> [DMESG-WARN][40] ([i915#180]) +6 similar issues
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-kbl4/igt@kms_flip@flip-vs-suspend-interruptible@a-dp1.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-kbl3/igt@kms_flip@flip-vs-suspend-interruptible@a-dp1.html

  * igt@kms_flip@plain-flip-fb-recreate@b-edp1:
    - shard-skl:          [PASS][41] -> [FAIL][42] ([i915#2122]) +1 similar issue
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-skl4/igt@kms_flip@plain-flip-fb-recreate@b-edp1.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-skl6/igt@kms_flip@plain-flip-fb-recreate@b-edp1.html

  * igt@kms_flip@plain-flip-ts-check-interruptible@a-dp1:
    - shard-kbl:          [PASS][43] -> [FAIL][44] ([i915#2122])
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-kbl2/igt@kms_flip@plain-flip-ts-check-interruptible@a-dp1.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-kbl4/igt@kms_flip@plain-flip-ts-check-interruptible@a-dp1.html

  * igt@kms_frontbuffer_tracking@fbcpsr-2p-primscrn-pri-indfb-draw-pwrite:
    - shard-tglb:         NOTRUN -> [SKIP][45] ([fdo#111825])
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-tglb3/igt@kms_frontbuffer_tracking@fbcpsr-2p-primscrn-pri-indfb-draw-pwrite.html

  * igt@kms_frontbuffer_tracking@psr-1p-primscrn-spr-indfb-draw-mmap-cpu:
    - shard-kbl:          NOTRUN -> [SKIP][46] ([fdo#109271]) +91 similar issues
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-kbl4/igt@kms_frontbuffer_tracking@psr-1p-primscrn-spr-indfb-draw-mmap-cpu.html

  * igt@kms_frontbuffer_tracking@psr-2p-scndscrn-cur-indfb-move:
    - shard-skl:          NOTRUN -> [SKIP][47] ([fdo#109271]) +37 similar issues
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-skl5/igt@kms_frontbuffer_tracking@psr-2p-scndscrn-cur-indfb-move.html

  * igt@kms_frontbuffer_tracking@psr-2p-scndscrn-pri-indfb-draw-mmap-wc:
    - shard-iclb:         NOTRUN -> [SKIP][48] ([fdo#109280]) +1 similar issue
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-iclb5/igt@kms_frontbuffer_tracking@psr-2p-scndscrn-pri-indfb-draw-mmap-wc.html

  * igt@kms_hdr@bpc-switch-dpms:
    - shard-skl:          [PASS][49] -> [FAIL][50] ([i915#1188])
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-skl7/igt@kms_hdr@bpc-switch-dpms.html
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-skl4/igt@kms_hdr@bpc-switch-dpms.html

  * igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d:
    - shard-kbl:          NOTRUN -> [SKIP][51] ([fdo#109271] / [i915#533])
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-kbl3/igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d.html

  * igt@kms_plane_alpha_blend@pipe-a-alpha-opaque-fb:
    - shard-apl:          NOTRUN -> [FAIL][52] ([fdo#108145] / [i915#265]) +3 similar issues
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-apl6/igt@kms_plane_alpha_blend@pipe-a-alpha-opaque-fb.html

  * igt@kms_plane_alpha_blend@pipe-a-alpha-transparent-fb:
    - shard-skl:          NOTRUN -> [FAIL][53] ([i915#265])
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-skl5/igt@kms_plane_alpha_blend@pipe-a-alpha-transparent-fb.html

  * igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min:
    - shard-skl:          [PASS][54] -> [FAIL][55] ([fdo#108145] / [i915#265])
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-skl2/igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min.html
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-skl8/igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min.html

  * igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area-4:
    - shard-apl:          NOTRUN -> [SKIP][56] ([fdo#109271] / [i915#658]) +6 similar issues
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-apl8/igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area-4.html

  * igt@kms_psr2_sf@plane-move-sf-dmg-area-3:
    - shard-kbl:          NOTRUN -> [SKIP][57] ([fdo#109271] / [i915#658]) +1 similar issue
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-kbl4/igt@kms_psr2_sf@plane-move-sf-dmg-area-3.html

  * igt@kms_psr2_sf@primary-plane-update-sf-dmg-area-5:
    - shard-skl:          NOTRUN -> [SKIP][58] ([fdo#109271] / [i915#658]) +1 similar issue
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-skl5/igt@kms_psr2_sf@primary-plane-update-sf-dmg-area-5.html

  * igt@kms_psr@psr2_sprite_plane_move:
    - shard-iclb:         [PASS][59] -> [SKIP][60] ([fdo#109441]) +1 similar issue
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-iclb2/igt@kms_psr@psr2_sprite_plane_move.html
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-iclb1/igt@kms_psr@psr2_sprite_plane_move.html

  * igt@kms_vblank@pipe-d-ts-continuation-idle:
    - shard-apl:          NOTRUN -> [SKIP][61] ([fdo#109271]) +267 similar issues
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-apl8/igt@kms_vblank@pipe-d-ts-continuation-idle.html

  * igt@kms_vblank@pipe-d-wait-idle:
    - shard-apl:          NOTRUN -> [SKIP][62] ([fdo#109271] / [i915#533]) +2 similar issues
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-apl6/igt@kms_vblank@pipe-d-wait-idle.html

  * igt@kms_writeback@writeback-fb-id:
    - shard-apl:          NOTRUN -> [SKIP][63] ([fdo#109271] / [i915#2437]) +1 similar issue
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-apl3/igt@kms_writeback@writeback-fb-id.html

  * igt@kms_writeback@writeback-invalid-parameters:
    - shard-iclb:         NOTRUN -> [SKIP][64] ([i915#2437])
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-iclb5/igt@kms_writeback@writeback-invalid-parameters.html

  * igt@perf@polling-parameterized:
    - shard-skl:          [PASS][65] -> [FAIL][66] ([i915#1542])
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-skl4/igt@perf@polling-parameterized.html
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-skl10/igt@perf@polling-parameterized.html

  * igt@sysfs_clients@create:
    - shard-apl:          NOTRUN -> [SKIP][67] ([fdo#109271] / [i915#2994]) +5 similar issues
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-apl6/igt@sysfs_clients@create.html

  * igt@sysfs_clients@pidname:
    - shard-iclb:         NOTRUN -> [SKIP][68] ([i915#2994])
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-iclb5/igt@sysfs_clients@pidname.html

  * igt@sysfs_clients@sema-50:
    - shard-kbl:          NOTRUN -> [SKIP][69] ([fdo#109271] / [i915#2994]) +1 similar issue
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-kbl3/igt@sysfs_clients@sema-50.html

  * igt@sysfs_clients@split-50:
    - shard-skl:          NOTRUN -> [SKIP][70] ([fdo#109271] / [i915#2994])
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-skl5/igt@sysfs_clients@split-50.html

  
#### Possible fixes ####

  * igt@gem_eio@hibernate:
    - {shard-rkl}:        [INCOMPLETE][71] -> [PASS][72]
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-rkl-5/igt@gem_eio@hibernate.html
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-rkl-6/igt@gem_eio@hibernate.html

  * igt@gem_eio@unwedge-stress:
    - shard-iclb:         [TIMEOUT][73] ([i915#2369] / [i915#2481] / [i915#3070]) -> [PASS][74]
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-iclb4/igt@gem_eio@unwedge-stress.html
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-iclb4/igt@gem_eio@unwedge-stress.html

  * igt@gem_exec_fair@basic-flow@rcs0:
    - shard-tglb:         [FAIL][75] ([i915#2842]) -> [PASS][76] +2 similar issues
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-tglb2/igt@gem_exec_fair@basic-flow@rcs0.html
   [76]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-tglb1/igt@gem_exec_fair@basic-flow@rcs0.html

  * igt@gem_exec_fair@basic-pace-solo@rcs0:
    - shard-kbl:          [FAIL][77] ([i915#2842]) -> [PASS][78]
   [77]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-kbl7/igt@gem_exec_fair@basic-pace-solo@rcs0.html
   [78]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-kbl2/igt@gem_exec_fair@basic-pace-solo@rcs0.html

  * igt@gem_exec_fair@basic-pace@rcs0:
    - shard-glk:          [FAIL][79] ([i915#2842]) -> [PASS][80]
   [79]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-glk3/igt@gem_exec_fair@basic-pace@rcs0.html
   [80]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-glk5/igt@gem_exec_fair@basic-pace@rcs0.html

  * igt@gem_exec_fair@basic-pace@vcs0:
    - shard-iclb:         [FAIL][81] ([i915#2842]) -> [PASS][82] +1 similar issue
   [81]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-iclb2/igt@gem_exec_fair@basic-pace@vcs0.html
   [82]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-iclb1/igt@gem_exec_fair@basic-pace@vcs0.html

  * igt@gem_exec_fair@basic-pace@vcs1:
    - shard-kbl:          [SKIP][83] ([fdo#109271]) -> [PASS][84]
   [83]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-kbl1/igt@gem_exec_fair@basic-pace@vcs1.html
   [84]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-kbl2/igt@gem_exec_fair@basic-pace@vcs1.html

  * igt@gem_mmap_gtt@cpuset-big-copy-odd:
    - shard-iclb:         [FAIL][85] ([i915#307]) -> [PASS][86]
   [85]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-iclb5/igt@gem_mmap_gtt@cpuset-big-copy-odd.html
   [86]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-iclb4/igt@gem_mmap_gtt@cpuset-big-copy-odd.html

  * igt@i915_pm_rpm@gem-pread:
    - {shard-rkl}:        [SKIP][87] ([fdo#109308]) -> [PASS][88] +1 similar issue
   [87]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-rkl-5/igt@i915_pm_rpm@gem-pread.html
   [88]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-rkl-6/igt@i915_pm_rpm@gem-pread.html

  * igt@i915_pm_rpm@modeset-lpsp-stress:
    - {shard-rkl}:        [SKIP][89] ([i915#1397]) -> [PASS][90]
   [89]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-rkl-1/igt@i915_pm_rpm@modeset-lpsp-stress.html
   [90]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-rkl-6/igt@i915_pm_rpm@modeset-lpsp-stress.html

  * igt@kms_async_flips@alternate-sync-async-flip:
    - shard-skl:          [FAIL][91] ([i915#2521]) -> [PASS][92]
   [91]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-skl4/igt@kms_async_flips@alternate-sync-async-flip.html
   [92]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-skl3/igt@kms_async_flips@alternate-sync-async-flip.html

  * igt@kms_big_fb@x-tiled-max-hw-stride-32bpp-rotate-0-hflip-async-flip:
    - {shard-rkl}:        [SKIP][93] ([i915#3721]) -> [PASS][94] +3 similar issues
   [93]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-rkl-1/igt@kms_big_fb@x-tiled-max-hw-stride-32bpp-rotate-0-hflip-async-flip.html
   [94]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-rkl-6/igt@kms_big_fb@x-tiled-max-hw-stride-32bpp-rotate-0-hflip-async-flip.html

  * igt@kms_big_fb@y-tiled-64bpp-rotate-0:
    - {shard-rkl}:        [SKIP][95] ([i915#3638]) -> [PASS][96] +3 similar issues
   [95]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-rkl-5/igt@kms_big_fb@y-tiled-64bpp-rotate-0.html
   [96]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-rkl-6/igt@kms_big_fb@y-tiled-64bpp-rotate-0.html

  * igt@kms_big_fb@y-tiled-max-hw-stride-64bpp-rotate-180-hflip-async-flip:
    - shard-iclb:         [DMESG-WARN][97] -> [PASS][98]
   [97]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-iclb1/igt@kms_big_fb@y-tiled-max-hw-stride-64bpp-rotate-180-hflip-async-flip.html
   [98]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-iclb5/igt@kms_big_fb@y-tiled-max-hw-stride-64bpp-rotate-180-hflip-async-flip.html

  * igt@kms_ccs@pipe-a-crc-primary-rotation-180-y_tiled_gen12_rc_ccs_cc:
    - {shard-rkl}:        [FAIL][99] ([i915#3678]) -> [PASS][100] +7 similar issues
   [99]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-rkl-1/igt@kms_ccs@pipe-a-crc-primary-rotation-180-y_tiled_gen12_rc_ccs_cc.html
   [100]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-rkl-6/igt@kms_ccs@pipe-a-crc-primary-rotation-180-y_tiled_gen12_rc_ccs_cc.html

  * igt@kms_color@pipe-c-ctm-max:
    - {shard-rkl}:        [SKIP][101] ([i915#1149] / [i915#1849]) -> [PASS][102]
   [101]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-rkl-1/igt@kms_color@pipe-c-ctm-max.html
   [102]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-rkl-6/igt@kms_color@pipe-c-ctm-max.html

  * igt@kms_concurrent@pipe-b:
    - shard-snb:          [SKIP][103] ([fdo#109271]) -> [PASS][104] +1 similar issue
   [103]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-snb2/igt@kms_concurrent@pipe-b.html
   [104]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-snb6/igt@kms_concurrent@pipe-b.html

  * igt@kms_cursor_crc@pipe-a-cursor-64x64-random:
    - {shard-rkl}:        [SKIP][105] ([fdo#112022]) -> [PASS][106] +11 similar issues
   [105]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-rkl-5/igt@kms_cursor_crc@pipe-a-cursor-64x64-random.html
   [106]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-rkl-6/igt@kms_cursor_crc@pipe-a-cursor-64x64-random.html

  * igt@kms_cursor_legacy@basic-flip-before-cursor-legacy:
    - {shard-rkl}:        [SKIP][107] ([fdo#111825]) -> [PASS][108] +2 similar issues
   [107]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-rkl-5/igt@kms_cursor_legacy@basic-flip-before-cursor-legacy.html
   [108]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-rkl-6/igt@kms_cursor_legacy@basic-flip-before-cursor-legacy.html

  * igt@kms_cursor_legacy@flip-vs-cursor-atomic:
    - shard-skl:          [FAIL][109] ([i915#2346]) -> [PASS][110]
   [109]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-skl1/igt@kms_cursor_legacy@flip-vs-cursor-atomic.html
   [110]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-skl8/igt@kms_cursor_legacy@flip-vs-cursor-atomic.html

  * igt@kms_draw_crc@draw-method-xrgb8888-mmap-cpu-untiled:
    - {shard-rkl}:        [SKIP][111] ([fdo#111314]) -> [PASS][112] +6 similar issues
   [111]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-rkl-5/igt@kms_draw_crc@draw-method-xrgb8888-mmap-cpu-untiled.html
   [112]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-rkl-6/igt@kms_draw_crc@draw-method-xrgb8888-mmap-cpu-untiled.html

  * igt@kms_fbcon_fbt@psr:
    - {shard-rkl}:        [SKIP][113] ([fdo#110189]) -> [PASS][114]
   [113]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-rkl-1/igt@kms_fbcon_fbt@psr.html
   [114]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-rkl-6/igt@kms_fbcon_fbt@psr.html

  * igt@kms_flip@flip-vs-expired-vblank-interruptible@c-hdmi-a1:
    - shard-glk:          [FAIL][115] ([i915#79]) -> [PASS][116]
   [115]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-glk1/igt@kms_flip@flip-vs-expired-vblank-interruptible@c-hdmi-a1.html
   [116]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-glk6/igt@kms_flip@flip-vs-expired-vblank-interruptible@c-hdmi-a1.html

  * igt@kms_flip@flip-vs-suspend@a-dp1:
    - shard-apl:          [DMESG-WARN][117] ([i915#180]) -> [PASS][118] +1 similar issue
   [117]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-apl1/igt@kms_flip@flip-vs-suspend@a-dp1.html
   [118]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-apl2/igt@kms_flip@flip-vs-suspend@a-dp1.html

  * igt@kms_flip@flip-vs-suspend@c-dp1:
    - shard-kbl:          [DMESG-WARN][119] ([i915#180]) -> [PASS][120] +5 similar issues
   [119]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-kbl3/igt@kms_flip@flip-vs-suspend@c-dp1.html
   [120]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-kbl4/igt@kms_flip@flip-vs-suspend@c-dp1.html

  * igt@kms_flip@plain-flip-fb-recreate@c-edp1:
    - shard-skl:          [FAIL][121] ([i915#2122]) -> [PASS][122]
   [121]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-skl4/igt@kms_flip@plain-flip-fb-recreate@c-edp1.html
   [122]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-skl6/igt@kms_flip@plain-flip-fb-recreate@c-edp1.html

  * igt@kms_frontbuffer_tracking@fbc-1p-offscren-pri-shrfb-draw-render:
    - {shard-rkl}:        [SKIP][123] ([i915#1849]) -> [PASS][124] +33 similar issues
   [123]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-rkl-1/igt@kms_frontbuffer_tracking@fbc-1p-offscren-pri-shrfb-draw-render.html
   [124]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-rkl-6/igt@kms_frontbuffer_tracking@fbc-1p-offscren-pri-shrfb-draw-render.html

  * igt@kms_plane@plane-position-hole-dpms@pipe-b-planes:
    - {shard-rkl}:        [SKIP][125] ([i915#3558]) -> [PASS][126] +3 similar issues
   [125]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-rkl-5/igt@kms_plane@plane-position-hole-dpms@pipe-b-planes.html
   [126]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-rkl-6/igt@kms_plane@plane-position-hole-dpms@pipe-b-planes.html

  * igt@kms_plane_alpha_blend@pipe-a-coverage-7efc:
    - shard-skl:          [FAIL][127] ([fdo#108145] / [i915#265]) -> [PASS][128]
   [127]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-skl5/igt@kms_plane_alpha_blend@pipe-a-coverage-7efc.html
   [128]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-skl5/igt@kms_plane_alpha_blend@pipe-a-coverage-7efc.html

  * igt@kms_plane_multiple@atomic-pipe-a-tiling-y:
    - {shard-rkl}:        [SKIP][129] ([i915#1849] / [i915#3558]) -> [PASS][130]
   [129]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-rkl-5/igt@kms_plane_multiple@atomic-pipe-a-tiling-y.html
   [130]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-rkl-6/igt@kms_plane_multiple@atomic-pipe-a-tiling-y.html

  * igt@kms_psr2_su@page_flip:
    - shard-iclb:         [SKIP][131] ([fdo#109642] / [fdo#111068] / [i915#658]) -> [PASS][132]
   [131]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-iclb4/igt@kms_psr2_su@page_flip.html
   [132]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-iclb2/igt@kms_psr2_su@page_flip.html

  * igt@kms_psr@psr2_cursor_mmap_cpu:
    - shard-iclb:         [SKIP][133] ([fdo#109441]) -> [PASS][134]
   [133]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-iclb4/igt@kms_psr@psr2_cursor_mmap_cpu.html
   [134]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-iclb2/igt@kms_psr@psr2_cursor_mmap_cpu.html

  * igt@kms_psr@sprite_blt:
    - {shard-rkl}:        [SKIP][135] ([i915#1072]) -> [PASS][136] +1 similar issue
   [135]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-rkl-5/igt@kms_psr@sprite_blt.html
   [136]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-rkl-6/igt@kms_psr@sprite_blt.html

  * igt@kms_vblank@crtc-id:
    - {shard-rkl}:        [SKIP][137] ([i915#1845]) -> [PASS][138] +17 similar issues
   [137]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-rkl-5/igt@kms_vblank@crtc-id.html
   [138]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-rkl-6/igt@kms_vblank@crtc-id.html

  * igt@perf@blocking:
    - {shard-rkl}:        [FAIL][139] -> [PASS][140] +3 similar issues
   [139]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-rkl-2/igt@perf@blocking.html
   [140]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-rkl-1/igt@perf@blocking.html

  * igt@perf_pmu@rc6-suspend:
    - {shard-rkl}:        [FAIL][141] ([fdo#103375]) -> [PASS][142]
   [141]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-rkl-6/igt@perf_pmu@rc6-suspend.html
   [142]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-rkl-2/igt@perf_pmu@rc6-suspend.html

  
#### Warnings ####

  * igt@i915_pm_rc6_residency@rc6-fence:
    - shard-iclb:         [WARN][143] ([i915#2684]) -> [WARN][144] ([i915#1804] / [i915#2684])
   [143]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-iclb8/igt@i915_pm_rc6_residency@rc6-fence.html
   [144]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-iclb4/igt@i915_pm_rc6_residency@rc6-fence.html

  * igt@kms_dp_dsc@basic-dsc-enable-edp:
    - shard-iclb:         [SKIP][145] ([fdo#109349]) -> [DMESG-WARN][146] ([i915#1226])
   [145]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-iclb4/igt@kms_dp_dsc@basic-dsc-enable-edp.html
   [146]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/shard-iclb2/igt@kms_dp_dsc@basic-dsc-enable-edp.html

  * igt@kms_psr2_sf@plane-move-sf-dmg-area-0:
    - shard-iclb:         [SKIP][147] ([i915#658]) -> [SKIP][148] ([i915#2920])
   [147]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10337/shard-iclb4/igt@kms_psr2_sf@plane-move-sf-dmg-area-0.html
   [148]: https://intel-gfx-

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20585/index.html

[-- Attachment #1.2: Type: text/html, Size: 33364 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits
  2021-07-13 10:45 ` [Intel-gfx] " Matthew Auld
@ 2021-07-13 15:55   ` Ville Syrjälä
  -1 siblings, 0 replies; 37+ messages in thread
From: Ville Syrjälä @ 2021-07-13 15:55 UTC (permalink / raw)
  To: Matthew Auld; +Cc: Daniel Vetter, intel-gfx, dri-devel

On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> +	/**
> +	 * @cache_coherent:
> +	 *
> +	 * Track whether the pages are coherent with the GPU if reading or
> +	 * writing through the CPU cache.
> +	 *
> +	 * This largely depends on the @cache_level, for example if the object
> +	 * is marked as I915_CACHE_LLC, then GPU access is coherent for both
> +	 * reads and writes through the CPU cache.
> +	 *
> +	 * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> +	 * the CPU cache are always coherent, regardless of the @cache_level. On
> +	 * snooping based platforms this is not the case, unless the full
> +	 * I915_CACHE_LLC or similar setting is used.
> +	 *
> +	 * As a result of this we need to track coherency separately for reads
> +	 * and writes, in order to avoid superfluous flushing on shared-LLC
> +	 * platforms, for reads.
> +	 *
> +	 * I915_BO_CACHE_COHERENT_FOR_READ:
> +	 *
> +	 * When reading through the CPU cache, the GPU is still coherent. Note
> +	 * that no data has actually been modified here, so it might seem
> +	 * strange that we care about this.
> +	 *
> +	 * As an example, if some object is mapped on the CPU with write-back
> +	 * caching, and we read some page, then the cache likely now contains
> +	 * the data from that read. At this point the cache and main memory
> +	 * match up, so all good. But next the GPU needs to write some data to
> +	 * that same page. Now if the @cache_level is I915_CACHE_NONE and the
> +	 * the platform doesn't have the shared-LLC, then the GPU will
> +	 * effectively skip invalidating the cache(or however that works
> +	 * internally) when writing the new value.  This is really bad since the
> +	 * GPU has just written some new data to main memory, but the CPU cache
> +	 * is still valid and now contains stale data. As a result the next time
> +	 * we do a cached read with the CPU, we are rewarded with stale data.
> +	 * Likewise if the cache is later flushed, we might be rewarded with
> +	 * overwriting main memory with stale data.
> +	 *
> +	 * I915_BO_CACHE_COHERENT_FOR_WRITE:
> +	 *
> +	 * When writing through the CPU cache, the GPU is still coherent. Note
> +	 * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> +	 *
> +	 * This is never set when I915_CACHE_NONE is used for @cache_level,
> +	 * where instead we have to manually flush the caches after writing
> +	 * through the CPU cache. For other cache levels this should be set and
> +	 * the object is therefore considered coherent for both reads and writes
> +	 * through the CPU cache.

I don't remember why we have this read vs. write split and this new
documentation doesn't seem to really explain it either.

Is it for optimizing some display related case where we can omit the
invalidates but still have to do the writeback to keep the display
engine happy?

-- 
Ville Syrjälä
Intel

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits
@ 2021-07-13 15:55   ` Ville Syrjälä
  0 siblings, 0 replies; 37+ messages in thread
From: Ville Syrjälä @ 2021-07-13 15:55 UTC (permalink / raw)
  To: Matthew Auld; +Cc: Daniel Vetter, intel-gfx, dri-devel

On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> +	/**
> +	 * @cache_coherent:
> +	 *
> +	 * Track whether the pages are coherent with the GPU if reading or
> +	 * writing through the CPU cache.
> +	 *
> +	 * This largely depends on the @cache_level, for example if the object
> +	 * is marked as I915_CACHE_LLC, then GPU access is coherent for both
> +	 * reads and writes through the CPU cache.
> +	 *
> +	 * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> +	 * the CPU cache are always coherent, regardless of the @cache_level. On
> +	 * snooping based platforms this is not the case, unless the full
> +	 * I915_CACHE_LLC or similar setting is used.
> +	 *
> +	 * As a result of this we need to track coherency separately for reads
> +	 * and writes, in order to avoid superfluous flushing on shared-LLC
> +	 * platforms, for reads.
> +	 *
> +	 * I915_BO_CACHE_COHERENT_FOR_READ:
> +	 *
> +	 * When reading through the CPU cache, the GPU is still coherent. Note
> +	 * that no data has actually been modified here, so it might seem
> +	 * strange that we care about this.
> +	 *
> +	 * As an example, if some object is mapped on the CPU with write-back
> +	 * caching, and we read some page, then the cache likely now contains
> +	 * the data from that read. At this point the cache and main memory
> +	 * match up, so all good. But next the GPU needs to write some data to
> +	 * that same page. Now if the @cache_level is I915_CACHE_NONE and the
> +	 * the platform doesn't have the shared-LLC, then the GPU will
> +	 * effectively skip invalidating the cache(or however that works
> +	 * internally) when writing the new value.  This is really bad since the
> +	 * GPU has just written some new data to main memory, but the CPU cache
> +	 * is still valid and now contains stale data. As a result the next time
> +	 * we do a cached read with the CPU, we are rewarded with stale data.
> +	 * Likewise if the cache is later flushed, we might be rewarded with
> +	 * overwriting main memory with stale data.
> +	 *
> +	 * I915_BO_CACHE_COHERENT_FOR_WRITE:
> +	 *
> +	 * When writing through the CPU cache, the GPU is still coherent. Note
> +	 * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> +	 *
> +	 * This is never set when I915_CACHE_NONE is used for @cache_level,
> +	 * where instead we have to manually flush the caches after writing
> +	 * through the CPU cache. For other cache levels this should be set and
> +	 * the object is therefore considered coherent for both reads and writes
> +	 * through the CPU cache.

I don't remember why we have this read vs. write split and this new
documentation doesn't seem to really explain it either.

Is it for optimizing some display related case where we can omit the
invalidates but still have to do the writeback to keep the display
engine happy?

-- 
Ville Syrjälä
Intel
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits
  2021-07-13 15:55   ` Ville Syrjälä
@ 2021-07-13 16:13     ` Matthew Auld
  -1 siblings, 0 replies; 37+ messages in thread
From: Matthew Auld @ 2021-07-13 16:13 UTC (permalink / raw)
  To: Ville Syrjälä
  Cc: Daniel Vetter, Intel Graphics Development, Matthew Auld, ML dri-devel

On Tue, 13 Jul 2021 at 16:55, Ville Syrjälä
<ville.syrjala@linux.intel.com> wrote:
>
> On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> > +     /**
> > +      * @cache_coherent:
> > +      *
> > +      * Track whether the pages are coherent with the GPU if reading or
> > +      * writing through the CPU cache.
> > +      *
> > +      * This largely depends on the @cache_level, for example if the object
> > +      * is marked as I915_CACHE_LLC, then GPU access is coherent for both
> > +      * reads and writes through the CPU cache.
> > +      *
> > +      * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > +      * the CPU cache are always coherent, regardless of the @cache_level. On
> > +      * snooping based platforms this is not the case, unless the full
> > +      * I915_CACHE_LLC or similar setting is used.
> > +      *
> > +      * As a result of this we need to track coherency separately for reads
> > +      * and writes, in order to avoid superfluous flushing on shared-LLC
> > +      * platforms, for reads.
> > +      *
> > +      * I915_BO_CACHE_COHERENT_FOR_READ:
> > +      *
> > +      * When reading through the CPU cache, the GPU is still coherent. Note
> > +      * that no data has actually been modified here, so it might seem
> > +      * strange that we care about this.
> > +      *
> > +      * As an example, if some object is mapped on the CPU with write-back
> > +      * caching, and we read some page, then the cache likely now contains
> > +      * the data from that read. At this point the cache and main memory
> > +      * match up, so all good. But next the GPU needs to write some data to
> > +      * that same page. Now if the @cache_level is I915_CACHE_NONE and the
> > +      * the platform doesn't have the shared-LLC, then the GPU will
> > +      * effectively skip invalidating the cache(or however that works
> > +      * internally) when writing the new value.  This is really bad since the
> > +      * GPU has just written some new data to main memory, but the CPU cache
> > +      * is still valid and now contains stale data. As a result the next time
> > +      * we do a cached read with the CPU, we are rewarded with stale data.
> > +      * Likewise if the cache is later flushed, we might be rewarded with
> > +      * overwriting main memory with stale data.
> > +      *
> > +      * I915_BO_CACHE_COHERENT_FOR_WRITE:
> > +      *
> > +      * When writing through the CPU cache, the GPU is still coherent. Note
> > +      * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> > +      *
> > +      * This is never set when I915_CACHE_NONE is used for @cache_level,
> > +      * where instead we have to manually flush the caches after writing
> > +      * through the CPU cache. For other cache levels this should be set and
> > +      * the object is therefore considered coherent for both reads and writes
> > +      * through the CPU cache.
>
> I don't remember why we have this read vs. write split and this new
> documentation doesn't seem to really explain it either.

Hmm, I attempted to explain that earlier:

* Note that on platforms with shared-LLC support(HAS_LLC) reads through
* the CPU cache are always coherent, regardless of the @cache_level. On
* snooping based platforms this is not the case, unless the full
* I915_CACHE_LLC or similar setting is used.
*
* As a result of this we need to track coherency separately for reads
* and writes, in order to avoid superfluous flushing on shared-LLC
* platforms, for reads.

So AFAIK it's just because shared-LLC can be coherent for reads, while
also not being coherent for writes(CACHE_NONE), so being able to track
each separately is kind of needed to avoid unnecessary flushing for
the read cases i.e simple boolean for coherent vs non-coherent is not
enough.

I can try to reword things to make that more clear.

>
> Is it for optimizing some display related case where we can omit the
> invalidates but still have to do the writeback to keep the display
> engine happy?
>
> --
> Ville Syrjälä
> Intel

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits
@ 2021-07-13 16:13     ` Matthew Auld
  0 siblings, 0 replies; 37+ messages in thread
From: Matthew Auld @ 2021-07-13 16:13 UTC (permalink / raw)
  To: Ville Syrjälä
  Cc: Daniel Vetter, Intel Graphics Development, Matthew Auld, ML dri-devel

On Tue, 13 Jul 2021 at 16:55, Ville Syrjälä
<ville.syrjala@linux.intel.com> wrote:
>
> On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> > +     /**
> > +      * @cache_coherent:
> > +      *
> > +      * Track whether the pages are coherent with the GPU if reading or
> > +      * writing through the CPU cache.
> > +      *
> > +      * This largely depends on the @cache_level, for example if the object
> > +      * is marked as I915_CACHE_LLC, then GPU access is coherent for both
> > +      * reads and writes through the CPU cache.
> > +      *
> > +      * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > +      * the CPU cache are always coherent, regardless of the @cache_level. On
> > +      * snooping based platforms this is not the case, unless the full
> > +      * I915_CACHE_LLC or similar setting is used.
> > +      *
> > +      * As a result of this we need to track coherency separately for reads
> > +      * and writes, in order to avoid superfluous flushing on shared-LLC
> > +      * platforms, for reads.
> > +      *
> > +      * I915_BO_CACHE_COHERENT_FOR_READ:
> > +      *
> > +      * When reading through the CPU cache, the GPU is still coherent. Note
> > +      * that no data has actually been modified here, so it might seem
> > +      * strange that we care about this.
> > +      *
> > +      * As an example, if some object is mapped on the CPU with write-back
> > +      * caching, and we read some page, then the cache likely now contains
> > +      * the data from that read. At this point the cache and main memory
> > +      * match up, so all good. But next the GPU needs to write some data to
> > +      * that same page. Now if the @cache_level is I915_CACHE_NONE and the
> > +      * the platform doesn't have the shared-LLC, then the GPU will
> > +      * effectively skip invalidating the cache(or however that works
> > +      * internally) when writing the new value.  This is really bad since the
> > +      * GPU has just written some new data to main memory, but the CPU cache
> > +      * is still valid and now contains stale data. As a result the next time
> > +      * we do a cached read with the CPU, we are rewarded with stale data.
> > +      * Likewise if the cache is later flushed, we might be rewarded with
> > +      * overwriting main memory with stale data.
> > +      *
> > +      * I915_BO_CACHE_COHERENT_FOR_WRITE:
> > +      *
> > +      * When writing through the CPU cache, the GPU is still coherent. Note
> > +      * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> > +      *
> > +      * This is never set when I915_CACHE_NONE is used for @cache_level,
> > +      * where instead we have to manually flush the caches after writing
> > +      * through the CPU cache. For other cache levels this should be set and
> > +      * the object is therefore considered coherent for both reads and writes
> > +      * through the CPU cache.
>
> I don't remember why we have this read vs. write split and this new
> documentation doesn't seem to really explain it either.

Hmm, I attempted to explain that earlier:

* Note that on platforms with shared-LLC support(HAS_LLC) reads through
* the CPU cache are always coherent, regardless of the @cache_level. On
* snooping based platforms this is not the case, unless the full
* I915_CACHE_LLC or similar setting is used.
*
* As a result of this we need to track coherency separately for reads
* and writes, in order to avoid superfluous flushing on shared-LLC
* platforms, for reads.

So AFAIK it's just because shared-LLC can be coherent for reads, while
also not being coherent for writes(CACHE_NONE), so being able to track
each separately is kind of needed to avoid unnecessary flushing for
the read cases i.e simple boolean for coherent vs non-coherent is not
enough.

I can try to reword things to make that more clear.

>
> Is it for optimizing some display related case where we can omit the
> invalidates but still have to do the writeback to keep the display
> engine happy?
>
> --
> Ville Syrjälä
> Intel
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits
  2021-07-13 16:13     ` Matthew Auld
@ 2021-07-13 16:50       ` Daniel Vetter
  -1 siblings, 0 replies; 37+ messages in thread
From: Daniel Vetter @ 2021-07-13 16:50 UTC (permalink / raw)
  To: Matthew Auld; +Cc: ML dri-devel, Intel Graphics Development, Matthew Auld

On Tue, Jul 13, 2021 at 6:14 PM Matthew Auld
<matthew.william.auld@gmail.com> wrote:
> On Tue, 13 Jul 2021 at 16:55, Ville Syrjälä
> <ville.syrjala@linux.intel.com> wrote:
> >
> > On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> > > +     /**
> > > +      * @cache_coherent:
> > > +      *
> > > +      * Track whether the pages are coherent with the GPU if reading or
> > > +      * writing through the CPU cache.
> > > +      *
> > > +      * This largely depends on the @cache_level, for example if the object
> > > +      * is marked as I915_CACHE_LLC, then GPU access is coherent for both
> > > +      * reads and writes through the CPU cache.
> > > +      *
> > > +      * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > > +      * the CPU cache are always coherent, regardless of the @cache_level. On
> > > +      * snooping based platforms this is not the case, unless the full
> > > +      * I915_CACHE_LLC or similar setting is used.
> > > +      *
> > > +      * As a result of this we need to track coherency separately for reads
> > > +      * and writes, in order to avoid superfluous flushing on shared-LLC
> > > +      * platforms, for reads.
> > > +      *
> > > +      * I915_BO_CACHE_COHERENT_FOR_READ:
> > > +      *
> > > +      * When reading through the CPU cache, the GPU is still coherent. Note
> > > +      * that no data has actually been modified here, so it might seem
> > > +      * strange that we care about this.
> > > +      *
> > > +      * As an example, if some object is mapped on the CPU with write-back
> > > +      * caching, and we read some page, then the cache likely now contains
> > > +      * the data from that read. At this point the cache and main memory
> > > +      * match up, so all good. But next the GPU needs to write some data to
> > > +      * that same page. Now if the @cache_level is I915_CACHE_NONE and the
> > > +      * the platform doesn't have the shared-LLC, then the GPU will
> > > +      * effectively skip invalidating the cache(or however that works
> > > +      * internally) when writing the new value.  This is really bad since the
> > > +      * GPU has just written some new data to main memory, but the CPU cache
> > > +      * is still valid and now contains stale data. As a result the next time
> > > +      * we do a cached read with the CPU, we are rewarded with stale data.
> > > +      * Likewise if the cache is later flushed, we might be rewarded with
> > > +      * overwriting main memory with stale data.
> > > +      *
> > > +      * I915_BO_CACHE_COHERENT_FOR_WRITE:
> > > +      *
> > > +      * When writing through the CPU cache, the GPU is still coherent. Note
> > > +      * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> > > +      *
> > > +      * This is never set when I915_CACHE_NONE is used for @cache_level,
> > > +      * where instead we have to manually flush the caches after writing
> > > +      * through the CPU cache. For other cache levels this should be set and
> > > +      * the object is therefore considered coherent for both reads and writes
> > > +      * through the CPU cache.
> >
> > I don't remember why we have this read vs. write split and this new
> > documentation doesn't seem to really explain it either.
>
> Hmm, I attempted to explain that earlier:
>
> * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> * the CPU cache are always coherent, regardless of the @cache_level. On
> * snooping based platforms this is not the case, unless the full
> * I915_CACHE_LLC or similar setting is used.
> *
> * As a result of this we need to track coherency separately for reads
> * and writes, in order to avoid superfluous flushing on shared-LLC
> * platforms, for reads.
>
> So AFAIK it's just because shared-LLC can be coherent for reads, while
> also not being coherent for writes(CACHE_NONE), so being able to track
> each separately is kind of needed to avoid unnecessary flushing for
> the read cases i.e simple boolean for coherent vs non-coherent is not
> enough.
>
> I can try to reword things to make that more clear.

Maybe highlight the security aspect a bit more: When reads are always
coherent, we don't have to force the clflush. If reads are not
coherent we must ensure that the clflush has finished before userspace
can get at the backing storage, like writing ptes and similar things.
Writes otoh can only result in userspace eating cacheling corruption
if it races against the kernel (by e.g. trying to predict where we'll
bind a buffer and issuing gpu access to that location before the
buffer is actually bound from some other engine in parallel with an
execbuf that binds the buffer).

Atm we don't do a great job with that, but that's something that I
think is getting looked into.
-Daniel

> > Is it for optimizing some display related case where we can omit the
> > invalidates but still have to do the writeback to keep the display
> > engine happy?
> >
> > --
> > Ville Syrjälä
> > Intel



-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits
@ 2021-07-13 16:50       ` Daniel Vetter
  0 siblings, 0 replies; 37+ messages in thread
From: Daniel Vetter @ 2021-07-13 16:50 UTC (permalink / raw)
  To: Matthew Auld; +Cc: ML dri-devel, Intel Graphics Development, Matthew Auld

On Tue, Jul 13, 2021 at 6:14 PM Matthew Auld
<matthew.william.auld@gmail.com> wrote:
> On Tue, 13 Jul 2021 at 16:55, Ville Syrjälä
> <ville.syrjala@linux.intel.com> wrote:
> >
> > On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> > > +     /**
> > > +      * @cache_coherent:
> > > +      *
> > > +      * Track whether the pages are coherent with the GPU if reading or
> > > +      * writing through the CPU cache.
> > > +      *
> > > +      * This largely depends on the @cache_level, for example if the object
> > > +      * is marked as I915_CACHE_LLC, then GPU access is coherent for both
> > > +      * reads and writes through the CPU cache.
> > > +      *
> > > +      * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > > +      * the CPU cache are always coherent, regardless of the @cache_level. On
> > > +      * snooping based platforms this is not the case, unless the full
> > > +      * I915_CACHE_LLC or similar setting is used.
> > > +      *
> > > +      * As a result of this we need to track coherency separately for reads
> > > +      * and writes, in order to avoid superfluous flushing on shared-LLC
> > > +      * platforms, for reads.
> > > +      *
> > > +      * I915_BO_CACHE_COHERENT_FOR_READ:
> > > +      *
> > > +      * When reading through the CPU cache, the GPU is still coherent. Note
> > > +      * that no data has actually been modified here, so it might seem
> > > +      * strange that we care about this.
> > > +      *
> > > +      * As an example, if some object is mapped on the CPU with write-back
> > > +      * caching, and we read some page, then the cache likely now contains
> > > +      * the data from that read. At this point the cache and main memory
> > > +      * match up, so all good. But next the GPU needs to write some data to
> > > +      * that same page. Now if the @cache_level is I915_CACHE_NONE and the
> > > +      * the platform doesn't have the shared-LLC, then the GPU will
> > > +      * effectively skip invalidating the cache(or however that works
> > > +      * internally) when writing the new value.  This is really bad since the
> > > +      * GPU has just written some new data to main memory, but the CPU cache
> > > +      * is still valid and now contains stale data. As a result the next time
> > > +      * we do a cached read with the CPU, we are rewarded with stale data.
> > > +      * Likewise if the cache is later flushed, we might be rewarded with
> > > +      * overwriting main memory with stale data.
> > > +      *
> > > +      * I915_BO_CACHE_COHERENT_FOR_WRITE:
> > > +      *
> > > +      * When writing through the CPU cache, the GPU is still coherent. Note
> > > +      * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> > > +      *
> > > +      * This is never set when I915_CACHE_NONE is used for @cache_level,
> > > +      * where instead we have to manually flush the caches after writing
> > > +      * through the CPU cache. For other cache levels this should be set and
> > > +      * the object is therefore considered coherent for both reads and writes
> > > +      * through the CPU cache.
> >
> > I don't remember why we have this read vs. write split and this new
> > documentation doesn't seem to really explain it either.
>
> Hmm, I attempted to explain that earlier:
>
> * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> * the CPU cache are always coherent, regardless of the @cache_level. On
> * snooping based platforms this is not the case, unless the full
> * I915_CACHE_LLC or similar setting is used.
> *
> * As a result of this we need to track coherency separately for reads
> * and writes, in order to avoid superfluous flushing on shared-LLC
> * platforms, for reads.
>
> So AFAIK it's just because shared-LLC can be coherent for reads, while
> also not being coherent for writes(CACHE_NONE), so being able to track
> each separately is kind of needed to avoid unnecessary flushing for
> the read cases i.e simple boolean for coherent vs non-coherent is not
> enough.
>
> I can try to reword things to make that more clear.

Maybe highlight the security aspect a bit more: When reads are always
coherent, we don't have to force the clflush. If reads are not
coherent we must ensure that the clflush has finished before userspace
can get at the backing storage, like writing ptes and similar things.
Writes otoh can only result in userspace eating cacheling corruption
if it races against the kernel (by e.g. trying to predict where we'll
bind a buffer and issuing gpu access to that location before the
buffer is actually bound from some other engine in parallel with an
execbuf that binds the buffer).

Atm we don't do a great job with that, but that's something that I
think is getting looked into.
-Daniel

> > Is it for optimizing some display related case where we can omit the
> > invalidates but still have to do the writeback to keep the display
> > engine happy?
> >
> > --
> > Ville Syrjälä
> > Intel



-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits
  2021-07-13 16:13     ` Matthew Auld
@ 2021-07-13 17:47       ` Ville Syrjälä
  -1 siblings, 0 replies; 37+ messages in thread
From: Ville Syrjälä @ 2021-07-13 17:47 UTC (permalink / raw)
  To: Matthew Auld
  Cc: Daniel Vetter, Intel Graphics Development, Matthew Auld, ML dri-devel

On Tue, Jul 13, 2021 at 05:13:37PM +0100, Matthew Auld wrote:
> On Tue, 13 Jul 2021 at 16:55, Ville Syrjälä
> <ville.syrjala@linux.intel.com> wrote:
> >
> > On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> > > +     /**
> > > +      * @cache_coherent:
> > > +      *
> > > +      * Track whether the pages are coherent with the GPU if reading or
> > > +      * writing through the CPU cache.
> > > +      *
> > > +      * This largely depends on the @cache_level, for example if the object
> > > +      * is marked as I915_CACHE_LLC, then GPU access is coherent for both
> > > +      * reads and writes through the CPU cache.
> > > +      *
> > > +      * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > > +      * the CPU cache are always coherent, regardless of the @cache_level. On
> > > +      * snooping based platforms this is not the case, unless the full
> > > +      * I915_CACHE_LLC or similar setting is used.
> > > +      *
> > > +      * As a result of this we need to track coherency separately for reads
> > > +      * and writes, in order to avoid superfluous flushing on shared-LLC
> > > +      * platforms, for reads.
> > > +      *
> > > +      * I915_BO_CACHE_COHERENT_FOR_READ:
> > > +      *
> > > +      * When reading through the CPU cache, the GPU is still coherent. Note
> > > +      * that no data has actually been modified here, so it might seem
> > > +      * strange that we care about this.
> > > +      *
> > > +      * As an example, if some object is mapped on the CPU with write-back
> > > +      * caching, and we read some page, then the cache likely now contains
> > > +      * the data from that read. At this point the cache and main memory
> > > +      * match up, so all good. But next the GPU needs to write some data to
> > > +      * that same page. Now if the @cache_level is I915_CACHE_NONE and the
> > > +      * the platform doesn't have the shared-LLC, then the GPU will
> > > +      * effectively skip invalidating the cache(or however that works
> > > +      * internally) when writing the new value.  This is really bad since the
> > > +      * GPU has just written some new data to main memory, but the CPU cache
> > > +      * is still valid and now contains stale data. As a result the next time
> > > +      * we do a cached read with the CPU, we are rewarded with stale data.
> > > +      * Likewise if the cache is later flushed, we might be rewarded with
> > > +      * overwriting main memory with stale data.
> > > +      *
> > > +      * I915_BO_CACHE_COHERENT_FOR_WRITE:
> > > +      *
> > > +      * When writing through the CPU cache, the GPU is still coherent. Note
> > > +      * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> > > +      *
> > > +      * This is never set when I915_CACHE_NONE is used for @cache_level,
> > > +      * where instead we have to manually flush the caches after writing
> > > +      * through the CPU cache. For other cache levels this should be set and
> > > +      * the object is therefore considered coherent for both reads and writes
> > > +      * through the CPU cache.
> >
> > I don't remember why we have this read vs. write split and this new
> > documentation doesn't seem to really explain it either.
> 
> Hmm, I attempted to explain that earlier:
> 
> * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> * the CPU cache are always coherent, regardless of the @cache_level. On
> * snooping based platforms this is not the case, unless the full
> * I915_CACHE_LLC or similar setting is used.
> *
> * As a result of this we need to track coherency separately for reads
> * and writes, in order to avoid superfluous flushing on shared-LLC
> * platforms, for reads.
> 
> So AFAIK it's just because shared-LLC can be coherent for reads, while
> also not being coherent for writes(CACHE_NONE),

CPU vs. GPU is fully coherent when it comes to LLC. Or at least I've
never heard of any mechanism that would make it only partially coherent.

-- 
Ville Syrjälä
Intel

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits
@ 2021-07-13 17:47       ` Ville Syrjälä
  0 siblings, 0 replies; 37+ messages in thread
From: Ville Syrjälä @ 2021-07-13 17:47 UTC (permalink / raw)
  To: Matthew Auld
  Cc: Daniel Vetter, Intel Graphics Development, Matthew Auld, ML dri-devel

On Tue, Jul 13, 2021 at 05:13:37PM +0100, Matthew Auld wrote:
> On Tue, 13 Jul 2021 at 16:55, Ville Syrjälä
> <ville.syrjala@linux.intel.com> wrote:
> >
> > On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> > > +     /**
> > > +      * @cache_coherent:
> > > +      *
> > > +      * Track whether the pages are coherent with the GPU if reading or
> > > +      * writing through the CPU cache.
> > > +      *
> > > +      * This largely depends on the @cache_level, for example if the object
> > > +      * is marked as I915_CACHE_LLC, then GPU access is coherent for both
> > > +      * reads and writes through the CPU cache.
> > > +      *
> > > +      * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > > +      * the CPU cache are always coherent, regardless of the @cache_level. On
> > > +      * snooping based platforms this is not the case, unless the full
> > > +      * I915_CACHE_LLC or similar setting is used.
> > > +      *
> > > +      * As a result of this we need to track coherency separately for reads
> > > +      * and writes, in order to avoid superfluous flushing on shared-LLC
> > > +      * platforms, for reads.
> > > +      *
> > > +      * I915_BO_CACHE_COHERENT_FOR_READ:
> > > +      *
> > > +      * When reading through the CPU cache, the GPU is still coherent. Note
> > > +      * that no data has actually been modified here, so it might seem
> > > +      * strange that we care about this.
> > > +      *
> > > +      * As an example, if some object is mapped on the CPU with write-back
> > > +      * caching, and we read some page, then the cache likely now contains
> > > +      * the data from that read. At this point the cache and main memory
> > > +      * match up, so all good. But next the GPU needs to write some data to
> > > +      * that same page. Now if the @cache_level is I915_CACHE_NONE and the
> > > +      * the platform doesn't have the shared-LLC, then the GPU will
> > > +      * effectively skip invalidating the cache(or however that works
> > > +      * internally) when writing the new value.  This is really bad since the
> > > +      * GPU has just written some new data to main memory, but the CPU cache
> > > +      * is still valid and now contains stale data. As a result the next time
> > > +      * we do a cached read with the CPU, we are rewarded with stale data.
> > > +      * Likewise if the cache is later flushed, we might be rewarded with
> > > +      * overwriting main memory with stale data.
> > > +      *
> > > +      * I915_BO_CACHE_COHERENT_FOR_WRITE:
> > > +      *
> > > +      * When writing through the CPU cache, the GPU is still coherent. Note
> > > +      * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> > > +      *
> > > +      * This is never set when I915_CACHE_NONE is used for @cache_level,
> > > +      * where instead we have to manually flush the caches after writing
> > > +      * through the CPU cache. For other cache levels this should be set and
> > > +      * the object is therefore considered coherent for both reads and writes
> > > +      * through the CPU cache.
> >
> > I don't remember why we have this read vs. write split and this new
> > documentation doesn't seem to really explain it either.
> 
> Hmm, I attempted to explain that earlier:
> 
> * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> * the CPU cache are always coherent, regardless of the @cache_level. On
> * snooping based platforms this is not the case, unless the full
> * I915_CACHE_LLC or similar setting is used.
> *
> * As a result of this we need to track coherency separately for reads
> * and writes, in order to avoid superfluous flushing on shared-LLC
> * platforms, for reads.
> 
> So AFAIK it's just because shared-LLC can be coherent for reads, while
> also not being coherent for writes(CACHE_NONE),

CPU vs. GPU is fully coherent when it comes to LLC. Or at least I've
never heard of any mechanism that would make it only partially coherent.

-- 
Ville Syrjälä
Intel
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits
  2021-07-13 17:47       ` Ville Syrjälä
@ 2021-07-13 18:24         ` Matthew Auld
  -1 siblings, 0 replies; 37+ messages in thread
From: Matthew Auld @ 2021-07-13 18:24 UTC (permalink / raw)
  To: Ville Syrjälä
  Cc: Daniel Vetter, Intel Graphics Development, Matthew Auld, ML dri-devel

On Tue, 13 Jul 2021 at 18:47, Ville Syrjälä
<ville.syrjala@linux.intel.com> wrote:
>
> On Tue, Jul 13, 2021 at 05:13:37PM +0100, Matthew Auld wrote:
> > On Tue, 13 Jul 2021 at 16:55, Ville Syrjälä
> > <ville.syrjala@linux.intel.com> wrote:
> > >
> > > On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> > > > +     /**
> > > > +      * @cache_coherent:
> > > > +      *
> > > > +      * Track whether the pages are coherent with the GPU if reading or
> > > > +      * writing through the CPU cache.
> > > > +      *
> > > > +      * This largely depends on the @cache_level, for example if the object
> > > > +      * is marked as I915_CACHE_LLC, then GPU access is coherent for both
> > > > +      * reads and writes through the CPU cache.
> > > > +      *
> > > > +      * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > > > +      * the CPU cache are always coherent, regardless of the @cache_level. On
> > > > +      * snooping based platforms this is not the case, unless the full
> > > > +      * I915_CACHE_LLC or similar setting is used.
> > > > +      *
> > > > +      * As a result of this we need to track coherency separately for reads
> > > > +      * and writes, in order to avoid superfluous flushing on shared-LLC
> > > > +      * platforms, for reads.
> > > > +      *
> > > > +      * I915_BO_CACHE_COHERENT_FOR_READ:
> > > > +      *
> > > > +      * When reading through the CPU cache, the GPU is still coherent. Note
> > > > +      * that no data has actually been modified here, so it might seem
> > > > +      * strange that we care about this.
> > > > +      *
> > > > +      * As an example, if some object is mapped on the CPU with write-back
> > > > +      * caching, and we read some page, then the cache likely now contains
> > > > +      * the data from that read. At this point the cache and main memory
> > > > +      * match up, so all good. But next the GPU needs to write some data to
> > > > +      * that same page. Now if the @cache_level is I915_CACHE_NONE and the
> > > > +      * the platform doesn't have the shared-LLC, then the GPU will
> > > > +      * effectively skip invalidating the cache(or however that works
> > > > +      * internally) when writing the new value.  This is really bad since the
> > > > +      * GPU has just written some new data to main memory, but the CPU cache
> > > > +      * is still valid and now contains stale data. As a result the next time
> > > > +      * we do a cached read with the CPU, we are rewarded with stale data.
> > > > +      * Likewise if the cache is later flushed, we might be rewarded with
> > > > +      * overwriting main memory with stale data.
> > > > +      *
> > > > +      * I915_BO_CACHE_COHERENT_FOR_WRITE:
> > > > +      *
> > > > +      * When writing through the CPU cache, the GPU is still coherent. Note
> > > > +      * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> > > > +      *
> > > > +      * This is never set when I915_CACHE_NONE is used for @cache_level,
> > > > +      * where instead we have to manually flush the caches after writing
> > > > +      * through the CPU cache. For other cache levels this should be set and
> > > > +      * the object is therefore considered coherent for both reads and writes
> > > > +      * through the CPU cache.
> > >
> > > I don't remember why we have this read vs. write split and this new
> > > documentation doesn't seem to really explain it either.
> >
> > Hmm, I attempted to explain that earlier:
> >
> > * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > * the CPU cache are always coherent, regardless of the @cache_level. On
> > * snooping based platforms this is not the case, unless the full
> > * I915_CACHE_LLC or similar setting is used.
> > *
> > * As a result of this we need to track coherency separately for reads
> > * and writes, in order to avoid superfluous flushing on shared-LLC
> > * platforms, for reads.
> >
> > So AFAIK it's just because shared-LLC can be coherent for reads, while
> > also not being coherent for writes(CACHE_NONE),
>
> CPU vs. GPU is fully coherent when it comes to LLC. Or at least I've
> never heard of any mechanism that would make it only partially coherent.

What do you mean by "comes to LLC", are you talking about HAS_LLC() or
I915_CACHE_LLC?

If you set I915_CACHE_LLC, then yes it is fully coherent for both
HAS_LLC() and HAS_SNOOP().

If you set I915_CACHE_NONE, then reads are still coherent on
HAS_LLC(), for HAS_SNOOP() they are not. Or at least that is the
existing behaviour in the driver AFAIK.

>
> --
> Ville Syrjälä
> Intel

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits
@ 2021-07-13 18:24         ` Matthew Auld
  0 siblings, 0 replies; 37+ messages in thread
From: Matthew Auld @ 2021-07-13 18:24 UTC (permalink / raw)
  To: Ville Syrjälä
  Cc: Daniel Vetter, Intel Graphics Development, Matthew Auld, ML dri-devel

On Tue, 13 Jul 2021 at 18:47, Ville Syrjälä
<ville.syrjala@linux.intel.com> wrote:
>
> On Tue, Jul 13, 2021 at 05:13:37PM +0100, Matthew Auld wrote:
> > On Tue, 13 Jul 2021 at 16:55, Ville Syrjälä
> > <ville.syrjala@linux.intel.com> wrote:
> > >
> > > On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> > > > +     /**
> > > > +      * @cache_coherent:
> > > > +      *
> > > > +      * Track whether the pages are coherent with the GPU if reading or
> > > > +      * writing through the CPU cache.
> > > > +      *
> > > > +      * This largely depends on the @cache_level, for example if the object
> > > > +      * is marked as I915_CACHE_LLC, then GPU access is coherent for both
> > > > +      * reads and writes through the CPU cache.
> > > > +      *
> > > > +      * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > > > +      * the CPU cache are always coherent, regardless of the @cache_level. On
> > > > +      * snooping based platforms this is not the case, unless the full
> > > > +      * I915_CACHE_LLC or similar setting is used.
> > > > +      *
> > > > +      * As a result of this we need to track coherency separately for reads
> > > > +      * and writes, in order to avoid superfluous flushing on shared-LLC
> > > > +      * platforms, for reads.
> > > > +      *
> > > > +      * I915_BO_CACHE_COHERENT_FOR_READ:
> > > > +      *
> > > > +      * When reading through the CPU cache, the GPU is still coherent. Note
> > > > +      * that no data has actually been modified here, so it might seem
> > > > +      * strange that we care about this.
> > > > +      *
> > > > +      * As an example, if some object is mapped on the CPU with write-back
> > > > +      * caching, and we read some page, then the cache likely now contains
> > > > +      * the data from that read. At this point the cache and main memory
> > > > +      * match up, so all good. But next the GPU needs to write some data to
> > > > +      * that same page. Now if the @cache_level is I915_CACHE_NONE and the
> > > > +      * the platform doesn't have the shared-LLC, then the GPU will
> > > > +      * effectively skip invalidating the cache(or however that works
> > > > +      * internally) when writing the new value.  This is really bad since the
> > > > +      * GPU has just written some new data to main memory, but the CPU cache
> > > > +      * is still valid and now contains stale data. As a result the next time
> > > > +      * we do a cached read with the CPU, we are rewarded with stale data.
> > > > +      * Likewise if the cache is later flushed, we might be rewarded with
> > > > +      * overwriting main memory with stale data.
> > > > +      *
> > > > +      * I915_BO_CACHE_COHERENT_FOR_WRITE:
> > > > +      *
> > > > +      * When writing through the CPU cache, the GPU is still coherent. Note
> > > > +      * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> > > > +      *
> > > > +      * This is never set when I915_CACHE_NONE is used for @cache_level,
> > > > +      * where instead we have to manually flush the caches after writing
> > > > +      * through the CPU cache. For other cache levels this should be set and
> > > > +      * the object is therefore considered coherent for both reads and writes
> > > > +      * through the CPU cache.
> > >
> > > I don't remember why we have this read vs. write split and this new
> > > documentation doesn't seem to really explain it either.
> >
> > Hmm, I attempted to explain that earlier:
> >
> > * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > * the CPU cache are always coherent, regardless of the @cache_level. On
> > * snooping based platforms this is not the case, unless the full
> > * I915_CACHE_LLC or similar setting is used.
> > *
> > * As a result of this we need to track coherency separately for reads
> > * and writes, in order to avoid superfluous flushing on shared-LLC
> > * platforms, for reads.
> >
> > So AFAIK it's just because shared-LLC can be coherent for reads, while
> > also not being coherent for writes(CACHE_NONE),
>
> CPU vs. GPU is fully coherent when it comes to LLC. Or at least I've
> never heard of any mechanism that would make it only partially coherent.

What do you mean by "comes to LLC", are you talking about HAS_LLC() or
I915_CACHE_LLC?

If you set I915_CACHE_LLC, then yes it is fully coherent for both
HAS_LLC() and HAS_SNOOP().

If you set I915_CACHE_NONE, then reads are still coherent on
HAS_LLC(), for HAS_SNOOP() they are not. Or at least that is the
existing behaviour in the driver AFAIK.

>
> --
> Ville Syrjälä
> Intel
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits
  2021-07-13 18:24         ` Matthew Auld
@ 2021-07-13 18:46           ` Ville Syrjälä
  -1 siblings, 0 replies; 37+ messages in thread
From: Ville Syrjälä @ 2021-07-13 18:46 UTC (permalink / raw)
  To: Matthew Auld
  Cc: Daniel Vetter, Intel Graphics Development, Matthew Auld, ML dri-devel

On Tue, Jul 13, 2021 at 07:24:23PM +0100, Matthew Auld wrote:
> On Tue, 13 Jul 2021 at 18:47, Ville Syrjälä
> <ville.syrjala@linux.intel.com> wrote:
> >
> > On Tue, Jul 13, 2021 at 05:13:37PM +0100, Matthew Auld wrote:
> > > On Tue, 13 Jul 2021 at 16:55, Ville Syrjälä
> > > <ville.syrjala@linux.intel.com> wrote:
> > > >
> > > > On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> > > > > +     /**
> > > > > +      * @cache_coherent:
> > > > > +      *
> > > > > +      * Track whether the pages are coherent with the GPU if reading or
> > > > > +      * writing through the CPU cache.
> > > > > +      *
> > > > > +      * This largely depends on the @cache_level, for example if the object
> > > > > +      * is marked as I915_CACHE_LLC, then GPU access is coherent for both
> > > > > +      * reads and writes through the CPU cache.
> > > > > +      *
> > > > > +      * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > > > > +      * the CPU cache are always coherent, regardless of the @cache_level. On
> > > > > +      * snooping based platforms this is not the case, unless the full
> > > > > +      * I915_CACHE_LLC or similar setting is used.
> > > > > +      *
> > > > > +      * As a result of this we need to track coherency separately for reads
> > > > > +      * and writes, in order to avoid superfluous flushing on shared-LLC
> > > > > +      * platforms, for reads.
> > > > > +      *
> > > > > +      * I915_BO_CACHE_COHERENT_FOR_READ:
> > > > > +      *
> > > > > +      * When reading through the CPU cache, the GPU is still coherent. Note
> > > > > +      * that no data has actually been modified here, so it might seem
> > > > > +      * strange that we care about this.
> > > > > +      *
> > > > > +      * As an example, if some object is mapped on the CPU with write-back
> > > > > +      * caching, and we read some page, then the cache likely now contains
> > > > > +      * the data from that read. At this point the cache and main memory
> > > > > +      * match up, so all good. But next the GPU needs to write some data to
> > > > > +      * that same page. Now if the @cache_level is I915_CACHE_NONE and the
> > > > > +      * the platform doesn't have the shared-LLC, then the GPU will
> > > > > +      * effectively skip invalidating the cache(or however that works
> > > > > +      * internally) when writing the new value.  This is really bad since the
> > > > > +      * GPU has just written some new data to main memory, but the CPU cache
> > > > > +      * is still valid and now contains stale data. As a result the next time
> > > > > +      * we do a cached read with the CPU, we are rewarded with stale data.
> > > > > +      * Likewise if the cache is later flushed, we might be rewarded with
> > > > > +      * overwriting main memory with stale data.
> > > > > +      *
> > > > > +      * I915_BO_CACHE_COHERENT_FOR_WRITE:
> > > > > +      *
> > > > > +      * When writing through the CPU cache, the GPU is still coherent. Note
> > > > > +      * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> > > > > +      *
> > > > > +      * This is never set when I915_CACHE_NONE is used for @cache_level,
> > > > > +      * where instead we have to manually flush the caches after writing
> > > > > +      * through the CPU cache. For other cache levels this should be set and
> > > > > +      * the object is therefore considered coherent for both reads and writes
> > > > > +      * through the CPU cache.
> > > >
> > > > I don't remember why we have this read vs. write split and this new
> > > > documentation doesn't seem to really explain it either.
> > >
> > > Hmm, I attempted to explain that earlier:
> > >
> > > * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > > * the CPU cache are always coherent, regardless of the @cache_level. On
> > > * snooping based platforms this is not the case, unless the full
> > > * I915_CACHE_LLC or similar setting is used.
> > > *
> > > * As a result of this we need to track coherency separately for reads
> > > * and writes, in order to avoid superfluous flushing on shared-LLC
> > > * platforms, for reads.
> > >
> > > So AFAIK it's just because shared-LLC can be coherent for reads, while
> > > also not being coherent for writes(CACHE_NONE),
> >
> > CPU vs. GPU is fully coherent when it comes to LLC. Or at least I've
> > never heard of any mechanism that would make it only partially coherent.
> 
> What do you mean by "comes to LLC", are you talking about HAS_LLC() or
> I915_CACHE_LLC?

I'm talking about the actual cache.

> 
> If you set I915_CACHE_LLC, then yes it is fully coherent for both
> HAS_LLC() and HAS_SNOOP().
> 
> If you set I915_CACHE_NONE, then reads are still coherent on
> HAS_LLC(),

Reads and writes both. The only thing that's not coherent is the
display engine.

> for HAS_SNOOP() they are not. Or at least that is the
> existing behaviour in the driver AFAIK.
> 
> >
> > --
> > Ville Syrjälä
> > Intel

-- 
Ville Syrjälä
Intel

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits
@ 2021-07-13 18:46           ` Ville Syrjälä
  0 siblings, 0 replies; 37+ messages in thread
From: Ville Syrjälä @ 2021-07-13 18:46 UTC (permalink / raw)
  To: Matthew Auld
  Cc: Daniel Vetter, Intel Graphics Development, Matthew Auld, ML dri-devel

On Tue, Jul 13, 2021 at 07:24:23PM +0100, Matthew Auld wrote:
> On Tue, 13 Jul 2021 at 18:47, Ville Syrjälä
> <ville.syrjala@linux.intel.com> wrote:
> >
> > On Tue, Jul 13, 2021 at 05:13:37PM +0100, Matthew Auld wrote:
> > > On Tue, 13 Jul 2021 at 16:55, Ville Syrjälä
> > > <ville.syrjala@linux.intel.com> wrote:
> > > >
> > > > On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> > > > > +     /**
> > > > > +      * @cache_coherent:
> > > > > +      *
> > > > > +      * Track whether the pages are coherent with the GPU if reading or
> > > > > +      * writing through the CPU cache.
> > > > > +      *
> > > > > +      * This largely depends on the @cache_level, for example if the object
> > > > > +      * is marked as I915_CACHE_LLC, then GPU access is coherent for both
> > > > > +      * reads and writes through the CPU cache.
> > > > > +      *
> > > > > +      * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > > > > +      * the CPU cache are always coherent, regardless of the @cache_level. On
> > > > > +      * snooping based platforms this is not the case, unless the full
> > > > > +      * I915_CACHE_LLC or similar setting is used.
> > > > > +      *
> > > > > +      * As a result of this we need to track coherency separately for reads
> > > > > +      * and writes, in order to avoid superfluous flushing on shared-LLC
> > > > > +      * platforms, for reads.
> > > > > +      *
> > > > > +      * I915_BO_CACHE_COHERENT_FOR_READ:
> > > > > +      *
> > > > > +      * When reading through the CPU cache, the GPU is still coherent. Note
> > > > > +      * that no data has actually been modified here, so it might seem
> > > > > +      * strange that we care about this.
> > > > > +      *
> > > > > +      * As an example, if some object is mapped on the CPU with write-back
> > > > > +      * caching, and we read some page, then the cache likely now contains
> > > > > +      * the data from that read. At this point the cache and main memory
> > > > > +      * match up, so all good. But next the GPU needs to write some data to
> > > > > +      * that same page. Now if the @cache_level is I915_CACHE_NONE and the
> > > > > +      * the platform doesn't have the shared-LLC, then the GPU will
> > > > > +      * effectively skip invalidating the cache(or however that works
> > > > > +      * internally) when writing the new value.  This is really bad since the
> > > > > +      * GPU has just written some new data to main memory, but the CPU cache
> > > > > +      * is still valid and now contains stale data. As a result the next time
> > > > > +      * we do a cached read with the CPU, we are rewarded with stale data.
> > > > > +      * Likewise if the cache is later flushed, we might be rewarded with
> > > > > +      * overwriting main memory with stale data.
> > > > > +      *
> > > > > +      * I915_BO_CACHE_COHERENT_FOR_WRITE:
> > > > > +      *
> > > > > +      * When writing through the CPU cache, the GPU is still coherent. Note
> > > > > +      * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> > > > > +      *
> > > > > +      * This is never set when I915_CACHE_NONE is used for @cache_level,
> > > > > +      * where instead we have to manually flush the caches after writing
> > > > > +      * through the CPU cache. For other cache levels this should be set and
> > > > > +      * the object is therefore considered coherent for both reads and writes
> > > > > +      * through the CPU cache.
> > > >
> > > > I don't remember why we have this read vs. write split and this new
> > > > documentation doesn't seem to really explain it either.
> > >
> > > Hmm, I attempted to explain that earlier:
> > >
> > > * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > > * the CPU cache are always coherent, regardless of the @cache_level. On
> > > * snooping based platforms this is not the case, unless the full
> > > * I915_CACHE_LLC or similar setting is used.
> > > *
> > > * As a result of this we need to track coherency separately for reads
> > > * and writes, in order to avoid superfluous flushing on shared-LLC
> > > * platforms, for reads.
> > >
> > > So AFAIK it's just because shared-LLC can be coherent for reads, while
> > > also not being coherent for writes(CACHE_NONE),
> >
> > CPU vs. GPU is fully coherent when it comes to LLC. Or at least I've
> > never heard of any mechanism that would make it only partially coherent.
> 
> What do you mean by "comes to LLC", are you talking about HAS_LLC() or
> I915_CACHE_LLC?

I'm talking about the actual cache.

> 
> If you set I915_CACHE_LLC, then yes it is fully coherent for both
> HAS_LLC() and HAS_SNOOP().
> 
> If you set I915_CACHE_NONE, then reads are still coherent on
> HAS_LLC(),

Reads and writes both. The only thing that's not coherent is the
display engine.

> for HAS_SNOOP() they are not. Or at least that is the
> existing behaviour in the driver AFAIK.
> 
> >
> > --
> > Ville Syrjälä
> > Intel

-- 
Ville Syrjälä
Intel
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits
  2021-07-13 18:46           ` Ville Syrjälä
@ 2021-07-14 11:16             ` Daniel Vetter
  -1 siblings, 0 replies; 37+ messages in thread
From: Daniel Vetter @ 2021-07-14 11:16 UTC (permalink / raw)
  To: Ville Syrjälä
  Cc: Daniel Vetter, Intel Graphics Development, Matthew Auld,
	Matthew Auld, ML dri-devel

On Tue, Jul 13, 2021 at 09:46:30PM +0300, Ville Syrjälä wrote:
> On Tue, Jul 13, 2021 at 07:24:23PM +0100, Matthew Auld wrote:
> > On Tue, 13 Jul 2021 at 18:47, Ville Syrjälä
> > <ville.syrjala@linux.intel.com> wrote:
> > >
> > > On Tue, Jul 13, 2021 at 05:13:37PM +0100, Matthew Auld wrote:
> > > > On Tue, 13 Jul 2021 at 16:55, Ville Syrjälä
> > > > <ville.syrjala@linux.intel.com> wrote:
> > > > >
> > > > > On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> > > > > > +     /**
> > > > > > +      * @cache_coherent:
> > > > > > +      *
> > > > > > +      * Track whether the pages are coherent with the GPU if reading or
> > > > > > +      * writing through the CPU cache.
> > > > > > +      *
> > > > > > +      * This largely depends on the @cache_level, for example if the object
> > > > > > +      * is marked as I915_CACHE_LLC, then GPU access is coherent for both
> > > > > > +      * reads and writes through the CPU cache.
> > > > > > +      *
> > > > > > +      * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > > > > > +      * the CPU cache are always coherent, regardless of the @cache_level. On
> > > > > > +      * snooping based platforms this is not the case, unless the full
> > > > > > +      * I915_CACHE_LLC or similar setting is used.
> > > > > > +      *
> > > > > > +      * As a result of this we need to track coherency separately for reads
> > > > > > +      * and writes, in order to avoid superfluous flushing on shared-LLC
> > > > > > +      * platforms, for reads.
> > > > > > +      *
> > > > > > +      * I915_BO_CACHE_COHERENT_FOR_READ:
> > > > > > +      *
> > > > > > +      * When reading through the CPU cache, the GPU is still coherent. Note
> > > > > > +      * that no data has actually been modified here, so it might seem
> > > > > > +      * strange that we care about this.
> > > > > > +      *
> > > > > > +      * As an example, if some object is mapped on the CPU with write-back
> > > > > > +      * caching, and we read some page, then the cache likely now contains
> > > > > > +      * the data from that read. At this point the cache and main memory
> > > > > > +      * match up, so all good. But next the GPU needs to write some data to
> > > > > > +      * that same page. Now if the @cache_level is I915_CACHE_NONE and the
> > > > > > +      * the platform doesn't have the shared-LLC, then the GPU will
> > > > > > +      * effectively skip invalidating the cache(or however that works
> > > > > > +      * internally) when writing the new value.  This is really bad since the
> > > > > > +      * GPU has just written some new data to main memory, but the CPU cache
> > > > > > +      * is still valid and now contains stale data. As a result the next time
> > > > > > +      * we do a cached read with the CPU, we are rewarded with stale data.
> > > > > > +      * Likewise if the cache is later flushed, we might be rewarded with
> > > > > > +      * overwriting main memory with stale data.
> > > > > > +      *
> > > > > > +      * I915_BO_CACHE_COHERENT_FOR_WRITE:
> > > > > > +      *
> > > > > > +      * When writing through the CPU cache, the GPU is still coherent. Note
> > > > > > +      * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> > > > > > +      *
> > > > > > +      * This is never set when I915_CACHE_NONE is used for @cache_level,
> > > > > > +      * where instead we have to manually flush the caches after writing
> > > > > > +      * through the CPU cache. For other cache levels this should be set and
> > > > > > +      * the object is therefore considered coherent for both reads and writes
> > > > > > +      * through the CPU cache.
> > > > >
> > > > > I don't remember why we have this read vs. write split and this new
> > > > > documentation doesn't seem to really explain it either.
> > > >
> > > > Hmm, I attempted to explain that earlier:
> > > >
> > > > * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > > > * the CPU cache are always coherent, regardless of the @cache_level. On
> > > > * snooping based platforms this is not the case, unless the full
> > > > * I915_CACHE_LLC or similar setting is used.
> > > > *
> > > > * As a result of this we need to track coherency separately for reads
> > > > * and writes, in order to avoid superfluous flushing on shared-LLC
> > > > * platforms, for reads.
> > > >
> > > > So AFAIK it's just because shared-LLC can be coherent for reads, while
> > > > also not being coherent for writes(CACHE_NONE),
> > >
> > > CPU vs. GPU is fully coherent when it comes to LLC. Or at least I've
> > > never heard of any mechanism that would make it only partially coherent.
> > 
> > What do you mean by "comes to LLC", are you talking about HAS_LLC() or
> > I915_CACHE_LLC?
> 
> I'm talking about the actual cache.
> 
> > 
> > If you set I915_CACHE_LLC, then yes it is fully coherent for both
> > HAS_LLC() and HAS_SNOOP().
> > 
> > If you set I915_CACHE_NONE, then reads are still coherent on
> > HAS_LLC(),
> 
> Reads and writes both. The only thing that's not coherent is the
> display engine.

There's a lot of code which seems to disagree, plus there's now this new
MOCS thing. I really hope we don't have all those cache coherency bits
just because the code complexity is entertaining?

Are there some igts to verify this all? Or selftests probably more
appropriate.
-Daniel


> > for HAS_SNOOP() they are not. Or at least that is the
> > existing behaviour in the driver AFAIK.
> > 
> > >
> > > --
> > > Ville Syrjälä
> > > Intel
> 
> -- 
> Ville Syrjälä
> Intel
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits
@ 2021-07-14 11:16             ` Daniel Vetter
  0 siblings, 0 replies; 37+ messages in thread
From: Daniel Vetter @ 2021-07-14 11:16 UTC (permalink / raw)
  To: Ville Syrjälä
  Cc: Daniel Vetter, Intel Graphics Development, Matthew Auld, ML dri-devel

On Tue, Jul 13, 2021 at 09:46:30PM +0300, Ville Syrjälä wrote:
> On Tue, Jul 13, 2021 at 07:24:23PM +0100, Matthew Auld wrote:
> > On Tue, 13 Jul 2021 at 18:47, Ville Syrjälä
> > <ville.syrjala@linux.intel.com> wrote:
> > >
> > > On Tue, Jul 13, 2021 at 05:13:37PM +0100, Matthew Auld wrote:
> > > > On Tue, 13 Jul 2021 at 16:55, Ville Syrjälä
> > > > <ville.syrjala@linux.intel.com> wrote:
> > > > >
> > > > > On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> > > > > > +     /**
> > > > > > +      * @cache_coherent:
> > > > > > +      *
> > > > > > +      * Track whether the pages are coherent with the GPU if reading or
> > > > > > +      * writing through the CPU cache.
> > > > > > +      *
> > > > > > +      * This largely depends on the @cache_level, for example if the object
> > > > > > +      * is marked as I915_CACHE_LLC, then GPU access is coherent for both
> > > > > > +      * reads and writes through the CPU cache.
> > > > > > +      *
> > > > > > +      * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > > > > > +      * the CPU cache are always coherent, regardless of the @cache_level. On
> > > > > > +      * snooping based platforms this is not the case, unless the full
> > > > > > +      * I915_CACHE_LLC or similar setting is used.
> > > > > > +      *
> > > > > > +      * As a result of this we need to track coherency separately for reads
> > > > > > +      * and writes, in order to avoid superfluous flushing on shared-LLC
> > > > > > +      * platforms, for reads.
> > > > > > +      *
> > > > > > +      * I915_BO_CACHE_COHERENT_FOR_READ:
> > > > > > +      *
> > > > > > +      * When reading through the CPU cache, the GPU is still coherent. Note
> > > > > > +      * that no data has actually been modified here, so it might seem
> > > > > > +      * strange that we care about this.
> > > > > > +      *
> > > > > > +      * As an example, if some object is mapped on the CPU with write-back
> > > > > > +      * caching, and we read some page, then the cache likely now contains
> > > > > > +      * the data from that read. At this point the cache and main memory
> > > > > > +      * match up, so all good. But next the GPU needs to write some data to
> > > > > > +      * that same page. Now if the @cache_level is I915_CACHE_NONE and the
> > > > > > +      * the platform doesn't have the shared-LLC, then the GPU will
> > > > > > +      * effectively skip invalidating the cache(or however that works
> > > > > > +      * internally) when writing the new value.  This is really bad since the
> > > > > > +      * GPU has just written some new data to main memory, but the CPU cache
> > > > > > +      * is still valid and now contains stale data. As a result the next time
> > > > > > +      * we do a cached read with the CPU, we are rewarded with stale data.
> > > > > > +      * Likewise if the cache is later flushed, we might be rewarded with
> > > > > > +      * overwriting main memory with stale data.
> > > > > > +      *
> > > > > > +      * I915_BO_CACHE_COHERENT_FOR_WRITE:
> > > > > > +      *
> > > > > > +      * When writing through the CPU cache, the GPU is still coherent. Note
> > > > > > +      * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> > > > > > +      *
> > > > > > +      * This is never set when I915_CACHE_NONE is used for @cache_level,
> > > > > > +      * where instead we have to manually flush the caches after writing
> > > > > > +      * through the CPU cache. For other cache levels this should be set and
> > > > > > +      * the object is therefore considered coherent for both reads and writes
> > > > > > +      * through the CPU cache.
> > > > >
> > > > > I don't remember why we have this read vs. write split and this new
> > > > > documentation doesn't seem to really explain it either.
> > > >
> > > > Hmm, I attempted to explain that earlier:
> > > >
> > > > * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > > > * the CPU cache are always coherent, regardless of the @cache_level. On
> > > > * snooping based platforms this is not the case, unless the full
> > > > * I915_CACHE_LLC or similar setting is used.
> > > > *
> > > > * As a result of this we need to track coherency separately for reads
> > > > * and writes, in order to avoid superfluous flushing on shared-LLC
> > > > * platforms, for reads.
> > > >
> > > > So AFAIK it's just because shared-LLC can be coherent for reads, while
> > > > also not being coherent for writes(CACHE_NONE),
> > >
> > > CPU vs. GPU is fully coherent when it comes to LLC. Or at least I've
> > > never heard of any mechanism that would make it only partially coherent.
> > 
> > What do you mean by "comes to LLC", are you talking about HAS_LLC() or
> > I915_CACHE_LLC?
> 
> I'm talking about the actual cache.
> 
> > 
> > If you set I915_CACHE_LLC, then yes it is fully coherent for both
> > HAS_LLC() and HAS_SNOOP().
> > 
> > If you set I915_CACHE_NONE, then reads are still coherent on
> > HAS_LLC(),
> 
> Reads and writes both. The only thing that's not coherent is the
> display engine.

There's a lot of code which seems to disagree, plus there's now this new
MOCS thing. I really hope we don't have all those cache coherency bits
just because the code complexity is entertaining?

Are there some igts to verify this all? Or selftests probably more
appropriate.
-Daniel


> > for HAS_SNOOP() they are not. Or at least that is the
> > existing behaviour in the driver AFAIK.
> > 
> > >
> > > --
> > > Ville Syrjälä
> > > Intel
> 
> -- 
> Ville Syrjälä
> Intel
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 5/5] drm/i915/ehl: unconditionally flush the pages on acquire
  2021-07-13 10:45   ` [Intel-gfx] " Matthew Auld
@ 2021-07-14 11:19     ` Daniel Vetter
  -1 siblings, 0 replies; 37+ messages in thread
From: Daniel Vetter @ 2021-07-14 11:19 UTC (permalink / raw)
  To: Matthew Auld
  Cc: intel-gfx, Lucas De Marchi, dri-devel, Jon Bloomfield,
	Chris Wilson, Francisco Jerez, Tejas Upadhyay

On Tue, Jul 13, 2021 at 11:45:54AM +0100, Matthew Auld wrote:
> EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
> possible for userspace to bypass the GTT caching bits set by the kernel,
> as per the given object cache_level. This is troublesome since the heavy
> flush we apply when first acquiring the pages is skipped if the kernel
> thinks the object is coherent with the GPU. As a result it might be
> possible to bypass the cache and read the contents of the page directly,
> which could be stale data. If it's just a case of userspace shooting
> themselves in the foot then so be it, but since i915 takes the stance of
> always zeroing memory before handing it to userspace, we need to prevent
> this.
> 
> v2: this time actually set cache_dirty in put_pages()
> v3: move to get_pages() which looks simpler
> 
> BSpec: 34007
> References: 046091758b50 ("Revert "drm/i915/ehl: Update MOCS table for EHL"")
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Tejas Upadhyay <tejaskumarx.surendrakumar.upadhyay@intel.com>
> Cc: Francisco Jerez <francisco.jerez.plata@intel.com>
> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
> Cc: Jon Bloomfield <jon.bloomfield@intel.com>
> Cc: Chris Wilson <chris.p.wilson@intel.com>
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Cc: Daniel Vetter <daniel@ffwll.ch>

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

I was pondering whether we can have a solid testcase for this, but:
- igt lacks the visibility, since we can't check easily whether stuff
  leaks.
- selftests don't have rendercopy, where we could select the nasty
  mocs entry

So it's a bit awkward. Is there something, or is this pure hw workaround
stuff on theoretical grounds?
-Daniel
> ---
>  .../gpu/drm/i915/gem/i915_gem_object_types.h   |  6 ++++++
>  drivers/gpu/drm/i915/gem/i915_gem_shmem.c      | 18 ++++++++++++++++++
>  2 files changed, 24 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> index da2194290436..7089d1b222c5 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> @@ -522,6 +522,12 @@ struct drm_i915_gem_object {
>  	 * I915_BO_CACHE_COHERENT_FOR_WRITE, i.e that the GPU will be coherent
>  	 * for both reads and writes though the CPU cache. So pretty much this
>  	 * should only be needed for I915_CACHE_NONE objects.
> +	 *
> +	 * Update: Some bonkers hardware decided to add the 'Bypass LLC' MOCS
> +	 * entry, which defeats our @cache_coherent tracking, since userspace
> +	 * can freely bypass the CPU cache when touching the pages with the GPU,
> +	 * where the kernel is completely unaware. On such platform we need
> +	 * apply the sledgehammer-on-acquire regardless of the @cache_coherent.
>  	 */
>  	unsigned int cache_dirty:1;
>  
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> index 6a04cce188fc..11f072193f3b 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> @@ -182,6 +182,24 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
>  	if (i915_gem_object_needs_bit17_swizzle(obj))
>  		i915_gem_object_do_bit_17_swizzle(obj, st);
>  
> +	/*
> +	 * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
> +	 * possible for userspace to bypass the GTT caching bits set by the
> +	 * kernel, as per the given object cache_level. This is troublesome
> +	 * since the heavy flush we apply when first gathering the pages is
> +	 * skipped if the kernel thinks the object is coherent with the GPU. As
> +	 * a result it might be possible to bypass the cache and read the
> +	 * contents of the page directly, which could be stale data. If it's
> +	 * just a case of userspace shooting themselves in the foot then so be
> +	 * it, but since i915 takes the stance of always zeroing memory before
> +	 * handing it to userspace, we need to prevent this.
> +	 *
> +	 * By setting cache_dirty here we make the clflush in set_pages
> +	 * unconditional on such platforms.
> +	 */
> +	if (IS_JSL_EHL(i915) && obj->flags & I915_BO_ALLOC_USER)
> +		obj->cache_dirty = true;
> +
>  	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
>  
>  	return 0;
> -- 
> 2.26.3
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 5/5] drm/i915/ehl: unconditionally flush the pages on acquire
@ 2021-07-14 11:19     ` Daniel Vetter
  0 siblings, 0 replies; 37+ messages in thread
From: Daniel Vetter @ 2021-07-14 11:19 UTC (permalink / raw)
  To: Matthew Auld
  Cc: intel-gfx, Lucas De Marchi, dri-devel, Chris Wilson, Francisco Jerez

On Tue, Jul 13, 2021 at 11:45:54AM +0100, Matthew Auld wrote:
> EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
> possible for userspace to bypass the GTT caching bits set by the kernel,
> as per the given object cache_level. This is troublesome since the heavy
> flush we apply when first acquiring the pages is skipped if the kernel
> thinks the object is coherent with the GPU. As a result it might be
> possible to bypass the cache and read the contents of the page directly,
> which could be stale data. If it's just a case of userspace shooting
> themselves in the foot then so be it, but since i915 takes the stance of
> always zeroing memory before handing it to userspace, we need to prevent
> this.
> 
> v2: this time actually set cache_dirty in put_pages()
> v3: move to get_pages() which looks simpler
> 
> BSpec: 34007
> References: 046091758b50 ("Revert "drm/i915/ehl: Update MOCS table for EHL"")
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Tejas Upadhyay <tejaskumarx.surendrakumar.upadhyay@intel.com>
> Cc: Francisco Jerez <francisco.jerez.plata@intel.com>
> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
> Cc: Jon Bloomfield <jon.bloomfield@intel.com>
> Cc: Chris Wilson <chris.p.wilson@intel.com>
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Cc: Daniel Vetter <daniel@ffwll.ch>

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

I was pondering whether we can have a solid testcase for this, but:
- igt lacks the visibility, since we can't check easily whether stuff
  leaks.
- selftests don't have rendercopy, where we could select the nasty
  mocs entry

So it's a bit awkward. Is there something, or is this pure hw workaround
stuff on theoretical grounds?
-Daniel
> ---
>  .../gpu/drm/i915/gem/i915_gem_object_types.h   |  6 ++++++
>  drivers/gpu/drm/i915/gem/i915_gem_shmem.c      | 18 ++++++++++++++++++
>  2 files changed, 24 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> index da2194290436..7089d1b222c5 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> @@ -522,6 +522,12 @@ struct drm_i915_gem_object {
>  	 * I915_BO_CACHE_COHERENT_FOR_WRITE, i.e that the GPU will be coherent
>  	 * for both reads and writes though the CPU cache. So pretty much this
>  	 * should only be needed for I915_CACHE_NONE objects.
> +	 *
> +	 * Update: Some bonkers hardware decided to add the 'Bypass LLC' MOCS
> +	 * entry, which defeats our @cache_coherent tracking, since userspace
> +	 * can freely bypass the CPU cache when touching the pages with the GPU,
> +	 * where the kernel is completely unaware. On such platform we need
> +	 * apply the sledgehammer-on-acquire regardless of the @cache_coherent.
>  	 */
>  	unsigned int cache_dirty:1;
>  
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> index 6a04cce188fc..11f072193f3b 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> @@ -182,6 +182,24 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
>  	if (i915_gem_object_needs_bit17_swizzle(obj))
>  		i915_gem_object_do_bit_17_swizzle(obj, st);
>  
> +	/*
> +	 * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
> +	 * possible for userspace to bypass the GTT caching bits set by the
> +	 * kernel, as per the given object cache_level. This is troublesome
> +	 * since the heavy flush we apply when first gathering the pages is
> +	 * skipped if the kernel thinks the object is coherent with the GPU. As
> +	 * a result it might be possible to bypass the cache and read the
> +	 * contents of the page directly, which could be stale data. If it's
> +	 * just a case of userspace shooting themselves in the foot then so be
> +	 * it, but since i915 takes the stance of always zeroing memory before
> +	 * handing it to userspace, we need to prevent this.
> +	 *
> +	 * By setting cache_dirty here we make the clflush in set_pages
> +	 * unconditional on such platforms.
> +	 */
> +	if (IS_JSL_EHL(i915) && obj->flags & I915_BO_ALLOC_USER)
> +		obj->cache_dirty = true;
> +
>  	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
>  
>  	return 0;
> -- 
> 2.26.3
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits
  2021-07-14 11:16             ` Daniel Vetter
@ 2021-07-14 11:42               ` Ville Syrjälä
  -1 siblings, 0 replies; 37+ messages in thread
From: Ville Syrjälä @ 2021-07-14 11:42 UTC (permalink / raw)
  To: Daniel Vetter
  Cc: Daniel Vetter, Intel Graphics Development, Matthew Auld,
	Matthew Auld, ML dri-devel

On Wed, Jul 14, 2021 at 01:16:57PM +0200, Daniel Vetter wrote:
> On Tue, Jul 13, 2021 at 09:46:30PM +0300, Ville Syrjälä wrote:
> > On Tue, Jul 13, 2021 at 07:24:23PM +0100, Matthew Auld wrote:
> > > On Tue, 13 Jul 2021 at 18:47, Ville Syrjälä
> > > <ville.syrjala@linux.intel.com> wrote:
> > > >
> > > > On Tue, Jul 13, 2021 at 05:13:37PM +0100, Matthew Auld wrote:
> > > > > On Tue, 13 Jul 2021 at 16:55, Ville Syrjälä
> > > > > <ville.syrjala@linux.intel.com> wrote:
> > > > > >
> > > > > > On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> > > > > > > +     /**
> > > > > > > +      * @cache_coherent:
> > > > > > > +      *
> > > > > > > +      * Track whether the pages are coherent with the GPU if reading or
> > > > > > > +      * writing through the CPU cache.
> > > > > > > +      *
> > > > > > > +      * This largely depends on the @cache_level, for example if the object
> > > > > > > +      * is marked as I915_CACHE_LLC, then GPU access is coherent for both
> > > > > > > +      * reads and writes through the CPU cache.
> > > > > > > +      *
> > > > > > > +      * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > > > > > > +      * the CPU cache are always coherent, regardless of the @cache_level. On
> > > > > > > +      * snooping based platforms this is not the case, unless the full
> > > > > > > +      * I915_CACHE_LLC or similar setting is used.
> > > > > > > +      *
> > > > > > > +      * As a result of this we need to track coherency separately for reads
> > > > > > > +      * and writes, in order to avoid superfluous flushing on shared-LLC
> > > > > > > +      * platforms, for reads.
> > > > > > > +      *
> > > > > > > +      * I915_BO_CACHE_COHERENT_FOR_READ:
> > > > > > > +      *
> > > > > > > +      * When reading through the CPU cache, the GPU is still coherent. Note
> > > > > > > +      * that no data has actually been modified here, so it might seem
> > > > > > > +      * strange that we care about this.
> > > > > > > +      *
> > > > > > > +      * As an example, if some object is mapped on the CPU with write-back
> > > > > > > +      * caching, and we read some page, then the cache likely now contains
> > > > > > > +      * the data from that read. At this point the cache and main memory
> > > > > > > +      * match up, so all good. But next the GPU needs to write some data to
> > > > > > > +      * that same page. Now if the @cache_level is I915_CACHE_NONE and the
> > > > > > > +      * the platform doesn't have the shared-LLC, then the GPU will
> > > > > > > +      * effectively skip invalidating the cache(or however that works
> > > > > > > +      * internally) when writing the new value.  This is really bad since the
> > > > > > > +      * GPU has just written some new data to main memory, but the CPU cache
> > > > > > > +      * is still valid and now contains stale data. As a result the next time
> > > > > > > +      * we do a cached read with the CPU, we are rewarded with stale data.
> > > > > > > +      * Likewise if the cache is later flushed, we might be rewarded with
> > > > > > > +      * overwriting main memory with stale data.
> > > > > > > +      *
> > > > > > > +      * I915_BO_CACHE_COHERENT_FOR_WRITE:
> > > > > > > +      *
> > > > > > > +      * When writing through the CPU cache, the GPU is still coherent. Note
> > > > > > > +      * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> > > > > > > +      *
> > > > > > > +      * This is never set when I915_CACHE_NONE is used for @cache_level,
> > > > > > > +      * where instead we have to manually flush the caches after writing
> > > > > > > +      * through the CPU cache. For other cache levels this should be set and
> > > > > > > +      * the object is therefore considered coherent for both reads and writes
> > > > > > > +      * through the CPU cache.
> > > > > >
> > > > > > I don't remember why we have this read vs. write split and this new
> > > > > > documentation doesn't seem to really explain it either.
> > > > >
> > > > > Hmm, I attempted to explain that earlier:
> > > > >
> > > > > * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > > > > * the CPU cache are always coherent, regardless of the @cache_level. On
> > > > > * snooping based platforms this is not the case, unless the full
> > > > > * I915_CACHE_LLC or similar setting is used.
> > > > > *
> > > > > * As a result of this we need to track coherency separately for reads
> > > > > * and writes, in order to avoid superfluous flushing on shared-LLC
> > > > > * platforms, for reads.
> > > > >
> > > > > So AFAIK it's just because shared-LLC can be coherent for reads, while
> > > > > also not being coherent for writes(CACHE_NONE),
> > > >
> > > > CPU vs. GPU is fully coherent when it comes to LLC. Or at least I've
> > > > never heard of any mechanism that would make it only partially coherent.
> > > 
> > > What do you mean by "comes to LLC", are you talking about HAS_LLC() or
> > > I915_CACHE_LLC?
> > 
> > I'm talking about the actual cache.
> > 
> > > 
> > > If you set I915_CACHE_LLC, then yes it is fully coherent for both
> > > HAS_LLC() and HAS_SNOOP().
> > > 
> > > If you set I915_CACHE_NONE, then reads are still coherent on
> > > HAS_LLC(),
> > 
> > Reads and writes both. The only thing that's not coherent is the
> > display engine.
> 
> There's a lot of code which seems to disagree,

Can't even imagine why anyone would make a cache coherency protocol
that only handles reads but not writes...

> plus there's now this new
> MOCS thing.

That's just a full LLC bypass AFAICS. Can't omit invalidates if
you use that one or you'll just get stale data from the cache
on reads as well.

> I really hope we don't have all those cache coherency bits
> just because the code complexity is entertaining?

They were definitely added to fix a display issue, and before
that it was just a single flag, which wasn't doing what the display
needed. I think before the flag was added we used some other indicators
to check when we need to clflush, or maybe we did a some extra pointless
clflushes here and there and the broken single flag was supposed to
avoid those. Not quite sure.

I suppose these two flags should maybe have been named more like
"needs invalidate" and "needs writeback" to make it clear what 
one needs to do.

-- 
Ville Syrjälä
Intel

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits
@ 2021-07-14 11:42               ` Ville Syrjälä
  0 siblings, 0 replies; 37+ messages in thread
From: Ville Syrjälä @ 2021-07-14 11:42 UTC (permalink / raw)
  To: Daniel Vetter
  Cc: Daniel Vetter, Intel Graphics Development, Matthew Auld, ML dri-devel

On Wed, Jul 14, 2021 at 01:16:57PM +0200, Daniel Vetter wrote:
> On Tue, Jul 13, 2021 at 09:46:30PM +0300, Ville Syrjälä wrote:
> > On Tue, Jul 13, 2021 at 07:24:23PM +0100, Matthew Auld wrote:
> > > On Tue, 13 Jul 2021 at 18:47, Ville Syrjälä
> > > <ville.syrjala@linux.intel.com> wrote:
> > > >
> > > > On Tue, Jul 13, 2021 at 05:13:37PM +0100, Matthew Auld wrote:
> > > > > On Tue, 13 Jul 2021 at 16:55, Ville Syrjälä
> > > > > <ville.syrjala@linux.intel.com> wrote:
> > > > > >
> > > > > > On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> > > > > > > +     /**
> > > > > > > +      * @cache_coherent:
> > > > > > > +      *
> > > > > > > +      * Track whether the pages are coherent with the GPU if reading or
> > > > > > > +      * writing through the CPU cache.
> > > > > > > +      *
> > > > > > > +      * This largely depends on the @cache_level, for example if the object
> > > > > > > +      * is marked as I915_CACHE_LLC, then GPU access is coherent for both
> > > > > > > +      * reads and writes through the CPU cache.
> > > > > > > +      *
> > > > > > > +      * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > > > > > > +      * the CPU cache are always coherent, regardless of the @cache_level. On
> > > > > > > +      * snooping based platforms this is not the case, unless the full
> > > > > > > +      * I915_CACHE_LLC or similar setting is used.
> > > > > > > +      *
> > > > > > > +      * As a result of this we need to track coherency separately for reads
> > > > > > > +      * and writes, in order to avoid superfluous flushing on shared-LLC
> > > > > > > +      * platforms, for reads.
> > > > > > > +      *
> > > > > > > +      * I915_BO_CACHE_COHERENT_FOR_READ:
> > > > > > > +      *
> > > > > > > +      * When reading through the CPU cache, the GPU is still coherent. Note
> > > > > > > +      * that no data has actually been modified here, so it might seem
> > > > > > > +      * strange that we care about this.
> > > > > > > +      *
> > > > > > > +      * As an example, if some object is mapped on the CPU with write-back
> > > > > > > +      * caching, and we read some page, then the cache likely now contains
> > > > > > > +      * the data from that read. At this point the cache and main memory
> > > > > > > +      * match up, so all good. But next the GPU needs to write some data to
> > > > > > > +      * that same page. Now if the @cache_level is I915_CACHE_NONE and the
> > > > > > > +      * the platform doesn't have the shared-LLC, then the GPU will
> > > > > > > +      * effectively skip invalidating the cache(or however that works
> > > > > > > +      * internally) when writing the new value.  This is really bad since the
> > > > > > > +      * GPU has just written some new data to main memory, but the CPU cache
> > > > > > > +      * is still valid and now contains stale data. As a result the next time
> > > > > > > +      * we do a cached read with the CPU, we are rewarded with stale data.
> > > > > > > +      * Likewise if the cache is later flushed, we might be rewarded with
> > > > > > > +      * overwriting main memory with stale data.
> > > > > > > +      *
> > > > > > > +      * I915_BO_CACHE_COHERENT_FOR_WRITE:
> > > > > > > +      *
> > > > > > > +      * When writing through the CPU cache, the GPU is still coherent. Note
> > > > > > > +      * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> > > > > > > +      *
> > > > > > > +      * This is never set when I915_CACHE_NONE is used for @cache_level,
> > > > > > > +      * where instead we have to manually flush the caches after writing
> > > > > > > +      * through the CPU cache. For other cache levels this should be set and
> > > > > > > +      * the object is therefore considered coherent for both reads and writes
> > > > > > > +      * through the CPU cache.
> > > > > >
> > > > > > I don't remember why we have this read vs. write split and this new
> > > > > > documentation doesn't seem to really explain it either.
> > > > >
> > > > > Hmm, I attempted to explain that earlier:
> > > > >
> > > > > * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > > > > * the CPU cache are always coherent, regardless of the @cache_level. On
> > > > > * snooping based platforms this is not the case, unless the full
> > > > > * I915_CACHE_LLC or similar setting is used.
> > > > > *
> > > > > * As a result of this we need to track coherency separately for reads
> > > > > * and writes, in order to avoid superfluous flushing on shared-LLC
> > > > > * platforms, for reads.
> > > > >
> > > > > So AFAIK it's just because shared-LLC can be coherent for reads, while
> > > > > also not being coherent for writes(CACHE_NONE),
> > > >
> > > > CPU vs. GPU is fully coherent when it comes to LLC. Or at least I've
> > > > never heard of any mechanism that would make it only partially coherent.
> > > 
> > > What do you mean by "comes to LLC", are you talking about HAS_LLC() or
> > > I915_CACHE_LLC?
> > 
> > I'm talking about the actual cache.
> > 
> > > 
> > > If you set I915_CACHE_LLC, then yes it is fully coherent for both
> > > HAS_LLC() and HAS_SNOOP().
> > > 
> > > If you set I915_CACHE_NONE, then reads are still coherent on
> > > HAS_LLC(),
> > 
> > Reads and writes both. The only thing that's not coherent is the
> > display engine.
> 
> There's a lot of code which seems to disagree,

Can't even imagine why anyone would make a cache coherency protocol
that only handles reads but not writes...

> plus there's now this new
> MOCS thing.

That's just a full LLC bypass AFAICS. Can't omit invalidates if
you use that one or you'll just get stale data from the cache
on reads as well.

> I really hope we don't have all those cache coherency bits
> just because the code complexity is entertaining?

They were definitely added to fix a display issue, and before
that it was just a single flag, which wasn't doing what the display
needed. I think before the flag was added we used some other indicators
to check when we need to clflush, or maybe we did a some extra pointless
clflushes here and there and the broken single flag was supposed to
avoid those. Not quite sure.

I suppose these two flags should maybe have been named more like
"needs invalidate" and "needs writeback" to make it clear what 
one needs to do.

-- 
Ville Syrjälä
Intel
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits
  2021-07-14 11:42               ` Ville Syrjälä
@ 2021-07-14 12:08                 ` Ville Syrjälä
  -1 siblings, 0 replies; 37+ messages in thread
From: Ville Syrjälä @ 2021-07-14 12:08 UTC (permalink / raw)
  To: Daniel Vetter
  Cc: Daniel Vetter, Intel Graphics Development, Matthew Auld,
	Matthew Auld, ML dri-devel

On Wed, Jul 14, 2021 at 02:42:53PM +0300, Ville Syrjälä wrote:
> On Wed, Jul 14, 2021 at 01:16:57PM +0200, Daniel Vetter wrote:
> > On Tue, Jul 13, 2021 at 09:46:30PM +0300, Ville Syrjälä wrote:
> > > On Tue, Jul 13, 2021 at 07:24:23PM +0100, Matthew Auld wrote:
> > > > On Tue, 13 Jul 2021 at 18:47, Ville Syrjälä
> > > > <ville.syrjala@linux.intel.com> wrote:
> > > > >
> > > > > On Tue, Jul 13, 2021 at 05:13:37PM +0100, Matthew Auld wrote:
> > > > > > On Tue, 13 Jul 2021 at 16:55, Ville Syrjälä
> > > > > > <ville.syrjala@linux.intel.com> wrote:
> > > > > > >
> > > > > > > On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> > > > > > > > +     /**
> > > > > > > > +      * @cache_coherent:
> > > > > > > > +      *
> > > > > > > > +      * Track whether the pages are coherent with the GPU if reading or
> > > > > > > > +      * writing through the CPU cache.
> > > > > > > > +      *
> > > > > > > > +      * This largely depends on the @cache_level, for example if the object
> > > > > > > > +      * is marked as I915_CACHE_LLC, then GPU access is coherent for both
> > > > > > > > +      * reads and writes through the CPU cache.
> > > > > > > > +      *
> > > > > > > > +      * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > > > > > > > +      * the CPU cache are always coherent, regardless of the @cache_level. On
> > > > > > > > +      * snooping based platforms this is not the case, unless the full
> > > > > > > > +      * I915_CACHE_LLC or similar setting is used.
> > > > > > > > +      *
> > > > > > > > +      * As a result of this we need to track coherency separately for reads
> > > > > > > > +      * and writes, in order to avoid superfluous flushing on shared-LLC
> > > > > > > > +      * platforms, for reads.
> > > > > > > > +      *
> > > > > > > > +      * I915_BO_CACHE_COHERENT_FOR_READ:
> > > > > > > > +      *
> > > > > > > > +      * When reading through the CPU cache, the GPU is still coherent. Note
> > > > > > > > +      * that no data has actually been modified here, so it might seem
> > > > > > > > +      * strange that we care about this.
> > > > > > > > +      *
> > > > > > > > +      * As an example, if some object is mapped on the CPU with write-back
> > > > > > > > +      * caching, and we read some page, then the cache likely now contains
> > > > > > > > +      * the data from that read. At this point the cache and main memory
> > > > > > > > +      * match up, so all good. But next the GPU needs to write some data to
> > > > > > > > +      * that same page. Now if the @cache_level is I915_CACHE_NONE and the
> > > > > > > > +      * the platform doesn't have the shared-LLC, then the GPU will
> > > > > > > > +      * effectively skip invalidating the cache(or however that works
> > > > > > > > +      * internally) when writing the new value.  This is really bad since the
> > > > > > > > +      * GPU has just written some new data to main memory, but the CPU cache
> > > > > > > > +      * is still valid and now contains stale data. As a result the next time
> > > > > > > > +      * we do a cached read with the CPU, we are rewarded with stale data.
> > > > > > > > +      * Likewise if the cache is later flushed, we might be rewarded with
> > > > > > > > +      * overwriting main memory with stale data.
> > > > > > > > +      *
> > > > > > > > +      * I915_BO_CACHE_COHERENT_FOR_WRITE:
> > > > > > > > +      *
> > > > > > > > +      * When writing through the CPU cache, the GPU is still coherent. Note
> > > > > > > > +      * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> > > > > > > > +      *
> > > > > > > > +      * This is never set when I915_CACHE_NONE is used for @cache_level,
> > > > > > > > +      * where instead we have to manually flush the caches after writing
> > > > > > > > +      * through the CPU cache. For other cache levels this should be set and
> > > > > > > > +      * the object is therefore considered coherent for both reads and writes
> > > > > > > > +      * through the CPU cache.
> > > > > > >
> > > > > > > I don't remember why we have this read vs. write split and this new
> > > > > > > documentation doesn't seem to really explain it either.
> > > > > >
> > > > > > Hmm, I attempted to explain that earlier:
> > > > > >
> > > > > > * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > > > > > * the CPU cache are always coherent, regardless of the @cache_level. On
> > > > > > * snooping based platforms this is not the case, unless the full
> > > > > > * I915_CACHE_LLC or similar setting is used.
> > > > > > *
> > > > > > * As a result of this we need to track coherency separately for reads
> > > > > > * and writes, in order to avoid superfluous flushing on shared-LLC
> > > > > > * platforms, for reads.
> > > > > >
> > > > > > So AFAIK it's just because shared-LLC can be coherent for reads, while
> > > > > > also not being coherent for writes(CACHE_NONE),
> > > > >
> > > > > CPU vs. GPU is fully coherent when it comes to LLC. Or at least I've
> > > > > never heard of any mechanism that would make it only partially coherent.
> > > > 
> > > > What do you mean by "comes to LLC", are you talking about HAS_LLC() or
> > > > I915_CACHE_LLC?
> > > 
> > > I'm talking about the actual cache.
> > > 
> > > > 
> > > > If you set I915_CACHE_LLC, then yes it is fully coherent for both
> > > > HAS_LLC() and HAS_SNOOP().
> > > > 
> > > > If you set I915_CACHE_NONE, then reads are still coherent on
> > > > HAS_LLC(),
> > > 
> > > Reads and writes both. The only thing that's not coherent is the
> > > display engine.
> > 
> > There's a lot of code which seems to disagree,
> 
> Can't even imagine why anyone would make a cache coherency protocol
> that only handles reads but not writes...
> 
> > plus there's now this new
> > MOCS thing.
> 
> That's just a full LLC bypass AFAICS. Can't omit invalidates if
> you use that one or you'll just get stale data from the cache
> on reads as well.

And just to reiterate that the current "reads are coherent" thing
works is because the only non-coherent agent (display engine) never
writes anything. If/when we implement writeback support we can no
longer skip the invalidate even on LLC machines when reading from
a writeback buffer.

-- 
Ville Syrjälä
Intel

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits
@ 2021-07-14 12:08                 ` Ville Syrjälä
  0 siblings, 0 replies; 37+ messages in thread
From: Ville Syrjälä @ 2021-07-14 12:08 UTC (permalink / raw)
  To: Daniel Vetter
  Cc: Daniel Vetter, Intel Graphics Development, Matthew Auld, ML dri-devel

On Wed, Jul 14, 2021 at 02:42:53PM +0300, Ville Syrjälä wrote:
> On Wed, Jul 14, 2021 at 01:16:57PM +0200, Daniel Vetter wrote:
> > On Tue, Jul 13, 2021 at 09:46:30PM +0300, Ville Syrjälä wrote:
> > > On Tue, Jul 13, 2021 at 07:24:23PM +0100, Matthew Auld wrote:
> > > > On Tue, 13 Jul 2021 at 18:47, Ville Syrjälä
> > > > <ville.syrjala@linux.intel.com> wrote:
> > > > >
> > > > > On Tue, Jul 13, 2021 at 05:13:37PM +0100, Matthew Auld wrote:
> > > > > > On Tue, 13 Jul 2021 at 16:55, Ville Syrjälä
> > > > > > <ville.syrjala@linux.intel.com> wrote:
> > > > > > >
> > > > > > > On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> > > > > > > > +     /**
> > > > > > > > +      * @cache_coherent:
> > > > > > > > +      *
> > > > > > > > +      * Track whether the pages are coherent with the GPU if reading or
> > > > > > > > +      * writing through the CPU cache.
> > > > > > > > +      *
> > > > > > > > +      * This largely depends on the @cache_level, for example if the object
> > > > > > > > +      * is marked as I915_CACHE_LLC, then GPU access is coherent for both
> > > > > > > > +      * reads and writes through the CPU cache.
> > > > > > > > +      *
> > > > > > > > +      * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > > > > > > > +      * the CPU cache are always coherent, regardless of the @cache_level. On
> > > > > > > > +      * snooping based platforms this is not the case, unless the full
> > > > > > > > +      * I915_CACHE_LLC or similar setting is used.
> > > > > > > > +      *
> > > > > > > > +      * As a result of this we need to track coherency separately for reads
> > > > > > > > +      * and writes, in order to avoid superfluous flushing on shared-LLC
> > > > > > > > +      * platforms, for reads.
> > > > > > > > +      *
> > > > > > > > +      * I915_BO_CACHE_COHERENT_FOR_READ:
> > > > > > > > +      *
> > > > > > > > +      * When reading through the CPU cache, the GPU is still coherent. Note
> > > > > > > > +      * that no data has actually been modified here, so it might seem
> > > > > > > > +      * strange that we care about this.
> > > > > > > > +      *
> > > > > > > > +      * As an example, if some object is mapped on the CPU with write-back
> > > > > > > > +      * caching, and we read some page, then the cache likely now contains
> > > > > > > > +      * the data from that read. At this point the cache and main memory
> > > > > > > > +      * match up, so all good. But next the GPU needs to write some data to
> > > > > > > > +      * that same page. Now if the @cache_level is I915_CACHE_NONE and the
> > > > > > > > +      * the platform doesn't have the shared-LLC, then the GPU will
> > > > > > > > +      * effectively skip invalidating the cache(or however that works
> > > > > > > > +      * internally) when writing the new value.  This is really bad since the
> > > > > > > > +      * GPU has just written some new data to main memory, but the CPU cache
> > > > > > > > +      * is still valid and now contains stale data. As a result the next time
> > > > > > > > +      * we do a cached read with the CPU, we are rewarded with stale data.
> > > > > > > > +      * Likewise if the cache is later flushed, we might be rewarded with
> > > > > > > > +      * overwriting main memory with stale data.
> > > > > > > > +      *
> > > > > > > > +      * I915_BO_CACHE_COHERENT_FOR_WRITE:
> > > > > > > > +      *
> > > > > > > > +      * When writing through the CPU cache, the GPU is still coherent. Note
> > > > > > > > +      * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> > > > > > > > +      *
> > > > > > > > +      * This is never set when I915_CACHE_NONE is used for @cache_level,
> > > > > > > > +      * where instead we have to manually flush the caches after writing
> > > > > > > > +      * through the CPU cache. For other cache levels this should be set and
> > > > > > > > +      * the object is therefore considered coherent for both reads and writes
> > > > > > > > +      * through the CPU cache.
> > > > > > >
> > > > > > > I don't remember why we have this read vs. write split and this new
> > > > > > > documentation doesn't seem to really explain it either.
> > > > > >
> > > > > > Hmm, I attempted to explain that earlier:
> > > > > >
> > > > > > * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > > > > > * the CPU cache are always coherent, regardless of the @cache_level. On
> > > > > > * snooping based platforms this is not the case, unless the full
> > > > > > * I915_CACHE_LLC or similar setting is used.
> > > > > > *
> > > > > > * As a result of this we need to track coherency separately for reads
> > > > > > * and writes, in order to avoid superfluous flushing on shared-LLC
> > > > > > * platforms, for reads.
> > > > > >
> > > > > > So AFAIK it's just because shared-LLC can be coherent for reads, while
> > > > > > also not being coherent for writes(CACHE_NONE),
> > > > >
> > > > > CPU vs. GPU is fully coherent when it comes to LLC. Or at least I've
> > > > > never heard of any mechanism that would make it only partially coherent.
> > > > 
> > > > What do you mean by "comes to LLC", are you talking about HAS_LLC() or
> > > > I915_CACHE_LLC?
> > > 
> > > I'm talking about the actual cache.
> > > 
> > > > 
> > > > If you set I915_CACHE_LLC, then yes it is fully coherent for both
> > > > HAS_LLC() and HAS_SNOOP().
> > > > 
> > > > If you set I915_CACHE_NONE, then reads are still coherent on
> > > > HAS_LLC(),
> > > 
> > > Reads and writes both. The only thing that's not coherent is the
> > > display engine.
> > 
> > There's a lot of code which seems to disagree,
> 
> Can't even imagine why anyone would make a cache coherency protocol
> that only handles reads but not writes...
> 
> > plus there's now this new
> > MOCS thing.
> 
> That's just a full LLC bypass AFAICS. Can't omit invalidates if
> you use that one or you'll just get stale data from the cache
> on reads as well.

And just to reiterate that the current "reads are coherent" thing
works is because the only non-coherent agent (display engine) never
writes anything. If/when we implement writeback support we can no
longer skip the invalidate even on LLC machines when reading from
a writeback buffer.

-- 
Ville Syrjälä
Intel
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits
  2021-07-14 11:42               ` Ville Syrjälä
@ 2021-07-14 12:57                 ` Daniel Vetter
  -1 siblings, 0 replies; 37+ messages in thread
From: Daniel Vetter @ 2021-07-14 12:57 UTC (permalink / raw)
  To: Ville Syrjälä
  Cc: Daniel Vetter, Intel Graphics Development, Matthew Auld,
	ML dri-devel, Matthew Auld

On Wed, Jul 14, 2021 at 02:42:53PM +0300, Ville Syrjälä wrote:
> On Wed, Jul 14, 2021 at 01:16:57PM +0200, Daniel Vetter wrote:
> > On Tue, Jul 13, 2021 at 09:46:30PM +0300, Ville Syrjälä wrote:
> > > On Tue, Jul 13, 2021 at 07:24:23PM +0100, Matthew Auld wrote:
> > > > On Tue, 13 Jul 2021 at 18:47, Ville Syrjälä
> > > > <ville.syrjala@linux.intel.com> wrote:
> > > > >
> > > > > On Tue, Jul 13, 2021 at 05:13:37PM +0100, Matthew Auld wrote:
> > > > > > On Tue, 13 Jul 2021 at 16:55, Ville Syrjälä
> > > > > > <ville.syrjala@linux.intel.com> wrote:
> > > > > > >
> > > > > > > On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> > > > > > > > +     /**
> > > > > > > > +      * @cache_coherent:
> > > > > > > > +      *
> > > > > > > > +      * Track whether the pages are coherent with the GPU if reading or
> > > > > > > > +      * writing through the CPU cache.
> > > > > > > > +      *
> > > > > > > > +      * This largely depends on the @cache_level, for example if the object
> > > > > > > > +      * is marked as I915_CACHE_LLC, then GPU access is coherent for both
> > > > > > > > +      * reads and writes through the CPU cache.
> > > > > > > > +      *
> > > > > > > > +      * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > > > > > > > +      * the CPU cache are always coherent, regardless of the @cache_level. On
> > > > > > > > +      * snooping based platforms this is not the case, unless the full
> > > > > > > > +      * I915_CACHE_LLC or similar setting is used.
> > > > > > > > +      *
> > > > > > > > +      * As a result of this we need to track coherency separately for reads
> > > > > > > > +      * and writes, in order to avoid superfluous flushing on shared-LLC
> > > > > > > > +      * platforms, for reads.
> > > > > > > > +      *
> > > > > > > > +      * I915_BO_CACHE_COHERENT_FOR_READ:
> > > > > > > > +      *
> > > > > > > > +      * When reading through the CPU cache, the GPU is still coherent. Note
> > > > > > > > +      * that no data has actually been modified here, so it might seem
> > > > > > > > +      * strange that we care about this.
> > > > > > > > +      *
> > > > > > > > +      * As an example, if some object is mapped on the CPU with write-back
> > > > > > > > +      * caching, and we read some page, then the cache likely now contains
> > > > > > > > +      * the data from that read. At this point the cache and main memory
> > > > > > > > +      * match up, so all good. But next the GPU needs to write some data to
> > > > > > > > +      * that same page. Now if the @cache_level is I915_CACHE_NONE and the
> > > > > > > > +      * the platform doesn't have the shared-LLC, then the GPU will
> > > > > > > > +      * effectively skip invalidating the cache(or however that works
> > > > > > > > +      * internally) when writing the new value.  This is really bad since the
> > > > > > > > +      * GPU has just written some new data to main memory, but the CPU cache
> > > > > > > > +      * is still valid and now contains stale data. As a result the next time
> > > > > > > > +      * we do a cached read with the CPU, we are rewarded with stale data.
> > > > > > > > +      * Likewise if the cache is later flushed, we might be rewarded with
> > > > > > > > +      * overwriting main memory with stale data.
> > > > > > > > +      *
> > > > > > > > +      * I915_BO_CACHE_COHERENT_FOR_WRITE:
> > > > > > > > +      *
> > > > > > > > +      * When writing through the CPU cache, the GPU is still coherent. Note
> > > > > > > > +      * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> > > > > > > > +      *
> > > > > > > > +      * This is never set when I915_CACHE_NONE is used for @cache_level,
> > > > > > > > +      * where instead we have to manually flush the caches after writing
> > > > > > > > +      * through the CPU cache. For other cache levels this should be set and
> > > > > > > > +      * the object is therefore considered coherent for both reads and writes
> > > > > > > > +      * through the CPU cache.
> > > > > > >
> > > > > > > I don't remember why we have this read vs. write split and this new
> > > > > > > documentation doesn't seem to really explain it either.
> > > > > >
> > > > > > Hmm, I attempted to explain that earlier:
> > > > > >
> > > > > > * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > > > > > * the CPU cache are always coherent, regardless of the @cache_level. On
> > > > > > * snooping based platforms this is not the case, unless the full
> > > > > > * I915_CACHE_LLC or similar setting is used.
> > > > > > *
> > > > > > * As a result of this we need to track coherency separately for reads
> > > > > > * and writes, in order to avoid superfluous flushing on shared-LLC
> > > > > > * platforms, for reads.
> > > > > >
> > > > > > So AFAIK it's just because shared-LLC can be coherent for reads, while
> > > > > > also not being coherent for writes(CACHE_NONE),
> > > > >
> > > > > CPU vs. GPU is fully coherent when it comes to LLC. Or at least I've
> > > > > never heard of any mechanism that would make it only partially coherent.
> > > > 
> > > > What do you mean by "comes to LLC", are you talking about HAS_LLC() or
> > > > I915_CACHE_LLC?
> > > 
> > > I'm talking about the actual cache.
> > > 
> > > > 
> > > > If you set I915_CACHE_LLC, then yes it is fully coherent for both
> > > > HAS_LLC() and HAS_SNOOP().
> > > > 
> > > > If you set I915_CACHE_NONE, then reads are still coherent on
> > > > HAS_LLC(),
> > > 
> > > Reads and writes both. The only thing that's not coherent is the
> > > display engine.
> > 
> > There's a lot of code which seems to disagree,
> 
> Can't even imagine why anyone would make a cache coherency protocol
> that only handles reads but not writes...
> 
> > plus there's now this new
> > MOCS thing.
> 
> That's just a full LLC bypass AFAICS. Can't omit invalidates if
> you use that one or you'll just get stale data from the cache
> on reads as well.
> 
> > I really hope we don't have all those cache coherency bits
> > just because the code complexity is entertaining?
> 
> They were definitely added to fix a display issue, and before
> that it was just a single flag, which wasn't doing what the display
> needed. I think before the flag was added we used some other indicators
> to check when we need to clflush, or maybe we did a some extra pointless
> clflushes here and there and the broken single flag was supposed to
> avoid those. Not quite sure.

Hm I thought cache_dirty was added for that display case? But yeah this
entire model is maybe not super well-defined in terms of all the use-cases
and what exactly it means ...

I'm also not clear why this is on the object, and not some kind of
function which computes it from the platform + cache_level.

> I suppose these two flags should maybe have been named more like
> "needs invalidate" and "needs writeback" to make it clear what 
> one needs to do.

tbh I have no idea, this all started to get added after I disappeared for
a few years into display.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits
@ 2021-07-14 12:57                 ` Daniel Vetter
  0 siblings, 0 replies; 37+ messages in thread
From: Daniel Vetter @ 2021-07-14 12:57 UTC (permalink / raw)
  To: Ville Syrjälä
  Cc: Daniel Vetter, Intel Graphics Development, ML dri-devel, Matthew Auld

On Wed, Jul 14, 2021 at 02:42:53PM +0300, Ville Syrjälä wrote:
> On Wed, Jul 14, 2021 at 01:16:57PM +0200, Daniel Vetter wrote:
> > On Tue, Jul 13, 2021 at 09:46:30PM +0300, Ville Syrjälä wrote:
> > > On Tue, Jul 13, 2021 at 07:24:23PM +0100, Matthew Auld wrote:
> > > > On Tue, 13 Jul 2021 at 18:47, Ville Syrjälä
> > > > <ville.syrjala@linux.intel.com> wrote:
> > > > >
> > > > > On Tue, Jul 13, 2021 at 05:13:37PM +0100, Matthew Auld wrote:
> > > > > > On Tue, 13 Jul 2021 at 16:55, Ville Syrjälä
> > > > > > <ville.syrjala@linux.intel.com> wrote:
> > > > > > >
> > > > > > > On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> > > > > > > > +     /**
> > > > > > > > +      * @cache_coherent:
> > > > > > > > +      *
> > > > > > > > +      * Track whether the pages are coherent with the GPU if reading or
> > > > > > > > +      * writing through the CPU cache.
> > > > > > > > +      *
> > > > > > > > +      * This largely depends on the @cache_level, for example if the object
> > > > > > > > +      * is marked as I915_CACHE_LLC, then GPU access is coherent for both
> > > > > > > > +      * reads and writes through the CPU cache.
> > > > > > > > +      *
> > > > > > > > +      * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > > > > > > > +      * the CPU cache are always coherent, regardless of the @cache_level. On
> > > > > > > > +      * snooping based platforms this is not the case, unless the full
> > > > > > > > +      * I915_CACHE_LLC or similar setting is used.
> > > > > > > > +      *
> > > > > > > > +      * As a result of this we need to track coherency separately for reads
> > > > > > > > +      * and writes, in order to avoid superfluous flushing on shared-LLC
> > > > > > > > +      * platforms, for reads.
> > > > > > > > +      *
> > > > > > > > +      * I915_BO_CACHE_COHERENT_FOR_READ:
> > > > > > > > +      *
> > > > > > > > +      * When reading through the CPU cache, the GPU is still coherent. Note
> > > > > > > > +      * that no data has actually been modified here, so it might seem
> > > > > > > > +      * strange that we care about this.
> > > > > > > > +      *
> > > > > > > > +      * As an example, if some object is mapped on the CPU with write-back
> > > > > > > > +      * caching, and we read some page, then the cache likely now contains
> > > > > > > > +      * the data from that read. At this point the cache and main memory
> > > > > > > > +      * match up, so all good. But next the GPU needs to write some data to
> > > > > > > > +      * that same page. Now if the @cache_level is I915_CACHE_NONE and the
> > > > > > > > +      * the platform doesn't have the shared-LLC, then the GPU will
> > > > > > > > +      * effectively skip invalidating the cache(or however that works
> > > > > > > > +      * internally) when writing the new value.  This is really bad since the
> > > > > > > > +      * GPU has just written some new data to main memory, but the CPU cache
> > > > > > > > +      * is still valid and now contains stale data. As a result the next time
> > > > > > > > +      * we do a cached read with the CPU, we are rewarded with stale data.
> > > > > > > > +      * Likewise if the cache is later flushed, we might be rewarded with
> > > > > > > > +      * overwriting main memory with stale data.
> > > > > > > > +      *
> > > > > > > > +      * I915_BO_CACHE_COHERENT_FOR_WRITE:
> > > > > > > > +      *
> > > > > > > > +      * When writing through the CPU cache, the GPU is still coherent. Note
> > > > > > > > +      * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> > > > > > > > +      *
> > > > > > > > +      * This is never set when I915_CACHE_NONE is used for @cache_level,
> > > > > > > > +      * where instead we have to manually flush the caches after writing
> > > > > > > > +      * through the CPU cache. For other cache levels this should be set and
> > > > > > > > +      * the object is therefore considered coherent for both reads and writes
> > > > > > > > +      * through the CPU cache.
> > > > > > >
> > > > > > > I don't remember why we have this read vs. write split and this new
> > > > > > > documentation doesn't seem to really explain it either.
> > > > > >
> > > > > > Hmm, I attempted to explain that earlier:
> > > > > >
> > > > > > * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > > > > > * the CPU cache are always coherent, regardless of the @cache_level. On
> > > > > > * snooping based platforms this is not the case, unless the full
> > > > > > * I915_CACHE_LLC or similar setting is used.
> > > > > > *
> > > > > > * As a result of this we need to track coherency separately for reads
> > > > > > * and writes, in order to avoid superfluous flushing on shared-LLC
> > > > > > * platforms, for reads.
> > > > > >
> > > > > > So AFAIK it's just because shared-LLC can be coherent for reads, while
> > > > > > also not being coherent for writes(CACHE_NONE),
> > > > >
> > > > > CPU vs. GPU is fully coherent when it comes to LLC. Or at least I've
> > > > > never heard of any mechanism that would make it only partially coherent.
> > > > 
> > > > What do you mean by "comes to LLC", are you talking about HAS_LLC() or
> > > > I915_CACHE_LLC?
> > > 
> > > I'm talking about the actual cache.
> > > 
> > > > 
> > > > If you set I915_CACHE_LLC, then yes it is fully coherent for both
> > > > HAS_LLC() and HAS_SNOOP().
> > > > 
> > > > If you set I915_CACHE_NONE, then reads are still coherent on
> > > > HAS_LLC(),
> > > 
> > > Reads and writes both. The only thing that's not coherent is the
> > > display engine.
> > 
> > There's a lot of code which seems to disagree,
> 
> Can't even imagine why anyone would make a cache coherency protocol
> that only handles reads but not writes...
> 
> > plus there's now this new
> > MOCS thing.
> 
> That's just a full LLC bypass AFAICS. Can't omit invalidates if
> you use that one or you'll just get stale data from the cache
> on reads as well.
> 
> > I really hope we don't have all those cache coherency bits
> > just because the code complexity is entertaining?
> 
> They were definitely added to fix a display issue, and before
> that it was just a single flag, which wasn't doing what the display
> needed. I think before the flag was added we used some other indicators
> to check when we need to clflush, or maybe we did a some extra pointless
> clflushes here and there and the broken single flag was supposed to
> avoid those. Not quite sure.

Hm I thought cache_dirty was added for that display case? But yeah this
entire model is maybe not super well-defined in terms of all the use-cases
and what exactly it means ...

I'm also not clear why this is on the object, and not some kind of
function which computes it from the platform + cache_level.

> I suppose these two flags should maybe have been named more like
> "needs invalidate" and "needs writeback" to make it clear what 
> one needs to do.

tbh I have no idea, this all started to get added after I disappeared for
a few years into display.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

end of thread, other threads:[~2021-07-14 12:57 UTC | newest]

Thread overview: 37+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-13 10:45 [PATCH 1/5] drm/i915: document caching related bits Matthew Auld
2021-07-13 10:45 ` [Intel-gfx] " Matthew Auld
2021-07-13 10:45 ` [PATCH 2/5] drm/i915/uapi: convert drm_i915_gem_madvise to kernel-doc Matthew Auld
2021-07-13 10:45   ` [Intel-gfx] " Matthew Auld
2021-07-13 10:45 ` [PATCH 3/5] drm/i915: convert drm_i915_gem_object " Matthew Auld
2021-07-13 10:45   ` [Intel-gfx] " Matthew Auld
2021-07-13 10:45 ` [PATCH 4/5] drm/i915: pull in some more kernel-doc Matthew Auld
2021-07-13 10:45   ` [Intel-gfx] " Matthew Auld
2021-07-13 10:45 ` [PATCH 5/5] drm/i915/ehl: unconditionally flush the pages on acquire Matthew Auld
2021-07-13 10:45   ` [Intel-gfx] " Matthew Auld
2021-07-14 11:19   ` Daniel Vetter
2021-07-14 11:19     ` [Intel-gfx] " Daniel Vetter
2021-07-13 11:12 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/5] drm/i915: document caching related bits Patchwork
2021-07-13 11:39 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2021-07-13 11:41 ` [Intel-gfx] [PATCH 1/5] " Mika Kuoppala
2021-07-13 11:41   ` Mika Kuoppala
2021-07-13 12:59 ` [Intel-gfx] ✓ Fi.CI.IGT: success for series starting with [1/5] " Patchwork
2021-07-13 15:55 ` [Intel-gfx] [PATCH 1/5] " Ville Syrjälä
2021-07-13 15:55   ` Ville Syrjälä
2021-07-13 16:13   ` Matthew Auld
2021-07-13 16:13     ` Matthew Auld
2021-07-13 16:50     ` Daniel Vetter
2021-07-13 16:50       ` Daniel Vetter
2021-07-13 17:47     ` Ville Syrjälä
2021-07-13 17:47       ` Ville Syrjälä
2021-07-13 18:24       ` Matthew Auld
2021-07-13 18:24         ` Matthew Auld
2021-07-13 18:46         ` Ville Syrjälä
2021-07-13 18:46           ` Ville Syrjälä
2021-07-14 11:16           ` Daniel Vetter
2021-07-14 11:16             ` Daniel Vetter
2021-07-14 11:42             ` Ville Syrjälä
2021-07-14 11:42               ` Ville Syrjälä
2021-07-14 12:08               ` Ville Syrjälä
2021-07-14 12:08                 ` Ville Syrjälä
2021-07-14 12:57               ` Daniel Vetter
2021-07-14 12:57                 ` Daniel Vetter

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.