dri-devel.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3 1/5] drm/i915: use consistent CPU mappings for pin_map users
@ 2021-07-05 13:53 Matthew Auld
  2021-07-05 13:53 ` [PATCH v3 2/5] drm/i915/uapi: convert drm_i915_gem_caching to kernel doc Matthew Auld
                   ` (5 more replies)
  0 siblings, 6 replies; 12+ messages in thread
From: Matthew Auld @ 2021-07-05 13:53 UTC (permalink / raw)
  To: intel-gfx; +Cc: Thomas Hellström, dri-devel, Daniel Vetter

For discrete, users of pin_map() needs to obey the same rules at the TTM
backend, where we map system only objects as WB, and everything else as
WC. The simplest for now is to just force the correct mapping type as
per the new rules for discrete.

Suggested-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c | 34 ++++++++++++++++++++++
 drivers/gpu/drm/i915/gem/i915_gem_object.h |  4 +++
 drivers/gpu/drm/i915/gem/i915_gem_pages.c  | 22 ++++++++++++--
 3 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 547cc9dad90d..9da7b288b7ed 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -625,6 +625,40 @@ int i915_gem_object_migrate(struct drm_i915_gem_object *obj,
 	return obj->ops->migrate(obj, mr);
 }
 
+/**
+ * i915_gem_object_placement_possible - Check whether the object can be
+ * placed at certain memory type
+ * @obj: Pointer to the object
+ * @type: The memory type to check
+ *
+ * Return: True if the object can be placed in @type. False otherwise.
+ */
+bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj,
+					enum intel_memory_type type)
+{
+	unsigned int i;
+
+	if (!obj->mm.n_placements) {
+		switch (type) {
+		case INTEL_MEMORY_LOCAL:
+			return i915_gem_object_has_iomem(obj);
+		case INTEL_MEMORY_SYSTEM:
+			return i915_gem_object_has_pages(obj);
+		default:
+			/* Ignore stolen for now */
+			GEM_BUG_ON(1);
+			return false;
+		}
+	}
+
+	for (i = 0; i < obj->mm.n_placements; i++) {
+		if (obj->mm.placements[i]->type == type)
+			return true;
+	}
+
+	return false;
+}
+
 void i915_gem_init__objects(struct drm_i915_private *i915)
 {
 	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index d423d8cac4f2..8be4fadeee48 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -12,6 +12,7 @@
 #include <drm/drm_device.h>
 
 #include "display/intel_frontbuffer.h"
+#include "intel_memory_region.h"
 #include "i915_gem_object_types.h"
 #include "i915_gem_gtt.h"
 #include "i915_gem_ww.h"
@@ -607,6 +608,9 @@ bool i915_gem_object_can_migrate(struct drm_i915_gem_object *obj,
 int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj,
 				   unsigned int flags);
 
+bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj,
+					enum intel_memory_type type);
+
 #ifdef CONFIG_MMU_NOTIFIER
 static inline bool
 i915_gem_object_is_userptr(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index f2f850e31b8e..810a157a18f8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -321,8 +321,7 @@ static void *i915_gem_object_map_pfn(struct drm_i915_gem_object *obj,
 	dma_addr_t addr;
 	void *vaddr;
 
-	if (type != I915_MAP_WC)
-		return ERR_PTR(-ENODEV);
+	GEM_BUG_ON(type != I915_MAP_WC);
 
 	if (n_pfn > ARRAY_SIZE(stack)) {
 		/* Too big for stack -- allocate temporary array instead */
@@ -374,6 +373,25 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 	}
 	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
 
+	/*
+	 * For discrete our CPU mappings needs to be consistent in order to
+	 * function correctly on !x86. When mapping things through TTM, we use
+	 * the same rules to determine the caching type.
+	 *
+	 * Internal users of lmem are already expected to get this right, so no
+	 * fudging needed there.
+	 */
+	if (i915_gem_object_placement_possible(obj, INTEL_MEMORY_LOCAL)) {
+		if (type != I915_MAP_WC && !obj->mm.n_placements) {
+			ptr = ERR_PTR(-ENODEV);
+			goto err_unpin;
+		}
+
+		type = I915_MAP_WC;
+	} else if (IS_DGFX(to_i915(obj->base.dev))) {
+		type = I915_MAP_WB;
+	}
+
 	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
 	if (ptr && has_type != type) {
 		if (pinned) {
-- 
2.26.3


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v3 2/5] drm/i915/uapi: convert drm_i915_gem_caching to kernel doc
  2021-07-05 13:53 [PATCH v3 1/5] drm/i915: use consistent CPU mappings for pin_map users Matthew Auld
@ 2021-07-05 13:53 ` Matthew Auld
  2021-07-07  0:29   ` Ramalingam C
  2021-07-05 13:53 ` [PATCH v3 3/5] drm/i915/uapi: reject caching ioctls for discrete Matthew Auld
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 12+ messages in thread
From: Matthew Auld @ 2021-07-05 13:53 UTC (permalink / raw)
  To: intel-gfx
  Cc: Thomas Hellström, Jason Ekstrand, Tvrtko Ursulin,
	Jordan Justen, dri-devel, Kenneth Graunke, Daniel Vetter

Convert all the drm_i915_gem_caching bits to proper kernel doc.

Suggested-by: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Jordan Justen <jordan.l.justen@intel.com>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
 include/uapi/drm/i915_drm.h | 70 +++++++++++++++++++++++--------------
 1 file changed, 43 insertions(+), 27 deletions(-)

diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 2f70c48567c0..d13c6c5fad04 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1363,43 +1363,59 @@ struct drm_i915_gem_busy {
 };
 
 /**
- * I915_CACHING_NONE
+ * struct drm_i915_gem_caching - Set or get the caching for given object
+ * handle.
  *
- * GPU access is not coherent with cpu caches. Default for machines without an
- * LLC.
- */
-#define I915_CACHING_NONE		0
-/**
- * I915_CACHING_CACHED
+ * Allow userspace to control the GTT caching bits for a given object when the
+ * object is later mapped through the ppGTT(or GGTT on older platforms lacking
+ * ppGTT support, or if the object is used for scanout). Note that this might
+ * require unbinding the object from the GTT first, if its current caching value
+ * doesn't match.
  *
- * GPU access is coherent with cpu caches and furthermore the data is cached in
- * last-level caches shared between cpu cores and the gpu GT. Default on
- * machines with HAS_LLC.
- */
-#define I915_CACHING_CACHED		1
-/**
- * I915_CACHING_DISPLAY
  *
- * Special GPU caching mode which is coherent with the scanout engines.
- * Transparently falls back to I915_CACHING_NONE on platforms where no special
- * cache mode (like write-through or gfdt flushing) is available. The kernel
- * automatically sets this mode when using a buffer as a scanout target.
- * Userspace can manually set this mode to avoid a costly stall and clflush in
- * the hotpath of drawing the first frame.
  */
-#define I915_CACHING_DISPLAY		2
-
 struct drm_i915_gem_caching {
 	/**
-	 * Handle of the buffer to set/get the caching level of. */
+	 * @handle: Handle of the buffer to set/get the caching level.
+	 */
 	__u32 handle;
 
 	/**
-	 * Cacheing level to apply or return value
+	 * @caching: The GTT caching level to apply or possible return value.
+	 *
+	 * The supported @caching values:
 	 *
-	 * bits0-15 are for generic caching control (i.e. the above defined
-	 * values). bits16-31 are reserved for platform-specific variations
-	 * (e.g. l3$ caching on gen7). */
+	 * I915_CACHING_NONE:
+	 *
+	 * GPU access is not coherent with CPU caches.  Default for machines
+	 * without an LLC. This means we need to manually clflush, if we want
+	 * GPU access to be coherent.
+	 *
+	 * I915_CACHING_CACHED:
+	 *
+	 * GPU access is coherent with CPU caches and furthermore the data is
+	 * cached in last-level caches shared between CPU cores and the GPU GT.
+	 * Default on machines with HAS_LLC. In general the fast shared
+	 * last-level cache(HAS_LLC) is considered much faster then platforms
+	 * which only support snooping(HAS_SNOOP), hence by default
+	 *
+	 * I915_CACHING_DISPLAY:
+	 *
+	 * Special GPU caching mode which is coherent with the scanout engines.
+	 * Transparently falls back to I915_CACHING_NONE on platforms where no
+	 * special cache mode (like write-through or gfdt flushing) is
+	 * available. The kernel automatically sets this mode when using a
+	 * buffer as a scanout target.  Userspace can manually set this mode to
+	 * avoid a costly stall and clflush in the hotpath of drawing the first
+	 * frame.
+	 *
+	 * Side note: On gen8+ this no longer does much since we lost the GGTT
+	 * caching bits. Although setting this is harmless, since it still
+	 * effectively falls back to I915_CACHING_NONE.
+	 */
+#define I915_CACHING_NONE		0
+#define I915_CACHING_CACHED		1
+#define I915_CACHING_DISPLAY		2
 	__u32 caching;
 };
 
-- 
2.26.3


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v3 3/5] drm/i915/uapi: reject caching ioctls for discrete
  2021-07-05 13:53 [PATCH v3 1/5] drm/i915: use consistent CPU mappings for pin_map users Matthew Auld
  2021-07-05 13:53 ` [PATCH v3 2/5] drm/i915/uapi: convert drm_i915_gem_caching to kernel doc Matthew Auld
@ 2021-07-05 13:53 ` Matthew Auld
  2021-07-07  0:38   ` Ramalingam C
  2021-07-13 18:03   ` Kenneth Graunke
  2021-07-05 13:53 ` [PATCH v3 4/5] drm/i915/uapi: convert drm_i915_gem_set_domain to kernel doc Matthew Auld
                   ` (3 subsequent siblings)
  5 siblings, 2 replies; 12+ messages in thread
From: Matthew Auld @ 2021-07-05 13:53 UTC (permalink / raw)
  To: intel-gfx
  Cc: Thomas Hellström, Jason Ekstrand, Tvrtko Ursulin,
	Jordan Justen, dri-devel, Kenneth Graunke, Daniel Vetter

It's a noop on DG1, and in the future when need to support other devices
which let us control the coherency, then it should be an immutable
creation time property for the BO. This will likely be controlled
through a new gem_create_ext extension.

v2: add some kernel doc for the discrete changes, and document the
    implicit rules

Suggested-by: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Jordan Justen <jordan.l.justen@intel.com>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c |  6 +++++
 include/uapi/drm/i915_drm.h                | 29 +++++++++++++++++++++-
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 7d1400b13429..43004bef55cb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -268,6 +268,9 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
 	struct drm_i915_gem_object *obj;
 	int err = 0;
 
+	if (IS_DGFX(to_i915(dev)))
+		return -ENODEV;
+
 	rcu_read_lock();
 	obj = i915_gem_object_lookup_rcu(file, args->handle);
 	if (!obj) {
@@ -303,6 +306,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 	enum i915_cache_level level;
 	int ret = 0;
 
+	if (IS_DGFX(i915))
+		return -ENODEV;
+
 	switch (args->caching) {
 	case I915_CACHING_NONE:
 		level = I915_CACHE_NONE;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index d13c6c5fad04..a4faceeb8c32 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1372,7 +1372,34 @@ struct drm_i915_gem_busy {
  * require unbinding the object from the GTT first, if its current caching value
  * doesn't match.
  *
- *
+ * Note that this all changes on discrete platforms, starting from DG1, the
+ * set/get caching is no longer supported, and is now rejected.  Instead the CPU
+ * caching attributes(WB vs WC) will become an immutable creation time property
+ * for the object, along with the GTT caching level. For now we don't expose any
+ * new uAPI for this, instead on DG1 this is all implicit, although this largely
+ * shouldn't matter since DG1 is coherent by default(without any way of
+ * controlling it).
+ *
+ * Implicit caching rules, starting from DG1:
+ *
+ *     - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions)
+ *       contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and
+ *       mapped as write-combined only.
+ *
+ *     - Everything else is always allocated and mapped as write-back, with the
+ *       guarantee that everything is also coherent with the GPU.
+ *
+ * Note that this is likely to change in the future again, where we might need
+ * more flexibility on future devices, so making this all explicit as part of a
+ * new &drm_i915_gem_create_ext extension is probable.
+ *
+ * Side note: Part of the reason for this is that changing the at-allocation-time CPU
+ * caching attributes for the pages might be required(and is expensive) if we
+ * need to then CPU map the pages later with different caching attributes. This
+ * inconsistent caching behaviour, while supported on x86, is not universally
+ * supported on other architectures. So for simplicity we opt for setting
+ * everything at creation time, whilst also making it immutable, on discrete
+ * platforms.
  */
 struct drm_i915_gem_caching {
 	/**
-- 
2.26.3


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v3 4/5] drm/i915/uapi: convert drm_i915_gem_set_domain to kernel doc
  2021-07-05 13:53 [PATCH v3 1/5] drm/i915: use consistent CPU mappings for pin_map users Matthew Auld
  2021-07-05 13:53 ` [PATCH v3 2/5] drm/i915/uapi: convert drm_i915_gem_caching to kernel doc Matthew Auld
  2021-07-05 13:53 ` [PATCH v3 3/5] drm/i915/uapi: reject caching ioctls for discrete Matthew Auld
@ 2021-07-05 13:53 ` Matthew Auld
  2021-07-07  0:41   ` Ramalingam C
  2021-07-05 13:53 ` [PATCH v3 5/5] drm/i915/uapi: reject set_domain for discrete Matthew Auld
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 12+ messages in thread
From: Matthew Auld @ 2021-07-05 13:53 UTC (permalink / raw)
  To: intel-gfx
  Cc: Thomas Hellström, Jason Ekstrand, Tvrtko Ursulin,
	Jordan Justen, dri-devel, Kenneth Graunke, Daniel Vetter

Convert all the drm_i915_gem_set_domain bits to proper kernel doc.

Suggested-by: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Jordan Justen <jordan.l.justen@intel.com>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
 include/uapi/drm/i915_drm.h | 34 +++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index a4faceeb8c32..6f94e5e7569a 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -880,14 +880,42 @@ struct drm_i915_gem_mmap_offset {
 	__u64 extensions;
 };
 
+
+/**
+ * struct drm_i915_gem_set_domain - Adjust the objects write or read domain, in
+ * preparation for accessing the pages via some CPU domain.
+ *
+ * Specifying a new write or read domain will flush the object out of the
+ * previous domain(if required), before then updating the objects domain
+ * tracking with the new domain.
+ *
+ * Note this might involve waiting for the object first if it is still active on
+ * the GPU.
+ *
+ * Supported values for @read_domains and @write_domain:
+ *
+ *	- I915_GEM_DOMAIN_WC: Uncached write-combined domain
+ *	- I915_GEM_DOMAIN_CPU: CPU cache domain
+ *	- I915_GEM_DOMAIN_GTT: Mappable aperture domain
+ *
+ * All other domains are rejected.
+ *
+ */
 struct drm_i915_gem_set_domain {
-	/** Handle for the object */
+	/** @handle: Handle for the object. */
 	__u32 handle;
 
-	/** New read domains */
+	/**
+	 * @read_domains: New read domains.
+	 */
 	__u32 read_domains;
 
-	/** New write domain */
+	/**
+	 * @write_domain: New write domain.
+	 *
+	 * Note that having something in the write domain implies it's in the
+	 * read domain, and only that read domain.
+	 */
 	__u32 write_domain;
 };
 
-- 
2.26.3


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v3 5/5] drm/i915/uapi: reject set_domain for discrete
  2021-07-05 13:53 [PATCH v3 1/5] drm/i915: use consistent CPU mappings for pin_map users Matthew Auld
                   ` (2 preceding siblings ...)
  2021-07-05 13:53 ` [PATCH v3 4/5] drm/i915/uapi: convert drm_i915_gem_set_domain to kernel doc Matthew Auld
@ 2021-07-05 13:53 ` Matthew Auld
  2021-07-07  0:49   ` Ramalingam C
  2021-07-07  0:12 ` [PATCH v3 1/5] drm/i915: use consistent CPU mappings for pin_map users Ramalingam C
  2021-07-07 11:46 ` Daniel Vetter
  5 siblings, 1 reply; 12+ messages in thread
From: Matthew Auld @ 2021-07-05 13:53 UTC (permalink / raw)
  To: intel-gfx
  Cc: Thomas Hellström, Jason Ekstrand, Tvrtko Ursulin,
	Jordan Justen, dri-devel, Kenneth Graunke, Daniel Vetter

The CPU domain should be static for discrete, and on DG1 we don't need
any flushing since everything is already coherent, so really all this
does is an object wait, for which we have an ioctl. Longer term the
desired caching should be an immutable creation time property for the
BO, which can be set with something like gem_create_ext.

One other user is iris + userptr, which uses the set_domain to probe all
the pages to check if the GUP succeeds, however keeping the set_domain
around just for that seems rather scuffed. We could equally just submit
a dummy batch, which should hopefully be good enough, otherwise adding a
new creation time flag for userptr might be an option. Although longer
term we will also have vm_bind, which should also be a nice fit for
this, so adding a whole new flag is likely overkill.

v2: add some more kernel doc, also add the implicit rules with caching

Suggested-by: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Jordan Justen <jordan.l.justen@intel.com>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c |  3 +++
 include/uapi/drm/i915_drm.h                | 18 ++++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 43004bef55cb..b684a62bf3b0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -490,6 +490,9 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	u32 write_domain = args->write_domain;
 	int err;
 
+	if (IS_DGFX(to_i915(dev)))
+		return -ENODEV;
+
 	/* Only handle setting domains to types used by the CPU. */
 	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
 		return -EINVAL;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 6f94e5e7569a..fd1a9878730c 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -900,6 +900,24 @@ struct drm_i915_gem_mmap_offset {
  *
  * All other domains are rejected.
  *
+ * Note that for discrete, starting from DG1, this is no longer supported, and
+ * is instead rejected. On such platforms the CPU domain is effectively static,
+ * where we also only support a single &drm_i915_gem_mmap_offset cache mode,
+ * which can't be set explicitly and instead depends on the object placements,
+ * as per the below.
+ *
+ * Implicit caching rules, starting from DG1:
+ *
+ *	- If any of the object placements (see &drm_i915_gem_create_ext_memory_regions)
+ *	  contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and
+ *	  mapped as write-combined only.
+ *
+ *	- Everything else is always allocated and mapped as write-back, with the
+ *	  guarantee that everything is also coherent with the GPU.
+ *
+ * Note that this is likely to change in the future again, where we might need
+ * more flexibility on future devices, so making this all explicit as part of a
+ * new &drm_i915_gem_create_ext extension is probable.
  */
 struct drm_i915_gem_set_domain {
 	/** @handle: Handle for the object. */
-- 
2.26.3


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 1/5] drm/i915: use consistent CPU mappings for pin_map users
  2021-07-05 13:53 [PATCH v3 1/5] drm/i915: use consistent CPU mappings for pin_map users Matthew Auld
                   ` (3 preceding siblings ...)
  2021-07-05 13:53 ` [PATCH v3 5/5] drm/i915/uapi: reject set_domain for discrete Matthew Auld
@ 2021-07-07  0:12 ` Ramalingam C
  2021-07-07 11:46 ` Daniel Vetter
  5 siblings, 0 replies; 12+ messages in thread
From: Ramalingam C @ 2021-07-07  0:12 UTC (permalink / raw)
  To: Matthew Auld; +Cc: Thomas Hellström, intel-gfx, dri-devel, Daniel Vetter

On 2021-07-05 at 14:53:06 +0100, Matthew Auld wrote:
> For discrete, users of pin_map() needs to obey the same rules at the TTM
> backend, where we map system only objects as WB, and everything else as
> WC. The simplest for now is to just force the correct mapping type as
> per the new rules for discrete.
> 
> Suggested-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Ramalingam C <ramalingam.c@intel.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_object.c | 34 ++++++++++++++++++++++
>  drivers/gpu/drm/i915/gem/i915_gem_object.h |  4 +++
>  drivers/gpu/drm/i915/gem/i915_gem_pages.c  | 22 ++++++++++++--
>  3 files changed, 58 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> index 547cc9dad90d..9da7b288b7ed 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> @@ -625,6 +625,40 @@ int i915_gem_object_migrate(struct drm_i915_gem_object *obj,
>  	return obj->ops->migrate(obj, mr);
>  }
>  
> +/**
> + * i915_gem_object_placement_possible - Check whether the object can be
> + * placed at certain memory type
> + * @obj: Pointer to the object
> + * @type: The memory type to check
> + *
> + * Return: True if the object can be placed in @type. False otherwise.
> + */
> +bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj,
> +					enum intel_memory_type type)
> +{
> +	unsigned int i;
> +
> +	if (!obj->mm.n_placements) {
> +		switch (type) {
> +		case INTEL_MEMORY_LOCAL:
> +			return i915_gem_object_has_iomem(obj);
> +		case INTEL_MEMORY_SYSTEM:
> +			return i915_gem_object_has_pages(obj);
> +		default:
> +			/* Ignore stolen for now */
> +			GEM_BUG_ON(1);
> +			return false;
> +		}
> +	}
> +
> +	for (i = 0; i < obj->mm.n_placements; i++) {
> +		if (obj->mm.placements[i]->type == type)
> +			return true;
> +	}
> +
> +	return false;
> +}
> +
>  void i915_gem_init__objects(struct drm_i915_private *i915)
>  {
>  	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> index d423d8cac4f2..8be4fadeee48 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> @@ -12,6 +12,7 @@
>  #include <drm/drm_device.h>
>  
>  #include "display/intel_frontbuffer.h"
> +#include "intel_memory_region.h"
>  #include "i915_gem_object_types.h"
>  #include "i915_gem_gtt.h"
>  #include "i915_gem_ww.h"
> @@ -607,6 +608,9 @@ bool i915_gem_object_can_migrate(struct drm_i915_gem_object *obj,
>  int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj,
>  				   unsigned int flags);
>  
> +bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj,
> +					enum intel_memory_type type);
> +
>  #ifdef CONFIG_MMU_NOTIFIER
>  static inline bool
>  i915_gem_object_is_userptr(struct drm_i915_gem_object *obj)
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> index f2f850e31b8e..810a157a18f8 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> @@ -321,8 +321,7 @@ static void *i915_gem_object_map_pfn(struct drm_i915_gem_object *obj,
>  	dma_addr_t addr;
>  	void *vaddr;
>  
> -	if (type != I915_MAP_WC)
> -		return ERR_PTR(-ENODEV);
> +	GEM_BUG_ON(type != I915_MAP_WC);
>  
>  	if (n_pfn > ARRAY_SIZE(stack)) {
>  		/* Too big for stack -- allocate temporary array instead */
> @@ -374,6 +373,25 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
>  	}
>  	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
>  
> +	/*
> +	 * For discrete our CPU mappings needs to be consistent in order to
> +	 * function correctly on !x86. When mapping things through TTM, we use
> +	 * the same rules to determine the caching type.
> +	 *
> +	 * Internal users of lmem are already expected to get this right, so no
> +	 * fudging needed there.
> +	 */
> +	if (i915_gem_object_placement_possible(obj, INTEL_MEMORY_LOCAL)) {
> +		if (type != I915_MAP_WC && !obj->mm.n_placements) {
> +			ptr = ERR_PTR(-ENODEV);
> +			goto err_unpin;
> +		}
> +
> +		type = I915_MAP_WC;
> +	} else if (IS_DGFX(to_i915(obj->base.dev))) {
> +		type = I915_MAP_WB;
> +	}
Looks good to me.

Reviewed-by: Ramalingam C <ramalingam.c@intel.com>
> +
>  	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
>  	if (ptr && has_type != type) {
>  		if (pinned) {
> -- 
> 2.26.3
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 2/5] drm/i915/uapi: convert drm_i915_gem_caching to kernel doc
  2021-07-05 13:53 ` [PATCH v3 2/5] drm/i915/uapi: convert drm_i915_gem_caching to kernel doc Matthew Auld
@ 2021-07-07  0:29   ` Ramalingam C
  0 siblings, 0 replies; 12+ messages in thread
From: Ramalingam C @ 2021-07-07  0:29 UTC (permalink / raw)
  To: Matthew Auld
  Cc: Thomas Hellström, Jason Ekstrand, Tvrtko Ursulin,
	Jordan Justen, intel-gfx, dri-devel, Kenneth Graunke,
	Daniel Vetter

On 2021-07-05 at 14:53:07 +0100, Matthew Auld wrote:
> Convert all the drm_i915_gem_caching bits to proper kernel doc.
> 
> Suggested-by: Daniel Vetter <daniel@ffwll.ch>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
LGTM.

Reviewed-by: Ramalingam C <ramalingam.c@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Cc: Jordan Justen <jordan.l.justen@intel.com>
> Cc: Kenneth Graunke <kenneth@whitecape.org>
> Cc: Jason Ekstrand <jason@jlekstrand.net>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Ramalingam C <ramalingam.c@intel.com>
> ---
>  include/uapi/drm/i915_drm.h | 70 +++++++++++++++++++++++--------------
>  1 file changed, 43 insertions(+), 27 deletions(-)
> 
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 2f70c48567c0..d13c6c5fad04 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1363,43 +1363,59 @@ struct drm_i915_gem_busy {
>  };
>  
>  /**
> - * I915_CACHING_NONE
> + * struct drm_i915_gem_caching - Set or get the caching for given object
> + * handle.
>   *
> - * GPU access is not coherent with cpu caches. Default for machines without an
> - * LLC.
> - */
> -#define I915_CACHING_NONE		0
> -/**
> - * I915_CACHING_CACHED
> + * Allow userspace to control the GTT caching bits for a given object when the
> + * object is later mapped through the ppGTT(or GGTT on older platforms lacking
> + * ppGTT support, or if the object is used for scanout). Note that this might
> + * require unbinding the object from the GTT first, if its current caching value
> + * doesn't match.
>   *
> - * GPU access is coherent with cpu caches and furthermore the data is cached in
> - * last-level caches shared between cpu cores and the gpu GT. Default on
> - * machines with HAS_LLC.
> - */
> -#define I915_CACHING_CACHED		1
> -/**
> - * I915_CACHING_DISPLAY
>   *
> - * Special GPU caching mode which is coherent with the scanout engines.
> - * Transparently falls back to I915_CACHING_NONE on platforms where no special
> - * cache mode (like write-through or gfdt flushing) is available. The kernel
> - * automatically sets this mode when using a buffer as a scanout target.
> - * Userspace can manually set this mode to avoid a costly stall and clflush in
> - * the hotpath of drawing the first frame.
>   */
> -#define I915_CACHING_DISPLAY		2
> -
>  struct drm_i915_gem_caching {
>  	/**
> -	 * Handle of the buffer to set/get the caching level of. */
> +	 * @handle: Handle of the buffer to set/get the caching level.
> +	 */
>  	__u32 handle;
>  
>  	/**
> -	 * Cacheing level to apply or return value
> +	 * @caching: The GTT caching level to apply or possible return value.
> +	 *
> +	 * The supported @caching values:
>  	 *
> -	 * bits0-15 are for generic caching control (i.e. the above defined
> -	 * values). bits16-31 are reserved for platform-specific variations
> -	 * (e.g. l3$ caching on gen7). */
> +	 * I915_CACHING_NONE:
> +	 *
> +	 * GPU access is not coherent with CPU caches.  Default for machines
> +	 * without an LLC. This means we need to manually clflush, if we want
> +	 * GPU access to be coherent.
> +	 *
> +	 * I915_CACHING_CACHED:
> +	 *
> +	 * GPU access is coherent with CPU caches and furthermore the data is
> +	 * cached in last-level caches shared between CPU cores and the GPU GT.
> +	 * Default on machines with HAS_LLC. In general the fast shared
> +	 * last-level cache(HAS_LLC) is considered much faster then platforms
> +	 * which only support snooping(HAS_SNOOP), hence by default
> +	 *
> +	 * I915_CACHING_DISPLAY:
> +	 *
> +	 * Special GPU caching mode which is coherent with the scanout engines.
> +	 * Transparently falls back to I915_CACHING_NONE on platforms where no
> +	 * special cache mode (like write-through or gfdt flushing) is
> +	 * available. The kernel automatically sets this mode when using a
> +	 * buffer as a scanout target.  Userspace can manually set this mode to
> +	 * avoid a costly stall and clflush in the hotpath of drawing the first
> +	 * frame.
> +	 *
> +	 * Side note: On gen8+ this no longer does much since we lost the GGTT
> +	 * caching bits. Although setting this is harmless, since it still
> +	 * effectively falls back to I915_CACHING_NONE.
> +	 */
> +#define I915_CACHING_NONE		0
> +#define I915_CACHING_CACHED		1
> +#define I915_CACHING_DISPLAY		2
>  	__u32 caching;
>  };
>  
> -- 
> 2.26.3
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 3/5] drm/i915/uapi: reject caching ioctls for discrete
  2021-07-05 13:53 ` [PATCH v3 3/5] drm/i915/uapi: reject caching ioctls for discrete Matthew Auld
@ 2021-07-07  0:38   ` Ramalingam C
  2021-07-13 18:03   ` Kenneth Graunke
  1 sibling, 0 replies; 12+ messages in thread
From: Ramalingam C @ 2021-07-07  0:38 UTC (permalink / raw)
  To: Matthew Auld
  Cc: Thomas Hellström, Jason Ekstrand, Tvrtko Ursulin,
	Jordan Justen, intel-gfx, dri-devel, Kenneth Graunke,
	Daniel Vetter

On 2021-07-05 at 14:53:08 +0100, Matthew Auld wrote:
> It's a noop on DG1, and in the future when need to support other devices
> which let us control the coherency, then it should be an immutable
> creation time property for the BO. This will likely be controlled
> through a new gem_create_ext extension.
> 
> v2: add some kernel doc for the discrete changes, and document the
>     implicit rules
LGTM

Reviewed-by: Ramalingam C <ramalingam.c@intel.com>
> 
> Suggested-by: Daniel Vetter <daniel@ffwll.ch>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Cc: Jordan Justen <jordan.l.justen@intel.com>
> Cc: Kenneth Graunke <kenneth@whitecape.org>
> Cc: Jason Ekstrand <jason@jlekstrand.net>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Ramalingam C <ramalingam.c@intel.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_domain.c |  6 +++++
>  include/uapi/drm/i915_drm.h                | 29 +++++++++++++++++++++-
>  2 files changed, 34 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> index 7d1400b13429..43004bef55cb 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> @@ -268,6 +268,9 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
>  	struct drm_i915_gem_object *obj;
>  	int err = 0;
>  
> +	if (IS_DGFX(to_i915(dev)))
> +		return -ENODEV;
> +
>  	rcu_read_lock();
>  	obj = i915_gem_object_lookup_rcu(file, args->handle);
>  	if (!obj) {
> @@ -303,6 +306,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
>  	enum i915_cache_level level;
>  	int ret = 0;
>  
> +	if (IS_DGFX(i915))
> +		return -ENODEV;
> +
>  	switch (args->caching) {
>  	case I915_CACHING_NONE:
>  		level = I915_CACHE_NONE;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index d13c6c5fad04..a4faceeb8c32 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1372,7 +1372,34 @@ struct drm_i915_gem_busy {
>   * require unbinding the object from the GTT first, if its current caching value
>   * doesn't match.
>   *
> - *
> + * Note that this all changes on discrete platforms, starting from DG1, the
> + * set/get caching is no longer supported, and is now rejected.  Instead the CPU
> + * caching attributes(WB vs WC) will become an immutable creation time property
> + * for the object, along with the GTT caching level. For now we don't expose any
> + * new uAPI for this, instead on DG1 this is all implicit, although this largely
> + * shouldn't matter since DG1 is coherent by default(without any way of
> + * controlling it).
> + *
> + * Implicit caching rules, starting from DG1:
> + *
> + *     - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions)
> + *       contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and
> + *       mapped as write-combined only.
> + *
> + *     - Everything else is always allocated and mapped as write-back, with the
> + *       guarantee that everything is also coherent with the GPU.
> + *
> + * Note that this is likely to change in the future again, where we might need
> + * more flexibility on future devices, so making this all explicit as part of a
> + * new &drm_i915_gem_create_ext extension is probable.
> + *
> + * Side note: Part of the reason for this is that changing the at-allocation-time CPU
> + * caching attributes for the pages might be required(and is expensive) if we
> + * need to then CPU map the pages later with different caching attributes. This
> + * inconsistent caching behaviour, while supported on x86, is not universally
> + * supported on other architectures. So for simplicity we opt for setting
> + * everything at creation time, whilst also making it immutable, on discrete
> + * platforms.
>   */
>  struct drm_i915_gem_caching {
>  	/**
> -- 
> 2.26.3
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 4/5] drm/i915/uapi: convert drm_i915_gem_set_domain to kernel doc
  2021-07-05 13:53 ` [PATCH v3 4/5] drm/i915/uapi: convert drm_i915_gem_set_domain to kernel doc Matthew Auld
@ 2021-07-07  0:41   ` Ramalingam C
  0 siblings, 0 replies; 12+ messages in thread
From: Ramalingam C @ 2021-07-07  0:41 UTC (permalink / raw)
  To: Matthew Auld
  Cc: Thomas Hellström, Jason Ekstrand, Tvrtko Ursulin,
	Jordan Justen, intel-gfx, dri-devel, Kenneth Graunke,
	Daniel Vetter

On 2021-07-05 at 14:53:09 +0100, Matthew Auld wrote:
> Convert all the drm_i915_gem_set_domain bits to proper kernel doc.

LGTM.

Reviewed-by: Ramalingam C <ramalingam.c@intel.com>
> 
> Suggested-by: Daniel Vetter <daniel@ffwll.ch>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Cc: Jordan Justen <jordan.l.justen@intel.com>
> Cc: Kenneth Graunke <kenneth@whitecape.org>
> Cc: Jason Ekstrand <jason@jlekstrand.net>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Ramalingam C <ramalingam.c@intel.com>
> ---
>  include/uapi/drm/i915_drm.h | 34 +++++++++++++++++++++++++++++++---
>  1 file changed, 31 insertions(+), 3 deletions(-)
> 
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index a4faceeb8c32..6f94e5e7569a 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -880,14 +880,42 @@ struct drm_i915_gem_mmap_offset {
>  	__u64 extensions;
>  };
>  
> +
> +/**
> + * struct drm_i915_gem_set_domain - Adjust the objects write or read domain, in
> + * preparation for accessing the pages via some CPU domain.
> + *
> + * Specifying a new write or read domain will flush the object out of the
> + * previous domain(if required), before then updating the objects domain
> + * tracking with the new domain.
> + *
> + * Note this might involve waiting for the object first if it is still active on
> + * the GPU.
> + *
> + * Supported values for @read_domains and @write_domain:
> + *
> + *	- I915_GEM_DOMAIN_WC: Uncached write-combined domain
> + *	- I915_GEM_DOMAIN_CPU: CPU cache domain
> + *	- I915_GEM_DOMAIN_GTT: Mappable aperture domain
> + *
> + * All other domains are rejected.
> + *
> + */
>  struct drm_i915_gem_set_domain {
> -	/** Handle for the object */
> +	/** @handle: Handle for the object. */
>  	__u32 handle;
>  
> -	/** New read domains */
> +	/**
> +	 * @read_domains: New read domains.
> +	 */
>  	__u32 read_domains;
>  
> -	/** New write domain */
> +	/**
> +	 * @write_domain: New write domain.
> +	 *
> +	 * Note that having something in the write domain implies it's in the
> +	 * read domain, and only that read domain.
> +	 */
>  	__u32 write_domain;
>  };
>  
> -- 
> 2.26.3
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 5/5] drm/i915/uapi: reject set_domain for discrete
  2021-07-05 13:53 ` [PATCH v3 5/5] drm/i915/uapi: reject set_domain for discrete Matthew Auld
@ 2021-07-07  0:49   ` Ramalingam C
  0 siblings, 0 replies; 12+ messages in thread
From: Ramalingam C @ 2021-07-07  0:49 UTC (permalink / raw)
  To: Matthew Auld
  Cc: Thomas Hellström, Jason Ekstrand, Tvrtko Ursulin,
	Jordan Justen, intel-gfx, dri-devel, Kenneth Graunke,
	Daniel Vetter

On 2021-07-05 at 14:53:10 +0100, Matthew Auld wrote:
> The CPU domain should be static for discrete, and on DG1 we don't need
> any flushing since everything is already coherent, so really all this
> does is an object wait, for which we have an ioctl. Longer term the
> desired caching should be an immutable creation time property for the
> BO, which can be set with something like gem_create_ext.
> 
> One other user is iris + userptr, which uses the set_domain to probe all
> the pages to check if the GUP succeeds, however keeping the set_domain
> around just for that seems rather scuffed. We could equally just submit
> a dummy batch, which should hopefully be good enough, otherwise adding a
> new creation time flag for userptr might be an option. Although longer
> term we will also have vm_bind, which should also be a nice fit for
> this, so adding a whole new flag is likely overkill.
> 
> v2: add some more kernel doc, also add the implicit rules with caching
LGTM

Reviewed-by: Ramalingam C <ramalingam.c@intel.com>
> 
> Suggested-by: Daniel Vetter <daniel@ffwll.ch>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Cc: Jordan Justen <jordan.l.justen@intel.com>
> Cc: Kenneth Graunke <kenneth@whitecape.org>
> Cc: Jason Ekstrand <jason@jlekstrand.net>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Ramalingam C <ramalingam.c@intel.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_domain.c |  3 +++
>  include/uapi/drm/i915_drm.h                | 18 ++++++++++++++++++
>  2 files changed, 21 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> index 43004bef55cb..b684a62bf3b0 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> @@ -490,6 +490,9 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
>  	u32 write_domain = args->write_domain;
>  	int err;
>  
> +	if (IS_DGFX(to_i915(dev)))
> +		return -ENODEV;
> +
>  	/* Only handle setting domains to types used by the CPU. */
>  	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
>  		return -EINVAL;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 6f94e5e7569a..fd1a9878730c 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -900,6 +900,24 @@ struct drm_i915_gem_mmap_offset {
>   *
>   * All other domains are rejected.
>   *
> + * Note that for discrete, starting from DG1, this is no longer supported, and
> + * is instead rejected. On such platforms the CPU domain is effectively static,
> + * where we also only support a single &drm_i915_gem_mmap_offset cache mode,
> + * which can't be set explicitly and instead depends on the object placements,
> + * as per the below.
> + *
> + * Implicit caching rules, starting from DG1:
> + *
> + *	- If any of the object placements (see &drm_i915_gem_create_ext_memory_regions)
> + *	  contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and
> + *	  mapped as write-combined only.
> + *
> + *	- Everything else is always allocated and mapped as write-back, with the
> + *	  guarantee that everything is also coherent with the GPU.
> + *
> + * Note that this is likely to change in the future again, where we might need
> + * more flexibility on future devices, so making this all explicit as part of a
> + * new &drm_i915_gem_create_ext extension is probable.
>   */
>  struct drm_i915_gem_set_domain {
>  	/** @handle: Handle for the object. */
> -- 
> 2.26.3
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 1/5] drm/i915: use consistent CPU mappings for pin_map users
  2021-07-05 13:53 [PATCH v3 1/5] drm/i915: use consistent CPU mappings for pin_map users Matthew Auld
                   ` (4 preceding siblings ...)
  2021-07-07  0:12 ` [PATCH v3 1/5] drm/i915: use consistent CPU mappings for pin_map users Ramalingam C
@ 2021-07-07 11:46 ` Daniel Vetter
  5 siblings, 0 replies; 12+ messages in thread
From: Daniel Vetter @ 2021-07-07 11:46 UTC (permalink / raw)
  To: Matthew Auld; +Cc: Thomas Hellström, intel-gfx, dri-devel, Daniel Vetter

On Mon, Jul 05, 2021 at 02:53:06PM +0100, Matthew Auld wrote:
> For discrete, users of pin_map() needs to obey the same rules at the TTM
> backend, where we map system only objects as WB, and everything else as
> WC. The simplest for now is to just force the correct mapping type as
> per the new rules for discrete.
> 
> Suggested-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Ramalingam C <ramalingam.c@intel.com>

Huge thanks for doing all the kerneldoc work for uapi you're doing in this
series, this should help a lot with umd conversions since we can just
point them at the docs and tell them to pls update code.

Yes I know there's no kerneldoc here, but I didn't see the cover letter
:-)

Cheers, Daniel

> ---
>  drivers/gpu/drm/i915/gem/i915_gem_object.c | 34 ++++++++++++++++++++++
>  drivers/gpu/drm/i915/gem/i915_gem_object.h |  4 +++
>  drivers/gpu/drm/i915/gem/i915_gem_pages.c  | 22 ++++++++++++--
>  3 files changed, 58 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> index 547cc9dad90d..9da7b288b7ed 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> @@ -625,6 +625,40 @@ int i915_gem_object_migrate(struct drm_i915_gem_object *obj,
>  	return obj->ops->migrate(obj, mr);
>  }
>  
> +/**
> + * i915_gem_object_placement_possible - Check whether the object can be
> + * placed at certain memory type
> + * @obj: Pointer to the object
> + * @type: The memory type to check
> + *
> + * Return: True if the object can be placed in @type. False otherwise.
> + */
> +bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj,
> +					enum intel_memory_type type)
> +{
> +	unsigned int i;
> +
> +	if (!obj->mm.n_placements) {
> +		switch (type) {
> +		case INTEL_MEMORY_LOCAL:
> +			return i915_gem_object_has_iomem(obj);
> +		case INTEL_MEMORY_SYSTEM:
> +			return i915_gem_object_has_pages(obj);
> +		default:
> +			/* Ignore stolen for now */
> +			GEM_BUG_ON(1);
> +			return false;
> +		}
> +	}
> +
> +	for (i = 0; i < obj->mm.n_placements; i++) {
> +		if (obj->mm.placements[i]->type == type)
> +			return true;
> +	}
> +
> +	return false;
> +}
> +
>  void i915_gem_init__objects(struct drm_i915_private *i915)
>  {
>  	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> index d423d8cac4f2..8be4fadeee48 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> @@ -12,6 +12,7 @@
>  #include <drm/drm_device.h>
>  
>  #include "display/intel_frontbuffer.h"
> +#include "intel_memory_region.h"
>  #include "i915_gem_object_types.h"
>  #include "i915_gem_gtt.h"
>  #include "i915_gem_ww.h"
> @@ -607,6 +608,9 @@ bool i915_gem_object_can_migrate(struct drm_i915_gem_object *obj,
>  int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj,
>  				   unsigned int flags);
>  
> +bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj,
> +					enum intel_memory_type type);
> +
>  #ifdef CONFIG_MMU_NOTIFIER
>  static inline bool
>  i915_gem_object_is_userptr(struct drm_i915_gem_object *obj)
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> index f2f850e31b8e..810a157a18f8 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> @@ -321,8 +321,7 @@ static void *i915_gem_object_map_pfn(struct drm_i915_gem_object *obj,
>  	dma_addr_t addr;
>  	void *vaddr;
>  
> -	if (type != I915_MAP_WC)
> -		return ERR_PTR(-ENODEV);
> +	GEM_BUG_ON(type != I915_MAP_WC);
>  
>  	if (n_pfn > ARRAY_SIZE(stack)) {
>  		/* Too big for stack -- allocate temporary array instead */
> @@ -374,6 +373,25 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
>  	}
>  	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
>  
> +	/*
> +	 * For discrete our CPU mappings needs to be consistent in order to
> +	 * function correctly on !x86. When mapping things through TTM, we use
> +	 * the same rules to determine the caching type.
> +	 *
> +	 * Internal users of lmem are already expected to get this right, so no
> +	 * fudging needed there.
> +	 */
> +	if (i915_gem_object_placement_possible(obj, INTEL_MEMORY_LOCAL)) {
> +		if (type != I915_MAP_WC && !obj->mm.n_placements) {
> +			ptr = ERR_PTR(-ENODEV);
> +			goto err_unpin;
> +		}
> +
> +		type = I915_MAP_WC;
> +	} else if (IS_DGFX(to_i915(obj->base.dev))) {
> +		type = I915_MAP_WB;
> +	}
> +
>  	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
>  	if (ptr && has_type != type) {
>  		if (pinned) {
> -- 
> 2.26.3
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 3/5] drm/i915/uapi: reject caching ioctls for discrete
  2021-07-05 13:53 ` [PATCH v3 3/5] drm/i915/uapi: reject caching ioctls for discrete Matthew Auld
  2021-07-07  0:38   ` Ramalingam C
@ 2021-07-13 18:03   ` Kenneth Graunke
  1 sibling, 0 replies; 12+ messages in thread
From: Kenneth Graunke @ 2021-07-13 18:03 UTC (permalink / raw)
  To: intel-gfx, Matthew Auld
  Cc: Thomas Hellström, Jason Ekstrand, Tvrtko Ursulin,
	Jordan Justen, dri-devel, Daniel Vetter

[-- Attachment #1: Type: text/plain, Size: 1211 bytes --]

On Monday, July 5, 2021 6:53:08 AM PDT Matthew Auld wrote:
> It's a noop on DG1, and in the future when need to support other devices
> which let us control the coherency, then it should be an immutable
> creation time property for the BO. This will likely be controlled
> through a new gem_create_ext extension.
> 
> v2: add some kernel doc for the discrete changes, and document the
>     implicit rules
> 
> Suggested-by: Daniel Vetter <daniel@ffwll.ch>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Cc: Jordan Justen <jordan.l.justen@intel.com>
> Cc: Kenneth Graunke <kenneth@whitecape.org>
> Cc: Jason Ekstrand <jason@jlekstrand.net>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Ramalingam C <ramalingam.c@intel.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_domain.c |  6 +++++
>  include/uapi/drm/i915_drm.h                | 29 +++++++++++++++++++++-
>  2 files changed, 34 insertions(+), 1 deletion(-)

This caching ioctl patch is:

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2021-07-13 18:11 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-05 13:53 [PATCH v3 1/5] drm/i915: use consistent CPU mappings for pin_map users Matthew Auld
2021-07-05 13:53 ` [PATCH v3 2/5] drm/i915/uapi: convert drm_i915_gem_caching to kernel doc Matthew Auld
2021-07-07  0:29   ` Ramalingam C
2021-07-05 13:53 ` [PATCH v3 3/5] drm/i915/uapi: reject caching ioctls for discrete Matthew Auld
2021-07-07  0:38   ` Ramalingam C
2021-07-13 18:03   ` Kenneth Graunke
2021-07-05 13:53 ` [PATCH v3 4/5] drm/i915/uapi: convert drm_i915_gem_set_domain to kernel doc Matthew Auld
2021-07-07  0:41   ` Ramalingam C
2021-07-05 13:53 ` [PATCH v3 5/5] drm/i915/uapi: reject set_domain for discrete Matthew Auld
2021-07-07  0:49   ` Ramalingam C
2021-07-07  0:12 ` [PATCH v3 1/5] drm/i915: use consistent CPU mappings for pin_map users Ramalingam C
2021-07-07 11:46 ` Daniel Vetter

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).