linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v4 1/8] drm/i915/ttm: dont trample cache_level overrides during ttm move
       [not found] <20220713133818.3699604-1-bob.beckett@collabora.com>
@ 2022-07-13 13:38 ` Robert Beckett
  2022-07-13 13:38 ` [PATCH v4 2/8] drm/i915: limit ttm to dma32 for i965G[M] Robert Beckett
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 8+ messages in thread
From: Robert Beckett @ 2022-07-13 13:38 UTC (permalink / raw)
  To: dri-devel, intel-gfx, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
	Tvrtko Ursulin, David Airlie, Daniel Vetter
  Cc: kernel, Robert Beckett, Matthew Auld, Thomas Hellström,
	linux-kernel

Various places within the driver override the default chosen cache_level.
Before ttm, these overrides were permanent until explicitly changed again
or for the lifetime of the buffer.

TTM movement code came along and decided that it could make that
decision at that time, which is usually well after object creation, so
overrode the cache_level decision and reverted it back to its default
decision.

Add logic to indicate whether the caching mode has been set by anything
other than the move logic. If so, assume that the code that overrode the
defaults knows best and keep it.

Signed-off-by: Robert Beckett <bob.beckett@collabora.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c       | 1 +
 drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 1 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c          | 1 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c     | 9 ++++++---
 4 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index ccec4055fde3..966ac2d778d5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -125,6 +125,7 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 
 	obj->cache_level = cache_level;
+	obj->ttm.cache_level_override = true;
 
 	if (cache_level != I915_CACHE_NONE)
 		obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 5cf36a130061..14937cf1daaa 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -623,6 +623,7 @@ struct drm_i915_gem_object {
 		struct i915_gem_object_page_iter get_io_page;
 		struct drm_i915_gem_object *backup;
 		bool created:1;
+		bool cache_level_override:1;
 	} ttm;
 
 	/*
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 053b0022ddd0..b6c3fc25d9d1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -1253,6 +1253,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
 	i915_gem_object_init_memory_region(obj, mem);
 	i915_ttm_adjust_domains_after_move(obj);
 	i915_ttm_adjust_gem_after_move(obj);
+	obj->ttm.cache_level_override = false;
 	i915_gem_object_unlock(obj);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 9a7e50534b84..042c2237e287 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -129,9 +129,12 @@ void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj)
 	obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM :
 		I915_BO_FLAG_STRUCT_PAGE;
 
-	cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource,
-					   bo->ttm);
-	i915_gem_object_set_cache_coherency(obj, cache_level);
+	if (!obj->ttm.cache_level_override) {
+		cache_level = i915_ttm_cache_level(to_i915(bo->base.dev),
+						   bo->resource, bo->ttm);
+		i915_gem_object_set_cache_coherency(obj, cache_level);
+		obj->ttm.cache_level_override = false;
+	}
 }
 
 /**
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v4 2/8] drm/i915: limit ttm to dma32 for i965G[M]
       [not found] <20220713133818.3699604-1-bob.beckett@collabora.com>
  2022-07-13 13:38 ` [PATCH v4 1/8] drm/i915/ttm: dont trample cache_level overrides during ttm move Robert Beckett
@ 2022-07-13 13:38 ` Robert Beckett
  2022-07-13 13:38 ` [PATCH v4 3/8] drm/i915/ttm: only trust snooping for dgfx when deciding default cache_level Robert Beckett
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 8+ messages in thread
From: Robert Beckett @ 2022-07-13 13:38 UTC (permalink / raw)
  To: dri-devel, intel-gfx, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
	Tvrtko Ursulin, David Airlie, Daniel Vetter
  Cc: kernel, Robert Beckett, Matthew Auld, Thomas Hellström,
	linux-kernel

i965G[M] cannot relocate objects above 4GiB.
Ensure ttm uses dma32 on these systems.

Signed-off-by: Robert Beckett <bob.beckett@collabora.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_region_ttm.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c
index 6873808a7015..642cd1587976 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.c
+++ b/drivers/gpu/drm/i915/intel_region_ttm.c
@@ -32,10 +32,15 @@
 int intel_region_ttm_device_init(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *drm = &dev_priv->drm;
+	bool use_dma32 = false;
+
+	/* i965g[m] cannot relocate objects above 4GiB. */
+	if (IS_I965GM(dev_priv) || IS_I965G(dev_priv))
+		use_dma32 = true;
 
 	return ttm_device_init(&dev_priv->bdev, i915_ttm_driver(),
 			       drm->dev, drm->anon_inode->i_mapping,
-			       drm->vma_offset_manager, false, false);
+			       drm->vma_offset_manager, false, use_dma32);
 }
 
 /**
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v4 3/8] drm/i915/ttm: only trust snooping for dgfx when deciding default cache_level
       [not found] <20220713133818.3699604-1-bob.beckett@collabora.com>
  2022-07-13 13:38 ` [PATCH v4 1/8] drm/i915/ttm: dont trample cache_level overrides during ttm move Robert Beckett
  2022-07-13 13:38 ` [PATCH v4 2/8] drm/i915: limit ttm to dma32 for i965G[M] Robert Beckett
@ 2022-07-13 13:38 ` Robert Beckett
  2022-07-13 13:38 ` [PATCH v4 4/8] drm/i915: add gen6 ppgtt dummy creation function Robert Beckett
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 8+ messages in thread
From: Robert Beckett @ 2022-07-13 13:38 UTC (permalink / raw)
  To: dri-devel, intel-gfx, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
	Tvrtko Ursulin, David Airlie, Daniel Vetter
  Cc: kernel, Robert Beckett, Matthew Auld, Thomas Hellström,
	linux-kernel

By default i915_ttm_cache_level() decides I915_CACHE_LLC if HAS_SNOOP.
This is divergent from existing backends code which only considers
HAS_LLC.
Testing shows that trusting snooping on gen5- is unreliable and bsw via
ggtt mappings, so limit DGFX for now and maintain previous behaviour.

Signed-off-by: Robert Beckett <bob.beckett@collabora.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 042c2237e287..a949594237d9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -52,7 +52,9 @@ static enum i915_cache_level
 i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res,
 		     struct ttm_tt *ttm)
 {
-	return ((HAS_LLC(i915) || HAS_SNOOP(i915)) &&
+	bool can_snoop = HAS_SNOOP(i915) && IS_DGFX(i915);
+
+	return ((HAS_LLC(i915) || can_snoop) &&
 		!i915_ttm_gtt_binds_lmem(res) &&
 		ttm->caching == ttm_cached) ? I915_CACHE_LLC :
 		I915_CACHE_NONE;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v4 4/8] drm/i915: add gen6 ppgtt dummy creation function
       [not found] <20220713133818.3699604-1-bob.beckett@collabora.com>
                   ` (2 preceding siblings ...)
  2022-07-13 13:38 ` [PATCH v4 3/8] drm/i915/ttm: only trust snooping for dgfx when deciding default cache_level Robert Beckett
@ 2022-07-13 13:38 ` Robert Beckett
  2022-07-13 13:38 ` [PATCH v4 5/8] drm/i915: setup ggtt scratch page after memory regions Robert Beckett
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 8+ messages in thread
From: Robert Beckett @ 2022-07-13 13:38 UTC (permalink / raw)
  To: dri-devel, intel-gfx, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
	Tvrtko Ursulin, David Airlie, Daniel Vetter
  Cc: kernel, Robert Beckett, Matthew Auld, Thomas Hellström,
	linux-kernel

Internal gem objects will soon just be volatile system memory region
objects.
To enable this, create a separate dummy object creation function
for gen6 ppgtt. The object only exists as a fake object pointing to ggtt
and gains no benefit in going via the internal backend.
Instead, create a dummy gem object and avoid having to maintain a custom
ops api in the internal backend, which makes later refactoring easier.

Signed-off-by: Robert Beckett <bob.beckett@collabora.com>
---
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 43 ++++++++++++++++++++++++++--
 1 file changed, 40 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index 1bb766c79dcb..f3b660cfeb7f 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -372,6 +372,45 @@ static const struct drm_i915_gem_object_ops pd_dummy_obj_ops = {
 	.put_pages = pd_dummy_obj_put_pages,
 };
 
+static struct drm_i915_gem_object *
+i915_gem_object_create_dummy(struct drm_i915_private *i915, phys_addr_t size)
+{
+	static struct lock_class_key lock_class;
+	struct drm_i915_gem_object *obj;
+	unsigned int cache_level;
+
+	GEM_BUG_ON(!size);
+	GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
+
+	if (overflows_type(size, obj->base.size))
+		return ERR_PTR(-E2BIG);
+
+	obj = i915_gem_object_alloc();
+	if (!obj)
+		return ERR_PTR(-ENOMEM);
+
+	drm_gem_private_object_init(&i915->drm, &obj->base, size);
+	i915_gem_object_init(obj, &pd_dummy_obj_ops, &lock_class, 0);
+	obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
+
+	/*
+	 * Mark the object as volatile, such that the pages are marked as
+	 * dontneed whilst they are still pinned. As soon as they are unpinned
+	 * they are allowed to be reaped by the shrinker, and the caller is
+	 * expected to repopulate - the contents of this object are only valid
+	 * whilst active and pinned.
+	 */
+	i915_gem_object_set_volatile(obj);
+
+	obj->read_domains = I915_GEM_DOMAIN_CPU;
+	obj->write_domain = I915_GEM_DOMAIN_CPU;
+
+	cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
+	i915_gem_object_set_cache_coherency(obj, cache_level);
+
+	return obj;
+}
+
 static struct i915_page_directory *
 gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt)
 {
@@ -383,9 +422,7 @@ gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt)
 	if (unlikely(!pd))
 		return ERR_PTR(-ENOMEM);
 
-	pd->pt.base = __i915_gem_object_create_internal(ppgtt->base.vm.gt->i915,
-							&pd_dummy_obj_ops,
-							I915_PDES * SZ_4K);
+	pd->pt.base = i915_gem_object_create_dummy(ppgtt->base.vm.gt->i915, I915_PDES * SZ_4K);
 	if (IS_ERR(pd->pt.base)) {
 		err = PTR_ERR(pd->pt.base);
 		pd->pt.base = NULL;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v4 5/8] drm/i915: setup ggtt scratch page after memory regions
       [not found] <20220713133818.3699604-1-bob.beckett@collabora.com>
                   ` (3 preceding siblings ...)
  2022-07-13 13:38 ` [PATCH v4 4/8] drm/i915: add gen6 ppgtt dummy creation function Robert Beckett
@ 2022-07-13 13:38 ` Robert Beckett
  2022-07-13 13:38 ` [PATCH v4 6/8] drm/i915: allow volatile buffers to use ttm pool allocator Robert Beckett
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 8+ messages in thread
From: Robert Beckett @ 2022-07-13 13:38 UTC (permalink / raw)
  To: dri-devel, intel-gfx, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
	Tvrtko Ursulin, David Airlie, Daniel Vetter
  Cc: kernel, Robert Beckett, Matthew Auld, Thomas Hellström,
	linux-kernel

Reorder scratch page allocation so that memory regions are available
to allocate the buffers

Signed-off-by: Robert Beckett <bob.beckett@collabora.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_ggtt.c | 20 ++++++++++++++++++--
 drivers/gpu/drm/i915/gt/intel_gtt.h  |  1 +
 drivers/gpu/drm/i915/i915_driver.c   | 16 ++++++++++------
 3 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 15a915bb4088..c4ad03e53236 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -866,8 +866,6 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
 	struct drm_i915_private *i915 = ggtt->vm.i915;
 	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
 	phys_addr_t phys_addr;
-	u32 pte_flags;
-	int ret;
 
 	GEM_WARN_ON(pci_resource_len(pdev, 0) != gen6_gttmmadr_size(i915));
 	phys_addr = pci_resource_start(pdev, 0) + gen6_gttadr_offset(i915);
@@ -889,6 +887,24 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
 	}
 
 	kref_init(&ggtt->vm.resv_ref);
+
+	return 0;
+}
+
+/**
+ * i915_ggtt_setup_scratch_page - setup ggtt scratch page
+ * @i915: i915 device
+ */
+int i915_ggtt_setup_scratch_page(struct drm_i915_private *i915)
+{
+	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
+	u32 pte_flags;
+	int ret;
+
+	/* gen5- scratch setup currently happens in @intel_gtt_init */
+	if (GRAPHICS_VER(i915) <= 5)
+		return 0;
+
 	ret = setup_scratch_page(&ggtt->vm);
 	if (ret) {
 		drm_err(&i915->drm, "Scratch setup failed\n");
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index e639434e97fd..4ebdf70b5273 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -579,6 +579,7 @@ void intel_ggtt_unbind_vma(struct i915_address_space *vm,
 			   struct i915_vma_resource *vma_res);
 
 int i915_ggtt_probe_hw(struct drm_i915_private *i915);
+int i915_ggtt_setup_scratch_page(struct drm_i915_private *i915);
 int i915_ggtt_init_hw(struct drm_i915_private *i915);
 int i915_ggtt_enable_hw(struct drm_i915_private *i915);
 void i915_ggtt_enable_guc(struct i915_ggtt *ggtt);
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index deb8a8b76965..fa0956840fcc 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -69,6 +69,7 @@
 #include "gem/i915_gem_mman.h"
 #include "gem/i915_gem_pm.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gtt.h"
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_rc6.h"
 
@@ -609,12 +610,16 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
 
 	ret = intel_gt_tiles_init(dev_priv);
 	if (ret)
-		goto err_mem_regions;
+		goto err_ggtt;
+
+	ret = i915_ggtt_setup_scratch_page(dev_priv);
+	if (ret)
+		goto err_ggtt;
 
 	ret = i915_ggtt_enable_hw(dev_priv);
 	if (ret) {
 		drm_err(&dev_priv->drm, "failed to enable GGTT\n");
-		goto err_mem_regions;
+		goto err_ggtt;
 	}
 
 	pci_set_master(pdev);
@@ -675,11 +680,10 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
 err_msi:
 	if (pdev->msi_enabled)
 		pci_disable_msi(pdev);
-err_mem_regions:
-	intel_memory_regions_driver_release(dev_priv);
 err_ggtt:
 	i915_ggtt_driver_release(dev_priv);
 	i915_gem_drain_freed_objects(dev_priv);
+	intel_memory_regions_driver_release(dev_priv);
 	i915_ggtt_driver_late_release(dev_priv);
 err_perf:
 	i915_perf_fini(dev_priv);
@@ -928,9 +932,9 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	intel_modeset_driver_remove_nogem(i915);
 out_cleanup_hw:
 	i915_driver_hw_remove(i915);
-	intel_memory_regions_driver_release(i915);
 	i915_ggtt_driver_release(i915);
 	i915_gem_drain_freed_objects(i915);
+	intel_memory_regions_driver_release(i915);
 	i915_ggtt_driver_late_release(i915);
 out_cleanup_mmio:
 	i915_driver_mmio_release(i915);
@@ -987,9 +991,9 @@ static void i915_driver_release(struct drm_device *dev)
 
 	i915_gem_driver_release(dev_priv);
 
-	intel_memory_regions_driver_release(dev_priv);
 	i915_ggtt_driver_release(dev_priv);
 	i915_gem_drain_freed_objects(dev_priv);
+	intel_memory_regions_driver_release(dev_priv);
 	i915_ggtt_driver_late_release(dev_priv);
 
 	i915_driver_mmio_release(dev_priv);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v4 6/8] drm/i915: allow volatile buffers to use ttm pool allocator
       [not found] <20220713133818.3699604-1-bob.beckett@collabora.com>
                   ` (4 preceding siblings ...)
  2022-07-13 13:38 ` [PATCH v4 5/8] drm/i915: setup ggtt scratch page after memory regions Robert Beckett
@ 2022-07-13 13:38 ` Robert Beckett
  2022-07-13 13:38 ` [PATCH v4 7/8] drm/i915/gem: further fix mman selftest Robert Beckett
  2022-07-13 13:38 ` [PATCH v4 8/8] drm/i915: internal buffers use ttm backend Robert Beckett
  7 siblings, 0 replies; 8+ messages in thread
From: Robert Beckett @ 2022-07-13 13:38 UTC (permalink / raw)
  To: dri-devel, intel-gfx, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
	Tvrtko Ursulin, David Airlie, Daniel Vetter
  Cc: kernel, Robert Beckett, Matthew Auld, Thomas Hellström,
	linux-kernel

Internal/volatile buffers should not be shmem backed.
If a volatile buffer is requested, allow ttm to use the pool allocator
to provide volatile pages as backing.
Fix i915_ttm_shrink to handle !is_shmem volatile buffers by purging.

Signed-off-by: Robert Beckett <bob.beckett@collabora.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index b6c3fc25d9d1..599ed2713359 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -291,7 +291,8 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
 		page_flags |= TTM_TT_FLAG_ZERO_ALLOC;
 
 	caching = i915_ttm_select_tt_caching(obj);
-	if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached) {
+	if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached &&
+	    !i915_gem_object_is_volatile(obj)) {
 		page_flags |= TTM_TT_FLAG_EXTERNAL |
 			      TTM_TT_FLAG_EXTERNAL_MAPPABLE;
 		i915_tt->is_shmem = true;
@@ -513,9 +514,9 @@ static int i915_ttm_shrink(struct drm_i915_gem_object *obj, unsigned int flags)
 	if (!bo->ttm || bo->resource->mem_type != TTM_PL_SYSTEM)
 		return 0;
 
-	GEM_BUG_ON(!i915_tt->is_shmem);
+	GEM_BUG_ON(!i915_tt->is_shmem && obj->mm.madv != I915_MADV_DONTNEED);
 
-	if (!i915_tt->filp)
+	if (i915_tt->is_shmem && !i915_tt->filp)
 		return 0;
 
 	ret = ttm_bo_wait_ctx(bo, &ctx);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v4 7/8] drm/i915/gem: further fix mman selftest
       [not found] <20220713133818.3699604-1-bob.beckett@collabora.com>
                   ` (5 preceding siblings ...)
  2022-07-13 13:38 ` [PATCH v4 6/8] drm/i915: allow volatile buffers to use ttm pool allocator Robert Beckett
@ 2022-07-13 13:38 ` Robert Beckett
  2022-07-13 13:38 ` [PATCH v4 8/8] drm/i915: internal buffers use ttm backend Robert Beckett
  7 siblings, 0 replies; 8+ messages in thread
From: Robert Beckett @ 2022-07-13 13:38 UTC (permalink / raw)
  To: dri-devel, intel-gfx, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
	Tvrtko Ursulin, David Airlie, Daniel Vetter
  Cc: kernel, Robert Beckett, Matthew Auld, Thomas Hellström,
	linux-kernel

In commit 450cede7f380 ("drm/i915/gem: Fix the mman selftest") we fixed up
the mman selftest to allocate user buffers via smem only if we have lmem,
otherwise it uses internal buffers.

As the commit message asserts, we should only be using buffers that
userland should be able to create.
Internal buffers are not intended to be used by userland.

Instead, fix the code to always create buffers from smem.
In the case of integrated, this will create them from the shmem non-ttm
backend, which is fine.

This also fixes up the code to allow conversion of internal backend to
ttm without breaking this test.

Signed-off-by: Robert Beckett <bob.beckett@collabora.com>
---
 .../gpu/drm/i915/gem/selftests/i915_gem_mman.c  | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 3ced9948a331..e529eb8461ff 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -596,17 +596,12 @@ static enum i915_mmap_type default_mapping(struct drm_i915_private *i915)
 }
 
 static struct drm_i915_gem_object *
-create_sys_or_internal(struct drm_i915_private *i915,
-		       unsigned long size)
+create_sys(struct drm_i915_private *i915, unsigned long size)
 {
-	if (HAS_LMEM(i915)) {
-		struct intel_memory_region *sys_region =
-			i915->mm.regions[INTEL_REGION_SMEM];
+	struct intel_memory_region *sys_region =
+		i915->mm.regions[INTEL_REGION_SMEM];
 
-		return __i915_gem_object_create_user(i915, size, &sys_region, 1);
-	}
-
-	return i915_gem_object_create_internal(i915, size);
+	return __i915_gem_object_create_user(i915, size, &sys_region, 1);
 }
 
 static bool assert_mmap_offset(struct drm_i915_private *i915,
@@ -617,7 +612,7 @@ static bool assert_mmap_offset(struct drm_i915_private *i915,
 	u64 offset;
 	int ret;
 
-	obj = create_sys_or_internal(i915, size);
+	obj = create_sys(i915, size);
 	if (IS_ERR(obj))
 		return expected && expected == PTR_ERR(obj);
 
@@ -719,7 +714,7 @@ static int igt_mmap_offset_exhaustion(void *arg)
 	}
 
 	/* Fill the hole, further allocation attempts should then fail */
-	obj = create_sys_or_internal(i915, PAGE_SIZE);
+	obj = create_sys(i915, PAGE_SIZE);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
 		pr_err("Unable to create object for reclaimed hole\n");
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v4 8/8] drm/i915: internal buffers use ttm backend
       [not found] <20220713133818.3699604-1-bob.beckett@collabora.com>
                   ` (6 preceding siblings ...)
  2022-07-13 13:38 ` [PATCH v4 7/8] drm/i915/gem: further fix mman selftest Robert Beckett
@ 2022-07-13 13:38 ` Robert Beckett
  7 siblings, 0 replies; 8+ messages in thread
From: Robert Beckett @ 2022-07-13 13:38 UTC (permalink / raw)
  To: dri-devel, intel-gfx, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
	Tvrtko Ursulin, David Airlie, Daniel Vetter
  Cc: kernel, Robert Beckett, Matthew Auld, Thomas Hellström,
	linux-kernel

Create a kernel only internal memory region that uses ttm pool allocator to
allocate volatile system pages.
Refactor internal buffer backend to simply allocate from this new
region.

Signed-off-by: Robert Beckett <bob.beckett@collabora.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_internal.c  | 187 +-----------------
 drivers/gpu/drm/i915/gem/i915_gem_internal.h  |   5 -
 drivers/gpu/drm/i915/i915_pci.c               |   4 +-
 drivers/gpu/drm/i915/intel_memory_region.c    |   8 +-
 drivers/gpu/drm/i915/intel_memory_region.h    |   2 +
 .../gpu/drm/i915/selftests/mock_gem_device.c  |   2 +-
 6 files changed, 17 insertions(+), 191 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index c698f95af15f..a83751867ac7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -4,188 +4,9 @@
  * Copyright © 2014-2016 Intel Corporation
  */
 
-#include <linux/scatterlist.h>
-#include <linux/slab.h>
-#include <linux/swiotlb.h>
-
+#include "gem/i915_gem_internal.h"
+#include "gem/i915_gem_region.h"
 #include "i915_drv.h"
-#include "i915_gem.h"
-#include "i915_gem_internal.h"
-#include "i915_gem_object.h"
-#include "i915_scatterlist.h"
-#include "i915_utils.h"
-
-#define QUIET (__GFP_NORETRY | __GFP_NOWARN)
-#define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN)
-
-static void internal_free_pages(struct sg_table *st)
-{
-	struct scatterlist *sg;
-
-	for (sg = st->sgl; sg; sg = __sg_next(sg)) {
-		if (sg_page(sg))
-			__free_pages(sg_page(sg), get_order(sg->length));
-	}
-
-	sg_free_table(st);
-	kfree(st);
-}
-
-static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	struct sg_table *st;
-	struct scatterlist *sg;
-	unsigned int sg_page_sizes;
-	unsigned int npages;
-	int max_order;
-	gfp_t gfp;
-
-	max_order = MAX_ORDER;
-#ifdef CONFIG_SWIOTLB
-	if (is_swiotlb_active(obj->base.dev->dev)) {
-		unsigned int max_segment;
-
-		max_segment = swiotlb_max_segment();
-		if (max_segment) {
-			max_segment = max_t(unsigned int, max_segment,
-					    PAGE_SIZE) >> PAGE_SHIFT;
-			max_order = min(max_order, ilog2(max_segment));
-		}
-	}
-#endif
-
-	gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;
-	if (IS_I965GM(i915) || IS_I965G(i915)) {
-		/* 965gm cannot relocate objects above 4GiB. */
-		gfp &= ~__GFP_HIGHMEM;
-		gfp |= __GFP_DMA32;
-	}
-
-create_st:
-	st = kmalloc(sizeof(*st), GFP_KERNEL);
-	if (!st)
-		return -ENOMEM;
-
-	npages = obj->base.size / PAGE_SIZE;
-	if (sg_alloc_table(st, npages, GFP_KERNEL)) {
-		kfree(st);
-		return -ENOMEM;
-	}
-
-	sg = st->sgl;
-	st->nents = 0;
-	sg_page_sizes = 0;
-
-	do {
-		int order = min(fls(npages) - 1, max_order);
-		struct page *page;
-
-		do {
-			page = alloc_pages(gfp | (order ? QUIET : MAYFAIL),
-					   order);
-			if (page)
-				break;
-			if (!order--)
-				goto err;
-
-			/* Limit subsequent allocations as well */
-			max_order = order;
-		} while (1);
-
-		sg_set_page(sg, page, PAGE_SIZE << order, 0);
-		sg_page_sizes |= PAGE_SIZE << order;
-		st->nents++;
-
-		npages -= 1 << order;
-		if (!npages) {
-			sg_mark_end(sg);
-			break;
-		}
-
-		sg = __sg_next(sg);
-	} while (1);
-
-	if (i915_gem_gtt_prepare_pages(obj, st)) {
-		/* Failed to dma-map try again with single page sg segments */
-		if (get_order(st->sgl->length)) {
-			internal_free_pages(st);
-			max_order = 0;
-			goto create_st;
-		}
-		goto err;
-	}
-
-	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
-
-	return 0;
-
-err:
-	sg_set_page(sg, NULL, 0, 0);
-	sg_mark_end(sg);
-	internal_free_pages(st);
-
-	return -ENOMEM;
-}
-
-static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj,
-					       struct sg_table *pages)
-{
-	i915_gem_gtt_finish_pages(obj, pages);
-	internal_free_pages(pages);
-
-	obj->mm.dirty = false;
-
-	__start_cpu_write(obj);
-}
-
-static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = {
-	.name = "i915_gem_object_internal",
-	.flags = I915_GEM_OBJECT_IS_SHRINKABLE,
-	.get_pages = i915_gem_object_get_pages_internal,
-	.put_pages = i915_gem_object_put_pages_internal,
-};
-
-struct drm_i915_gem_object *
-__i915_gem_object_create_internal(struct drm_i915_private *i915,
-				  const struct drm_i915_gem_object_ops *ops,
-				  phys_addr_t size)
-{
-	static struct lock_class_key lock_class;
-	struct drm_i915_gem_object *obj;
-	unsigned int cache_level;
-
-	GEM_BUG_ON(!size);
-	GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
-
-	if (overflows_type(size, obj->base.size))
-		return ERR_PTR(-E2BIG);
-
-	obj = i915_gem_object_alloc();
-	if (!obj)
-		return ERR_PTR(-ENOMEM);
-
-	drm_gem_private_object_init(&i915->drm, &obj->base, size);
-	i915_gem_object_init(obj, ops, &lock_class, 0);
-	obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
-
-	/*
-	 * Mark the object as volatile, such that the pages are marked as
-	 * dontneed whilst they are still pinned. As soon as they are unpinned
-	 * they are allowed to be reaped by the shrinker, and the caller is
-	 * expected to repopulate - the contents of this object are only valid
-	 * whilst active and pinned.
-	 */
-	i915_gem_object_set_volatile(obj);
-
-	obj->read_domains = I915_GEM_DOMAIN_CPU;
-	obj->write_domain = I915_GEM_DOMAIN_CPU;
-
-	cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
-	i915_gem_object_set_cache_coherency(obj, cache_level);
-
-	return obj;
-}
 
 /**
  * i915_gem_object_create_internal: create an object with volatile pages
@@ -206,5 +27,7 @@ struct drm_i915_gem_object *
 i915_gem_object_create_internal(struct drm_i915_private *i915,
 				phys_addr_t size)
 {
-	return __i915_gem_object_create_internal(i915, &i915_gem_object_internal_ops, size);
+	return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_INTERNAL],
+					     size, 0, I915_BO_ALLOC_VOLATILE);
 }
+
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.h b/drivers/gpu/drm/i915/gem/i915_gem_internal.h
index 6664e06112fc..524e1042b20f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.h
@@ -15,9 +15,4 @@ struct drm_i915_private;
 struct drm_i915_gem_object *
 i915_gem_object_create_internal(struct drm_i915_private *i915,
 				phys_addr_t size);
-struct drm_i915_gem_object *
-__i915_gem_object_create_internal(struct drm_i915_private *i915,
-				  const struct drm_i915_gem_object_ops *ops,
-				  phys_addr_t size);
-
 #endif /* __I915_GEM_INTERNAL_H__ */
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index aacc10f2e73f..48b27de8cbba 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -162,7 +162,7 @@
 	.page_sizes = I915_GTT_PAGE_SIZE_4K
 
 #define GEN_DEFAULT_REGIONS \
-	.memory_regions = REGION_SMEM | REGION_STOLEN_SMEM
+	.memory_regions = REGION_SMEM | REGION_STOLEN_SMEM | REGION_INTERNAL
 
 #define I830_FEATURES \
 	GEN(2), \
@@ -908,7 +908,7 @@ static const struct intel_device_info rkl_info = {
 };
 
 #define DGFX_FEATURES \
-	.memory_regions = REGION_SMEM | REGION_LMEM | REGION_STOLEN_LMEM, \
+	.memory_regions = REGION_SMEM | REGION_LMEM | REGION_STOLEN_LMEM | REGION_INTERNAL, \
 	.has_llc = 0, \
 	.has_pxp = 0, \
 	.has_snoop = 1, \
diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c
index 9a4a7fb55582..e950b0a125ad 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/intel_memory_region.c
@@ -31,6 +31,10 @@ static const struct {
 		.class = INTEL_MEMORY_STOLEN_LOCAL,
 		.instance = 0,
 	},
+	[INTEL_REGION_INTERNAL] = {
+		.class = INTEL_MEMORY_SYSTEM,
+		.instance = 1,
+	}
 };
 
 static int __iopagetest(struct intel_memory_region *mem,
@@ -321,12 +325,14 @@ int intel_memory_regions_hw_probe(struct drm_i915_private *i915)
 		instance = intel_region_map[i].instance;
 		switch (type) {
 		case INTEL_MEMORY_SYSTEM:
-			if (IS_DGFX(i915))
+			if (IS_DGFX(i915) || i == INTEL_REGION_INTERNAL)
 				mem = i915_gem_ttm_system_setup(i915, type,
 								instance);
 			else
 				mem = i915_gem_shmem_setup(i915, type,
 							   instance);
+			if (i == INTEL_REGION_INTERNAL)
+				mem->private = true;
 			break;
 		case INTEL_MEMORY_STOLEN_LOCAL:
 			mem = i915_gem_stolen_lmem_setup(i915, type, instance);
diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h
index 2953ed5c3248..d9cc1d8044b1 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.h
+++ b/drivers/gpu/drm/i915/intel_memory_region.h
@@ -35,6 +35,7 @@ enum intel_region_id {
 	INTEL_REGION_LMEM_3,
 	INTEL_REGION_STOLEN_SMEM,
 	INTEL_REGION_STOLEN_LMEM,
+	INTEL_REGION_INTERNAL,
 	INTEL_REGION_UNKNOWN, /* Should be last */
 };
 
@@ -42,6 +43,7 @@ enum intel_region_id {
 #define REGION_LMEM     BIT(INTEL_REGION_LMEM_0)
 #define REGION_STOLEN_SMEM   BIT(INTEL_REGION_STOLEN_SMEM)
 #define REGION_STOLEN_LMEM   BIT(INTEL_REGION_STOLEN_LMEM)
+#define REGION_INTERNAL	BIT(INTEL_REGION_INTERNAL)
 
 #define I915_ALLOC_CONTIGUOUS     BIT(0)
 
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 9c31a16f8380..eaa169effedc 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -179,7 +179,7 @@ struct drm_i915_private *mock_gem_device(void)
 		I915_GTT_PAGE_SIZE_64K |
 		I915_GTT_PAGE_SIZE_2M;
 
-	mkwrite_device_info(i915)->memory_regions = REGION_SMEM;
+	mkwrite_device_info(i915)->memory_regions = REGION_SMEM | REGION_INTERNAL;
 	intel_memory_regions_hw_probe(i915);
 
 	spin_lock_init(&i915->gpu_error.lock);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-07-13 13:39 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20220713133818.3699604-1-bob.beckett@collabora.com>
2022-07-13 13:38 ` [PATCH v4 1/8] drm/i915/ttm: dont trample cache_level overrides during ttm move Robert Beckett
2022-07-13 13:38 ` [PATCH v4 2/8] drm/i915: limit ttm to dma32 for i965G[M] Robert Beckett
2022-07-13 13:38 ` [PATCH v4 3/8] drm/i915/ttm: only trust snooping for dgfx when deciding default cache_level Robert Beckett
2022-07-13 13:38 ` [PATCH v4 4/8] drm/i915: add gen6 ppgtt dummy creation function Robert Beckett
2022-07-13 13:38 ` [PATCH v4 5/8] drm/i915: setup ggtt scratch page after memory regions Robert Beckett
2022-07-13 13:38 ` [PATCH v4 6/8] drm/i915: allow volatile buffers to use ttm pool allocator Robert Beckett
2022-07-13 13:38 ` [PATCH v4 7/8] drm/i915/gem: further fix mman selftest Robert Beckett
2022-07-13 13:38 ` [PATCH v4 8/8] drm/i915: internal buffers use ttm backend Robert Beckett

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).