* [PATCH v9 1/8] drm/i915/gem: Break out some shmem backend utils
@ 2021-10-18  9:10 ` Matthew Auld
From: Matthew Auld @ 2021-10-18  9:10 UTC (permalink / raw)
  To: intel-gfx; +Cc: dri-devel, Thomas Hellström

From: Thomas Hellström <thomas.hellstrom@linux.intel.com>

Break out some shmem backend utils for future reuse by the TTM backend:
shmem_alloc_st(), shmem_free_st() and __shmem_writeback(), which we can
use to provide a shmem-backed TTM page pool for cached-only TTM
buffer objects.
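
As a rough orientation for what shmem_alloc_st() boils down to, here is a
minimal sketch of filling a scatter-gather table straight from a shmem
mapping. fill_st_from_shmem() is an illustrative name only, not part of the
patch; the real helper additionally falls back to the i915 shrinker on
allocation failure, coalesces contiguous pages up to max_segment, trims the
table and translates shmemfs' ENOSPC into ENOMEM.

#include <linux/err.h>
#include <linux/scatterlist.h>
#include <linux/shmem_fs.h>

/* Illustrative only: one page per sg entry, no shrinker fallback, no
 * segment coalescing, and unwinding of already-pinned pages on error is
 * omitted. */
static int fill_st_from_shmem(struct sg_table *st,
			      struct address_space *mapping,
			      unsigned long page_count, gfp_t gfp)
{
	struct scatterlist *sg = st->sgl;
	unsigned long i;

	st->nents = 0;
	for (i = 0; i < page_count; i++) {
		struct page *page;

		/* shmemfs allocates (or swaps in) the backing page for us. */
		page = shmem_read_mapping_page_gfp(mapping, i, gfp);
		if (IS_ERR(page))
			return PTR_ERR(page);

		sg_set_page(sg, page, PAGE_SIZE, 0);
		st->nents++;
		sg = sg_next(sg);
	}

	return 0;
}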

The main functional change here is that we now compute the page sizes
from the DMA segments rather than from the physical page address
segments.
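
Concretely, instead of OR-ing sg->length into a sg_page_sizes mask for every
CPU segment while building the table, shmem_get_pages() now passes
i915_sg_dma_sizes(st->sgl) to __i915_gem_object_set_pages(). That existing
helper walks the dma-mapped list and does roughly the following (shown here
under an illustrative name, with the sanity checks omitted):

/* Sketch of what i915_sg_dma_sizes() computes: build the page-size mask
 * from the DMA segment lengths rather than from the CPU page segments. */
static unsigned int sg_dma_page_sizes(struct scatterlist *sg)
{
	unsigned int page_sizes = 0;

	while (sg && sg_dma_len(sg)) {
		page_sizes |= sg_dma_len(sg);
		sg = sg_next(sg);
	}

	return page_sizes;
}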

v2(Reported-by: kernel test robot <lkp@intel.com>)
    - Make sure we initialise the mapping on the error path in
      shmem_get_pages()

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 181 +++++++++++++---------
 1 file changed, 106 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 11f072193f3b..36b711ae9e28 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -25,46 +25,61 @@ static void check_release_pagevec(struct pagevec *pvec)
 	cond_resched();
 }
 
-static int shmem_get_pages(struct drm_i915_gem_object *obj)
+static void shmem_free_st(struct sg_table *st, struct address_space *mapping,
+			  bool dirty, bool backup)
 {
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	struct intel_memory_region *mem = obj->mm.region;
-	const unsigned long page_count = obj->base.size / PAGE_SIZE;
+	struct sgt_iter sgt_iter;
+	struct pagevec pvec;
+	struct page *page;
+
+	mapping_clear_unevictable(mapping);
+
+	pagevec_init(&pvec);
+	for_each_sgt_page(page, sgt_iter, st) {
+		if (dirty)
+			set_page_dirty(page);
+
+		if (backup)
+			mark_page_accessed(page);
+
+		if (!pagevec_add(&pvec, page))
+			check_release_pagevec(&pvec);
+	}
+	if (pagevec_count(&pvec))
+		check_release_pagevec(&pvec);
+
+	sg_free_table(st);
+	kfree(st);
+}
+
+static struct sg_table *shmem_alloc_st(struct drm_i915_private *i915,
+				       size_t size, struct intel_memory_region *mr,
+				       struct address_space *mapping,
+				       unsigned int max_segment)
+{
+	const unsigned long page_count = size / PAGE_SIZE;
 	unsigned long i;
-	struct address_space *mapping;
 	struct sg_table *st;
 	struct scatterlist *sg;
-	struct sgt_iter sgt_iter;
 	struct page *page;
 	unsigned long last_pfn = 0;	/* suppress gcc warning */
-	unsigned int max_segment = i915_sg_segment_size();
-	unsigned int sg_page_sizes;
 	gfp_t noreclaim;
 	int ret;
 
-	/*
-	 * Assert that the object is not currently in any GPU domain. As it
-	 * wasn't in the GTT, there shouldn't be any way it could have been in
-	 * a GPU cache
-	 */
-	GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
-	GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
-
 	/*
 	 * If there's no chance of allocating enough pages for the whole
 	 * object, bail early.
 	 */
-	if (obj->base.size > resource_size(&mem->region))
-		return -ENOMEM;
+	if (size > resource_size(&mr->region))
+		return ERR_PTR(-ENOMEM);
 
 	st = kmalloc(sizeof(*st), GFP_KERNEL);
 	if (!st)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
-rebuild_st:
 	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
 		kfree(st);
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 	}
 
 	/*
@@ -73,14 +88,12 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
 	 *
 	 * Fail silently without starting the shrinker
 	 */
-	mapping = obj->base.filp->f_mapping;
 	mapping_set_unevictable(mapping);
 	noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
 	noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
 
 	sg = st->sgl;
 	st->nents = 0;
-	sg_page_sizes = 0;
 	for (i = 0; i < page_count; i++) {
 		const unsigned int shrink[] = {
 			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
@@ -135,10 +148,9 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
 		if (!i ||
 		    sg->length >= max_segment ||
 		    page_to_pfn(page) != last_pfn + 1) {
-			if (i) {
-				sg_page_sizes |= sg->length;
+			if (i)
 				sg = sg_next(sg);
-			}
+
 			st->nents++;
 			sg_set_page(sg, page, PAGE_SIZE, 0);
 		} else {
@@ -149,14 +161,65 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
 		/* Check that the i965g/gm workaround works. */
 		GEM_BUG_ON(gfp & __GFP_DMA32 && last_pfn >= 0x00100000UL);
 	}
-	if (sg) { /* loop terminated early; short sg table */
-		sg_page_sizes |= sg->length;
+	if (sg) /* loop terminated early; short sg table */
 		sg_mark_end(sg);
-	}
 
 	/* Trim unused sg entries to avoid wasting memory. */
 	i915_sg_trim(st);
 
+	return st;
+err_sg:
+	sg_mark_end(sg);
+	if (sg != st->sgl) {
+		shmem_free_st(st, mapping, false, false);
+	} else {
+		mapping_clear_unevictable(mapping);
+		sg_free_table(st);
+		kfree(st);
+	}
+
+	/*
+	 * shmemfs first checks if there is enough memory to allocate the page
+	 * and reports ENOSPC should there be insufficient, along with the usual
+	 * ENOMEM for a genuine allocation failure.
+	 *
+	 * We use ENOSPC in our driver to mean that we have run out of aperture
+	 * space and so want to translate the error from shmemfs back to our
+	 * usual understanding of ENOMEM.
+	 */
+	if (ret == -ENOSPC)
+		ret = -ENOMEM;
+
+	return ERR_PTR(ret);
+}
+
+static int shmem_get_pages(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct intel_memory_region *mem = obj->mm.region;
+	struct address_space *mapping = obj->base.filp->f_mapping;
+	const unsigned long page_count = obj->base.size / PAGE_SIZE;
+	unsigned int max_segment = i915_sg_segment_size();
+	struct sg_table *st;
+	struct sgt_iter sgt_iter;
+	struct page *page;
+	int ret;
+
+	/*
+	 * Assert that the object is not currently in any GPU domain. As it
+	 * wasn't in the GTT, there shouldn't be any way it could have been in
+	 * a GPU cache
+	 */
+	GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
+	GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
+
+rebuild_st:
+	st = shmem_alloc_st(i915, obj->base.size, mem, mapping, max_segment);
+	if (IS_ERR(st)) {
+		ret = PTR_ERR(st);
+		goto err_st;
+	}
+
 	ret = i915_gem_gtt_prepare_pages(obj, st);
 	if (ret) {
 		/*
@@ -168,6 +231,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
 			for_each_sgt_page(page, sgt_iter, st)
 				put_page(page);
 			sg_free_table(st);
+			kfree(st);
 
 			max_segment = PAGE_SIZE;
 			goto rebuild_st;
@@ -200,28 +264,12 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
 	if (IS_JSL_EHL(i915) && obj->flags & I915_BO_ALLOC_USER)
 		obj->cache_dirty = true;
 
-	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
+	__i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl));
 
 	return 0;
 
-err_sg:
-	sg_mark_end(sg);
 err_pages:
-	mapping_clear_unevictable(mapping);
-	if (sg != st->sgl) {
-		struct pagevec pvec;
-
-		pagevec_init(&pvec);
-		for_each_sgt_page(page, sgt_iter, st) {
-			if (!pagevec_add(&pvec, page))
-				check_release_pagevec(&pvec);
-		}
-		if (pagevec_count(&pvec))
-			check_release_pagevec(&pvec);
-	}
-	sg_free_table(st);
-	kfree(st);
-
+	shmem_free_st(st, mapping, false, false);
 	/*
 	 * shmemfs first checks if there is enough memory to allocate the page
 	 * and reports ENOSPC should there be insufficient, along with the usual
@@ -231,6 +279,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
 	 * space and so want to translate the error from shmemfs back to our
 	 * usual understanding of ENOMEM.
 	 */
+err_st:
 	if (ret == -ENOSPC)
 		ret = -ENOMEM;
 
@@ -251,10 +300,8 @@ shmem_truncate(struct drm_i915_gem_object *obj)
 	obj->mm.pages = ERR_PTR(-EFAULT);
 }
 
-static void
-shmem_writeback(struct drm_i915_gem_object *obj)
+static void __shmem_writeback(size_t size, struct address_space *mapping)
 {
-	struct address_space *mapping;
 	struct writeback_control wbc = {
 		.sync_mode = WB_SYNC_NONE,
 		.nr_to_write = SWAP_CLUSTER_MAX,
@@ -270,10 +317,9 @@ shmem_writeback(struct drm_i915_gem_object *obj)
 	 * instead of invoking writeback so they are aged and paged out
 	 * as normal.
 	 */
-	mapping = obj->base.filp->f_mapping;
 
 	/* Begin writeback on each dirty page */
-	for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) {
+	for (i = 0; i < size >> PAGE_SHIFT; i++) {
 		struct page *page;
 
 		page = find_lock_page(mapping, i);
@@ -296,6 +342,12 @@ shmem_writeback(struct drm_i915_gem_object *obj)
 	}
 }
 
+static void
+shmem_writeback(struct drm_i915_gem_object *obj)
+{
+	__shmem_writeback(obj->base.size, obj->base.filp->f_mapping);
+}
+
 void
 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
 				struct sg_table *pages,
@@ -316,11 +368,6 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
 
 void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages)
 {
-	struct sgt_iter sgt_iter;
-	struct pagevec pvec;
-	struct page *page;
-
-	GEM_WARN_ON(IS_DGFX(to_i915(obj->base.dev)));
 	__i915_gem_object_release_shmem(obj, pages, true);
 
 	i915_gem_gtt_finish_pages(obj, pages);
@@ -328,25 +375,9 @@ void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_
 	if (i915_gem_object_needs_bit17_swizzle(obj))
 		i915_gem_object_save_bit_17_swizzle(obj, pages);
 
-	mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
-
-	pagevec_init(&pvec);
-	for_each_sgt_page(page, sgt_iter, pages) {
-		if (obj->mm.dirty)
-			set_page_dirty(page);
-
-		if (obj->mm.madv == I915_MADV_WILLNEED)
-			mark_page_accessed(page);
-
-		if (!pagevec_add(&pvec, page))
-			check_release_pagevec(&pvec);
-	}
-	if (pagevec_count(&pvec))
-		check_release_pagevec(&pvec);
+	shmem_free_st(pages, file_inode(obj->base.filp)->i_mapping,
+		      obj->mm.dirty, obj->mm.madv == I915_MADV_WILLNEED);
 	obj->mm.dirty = false;
-
-	sg_free_table(pages);
-	kfree(pages);
 }
 
 static void
-- 
2.26.3


* [PATCH v9 2/8] drm/i915/ttm: add tt shmem backend
@ 2021-10-18  9:10   ` Matthew Auld
From: Matthew Auld @ 2021-10-18  9:10 UTC (permalink / raw)
  To: intel-gfx
  Cc: dri-devel, Thomas Hellström, Christian König, Oak Zeng

For cached objects we can allocate our pages directly in shmem. This
should make it possible (in a later patch) to utilise the existing
i915-gem shrinker code for such objects. For now this is still disabled.
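
For orientation before the full diff, a heavily condensed sketch of the
populate path this patch adds is below. The names follow the diff, but this
is not the complete implementation: error unwinding, filling ttm->pages[],
the SWAPPED flag handling and the unpopulate/purge paths are all left out.

/* Condensed, illustrative view of i915_ttm_tt_shmem_populate(): back the TT
 * with a lazily created shmem file, allocate the pages through
 * shmem_alloc_st() and stream-map them for the device. */
static int shmem_tt_populate_sketch(struct drm_i915_private *i915,
				    struct intel_memory_region *mr,
				    struct i915_ttm_tt *i915_tt)
{
	const size_t size = i915_tt->ttm.num_pages << PAGE_SHIFT;
	struct file *filp = i915_tt->filp;
	struct sg_table *st;

	if (!filp) {
		filp = shmem_file_setup("i915-shmem-tt", size, VM_NORESERVE);
		if (IS_ERR(filp))
			return PTR_ERR(filp);

		mapping_set_gfp_mask(filp->f_mapping,
				     GFP_HIGHUSER | __GFP_RECLAIMABLE);
		i915_tt->filp = filp;
	}

	st = shmem_alloc_st(i915, size, mr, filp->f_mapping,
			    i915_sg_segment_size());
	if (IS_ERR(st))
		return PTR_ERR(st);

	if (dma_map_sg_attrs(i915_tt->dev, st->sgl, st->nents,
			     DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC) <= 0)
		return -EINVAL; /* the real code frees st here */

	i915_tt->cached_st = st;
	return 0;
}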

v2(Thomas):
  - Add optional try_to_writeback hook for objects. Importantly we need
    to check if the object is even still shrinkable; in between us
    dropping the shrinker LRU lock and acquiring the object lock it could for
    example have been moved. Also we need to differentiate between
    "lazy" shrinking and the immediate writeback mode. Also later we need to
    handle objects which don't even have mm.pages, so bundling this into
    put_pages() would require somehow handling that edge case, hence
    just letting the ttm backend handle everything in try_to_writeback
    doesn't seem too bad.
v3(Thomas):
  - Likely a bad idea to touch the object from the unpopulate hook,
    since it's not possible to hold a reference, without also creating
    circular dependency, so likely this is too fragile. For now just
    ensure we at least mark the pages as dirty/accessed when called from the
    shrinker on WILLNEED objects.
  - s/try_to_writeback/shrinker_release_pages, since this can do more
    than just writeback.
  - Get rid of do_backup boolean and just set the SWAPPED flag prior to
    calling unpopulate.
  - Keep shmem_tt as lowest priority for the TTM LRU bo_swapout walk, since
    these just get skipped anyway. We can try to come up with something
    better later.
v4(Thomas):
  - s/PCI_DMA/DMA/. Also drop NO_KERNEL_MAPPING and NO_WARN, which
    apparently don't do anything with streaming mappings.
  - Just pass along the error for ->truncate, and assume nothing.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Oak Zeng <oak.zeng@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Acked-by: Oak Zeng <oak.zeng@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |  11 +-
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |   4 +-
 drivers/gpu/drm/i915/gem/i915_gem_pages.c     |   6 +-
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c     |  18 +-
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  17 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c       | 233 ++++++++++++++++--
 6 files changed, 247 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 9df3ee60604e..e641db297e0e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -93,7 +93,6 @@ void i915_gem_flush_free_objects(struct drm_i915_private *i915);
 
 struct sg_table *
 __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj);
-void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
 
 /**
  * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle
@@ -449,7 +448,7 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 }
 
 int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
-void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
+int i915_gem_object_truncate(struct drm_i915_gem_object *obj);
 void i915_gem_object_writeback(struct drm_i915_gem_object *obj);
 
 /**
@@ -612,6 +611,14 @@ int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj,
 bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj,
 					enum intel_memory_type type);
 
+struct sg_table *shmem_alloc_st(struct drm_i915_private *i915,
+				size_t size, struct intel_memory_region *mr,
+				struct address_space *mapping,
+				unsigned int max_segment);
+void shmem_free_st(struct sg_table *st, struct address_space *mapping,
+		   bool dirty, bool backup);
+void __shmem_writeback(size_t size, struct address_space *mapping);
+
 #ifdef CONFIG_MMU_NOTIFIER
 static inline bool
 i915_gem_object_is_userptr(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 7c3da4e3e737..7dd5f804aab3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -54,8 +54,10 @@ struct drm_i915_gem_object_ops {
 	int (*get_pages)(struct drm_i915_gem_object *obj);
 	void (*put_pages)(struct drm_i915_gem_object *obj,
 			  struct sg_table *pages);
-	void (*truncate)(struct drm_i915_gem_object *obj);
+	int (*truncate)(struct drm_i915_gem_object *obj);
 	void (*writeback)(struct drm_i915_gem_object *obj);
+	int (*shrinker_release_pages)(struct drm_i915_gem_object *obj,
+				      bool should_writeback);
 
 	int (*pread)(struct drm_i915_gem_object *obj,
 		     const struct drm_i915_gem_pread *arg);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 8eb1c3a6fc9c..ea6d9b3d2d6b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -158,11 +158,13 @@ int i915_gem_object_pin_pages_unlocked(struct drm_i915_gem_object *obj)
 }
 
 /* Immediately discard the backing storage */
-void i915_gem_object_truncate(struct drm_i915_gem_object *obj)
+int i915_gem_object_truncate(struct drm_i915_gem_object *obj)
 {
 	drm_gem_free_mmap_offset(&obj->base);
 	if (obj->ops->truncate)
-		obj->ops->truncate(obj);
+		return obj->ops->truncate(obj);
+
+	return 0;
 }
 
 /* Try to discard unwanted pages */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 36b711ae9e28..8375dc23f233 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -25,8 +25,8 @@ static void check_release_pagevec(struct pagevec *pvec)
 	cond_resched();
 }
 
-static void shmem_free_st(struct sg_table *st, struct address_space *mapping,
-			  bool dirty, bool backup)
+void shmem_free_st(struct sg_table *st, struct address_space *mapping,
+		   bool dirty, bool backup)
 {
 	struct sgt_iter sgt_iter;
 	struct pagevec pvec;
@@ -52,10 +52,10 @@ static void shmem_free_st(struct sg_table *st, struct address_space *mapping,
 	kfree(st);
 }
 
-static struct sg_table *shmem_alloc_st(struct drm_i915_private *i915,
-				       size_t size, struct intel_memory_region *mr,
-				       struct address_space *mapping,
-				       unsigned int max_segment)
+struct sg_table *shmem_alloc_st(struct drm_i915_private *i915,
+				size_t size, struct intel_memory_region *mr,
+				struct address_space *mapping,
+				unsigned int max_segment)
 {
 	const unsigned long page_count = size / PAGE_SIZE;
 	unsigned long i;
@@ -286,7 +286,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
 	return ret;
 }
 
-static void
+static int
 shmem_truncate(struct drm_i915_gem_object *obj)
 {
 	/*
@@ -298,9 +298,11 @@ shmem_truncate(struct drm_i915_gem_object *obj)
 	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
 	obj->mm.madv = __I915_MADV_PURGED;
 	obj->mm.pages = ERR_PTR(-EFAULT);
+
+	return 0;
 }
 
-static void __shmem_writeback(size_t size, struct address_space *mapping)
+void __shmem_writeback(size_t size, struct address_space *mapping)
 {
 	struct writeback_control wbc = {
 		.sync_mode = WB_SYNC_NONE,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index 5ab136ffdeb2..ae2a8d54b7a4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -56,19 +56,24 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj,
 	return false;
 }
 
-static void try_to_writeback(struct drm_i915_gem_object *obj,
-			     unsigned int flags)
+static int try_to_writeback(struct drm_i915_gem_object *obj, unsigned int flags)
 {
+	if (obj->ops->shrinker_release_pages)
+		return obj->ops->shrinker_release_pages(obj,
+							flags & I915_SHRINK_WRITEBACK);
+
 	switch (obj->mm.madv) {
 	case I915_MADV_DONTNEED:
 		i915_gem_object_truncate(obj);
-		return;
+		return 0;
 	case __I915_MADV_PURGED:
-		return;
+		return 0;
 	}
 
 	if (flags & I915_SHRINK_WRITEBACK)
 		i915_gem_object_writeback(obj);
+
+	return 0;
 }
 
 /**
@@ -222,8 +227,8 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww,
 				}
 
 				if (!__i915_gem_object_put_pages(obj)) {
-					try_to_writeback(obj, shrink);
-					count += obj->base.size >> PAGE_SHIFT;
+					if (!try_to_writeback(obj, shrink))
+						count += obj->base.size >> PAGE_SHIFT;
 				}
 				if (!ww)
 					i915_gem_object_unlock(obj);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 74a1ffd0d7dd..0fe1eb8f38e9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -35,6 +35,8 @@
  * @ttm: The base TTM page vector.
  * @dev: The struct device used for dma mapping and unmapping.
  * @cached_st: The cached scatter-gather table.
+ * @is_shmem: Set if using shmem.
+ * @filp: The shmem file, if using shmem backend.
  *
  * Note that DMA may be going on right up to the point where the page-
  * vector is unpopulated in delayed destroy. Hence keep the
@@ -46,6 +48,9 @@ struct i915_ttm_tt {
 	struct ttm_tt ttm;
 	struct device *dev;
 	struct sg_table *cached_st;
+
+	bool is_shmem;
+	struct file *filp;
 };
 
 static const struct ttm_place sys_placement_flags = {
@@ -179,12 +184,88 @@ i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj,
 	placement->busy_placement = busy;
 }
 
+static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev,
+				      struct ttm_tt *ttm,
+				      struct ttm_operation_ctx *ctx)
+{
+	struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
+	struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM];
+	struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+	const unsigned int max_segment = i915_sg_segment_size();
+	const size_t size = ttm->num_pages << PAGE_SHIFT;
+	struct file *filp = i915_tt->filp;
+	struct sgt_iter sgt_iter;
+	struct sg_table *st;
+	struct page *page;
+	unsigned long i;
+	int err;
+
+	if (!filp) {
+		struct address_space *mapping;
+		gfp_t mask;
+
+		filp = shmem_file_setup("i915-shmem-tt", size, VM_NORESERVE);
+		if (IS_ERR(filp))
+			return PTR_ERR(filp);
+
+		mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
+
+		mapping = filp->f_mapping;
+		mapping_set_gfp_mask(mapping, mask);
+		GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
+
+		i915_tt->filp = filp;
+	}
+
+	st = shmem_alloc_st(i915, size, mr, filp->f_mapping, max_segment);
+	if (IS_ERR(st))
+		return PTR_ERR(st);
+
+	err = dma_map_sg_attrs(i915_tt->dev,
+			       st->sgl, st->nents,
+			       DMA_BIDIRECTIONAL,
+			       DMA_ATTR_SKIP_CPU_SYNC);
+	if (err <= 0) {
+		err = -EINVAL;
+		goto err_free_st;
+	}
+
+	i = 0;
+	for_each_sgt_page(page, sgt_iter, st)
+		ttm->pages[i++] = page;
+
+	if (ttm->page_flags & TTM_TT_FLAG_SWAPPED)
+		ttm->page_flags &= ~TTM_TT_FLAG_SWAPPED;
+
+	i915_tt->cached_st = st;
+	return 0;
+
+err_free_st:
+	shmem_free_st(st, filp->f_mapping, false, false);
+	return err;
+}
+
+static void i915_ttm_tt_shmem_unpopulate(struct ttm_tt *ttm)
+{
+	struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+	bool backup = ttm->page_flags & TTM_TT_FLAG_SWAPPED;
+
+	dma_unmap_sg(i915_tt->dev, i915_tt->cached_st->sgl,
+		     i915_tt->cached_st->nents,
+		     DMA_BIDIRECTIONAL);
+
+	shmem_free_st(fetch_and_zero(&i915_tt->cached_st),
+		      file_inode(i915_tt->filp)->i_mapping,
+		      backup, backup);
+}
+
 static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
 					 uint32_t page_flags)
 {
 	struct ttm_resource_manager *man =
 		ttm_manager_type(bo->bdev, bo->resource->mem_type);
 	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+	enum ttm_caching caching = i915_ttm_select_tt_caching(obj);
 	struct i915_ttm_tt *i915_tt;
 	int ret;
 
@@ -196,36 +277,62 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
 	    man->use_tt)
 		page_flags |= TTM_TT_FLAG_ZERO_ALLOC;
 
-	ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags,
-			  i915_ttm_select_tt_caching(obj));
-	if (ret) {
-		kfree(i915_tt);
-		return NULL;
+	if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached) {
+		page_flags |= TTM_TT_FLAG_EXTERNAL |
+			      TTM_TT_FLAG_EXTERNAL_MAPPABLE;
+		i915_tt->is_shmem = true;
 	}
 
+	ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, caching);
+	if (ret)
+		goto err_free;
+
 	i915_tt->dev = obj->base.dev->dev;
 
 	return &i915_tt->ttm;
+
+err_free:
+	kfree(i915_tt);
+	return NULL;
+}
+
+static int i915_ttm_tt_populate(struct ttm_device *bdev,
+				struct ttm_tt *ttm,
+				struct ttm_operation_ctx *ctx)
+{
+	struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+
+	if (i915_tt->is_shmem)
+		return i915_ttm_tt_shmem_populate(bdev, ttm, ctx);
+
+	return ttm_pool_alloc(&bdev->pool, ttm, ctx);
 }
 
 static void i915_ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
 {
 	struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
 
-	if (i915_tt->cached_st) {
-		dma_unmap_sgtable(i915_tt->dev, i915_tt->cached_st,
-				  DMA_BIDIRECTIONAL, 0);
-		sg_free_table(i915_tt->cached_st);
-		kfree(i915_tt->cached_st);
-		i915_tt->cached_st = NULL;
+	if (i915_tt->is_shmem) {
+		i915_ttm_tt_shmem_unpopulate(ttm);
+	} else {
+		if (i915_tt->cached_st) {
+			dma_unmap_sgtable(i915_tt->dev, i915_tt->cached_st,
+					  DMA_BIDIRECTIONAL, 0);
+			sg_free_table(i915_tt->cached_st);
+			kfree(i915_tt->cached_st);
+			i915_tt->cached_st = NULL;
+		}
+		ttm_pool_free(&bdev->pool, ttm);
 	}
-	ttm_pool_free(&bdev->pool, ttm);
 }
 
 static void i915_ttm_tt_destroy(struct ttm_device *bdev, struct ttm_tt *ttm)
 {
 	struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
 
+	if (i915_tt->filp)
+		fput(i915_tt->filp);
+
 	ttm_tt_fini(ttm);
 	kfree(i915_tt);
 }
@@ -235,6 +342,14 @@ static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo,
 {
 	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
 
+	/*
+	 * EXTERNAL objects should never be swapped out by TTM, instead we need
+	 * to handle that ourselves. TTM will already skip such objects for us,
+	 * but we would like to avoid grabbing locks for no good reason.
+	 */
+	if (bo->ttm && bo->ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
+		return -EBUSY;
+
 	/* Will do for now. Our pinned objects are still on TTM's LRU lists */
 	return i915_gem_object_evictable(obj);
 }
@@ -328,9 +443,11 @@ static void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj)
 	i915_gem_object_set_cache_coherency(obj, cache_level);
 }
 
-static void i915_ttm_purge(struct drm_i915_gem_object *obj)
+static int i915_ttm_purge(struct drm_i915_gem_object *obj)
 {
 	struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+	struct i915_ttm_tt *i915_tt =
+		container_of(bo->ttm, typeof(*i915_tt), ttm);
 	struct ttm_operation_ctx ctx = {
 		.interruptible = true,
 		.no_wait_gpu = false,
@@ -339,17 +456,74 @@ static void i915_ttm_purge(struct drm_i915_gem_object *obj)
 	int ret;
 
 	if (obj->mm.madv == __I915_MADV_PURGED)
-		return;
+		return 0;
 
-	/* TTM's purge interface. Note that we might be reentering. */
 	ret = ttm_bo_validate(bo, &place, &ctx);
-	if (!ret) {
-		obj->write_domain = 0;
-		obj->read_domains = 0;
-		i915_ttm_adjust_gem_after_move(obj);
-		i915_ttm_free_cached_io_st(obj);
-		obj->mm.madv = __I915_MADV_PURGED;
+	if (ret)
+		return ret;
+
+	if (bo->ttm && i915_tt->filp) {
+		/*
+		 * The below fput(which eventually calls shmem_truncate) might
+		 * be delayed by worker, so when directly called to purge the
+		 * pages(like by the shrinker) we should try to be more
+		 * aggressive and release the pages immediately.
+		 */
+		shmem_truncate_range(file_inode(i915_tt->filp),
+				     0, (loff_t)-1);
+		fput(fetch_and_zero(&i915_tt->filp));
 	}
+
+	obj->write_domain = 0;
+	obj->read_domains = 0;
+	i915_ttm_adjust_gem_after_move(obj);
+	i915_ttm_free_cached_io_st(obj);
+	obj->mm.madv = __I915_MADV_PURGED;
+	return 0;
+}
+
+static int i915_ttm_shrinker_release_pages(struct drm_i915_gem_object *obj,
+					   bool should_writeback)
+{
+	struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+	struct i915_ttm_tt *i915_tt =
+		container_of(bo->ttm, typeof(*i915_tt), ttm);
+	struct ttm_operation_ctx ctx = {
+		.interruptible = true,
+		.no_wait_gpu = false,
+	};
+	struct ttm_placement place = {};
+	int ret;
+
+	if (!bo->ttm || bo->resource->mem_type != TTM_PL_SYSTEM)
+		return 0;
+
+	GEM_BUG_ON(!i915_tt->is_shmem);
+
+	if (!i915_tt->filp)
+		return 0;
+
+	switch (obj->mm.madv) {
+	case I915_MADV_DONTNEED:
+		return i915_ttm_purge(obj);
+	case __I915_MADV_PURGED:
+		return 0;
+	}
+
+	if (bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED)
+		return 0;
+
+	bo->ttm->page_flags |= TTM_TT_FLAG_SWAPPED;
+	ret = ttm_bo_validate(bo, &place, &ctx);
+	if (ret) {
+		bo->ttm->page_flags &= ~TTM_TT_FLAG_SWAPPED;
+		return ret;
+	}
+
+	if (should_writeback)
+		__shmem_writeback(obj->base.size, i915_tt->filp->f_mapping);
+
+	return 0;
 }
 
 static void i915_ttm_swap_notify(struct ttm_buffer_object *bo)
@@ -620,6 +794,7 @@ static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
 
 static struct ttm_device_funcs i915_ttm_bo_driver = {
 	.ttm_tt_create = i915_ttm_tt_create,
+	.ttm_tt_populate = i915_ttm_tt_populate,
 	.ttm_tt_unpopulate = i915_ttm_tt_unpopulate,
 	.ttm_tt_destroy = i915_ttm_tt_destroy,
 	.eviction_valuable = i915_ttm_eviction_valuable,
@@ -687,12 +862,17 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
 	}
 
 	if (!i915_gem_object_has_pages(obj)) {
+		struct i915_ttm_tt *i915_tt =
+			container_of(bo->ttm, typeof(*i915_tt), ttm);
+
 		/* Object either has a page vector or is an iomem object */
 		st = bo->ttm ? i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st;
 		if (IS_ERR(st))
 			return PTR_ERR(st);
 
 		__i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl));
+		if (!bo->ttm || !i915_tt->is_shmem)
+			i915_gem_object_make_unshrinkable(obj);
 	}
 
 	return ret;
@@ -772,6 +952,8 @@ static void i915_ttm_put_pages(struct drm_i915_gem_object *obj,
 static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
 {
 	struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+	struct i915_ttm_tt *i915_tt =
+		container_of(bo->ttm, typeof(*i915_tt), ttm);
 
 	/*
 	 * Don't manipulate the TTM LRUs while in TTM bo destruction.
@@ -784,7 +966,10 @@ static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
 	 * Put on the correct LRU list depending on the MADV status
 	 */
 	spin_lock(&bo->bdev->lru_lock);
-	if (obj->mm.madv != I915_MADV_WILLNEED) {
+	if (bo->ttm && i915_tt->filp) {
+		/* Try to keep shmem_tt from being considered for shrinking. */
+		bo->priority = TTM_MAX_BO_PRIORITY - 1;
+	} else if (obj->mm.madv != I915_MADV_WILLNEED) {
 		bo->priority = I915_TTM_PRIO_PURGE;
 	} else if (!i915_gem_object_has_pages(obj)) {
 		if (bo->priority < I915_TTM_PRIO_HAS_PAGES)
@@ -886,9 +1071,12 @@ static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = {
 	.get_pages = i915_ttm_get_pages,
 	.put_pages = i915_ttm_put_pages,
 	.truncate = i915_ttm_purge,
+	.shrinker_release_pages = i915_ttm_shrinker_release_pages,
+
 	.adjust_lru = i915_ttm_adjust_lru,
 	.delayed_free = i915_ttm_delayed_free,
 	.migrate = i915_ttm_migrate,
+
 	.mmap_offset = i915_ttm_mmap_offset,
 	.mmap_ops = &vm_ops_ttm,
 };
@@ -943,7 +1131,6 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
 	obj->mm.region = intel_memory_region_get(mem);
 	INIT_LIST_HEAD(&obj->mm.region_link);
 
-	i915_gem_object_make_unshrinkable(obj);
 	INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN);
 	mutex_init(&obj->ttm.get_io_page.lock);
 	bo_type = (obj->flags & I915_BO_ALLOC_USER) ? ttm_bo_type_device :
-- 
2.26.3


 	.migrate = i915_ttm_migrate,
+
 	.mmap_offset = i915_ttm_mmap_offset,
 	.mmap_ops = &vm_ops_ttm,
 };
@@ -943,7 +1131,6 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
 	obj->mm.region = intel_memory_region_get(mem);
 	INIT_LIST_HEAD(&obj->mm.region_link);
 
-	i915_gem_object_make_unshrinkable(obj);
 	INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN);
 	mutex_init(&obj->ttm.get_io_page.lock);
 	bo_type = (obj->flags & I915_BO_ALLOC_USER) ? ttm_bo_type_device :
-- 
2.26.3


^ permalink raw reply related	[flat|nested] 34+ messages in thread
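
The purge path in this patch leans on shmem semantics: truncating the shmem file releases the backing pages immediately, whereas merely dropping the last file reference can be deferred (the final fput() may run from a worker). Below is a minimal userspace sketch of the same idea, using a memfd as a stand-in for the backend's internal shmem file -- an illustrative analogy only, not i915 code, with arbitrary names and sizes:

#define _GNU_SOURCE
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	const size_t sz = 4 << 20;		/* 4 MiB of shmem-backed pages */
	int fd = memfd_create("purge-demo", 0);	/* stand-in for i915_tt->filp */
	char *p;

	if (fd < 0)
		return 1;
	if (ftruncate(fd, sz))
		return 1;

	p = mmap(NULL, sz, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return 1;
	memset(p, 0xaa, sz);			/* fault in the pages */
	munmap(p, sz);

	ftruncate(fd, 0);			/* "purge": pages are released now */
	close(fd);				/* the final release may be deferred */
	return 0;
}

The driver-side equivalent is the shmem_truncate_range() plus fput(fetch_and_zero(&i915_tt->filp)) pair in i915_ttm_purge() above.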

* [PATCH v9 3/8] drm/i915/gtt: drop unneeded make_unshrinkable
  2021-10-18  9:10 ` [Intel-gfx] " Matthew Auld
@ 2021-10-18  9:10   ` Matthew Auld
  -1 siblings, 0 replies; 34+ messages in thread
From: Matthew Auld @ 2021-10-18  9:10 UTC (permalink / raw)
  To: intel-gfx; +Cc: dri-devel, Thomas Hellström

We already do this when mapping the pages.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 1 -
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 1 -
 2 files changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index 890191f286e3..baea9770200a 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -185,7 +185,6 @@ static void gen6_alloc_va_range(struct i915_address_space *vm,
 
 			pt = stash->pt[0];
 			__i915_gem_object_pin_pages(pt->base);
-			i915_gem_object_make_unshrinkable(pt->base);
 
 			fill32_px(pt, vm->scratch[0]->encode);
 
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 037a9a6e4889..8af2f709571c 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -301,7 +301,6 @@ static void __gen8_ppgtt_alloc(struct i915_address_space * const vm,
 
 			pt = stash->pt[!!lvl];
 			__i915_gem_object_pin_pages(pt->base);
-			i915_gem_object_make_unshrinkable(pt->base);
 
 			fill_px(pt, vm->scratch[lvl]->encode);
 
-- 
2.26.3


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH v9 4/8] drm/i915: drop unneeded make_unshrinkable in free_object
  2021-10-18  9:10 ` [Intel-gfx] " Matthew Auld
@ 2021-10-18  9:10   ` Matthew Auld
  -1 siblings, 0 replies; 34+ messages in thread
From: Matthew Auld @ 2021-10-18  9:10 UTC (permalink / raw)
  To: intel-gfx; +Cc: dri-devel, Thomas Hellström

The comment here is no longer accurate, since the current shrinker code
requires a full ref before touching any objects. Also unset_pages()
should already do the required make_unshrinkable() for us, if needed,
which is also nicely balanced with set_pages().

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 76ce6a1500bc..1dc3c1940c32 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -337,15 +337,6 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	 */
 	atomic_inc(&i915->mm.free_count);
 
-	/*
-	 * This serializes freeing with the shrinker. Since the free
-	 * is delayed, first by RCU then by the workqueue, we want the
-	 * shrinker to be able to free pages of unreferenced objects,
-	 * or else we may oom whilst there are plenty of deferred
-	 * freed objects.
-	 */
-	i915_gem_object_make_unshrinkable(obj);
-
 	/*
 	 * Since we require blocking on struct_mutex to unbind the freed
 	 * object from the GPU before releasing resources back to the
-- 
2.26.3


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH v9 5/8] drm/i915: add some kernel-doc for shrink_pin and friends
  2021-10-18  9:10 ` [Intel-gfx] " Matthew Auld
@ 2021-10-18  9:10   ` Matthew Auld
  -1 siblings, 0 replies; 34+ messages in thread
From: Matthew Auld @ 2021-10-18  9:10 UTC (permalink / raw)
  To: intel-gfx; +Cc: dri-devel, Thomas Hellström

Attempt to document shrink_pin and the other relevant interfaces that
interact with it, before we start messing with it.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 24 +++++++++++++-
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  | 31 +++++++++++++++++++
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 7dd5f804aab3..f4233c4e8d2e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -461,6 +461,28 @@ struct drm_i915_gem_object {
 		 * instead go through the pin/unpin interfaces.
 		 */
 		atomic_t pages_pin_count;
+
+		/**
+		 * @shrink_pin: Prevents the pages from being made visible to
+		 * the shrinker, while the shrink_pin is non-zero. Most users
+		 * should pretty much never have to care about this, outside of
+		 * some special use cases.
+		 *
+		 * By default most objects will start out as visible to the
+		 * shrinker(if I915_GEM_OBJECT_IS_SHRINKABLE) as soon as the
+		 * backing pages are attached to the object, like in
+		 * __i915_gem_object_set_pages(). They will then be removed from the
+		 * shrinker list once the pages are released.
+		 *
+		 * The @shrink_pin is incremented by calling
+		 * i915_gem_object_make_unshrinkable(), which will also remove
+		 * the object from the shrinker list, if the pin count was zero.
+		 *
+		 * Callers will then typically call
+		 * i915_gem_object_make_shrinkable() or
+		 * i915_gem_object_make_purgeable() to decrement the pin count,
+		 * and make the pages visible again.
+		 */
 		atomic_t shrink_pin;
 
 		/**
@@ -522,7 +544,7 @@ struct drm_i915_gem_object {
 		struct i915_gem_object_page_iter get_dma_page;
 
 		/**
-		 * Element within i915->mm.unbound_list or i915->mm.bound_list,
+		 * Element within i915->mm.shrink_list or i915->mm.purge_list,
 		 * locked by i915->mm.obj_lock.
 		 */
 		struct list_head link;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index ae2a8d54b7a4..66121fedc655 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -463,6 +463,16 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
 
 #define obj_to_i915(obj__) to_i915((obj__)->base.dev)
 
+/**
+ * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By
+ * default all object types that support shrinking(see IS_SHRINKABLE) will also
+ * make the object visible to the shrinker after allocating the system memory
+ * pages.
+ * @obj: The GEM object.
+ *
+ * This is typically used for special kernel internal objects that can't be
+ * easily processed by the shrinker, like if they are perma-pinned.
+ */
 void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *i915 = obj_to_i915(obj);
@@ -513,12 +523,33 @@ static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
 	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 }
 
+/**
+ * i915_gem_object_make_shrinkable - Move the object to the tail of the
+ * shrinkable list. Objects on this list might be swapped out. Used with
+ * WILLNEED objects.
+ * @obj: The GEM object.
+ *
+ * MUST only be called on objects which have backing pages.
+ *
+ * MUST be balanced with previous call to i915_gem_object_make_unshrinkable().
+ */
 void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
 {
 	__i915_gem_object_make_shrinkable(obj,
 					  &obj_to_i915(obj)->mm.shrink_list);
 }
 
+/**
+ * i915_gem_object_make_purgeable - Move the object to the tail of the purgeable
+ * list. Used with DONTNEED objects. Unlike with shrinkable objects, the
+ * shrinker will attempt to discard the backing pages, instead of trying to swap
+ * them out.
+ * @obj: The GEM object.
+ *
+ * MUST only be called on objects which have backing pages.
+ *
+ * MUST be balanced with previous call to i915_gem_object_make_unshrinkable().
+ */
 void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj)
 {
 	__i915_gem_object_make_shrinkable(obj,
-- 
2.26.3


^ permalink raw reply related	[flat|nested] 34+ messages in thread
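
The kernel-doc added above describes a simple pin protocol: i915_gem_object_make_unshrinkable() takes a shrink pin and removes the object from the shrinker LRU when the count goes from zero to one, while i915_gem_object_make_shrinkable()/make_purgeable() drop the pin and put the object back on the tail of the relevant list. The following is a simplified, self-contained model of that protocol, for illustration only -- the real helpers use atomics, take mm.obj_lock and distinguish the shrink and purge lists:

#include <stdbool.h>
#include <stdio.h>

/* Toy object: only the fields needed to model the pin protocol. */
struct obj {
	int shrink_pin;		/* models obj->mm.shrink_pin */
	bool on_shrink_list;	/* models membership of the shrinker LRU */
};

/* First pin hides the object from the shrinker. */
static void make_unshrinkable(struct obj *o)
{
	if (o->shrink_pin++ == 0)
		o->on_shrink_list = false;
}

/* Dropping the last pin makes the pages visible again. */
static void make_shrinkable(struct obj *o)
{
	if (o->shrink_pin == 0 || --o->shrink_pin == 0)
		o->on_shrink_list = true;
}

int main(void)
{
	struct obj o = { .shrink_pin = 0, .on_shrink_list = true };

	make_unshrinkable(&o);	/* e.g. a perma-pinned kernel-internal object */
	printf("pinned:   pins=%d on_list=%d\n", o.shrink_pin, o.on_shrink_list);

	make_shrinkable(&o);	/* balanced unpin: back on the shrinker LRU */
	printf("unpinned: pins=%d on_list=%d\n", o.shrink_pin, o.on_shrink_list);
	return 0;
}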

* [PATCH v9 6/8] drm/i915/ttm: move shrinker management into adjust_lru
  2021-10-18  9:10 ` [Intel-gfx] " Matthew Auld
@ 2021-10-18  9:10   ` Matthew Auld
  -1 siblings, 0 replies; 34+ messages in thread
From: Matthew Auld @ 2021-10-18  9:10 UTC (permalink / raw)
  To: intel-gfx; +Cc: dri-devel, Thomas Hellström

We currently just evict lmem objects to system memory when under memory
pressure. For this case we might lack the usual object mm.pages, which
effectively hides the pages from the i915-gem shrinker, until we
actually "attach" the TT to the object, or in the case of lmem-only
objects it just gets migrated back to lmem when touched again.

For all cases we can just adjust the i915 shrinker LRU each time we also
adjust the TTM LRU. The two cases we care about are:

  1) When something is moved by TTM, including when initially populating
     an object. Importantly this covers the case where TTM moves something from
     lmem <-> smem, outside of the normal get_pages() interface, which
     should still ensure the shmem pages underneath are reclaimable.

  2) When calling into i915_gem_object_unlock(). The unlock should
     ensure the object is removed from the shrinker LRU, if it was indeed
     swapped out, or just purged, when the shrinker drops the object lock.

v2(Thomas):
  - Handle managing the shrinker LRU in adjust_lru, where it is always
    safe to touch the object.
v3(Thomas):
  - Pretty much a re-write. This time piggy back off the shrink_pin
    stuff, which actually seems to fit quite well for what we want here.
v4(Thomas):
  - Just use a simple boolean for tracking ttm_shrinkable.
v5:
  - Ensure we call adjust_lru when faulting the object, to ensure the
    pages are visible to the shrinker, if needed.
  - Add back the adjust_lru when in i915_ttm_move (Thomas)
v6(Reported-by: kernel test robot <lkp@intel.com>):
  - Remove unused i915_tt

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> #v4
---
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |  8 ++
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 14 ++-
 drivers/gpu/drm/i915/gem/i915_gem_pages.c     |  5 +-
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  | 45 ++++++++--
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c       | 87 ++++++++++++++++---
 5 files changed, 137 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index e641db297e0e..3eac8cf2ae10 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -294,6 +294,12 @@ i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj)
 	return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_SHRINKABLE);
 }
 
+static inline bool
+i915_gem_object_has_self_managed_shrink_list(const struct drm_i915_gem_object *obj)
+{
+	return i915_gem_object_type_has(obj, I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST);
+}
+
 static inline bool
 i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj)
 {
@@ -531,6 +537,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 
 void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj);
 void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj);
+void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj);
+void __i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj);
 void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj);
 
 static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index f4233c4e8d2e..5718a09f5533 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -34,9 +34,11 @@ struct i915_lut_handle {
 
 struct drm_i915_gem_object_ops {
 	unsigned int flags;
-#define I915_GEM_OBJECT_IS_SHRINKABLE	BIT(1)
-#define I915_GEM_OBJECT_IS_PROXY	BIT(2)
-#define I915_GEM_OBJECT_NO_MMAP		BIT(3)
+#define I915_GEM_OBJECT_IS_SHRINKABLE			BIT(1)
+/* Skip the shrinker management in set_pages/unset_pages */
+#define I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST	BIT(2)
+#define I915_GEM_OBJECT_IS_PROXY			BIT(3)
+#define I915_GEM_OBJECT_NO_MMAP				BIT(4)
 
 	/* Interface between the GEM object and its backing storage.
 	 * get_pages() is called once prior to the use of the associated set
@@ -485,6 +487,12 @@ struct drm_i915_gem_object {
 		 */
 		atomic_t shrink_pin;
 
+		/**
+		 * @ttm_shrinkable: True when the object is using shmem pages
+		 * underneath. Protected by the object lock.
+		 */
+		bool ttm_shrinkable;
+
 		/**
 		 * Priority list of potential placements for this object.
 		 */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index ea6d9b3d2d6b..308e22a80af4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -68,7 +68,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 		shrinkable = false;
 	}
 
-	if (shrinkable) {
+	if (shrinkable && !i915_gem_object_has_self_managed_shrink_list(obj)) {
 		struct list_head *list;
 		unsigned long flags;
 
@@ -210,7 +210,8 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
 	if (i915_gem_object_is_volatile(obj))
 		obj->mm.madv = I915_MADV_WILLNEED;
 
-	i915_gem_object_make_unshrinkable(obj);
+	if (!i915_gem_object_has_self_managed_shrink_list(obj))
+		i915_gem_object_make_unshrinkable(obj);
 
 	if (obj->mm.mapping) {
 		unmap_object(obj, page_mask_bits(obj->mm.mapping));
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index 66121fedc655..dde0a5c232f8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -497,13 +497,12 @@ void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj)
 	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 }
 
-static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
-					      struct list_head *head)
+static void ___i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
+					       struct list_head *head)
 {
 	struct drm_i915_private *i915 = obj_to_i915(obj);
 	unsigned long flags;
 
-	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
 	if (!i915_gem_object_is_shrinkable(obj))
 		return;
 
@@ -523,6 +522,38 @@ static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
 	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 }
 
+/**
+ * __i915_gem_object_make_shrinkable - Move the object to the tail of the
+ * shrinkable list. Objects on this list might be swapped out. Used with
+ * WILLNEED objects.
+ * @obj: The GEM object.
+ *
+ * DO NOT USE. This is intended to be called on very special objects that don't
+ * yet have mm.pages, but are guaranteed to have potentially reclaimable pages
+ * underneath.
+ */
+void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
+{
+	___i915_gem_object_make_shrinkable(obj,
+					   &obj_to_i915(obj)->mm.shrink_list);
+}
+
+/**
+ * __i915_gem_object_make_purgeable - Move the object to the tail of the
+ * purgeable list. Objects on this list might be swapped out. Used with
+ * DONTNEED objects.
+ * @obj: The GEM object.
+ *
+ * DO NOT USE. This is intended to be called on very special objects that don't
+ * yet have mm.pages, but are guaranteed to have potentially reclaimable pages
+ * underneath.
+ */
+void __i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj)
+{
+	___i915_gem_object_make_shrinkable(obj,
+					   &obj_to_i915(obj)->mm.purge_list);
+}
+
 /**
  * i915_gem_object_make_shrinkable - Move the object to the tail of the
  * shrinkable list. Objects on this list might be swapped out. Used with
@@ -535,8 +566,8 @@ static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
  */
 void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
 {
-	__i915_gem_object_make_shrinkable(obj,
-					  &obj_to_i915(obj)->mm.shrink_list);
+	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+	__i915_gem_object_make_shrinkable(obj);
 }
 
 /**
@@ -552,6 +583,6 @@ void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
  */
 void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj)
 {
-	__i915_gem_object_make_shrinkable(obj,
-					  &obj_to_i915(obj)->mm.purge_list);
+	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+	__i915_gem_object_make_purgeable(obj);
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 0fe1eb8f38e9..8de1031a2c6e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -762,6 +762,7 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
 		obj->ttm.get_io_page.sg_idx = 0;
 	}
 
+	i915_ttm_adjust_lru(obj);
 	i915_ttm_adjust_gem_after_move(obj);
 	return 0;
 }
@@ -851,7 +852,6 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
 			return i915_ttm_err_to_gem(ret);
 	}
 
-	i915_ttm_adjust_lru(obj);
 	if (bo->ttm && !ttm_tt_is_populated(bo->ttm)) {
 		ret = ttm_tt_populate(bo->bdev, bo->ttm, &ctx);
 		if (ret)
@@ -862,19 +862,15 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
 	}
 
 	if (!i915_gem_object_has_pages(obj)) {
-		struct i915_ttm_tt *i915_tt =
-			container_of(bo->ttm, typeof(*i915_tt), ttm);
-
 		/* Object either has a page vector or is an iomem object */
 		st = bo->ttm ? i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st;
 		if (IS_ERR(st))
 			return PTR_ERR(st);
 
 		__i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl));
-		if (!bo->ttm || !i915_tt->is_shmem)
-			i915_gem_object_make_unshrinkable(obj);
 	}
 
+	i915_ttm_adjust_lru(obj);
 	return ret;
 }
 
@@ -945,8 +941,6 @@ static void i915_ttm_put_pages(struct drm_i915_gem_object *obj,
 	 * If the object is not destroyed next, The TTM eviction logic
 	 * and shrinkers will move it out if needed.
 	 */
-
-	i915_ttm_adjust_lru(obj);
 }
 
 static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
@@ -954,6 +948,8 @@ static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
 	struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
 	struct i915_ttm_tt *i915_tt =
 		container_of(bo->ttm, typeof(*i915_tt), ttm);
+	bool shrinkable =
+		bo->ttm && i915_tt->filp && ttm_tt_is_populated(bo->ttm);
 
 	/*
 	 * Don't manipulate the TTM LRUs while in TTM bo destruction.
@@ -962,11 +958,40 @@ static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
 	if (!kref_read(&bo->kref))
 		return;
 
+	/*
+	 * We skip managing the shrinker LRU in set_pages() and just manage
+	 * everything here. This does at least solve the issue with having
+	 * temporary shmem mappings(like with evicted lmem) not being visible to
+	 * the shrinker. Only our shmem objects are shrinkable, everything else
+	 * we keep as unshrinkable.
+	 *
+	 * To make sure everything plays nice we keep an extra shrink pin in TTM
+	 * if the underlying pages are not currently shrinkable. Once we release
+	 * our pin, like when the pages are moved to shmem, the pages will then
+	 * be added to the shrinker LRU, assuming the caller isn't also holding
+	 * a pin.
+	 *
+	 * TODO: consider maybe also bumping the shrinker list here when we have
+	 * already unpinned it, which should give us something more like an LRU.
+	 */
+	if (shrinkable != obj->mm.ttm_shrinkable) {
+		if (shrinkable) {
+			if (obj->mm.madv == I915_MADV_WILLNEED)
+				__i915_gem_object_make_shrinkable(obj);
+			else
+				__i915_gem_object_make_purgeable(obj);
+		} else {
+			i915_gem_object_make_unshrinkable(obj);
+		}
+
+		obj->mm.ttm_shrinkable = shrinkable;
+	}
+
 	/*
 	 * Put on the correct LRU list depending on the MADV status
 	 */
 	spin_lock(&bo->bdev->lru_lock);
-	if (bo->ttm && i915_tt->filp) {
+	if (shrinkable) {
 		/* Try to keep shmem_tt from being considered for shrinking. */
 		bo->priority = TTM_MAX_BO_PRIORITY - 1;
 	} else if (obj->mm.madv != I915_MADV_WILLNEED) {
@@ -1010,13 +1035,34 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
 	struct vm_area_struct *area = vmf->vma;
 	struct drm_i915_gem_object *obj =
 		i915_ttm_to_gem(area->vm_private_data);
+	struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+	struct drm_device *dev = bo->base.dev;
+	vm_fault_t ret;
+	int idx;
 
 	/* Sanity check that we allow writing into this object */
 	if (unlikely(i915_gem_object_is_readonly(obj) &&
 		     area->vm_flags & VM_WRITE))
 		return VM_FAULT_SIGBUS;
 
-	return ttm_bo_vm_fault(vmf);
+	ret = ttm_bo_vm_reserve(bo, vmf);
+	if (ret)
+		return ret;
+
+	if (drm_dev_enter(dev, &idx)) {
+		ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
+					       TTM_BO_VM_NUM_PREFAULT, 1);
+		drm_dev_exit(idx);
+	} else {
+		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
+	}
+	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+		return ret;
+
+	i915_ttm_adjust_lru(obj);
+
+	dma_resv_unlock(bo->base.resv);
+	return ret;
 }
 
 static int
@@ -1067,6 +1113,7 @@ static u64 i915_ttm_mmap_offset(struct drm_i915_gem_object *obj)
 
 static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = {
 	.name = "i915_gem_object_ttm",
+	.flags = I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST,
 
 	.get_pages = i915_ttm_get_pages,
 	.put_pages = i915_ttm_put_pages,
@@ -1089,6 +1136,18 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo)
 	mutex_destroy(&obj->ttm.get_io_page.lock);
 
 	if (obj->ttm.created) {
+		/*
+		 * We freely manage the shrinker LRU outside of the mm.pages life
+		 * cycle. As a result when destroying the object we should be
+		 * extra paranoid and ensure we remove it from the LRU, before
+		 * we free the object.
+		 *
+		 * Touching the ttm_shrinkable outside of the object lock here
+		 * should be safe now that the last GEM object ref was dropped.
+		 */
+		if (obj->mm.ttm_shrinkable)
+			i915_gem_object_make_unshrinkable(obj);
+
 		i915_ttm_backup_free(obj);
 
 		/* This releases all gem object bindings to the backend. */
@@ -1141,6 +1200,14 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
 	/* Forcing the page size is kernel internal only */
 	GEM_BUG_ON(page_size && obj->mm.n_placements);
 
+	/*
+	 * Keep an extra shrink pin to prevent the object from being made
+	 * shrinkable too early. If the ttm_tt is ever allocated in shmem, we
+	 * drop the pin. The TTM backend manages the shrinker LRU itself,
+	 * outside of the normal mm.pages life cycle.
+	 */
+	i915_gem_object_make_unshrinkable(obj);
+
 	/*
 	 * If this function fails, it will call the destructor, but
 	 * our caller still owns the object. So no freeing in the
-- 
2.26.3


^ permalink raw reply related	[flat|nested] 34+ messages in thread
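
The adjust_lru rework above reduces to a small decision table: the object is only visible to the i915 shrinker while it has a populated, shmem-backed ttm_tt, and the madvise state then selects the shrinkable or purgeable list. A rough standalone model of just that decision -- an assumed simplification that ignores the extra shrink pin and the TTM LRU priority handling, and is not the driver code:

#include <stdbool.h>
#include <stdio.h>

enum madv { WILLNEED, DONTNEED };
enum shrink_list { UNSHRINKABLE, SHRINKABLE, PURGEABLE };

/* Which i915 shrinker list the object should end up on. */
static enum shrink_list target_list(bool has_shmem_tt, bool populated,
				    enum madv madv)
{
	/* mirrors: shrinkable = bo->ttm && i915_tt->filp && ttm_tt_is_populated(bo->ttm) */
	bool shrinkable = has_shmem_tt && populated;

	if (!shrinkable)
		return UNSHRINKABLE;	/* e.g. lmem, or not yet populated */

	return madv == WILLNEED ? SHRINKABLE : PURGEABLE;
}

int main(void)
{
	static const char * const name[] = {
		"unshrinkable", "shrinkable", "purgeable"
	};

	printf("%s\n", name[target_list(true, true, WILLNEED)]);	/* shrinkable */
	printf("%s\n", name[target_list(true, true, DONTNEED)]);	/* purgeable */
	printf("%s\n", name[target_list(false, false, WILLNEED)]);	/* unshrinkable */
	return 0;
}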

* [PATCH v9 7/8] drm/i915/ttm: use cached system pages when evicting lmem
  2021-10-18  9:10 ` [Intel-gfx] " Matthew Auld
@ 2021-10-18  9:10   ` Matthew Auld
  -1 siblings, 0 replies; 34+ messages in thread
From: Matthew Auld @ 2021-10-18  9:10 UTC (permalink / raw)
  To: intel-gfx; +Cc: dri-devel, Thomas Hellström

This should let us do an accelerated copy directly to the shmem pages
when temporarily moving lmem-only objects, where the i915-gem shrinker
can later kick in to swap out the pages, if needed.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 8de1031a2c6e..d37581d9194c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -134,11 +134,11 @@ static enum ttm_caching
 i915_ttm_select_tt_caching(const struct drm_i915_gem_object *obj)
 {
 	/*
-	 * Objects only allowed in system get cached cpu-mappings.
-	 * Other objects get WC mapping for now. Even if in system.
+	 * Objects only allowed in system get cached cpu-mappings, as do
+	 * lmem-only buffers when they are evicted to system for swapping.
+	 * Other objects get WC mapping for now. Even if in system.
 	 */
-	if (obj->mm.region->type == INTEL_MEMORY_SYSTEM &&
-	    obj->mm.n_placements <= 1)
+	if (obj->mm.n_placements <= 1)
 		return ttm_cached;
 
 	return ttm_write_combined;
-- 
2.26.3


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH v9 8/8] drm/i915/ttm: enable shmem tt backend
  2021-10-18  9:10 ` [Intel-gfx] " Matthew Auld
@ 2021-10-18  9:10   ` Matthew Auld
  -1 siblings, 0 replies; 34+ messages in thread
From: Matthew Auld @ 2021-10-18  9:10 UTC (permalink / raw)
  To: intel-gfx; +Cc: dri-devel, Thomas Hellström

Turn on the shmem tt backend, and enable shrinking.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index d37581d9194c..4fd2edb20dd9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -1113,7 +1113,8 @@ static u64 i915_ttm_mmap_offset(struct drm_i915_gem_object *obj)
 
 static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = {
 	.name = "i915_gem_object_ttm",
-	.flags = I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST,
+	.flags = I915_GEM_OBJECT_IS_SHRINKABLE |
+		 I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST,
 
 	.get_pages = i915_ttm_get_pages,
 	.put_pages = i915_ttm_put_pages,
-- 
2.26.3


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [v9,1/8] drm/i915/gem: Break out some shmem backend utils
  2021-10-18  9:10 ` [Intel-gfx] " Matthew Auld
                   ` (7 preceding siblings ...)
  (?)
@ 2021-10-18 11:57 ` Patchwork
  -1 siblings, 0 replies; 34+ messages in thread
From: Patchwork @ 2021-10-18 11:57 UTC (permalink / raw)
  To: Matthew Auld; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v9,1/8] drm/i915/gem: Break out some shmem backend utils
URL   : https://patchwork.freedesktop.org/series/95942/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
53802cc4ee04 drm/i915/gem: Break out some shmem backend utils
52832ef9f7a8 drm/i915/ttm: add tt shmem backend
-:16: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#16: 
    dropping the shrinker LRU lock and acquiring the object lock it could for

total: 0 errors, 1 warnings, 0 checks, 486 lines checked
f53947eeb752 drm/i915/gtt: drop unneeded make_unshrinkable
14c9126d88dc drm/i915: drop unneeded make_unshrinkable in free_object
391284f29461 drm/i915: add some kernel-doc for shrink_pin and friends
cade90c069e7 drm/i915/ttm: move shrinker management into adjust_lru
-:19: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#19: 
     an object. Importantly this covers the case where TTM moves something from

total: 0 errors, 1 warnings, 0 checks, 299 lines checked
1e11734bd61a drm/i915/ttm: use cached system pages when evicting lmem
b5e031de1986 drm/i915/ttm: enable shmem tt backend



^ permalink raw reply	[flat|nested] 34+ messages in thread

* [Intel-gfx] ✗ Fi.CI.SPARSE: warning for series starting with [v9,1/8] drm/i915/gem: Break out some shmem backend utils
  2021-10-18  9:10 ` [Intel-gfx] " Matthew Auld
                   ` (8 preceding siblings ...)
  (?)
@ 2021-10-18 11:58 ` Patchwork
  -1 siblings, 0 replies; 34+ messages in thread
From: Patchwork @ 2021-10-18 11:58 UTC (permalink / raw)
  To: Matthew Auld; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v9,1/8] drm/i915/gem: Break out some shmem backend utils
URL   : https://patchwork.freedesktop.org/series/95942/
State : warning

== Summary ==

$ dim sparse --fast origin/drm-tip
Sparse version: v0.6.2
Fast mode used, each commit won't be checked separately.
-
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:27:9: warning: trying to copy expression type 31
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:27:9: warning: trying to copy expression type 31
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:27:9: warning: trying to copy expression type 31
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:32:9: warning: trying to copy expression type 31
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:32:9: warning: trying to copy expression type 31
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:49:9: warning: trying to copy expression type 31
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:49:9: warning: trying to copy expression type 31
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:49:9: warning: trying to copy expression type 31
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:56:9: warning: trying to copy expression type 31
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:56:9: warning: trying to copy expression type 31
+drivers/gpu/drm/i915/gt/intel_reset.c:1392:5: warning: context imbalance in 'intel_gt_reset_trylock' - different lock contexts for basic block
+drivers/gpu/drm/i915/i915_perf.c:1442:15: warning: memset with byte count of 16777216
+drivers/gpu/drm/i915/i915_perf.c:1496:15: warning: memset with byte count of 16777216
+./include/asm-generic/bitops/find.h:112:45: warning: shift count is negative (-262080)
+./include/asm-generic/bitops/find.h:32:31: warning: shift count is negative (-262080)
+./include/linux/spinlock.h:418:9: warning: context imbalance in 'fwtable_read16' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 'fwtable_read32' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 'fwtable_read64' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 'fwtable_read8' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 'fwtable_write16' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 'fwtable_write32' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 'fwtable_write8' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 'gen6_write16' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 'gen6_write32' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 'gen6_write8' - different lock contexts for basic block



^ permalink raw reply	[flat|nested] 34+ messages in thread

* [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [v9,1/8] drm/i915/gem: Break out some shmem backend utils
  2021-10-18  9:10 ` [Intel-gfx] " Matthew Auld
                   ` (9 preceding siblings ...)
  (?)
@ 2021-10-18 12:29 ` Patchwork
  -1 siblings, 0 replies; 34+ messages in thread
From: Patchwork @ 2021-10-18 12:29 UTC (permalink / raw)
  To: Matthew Auld; +Cc: intel-gfx

[-- Attachment #1: Type: text/plain, Size: 6760 bytes --]

== Series Details ==

Series: series starting with [v9,1/8] drm/i915/gem: Break out some shmem backend utils
URL   : https://patchwork.freedesktop.org/series/95942/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_10752 -> Patchwork_21366
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/index.html

Known issues
------------

  Here are the changes found in Patchwork_21366 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@amdgpu/amd_basic@query-info:
    - fi-bsw-kefka:       NOTRUN -> [SKIP][1] ([fdo#109271]) +17 similar issues
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/fi-bsw-kefka/igt@amdgpu/amd_basic@query-info.html
    - fi-tgl-1115g4:      NOTRUN -> [SKIP][2] ([fdo#109315])
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/fi-tgl-1115g4/igt@amdgpu/amd_basic@query-info.html
    - fi-kbl-soraka:      NOTRUN -> [SKIP][3] ([fdo#109271])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/fi-kbl-soraka/igt@amdgpu/amd_basic@query-info.html

  * igt@amdgpu/amd_cs_nop@nop-gfx0:
    - fi-tgl-1115g4:      NOTRUN -> [SKIP][4] ([fdo#109315] / [i915#2575]) +16 similar issues
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/fi-tgl-1115g4/igt@amdgpu/amd_cs_nop@nop-gfx0.html

  * igt@gem_exec_suspend@basic-s3:
    - fi-tgl-1115g4:      NOTRUN -> [FAIL][5] ([i915#1888])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/fi-tgl-1115g4/igt@gem_exec_suspend@basic-s3.html

  * igt@gem_huc_copy@huc-copy:
    - fi-tgl-1115g4:      NOTRUN -> [SKIP][6] ([i915#2190])
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/fi-tgl-1115g4/igt@gem_huc_copy@huc-copy.html

  * igt@i915_pm_backlight@basic-brightness:
    - fi-tgl-1115g4:      NOTRUN -> [SKIP][7] ([i915#1155])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/fi-tgl-1115g4/igt@i915_pm_backlight@basic-brightness.html

  * igt@i915_selftest@live@gt_heartbeat:
    - fi-bsw-nick:        [PASS][8] -> [DMESG-FAIL][9] ([i915#541])
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/fi-bsw-nick/igt@i915_selftest@live@gt_heartbeat.html
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/fi-bsw-nick/igt@i915_selftest@live@gt_heartbeat.html

  * igt@i915_selftest@live@gt_pm:
    - fi-tgl-1115g4:      NOTRUN -> [DMESG-FAIL][10] ([i915#3987])
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/fi-tgl-1115g4/igt@i915_selftest@live@gt_pm.html

  * igt@kms_chamelium@common-hpd-after-suspend:
    - fi-tgl-1115g4:      NOTRUN -> [SKIP][11] ([fdo#111827]) +8 similar issues
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/fi-tgl-1115g4/igt@kms_chamelium@common-hpd-after-suspend.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic:
    - fi-tgl-1115g4:      NOTRUN -> [SKIP][12] ([i915#4103]) +1 similar issue
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/fi-tgl-1115g4/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html

  * igt@kms_force_connector_basic@force-load-detect:
    - fi-tgl-1115g4:      NOTRUN -> [SKIP][13] ([fdo#109285])
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/fi-tgl-1115g4/igt@kms_force_connector_basic@force-load-detect.html

  * igt@kms_psr@primary_mmap_gtt:
    - fi-tgl-1115g4:      NOTRUN -> [SKIP][14] ([i915#1072]) +3 similar issues
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/fi-tgl-1115g4/igt@kms_psr@primary_mmap_gtt.html

  * igt@prime_vgem@basic-userptr:
    - fi-tgl-1115g4:      NOTRUN -> [SKIP][15] ([i915#3301])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/fi-tgl-1115g4/igt@prime_vgem@basic-userptr.html

  
#### Possible fixes ####

  * igt@i915_selftest@live@execlists:
    - fi-bsw-kefka:       [INCOMPLETE][16] ([i915#2940]) -> [PASS][17]
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/fi-bsw-kefka/igt@i915_selftest@live@execlists.html
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/fi-bsw-kefka/igt@i915_selftest@live@execlists.html

  * igt@i915_selftest@live@gt_lrc:
    - fi-bsw-n3050:       [DMESG-FAIL][18] ([i915#2373]) -> [PASS][19]
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/fi-bsw-n3050/igt@i915_selftest@live@gt_lrc.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/fi-bsw-n3050/igt@i915_selftest@live@gt_lrc.html

  
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109285]: https://bugs.freedesktop.org/show_bug.cgi?id=109285
  [fdo#109315]: https://bugs.freedesktop.org/show_bug.cgi?id=109315
  [fdo#111827]: https://bugs.freedesktop.org/show_bug.cgi?id=111827
  [i915#1072]: https://gitlab.freedesktop.org/drm/intel/issues/1072
  [i915#1155]: https://gitlab.freedesktop.org/drm/intel/issues/1155
  [i915#1888]: https://gitlab.freedesktop.org/drm/intel/issues/1888
  [i915#2190]: https://gitlab.freedesktop.org/drm/intel/issues/2190
  [i915#2373]: https://gitlab.freedesktop.org/drm/intel/issues/2373
  [i915#2575]: https://gitlab.freedesktop.org/drm/intel/issues/2575
  [i915#2940]: https://gitlab.freedesktop.org/drm/intel/issues/2940
  [i915#3301]: https://gitlab.freedesktop.org/drm/intel/issues/3301
  [i915#3987]: https://gitlab.freedesktop.org/drm/intel/issues/3987
  [i915#4103]: https://gitlab.freedesktop.org/drm/intel/issues/4103
  [i915#541]: https://gitlab.freedesktop.org/drm/intel/issues/541


Participating hosts (38 -> 37)
------------------------------

  Additional (1): fi-tgl-1115g4 
  Missing    (2): bat-dg1-6 bat-dg1-5 


Build changes
-------------

  * Linux: CI_DRM_10752 -> Patchwork_21366

  CI-20190529: 20190529
  CI_DRM_10752: c76aaeb23ed1eebb2af30e8ba3dca7c31b9f66ec @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_6252: 996f2707195ed10c19905bcd8ccdb860a5e9d9c5 @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
  Patchwork_21366: b5e031de1986da92d928cda6a6f86d9c3a583a95 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

b5e031de1986 drm/i915/ttm: enable shmem tt backend
1e11734bd61a drm/i915/ttm: use cached system pages when evicting lmem
cade90c069e7 drm/i915/ttm: move shrinker management into adjust_lru
391284f29461 drm/i915: add some kernel-doc for shrink_pin and friends
14c9126d88dc drm/i915: drop unneeded make_unshrinkable in free_object
f53947eeb752 drm/i915/gtt: drop unneeded make_unshrinkable
52832ef9f7a8 drm/i915/ttm: add tt shmem backend
53802cc4ee04 drm/i915/gem: Break out some shmem backend utils

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/index.html

[-- Attachment #2: Type: text/html, Size: 7924 bytes --]

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [Intel-gfx] ✗ Fi.CI.IGT: failure for series starting with [v9,1/8] drm/i915/gem: Break out some shmem backend utils
  2021-10-18  9:10 ` [Intel-gfx] " Matthew Auld
                   ` (10 preceding siblings ...)
  (?)
@ 2021-10-18 15:48 ` Patchwork
  -1 siblings, 0 replies; 34+ messages in thread
From: Patchwork @ 2021-10-18 15:48 UTC (permalink / raw)
  To: Matthew Auld; +Cc: intel-gfx

[-- Attachment #1: Type: text/plain, Size: 30301 bytes --]

== Series Details ==

Series: series starting with [v9,1/8] drm/i915/gem: Break out some shmem backend utils
URL   : https://patchwork.freedesktop.org/series/95942/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_10752_full -> Patchwork_21366_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_21366_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_21366_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_21366_full:

### IGT changes ###

#### Possible regressions ####

  * igt@gem_exec_schedule@pi-ringfull@bcs0:
    - shard-skl:          [PASS][1] -> [FAIL][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-skl1/igt@gem_exec_schedule@pi-ringfull@bcs0.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-skl5/igt@gem_exec_schedule@pi-ringfull@bcs0.html

  * igt@kms_busy@extended-modeset-hang-oldfb-with-reset@pipe-d:
    - shard-tglb:         [PASS][3] -> [INCOMPLETE][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-tglb1/igt@kms_busy@extended-modeset-hang-oldfb-with-reset@pipe-d.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb8/igt@kms_busy@extended-modeset-hang-oldfb-with-reset@pipe-d.html

  * igt@kms_bw@linear-tiling-1-displays-2560x1440p:
    - shard-snb:          NOTRUN -> [FAIL][5]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-snb2/igt@kms_bw@linear-tiling-1-displays-2560x1440p.html

  
Known issues
------------

  Here are the changes found in Patchwork_21366_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_caching@writes:
    - shard-skl:          [PASS][6] -> [DMESG-WARN][7] ([i915#1982]) +2 similar issues
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-skl3/igt@gem_caching@writes.html
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-skl3/igt@gem_caching@writes.html

  * igt@gem_ctx_persistence@legacy-engines-queued:
    - shard-snb:          NOTRUN -> [SKIP][8] ([fdo#109271] / [i915#1099]) +2 similar issues
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-snb2/igt@gem_ctx_persistence@legacy-engines-queued.html

  * igt@gem_ctx_sseu@invalid-sseu:
    - shard-tglb:         NOTRUN -> [SKIP][9] ([i915#280])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb6/igt@gem_ctx_sseu@invalid-sseu.html

  * igt@gem_eio@unwedge-stress:
    - shard-tglb:         [PASS][10] -> [TIMEOUT][11] ([i915#2369] / [i915#3063] / [i915#3648])
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-tglb7/igt@gem_eio@unwedge-stress.html
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb2/igt@gem_eio@unwedge-stress.html

  * igt@gem_exec_fair@basic-deadline:
    - shard-glk:          [PASS][12] -> [FAIL][13] ([i915#2846])
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-glk1/igt@gem_exec_fair@basic-deadline.html
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-glk1/igt@gem_exec_fair@basic-deadline.html

  * igt@gem_exec_fair@basic-pace-solo@rcs0:
    - shard-kbl:          [PASS][14] -> [FAIL][15] ([i915#2842])
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-kbl2/igt@gem_exec_fair@basic-pace-solo@rcs0.html
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-kbl2/igt@gem_exec_fair@basic-pace-solo@rcs0.html

  * igt@gem_exec_fair@basic-throttle@rcs0:
    - shard-glk:          [PASS][16] -> [FAIL][17] ([i915#2842])
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-glk7/igt@gem_exec_fair@basic-throttle@rcs0.html
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-glk9/igt@gem_exec_fair@basic-throttle@rcs0.html

  * igt@gem_exec_whisper@basic-contexts-all:
    - shard-glk:          [PASS][18] -> [DMESG-WARN][19] ([i915#118]) +3 similar issues
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-glk6/igt@gem_exec_whisper@basic-contexts-all.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-glk6/igt@gem_exec_whisper@basic-contexts-all.html

  * igt@gem_media_vme:
    - shard-tglb:         NOTRUN -> [SKIP][20] ([i915#284])
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb2/igt@gem_media_vme.html

  * igt@gem_pxp@regular-baseline-src-copy-readible:
    - shard-tglb:         NOTRUN -> [SKIP][21] ([i915#4270])
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb6/igt@gem_pxp@regular-baseline-src-copy-readible.html

  * igt@gem_render_copy@x-tiled-to-vebox-yf-tiled:
    - shard-kbl:          NOTRUN -> [SKIP][22] ([fdo#109271]) +43 similar issues
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-kbl3/igt@gem_render_copy@x-tiled-to-vebox-yf-tiled.html

  * igt@gem_userptr_blits@readonly-pwrite-unsync:
    - shard-tglb:         NOTRUN -> [SKIP][23] ([i915#3297]) +1 similar issue
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb2/igt@gem_userptr_blits@readonly-pwrite-unsync.html

  * igt@gem_workarounds@suspend-resume-fd:
    - shard-kbl:          [PASS][24] -> [DMESG-WARN][25] ([i915#180]) +2 similar issues
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-kbl7/igt@gem_workarounds@suspend-resume-fd.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-kbl7/igt@gem_workarounds@suspend-resume-fd.html

  * igt@gen7_exec_parse@bitmasks:
    - shard-tglb:         NOTRUN -> [SKIP][26] ([fdo#109289])
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb1/igt@gen7_exec_parse@bitmasks.html

  * igt@gen9_exec_parse@basic-rejected:
    - shard-tglb:         NOTRUN -> [SKIP][27] ([i915#2856]) +1 similar issue
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb6/igt@gen9_exec_parse@basic-rejected.html

  * igt@i915_suspend@debugfs-reader:
    - shard-tglb:         [PASS][28] -> [INCOMPLETE][29] ([i915#456])
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-tglb1/igt@i915_suspend@debugfs-reader.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb7/igt@i915_suspend@debugfs-reader.html

  * igt@kms_big_fb@x-tiled-16bpp-rotate-90:
    - shard-tglb:         NOTRUN -> [SKIP][30] ([fdo#111614]) +1 similar issue
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb2/igt@kms_big_fb@x-tiled-16bpp-rotate-90.html

  * igt@kms_big_fb@yf-tiled-8bpp-rotate-270:
    - shard-tglb:         NOTRUN -> [SKIP][31] ([fdo#111615])
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb6/igt@kms_big_fb@yf-tiled-8bpp-rotate-270.html

  * igt@kms_big_fb@yf-tiled-max-hw-stride-32bpp-rotate-0-hflip:
    - shard-apl:          NOTRUN -> [SKIP][32] ([fdo#109271] / [i915#3777]) +2 similar issues
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-apl8/igt@kms_big_fb@yf-tiled-max-hw-stride-32bpp-rotate-0-hflip.html

  * igt@kms_bw@linear-tiling-2-displays-1920x1080p:
    - shard-apl:          NOTRUN -> [DMESG-FAIL][33] ([i915#4298])
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-apl6/igt@kms_bw@linear-tiling-2-displays-1920x1080p.html
    - shard-skl:          NOTRUN -> [DMESG-FAIL][34] ([i915#4298])
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-skl4/igt@kms_bw@linear-tiling-2-displays-1920x1080p.html

  * igt@kms_bw@linear-tiling-5-displays-3840x2160p:
    - shard-tglb:         NOTRUN -> [FAIL][35] ([i915#1385]) +1 similar issue
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb2/igt@kms_bw@linear-tiling-5-displays-3840x2160p.html

  * igt@kms_ccs@pipe-a-crc-sprite-planes-basic-y_tiled_ccs:
    - shard-tglb:         NOTRUN -> [SKIP][36] ([i915#3689]) +10 similar issues
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb2/igt@kms_ccs@pipe-a-crc-sprite-planes-basic-y_tiled_ccs.html

  * igt@kms_ccs@pipe-a-crc-sprite-planes-basic-y_tiled_gen12_mc_ccs:
    - shard-tglb:         NOTRUN -> [SKIP][37] ([i915#3689] / [i915#3886])
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb6/igt@kms_ccs@pipe-a-crc-sprite-planes-basic-y_tiled_gen12_mc_ccs.html

  * igt@kms_ccs@pipe-a-missing-ccs-buffer-y_tiled_gen12_rc_ccs_cc:
    - shard-apl:          NOTRUN -> [SKIP][38] ([fdo#109271] / [i915#3886]) +6 similar issues
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-apl7/igt@kms_ccs@pipe-a-missing-ccs-buffer-y_tiled_gen12_rc_ccs_cc.html

  * igt@kms_chamelium@vga-hpd-without-ddc:
    - shard-snb:          NOTRUN -> [SKIP][39] ([fdo#109271] / [fdo#111827]) +16 similar issues
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-snb5/igt@kms_chamelium@vga-hpd-without-ddc.html

  * igt@kms_color_chamelium@pipe-a-ctm-0-25:
    - shard-kbl:          NOTRUN -> [SKIP][40] ([fdo#109271] / [fdo#111827]) +1 similar issue
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-kbl3/igt@kms_color_chamelium@pipe-a-ctm-0-25.html

  * igt@kms_color_chamelium@pipe-b-ctm-0-75:
    - shard-tglb:         NOTRUN -> [SKIP][41] ([fdo#109284] / [fdo#111827]) +4 similar issues
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb2/igt@kms_color_chamelium@pipe-b-ctm-0-75.html

  * igt@kms_color_chamelium@pipe-c-degamma:
    - shard-skl:          NOTRUN -> [SKIP][42] ([fdo#109271] / [fdo#111827])
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-skl4/igt@kms_color_chamelium@pipe-c-degamma.html

  * igt@kms_color_chamelium@pipe-d-ctm-limited-range:
    - shard-apl:          NOTRUN -> [SKIP][43] ([fdo#109271] / [fdo#111827]) +10 similar issues
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-apl1/igt@kms_color_chamelium@pipe-d-ctm-limited-range.html

  * igt@kms_content_protection@dp-mst-lic-type-0:
    - shard-tglb:         NOTRUN -> [SKIP][44] ([i915#3116])
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb2/igt@kms_content_protection@dp-mst-lic-type-0.html

  * igt@kms_cursor_crc@pipe-a-cursor-32x10-offscreen:
    - shard-apl:          NOTRUN -> [SKIP][45] ([fdo#109271]) +102 similar issues
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-apl7/igt@kms_cursor_crc@pipe-a-cursor-32x10-offscreen.html

  * igt@kms_cursor_crc@pipe-a-cursor-64x64-sliding:
    - shard-snb:          [PASS][46] -> [SKIP][47] ([fdo#109271])
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-snb2/igt@kms_cursor_crc@pipe-a-cursor-64x64-sliding.html
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-snb2/igt@kms_cursor_crc@pipe-a-cursor-64x64-sliding.html

  * igt@kms_cursor_crc@pipe-a-cursor-max-size-rapid-movement:
    - shard-tglb:         NOTRUN -> [SKIP][48] ([i915#3359])
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb1/igt@kms_cursor_crc@pipe-a-cursor-max-size-rapid-movement.html

  * igt@kms_cursor_crc@pipe-b-cursor-32x10-sliding:
    - shard-skl:          NOTRUN -> [SKIP][49] ([fdo#109271]) +6 similar issues
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-skl5/igt@kms_cursor_crc@pipe-b-cursor-32x10-sliding.html
    - shard-iclb:         NOTRUN -> [SKIP][50] ([fdo#109278])
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-iclb7/igt@kms_cursor_crc@pipe-b-cursor-32x10-sliding.html

  * igt@kms_cursor_crc@pipe-c-cursor-32x32-onscreen:
    - shard-tglb:         NOTRUN -> [SKIP][51] ([i915#3319]) +2 similar issues
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb2/igt@kms_cursor_crc@pipe-c-cursor-32x32-onscreen.html

  * igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions-varying-size:
    - shard-skl:          [PASS][52] -> [FAIL][53] ([i915#2346] / [i915#533])
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-skl8/igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions-varying-size.html
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-skl7/igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions-varying-size.html

  * igt@kms_fbcon_fbt@fbc-suspend:
    - shard-apl:          [PASS][54] -> [INCOMPLETE][55] ([i915#180])
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-apl3/igt@kms_fbcon_fbt@fbc-suspend.html
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-apl2/igt@kms_fbcon_fbt@fbc-suspend.html

  * igt@kms_flip@2x-absolute-wf_vblank:
    - shard-tglb:         NOTRUN -> [SKIP][56] ([fdo#111825] / [i915#3966])
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb2/igt@kms_flip@2x-absolute-wf_vblank.html

  * igt@kms_flip@flip-vs-expired-vblank-interruptible@c-hdmi-a1:
    - shard-glk:          [PASS][57] -> [FAIL][58] ([i915#79]) +1 similar issue
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-glk8/igt@kms_flip@flip-vs-expired-vblank-interruptible@c-hdmi-a1.html
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-glk8/igt@kms_flip@flip-vs-expired-vblank-interruptible@c-hdmi-a1.html

  * igt@kms_flip@flip-vs-expired-vblank@c-edp1:
    - shard-skl:          [PASS][59] -> [FAIL][60] ([i915#79])
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-skl5/igt@kms_flip@flip-vs-expired-vblank@c-edp1.html
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-skl9/igt@kms_flip@flip-vs-expired-vblank@c-edp1.html

  * igt@kms_flip@flip-vs-suspend@c-dp1:
    - shard-apl:          [PASS][61] -> [DMESG-WARN][62] ([i915#180]) +1 similar issue
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-apl7/igt@kms_flip@flip-vs-suspend@c-dp1.html
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-apl3/igt@kms_flip@flip-vs-suspend@c-dp1.html

  * igt@kms_flip@plain-flip-ts-check-interruptible@a-edp1:
    - shard-skl:          [PASS][63] -> [FAIL][64] ([i915#2122])
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-skl9/igt@kms_flip@plain-flip-ts-check-interruptible@a-edp1.html
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-skl1/igt@kms_flip@plain-flip-ts-check-interruptible@a-edp1.html

  * igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-64bpp-ytile:
    - shard-iclb:         [PASS][65] -> [SKIP][66] ([i915#3701])
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-iclb6/igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-64bpp-ytile.html
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-iclb2/igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-64bpp-ytile.html

  * igt@kms_frontbuffer_tracking@fbcpsr-2p-primscrn-pri-indfb-draw-blt:
    - shard-tglb:         NOTRUN -> [SKIP][67] ([fdo#111825]) +13 similar issues
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb2/igt@kms_frontbuffer_tracking@fbcpsr-2p-primscrn-pri-indfb-draw-blt.html

  * igt@kms_frontbuffer_tracking@fbcpsr-2p-scndscrn-pri-indfb-draw-mmap-wc:
    - shard-iclb:         NOTRUN -> [SKIP][68] ([fdo#109280])
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-iclb7/igt@kms_frontbuffer_tracking@fbcpsr-2p-scndscrn-pri-indfb-draw-mmap-wc.html

  * igt@kms_hdr@bpc-switch-dpms:
    - shard-skl:          [PASS][69] -> [FAIL][70] ([i915#1188])
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-skl2/igt@kms_hdr@bpc-switch-dpms.html
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-skl10/igt@kms_hdr@bpc-switch-dpms.html

  * igt@kms_plane_alpha_blend@pipe-a-alpha-transparent-fb:
    - shard-apl:          NOTRUN -> [FAIL][71] ([i915#265])
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-apl7/igt@kms_plane_alpha_blend@pipe-a-alpha-transparent-fb.html

  * igt@kms_plane_alpha_blend@pipe-b-coverage-7efc:
    - shard-skl:          [PASS][72] -> [FAIL][73] ([fdo#108145] / [i915#265])
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-skl1/igt@kms_plane_alpha_blend@pipe-b-coverage-7efc.html
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-skl2/igt@kms_plane_alpha_blend@pipe-b-coverage-7efc.html

  * igt@kms_plane_lowres@pipe-a-tiling-x:
    - shard-tglb:         NOTRUN -> [SKIP][74] ([i915#3536])
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb2/igt@kms_plane_lowres@pipe-a-tiling-x.html

  * igt@kms_psr2_sf@plane-move-sf-dmg-area-2:
    - shard-tglb:         NOTRUN -> [SKIP][75] ([i915#2920])
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb1/igt@kms_psr2_sf@plane-move-sf-dmg-area-2.html
    - shard-apl:          NOTRUN -> [SKIP][76] ([fdo#109271] / [i915#658]) +1 similar issue
   [76]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-apl1/igt@kms_psr2_sf@plane-move-sf-dmg-area-2.html

  * igt@kms_psr2_su@frontbuffer:
    - shard-kbl:          NOTRUN -> [SKIP][77] ([fdo#109271] / [i915#658])
   [77]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-kbl3/igt@kms_psr2_su@frontbuffer.html

  * igt@kms_psr@psr2_cursor_plane_onoff:
    - shard-tglb:         NOTRUN -> [FAIL][78] ([i915#132] / [i915#3467]) +3 similar issues
   [78]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb2/igt@kms_psr@psr2_cursor_plane_onoff.html

  * igt@kms_sysfs_edid_timing:
    - shard-apl:          NOTRUN -> [FAIL][79] ([IGT#2])
   [79]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-apl1/igt@kms_sysfs_edid_timing.html

  * igt@kms_vblank@pipe-b-ts-continuation-dpms-suspend:
    - shard-tglb:         [PASS][80] -> [INCOMPLETE][81] ([i915#2828] / [i915#456])
   [80]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-tglb6/igt@kms_vblank@pipe-b-ts-continuation-dpms-suspend.html
   [81]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb7/igt@kms_vblank@pipe-b-ts-continuation-dpms-suspend.html

  * igt@kms_vblank@pipe-d-query-forked-hang:
    - shard-snb:          NOTRUN -> [SKIP][82] ([fdo#109271]) +292 similar issues
   [82]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-snb7/igt@kms_vblank@pipe-d-query-forked-hang.html

  * igt@kms_writeback@writeback-invalid-parameters:
    - shard-apl:          NOTRUN -> [SKIP][83] ([fdo#109271] / [i915#2437])
   [83]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-apl1/igt@kms_writeback@writeback-invalid-parameters.html

  * igt@nouveau_crc@pipe-a-ctx-flip-skip-current-frame:
    - shard-tglb:         NOTRUN -> [SKIP][84] ([i915#2530]) +2 similar issues
   [84]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb6/igt@nouveau_crc@pipe-a-ctx-flip-skip-current-frame.html

  * igt@prime_nv_api@i915_nv_import_twice:
    - shard-tglb:         NOTRUN -> [SKIP][85] ([fdo#109291])
   [85]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb2/igt@prime_nv_api@i915_nv_import_twice.html

  * igt@prime_vgem@basic-userptr:
    - shard-tglb:         NOTRUN -> [SKIP][86] ([i915#3301])
   [86]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb2/igt@prime_vgem@basic-userptr.html

  * igt@sysfs_clients@create:
    - shard-tglb:         NOTRUN -> [SKIP][87] ([i915#2994]) +1 similar issue
   [87]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb2/igt@sysfs_clients@create.html

  * igt@sysfs_clients@sema-25:
    - shard-apl:          NOTRUN -> [SKIP][88] ([fdo#109271] / [i915#2994])
   [88]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-apl1/igt@sysfs_clients@sema-25.html

  * igt@sysfs_clients@sema-50:
    - shard-kbl:          NOTRUN -> [SKIP][89] ([fdo#109271] / [i915#2994]) +1 similar issue
   [89]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-kbl3/igt@sysfs_clients@sema-50.html

  
#### Possible fixes ####

  * igt@gem_ctx_isolation@preservation-s3@bcs0:
    - shard-tglb:         [INCOMPLETE][90] ([i915#456]) -> [PASS][91] +2 similar issues
   [90]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-tglb7/igt@gem_ctx_isolation@preservation-s3@bcs0.html
   [91]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb2/igt@gem_ctx_isolation@preservation-s3@bcs0.html

  * igt@gem_eio@unwedge-stress:
    - shard-iclb:         [TIMEOUT][92] ([i915#2369] / [i915#2481] / [i915#3070]) -> [PASS][93]
   [92]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-iclb2/igt@gem_eio@unwedge-stress.html
   [93]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-iclb7/igt@gem_eio@unwedge-stress.html

  * igt@gem_exec_fair@basic-none@rcs0:
    - shard-glk:          [FAIL][94] ([i915#2842]) -> [PASS][95]
   [94]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-glk6/igt@gem_exec_fair@basic-none@rcs0.html
   [95]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-glk7/igt@gem_exec_fair@basic-none@rcs0.html

  * igt@gem_exec_fair@basic-none@vcs0:
    - shard-kbl:          [FAIL][96] ([i915#2842]) -> [PASS][97] +1 similar issue
   [96]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-kbl1/igt@gem_exec_fair@basic-none@vcs0.html
   [97]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-kbl1/igt@gem_exec_fair@basic-none@vcs0.html

  * igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions:
    - shard-skl:          [FAIL][98] ([i915#2346]) -> [PASS][99]
   [98]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-skl9/igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions.html
   [99]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-skl1/igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions.html

  * igt@kms_flip@2x-flip-vs-expired-vblank-interruptible@ab-hdmi-a1-hdmi-a2:
    - shard-glk:          [FAIL][100] ([i915#2122]) -> [PASS][101]
   [100]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-glk4/igt@kms_flip@2x-flip-vs-expired-vblank-interruptible@ab-hdmi-a1-hdmi-a2.html
   [101]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-glk7/igt@kms_flip@2x-flip-vs-expired-vblank-interruptible@ab-hdmi-a1-hdmi-a2.html

  * igt@kms_flip@flip-vs-expired-vblank@a-edp1:
    - shard-skl:          [DMESG-WARN][102] ([i915#1982]) -> [PASS][103] +1 similar issue
   [102]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-skl5/igt@kms_flip@flip-vs-expired-vblank@a-edp1.html
   [103]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-skl9/igt@kms_flip@flip-vs-expired-vblank@a-edp1.html

  * igt@kms_flip@flip-vs-suspend-interruptible@a-dp1:
    - shard-apl:          [DMESG-WARN][104] ([i915#180]) -> [PASS][105]
   [104]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-apl2/igt@kms_flip@flip-vs-suspend-interruptible@a-dp1.html
   [105]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-apl8/igt@kms_flip@flip-vs-suspend-interruptible@a-dp1.html

  * igt@kms_flip@flip-vs-suspend@b-edp1:
    - shard-skl:          [INCOMPLETE][106] ([i915#146] / [i915#198]) -> [PASS][107]
   [106]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-skl3/igt@kms_flip@flip-vs-suspend@b-edp1.html
   [107]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-skl3/igt@kms_flip@flip-vs-suspend@b-edp1.html

  * igt@kms_flip@flip-vs-suspend@c-dp1:
    - shard-kbl:          [DMESG-WARN][108] ([i915#180]) -> [PASS][109] +6 similar issues
   [108]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-kbl7/igt@kms_flip@flip-vs-suspend@c-dp1.html
   [109]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-kbl3/igt@kms_flip@flip-vs-suspend@c-dp1.html

  * igt@kms_plane_alpha_blend@pipe-b-constant-alpha-min:
    - shard-skl:          [FAIL][110] ([fdo#108145] / [i915#265]) -> [PASS][111]
   [110]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-skl1/igt@kms_plane_alpha_blend@pipe-b-constant-alpha-min.html
   [111]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-skl10/igt@kms_plane_alpha_blend@pipe-b-constant-alpha-min.html

  * igt@kms_plane_alpha_blend@pipe-d-alpha-basic:
    - shard-tglb:         [INCOMPLETE][112] -> [PASS][113]
   [112]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-tglb8/igt@kms_plane_alpha_blend@pipe-d-alpha-basic.html
   [113]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-tglb2/igt@kms_plane_alpha_blend@pipe-d-alpha-basic.html

  * igt@kms_psr@psr2_no_drrs:
    - shard-iclb:         [SKIP][114] ([fdo#109441]) -> [PASS][115] +2 similar issues
   [114]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-iclb6/igt@kms_psr@psr2_no_drrs.html
   [115]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-iclb2/igt@kms_psr@psr2_no_drrs.html

  
#### Warnings ####

  * igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area-1:
    - shard-iclb:         [SKIP][116] ([i915#2920]) -> [SKIP][117] ([i915#658]) +1 similar issue
   [116]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-iclb2/igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area-1.html
   [117]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-iclb5/igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area-1.html

  * igt@kms_psr2_sf@primary-plane-update-sf-dmg-area-4:
    - shard-iclb:         [SKIP][118] ([i915#658]) -> [SKIP][119] ([i915#2920])
   [118]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-iclb6/igt@kms_psr2_sf@primary-plane-update-sf-dmg-area-4.html
   [119]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-iclb2/igt@kms_psr2_sf@primary-plane-update-sf-dmg-area-4.html

  * igt@runner@aborted:
    - shard-kbl:          ([FAIL][120], [FAIL][121], [FAIL][122], [FAIL][123], [FAIL][124], [FAIL][125], [FAIL][126], [FAIL][127], [FAIL][128]) ([i915#1436] / [i915#180] / [i915#1814] / [i915#3002] / [i915#3363]) -> ([FAIL][129], [FAIL][130], [FAIL][131], [FAIL][132], [FAIL][133], [FAIL][134], [FAIL][135], [FAIL][136], [FAIL][137]) ([i915#180] / [i915#1814] / [i915#3002] / [i915#3363] / [i915#602])
   [120]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-kbl1/igt@runner@aborted.html
   [121]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-kbl1/igt@runner@aborted.html
   [122]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-kbl7/igt@runner@aborted.html
   [123]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-kbl6/igt@runner@aborted.html
   [124]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-kbl6/igt@runner@aborted.html
   [125]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-kbl6/igt@runner@aborted.html
   [126]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-kbl7/igt@runner@aborted.html
   [127]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-kbl1/igt@runner@aborted.html
   [128]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-kbl1/igt@runner@aborted.html
   [129]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-kbl1/igt@runner@aborted.html
   [130]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-kbl1/igt@runner@aborted.html
   [131]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-kbl7/igt@runner@aborted.html
   [132]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-kbl7/igt@runner@aborted.html
   [133]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-kbl7/igt@runner@aborted.html
   [134]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-kbl7/igt@runner@aborted.html
   [135]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-kbl7/igt@runner@aborted.html
   [136]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-kbl2/igt@runner@aborted.html
   [137]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-kbl7/igt@runner@aborted.html
    - shard-apl:          ([FAIL][138], [FAIL][139], [FAIL][140], [FAIL][141], [FAIL][142], [FAIL][143], [FAIL][144]) ([fdo#109271] / [i915#180] / [i915#3002] / [i915#3363]) -> ([FAIL][145], [FAIL][146], [FAIL][147], [FAIL][148], [FAIL][149], [FAIL][150], [FAIL][151], [FAIL][152]) ([i915#180] / [i915#3002] / [i915#3363])
   [138]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-apl1/igt@runner@aborted.html
   [139]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-apl8/igt@runner@aborted.html
   [140]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-apl1/igt@runner@aborted.html
   [141]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-apl1/igt@runner@aborted.html
   [142]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-apl7/igt@runner@aborted.html
   [143]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-apl2/igt@runner@aborted.html
   [144]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10752/shard-apl3/igt@runner@aborted.html
   [145]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-apl1/igt@runner@aborted.html
   [146]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-apl1/igt@runner@aborted.html
   [147]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-apl1/igt@runner@aborted.html
   [148]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-apl2/igt@runner@aborted.html
   [149]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-apl6/igt@runner@aborted.html
   [150]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-apl8/igt@runner@aborted.html
   [151]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-apl6/igt@runner@aborted.html
   [152]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/shard-apl3/igt@runner@aborted.html

  
  [IGT#2]: https://gitlab.freedesktop.org/drm/igt-gpu-tools/issues/2

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21366/index.html

[-- Attachment #2: Type: text/html, Size: 33720 bytes --]

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v9 6/8] drm/i915/ttm: move shrinker management into adjust_lru
  2021-10-18  9:10   ` [Intel-gfx] " Matthew Auld
@ 2021-10-20 14:32     ` Thomas Hellström
  -1 siblings, 0 replies; 34+ messages in thread
From: Thomas Hellström @ 2021-10-20 14:32 UTC (permalink / raw)
  To: Matthew Auld, intel-gfx; +Cc: dri-devel

On Mon, 2021-10-18 at 10:10 +0100, Matthew Auld wrote:
> We currently just evict lmem objects to system memory when under memory
> pressure. For this case we might lack the usual object mm.pages, which
> effectively hides the pages from the i915-gem shrinker, until we
> actually "attach" the TT to the object, or in the case of lmem-only
> objects it just gets migrated back to lmem when touched again.
> 
> For all cases we can just adjust the i915 shrinker LRU each time we also
> adjust the TTM LRU. The two cases we care about are:
> 
>   1) When something is moved by TTM, including when initially populating
>      an object. Importantly this covers the case where TTM moves
>      something from lmem <-> smem, outside of the normal get_pages()
>      interface, which should still ensure the shmem pages underneath
>      are reclaimable.
> 
>   2) When calling into i915_gem_object_unlock(). The unlock should
>      ensure the object is removed from the shrinker LRU, if it was
>      indeed swapped out, or just purged, when the shrinker drops the
>      object lock.
> 
> v2(Thomas):
>   - Handle managing the shrinker LRU in adjust_lru, where it is always
>     safe to touch the object.
> v3(Thomas):
>   - Pretty much a re-write. This time piggy back off the shrink_pin
>     stuff, which actually seems to fit quite well for what we want here.
> v4(Thomas):
>   - Just use a simple boolean for tracking ttm_shrinkable.
> v5:
>   - Ensure we call adjust_lru when faulting the object, to ensure the
>     pages are visible to the shrinker, if needed.
>   - Add back the adjust_lru when in i915_ttm_move (Thomas)
> v6(Reported-by: kernel test robot <lkp@intel.com>):
>   - Remove unused i915_tt
> 
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> #v4

R-B also for v6. Note that this may clash with a recent patch by Jason
Gunthorpe that removes the last argument of ttm_bo_vm_fault_reserved().

/Thomas
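
For readers skimming the quoted patch below, a compressed sketch of the state
toggle under review; the sketch_* names are shortened stand-ins for the i915
fields and helpers (the real condition keys off bo->ttm, i915_tt->filp and
ttm_tt_is_populated()):

#include <stdbool.h>

/* Shortened stand-ins for the object state that adjust_lru looks at. */
struct sketch_obj {
	bool shmem_populated;	/* bo->ttm && i915_tt->filp && tt populated */
	bool willneed;		/* madv == I915_MADV_WILLNEED */
	bool ttm_shrinkable;	/* last state reported to the shrinker */
};

static void sketch_make_shrinkable(struct sketch_obj *o)   { (void)o; /* put on shrink LRU */ }
static void sketch_make_purgeable(struct sketch_obj *o)    { (void)o; /* put on purge LRU */ }
static void sketch_make_unshrinkable(struct sketch_obj *o) { (void)o; /* drop from the LRUs */ }

/* Only tell the shrinker about a change when the state actually flips. */
static void sketch_adjust_lru(struct sketch_obj *o)
{
	bool shrinkable = o->shmem_populated;

	if (shrinkable == o->ttm_shrinkable)
		return;

	if (shrinkable) {
		if (o->willneed)
			sketch_make_shrinkable(o);
		else
			sketch_make_purgeable(o);
	} else {
		sketch_make_unshrinkable(o);
	}

	o->ttm_shrinkable = shrinkable;
}
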


> ---
>  drivers/gpu/drm/i915/gem/i915_gem_object.h    |  8 ++
>  .../gpu/drm/i915/gem/i915_gem_object_types.h  | 14 ++-
>  drivers/gpu/drm/i915/gem/i915_gem_pages.c     |  5 +-
>  drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  | 45 ++++++++--
>  drivers/gpu/drm/i915/gem/i915_gem_ttm.c       | 87 ++++++++++++++++-
> --
>  5 files changed, 137 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> index e641db297e0e..3eac8cf2ae10 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> @@ -294,6 +294,12 @@ i915_gem_object_is_shrinkable(const struct
> drm_i915_gem_object *obj)
>         return i915_gem_object_type_has(obj,
> I915_GEM_OBJECT_IS_SHRINKABLE);
>  }
>  
> +static inline bool
> +i915_gem_object_has_self_managed_shrink_list(const struct
> drm_i915_gem_object *obj)
> +{
> +       return i915_gem_object_type_has(obj,
> I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST);
> +}
> +
>  static inline bool
>  i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj)
>  {
> @@ -531,6 +537,8 @@ i915_gem_object_pin_to_display_plane(struct
> drm_i915_gem_object *obj,
>  
>  void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj);
>  void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj);
> +void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj);
> +void __i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj);
>  void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj);
>  
>  static inline bool cpu_write_needs_clflush(struct
> drm_i915_gem_object *obj)
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> index f4233c4e8d2e..5718a09f5533 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> @@ -34,9 +34,11 @@ struct i915_lut_handle {
>  
>  struct drm_i915_gem_object_ops {
>         unsigned int flags;
> -#define I915_GEM_OBJECT_IS_SHRINKABLE  BIT(1)
> -#define I915_GEM_OBJECT_IS_PROXY       BIT(2)
> -#define I915_GEM_OBJECT_NO_MMAP                BIT(3)
> +#define I915_GEM_OBJECT_IS_SHRINKABLE                  BIT(1)
> +/* Skip the shrinker management in set_pages/unset_pages */
> +#define I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST       BIT(2)
> +#define I915_GEM_OBJECT_IS_PROXY                       BIT(3)
> +#define I915_GEM_OBJECT_NO_MMAP                                BIT(4)
>  
>         /* Interface between the GEM object and its backing storage.
>          * get_pages() is called once prior to the use of the
> associated set
> @@ -485,6 +487,12 @@ struct drm_i915_gem_object {
>                  */
>                 atomic_t shrink_pin;
>  
> +               /**
> +                * @ttm_shrinkable: True when the object is using shmem pages
> +                * underneath. Protected by the object lock.
> +                */
> +               bool ttm_shrinkable;
> +
>                 /**
>                  * Priority list of potential placements for this object.
>                  */
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> index ea6d9b3d2d6b..308e22a80af4 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> @@ -68,7 +68,7 @@ void __i915_gem_object_set_pages(struct
> drm_i915_gem_object *obj,
>                 shrinkable = false;
>         }
>  
> -       if (shrinkable) {
> +       if (shrinkable && !i915_gem_object_has_self_managed_shrink_list(obj)) {
>                 struct list_head *list;
>                 unsigned long flags;
>  
> @@ -210,7 +210,8 @@ __i915_gem_object_unset_pages(struct
> drm_i915_gem_object *obj)
>         if (i915_gem_object_is_volatile(obj))
>                 obj->mm.madv = I915_MADV_WILLNEED;
>  
> -       i915_gem_object_make_unshrinkable(obj);
> +       if (!i915_gem_object_has_self_managed_shrink_list(obj))
> +               i915_gem_object_make_unshrinkable(obj);
>  
>         if (obj->mm.mapping) {
>                 unmap_object(obj, page_mask_bits(obj->mm.mapping));
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
> b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
> index 66121fedc655..dde0a5c232f8 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
> @@ -497,13 +497,12 @@ void i915_gem_object_make_unshrinkable(struct
> drm_i915_gem_object *obj)
>         spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
>  }
>  
> -static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
> -                                             struct list_head *head)
> +static void ___i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
> +                                               struct list_head *head)
>  {
>         struct drm_i915_private *i915 = obj_to_i915(obj);
>         unsigned long flags;
>  
> -       GEM_BUG_ON(!i915_gem_object_has_pages(obj));
>         if (!i915_gem_object_is_shrinkable(obj))
>                 return;
>  
> @@ -523,6 +522,38 @@ static void
> __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
>         spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
>  }
>  
> +/**
> + * __i915_gem_object_make_shrinkable - Move the object to the tail of the
> + * shrinkable list. Objects on this list might be swapped out. Used with
> + * WILLNEED objects.
> + * @obj: The GEM object.
> + *
> + * DO NOT USE. This is intended to be called on very special objects that don't
> + * yet have mm.pages, but are guaranteed to have potentially reclaimable pages
> + * underneath.
> + */
> +void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object
> *obj)
> +{
> +       ___i915_gem_object_make_shrinkable(obj,
> +                                          &obj_to_i915(obj)->mm.shrink_list);
> +}
> +
> +/**
> + * __i915_gem_object_make_purgeable - Move the object to the tail of the
> + * purgeable list. Objects on this list might be swapped out. Used with
> + * DONTNEED objects.
> + * @obj: The GEM object.
> + *
> + * DO NOT USE. This is intended to be called on very special objects that don't
> + * yet have mm.pages, but are guaranteed to have potentially reclaimable pages
> + * underneath.
> + */
> +void __i915_gem_object_make_purgeable(struct drm_i915_gem_object
> *obj)
> +{
> +       ___i915_gem_object_make_shrinkable(obj,
> +                                          &obj_to_i915(obj)->mm.purge_list);
> +}
> +
>  /**
>   * i915_gem_object_make_shrinkable - Move the object to the tail of the
>   * shrinkable list. Objects on this list might be swapped out. Used with
> @@ -535,8 +566,8 @@ static void
> __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
>   */
>  void i915_gem_object_make_shrinkable(struct drm_i915_gem_object
> *obj)
>  {
> -       __i915_gem_object_make_shrinkable(obj,
> -                                         &obj_to_i915(obj)->mm.shrink_list);
> +       GEM_BUG_ON(!i915_gem_object_has_pages(obj));
> +       __i915_gem_object_make_shrinkable(obj);
>  }
>  
>  /**
> @@ -552,6 +583,6 @@ void i915_gem_object_make_shrinkable(struct
> drm_i915_gem_object *obj)
>   */
>  void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj)
>  {
> -       __i915_gem_object_make_shrinkable(obj,
> -                                         &obj_to_i915(obj)->mm.purge_list);
> +       GEM_BUG_ON(!i915_gem_object_has_pages(obj));
> +       __i915_gem_object_make_purgeable(obj);
>  }
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> index 0fe1eb8f38e9..8de1031a2c6e 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> @@ -762,6 +762,7 @@ static int i915_ttm_move(struct ttm_buffer_object
> *bo, bool evict,
>                 obj->ttm.get_io_page.sg_idx = 0;
>         }
>  
> +       i915_ttm_adjust_lru(obj);
>         i915_ttm_adjust_gem_after_move(obj);
>         return 0;
>  }
> @@ -851,7 +852,6 @@ static int __i915_ttm_get_pages(struct
> drm_i915_gem_object *obj,
>                         return i915_ttm_err_to_gem(ret);
>         }
>  
> -       i915_ttm_adjust_lru(obj);
>         if (bo->ttm && !ttm_tt_is_populated(bo->ttm)) {
>                 ret = ttm_tt_populate(bo->bdev, bo->ttm, &ctx);
>                 if (ret)
> @@ -862,19 +862,15 @@ static int __i915_ttm_get_pages(struct
> drm_i915_gem_object *obj,
>         }
>  
>         if (!i915_gem_object_has_pages(obj)) {
> -               struct i915_ttm_tt *i915_tt =
> -                       container_of(bo->ttm, typeof(*i915_tt), ttm);
> -
>                 /* Object either has a page vector or is an iomem object */
>                 st = bo->ttm ? i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st;
>                 if (IS_ERR(st))
>                         return PTR_ERR(st);
>  
>                 __i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl));
> -               if (!bo->ttm || !i915_tt->is_shmem)
> -                       i915_gem_object_make_unshrinkable(obj);
>         }
>  
> +       i915_ttm_adjust_lru(obj);
>         return ret;
>  }
>  
> @@ -945,8 +941,6 @@ static void i915_ttm_put_pages(struct
> drm_i915_gem_object *obj,
>          * If the object is not destroyed next, The TTM eviction
> logic
>          * and shrinkers will move it out if needed.
>          */
> -
> -       i915_ttm_adjust_lru(obj);
>  }
>  
>  static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
> @@ -954,6 +948,8 @@ static void i915_ttm_adjust_lru(struct
> drm_i915_gem_object *obj)
>         struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
>         struct i915_ttm_tt *i915_tt =
>                 container_of(bo->ttm, typeof(*i915_tt), ttm);
> +       bool shrinkable =
> +               bo->ttm && i915_tt->filp && ttm_tt_is_populated(bo->ttm);
>  
>         /*
>          * Don't manipulate the TTM LRUs while in TTM bo destruction.
> @@ -962,11 +958,40 @@ static void i915_ttm_adjust_lru(struct
> drm_i915_gem_object *obj)
>         if (!kref_read(&bo->kref))
>                 return;
>  
> +       /*
> +        * We skip managing the shrinker LRU in set_pages() and just manage
> +        * everything here. This does at least solve the issue with having
> +        * temporary shmem mappings(like with evicted lmem) not being visible to
> +        * the shrinker. Only our shmem objects are shrinkable, everything else
> +        * we keep as unshrinkable.
> +        *
> +        * To make sure everything plays nice we keep an extra shrink pin in TTM
> +        * if the underlying pages are not currently shrinkable. Once we release
> +        * our pin, like when the pages are moved to shmem, the pages will then
> +        * be added to the shrinker LRU, assuming the caller isn't also holding
> +        * a pin.
> +        *
> +        * TODO: consider maybe also bumping the shrinker list here when we have
> +        * already unpinned it, which should give us something more like an LRU.
> +        */
> +       if (shrinkable != obj->mm.ttm_shrinkable) {
> +               if (shrinkable) {
> +                       if (obj->mm.madv == I915_MADV_WILLNEED)
> +                               __i915_gem_object_make_shrinkable(obj);
> +                       else
> +                               __i915_gem_object_make_purgeable(obj);
> +               } else {
> +                       i915_gem_object_make_unshrinkable(obj);
> +               }
> +
> +               obj->mm.ttm_shrinkable = shrinkable;
> +       }
> +
>         /*
>          * Put on the correct LRU list depending on the MADV status
>          */
>         spin_lock(&bo->bdev->lru_lock);
> -       if (bo->ttm && i915_tt->filp) {
> +       if (shrinkable) {
>                 /* Try to keep shmem_tt from being considered for shrinking. */
>                 bo->priority = TTM_MAX_BO_PRIORITY - 1;
>         } else if (obj->mm.madv != I915_MADV_WILLNEED) {
> @@ -1010,13 +1035,34 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
>         struct vm_area_struct *area = vmf->vma;
>         struct drm_i915_gem_object *obj =
>                 i915_ttm_to_gem(area->vm_private_data);
> +       struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
> +       struct drm_device *dev = bo->base.dev;
> +       vm_fault_t ret;
> +       int idx;
>  
>         /* Sanity check that we allow writing into this object */
>         if (unlikely(i915_gem_object_is_readonly(obj) &&
>                      area->vm_flags & VM_WRITE))
>                 return VM_FAULT_SIGBUS;
>  
> -       return ttm_bo_vm_fault(vmf);
> +       ret = ttm_bo_vm_reserve(bo, vmf);
> +       if (ret)
> +               return ret;
> +
> +       if (drm_dev_enter(dev, &idx)) {
> +               ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
> +                                              TTM_BO_VM_NUM_PREFAULT, 1);
> +               drm_dev_exit(idx);
> +       } else {
> +               ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
> +       }
> +       if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
> +               return ret;
> +
> +       i915_ttm_adjust_lru(obj);
> +
> +       dma_resv_unlock(bo->base.resv);
> +       return ret;
>  }
>  
>  static int
> @@ -1067,6 +1113,7 @@ static u64 i915_ttm_mmap_offset(struct drm_i915_gem_object *obj)
>  
>  static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = {
>         .name = "i915_gem_object_ttm",
> +       .flags = I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST,
>  
>         .get_pages = i915_ttm_get_pages,
>         .put_pages = i915_ttm_put_pages,
> @@ -1089,6 +1136,18 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo)
>         mutex_destroy(&obj->ttm.get_io_page.lock);
>  
>         if (obj->ttm.created) {
> +               /*
> +                * We freely manage the shrinker LRU outside of the mm.pages life
> +                * cycle. As a result when destroying the object we should be
> +                * extra paranoid and ensure we remove it from the LRU, before
> +                * we free the object.
> +                *
> +                * Touching the ttm_shrinkable outside of the object lock here
> +                * should be safe now that the last GEM object ref was dropped.
> +                */
> +               if (obj->mm.ttm_shrinkable)
> +                       i915_gem_object_make_unshrinkable(obj);
> +
>                 i915_ttm_backup_free(obj);
>  
>                 /* This releases all gem object bindings to the backend. */
> @@ -1141,6 +1200,14 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
>         /* Forcing the page size is kernel internal only */
>         GEM_BUG_ON(page_size && obj->mm.n_placements);
>  
> +       /*
> +        * Keep an extra shrink pin to prevent the object from being made
> +        * shrinkable too early. If the ttm_tt is ever allocated in shmem, we
> +        * drop the pin. The TTM backend manages the shrinker LRU itself,
> +        * outside of the normal mm.pages life cycle.
> +        */
> +       i915_gem_object_make_unshrinkable(obj);
> +
>         /*
>          * If this function fails, it will call the destructor, but
>          * our caller still owns the object. So no freeing in the



^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Intel-gfx] [PATCH v9 2/8] drm/i915/ttm: add tt shmem backend
  2021-10-18  9:10   ` [Intel-gfx] " Matthew Auld
@ 2021-12-07 13:10     ` Tvrtko Ursulin
  -1 siblings, 0 replies; 34+ messages in thread
From: Tvrtko Ursulin @ 2021-12-07 13:10 UTC (permalink / raw)
  To: Matthew Auld, intel-gfx
  Cc: Thomas Hellström, Oak Zeng, Christian König, dri-devel


On 18/10/2021 10:10, Matthew Auld wrote:
> For cached objects we can allocate our pages directly in shmem. This
> should make it possible(in a later patch) to utilise the existing
> i915-gem shrinker code for such objects. For now this is still disabled.
> 
> v2(Thomas):
>    - Add optional try_to_writeback hook for objects. Importantly we need
>      to check if the object is even still shrinkable; in between us
>      dropping the shrinker LRU lock and acquiring the object lock it could for
>      example have been moved. Also we need to differentiate between
>      "lazy" shrinking and the immediate writeback mode. Also later we need to
>      handle objects which don't even have mm.pages, so bundling this into
>      put_pages() would require somehow handling that edge case, hence
>      just letting the ttm backend handle everything in try_to_writeback
>      doesn't seem too bad.
> v3(Thomas):
>    - Likely a bad idea to touch the object from the unpopulate hook,
>      since it's not possible to hold a reference, without also creating
>      circular dependency, so likely this is too fragile. For now just
>      ensure we at least mark the pages as dirty/accessed when called from the
>      shrinker on WILLNEED objects.
>    - s/try_to_writeback/shrinker_release_pages, since this can do more
>      than just writeback.
>    - Get rid of do_backup boolean and just set the SWAPPED flag prior to
>      calling unpopulate.
>    - Keep shmem_tt as lowest priority for the TTM LRU bo_swapout walk, since
>      these just get skipped anyway. We can try to come up with something
>      better later.
> v4(Thomas):
>    - s/PCI_DMA/DMA/. Also drop NO_KERNEL_MAPPING and NO_WARN, which
>      apparently doesn't do anything with streaming mappings.
>    - Just pass along the error for ->truncate, and assume nothing.
> 
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Cc: Christian König <christian.koenig@amd.com>
> Cc: Oak Zeng <oak.zeng@intel.com>
> Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Acked-by: Oak Zeng <oak.zeng@intel.com>

[snip]

> -static void try_to_writeback(struct drm_i915_gem_object *obj,
> -			     unsigned int flags)
> +static int try_to_writeback(struct drm_i915_gem_object *obj, unsigned int flags)
>   {
> +	if (obj->ops->shrinker_release_pages)
> +		return obj->ops->shrinker_release_pages(obj,
> +							flags & I915_SHRINK_WRITEBACK);

I have a high-level question about how this new vfunc fits in with the existing code.

Because I notice in the implementation (i915_ttm_shrinker_release_pages) it ends up doing:
...

        switch (obj->mm.madv) {
        case I915_MADV_DONTNEED:
                return i915_ttm_purge(obj);
        case __I915_MADV_PURGED:
                return 0;
        }

... and then...

        if (should_writeback)
                __shmem_writeback(obj->base.size, i915_tt->filp->f_mapping);

Which at a glance looks like a possible conceptual duplication of this very function (try_to_writeback):

> +
>   	switch (obj->mm.madv) {
>   	case I915_MADV_DONTNEED:
>   		i915_gem_object_truncate(obj);
> -		return;
> +		return 0;
>   	case __I915_MADV_PURGED:
> -		return;
> +		return 0;
>   	}
>   
>   	if (flags & I915_SHRINK_WRITEBACK)
>   		i915_gem_object_writeback(obj);

So the question is: is this the final design, or is some further tidying possible/planned?

Background to my question is that I will float a patch which proposes to remove writeback altogether. There are reports from the field that it causes deadlocks, and looking at 2d6692e642e7 ("drm/i915: Start writeback from the shrinker") and its history it seems like it was a known risk.

Apart from the code organisation questions, on the practical level - do you need writeback in the TTM backend, or, while I am proposing to remove it from the "legacy" paths, can I propose removing it from the TTM flow as well?

Regards,

Tvrtko

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Intel-gfx] [PATCH v9 2/8] drm/i915/ttm: add tt shmem backend
  2021-12-07 13:10     ` Tvrtko Ursulin
@ 2021-12-07 14:05       ` Matthew Auld
  -1 siblings, 0 replies; 34+ messages in thread
From: Matthew Auld @ 2021-12-07 14:05 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx
  Cc: Thomas Hellström, Oak Zeng, Christian König, dri-devel

On 07/12/2021 13:10, Tvrtko Ursulin wrote:
> 
> On 18/10/2021 10:10, Matthew Auld wrote:
>> For cached objects we can allocate our pages directly in shmem. This
>> should make it possible(in a later patch) to utilise the existing
>> i915-gem shrinker code for such objects. For now this is still disabled.
>>
>> v2(Thomas):
>>    - Add optional try_to_writeback hook for objects. Importantly we need
>>      to check if the object is even still shrinkable; in between us
>>      dropping the shrinker LRU lock and acquiring the object lock it 
>> could for
>>      example have been moved. Also we need to differentiate between
>>      "lazy" shrinking and the immediate writeback mode. Also later we 
>> need to
>>      handle objects which don't even have mm.pages, so bundling this into
>>      put_pages() would require somehow handling that edge case, hence
>>      just letting the ttm backend handle everything in try_to_writeback
>>      doesn't seem too bad.
>> v3(Thomas):
>>    - Likely a bad idea to touch the object from the unpopulate hook,
>>      since it's not possible to hold a reference, without also creating
>>      circular dependency, so likely this is too fragile. For now just
>>      ensure we at least mark the pages as dirty/accessed when called 
>> from the
>>      shrinker on WILLNEED objects.
>>    - s/try_to_writeback/shrinker_release_pages, since this can do more
>>      than just writeback.
>>    - Get rid of do_backup boolean and just set the SWAPPED flag prior to
>>      calling unpopulate.
>>    - Keep shmem_tt as lowest priority for the TTM LRU bo_swapout walk, 
>> since
>>      these just get skipped anyway. We can try to come up with something
>>      better later.
>> v4(Thomas):
>>    - s/PCI_DMA/DMA/. Also drop NO_KERNEL_MAPPING and NO_WARN, which
>>      apparently doesn't do anything with streaming mappings.
>>    - Just pass along the error for ->truncate, and assume nothing.
>>
>> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
>> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>> Cc: Christian König <christian.koenig@amd.com>
>> Cc: Oak Zeng <oak.zeng@intel.com>
>> Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>> Acked-by: Oak Zeng <oak.zeng@intel.com>
> 
> [snip]
> 
>> -static void try_to_writeback(struct drm_i915_gem_object *obj,
>> -                 unsigned int flags)
>> +static int try_to_writeback(struct drm_i915_gem_object *obj, unsigned 
>> int flags)
>>   {
>> +    if (obj->ops->shrinker_release_pages)
>> +        return obj->ops->shrinker_release_pages(obj,
>> +                            flags & I915_SHRINK_WRITEBACK);
> 
> I have a high level question about how does this new vfunc fit with the 
> existing code.
> 
> Because I notice in the implementation (i915_ttm_shrinker_release_pages) 
> it ends up doing:
> ...
> 
>         switch (obj->mm.madv) {
>         case I915_MADV_DONTNEED:
>                 return i915_ttm_purge(obj);
>         case __I915_MADV_PURGED:
>                 return 0;
>         }
> 
> ... and then...
> 
>         if (should_writeback)
>                 __shmem_writeback(obj->base.size, 
> i915_tt->filp->f_mapping);
> 
> Which on a glance looks like a possible conceptual duplication of the 
> concepts in this very function (try_to_writeback):
> 
>> +
>>       switch (obj->mm.madv) {
>>       case I915_MADV_DONTNEED:
>>           i915_gem_object_truncate(obj);
>> -        return;
>> +        return 0;
>>       case __I915_MADV_PURGED:
>> -        return;
>> +        return 0;
>>       }
>>       if (flags & I915_SHRINK_WRITEBACK)
>>           i915_gem_object_writeback(obj);
> 
> So question is this the final design or some futher tidy is 
> possible/planned?

It seems OK to me, all things considered. The TTM version needs to check
if the object is still shrinkable at the start (plus some other stuff),
upon acquiring the object lock. If that succeeds we can do the above
madv checks and potentially just call truncate. Otherwise we can proceed
with shrinking, but again TTM is special here, and we have to call
ttm_bo_validate() underneath (we might not even have mm.pages here). And
then if that all works we might also be able to perform the writeback,
if needed. So I suppose we could add all that directly in
try_to_writeback() and make it conditional for TTM devices, or I guess
we need two separate hooks, one to do some pre-checking and another to
do the actual swap step. Not sure if that is better or worse though.
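
To make this a bit more concrete, here is a rough sketch of the TTM-side
flow (illustrative only, not the actual i915_ttm_shrinker_release_pages;
i915_ttm_shmem_placement() is a made-up helper standing in for whatever
picks the shmem-backed placement):

static int ttm_shrink_sketch(struct drm_i915_gem_object *obj, bool writeback)
{
	struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
	struct ttm_operation_ctx ctx = { .interruptible = true };
	struct i915_ttm_tt *i915_tt;
	int err;

	/* Re-check shrinkability under the object lock; the object may
	 * have been moved since the shrinker dropped its LRU lock. */
	if (!bo->ttm)
		return 0;
	i915_tt = container_of(bo->ttm, typeof(*i915_tt), ttm);
	if (!i915_tt->filp)
		return 0;

	/* madv checks: DONTNEED objects are simply purged. */
	switch (obj->mm.madv) {
	case I915_MADV_DONTNEED:
		return i915_ttm_purge(obj);
	case __I915_MADV_PURGED:
		return 0;
	}

	/* Move the backing store into shmem; this also covers objects
	 * which never had mm.pages. */
	err = ttm_bo_validate(bo, i915_ttm_shmem_placement(obj), &ctx);
	if (err)
		return err;

	/* Optionally kick writeback so reclaim can make forward progress. */
	if (writeback)
		__shmem_writeback(obj->base.size, i915_tt->filp->f_mapping);

	return 0;
}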

> 
> Background to my question is that I will float a patch which proposes to 
> remove writeback altogether. There are reports from the fields that it 
> causes deadlocks and looking at 2d6692e642e7 ("drm/i915: Start writeback 
> from the shrinker") and its history it seems like it was a known risk.
> 
> Apart from the code organisation questions, on the practical level - do 
> you need writeback from the TTM backend or while I am proposing to 
> remove it from the "legacy" paths, I can propose removing it from the 
> TTM flow as well?

Yeah, if that is somehow busted then we should remove it from the TTM backend also.

> 
> Regards,
> 
> Tvrtko

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Intel-gfx] [PATCH v9 2/8] drm/i915/ttm: add tt shmem backend
  2021-12-07 14:05       ` Matthew Auld
@ 2021-12-08  8:30         ` Tvrtko Ursulin
  -1 siblings, 0 replies; 34+ messages in thread
From: Tvrtko Ursulin @ 2021-12-08  8:30 UTC (permalink / raw)
  To: Matthew Auld, intel-gfx
  Cc: Thomas Hellström, Oak Zeng, Christian König, dri-devel


On 07/12/2021 14:05, Matthew Auld wrote:
> On 07/12/2021 13:10, Tvrtko Ursulin wrote:
>>
>> On 18/10/2021 10:10, Matthew Auld wrote:
>>> For cached objects we can allocate our pages directly in shmem. This
>>> should make it possible(in a later patch) to utilise the existing
>>> i915-gem shrinker code for such objects. For now this is still disabled.
>>>
>>> v2(Thomas):
>>>    - Add optional try_to_writeback hook for objects. Importantly we need
>>>      to check if the object is even still shrinkable; in between us
>>>      dropping the shrinker LRU lock and acquiring the object lock it 
>>> could for
>>>      example have been moved. Also we need to differentiate between
>>>      "lazy" shrinking and the immediate writeback mode. Also later we 
>>> need to
>>>      handle objects which don't even have mm.pages, so bundling this 
>>> into
>>>      put_pages() would require somehow handling that edge case, hence
>>>      just letting the ttm backend handle everything in try_to_writeback
>>>      doesn't seem too bad.
>>> v3(Thomas):
>>>    - Likely a bad idea to touch the object from the unpopulate hook,
>>>      since it's not possible to hold a reference, without also creating
>>>      circular dependency, so likely this is too fragile. For now just
>>>      ensure we at least mark the pages as dirty/accessed when called 
>>> from the
>>>      shrinker on WILLNEED objects.
>>>    - s/try_to_writeback/shrinker_release_pages, since this can do more
>>>      than just writeback.
>>>    - Get rid of do_backup boolean and just set the SWAPPED flag prior to
>>>      calling unpopulate.
>>>    - Keep shmem_tt as lowest priority for the TTM LRU bo_swapout 
>>> walk, since
>>>      these just get skipped anyway. We can try to come up with something
>>>      better later.
>>> v4(Thomas):
>>>    - s/PCI_DMA/DMA/. Also drop NO_KERNEL_MAPPING and NO_WARN, which
>>>      apparently doesn't do anything with streaming mappings.
>>>    - Just pass along the error for ->truncate, and assume nothing.
>>>
>>> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
>>> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>>> Cc: Christian König <christian.koenig@amd.com>
>>> Cc: Oak Zeng <oak.zeng@intel.com>
>>> Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>>> Acked-by: Oak Zeng <oak.zeng@intel.com>
>>
>> [snip]
>>
>>> -static void try_to_writeback(struct drm_i915_gem_object *obj,
>>> -                 unsigned int flags)
>>> +static int try_to_writeback(struct drm_i915_gem_object *obj, 
>>> unsigned int flags)
>>>   {
>>> +    if (obj->ops->shrinker_release_pages)
>>> +        return obj->ops->shrinker_release_pages(obj,
>>> +                            flags & I915_SHRINK_WRITEBACK);
>>
>> I have a high level question about how does this new vfunc fit with 
>> the existing code.
>>
>> Because I notice in the implementation 
>> (i915_ttm_shrinker_release_pages) it ends up doing:
>> ...
>>
>>         switch (obj->mm.madv) {
>>         case I915_MADV_DONTNEED:
>>                 return i915_ttm_purge(obj);
>>         case __I915_MADV_PURGED:
>>                 return 0;
>>         }
>>
>> ... and then...
>>
>>         if (should_writeback)
>>                 __shmem_writeback(obj->base.size, 
>> i915_tt->filp->f_mapping);
>>
>> Which on a glance looks like a possible conceptual duplication of the 
>> concepts in this very function (try_to_writeback):
>>
>>> +
>>>       switch (obj->mm.madv) {
>>>       case I915_MADV_DONTNEED:
>>>           i915_gem_object_truncate(obj);
>>> -        return;
>>> +        return 0;
>>>       case __I915_MADV_PURGED:
>>> -        return;
>>> +        return 0;
>>>       }
>>>       if (flags & I915_SHRINK_WRITEBACK)
>>>           i915_gem_object_writeback(obj);
>>
>> So question is this the final design or some futher tidy is 
>> possible/planned?
> 
> It seems ok to me, all things considered. The TTM version needs to check 
> if the object is still shrinkable at the start(plus some other stuff), 
> upon acquiring the object lock. If that succeeds we can do the above 
> madv checks and potentially just call truncate. Otherwise we can proceed 
> with shrinking, but again TTM is special here, and we have to call 
> ttm_bo_validate() underneath(we might not even have mm.pages here). And 
> then if that all works we might be able to also perform the writeback, 
> if needed. So I suppose we could add all that directly in 
> try_to_writeback(), and make it conditional for TTM devices, or I guess 
> we need two separate hooks, one to do some pre-checking and another do 
> the actual swap step. Not sure if that is better or worse though.

Would implementing shrinker_release_pages for all objects work? It
would contain what is currently in try_to_writeback, and so the two
paths, if not compatible, would diverge cleanly at the same level of
indirection. That way we would not have two effectively mutually
exclusive vfuncs (shrinker_release_pages and writeback) and could
unexport i915_gem_object_writeback.
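
Roughly something like this, just a sketch to illustrate the idea based
on the hunks quoted above (shmem_shrinker_release_pages is a name I made
up for the shmem backend's implementation of the hook):

static int try_to_writeback(struct drm_i915_gem_object *obj, unsigned int flags)
{
	/* Every backend provides the hook, so no fallback path is needed. */
	return obj->ops->shrinker_release_pages(obj,
						flags & I915_SHRINK_WRITEBACK);
}

static int shmem_shrinker_release_pages(struct drm_i915_gem_object *obj,
					bool writeback)
{
	/* Carries what is currently open-coded in try_to_writeback(). */
	switch (obj->mm.madv) {
	case I915_MADV_DONTNEED:
		i915_gem_object_truncate(obj);
		return 0;
	case __I915_MADV_PURGED:
		return 0;
	}

	if (writeback)
		i915_gem_object_writeback(obj);

	return 0;
}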

>> Background to my question is that I will float a patch which proposes 
>> to remove writeback altogether. There are reports from the fields that 
>> it causes deadlocks and looking at 2d6692e642e7 ("drm/i915: Start 
>> writeback from the shrinker") and its history it seems like it was a 
>> known risk.
>>
>> Apart from the code organisation questions, on the practical level - 
>> do you need writeback from the TTM backend or while I am proposing to 
>> remove it from the "legacy" paths, I can propose removing it from the 
>> TTM flow as well?
> 
> Yeah, if that is somehow busted then we should remove from TTM backend 
> also.

Okay, thanks - I wanted to check in case there was an extra need in TTM.
I will hopefully float a patch soon, but testing will be a problem since
it seems very hard to repro at the moment.

Regards,

Tvrtko

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Intel-gfx] [PATCH v9 2/8] drm/i915/ttm: add tt shmem backend
  2021-12-08  8:30         ` Tvrtko Ursulin
@ 2021-12-08  8:39           ` Thomas Hellström
  -1 siblings, 0 replies; 34+ messages in thread
From: Thomas Hellström @ 2021-12-08  8:39 UTC (permalink / raw)
  To: Tvrtko Ursulin, Matthew Auld, intel-gfx
  Cc: Oak Zeng, Christian König, dri-devel


On 12/8/21 09:30, Tvrtko Ursulin wrote:


...


>>> Apart from the code organisation questions, on the practical level - 
>>> do you need writeback from the TTM backend or while I am proposing 
>>> to remove it from the "legacy" paths, I can propose removing it from 
>>> the TTM flow as well?
>>
>> Yeah, if that is somehow busted then we should remove from TTM 
>> backend also.
>
> Okay thanks, I wanted to check in case there was an extra need in TTM. 
> I will float a patch soon hopefully but testing will be a problem 
> since it seems very hard to repro at the moment.

Do we have some information about what's causing the deadlock, or a
signature? I'm asking because if some sort of shrinker were added to TTM
itself, for the TTM page vectors, it would need to allocate shmem pages
at shrink time rather than unpin them at shrink time as we do here. And
for that to have any chance of working reliably, I think writeback is
needed.

But I agree that for this implementation, the need for writeback isn't
any different than for the non-TTM shmem objects.
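
Just to illustrate what I mean (purely hypothetical, none of these
helpers exist in TTM today; ttm_tt_copy_to_shmem() is made up):

static int ttm_tt_shrink_sketch(struct ttm_tt *tt)
{
	struct address_space *mapping;
	int err;

	/*
	 * A TTM-level shrinker would have to allocate shmem pages and copy
	 * the page vector into them at shrink time, i.e. allocate while
	 * already under reclaim, rather than just unpin pages that already
	 * live in shmem like we do here.
	 */
	err = ttm_tt_copy_to_shmem(tt, &mapping);	/* hypothetical */
	if (err)
		return err;

	/*
	 * Without starting writeback the copy just sits in the page cache,
	 * so reclaim makes little forward progress; hence writeback.
	 */
	__shmem_writeback((size_t)tt->num_pages << PAGE_SHIFT, mapping);
	return 0;
}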

Thanks,

Thomas


>
> Regards,
>
> Tvrtko


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Intel-gfx] [PATCH v9 2/8] drm/i915/ttm: add tt shmem backend
  2021-12-08  8:39           ` Thomas Hellström
@ 2021-12-08  9:24             ` Tvrtko Ursulin
  -1 siblings, 0 replies; 34+ messages in thread
From: Tvrtko Ursulin @ 2021-12-08  9:24 UTC (permalink / raw)
  To: Thomas Hellström, Matthew Auld, intel-gfx
  Cc: Oak Zeng, Christian König, dri-devel


On 08/12/2021 08:39, Thomas Hellström wrote:
> 
> On 12/8/21 09:30, Tvrtko Ursulin wrote:
> 
> 
> ...
> 
> 
>>>> Apart from the code organisation questions, on the practical level - 
>>>> do you need writeback from the TTM backend or while I am proposing 
>>>> to remove it from the "legacy" paths, I can propose removing it from 
>>>> the TTM flow as well?
>>>
>>> Yeah, if that is somehow busted then we should remove from TTM 
>>> backend also.
>>
>> Okay thanks, I wanted to check in case there was an extra need in TTM. 
>> I will float a patch soon hopefully but testing will be a problem 
>> since it seems very hard to repro at the moment.
> 
> Do we have some information about what's causing the deadlock or a 
> signature? I'm asking because if some sort of shrinker was added to TTM 

Yes, the signature is the hung task detector kicking in, after which pretty much a system standstill ensues. You will find a bunch of tasks stuck like this:

[  247.165578] chrome          D    0  1791   1785 0x00004080
[  247.171732] Call Trace:
[  247.174492]  __schedule+0x57e/0x10d2
[  247.178514]  ? pcpu_alloc_area+0x25d/0x273
[  247.183115]  schedule+0x7c/0x9f
[  247.186647]  rwsem_down_write_slowpath+0x2ae/0x494
[  247.192025]  register_shrinker_prepared+0x19/0x48
[  247.197310]  ? test_single_super+0x10/0x10
[  247.201910]  sget_fc+0x1fc/0x20e
[  247.205540]  ? kill_litter_super+0x40/0x40
[  247.210139]  ? proc_apply_options+0x42/0x42
[  247.214838]  vfs_get_super+0x3a/0xdf
[  247.218855]  vfs_get_tree+0x2b/0xc3
[  247.222911]  fc_mount+0x12/0x39
[  247.226814]  pid_ns_prepare_proc+0x9d/0xc5
[  247.232468]  alloc_pid+0x275/0x289
[  247.236432]  copy_process+0x5e5/0xeea
[  247.240640]  _do_fork+0x95/0x303
[  247.244261]  __se_sys_clone+0x65/0x7f
[  247.248366]  do_syscall_64+0x54/0x7e
[  247.252365]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

Or this:

[  247.030274] minijail-init   D    0  1773   1770 0x80004082
[  247.036419] Call Trace:
[  247.039167]  __schedule+0x57e/0x10d2
[  247.043175]  ? __schedule+0x586/0x10d2
[  247.047381]  ? _raw_spin_unlock+0xe/0x20
[  247.051779]  ? __queue_work+0x316/0x371
[  247.056079]  schedule+0x7c/0x9f
[  247.059602]  rwsem_down_write_slowpath+0x2ae/0x494
[  247.064971]  unregister_shrinker+0x20/0x65
[  247.069562]  deactivate_locked_super+0x38/0x88
[  247.074538]  cleanup_mnt+0xcc/0x10e
[  247.078447]  task_work_run+0x7d/0xa6
[  247.082459]  do_exit+0x23d/0x831
[  247.086079]  ? syscall_trace_enter+0x207/0x20e
[  247.091055]  do_group_exit+0x8d/0x9d
[  247.095062]  __x64_sys_exit_group+0x17/0x17
[  247.099750]  do_syscall_64+0x54/0x7e
[  247.103758]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

And the smoking gun is kswapd doing writeback from inside the i915 shrinker:

[  247.383338] CPU: 3 PID: 88 Comm: kswapd0 Tainted: G     U            5.4.154 #36
[  247.383338] Hardware name: Google Delbin/Delbin, BIOS Google_Delbin.13672.57.0 02/09/2021
[  247.383339] RIP: 0010:__rcu_read_lock+0x0/0x1a
[  247.383339] Code: ff ff 0f 0b e9 61 fe ff ff 0f 0b e9 76 fe ff ff 0f 0b 49 8b 44 24 20 e9 59 ff ff ff 0f 0b e9 67 ff ff ff 0f 0b e9 1b ff ff ff <0f> 1f 44 00 00 55 48 89 e5 65 48 8b 04 25 80 5d 01 00 ff 80 f8 03
[  247.383340] RSP: 0018:ffffb0aa0031b978 EFLAGS: 00000286
[  247.383340] RAX: 0000000000000000 RBX: fffff6b944ca8040 RCX: fffff6b944ca8001
[  247.383341] RDX: 0000000000000028 RSI: 0000000000000001 RDI: ffff8b52bc618c18
[  247.383341] RBP: ffffb0aa0031b9d0 R08: 0000000000000000 R09: ffff8b52fb5f00d8
[  247.383341] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
[  247.383342] R13: 61c8864680b583eb R14: 0000000000000001 R15: ffffb0aa0031b980
[  247.383342] FS:  0000000000000000(0000) GS:ffff8b52fbf80000(0000) knlGS:0000000000000000
[  247.383343] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  247.383343] CR2: 00007c78a400d680 CR3: 0000000120f46006 CR4: 0000000000762ee0
[  247.383344] PKRU: 55555554
[  247.383344] Call Trace:
[  247.383345]  find_get_entry+0x4c/0x116
[  247.383345]  find_lock_entry+0xc8/0xec
[  247.383346]  shmem_writeback+0x7b/0x163
[  247.383346]  i915_gem_shrink+0x302/0x40b
[  247.383347]  ? __intel_runtime_pm_get+0x22/0x82
[  247.383347]  i915_gem_shrinker_scan+0x86/0xa8
[  247.383348]  shrink_slab+0x272/0x48b
[  247.383348]  shrink_node+0x784/0xbea
[  247.383348]  ? rcu_read_unlock_special+0x66/0x15f
[  247.383349]  ? update_batch_size+0x78/0x78
[  247.383349]  kswapd+0x75c/0xa56
[  247.383350]  kthread+0x147/0x156
[  247.383350]  ? kswapd_run+0xb6/0xb6
[  247.383351]  ? kthread_blkcg+0x2e/0x2e
[  247.383351]  ret_from_fork+0x1f/0x40
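
For reference, the writeback path kswapd ends up in above boils down to 
roughly the following. This is a simplified sketch of the 
shmem_writeback()/__shmem_writeback() idea, not the exact in-tree code; the 
key point is that it calls ->writepage() on shmem pages from inside the 
shrinker, i.e. from reclaim context:

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/writeback.h>

/* Simplified sketch; details differ from the actual i915 helper. */
static void shmem_writeback_sketch(struct address_space *mapping,
				   pgoff_t nr_pages)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
		.nr_to_write = SWAP_CLUSTER_MAX,
		.range_start = 0,
		.range_end = LLONG_MAX,
		.for_reclaim = 1,
	};
	pgoff_t i;

	for (i = 0; i < nr_pages; i++) {
		/* find_lock_page()/find_lock_entry() is what shows up in
		 * the trace above. */
		struct page *page = find_lock_page(mapping, i);

		if (!page)
			continue;

		if (clear_page_dirty_for_io(page))
			/* ->writepage() unlocks the page and pushes it
			 * towards swap, all from reclaim context. */
			mapping->a_ops->writepage(page, &wbc);
		else
			unlock_page(page);

		put_page(page);
	}
}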

Sadly, getting logs or a repro on 5.16-rc is more difficult due to other issues, or the problem is altogether gone, which is also a possibility. It is also possible that transparent hugepages either enable the hang or just make it more likely.

However, given the history of writeback, I think it sounds plausible that it is indeed unsafe. I will try to dig out the reply from Hugh Dickins who advised against doing it; I think that advice has not changed, or I have failed to find a later thread. There is at least a mention of that discussion in the patch which added writeback.

> itself, for the TTM page vectors, it would need to allocate shmem pages 
> at shrink time rather than to unpin them at shrink time as we do here. 
> And for that to have any chance of working sort of reliably, I think 
> writeback is needed.

I don't claim to understand it fully, but won't the system take care of that, with the only difference being that allocation might work a little less reliably under extreme memory pressure? And I did not find other drivers using it, which may be an indication we should indeed steer clear of it.

Regards,

Tvrtko

> But I agree for this implementation, the need for writeback isn't 
> different than for the non-TTM shmem objects> 
> Thanks,
> 
> Thomas
> 
> 
>>
>> Regards,
>>
>> Tvrtko
> 

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Intel-gfx] [PATCH v9 2/8] drm/i915/ttm: add tt shmem backend
  2021-12-08  9:24             ` Tvrtko Ursulin
@ 2021-12-08  9:32               ` Thomas Hellström
  -1 siblings, 0 replies; 34+ messages in thread
From: Thomas Hellström @ 2021-12-08  9:32 UTC (permalink / raw)
  To: Tvrtko Ursulin, Matthew Auld, intel-gfx
  Cc: Oak Zeng, Christian König, dri-devel


On 12/8/21 10:24, Tvrtko Ursulin wrote:
>
> On 08/12/2021 08:39, Thomas Hellström wrote:
>>
>> On 12/8/21 09:30, Tvrtko Ursulin wrote:
>>
>>
>> ...
>>
>>
>>>>> Apart from the code organisation questions, on the practical level 
>>>>> - do you need writeback from the TTM backend or while I am 
>>>>> proposing to remove it from the "legacy" paths, I can propose 
>>>>> removing it from the TTM flow as well?
>>>>
>>>> Yeah, if that is somehow busted then we should remove from TTM 
>>>> backend also.
>>>
>>> Okay thanks, I wanted to check in case there was an extra need in 
>>> TTM. I will float a patch soon hopefully but testing will be a 
>>> problem since it seems very hard to repro at the moment.
>>
>> Do we have some information about what's causing the deadlock or a 
>> signature? I'm asking because if some sort of shrinker was added to TTM 
>
> Yes, signature is hung task detector kicking in and pretty much system 
> standstill ensues. You will find a bunch of tasks stuck like this:
>
> [  247.165578] chrome          D    0  1791   1785 0x00004080
> [  247.171732] Call Trace:
> [  247.174492]  __schedule+0x57e/0x10d2
> [  247.178514]  ? pcpu_alloc_area+0x25d/0x273
> [  247.183115]  schedule+0x7c/0x9f
> [  247.186647]  rwsem_down_write_slowpath+0x2ae/0x494
> [  247.192025]  register_shrinker_prepared+0x19/0x48
> [  247.197310]  ? test_single_super+0x10/0x10
> [  247.201910]  sget_fc+0x1fc/0x20e
> [  247.205540]  ? kill_litter_super+0x40/0x40
> [  247.210139]  ? proc_apply_options+0x42/0x42
> [  247.214838]  vfs_get_super+0x3a/0xdf
> [  247.218855]  vfs_get_tree+0x2b/0xc3
> [  247.222911]  fc_mount+0x12/0x39
> [  247.226814]  pid_ns_prepare_proc+0x9d/0xc5
> [  247.232468]  alloc_pid+0x275/0x289
> [  247.236432]  copy_process+0x5e5/0xeea
> [  247.240640]  _do_fork+0x95/0x303
> [  247.244261]  __se_sys_clone+0x65/0x7f
> [  247.248366]  do_syscall_64+0x54/0x7e
> [  247.252365]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
>
> Or this:
>
> [  247.030274] minijail-init   D    0  1773   1770 0x80004082
> [  247.036419] Call Trace:
> [  247.039167]  __schedule+0x57e/0x10d2
> [  247.043175]  ? __schedule+0x586/0x10d2
> [  247.047381]  ? _raw_spin_unlock+0xe/0x20
> [  247.051779]  ? __queue_work+0x316/0x371
> [  247.056079]  schedule+0x7c/0x9f
> [  247.059602]  rwsem_down_write_slowpath+0x2ae/0x494
> [  247.064971]  unregister_shrinker+0x20/0x65
> [  247.069562]  deactivate_locked_super+0x38/0x88
> [  247.074538]  cleanup_mnt+0xcc/0x10e
> [  247.078447]  task_work_run+0x7d/0xa6
> [  247.082459]  do_exit+0x23d/0x831
> [  247.086079]  ? syscall_trace_enter+0x207/0x20e
> [  247.091055]  do_group_exit+0x8d/0x9d
> [  247.095062]  __x64_sys_exit_group+0x17/0x17
> [  247.099750]  do_syscall_64+0x54/0x7e
> [  247.103758]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
>
> And smoking gun is:
>
> [  247.383338] CPU: 3 PID: 88 Comm: kswapd0 Tainted: G U            
> 5.4.154 #36
> [  247.383338] Hardware name: Google Delbin/Delbin, BIOS 
> Google_Delbin.13672.57.0 02/09/2021
> [  247.383339] RIP: 0010:__rcu_read_lock+0x0/0x1a
> [  247.383339] Code: ff ff 0f 0b e9 61 fe ff ff 0f 0b e9 76 fe ff ff 
> 0f 0b 49 8b 44 24 20 e9 59 ff ff ff 0f 0b e9 67 ff ff ff 0f 0b e9 1b 
> ff ff ff <0f> 1f 44 00 00 55 48 89 e5 65 48 8b 04 25 80 5d 01 00 ff 80 
> f8 03
> [  247.383340] RSP: 0018:ffffb0aa0031b978 EFLAGS: 00000286
> [  247.383340] RAX: 0000000000000000 RBX: fffff6b944ca8040 RCX: 
> fffff6b944ca8001
> [  247.383341] RDX: 0000000000000028 RSI: 0000000000000001 RDI: 
> ffff8b52bc618c18
> [  247.383341] RBP: ffffb0aa0031b9d0 R08: 0000000000000000 R09: 
> ffff8b52fb5f00d8
> [  247.383341] R10: 0000000000000000 R11: 0000000000000000 R12: 
> 0000000000000000
> [  247.383342] R13: 61c8864680b583eb R14: 0000000000000001 R15: 
> ffffb0aa0031b980
> [  247.383342] FS:  0000000000000000(0000) GS:ffff8b52fbf80000(0000) 
> knlGS:0000000000000000
> [  247.383343] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [  247.383343] CR2: 00007c78a400d680 CR3: 0000000120f46006 CR4: 
> 0000000000762ee0
> [  247.383344] PKRU: 55555554
> [  247.383344] Call Trace:
> [  247.383345]  find_get_entry+0x4c/0x116
> [  247.383345]  find_lock_entry+0xc8/0xec
> [  247.383346]  shmem_writeback+0x7b/0x163
> [  247.383346]  i915_gem_shrink+0x302/0x40b
> [  247.383347]  ? __intel_runtime_pm_get+0x22/0x82
> [  247.383347]  i915_gem_shrinker_scan+0x86/0xa8
> [  247.383348]  shrink_slab+0x272/0x48b
> [  247.383348]  shrink_node+0x784/0xbea
> [  247.383348]  ? rcu_read_unlock_special+0x66/0x15f
> [  247.383349]  ? update_batch_size+0x78/0x78
> [  247.383349]  kswapd+0x75c/0xa56
> [  247.383350]  kthread+0x147/0x156
> [  247.383350]  ? kswapd_run+0xb6/0xb6
> [  247.383351]  ? kthread_blkcg+0x2e/0x2e
> [  247.383351]  ret_from_fork+0x1f/0x40
>
> Sadly getting logs or repro from 5.16-rc is more difficult due other 
> issues, or altogether gone, which is also a possibility. It is also 
> possible that transparent hugepages either enable the hang, or just 
> make it more likely.
>
> However due history of writeback I think it sounds plausible that it 
> is indeed unsafe. I will try to dig out a reply from Hugh Dickins who 
> advised against doing it and I think that advice did not change, or I 
> failed to find a later thread. There is at least a mention of that 
> discussion in the patch which added writeback.
>
>> itself, for the TTM page vectors, it would need to allocate shmem 
>> pages at shrink time rather than to unpin them at shrink time as we 
>> do here. And for that to have any chance of working sort of reliably, 
>> I think writeback is needed.
>
> I don't claim to understand it fully, but won't the system take care 
> of that, with the only difference being that allocation might work a 
> little less reliably under extreme memory pressure? 

Yes, IIRC it's exactly this that made the previous attempt at a generic 
TTM shrinker fail.


> And I did not find other drivers using it, which may be an indication 
> we should indeed steer clear of it.

You're probably right.

>
> Regards,
>
> Tvrtko

Thanks,

Thomas




>
>> But I agree for this implementation, the need for writeback isn't 
>> different than for the non-TTM shmem objects> Thanks,
>>
>> Thomas
>>
>>
>>>
>>> Regards,
>>>
>>> Tvrtko
>>

^ permalink raw reply	[flat|nested] 34+ messages in thread

end of thread, other threads:[~2021-12-08  9:32 UTC | newest]

Thread overview: 34+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-18  9:10 [PATCH v9 1/8] drm/i915/gem: Break out some shmem backend utils Matthew Auld
2021-10-18  9:10 ` [Intel-gfx] " Matthew Auld
2021-10-18  9:10 ` [PATCH v9 2/8] drm/i915/ttm: add tt shmem backend Matthew Auld
2021-10-18  9:10   ` [Intel-gfx] " Matthew Auld
2021-12-07 13:10   ` Tvrtko Ursulin
2021-12-07 13:10     ` Tvrtko Ursulin
2021-12-07 14:05     ` Matthew Auld
2021-12-07 14:05       ` Matthew Auld
2021-12-08  8:30       ` Tvrtko Ursulin
2021-12-08  8:30         ` Tvrtko Ursulin
2021-12-08  8:39         ` Thomas Hellström
2021-12-08  8:39           ` Thomas Hellström
2021-12-08  9:24           ` Tvrtko Ursulin
2021-12-08  9:24             ` Tvrtko Ursulin
2021-12-08  9:32             ` Thomas Hellström
2021-12-08  9:32               ` Thomas Hellström
2021-10-18  9:10 ` [PATCH v9 3/8] drm/i915/gtt: drop unneeded make_unshrinkable Matthew Auld
2021-10-18  9:10   ` [Intel-gfx] " Matthew Auld
2021-10-18  9:10 ` [PATCH v9 4/8] drm/i915: drop unneeded make_unshrinkable in free_object Matthew Auld
2021-10-18  9:10   ` [Intel-gfx] " Matthew Auld
2021-10-18  9:10 ` [PATCH v9 5/8] drm/i915: add some kernel-doc for shrink_pin and friends Matthew Auld
2021-10-18  9:10   ` [Intel-gfx] " Matthew Auld
2021-10-18  9:10 ` [PATCH v9 6/8] drm/i915/ttm: move shrinker management into adjust_lru Matthew Auld
2021-10-18  9:10   ` [Intel-gfx] " Matthew Auld
2021-10-20 14:32   ` Thomas Hellström
2021-10-20 14:32     ` [Intel-gfx] " Thomas Hellström
2021-10-18  9:10 ` [PATCH v9 7/8] drm/i915/ttm: use cached system pages when evicting lmem Matthew Auld
2021-10-18  9:10   ` [Intel-gfx] " Matthew Auld
2021-10-18  9:10 ` [PATCH v9 8/8] drm/i915/ttm: enable shmem tt backend Matthew Auld
2021-10-18  9:10   ` [Intel-gfx] " Matthew Auld
2021-10-18 11:57 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [v9,1/8] drm/i915/gem: Break out some shmem backend utils Patchwork
2021-10-18 11:58 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2021-10-18 12:29 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2021-10-18 15:48 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.