dri-devel.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/4] drm/ttm: set the tt caching state at creation time
@ 2020-10-08  9:31 Christian König
  2020-10-08  9:31 ` [PATCH 2/4] drm/ttm: add caching state to ttm_bus_placement Christian König
                   ` (3 more replies)
  0 siblings, 4 replies; 7+ messages in thread
From: Christian König @ 2020-10-08  9:31 UTC (permalink / raw)
  To: dri-devel, daniel

All drivers can determine the tt caching state at creation time, so there
is no need to do this on the fly during every validation.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c    |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 11 +++++--
 drivers/gpu/drm/drm_gem_vram_helper.c      |  2 +-
 drivers/gpu/drm/nouveau/nouveau_sgdma.c    | 13 ++++++++-
 drivers/gpu/drm/qxl/qxl_ttm.c              |  2 +-
 drivers/gpu/drm/radeon/radeon_ttm.c        | 16 ++++++++--
 drivers/gpu/drm/ttm/ttm_agp_backend.c      |  2 +-
 drivers/gpu/drm/ttm/ttm_page_alloc.c       | 26 ++++++++---------
 drivers/gpu/drm/ttm/ttm_page_alloc_dma.c   | 20 ++++++-------
 drivers/gpu/drm/ttm/ttm_tt.c               | 33 +++++++++++----------
 drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c |  6 ++--
 include/drm/ttm/ttm_caching.h              | 34 ++++++++++++++++++++++
 include/drm/ttm/ttm_tt.h                   | 16 ++++------
 13 files changed, 123 insertions(+), 60 deletions(-)
 create mode 100644 include/drm/ttm/ttm_caching.h

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 213ef090bb0e..3c5ad69eff19 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -124,7 +124,7 @@ uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 	struct ttm_dma_tt *ttm;
 
-	if (bo->num_pages != 1 || bo->ttm->caching_state == tt_cached)
+	if (bo->num_pages != 1 || bo->ttm->caching == ttm_cached)
 		return AMDGPU_BO_INVALID_OFFSET;
 
 	ttm = container_of(bo->ttm, struct ttm_dma_tt, ttm);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 399961035ae6..7f41a47e7353 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1292,7 +1292,9 @@ static void amdgpu_ttm_backend_destroy(struct ttm_bo_device *bdev,
 static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
 					   uint32_t page_flags)
 {
+	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
 	struct amdgpu_ttm_tt *gtt;
+	enum ttm_caching caching;
 
 	gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
 	if (gtt == NULL) {
@@ -1300,8 +1302,13 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
 	}
 	gtt->gobj = &bo->base;
 
+	if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
+		caching = ttm_write_combined;
+	else
+		caching = ttm_cached;
+
 	/* allocate space for the uninitialized page entries */
-	if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags)) {
+	if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags, caching)) {
 		kfree(gtt);
 		return NULL;
 	}
@@ -1525,7 +1532,7 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)
 	if (mem && mem->mem_type == TTM_PL_TT) {
 		flags |= AMDGPU_PTE_SYSTEM;
 
-		if (ttm->caching_state == tt_cached)
+		if (ttm->caching == ttm_cached)
 			flags |= AMDGPU_PTE_SNOOPED;
 	}
 
diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c
index 3213429f8444..ad58d0af5141 100644
--- a/drivers/gpu/drm/drm_gem_vram_helper.c
+++ b/drivers/gpu/drm/drm_gem_vram_helper.c
@@ -918,7 +918,7 @@ static struct ttm_tt *bo_driver_ttm_tt_create(struct ttm_buffer_object *bo,
 	if (!tt)
 		return NULL;
 
-	ret = ttm_tt_init(tt, bo, page_flags);
+	ret = ttm_tt_init(tt, bo, page_flags, ttm_cached);
 	if (ret < 0)
 		goto err_ttm_tt_init;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
index 806d9ec310f5..cd6fdebae795 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
@@ -5,6 +5,7 @@
 #include "nouveau_drv.h"
 #include "nouveau_mem.h"
 #include "nouveau_ttm.h"
+#include "nouveau_bo.h"
 
 struct nouveau_sgdma_be {
 	/* this has to be the first field so populate/unpopulated in
@@ -67,13 +68,23 @@ nouveau_sgdma_unbind(struct ttm_bo_device *bdev, struct ttm_tt *ttm)
 struct ttm_tt *
 nouveau_sgdma_create_ttm(struct ttm_buffer_object *bo, uint32_t page_flags)
 {
+	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
+	struct nouveau_bo *nvbo = nouveau_bo(bo);
 	struct nouveau_sgdma_be *nvbe;
+	enum ttm_caching caching;
+
+	if (nvbo->force_coherent)
+		caching = ttm_uncached;
+	else if (drm->agp.bridge)
+		caching = ttm_write_combined;
+	else
+		caching = ttm_cached;
 
 	nvbe = kzalloc(sizeof(*nvbe), GFP_KERNEL);
 	if (!nvbe)
 		return NULL;
 
-	if (ttm_dma_tt_init(&nvbe->ttm, bo, page_flags)) {
+	if (ttm_dma_tt_init(&nvbe->ttm, bo, page_flags, caching)) {
 		kfree(nvbe);
 		return NULL;
 	}
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index 669bceb58205..f50863493f64 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -133,7 +133,7 @@ static struct ttm_tt *qxl_ttm_tt_create(struct ttm_buffer_object *bo,
 	ttm = kzalloc(sizeof(struct ttm_tt), GFP_KERNEL);
 	if (ttm == NULL)
 		return NULL;
-	if (ttm_tt_init(ttm, bo, page_flags)) {
+	if (ttm_tt_init(ttm, bo, page_flags, ttm_cached)) {
 		kfree(ttm);
 		return NULL;
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 63e38b05a5bc..130a7cea35c3 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -546,7 +546,7 @@ static int radeon_ttm_backend_bind(struct ttm_bo_device *bdev,
 		WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
 		     ttm->num_pages, bo_mem, ttm);
 	}
-	if (ttm->caching_state == tt_cached)
+	if (ttm->caching == ttm_cached)
 		flags |= RADEON_GART_PAGE_SNOOP;
 	r = radeon_gart_bind(rdev, gtt->offset, ttm->num_pages,
 			     ttm->pages, gtt->ttm.dma_address, flags);
@@ -590,6 +590,10 @@ static struct ttm_tt *radeon_ttm_tt_create(struct ttm_buffer_object *bo,
 {
 	struct radeon_device *rdev;
 	struct radeon_ttm_tt *gtt;
+	enum ttm_caching caching;
+	struct radeon_bo *rbo;
+
+	rbo = container_of(bo, struct radeon_bo, tbo);
 
 	rdev = radeon_get_rdev(bo->bdev);
 #if IS_ENABLED(CONFIG_AGP)
@@ -603,7 +607,15 @@ static struct ttm_tt *radeon_ttm_tt_create(struct ttm_buffer_object *bo,
 	if (gtt == NULL) {
 		return NULL;
 	}
-	if (ttm_dma_tt_init(&gtt->ttm, bo, page_flags)) {
+
+	if (rbo->flags & RADEON_GEM_GTT_UC)
+		caching = ttm_uncached;
+	else if (rbo->flags & RADEON_GEM_GTT_WC)
+		caching = ttm_write_combined;
+	else
+		caching = ttm_cached;
+
+	if (ttm_dma_tt_init(&gtt->ttm, bo, page_flags, caching)) {
 		kfree(gtt);
 		return NULL;
 	}
diff --git a/drivers/gpu/drm/ttm/ttm_agp_backend.c b/drivers/gpu/drm/ttm/ttm_agp_backend.c
index a98fd795b752..a723062d37e7 100644
--- a/drivers/gpu/drm/ttm/ttm_agp_backend.c
+++ b/drivers/gpu/drm/ttm/ttm_agp_backend.c
@@ -136,7 +136,7 @@ struct ttm_tt *ttm_agp_tt_create(struct ttm_buffer_object *bo,
 	agp_be->mem = NULL;
 	agp_be->bridge = bridge;
 
-	if (ttm_tt_init(&agp_be->ttm, bo, page_flags)) {
+	if (ttm_tt_init(&agp_be->ttm, bo, page_flags, ttm_write_combined)) {
 		kfree(agp_be);
 		return NULL;
 	}
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index 111031cbb6df..c8f6790962b9 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -220,14 +220,14 @@ static struct ttm_pool_manager *_manager;
 /**
  * Select the right pool or requested caching state and ttm flags. */
 static struct ttm_page_pool *ttm_get_pool(int flags, bool huge,
-					  enum ttm_caching_state cstate)
+					  enum ttm_caching cstate)
 {
 	int pool_index;
 
-	if (cstate == tt_cached)
+	if (cstate == ttm_cached)
 		return NULL;
 
-	if (cstate == tt_wc)
+	if (cstate == ttm_write_combined)
 		pool_index = 0x0;
 	else
 		pool_index = 0x1;
@@ -441,17 +441,17 @@ static void ttm_pool_mm_shrink_fini(struct ttm_pool_manager *manager)
 }
 
 static int ttm_set_pages_caching(struct page **pages,
-		enum ttm_caching_state cstate, unsigned cpages)
+		enum ttm_caching cstate, unsigned cpages)
 {
 	int r = 0;
 	/* Set page caching */
 	switch (cstate) {
-	case tt_uncached:
+	case ttm_uncached:
 		r = ttm_set_pages_array_uc(pages, cpages);
 		if (r)
 			pr_err("Failed to set %d pages to uc!\n", cpages);
 		break;
-	case tt_wc:
+	case ttm_write_combined:
 		r = ttm_set_pages_array_wc(pages, cpages);
 		if (r)
 			pr_err("Failed to set %d pages to wc!\n", cpages);
@@ -486,7 +486,7 @@ static void ttm_handle_caching_failure(struct page **failed_pages,
  * pages returned in pages array.
  */
 static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags,
-			       int ttm_flags, enum ttm_caching_state cstate,
+			       int ttm_flags, enum ttm_caching cstate,
 			       unsigned count, unsigned order)
 {
 	struct page **caching_array;
@@ -566,7 +566,7 @@ static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags,
  * pages is small.
  */
 static void ttm_page_pool_fill_locked(struct ttm_page_pool *pool, int ttm_flags,
-				      enum ttm_caching_state cstate,
+				      enum ttm_caching cstate,
 				      unsigned count, unsigned long *irq_flags)
 {
 	struct page *p;
@@ -626,7 +626,7 @@ static void ttm_page_pool_fill_locked(struct ttm_page_pool *pool, int ttm_flags,
 static int ttm_page_pool_get_pages(struct ttm_page_pool *pool,
 				   struct list_head *pages,
 				   int ttm_flags,
-				   enum ttm_caching_state cstate,
+				   enum ttm_caching cstate,
 				   unsigned count, unsigned order)
 {
 	unsigned long irq_flags;
@@ -703,7 +703,7 @@ static int ttm_page_pool_get_pages(struct ttm_page_pool *pool,
 
 /* Put all pages in pages list to correct pool to wait for reuse */
 static void ttm_put_pages(struct page **pages, unsigned npages, int flags,
-			  enum ttm_caching_state cstate)
+			  enum ttm_caching cstate)
 {
 	struct ttm_page_pool *pool = ttm_get_pool(flags, false, cstate);
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -821,7 +821,7 @@ static void ttm_put_pages(struct page **pages, unsigned npages, int flags,
  * cached pages.
  */
 static int ttm_get_pages(struct page **pages, unsigned npages, int flags,
-			 enum ttm_caching_state cstate)
+			 enum ttm_caching cstate)
 {
 	struct ttm_page_pool *pool = ttm_get_pool(flags, false, cstate);
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -1040,7 +1040,7 @@ ttm_pool_unpopulate_helper(struct ttm_tt *ttm, unsigned mem_count_update)
 
 put_pages:
 	ttm_put_pages(ttm->pages, ttm->num_pages, ttm->page_flags,
-		      ttm->caching_state);
+		      ttm->caching);
 	ttm_tt_set_unpopulated(ttm);
 }
 
@@ -1057,7 +1057,7 @@ int ttm_pool_populate(struct ttm_tt *ttm, struct ttm_operation_ctx *ctx)
 		return -ENOMEM;
 
 	ret = ttm_get_pages(ttm->pages, ttm->num_pages, ttm->page_flags,
-			    ttm->caching_state);
+			    ttm->caching);
 	if (unlikely(ret != 0)) {
 		ttm_pool_unpopulate_helper(ttm, 0);
 		return ret;
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
index 1045a5c26ee3..6625b43f6256 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
@@ -325,15 +325,15 @@ static struct dma_page *__ttm_dma_alloc_page(struct dma_pool *pool)
 	}
 	return d_page;
 }
-static enum pool_type ttm_to_type(int flags, enum ttm_caching_state cstate)
+static enum pool_type ttm_to_type(int flags, enum ttm_caching cstate)
 {
 	enum pool_type type = IS_UNDEFINED;
 
 	if (flags & TTM_PAGE_FLAG_DMA32)
 		type |= IS_DMA32;
-	if (cstate == tt_cached)
+	if (cstate == ttm_cached)
 		type |= IS_CACHED;
-	else if (cstate == tt_uncached)
+	else if (cstate == ttm_uncached)
 		type |= IS_UC;
 	else
 		type |= IS_WC;
@@ -663,7 +663,7 @@ static struct dma_pool *ttm_dma_find_pool(struct device *dev,
  * are pages that have changed their caching state already put them to the
  * pool.
  */
-static void ttm_dma_handle_caching_state_failure(struct dma_pool *pool,
+static void ttm_dma_handle_caching_failure(struct dma_pool *pool,
 						 struct list_head *d_pages,
 						 struct page **failed_pages,
 						 unsigned cpages)
@@ -734,7 +734,7 @@ static int ttm_dma_pool_alloc_new_pages(struct dma_pool *pool,
 				r = ttm_set_pages_caching(pool, caching_array,
 							  cpages);
 				if (r)
-					ttm_dma_handle_caching_state_failure(
+					ttm_dma_handle_caching_failure(
 						pool, d_pages, caching_array,
 						cpages);
 			}
@@ -760,7 +760,7 @@ static int ttm_dma_pool_alloc_new_pages(struct dma_pool *pool,
 				r = ttm_set_pages_caching(pool, caching_array,
 							  cpages);
 				if (r) {
-					ttm_dma_handle_caching_state_failure(
+					ttm_dma_handle_caching_failure(
 					     pool, d_pages, caching_array,
 					     cpages);
 					goto out;
@@ -773,7 +773,7 @@ static int ttm_dma_pool_alloc_new_pages(struct dma_pool *pool,
 	if (cpages) {
 		r = ttm_set_pages_caching(pool, caching_array, cpages);
 		if (r)
-			ttm_dma_handle_caching_state_failure(pool, d_pages,
+			ttm_dma_handle_caching_failure(pool, d_pages,
 					caching_array, cpages);
 	}
 out:
@@ -904,7 +904,7 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev,
 	INIT_LIST_HEAD(&ttm_dma->pages_list);
 	i = 0;
 
-	type = ttm_to_type(ttm->page_flags, ttm->caching_state);
+	type = ttm_to_type(ttm->page_flags, ttm->caching);
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	if (ttm->page_flags & TTM_PAGE_FLAG_DMA32)
@@ -1000,7 +1000,7 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
 	unsigned count, i, npages = 0;
 	unsigned long irq_flags;
 
-	type = ttm_to_type(ttm->page_flags, ttm->caching_state);
+	type = ttm_to_type(ttm->page_flags, ttm->caching);
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	pool = ttm_dma_find_pool(dev, type | IS_HUGE);
@@ -1032,7 +1032,7 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
 		return;
 
 	is_cached = (ttm_dma_find_pool(pool->dev,
-		     ttm_to_type(ttm->page_flags, tt_cached)) == pool);
+		     ttm_to_type(ttm->page_flags, ttm_cached)) == pool);
 
 	/* make sure pages array match list and count number of pages */
 	count = 0;
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 23e9604bc924..a465f51df027 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -114,31 +114,30 @@ static int ttm_sg_tt_alloc_page_directory(struct ttm_dma_tt *ttm)
 	return 0;
 }
 
-static int ttm_tt_set_caching(struct ttm_tt *ttm,
-			      enum ttm_caching_state c_state)
+static int ttm_tt_set_caching(struct ttm_tt *ttm, enum ttm_caching caching)
 {
-	if (ttm->caching_state == c_state)
+	if (ttm->caching == caching)
 		return 0;
 
 	/* Can't change the caching state after TT is populated */
 	if (WARN_ON_ONCE(ttm_tt_is_populated(ttm)))
 		return -EINVAL;
 
-	ttm->caching_state = c_state;
+	ttm->caching = caching;
 
 	return 0;
 }
 
 int ttm_tt_set_placement_caching(struct ttm_tt *ttm, uint32_t placement)
 {
-	enum ttm_caching_state state;
+	enum ttm_caching state;
 
 	if (placement & TTM_PL_FLAG_WC)
-		state = tt_wc;
+		state = ttm_write_combined;
 	else if (placement & TTM_PL_FLAG_UNCACHED)
-		state = tt_uncached;
+		state = ttm_uncached;
 	else
-		state = tt_cached;
+		state = ttm_cached;
 
 	return ttm_tt_set_caching(ttm, state);
 }
@@ -162,20 +161,22 @@ void ttm_tt_destroy(struct ttm_bo_device *bdev, struct ttm_tt *ttm)
 
 static void ttm_tt_init_fields(struct ttm_tt *ttm,
 			       struct ttm_buffer_object *bo,
-			       uint32_t page_flags)
+			       uint32_t page_flags,
+			       enum ttm_caching caching)
 {
 	ttm->num_pages = bo->num_pages;
-	ttm->caching_state = tt_cached;
+	ttm->caching = ttm_cached;
 	ttm->page_flags = page_flags;
 	ttm_tt_set_unpopulated(ttm);
 	ttm->swap_storage = NULL;
 	ttm->sg = bo->sg;
+	ttm->caching = caching;
 }
 
 int ttm_tt_init(struct ttm_tt *ttm, struct ttm_buffer_object *bo,
-		uint32_t page_flags)
+		uint32_t page_flags, enum ttm_caching caching)
 {
-	ttm_tt_init_fields(ttm, bo, page_flags);
+	ttm_tt_init_fields(ttm, bo, page_flags, caching);
 
 	if (ttm_tt_alloc_page_directory(ttm)) {
 		pr_err("Failed allocating page table\n");
@@ -193,11 +194,11 @@ void ttm_tt_fini(struct ttm_tt *ttm)
 EXPORT_SYMBOL(ttm_tt_fini);
 
 int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_buffer_object *bo,
-		    uint32_t page_flags)
+		    uint32_t page_flags, enum ttm_caching caching)
 {
 	struct ttm_tt *ttm = &ttm_dma->ttm;
 
-	ttm_tt_init_fields(ttm, bo, page_flags);
+	ttm_tt_init_fields(ttm, bo, page_flags, caching);
 
 	INIT_LIST_HEAD(&ttm_dma->pages_list);
 	if (ttm_dma_tt_alloc_page_directory(ttm_dma)) {
@@ -209,12 +210,12 @@ int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_buffer_object *bo,
 EXPORT_SYMBOL(ttm_dma_tt_init);
 
 int ttm_sg_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_buffer_object *bo,
-		   uint32_t page_flags)
+		   uint32_t page_flags, enum ttm_caching caching)
 {
 	struct ttm_tt *ttm = &ttm_dma->ttm;
 	int ret;
 
-	ttm_tt_init_fields(ttm, bo, page_flags);
+	ttm_tt_init_fields(ttm, bo, page_flags, caching);
 
 	INIT_LIST_HEAD(&ttm_dma->pages_list);
 	if (page_flags & TTM_PAGE_FLAG_SG)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
index 7b5fd5288870..1fa7f9438ec4 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
@@ -647,9 +647,11 @@ static struct ttm_tt *vmw_ttm_tt_create(struct ttm_buffer_object *bo,
 	vmw_be->mob = NULL;
 
 	if (vmw_be->dev_priv->map_mode == vmw_dma_alloc_coherent)
-		ret = ttm_dma_tt_init(&vmw_be->dma_ttm, bo, page_flags);
+		ret = ttm_dma_tt_init(&vmw_be->dma_ttm, bo, page_flags,
+				      ttm_cached);
 	else
-		ret = ttm_tt_init(&vmw_be->dma_ttm.ttm, bo, page_flags);
+		ret = ttm_tt_init(&vmw_be->dma_ttm.ttm, bo, page_flags,
+				  ttm_cached);
 	if (unlikely(ret != 0))
 		goto out_no_init;
 
diff --git a/include/drm/ttm/ttm_caching.h b/include/drm/ttm/ttm_caching.h
new file mode 100644
index 000000000000..161624dcf6be
--- /dev/null
+++ b/include/drm/ttm/ttm_caching.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König
+ */
+
+#ifndef _TTM_CACHING_H_
+#define _TTM_CACHING_H_
+
+enum ttm_caching {
+	ttm_uncached,
+	ttm_write_combined,
+	ttm_cached
+};
+
+#endif
diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
index 5d1835d44084..c39c722d5184 100644
--- a/include/drm/ttm/ttm_tt.h
+++ b/include/drm/ttm/ttm_tt.h
@@ -28,6 +28,7 @@
 #define _TTM_TT_H_
 
 #include <linux/types.h>
+#include <drm/ttm/ttm_caching.h>
 
 struct ttm_tt;
 struct ttm_resource;
@@ -42,12 +43,6 @@ struct ttm_operation_ctx;
 
 #define TTM_PAGE_FLAG_PRIV_POPULATED  (1 << 31)
 
-enum ttm_caching_state {
-	tt_uncached,
-	tt_wc,
-	tt_cached
-};
-
 /**
  * struct ttm_tt
  *
@@ -69,7 +64,7 @@ struct ttm_tt {
 	unsigned long num_pages;
 	struct sg_table *sg; /* for SG objects via dma-buf */
 	struct file *swap_storage;
-	enum ttm_caching_state caching_state;
+	enum ttm_caching caching;
 };
 
 static inline bool ttm_tt_is_populated(struct ttm_tt *tt)
@@ -121,6 +116,7 @@ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc);
  * @ttm: The struct ttm_tt.
  * @bo: The buffer object we create the ttm for.
  * @page_flags: Page flags as identified by TTM_PAGE_FLAG_XX flags.
+ * @caching: the desired caching state of the pages
  *
  * Create a struct ttm_tt to back data with system memory pages.
  * No pages are actually allocated.
@@ -128,11 +124,11 @@ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc);
  * NULL: Out of memory.
  */
 int ttm_tt_init(struct ttm_tt *ttm, struct ttm_buffer_object *bo,
-		uint32_t page_flags);
+		uint32_t page_flags, enum ttm_caching caching);
 int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_buffer_object *bo,
-		    uint32_t page_flags);
+		    uint32_t page_flags, enum ttm_caching caching);
 int ttm_sg_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_buffer_object *bo,
-		   uint32_t page_flags);
+		   uint32_t page_flags, enum ttm_caching caching);
 
 /**
  * ttm_tt_fini
-- 
2.17.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/4] drm/ttm: add caching state to ttm_bus_placement
  2020-10-08  9:31 [PATCH 1/4] drm/ttm: set the tt caching state at creation time Christian König
@ 2020-10-08  9:31 ` Christian König
  2020-10-08  9:31 ` [PATCH 3/4] drm/ttm: use caching instead of placement for ttm_io_prot Christian König
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 7+ messages in thread
From: Christian König @ 2020-10-08  9:31 UTC (permalink / raw)
  To: dri-devel, daniel

Add a caching state to ttm_bus_placement and set it up correctly in the
drivers.

This allows getting rid of the caching placement flags.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    |  1 +
 drivers/gpu/drm/drm_gem_vram_helper.c      |  1 +
 drivers/gpu/drm/nouveau/nouveau_bo.c       | 11 +++++++++++
 drivers/gpu/drm/qxl/qxl_ttm.c              |  2 ++
 drivers/gpu/drm/radeon/radeon_ttm.c        |  2 ++
 drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c |  1 +
 include/drm/ttm/ttm_resource.h             |  8 +++++---
 7 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 7f41a47e7353..5b56a66063fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -769,6 +769,7 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_reso
 
 		mem->bus.offset += adev->gmc.aper_base;
 		mem->bus.is_iomem = true;
+		mem->bus.caching = ttm_write_combined;
 		break;
 	default:
 		return -EINVAL;
diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c
index ad58d0af5141..b9e7ce1adf25 100644
--- a/drivers/gpu/drm/drm_gem_vram_helper.c
+++ b/drivers/gpu/drm/drm_gem_vram_helper.c
@@ -981,6 +981,7 @@ static int bo_driver_io_mem_reserve(struct ttm_bo_device *bdev,
 	case TTM_PL_VRAM:
 		mem->bus.offset = (mem->start << PAGE_SHIFT) + vmm->vram_base;
 		mem->bus.is_iomem = true;
+		mem->bus.caching = ttm_write_combined;
 		break;
 	default:
 		return -EINVAL;
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 0c0ca44a6802..cb878c0e8276 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1134,6 +1134,8 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_resource *reg)
 	struct nouveau_drm *drm = nouveau_bdev(bdev);
 	struct nvkm_device *device = nvxx_device(&drm->client.device);
 	struct nouveau_mem *mem = nouveau_mem(reg);
+	struct nvif_mmu *mmu = &drm->client.mmu;
+	const u8 type = mmu->type[drm->ttm.type_vram].type;
 	int ret;
 
 	mutex_lock(&drm->ttm.io_reserve_mutex);
@@ -1149,6 +1151,7 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_resource *reg)
 			reg->bus.offset = (reg->start << PAGE_SHIFT) +
 				drm->agp.base;
 			reg->bus.is_iomem = !drm->agp.cma;
+			reg->bus.caching = ttm_write_combined;
 		}
 #endif
 		if (drm->client.mem->oclass < NVIF_CLASS_MEM_NV50 ||
@@ -1162,6 +1165,14 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_resource *reg)
 		reg->bus.offset = (reg->start << PAGE_SHIFT) +
 			device->func->resource_addr(device, 1);
 		reg->bus.is_iomem = true;
+
+		/* Some BARs do not support being ioremapped WC */
+		if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA &&
+		    type & NVIF_MEM_UNCACHED)
+			reg->bus.caching = ttm_uncached;
+		else
+			reg->bus.caching = ttm_write_combined;
+
 		if (drm->client.mem->oclass >= NVIF_CLASS_MEM_NV50) {
 			union {
 				struct nv50_mem_map_v0 nv50;
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index f50863493f64..61eb06dbbce8 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -83,11 +83,13 @@ int qxl_ttm_io_mem_reserve(struct ttm_bo_device *bdev,
 	case TTM_PL_VRAM:
 		mem->bus.is_iomem = true;
 		mem->bus.offset = (mem->start << PAGE_SHIFT) + qdev->vram_base;
+		mem->bus.caching = ttm_cached;
 		break;
 	case TTM_PL_PRIV:
 		mem->bus.is_iomem = true;
 		mem->bus.offset = (mem->start << PAGE_SHIFT) +
 			qdev->surfaceram_base;
+		mem->bus.caching = ttm_cached;
 		break;
 	default:
 		return -EINVAL;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 130a7cea35c3..9b53a1d80632 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -372,6 +372,7 @@ static int radeon_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_reso
 			mem->bus.offset = (mem->start << PAGE_SHIFT) +
 				rdev->mc.agp_base;
 			mem->bus.is_iomem = !rdev->ddev->agp->cant_use_aperture;
+			mem->bus.caching = ttm_write_combined;
 		}
 #endif
 		break;
@@ -382,6 +383,7 @@ static int radeon_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_reso
 			return -EINVAL;
 		mem->bus.offset += rdev->mc.aper_base;
 		mem->bus.is_iomem = true;
+		mem->bus.caching = ttm_write_combined;
 #ifdef __alpha__
 		/*
 		 * Alpha: use bus.addr to hold the ioremap() return,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
index 1fa7f9438ec4..fae88969a15a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
@@ -688,6 +688,7 @@ static int vmw_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_resourc
 		mem->bus.offset = (mem->start << PAGE_SHIFT) +
 			dev_priv->vram_start;
 		mem->bus.is_iomem = true;
+		mem->bus.caching = ttm_cached;
 		break;
 	default:
 		return -EINVAL;
diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
index 1b2f56163c6c..f48a70d39ac5 100644
--- a/include/drm/ttm/ttm_resource.h
+++ b/include/drm/ttm/ttm_resource.h
@@ -29,6 +29,7 @@
 #include <linux/mutex.h>
 #include <linux/dma-fence.h>
 #include <drm/drm_print.h>
+#include <drm/ttm/ttm_caching.h>
 
 #define TTM_MAX_BO_PRIORITY	4U
 
@@ -148,9 +149,10 @@ struct ttm_resource_manager {
  * Structure indicating the bus placement of an object.
  */
 struct ttm_bus_placement {
-	void		*addr;
-	phys_addr_t	offset;
-	bool		is_iomem;
+	void			*addr;
+	phys_addr_t		offset;
+	bool			is_iomem;
+	enum ttm_caching	caching;
 };
 
 /**
-- 
2.17.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/4] drm/ttm: use caching instead of placement for ttm_io_prot
  2020-10-08  9:31 [PATCH 1/4] drm/ttm: set the tt caching state at creation time Christian König
  2020-10-08  9:31 ` [PATCH 2/4] drm/ttm: add caching state to ttm_bus_placement Christian König
@ 2020-10-08  9:31 ` Christian König
  2020-10-08  9:31 ` [PATCH 4/4] drm/ttm: nuke caching placement flags Christian König
  2020-10-12  8:57 ` [PATCH 1/4] drm/ttm: set the tt caching state at creation time Christian König
  3 siblings, 0 replies; 7+ messages in thread
From: Christian König @ 2020-10-08  9:31 UTC (permalink / raw)
  To: dri-devel, daniel

Instead of the placement flags, use the caching state of the bus
mapping or tt object to determine the page protection flags.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
---
 drivers/gpu/drm/ttm/ttm_bo_util.c    | 23 ++++++++++++++---------
 drivers/gpu/drm/ttm/ttm_bo_vm.c      |  2 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_blit.c |  4 ++--
 include/drm/ttm/ttm_bo_driver.h      |  6 ++++--
 4 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index bdee4df1f3f2..0542097dc419 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -279,13 +279,11 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
 	for (i = 0; i < new_mem->num_pages; ++i) {
 		page = i * dir + add;
 		if (old_iomap == NULL) {
-			pgprot_t prot = ttm_io_prot(old_mem->placement,
-						    PAGE_KERNEL);
+			pgprot_t prot = ttm_io_prot(bo, old_mem, PAGE_KERNEL);
 			ret = ttm_copy_ttm_io_page(ttm, new_iomap, page,
 						   prot);
 		} else if (new_iomap == NULL) {
-			pgprot_t prot = ttm_io_prot(new_mem->placement,
-						    PAGE_KERNEL);
+			pgprot_t prot = ttm_io_prot(bo, new_mem, PAGE_KERNEL);
 			ret = ttm_copy_io_ttm_page(ttm, old_iomap, page,
 						   prot);
 		} else {
@@ -384,21 +382,28 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 	return 0;
 }
 
-pgprot_t ttm_io_prot(uint32_t caching_flags, pgprot_t tmp)
+pgprot_t ttm_io_prot(struct ttm_buffer_object *bo, struct ttm_resource *res,
+		     pgprot_t tmp)
 {
+	struct ttm_resource_manager *man;
+	enum ttm_caching caching;
+
+	man = ttm_manager_type(bo->bdev, res->mem_type);
+	caching = man->use_tt ? bo->ttm->caching : res->bus.caching;
+
 	/* Cached mappings need no adjustment */
-	if (caching_flags & TTM_PL_FLAG_CACHED)
+	if (caching == ttm_cached)
 		return tmp;
 
 #if defined(__i386__) || defined(__x86_64__)
-	if (caching_flags & TTM_PL_FLAG_WC)
+	if (caching == ttm_write_combined)
 		tmp = pgprot_writecombine(tmp);
 	else if (boot_cpu_data.x86 > 3)
 		tmp = pgprot_noncached(tmp);
 #endif
 #if defined(__ia64__) || defined(__arm__) || defined(__aarch64__) || \
     defined(__powerpc__) || defined(__mips__)
-	if (caching_flags & TTM_PL_FLAG_WC)
+	if (caching == ttm_write_combined)
 		tmp = pgprot_writecombine(tmp);
 	else
 		tmp = pgprot_noncached(tmp);
@@ -466,7 +471,7 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo,
 		 * We need to use vmap to get the desired page protection
 		 * or to make the buffer object look contiguous.
 		 */
-		prot = ttm_io_prot(mem->placement, PAGE_KERNEL);
+		prot = ttm_io_prot(bo, mem, PAGE_KERNEL);
 		map->bo_kmap_type = ttm_bo_map_vmap;
 		map->virtual = vmap(ttm->pages + start_page, num_pages,
 				    0, prot);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 87ee8f0ca08e..eeaca5d1efe3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -310,7 +310,7 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
 	if (unlikely(page_offset >= bo->num_pages))
 		return VM_FAULT_SIGBUS;
 
-	prot = ttm_io_prot(bo->mem.placement, prot);
+	prot = ttm_io_prot(bo, &bo->mem, prot);
 	if (!bo->mem.bus.is_iomem) {
 		struct ttm_operation_ctx ctx = {
 			.interruptible = false,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c b/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c
index ea2f2f937eb3..f21881e087db 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c
@@ -484,8 +484,8 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst,
 	d.src_pages = src->ttm->pages;
 	d.dst_num_pages = dst->num_pages;
 	d.src_num_pages = src->num_pages;
-	d.dst_prot = ttm_io_prot(dst->mem.placement, PAGE_KERNEL);
-	d.src_prot = ttm_io_prot(src->mem.placement, PAGE_KERNEL);
+	d.dst_prot = ttm_io_prot(dst, &dst->mem, PAGE_KERNEL);
+	d.src_prot = ttm_io_prot(src, &src->mem, PAGE_KERNEL);
 	d.diff = diff;
 
 	for (j = 0; j < h; ++j) {
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index b58dedce7079..f29419cb7980 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -657,13 +657,15 @@ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo);
 /**
  * ttm_io_prot
  *
- * @c_state: Caching state.
+ * @bo: ttm buffer object
+ * @res: ttm resource object
  * @tmp: Page protection flag for a normal, cached mapping.
  *
  * Utility function that returns the pgprot_t that should be used for
  * setting up a PTE with the caching model indicated by @c_state.
  */
-pgprot_t ttm_io_prot(uint32_t caching_flags, pgprot_t tmp);
+pgprot_t ttm_io_prot(struct ttm_buffer_object *bo, struct ttm_resource *res,
+		     pgprot_t tmp);
 
 /**
  * ttm_bo_tt_bind
-- 
2.17.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 4/4] drm/ttm: nuke caching placement flags
  2020-10-08  9:31 [PATCH 1/4] drm/ttm: set the tt caching state at creation time Christian König
  2020-10-08  9:31 ` [PATCH 2/4] drm/ttm: add caching state to ttm_bus_placement Christian König
  2020-10-08  9:31 ` [PATCH 3/4] drm/ttm: use caching instead of placement for ttm_io_prot Christian König
@ 2020-10-08  9:31 ` Christian König
  2020-10-12  8:57 ` [PATCH 1/4] drm/ttm: set the tt caching state at creation time Christian König
  3 siblings, 0 replies; 7+ messages in thread
From: Christian König @ 2020-10-08  9:31 UTC (permalink / raw)
  To: dri-devel, daniel

Changing the caching on the fly never really worked
flawlessly.

So stop this completely and just let drivers specify the
desired caching in the tt or bus object.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 20 +++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 12 ++----
 drivers/gpu/drm/drm_gem_vram_helper.c      |  7 +---
 drivers/gpu/drm/nouveau/nouveau_bo.c       | 36 +++++------------
 drivers/gpu/drm/qxl/qxl_object.c           | 10 ++---
 drivers/gpu/drm/qxl/qxl_ttm.c              |  2 +-
 drivers/gpu/drm/radeon/radeon_object.c     | 46 +++++-----------------
 drivers/gpu/drm/radeon/radeon_ttm.c        | 18 ++-------
 drivers/gpu/drm/ttm/ttm_agp_backend.c      |  2 +-
 drivers/gpu/drm/ttm/ttm_bo.c               | 44 ++-------------------
 drivers/gpu/drm/ttm/ttm_bo_util.c          | 10 ++---
 drivers/gpu/drm/ttm/ttm_tt.c               | 29 --------------
 drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c | 30 +++++++-------
 include/drm/ttm/ttm_placement.h            | 14 -------
 include/drm/ttm/ttm_tt.h                   | 15 -------
 15 files changed, 61 insertions(+), 234 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 8b96e7aaeff1..1aa516429c80 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -137,7 +137,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
 		places[c].fpfn = 0;
 		places[c].lpfn = 0;
 		places[c].mem_type = TTM_PL_VRAM;
-		places[c].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED;
+		places[c].flags = 0;
 
 		if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
 			places[c].lpfn = visible_pfn;
@@ -154,11 +154,6 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
 		places[c].lpfn = 0;
 		places[c].mem_type = TTM_PL_TT;
 		places[c].flags = 0;
-		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
-			places[c].flags |= TTM_PL_FLAG_WC |
-				TTM_PL_FLAG_UNCACHED;
-		else
-			places[c].flags |= TTM_PL_FLAG_CACHED;
 		c++;
 	}
 
@@ -167,11 +162,6 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
 		places[c].lpfn = 0;
 		places[c].mem_type = TTM_PL_SYSTEM;
 		places[c].flags = 0;
-		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
-			places[c].flags |= TTM_PL_FLAG_WC |
-				TTM_PL_FLAG_UNCACHED;
-		else
-			places[c].flags |= TTM_PL_FLAG_CACHED;
 		c++;
 	}
 
@@ -179,7 +169,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
 		places[c].fpfn = 0;
 		places[c].lpfn = 0;
 		places[c].mem_type = AMDGPU_PL_GDS;
-		places[c].flags = TTM_PL_FLAG_UNCACHED;
+		places[c].flags = 0;
 		c++;
 	}
 
@@ -187,7 +177,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
 		places[c].fpfn = 0;
 		places[c].lpfn = 0;
 		places[c].mem_type = AMDGPU_PL_GWS;
-		places[c].flags = TTM_PL_FLAG_UNCACHED;
+		places[c].flags = 0;
 		c++;
 	}
 
@@ -195,7 +185,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
 		places[c].fpfn = 0;
 		places[c].lpfn = 0;
 		places[c].mem_type = AMDGPU_PL_OA;
-		places[c].flags = TTM_PL_FLAG_UNCACHED;
+		places[c].flags = 0;
 		c++;
 	}
 
@@ -203,7 +193,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
 		places[c].fpfn = 0;
 		places[c].lpfn = 0;
 		places[c].mem_type = TTM_PL_SYSTEM;
-		places[c].flags = TTM_PL_MASK_CACHING;
+		places[c].flags = 0;
 		c++;
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 5b56a66063fd..8cdec58b9106 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -92,7 +92,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 		.fpfn = 0,
 		.lpfn = 0,
 		.mem_type = TTM_PL_SYSTEM,
-		.flags = TTM_PL_MASK_CACHING
+		.flags = 0
 	};
 
 	/* Don't handle scatter gather BOs */
@@ -538,19 +538,13 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
 	placements.fpfn = 0;
 	placements.lpfn = 0;
 	placements.mem_type = TTM_PL_TT;
-	placements.flags = TTM_PL_MASK_CACHING;
+	placements.flags = 0;
 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
 	if (unlikely(r)) {
 		pr_err("Failed to find GTT space for blit from VRAM\n");
 		return r;
 	}
 
-	/* set caching flags */
-	r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement);
-	if (unlikely(r)) {
-		goto out_cleanup;
-	}
-
 	r = ttm_tt_populate(bo->bdev, bo->ttm, ctx);
 	if (unlikely(r))
 		goto out_cleanup;
@@ -599,7 +593,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
 	placements.fpfn = 0;
 	placements.lpfn = 0;
 	placements.mem_type = TTM_PL_TT;
-	placements.flags = TTM_PL_MASK_CACHING;
+	placements.flags = 0;
 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
 	if (unlikely(r)) {
 		pr_err("Failed to find GTT space for blit to VRAM\n");
diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c
index b9e7ce1adf25..7aeb5daf2805 100644
--- a/drivers/gpu/drm/drm_gem_vram_helper.c
+++ b/drivers/gpu/drm/drm_gem_vram_helper.c
@@ -147,15 +147,12 @@ static void drm_gem_vram_placement(struct drm_gem_vram_object *gbo,
 
 	if (pl_flag & DRM_GEM_VRAM_PL_FLAG_VRAM) {
 		gbo->placements[c].mem_type = TTM_PL_VRAM;
-		gbo->placements[c++].flags = TTM_PL_FLAG_WC |
-					     TTM_PL_FLAG_UNCACHED |
-					     invariant_flags;
+		gbo->placements[c++].flags = invariant_flags;
 	}
 
 	if (pl_flag & DRM_GEM_VRAM_PL_FLAG_SYSTEM || !c) {
 		gbo->placements[c].mem_type = TTM_PL_SYSTEM;
-		gbo->placements[c++].flags = TTM_PL_MASK_CACHING |
-					     invariant_flags;
+		gbo->placements[c++].flags = invariant_flags;
 	}
 
 	gbo->placement.num_placement = c;
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index cb878c0e8276..4ccb3329014b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -343,37 +343,23 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
 }
 
 static void
-set_placement_list(struct nouveau_drm *drm, struct ttm_place *pl, unsigned *n,
-		   uint32_t domain, uint32_t flags)
+set_placement_list(struct ttm_place *pl, unsigned *n, uint32_t domain)
 {
 	*n = 0;
 
 	if (domain & NOUVEAU_GEM_DOMAIN_VRAM) {
-		struct nvif_mmu *mmu = &drm->client.mmu;
-		const u8 type = mmu->type[drm->ttm.type_vram].type;
-
 		pl[*n].mem_type = TTM_PL_VRAM;
-		pl[*n].flags = flags & ~TTM_PL_FLAG_CACHED;
-
-		/* Some BARs do not support being ioremapped WC */
-		if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA &&
-		    type & NVIF_MEM_UNCACHED)
-			pl[*n].flags &= ~TTM_PL_FLAG_WC;
-
+		pl[*n].flags = 0;
 		(*n)++;
 	}
 	if (domain & NOUVEAU_GEM_DOMAIN_GART) {
 		pl[*n].mem_type = TTM_PL_TT;
-		pl[*n].flags = flags;
-
-		if (drm->agp.bridge)
-			pl[*n].flags &= ~TTM_PL_FLAG_CACHED;
-
+		pl[*n].flags = 0;
 		(*n)++;
 	}
 	if (domain & NOUVEAU_GEM_DOMAIN_CPU) {
 		pl[*n].mem_type = TTM_PL_SYSTEM;
-		pl[(*n)++].flags = flags;
+		pl[(*n)++].flags = 0;
 	}
 }
 
@@ -415,18 +401,14 @@ void
 nouveau_bo_placement_set(struct nouveau_bo *nvbo, uint32_t domain,
 			 uint32_t busy)
 {
-	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
 	struct ttm_placement *pl = &nvbo->placement;
-	uint32_t flags = nvbo->force_coherent ? TTM_PL_FLAG_UNCACHED :
-						TTM_PL_MASK_CACHING;
 
 	pl->placement = nvbo->placements;
-	set_placement_list(drm, nvbo->placements, &pl->num_placement,
-			   domain, flags);
+	set_placement_list(nvbo->placements, &pl->num_placement, domain);
 
 	pl->busy_placement = nvbo->busy_placements;
-	set_placement_list(drm, nvbo->busy_placements, &pl->num_busy_placement,
-			   domain | busy, flags);
+	set_placement_list(nvbo->busy_placements, &pl->num_busy_placement,
+			   domain | busy);
 
 	set_placement_range(nvbo, domain);
 }
@@ -888,7 +870,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict,
 		.fpfn = 0,
 		.lpfn = 0,
 		.mem_type = TTM_PL_TT,
-		.flags = TTM_PL_MASK_CACHING
+		.flags = 0
 	};
 	struct ttm_placement placement;
 	struct ttm_resource tmp_reg;
@@ -930,7 +912,7 @@ nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict,
 		.fpfn = 0,
 		.lpfn = 0,
 		.mem_type = TTM_PL_TT,
-		.flags = TTM_PL_MASK_CACHING
+		.flags = 0
 	};
 	struct ttm_placement placement;
 	struct ttm_resource tmp_reg;
diff --git a/drivers/gpu/drm/qxl/qxl_object.c b/drivers/gpu/drm/qxl/qxl_object.c
index 940e99354f49..547d46c14d56 100644
--- a/drivers/gpu/drm/qxl/qxl_object.c
+++ b/drivers/gpu/drm/qxl/qxl_object.c
@@ -64,21 +64,21 @@ void qxl_ttm_placement_from_domain(struct qxl_bo *qbo, u32 domain)
 	qbo->placement.busy_placement = qbo->placements;
 	if (domain == QXL_GEM_DOMAIN_VRAM) {
 		qbo->placements[c].mem_type = TTM_PL_VRAM;
-		qbo->placements[c++].flags = TTM_PL_FLAG_CACHED | pflag;
+		qbo->placements[c++].flags = pflag;
 	}
 	if (domain == QXL_GEM_DOMAIN_SURFACE) {
 		qbo->placements[c].mem_type = TTM_PL_PRIV;
-		qbo->placements[c++].flags = TTM_PL_FLAG_CACHED | pflag;
+		qbo->placements[c++].flags = pflag;
 		qbo->placements[c].mem_type = TTM_PL_VRAM;
-		qbo->placements[c++].flags = TTM_PL_FLAG_CACHED | pflag;
+		qbo->placements[c++].flags = pflag;
 	}
 	if (domain == QXL_GEM_DOMAIN_CPU) {
 		qbo->placements[c].mem_type = TTM_PL_SYSTEM;
-		qbo->placements[c++].flags = TTM_PL_MASK_CACHING | pflag;
+		qbo->placements[c++].flags = pflag;
 	}
 	if (!c) {
 		qbo->placements[c].mem_type = TTM_PL_SYSTEM;
-		qbo->placements[c++].flags = TTM_PL_MASK_CACHING;
+		qbo->placements[c++].flags = 0;
 	}
 	qbo->placement.num_placement = c;
 	qbo->placement.num_busy_placement = c;
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index 61eb06dbbce8..e3ed20215f18 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -56,7 +56,7 @@ static void qxl_evict_flags(struct ttm_buffer_object *bo,
 		.fpfn = 0,
 		.lpfn = 0,
 		.mem_type = TTM_PL_SYSTEM,
-		.flags = TTM_PL_MASK_CACHING
+		.flags = 0
 	};
 
 	if (!qxl_ttm_bo_is_qxl_bo(bo)) {
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index ad0e6e9ef922..ab81e35cb060 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -113,57 +113,29 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
 			rbo->placements[c].fpfn =
 				rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
 			rbo->placements[c].mem_type = TTM_PL_VRAM;
-			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
-						     TTM_PL_FLAG_UNCACHED;
+			rbo->placements[c++].flags = 0;
 		}
 
 		rbo->placements[c].fpfn = 0;
 		rbo->placements[c].mem_type = TTM_PL_VRAM;
-		rbo->placements[c++].flags = TTM_PL_FLAG_WC |
-					     TTM_PL_FLAG_UNCACHED;
+		rbo->placements[c++].flags = 0;
 	}
 
 	if (domain & RADEON_GEM_DOMAIN_GTT) {
-		if (rbo->flags & RADEON_GEM_GTT_UC) {
-			rbo->placements[c].fpfn = 0;
-			rbo->placements[c].mem_type = TTM_PL_TT;
-			rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED;
-
-		} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
-			   (rbo->rdev->flags & RADEON_IS_AGP)) {
-			rbo->placements[c].fpfn = 0;
-			rbo->placements[c].mem_type = TTM_PL_TT;
-			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
-				TTM_PL_FLAG_UNCACHED;
-		} else {
-			rbo->placements[c].fpfn = 0;
-			rbo->placements[c].mem_type = TTM_PL_TT;
-			rbo->placements[c++].flags = TTM_PL_FLAG_CACHED;
-		}
+		rbo->placements[c].fpfn = 0;
+		rbo->placements[c].mem_type = TTM_PL_TT;
+		rbo->placements[c++].flags = 0;
 	}
 
 	if (domain & RADEON_GEM_DOMAIN_CPU) {
-		if (rbo->flags & RADEON_GEM_GTT_UC) {
-			rbo->placements[c].fpfn = 0;
-			rbo->placements[c].mem_type = TTM_PL_SYSTEM;
-			rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED;
-
-		} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
-		    rbo->rdev->flags & RADEON_IS_AGP) {
-			rbo->placements[c].fpfn = 0;
-			rbo->placements[c].mem_type = TTM_PL_SYSTEM;
-			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
-				TTM_PL_FLAG_UNCACHED;
-		} else {
-			rbo->placements[c].fpfn = 0;
-			rbo->placements[c].mem_type = TTM_PL_SYSTEM;
-			rbo->placements[c++].flags = TTM_PL_FLAG_CACHED;
-		}
+		rbo->placements[c].fpfn = 0;
+		rbo->placements[c].mem_type = TTM_PL_SYSTEM;
+		rbo->placements[c++].flags = 0;
 	}
 	if (!c) {
 		rbo->placements[c].fpfn = 0;
 		rbo->placements[c].mem_type = TTM_PL_SYSTEM;
-		rbo->placements[c++].flags = TTM_PL_MASK_CACHING;
+		rbo->placements[c++].flags = 0;
 	}
 
 	rbo->placement.num_placement = c;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 9b53a1d80632..d6f42fbc81f4 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -89,7 +89,7 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
 		.fpfn = 0,
 		.lpfn = 0,
 		.mem_type = TTM_PL_SYSTEM,
-		.flags = TTM_PL_MASK_CACHING
+		.flags = 0
 	};
 
 	struct radeon_bo *rbo;
@@ -225,17 +225,12 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo,
 	placements.fpfn = 0;
 	placements.lpfn = 0;
 	placements.mem_type = TTM_PL_TT;
-	placements.flags = TTM_PL_MASK_CACHING;
+	placements.flags = 0;
 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
 	if (unlikely(r)) {
 		return r;
 	}
 
-	r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement);
-	if (unlikely(r)) {
-		goto out_cleanup;
-	}
-
 	r = ttm_tt_populate(bo->bdev, bo->ttm, ctx);
 	if (unlikely(r)) {
 		goto out_cleanup;
@@ -275,7 +270,7 @@ static int radeon_move_ram_vram(struct ttm_buffer_object *bo,
 	placements.fpfn = 0;
 	placements.lpfn = 0;
 	placements.mem_type = TTM_PL_TT;
-	placements.flags = TTM_PL_MASK_CACHING;
+	placements.flags = 0;
 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
 	if (unlikely(r)) {
 		return r;
@@ -389,12 +384,7 @@ static int radeon_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_reso
 		 * Alpha: use bus.addr to hold the ioremap() return,
 		 * so we can modify bus.base below.
 		 */
-		if (mem->placement & TTM_PL_FLAG_WC)
-			mem->bus.addr =
-				ioremap_wc(mem->bus.offset, bus_size);
-		else
-			mem->bus.addr =
-				ioremap(mem->bus.offset, bus_size);
+		mem->bus.addr = ioremap_wc(mem->bus.offset, bus_size);
 		if (!mem->bus.addr)
 			return -ENOMEM;
 
diff --git a/drivers/gpu/drm/ttm/ttm_agp_backend.c b/drivers/gpu/drm/ttm/ttm_agp_backend.c
index a723062d37e7..4f76c9287159 100644
--- a/drivers/gpu/drm/ttm/ttm_agp_backend.c
+++ b/drivers/gpu/drm/ttm/ttm_agp_backend.c
@@ -54,7 +54,7 @@ int ttm_agp_bind(struct ttm_tt *ttm, struct ttm_resource *bo_mem)
 	struct page *dummy_read_page = ttm_bo_glob.dummy_read_page;
 	struct drm_mm_node *node = bo_mem->mm_node;
 	struct agp_memory *mem;
-	int ret, cached = (bo_mem->placement & TTM_PL_FLAG_CACHED);
+	int ret, cached = ttm->caching == ttm_cached;
 	unsigned i;
 
 	if (agp_be->mem)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 3732dcb58aad..b97ed6ca8765 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -252,10 +252,6 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
 		if (ret)
 			goto out_err;
 
-		ret = ttm_tt_set_placement_caching(bo->ttm, mem->placement);
-		if (ret)
-			goto out_err;
-
 		if (mem->mem_type != TTM_PL_SYSTEM) {
 			ret = ttm_tt_populate(bdev, bo->ttm, ctx);
 			if (ret)
@@ -843,29 +839,6 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
 	return ttm_bo_add_move_fence(bo, man, mem, ctx->no_wait_gpu);
 }
 
-static uint32_t ttm_bo_select_caching(struct ttm_resource_manager *man,
-				      uint32_t cur_placement,
-				      uint32_t proposed_placement)
-{
-	uint32_t caching = proposed_placement & TTM_PL_MASK_CACHING;
-	uint32_t result = proposed_placement & ~TTM_PL_MASK_CACHING;
-
-	/**
-	 * Keep current caching if possible.
-	 */
-
-	if ((cur_placement & caching) != 0)
-		result |= (cur_placement & caching);
-	else if ((TTM_PL_FLAG_CACHED & caching) != 0)
-		result |= TTM_PL_FLAG_CACHED;
-	else if ((TTM_PL_FLAG_WC & caching) != 0)
-		result |= TTM_PL_FLAG_WC;
-	else if ((TTM_PL_FLAG_UNCACHED & caching) != 0)
-		result |= TTM_PL_FLAG_UNCACHED;
-
-	return result;
-}
-
 /**
  * ttm_bo_mem_placement - check if placement is compatible
  * @bo: BO to find memory for
@@ -884,18 +857,13 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
 {
 	struct ttm_bo_device *bdev = bo->bdev;
 	struct ttm_resource_manager *man;
-	uint32_t cur_flags = 0;
 
 	man = ttm_manager_type(bdev, place->mem_type);
 	if (!man || !ttm_resource_manager_used(man))
 		return -EBUSY;
 
-	cur_flags = ttm_bo_select_caching(man, bo->mem.placement,
-					  place->flags);
-	cur_flags |= place->flags & ~TTM_PL_MASK_CACHING;
-
 	mem->mem_type = place->mem_type;
-	mem->placement = cur_flags;
+	mem->placement = place->flags;
 
 	spin_lock(&ttm_bo_glob.lru_lock);
 	ttm_bo_del_from_lru(bo);
@@ -1028,8 +996,7 @@ static bool ttm_bo_places_compat(const struct ttm_place *places,
 			continue;
 
 		*new_flags = heap->flags;
-		if ((*new_flags & mem->placement & TTM_PL_MASK_CACHING) &&
-		    (mem->mem_type == heap->mem_type) &&
+		if ((mem->mem_type == heap->mem_type) &&
 		    (!(*new_flags & TTM_PL_FLAG_CONTIGUOUS) ||
 		     (mem->placement & TTM_PL_FLAG_CONTIGUOUS)))
 			return true;
@@ -1083,9 +1050,6 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
 		ret = ttm_bo_move_buffer(bo, placement, ctx);
 		if (ret)
 			return ret;
-	} else {
-		bo->mem.placement &= TTM_PL_MASK_CACHING;
-		bo->mem.placement |= new_flags & ~TTM_PL_MASK_CACHING;
 	}
 	/*
 	 * We might need to add a TTM.
@@ -1153,7 +1117,7 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev,
 	bo->mem.bus.offset = 0;
 	bo->mem.bus.addr = NULL;
 	bo->moving = NULL;
-	bo->mem.placement = TTM_PL_FLAG_CACHED;
+	bo->mem.placement = 0;
 	bo->acc_size = acc_size;
 	bo->pin_count = 0;
 	bo->sg = sg;
@@ -1484,7 +1448,7 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx)
 
 		evict_mem = bo->mem;
 		evict_mem.mm_node = NULL;
-		evict_mem.placement = TTM_PL_MASK_CACHING;
+		evict_mem.placement = 0;
 		evict_mem.mem_type = TTM_PL_SYSTEM;
 
 		ret = ttm_bo_handle_move_mem(bo, &evict_mem, true, &ctx);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 0542097dc419..ba7ab5ed85d0 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -72,10 +72,6 @@ int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
 		old_mem->mem_type = TTM_PL_SYSTEM;
 	}
 
-	ret = ttm_tt_set_placement_caching(ttm, new_mem->placement);
-	if (unlikely(ret != 0))
-		return ret;
-
 	if (new_mem->mem_type != TTM_PL_SYSTEM) {
 
 		ret = ttm_tt_populate(bo->bdev, ttm, ctx);
@@ -135,7 +131,7 @@ static int ttm_resource_ioremap(struct ttm_bo_device *bdev,
 	} else {
 		size_t bus_size = (size_t)mem->num_pages << PAGE_SHIFT;
 
-		if (mem->placement & TTM_PL_FLAG_WC)
+		if (mem->bus.caching == ttm_write_combined)
 			addr = ioremap_wc(mem->bus.offset, bus_size);
 		else
 			addr = ioremap(mem->bus.offset, bus_size);
@@ -427,7 +423,7 @@ static int ttm_bo_ioremap(struct ttm_buffer_object *bo,
 		map->virtual = (void *)(((u8 *)bo->mem.bus.addr) + offset);
 	} else {
 		map->bo_kmap_type = ttm_bo_map_iomap;
-		if (mem->placement & TTM_PL_FLAG_WC)
+		if (mem->bus.caching == ttm_write_combined)
 			map->virtual = ioremap_wc(bo->mem.bus.offset + offset,
 						  size);
 		else
@@ -457,7 +453,7 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo,
 	if (ret)
 		return ret;
 
-	if (num_pages == 1 && (mem->placement & TTM_PL_FLAG_CACHED)) {
+	if (num_pages == 1 && ttm->caching == ttm_cached) {
 		/*
 		 * We're mapping a single page, and the desired
 		 * page protection is consistent with the bo.
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index a465f51df027..3e5dd6271d4c 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -114,35 +114,6 @@ static int ttm_sg_tt_alloc_page_directory(struct ttm_dma_tt *ttm)
 	return 0;
 }
 
-static int ttm_tt_set_caching(struct ttm_tt *ttm, enum ttm_caching caching)
-{
-	if (ttm->caching == caching)
-		return 0;
-
-	/* Can't change the caching state after TT is populated */
-	if (WARN_ON_ONCE(ttm_tt_is_populated(ttm)))
-		return -EINVAL;
-
-	ttm->caching = caching;
-
-	return 0;
-}
-
-int ttm_tt_set_placement_caching(struct ttm_tt *ttm, uint32_t placement)
-{
-	enum ttm_caching state;
-
-	if (placement & TTM_PL_FLAG_WC)
-		state = ttm_write_combined;
-	else if (placement & TTM_PL_FLAG_UNCACHED)
-		state = ttm_uncached;
-	else
-		state = ttm_cached;
-
-	return ttm_tt_set_caching(ttm, state);
-}
-EXPORT_SYMBOL(ttm_tt_set_placement_caching);
-
 void ttm_tt_destroy_common(struct ttm_bo_device *bdev, struct ttm_tt *ttm)
 {
 	ttm_tt_unpopulate(bdev, ttm);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
index fae88969a15a..112253246f08 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
@@ -34,28 +34,28 @@ static const struct ttm_place vram_placement_flags = {
 	.fpfn = 0,
 	.lpfn = 0,
 	.mem_type = TTM_PL_VRAM,
-	.flags = TTM_PL_FLAG_CACHED
+	.flags = 0
 };
 
 static const struct ttm_place sys_placement_flags = {
 	.fpfn = 0,
 	.lpfn = 0,
 	.mem_type = TTM_PL_SYSTEM,
-	.flags = TTM_PL_FLAG_CACHED
+	.flags = 0
 };
 
 static const struct ttm_place gmr_placement_flags = {
 	.fpfn = 0,
 	.lpfn = 0,
 	.mem_type = VMW_PL_GMR,
-	.flags = TTM_PL_FLAG_CACHED
+	.flags = 0
 };
 
 static const struct ttm_place mob_placement_flags = {
 	.fpfn = 0,
 	.lpfn = 0,
 	.mem_type = VMW_PL_MOB,
-	.flags = TTM_PL_FLAG_CACHED
+	.flags = 0
 };
 
 struct ttm_placement vmw_vram_placement = {
@@ -70,12 +70,12 @@ static const struct ttm_place vram_gmr_placement_flags[] = {
 		.fpfn = 0,
 		.lpfn = 0,
 		.mem_type = TTM_PL_VRAM,
-		.flags = TTM_PL_FLAG_CACHED
+		.flags = 0
 	}, {
 		.fpfn = 0,
 		.lpfn = 0,
 		.mem_type = VMW_PL_GMR,
-		.flags = TTM_PL_FLAG_CACHED
+		.flags = 0
 	}
 };
 
@@ -84,12 +84,12 @@ static const struct ttm_place gmr_vram_placement_flags[] = {
 		.fpfn = 0,
 		.lpfn = 0,
 		.mem_type = VMW_PL_GMR,
-		.flags = TTM_PL_FLAG_CACHED
+		.flags = 0
 	}, {
 		.fpfn = 0,
 		.lpfn = 0,
 		.mem_type = TTM_PL_VRAM,
-		.flags = TTM_PL_FLAG_CACHED
+		.flags = 0
 	}
 };
 
@@ -119,22 +119,22 @@ static const struct ttm_place evictable_placement_flags[] = {
 		.fpfn = 0,
 		.lpfn = 0,
 		.mem_type = TTM_PL_SYSTEM,
-		.flags = TTM_PL_FLAG_CACHED
+		.flags = 0
 	}, {
 		.fpfn = 0,
 		.lpfn = 0,
 		.mem_type = TTM_PL_VRAM,
-		.flags = TTM_PL_FLAG_CACHED
+		.flags = 0
 	}, {
 		.fpfn = 0,
 		.lpfn = 0,
 		.mem_type = VMW_PL_GMR,
-		.flags = TTM_PL_FLAG_CACHED
+		.flags = 0
 	}, {
 		.fpfn = 0,
 		.lpfn = 0,
 		.mem_type = VMW_PL_MOB,
-		.flags = TTM_PL_FLAG_CACHED
+		.flags = 0
 	}
 };
 
@@ -143,17 +143,17 @@ static const struct ttm_place nonfixed_placement_flags[] = {
 		.fpfn = 0,
 		.lpfn = 0,
 		.mem_type = TTM_PL_SYSTEM,
-		.flags = TTM_PL_FLAG_CACHED
+		.flags = 0
 	}, {
 		.fpfn = 0,
 		.lpfn = 0,
 		.mem_type = VMW_PL_GMR,
-		.flags = TTM_PL_FLAG_CACHED
+		.flags = 0
 	}, {
 		.fpfn = 0,
 		.lpfn = 0,
 		.mem_type = VMW_PL_MOB,
-		.flags = TTM_PL_FLAG_CACHED
+		.flags = 0
 	}
 };
 
diff --git a/include/drm/ttm/ttm_placement.h b/include/drm/ttm/ttm_placement.h
index 50e72df48b8d..aa6ba4d0cf78 100644
--- a/include/drm/ttm/ttm_placement.h
+++ b/include/drm/ttm/ttm_placement.h
@@ -43,27 +43,13 @@
 #define TTM_PL_PRIV             3
 
 /*
- * Other flags that affects data placement.
- * TTM_PL_FLAG_CACHED indicates cache-coherent mappings
- * if available.
- * TTM_PL_FLAG_SHARED means that another application may
- * reference the buffer.
- * TTM_PL_FLAG_NO_EVICT means that the buffer may never
- * be evicted to make room for other buffers.
  * TTM_PL_FLAG_TOPDOWN requests to be placed from the
  * top of the memory area, instead of the bottom.
  */
 
-#define TTM_PL_FLAG_CACHED      (1 << 16)
-#define TTM_PL_FLAG_UNCACHED    (1 << 17)
-#define TTM_PL_FLAG_WC          (1 << 18)
 #define TTM_PL_FLAG_CONTIGUOUS  (1 << 19)
 #define TTM_PL_FLAG_TOPDOWN     (1 << 22)
 
-#define TTM_PL_MASK_CACHING     (TTM_PL_FLAG_CACHED | \
-				 TTM_PL_FLAG_UNCACHED | \
-				 TTM_PL_FLAG_WC)
-
 /**
  * struct ttm_place
  *
diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
index c39c722d5184..e042dec5e6c1 100644
--- a/include/drm/ttm/ttm_tt.h
+++ b/include/drm/ttm/ttm_tt.h
@@ -164,21 +164,6 @@ void ttm_tt_destroy_common(struct ttm_bo_device *bdev, struct ttm_tt *ttm);
  * Swap in a previously swap out ttm_tt.
  */
 int ttm_tt_swapin(struct ttm_tt *ttm);
-
-/**
- * ttm_tt_set_placement_caching:
- *
- * @ttm A struct ttm_tt the backing pages of which will change caching policy.
- * @placement: Flag indicating the desired caching policy.
- *
- * This function will change caching policy of any default kernel mappings of
- * the pages backing @ttm. If changing from cached to uncached or
- * write-combined,
- * all CPU caches will first be flushed to make sure the data of the pages
- * hit RAM. This function may be very costly as it involves global TLB
- * and cache flushes and potential page splitting / combining.
- */
-int ttm_tt_set_placement_caching(struct ttm_tt *ttm, uint32_t placement);
 int ttm_tt_swapout(struct ttm_bo_device *bdev, struct ttm_tt *ttm);
 
 /**
-- 
2.17.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/4] drm/ttm: set the tt caching state at creation time
  2020-10-08  9:31 [PATCH 1/4] drm/ttm: set the tt caching state at creation time Christian König
                   ` (2 preceding siblings ...)
  2020-10-08  9:31 ` [PATCH 4/4] drm/ttm: nuke caching placement flags Christian König
@ 2020-10-12  8:57 ` Christian König
  2020-10-12 14:14   ` Daniel Vetter
  3 siblings, 1 reply; 7+ messages in thread
From: Christian König @ 2020-10-12  8:57 UTC (permalink / raw)
  To: dri-devel

Ping? Anybody any more comments on this?

Otherwise I'm going to push it to drm-misc-next by tomorrow or so.

Thanks,
Christian.

Am 08.10.20 um 11:31 schrieb Christian König:
> All drivers can determine the tt caching state at creation time,
> no need to do this on the fly during every validation.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c    |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 11 +++++--
>   drivers/gpu/drm/drm_gem_vram_helper.c      |  2 +-
>   drivers/gpu/drm/nouveau/nouveau_sgdma.c    | 13 ++++++++-
>   drivers/gpu/drm/qxl/qxl_ttm.c              |  2 +-
>   drivers/gpu/drm/radeon/radeon_ttm.c        | 16 ++++++++--
>   drivers/gpu/drm/ttm/ttm_agp_backend.c      |  2 +-
>   drivers/gpu/drm/ttm/ttm_page_alloc.c       | 26 ++++++++---------
>   drivers/gpu/drm/ttm/ttm_page_alloc_dma.c   | 20 ++++++-------
>   drivers/gpu/drm/ttm/ttm_tt.c               | 33 +++++++++++----------
>   drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c |  6 ++--
>   include/drm/ttm/ttm_caching.h              | 34 ++++++++++++++++++++++
>   include/drm/ttm/ttm_tt.h                   | 16 ++++------
>   13 files changed, 123 insertions(+), 60 deletions(-)
>   create mode 100644 include/drm/ttm/ttm_caching.h
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index 213ef090bb0e..3c5ad69eff19 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -124,7 +124,7 @@ uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
>   	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
>   	struct ttm_dma_tt *ttm;
>   
> -	if (bo->num_pages != 1 || bo->ttm->caching_state == tt_cached)
> +	if (bo->num_pages != 1 || bo->ttm->caching == ttm_cached)
>   		return AMDGPU_BO_INVALID_OFFSET;
>   
>   	ttm = container_of(bo->ttm, struct ttm_dma_tt, ttm);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 399961035ae6..7f41a47e7353 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1292,7 +1292,9 @@ static void amdgpu_ttm_backend_destroy(struct ttm_bo_device *bdev,
>   static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
>   					   uint32_t page_flags)
>   {
> +	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
>   	struct amdgpu_ttm_tt *gtt;
> +	enum ttm_caching caching;
>   
>   	gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
>   	if (gtt == NULL) {
> @@ -1300,8 +1302,13 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
>   	}
>   	gtt->gobj = &bo->base;
>   
> +	if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
> +		caching = ttm_write_combined;
> +	else
> +		caching = ttm_cached;
> +
>   	/* allocate space for the uninitialized page entries */
> -	if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags)) {
> +	if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags, caching)) {
>   		kfree(gtt);
>   		return NULL;
>   	}
> @@ -1525,7 +1532,7 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)
>   	if (mem && mem->mem_type == TTM_PL_TT) {
>   		flags |= AMDGPU_PTE_SYSTEM;
>   
> -		if (ttm->caching_state == tt_cached)
> +		if (ttm->caching == ttm_cached)
>   			flags |= AMDGPU_PTE_SNOOPED;
>   	}
>   
> diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c
> index 3213429f8444..ad58d0af5141 100644
> --- a/drivers/gpu/drm/drm_gem_vram_helper.c
> +++ b/drivers/gpu/drm/drm_gem_vram_helper.c
> @@ -918,7 +918,7 @@ static struct ttm_tt *bo_driver_ttm_tt_create(struct ttm_buffer_object *bo,
>   	if (!tt)
>   		return NULL;
>   
> -	ret = ttm_tt_init(tt, bo, page_flags);
> +	ret = ttm_tt_init(tt, bo, page_flags, ttm_cached);
>   	if (ret < 0)
>   		goto err_ttm_tt_init;
>   
> diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
> index 806d9ec310f5..cd6fdebae795 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
> @@ -5,6 +5,7 @@
>   #include "nouveau_drv.h"
>   #include "nouveau_mem.h"
>   #include "nouveau_ttm.h"
> +#include "nouveau_bo.h"
>   
>   struct nouveau_sgdma_be {
>   	/* this has to be the first field so populate/unpopulated in
> @@ -67,13 +68,23 @@ nouveau_sgdma_unbind(struct ttm_bo_device *bdev, struct ttm_tt *ttm)
>   struct ttm_tt *
>   nouveau_sgdma_create_ttm(struct ttm_buffer_object *bo, uint32_t page_flags)
>   {
> +	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
> +	struct nouveau_bo *nvbo = nouveau_bo(bo);
>   	struct nouveau_sgdma_be *nvbe;
> +	enum ttm_caching caching;
> +
> +	if (nvbo->force_coherent)
> +		caching = ttm_uncached;
> +	else if (drm->agp.bridge)
> +		caching = ttm_write_combined;
> +	else
> +		caching = ttm_cached;
>   
>   	nvbe = kzalloc(sizeof(*nvbe), GFP_KERNEL);
>   	if (!nvbe)
>   		return NULL;
>   
> -	if (ttm_dma_tt_init(&nvbe->ttm, bo, page_flags)) {
> +	if (ttm_dma_tt_init(&nvbe->ttm, bo, page_flags, caching)) {
>   		kfree(nvbe);
>   		return NULL;
>   	}
> diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
> index 669bceb58205..f50863493f64 100644
> --- a/drivers/gpu/drm/qxl/qxl_ttm.c
> +++ b/drivers/gpu/drm/qxl/qxl_ttm.c
> @@ -133,7 +133,7 @@ static struct ttm_tt *qxl_ttm_tt_create(struct ttm_buffer_object *bo,
>   	ttm = kzalloc(sizeof(struct ttm_tt), GFP_KERNEL);
>   	if (ttm == NULL)
>   		return NULL;
> -	if (ttm_tt_init(ttm, bo, page_flags)) {
> +	if (ttm_tt_init(ttm, bo, page_flags, ttm_cached)) {
>   		kfree(ttm);
>   		return NULL;
>   	}
> diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
> index 63e38b05a5bc..130a7cea35c3 100644
> --- a/drivers/gpu/drm/radeon/radeon_ttm.c
> +++ b/drivers/gpu/drm/radeon/radeon_ttm.c
> @@ -546,7 +546,7 @@ static int radeon_ttm_backend_bind(struct ttm_bo_device *bdev,
>   		WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
>   		     ttm->num_pages, bo_mem, ttm);
>   	}
> -	if (ttm->caching_state == tt_cached)
> +	if (ttm->caching == ttm_cached)
>   		flags |= RADEON_GART_PAGE_SNOOP;
>   	r = radeon_gart_bind(rdev, gtt->offset, ttm->num_pages,
>   			     ttm->pages, gtt->ttm.dma_address, flags);
> @@ -590,6 +590,10 @@ static struct ttm_tt *radeon_ttm_tt_create(struct ttm_buffer_object *bo,
>   {
>   	struct radeon_device *rdev;
>   	struct radeon_ttm_tt *gtt;
> +	enum ttm_caching caching;
> +	struct radeon_bo *rbo;
> +
> +	rbo = container_of(bo, struct radeon_bo, tbo);
>   
>   	rdev = radeon_get_rdev(bo->bdev);
>   #if IS_ENABLED(CONFIG_AGP)
> @@ -603,7 +607,15 @@ static struct ttm_tt *radeon_ttm_tt_create(struct ttm_buffer_object *bo,
>   	if (gtt == NULL) {
>   		return NULL;
>   	}
> -	if (ttm_dma_tt_init(&gtt->ttm, bo, page_flags)) {
> +
> +	if (rbo->flags & RADEON_GEM_GTT_UC)
> +		caching = ttm_uncached;
> +	else if (rbo->flags & RADEON_GEM_GTT_WC)
> +		caching = ttm_write_combined;
> +	else
> +		caching = ttm_cached;
> +
> +	if (ttm_dma_tt_init(&gtt->ttm, bo, page_flags, caching)) {
>   		kfree(gtt);
>   		return NULL;
>   	}
> diff --git a/drivers/gpu/drm/ttm/ttm_agp_backend.c b/drivers/gpu/drm/ttm/ttm_agp_backend.c
> index a98fd795b752..a723062d37e7 100644
> --- a/drivers/gpu/drm/ttm/ttm_agp_backend.c
> +++ b/drivers/gpu/drm/ttm/ttm_agp_backend.c
> @@ -136,7 +136,7 @@ struct ttm_tt *ttm_agp_tt_create(struct ttm_buffer_object *bo,
>   	agp_be->mem = NULL;
>   	agp_be->bridge = bridge;
>   
> -	if (ttm_tt_init(&agp_be->ttm, bo, page_flags)) {
> +	if (ttm_tt_init(&agp_be->ttm, bo, page_flags, ttm_write_combined)) {
>   		kfree(agp_be);
>   		return NULL;
>   	}
> diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
> index 111031cbb6df..c8f6790962b9 100644
> --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
> +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
> @@ -220,14 +220,14 @@ static struct ttm_pool_manager *_manager;
>   /**
>    * Select the right pool or requested caching state and ttm flags. */
>   static struct ttm_page_pool *ttm_get_pool(int flags, bool huge,
> -					  enum ttm_caching_state cstate)
> +					  enum ttm_caching cstate)
>   {
>   	int pool_index;
>   
> -	if (cstate == tt_cached)
> +	if (cstate == ttm_cached)
>   		return NULL;
>   
> -	if (cstate == tt_wc)
> +	if (cstate == ttm_write_combined)
>   		pool_index = 0x0;
>   	else
>   		pool_index = 0x1;
> @@ -441,17 +441,17 @@ static void ttm_pool_mm_shrink_fini(struct ttm_pool_manager *manager)
>   }
>   
>   static int ttm_set_pages_caching(struct page **pages,
> -		enum ttm_caching_state cstate, unsigned cpages)
> +		enum ttm_caching cstate, unsigned cpages)
>   {
>   	int r = 0;
>   	/* Set page caching */
>   	switch (cstate) {
> -	case tt_uncached:
> +	case ttm_uncached:
>   		r = ttm_set_pages_array_uc(pages, cpages);
>   		if (r)
>   			pr_err("Failed to set %d pages to uc!\n", cpages);
>   		break;
> -	case tt_wc:
> +	case ttm_write_combined:
>   		r = ttm_set_pages_array_wc(pages, cpages);
>   		if (r)
>   			pr_err("Failed to set %d pages to wc!\n", cpages);
> @@ -486,7 +486,7 @@ static void ttm_handle_caching_failure(struct page **failed_pages,
>    * pages returned in pages array.
>    */
>   static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags,
> -			       int ttm_flags, enum ttm_caching_state cstate,
> +			       int ttm_flags, enum ttm_caching cstate,
>   			       unsigned count, unsigned order)
>   {
>   	struct page **caching_array;
> @@ -566,7 +566,7 @@ static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags,
>    * pages is small.
>    */
>   static void ttm_page_pool_fill_locked(struct ttm_page_pool *pool, int ttm_flags,
> -				      enum ttm_caching_state cstate,
> +				      enum ttm_caching cstate,
>   				      unsigned count, unsigned long *irq_flags)
>   {
>   	struct page *p;
> @@ -626,7 +626,7 @@ static void ttm_page_pool_fill_locked(struct ttm_page_pool *pool, int ttm_flags,
>   static int ttm_page_pool_get_pages(struct ttm_page_pool *pool,
>   				   struct list_head *pages,
>   				   int ttm_flags,
> -				   enum ttm_caching_state cstate,
> +				   enum ttm_caching cstate,
>   				   unsigned count, unsigned order)
>   {
>   	unsigned long irq_flags;
> @@ -703,7 +703,7 @@ static int ttm_page_pool_get_pages(struct ttm_page_pool *pool,
>   
>   /* Put all pages in pages list to correct pool to wait for reuse */
>   static void ttm_put_pages(struct page **pages, unsigned npages, int flags,
> -			  enum ttm_caching_state cstate)
> +			  enum ttm_caching cstate)
>   {
>   	struct ttm_page_pool *pool = ttm_get_pool(flags, false, cstate);
>   #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> @@ -821,7 +821,7 @@ static void ttm_put_pages(struct page **pages, unsigned npages, int flags,
>    * cached pages.
>    */
>   static int ttm_get_pages(struct page **pages, unsigned npages, int flags,
> -			 enum ttm_caching_state cstate)
> +			 enum ttm_caching cstate)
>   {
>   	struct ttm_page_pool *pool = ttm_get_pool(flags, false, cstate);
>   #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> @@ -1040,7 +1040,7 @@ ttm_pool_unpopulate_helper(struct ttm_tt *ttm, unsigned mem_count_update)
>   
>   put_pages:
>   	ttm_put_pages(ttm->pages, ttm->num_pages, ttm->page_flags,
> -		      ttm->caching_state);
> +		      ttm->caching);
>   	ttm_tt_set_unpopulated(ttm);
>   }
>   
> @@ -1057,7 +1057,7 @@ int ttm_pool_populate(struct ttm_tt *ttm, struct ttm_operation_ctx *ctx)
>   		return -ENOMEM;
>   
>   	ret = ttm_get_pages(ttm->pages, ttm->num_pages, ttm->page_flags,
> -			    ttm->caching_state);
> +			    ttm->caching);
>   	if (unlikely(ret != 0)) {
>   		ttm_pool_unpopulate_helper(ttm, 0);
>   		return ret;
> diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
> index 1045a5c26ee3..6625b43f6256 100644
> --- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
> +++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
> @@ -325,15 +325,15 @@ static struct dma_page *__ttm_dma_alloc_page(struct dma_pool *pool)
>   	}
>   	return d_page;
>   }
> -static enum pool_type ttm_to_type(int flags, enum ttm_caching_state cstate)
> +static enum pool_type ttm_to_type(int flags, enum ttm_caching cstate)
>   {
>   	enum pool_type type = IS_UNDEFINED;
>   
>   	if (flags & TTM_PAGE_FLAG_DMA32)
>   		type |= IS_DMA32;
> -	if (cstate == tt_cached)
> +	if (cstate == ttm_cached)
>   		type |= IS_CACHED;
> -	else if (cstate == tt_uncached)
> +	else if (cstate == ttm_uncached)
>   		type |= IS_UC;
>   	else
>   		type |= IS_WC;
> @@ -663,7 +663,7 @@ static struct dma_pool *ttm_dma_find_pool(struct device *dev,
>    * are pages that have changed their caching state already put them to the
>    * pool.
>    */
> -static void ttm_dma_handle_caching_state_failure(struct dma_pool *pool,
> +static void ttm_dma_handle_caching_failure(struct dma_pool *pool,
>   						 struct list_head *d_pages,
>   						 struct page **failed_pages,
>   						 unsigned cpages)
> @@ -734,7 +734,7 @@ static int ttm_dma_pool_alloc_new_pages(struct dma_pool *pool,
>   				r = ttm_set_pages_caching(pool, caching_array,
>   							  cpages);
>   				if (r)
> -					ttm_dma_handle_caching_state_failure(
> +					ttm_dma_handle_caching_failure(
>   						pool, d_pages, caching_array,
>   						cpages);
>   			}
> @@ -760,7 +760,7 @@ static int ttm_dma_pool_alloc_new_pages(struct dma_pool *pool,
>   				r = ttm_set_pages_caching(pool, caching_array,
>   							  cpages);
>   				if (r) {
> -					ttm_dma_handle_caching_state_failure(
> +					ttm_dma_handle_caching_failure(
>   					     pool, d_pages, caching_array,
>   					     cpages);
>   					goto out;
> @@ -773,7 +773,7 @@ static int ttm_dma_pool_alloc_new_pages(struct dma_pool *pool,
>   	if (cpages) {
>   		r = ttm_set_pages_caching(pool, caching_array, cpages);
>   		if (r)
> -			ttm_dma_handle_caching_state_failure(pool, d_pages,
> +			ttm_dma_handle_caching_failure(pool, d_pages,
>   					caching_array, cpages);
>   	}
>   out:
> @@ -904,7 +904,7 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev,
>   	INIT_LIST_HEAD(&ttm_dma->pages_list);
>   	i = 0;
>   
> -	type = ttm_to_type(ttm->page_flags, ttm->caching_state);
> +	type = ttm_to_type(ttm->page_flags, ttm->caching);
>   
>   #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>   	if (ttm->page_flags & TTM_PAGE_FLAG_DMA32)
> @@ -1000,7 +1000,7 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
>   	unsigned count, i, npages = 0;
>   	unsigned long irq_flags;
>   
> -	type = ttm_to_type(ttm->page_flags, ttm->caching_state);
> +	type = ttm_to_type(ttm->page_flags, ttm->caching);
>   
>   #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>   	pool = ttm_dma_find_pool(dev, type | IS_HUGE);
> @@ -1032,7 +1032,7 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
>   		return;
>   
>   	is_cached = (ttm_dma_find_pool(pool->dev,
> -		     ttm_to_type(ttm->page_flags, tt_cached)) == pool);
> +		     ttm_to_type(ttm->page_flags, ttm_cached)) == pool);
>   
>   	/* make sure pages array match list and count number of pages */
>   	count = 0;
> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
> index 23e9604bc924..a465f51df027 100644
> --- a/drivers/gpu/drm/ttm/ttm_tt.c
> +++ b/drivers/gpu/drm/ttm/ttm_tt.c
> @@ -114,31 +114,30 @@ static int ttm_sg_tt_alloc_page_directory(struct ttm_dma_tt *ttm)
>   	return 0;
>   }
>   
> -static int ttm_tt_set_caching(struct ttm_tt *ttm,
> -			      enum ttm_caching_state c_state)
> +static int ttm_tt_set_caching(struct ttm_tt *ttm, enum ttm_caching caching)
>   {
> -	if (ttm->caching_state == c_state)
> +	if (ttm->caching == caching)
>   		return 0;
>   
>   	/* Can't change the caching state after TT is populated */
>   	if (WARN_ON_ONCE(ttm_tt_is_populated(ttm)))
>   		return -EINVAL;
>   
> -	ttm->caching_state = c_state;
> +	ttm->caching = caching;
>   
>   	return 0;
>   }
>   
>   int ttm_tt_set_placement_caching(struct ttm_tt *ttm, uint32_t placement)
>   {
> -	enum ttm_caching_state state;
> +	enum ttm_caching state;
>   
>   	if (placement & TTM_PL_FLAG_WC)
> -		state = tt_wc;
> +		state = ttm_write_combined;
>   	else if (placement & TTM_PL_FLAG_UNCACHED)
> -		state = tt_uncached;
> +		state = ttm_uncached;
>   	else
> -		state = tt_cached;
> +		state = ttm_cached;
>   
>   	return ttm_tt_set_caching(ttm, state);
>   }
> @@ -162,20 +161,22 @@ void ttm_tt_destroy(struct ttm_bo_device *bdev, struct ttm_tt *ttm)
>   
>   static void ttm_tt_init_fields(struct ttm_tt *ttm,
>   			       struct ttm_buffer_object *bo,
> -			       uint32_t page_flags)
> +			       uint32_t page_flags,
> +			       enum ttm_caching caching)
>   {
>   	ttm->num_pages = bo->num_pages;
> -	ttm->caching_state = tt_cached;
> +	ttm->caching = ttm_cached;
>   	ttm->page_flags = page_flags;
>   	ttm_tt_set_unpopulated(ttm);
>   	ttm->swap_storage = NULL;
>   	ttm->sg = bo->sg;
> +	ttm->caching = caching;
>   }
>   
>   int ttm_tt_init(struct ttm_tt *ttm, struct ttm_buffer_object *bo,
> -		uint32_t page_flags)
> +		uint32_t page_flags, enum ttm_caching caching)
>   {
> -	ttm_tt_init_fields(ttm, bo, page_flags);
> +	ttm_tt_init_fields(ttm, bo, page_flags, caching);
>   
>   	if (ttm_tt_alloc_page_directory(ttm)) {
>   		pr_err("Failed allocating page table\n");
> @@ -193,11 +194,11 @@ void ttm_tt_fini(struct ttm_tt *ttm)
>   EXPORT_SYMBOL(ttm_tt_fini);
>   
>   int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_buffer_object *bo,
> -		    uint32_t page_flags)
> +		    uint32_t page_flags, enum ttm_caching caching)
>   {
>   	struct ttm_tt *ttm = &ttm_dma->ttm;
>   
> -	ttm_tt_init_fields(ttm, bo, page_flags);
> +	ttm_tt_init_fields(ttm, bo, page_flags, caching);
>   
>   	INIT_LIST_HEAD(&ttm_dma->pages_list);
>   	if (ttm_dma_tt_alloc_page_directory(ttm_dma)) {
> @@ -209,12 +210,12 @@ int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_buffer_object *bo,
>   EXPORT_SYMBOL(ttm_dma_tt_init);
>   
>   int ttm_sg_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_buffer_object *bo,
> -		   uint32_t page_flags)
> +		   uint32_t page_flags, enum ttm_caching caching)
>   {
>   	struct ttm_tt *ttm = &ttm_dma->ttm;
>   	int ret;
>   
> -	ttm_tt_init_fields(ttm, bo, page_flags);
> +	ttm_tt_init_fields(ttm, bo, page_flags, caching);
>   
>   	INIT_LIST_HEAD(&ttm_dma->pages_list);
>   	if (page_flags & TTM_PAGE_FLAG_SG)
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
> index 7b5fd5288870..1fa7f9438ec4 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
> @@ -647,9 +647,11 @@ static struct ttm_tt *vmw_ttm_tt_create(struct ttm_buffer_object *bo,
>   	vmw_be->mob = NULL;
>   
>   	if (vmw_be->dev_priv->map_mode == vmw_dma_alloc_coherent)
> -		ret = ttm_dma_tt_init(&vmw_be->dma_ttm, bo, page_flags);
> +		ret = ttm_dma_tt_init(&vmw_be->dma_ttm, bo, page_flags,
> +				      ttm_cached);
>   	else
> -		ret = ttm_tt_init(&vmw_be->dma_ttm.ttm, bo, page_flags);
> +		ret = ttm_tt_init(&vmw_be->dma_ttm.ttm, bo, page_flags,
> +				  ttm_cached);
>   	if (unlikely(ret != 0))
>   		goto out_no_init;
>   
> diff --git a/include/drm/ttm/ttm_caching.h b/include/drm/ttm/ttm_caching.h
> new file mode 100644
> index 000000000000..161624dcf6be
> --- /dev/null
> +++ b/include/drm/ttm/ttm_caching.h
> @@ -0,0 +1,34 @@
> +/*
> + * Copyright 2020 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + * Authors: Christian König
> + */
> +
> +#ifndef _TTM_CACHING_H_
> +#define _TTM_CACHING_H_
> +
> +enum ttm_caching {
> +	ttm_uncached,
> +	ttm_write_combined,
> +	ttm_cached
> +};
> +
> +#endif
> diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
> index 5d1835d44084..c39c722d5184 100644
> --- a/include/drm/ttm/ttm_tt.h
> +++ b/include/drm/ttm/ttm_tt.h
> @@ -28,6 +28,7 @@
>   #define _TTM_TT_H_
>   
>   #include <linux/types.h>
> +#include <drm/ttm/ttm_caching.h>
>   
>   struct ttm_tt;
>   struct ttm_resource;
> @@ -42,12 +43,6 @@ struct ttm_operation_ctx;
>   
>   #define TTM_PAGE_FLAG_PRIV_POPULATED  (1 << 31)
>   
> -enum ttm_caching_state {
> -	tt_uncached,
> -	tt_wc,
> -	tt_cached
> -};
> -
>   /**
>    * struct ttm_tt
>    *
> @@ -69,7 +64,7 @@ struct ttm_tt {
>   	unsigned long num_pages;
>   	struct sg_table *sg; /* for SG objects via dma-buf */
>   	struct file *swap_storage;
> -	enum ttm_caching_state caching_state;
> +	enum ttm_caching caching;
>   };
>   
>   static inline bool ttm_tt_is_populated(struct ttm_tt *tt)
> @@ -121,6 +116,7 @@ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc);
>    * @ttm: The struct ttm_tt.
>    * @bo: The buffer object we create the ttm for.
>    * @page_flags: Page flags as identified by TTM_PAGE_FLAG_XX flags.
> + * @caching: the desired caching state of the pages
>    *
>    * Create a struct ttm_tt to back data with system memory pages.
>    * No pages are actually allocated.
> @@ -128,11 +124,11 @@ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc);
>    * NULL: Out of memory.
>    */
>   int ttm_tt_init(struct ttm_tt *ttm, struct ttm_buffer_object *bo,
> -		uint32_t page_flags);
> +		uint32_t page_flags, enum ttm_caching caching);
>   int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_buffer_object *bo,
> -		    uint32_t page_flags);
> +		    uint32_t page_flags, enum ttm_caching caching);
>   int ttm_sg_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_buffer_object *bo,
> -		   uint32_t page_flags);
> +		   uint32_t page_flags, enum ttm_caching caching);
>   
>   /**
>    * ttm_tt_fini

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/4] drm/ttm: set the tt caching state at creation time
  2020-10-12  8:57 ` [PATCH 1/4] drm/ttm: set the tt caching state at creation time Christian König
@ 2020-10-12 14:14   ` Daniel Vetter
  2020-10-12 15:22     ` Christian König
  0 siblings, 1 reply; 7+ messages in thread
From: Daniel Vetter @ 2020-10-12 14:14 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

On Mon, Oct 12, 2020 at 10:57:57AM +0200, Christian König wrote:
> Ping? Anybody any more comments on this?
> 
> Otherwise I'm going to push it to drm-misc-next by tomorrow or so.

tbh the entire coherency/caching topic is imo a gigantic mess in
drivers/gpu (mostly because we're half-fighting dma-api all the time). But
I don't have a clear opinion where to go, hence *shrug*.
-Daniel

> 
> Thanks,
> Christian.
> 
> Am 08.10.20 um 11:31 schrieb Christian König:
> > All drivers can determine the tt caching state at creation time,
> > no need to do this on the fly during every validation.
> > 
> > Signed-off-by: Christian König <christian.koenig@amd.com>
> > Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c    |  2 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 11 +++++--
> >   drivers/gpu/drm/drm_gem_vram_helper.c      |  2 +-
> >   drivers/gpu/drm/nouveau/nouveau_sgdma.c    | 13 ++++++++-
> >   drivers/gpu/drm/qxl/qxl_ttm.c              |  2 +-
> >   drivers/gpu/drm/radeon/radeon_ttm.c        | 16 ++++++++--
> >   drivers/gpu/drm/ttm/ttm_agp_backend.c      |  2 +-
> >   drivers/gpu/drm/ttm/ttm_page_alloc.c       | 26 ++++++++---------
> >   drivers/gpu/drm/ttm/ttm_page_alloc_dma.c   | 20 ++++++-------
> >   drivers/gpu/drm/ttm/ttm_tt.c               | 33 +++++++++++----------
> >   drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c |  6 ++--
> >   include/drm/ttm/ttm_caching.h              | 34 ++++++++++++++++++++++
> >   include/drm/ttm/ttm_tt.h                   | 16 ++++------
> >   13 files changed, 123 insertions(+), 60 deletions(-)
> >   create mode 100644 include/drm/ttm/ttm_caching.h
> > 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> > index 213ef090bb0e..3c5ad69eff19 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> > @@ -124,7 +124,7 @@ uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
> >   	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
> >   	struct ttm_dma_tt *ttm;
> > -	if (bo->num_pages != 1 || bo->ttm->caching_state == tt_cached)
> > +	if (bo->num_pages != 1 || bo->ttm->caching == ttm_cached)
> >   		return AMDGPU_BO_INVALID_OFFSET;
> >   	ttm = container_of(bo->ttm, struct ttm_dma_tt, ttm);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > index 399961035ae6..7f41a47e7353 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > @@ -1292,7 +1292,9 @@ static void amdgpu_ttm_backend_destroy(struct ttm_bo_device *bdev,
> >   static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
> >   					   uint32_t page_flags)
> >   {
> > +	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
> >   	struct amdgpu_ttm_tt *gtt;
> > +	enum ttm_caching caching;
> >   	gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
> >   	if (gtt == NULL) {
> > @@ -1300,8 +1302,13 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
> >   	}
> >   	gtt->gobj = &bo->base;
> > +	if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
> > +		caching = ttm_write_combined;
> > +	else
> > +		caching = ttm_cached;
> > +
> >   	/* allocate space for the uninitialized page entries */
> > -	if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags)) {
> > +	if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags, caching)) {
> >   		kfree(gtt);
> >   		return NULL;
> >   	}
> > @@ -1525,7 +1532,7 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)
> >   	if (mem && mem->mem_type == TTM_PL_TT) {
> >   		flags |= AMDGPU_PTE_SYSTEM;
> > -		if (ttm->caching_state == tt_cached)
> > +		if (ttm->caching == ttm_cached)
> >   			flags |= AMDGPU_PTE_SNOOPED;
> >   	}
> > diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c
> > index 3213429f8444..ad58d0af5141 100644
> > --- a/drivers/gpu/drm/drm_gem_vram_helper.c
> > +++ b/drivers/gpu/drm/drm_gem_vram_helper.c
> > @@ -918,7 +918,7 @@ static struct ttm_tt *bo_driver_ttm_tt_create(struct ttm_buffer_object *bo,
> >   	if (!tt)
> >   		return NULL;
> > -	ret = ttm_tt_init(tt, bo, page_flags);
> > +	ret = ttm_tt_init(tt, bo, page_flags, ttm_cached);
> >   	if (ret < 0)
> >   		goto err_ttm_tt_init;
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
> > index 806d9ec310f5..cd6fdebae795 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
> > +++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
> > @@ -5,6 +5,7 @@
> >   #include "nouveau_drv.h"
> >   #include "nouveau_mem.h"
> >   #include "nouveau_ttm.h"
> > +#include "nouveau_bo.h"
> >   struct nouveau_sgdma_be {
> >   	/* this has to be the first field so populate/unpopulated in
> > @@ -67,13 +68,23 @@ nouveau_sgdma_unbind(struct ttm_bo_device *bdev, struct ttm_tt *ttm)
> >   struct ttm_tt *
> >   nouveau_sgdma_create_ttm(struct ttm_buffer_object *bo, uint32_t page_flags)
> >   {
> > +	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
> > +	struct nouveau_bo *nvbo = nouveau_bo(bo);
> >   	struct nouveau_sgdma_be *nvbe;
> > +	enum ttm_caching caching;
> > +
> > +	if (nvbo->force_coherent)
> > +		caching = ttm_uncached;
> > +	else if (drm->agp.bridge)
> > +		caching = ttm_write_combined;
> > +	else
> > +		caching = ttm_cached;
> >   	nvbe = kzalloc(sizeof(*nvbe), GFP_KERNEL);
> >   	if (!nvbe)
> >   		return NULL;
> > -	if (ttm_dma_tt_init(&nvbe->ttm, bo, page_flags)) {
> > +	if (ttm_dma_tt_init(&nvbe->ttm, bo, page_flags, caching)) {
> >   		kfree(nvbe);
> >   		return NULL;
> >   	}
> > diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
> > index 669bceb58205..f50863493f64 100644
> > --- a/drivers/gpu/drm/qxl/qxl_ttm.c
> > +++ b/drivers/gpu/drm/qxl/qxl_ttm.c
> > @@ -133,7 +133,7 @@ static struct ttm_tt *qxl_ttm_tt_create(struct ttm_buffer_object *bo,
> >   	ttm = kzalloc(sizeof(struct ttm_tt), GFP_KERNEL);
> >   	if (ttm == NULL)
> >   		return NULL;
> > -	if (ttm_tt_init(ttm, bo, page_flags)) {
> > +	if (ttm_tt_init(ttm, bo, page_flags, ttm_cached)) {
> >   		kfree(ttm);
> >   		return NULL;
> >   	}
> > diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
> > index 63e38b05a5bc..130a7cea35c3 100644
> > --- a/drivers/gpu/drm/radeon/radeon_ttm.c
> > +++ b/drivers/gpu/drm/radeon/radeon_ttm.c
> > @@ -546,7 +546,7 @@ static int radeon_ttm_backend_bind(struct ttm_bo_device *bdev,
> >   		WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
> >   		     ttm->num_pages, bo_mem, ttm);
> >   	}
> > -	if (ttm->caching_state == tt_cached)
> > +	if (ttm->caching == ttm_cached)
> >   		flags |= RADEON_GART_PAGE_SNOOP;
> >   	r = radeon_gart_bind(rdev, gtt->offset, ttm->num_pages,
> >   			     ttm->pages, gtt->ttm.dma_address, flags);
> > @@ -590,6 +590,10 @@ static struct ttm_tt *radeon_ttm_tt_create(struct ttm_buffer_object *bo,
> >   {
> >   	struct radeon_device *rdev;
> >   	struct radeon_ttm_tt *gtt;
> > +	enum ttm_caching caching;
> > +	struct radeon_bo *rbo;
> > +
> > +	rbo = container_of(bo, struct radeon_bo, tbo);
> >   	rdev = radeon_get_rdev(bo->bdev);
> >   #if IS_ENABLED(CONFIG_AGP)
> > @@ -603,7 +607,15 @@ static struct ttm_tt *radeon_ttm_tt_create(struct ttm_buffer_object *bo,
> >   	if (gtt == NULL) {
> >   		return NULL;
> >   	}
> > -	if (ttm_dma_tt_init(&gtt->ttm, bo, page_flags)) {
> > +
> > +	if (rbo->flags & RADEON_GEM_GTT_UC)
> > +		caching = ttm_uncached;
> > +	else if (rbo->flags & RADEON_GEM_GTT_WC)
> > +		caching = ttm_write_combined;
> > +	else
> > +		caching = ttm_cached;
> > +
> > +	if (ttm_dma_tt_init(&gtt->ttm, bo, page_flags, caching)) {
> >   		kfree(gtt);
> >   		return NULL;
> >   	}
> > diff --git a/drivers/gpu/drm/ttm/ttm_agp_backend.c b/drivers/gpu/drm/ttm/ttm_agp_backend.c
> > index a98fd795b752..a723062d37e7 100644
> > --- a/drivers/gpu/drm/ttm/ttm_agp_backend.c
> > +++ b/drivers/gpu/drm/ttm/ttm_agp_backend.c
> > @@ -136,7 +136,7 @@ struct ttm_tt *ttm_agp_tt_create(struct ttm_buffer_object *bo,
> >   	agp_be->mem = NULL;
> >   	agp_be->bridge = bridge;
> > -	if (ttm_tt_init(&agp_be->ttm, bo, page_flags)) {
> > +	if (ttm_tt_init(&agp_be->ttm, bo, page_flags, ttm_write_combined)) {
> >   		kfree(agp_be);
> >   		return NULL;
> >   	}
> > diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
> > index 111031cbb6df..c8f6790962b9 100644
> > --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
> > +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
> > @@ -220,14 +220,14 @@ static struct ttm_pool_manager *_manager;
> >   /**
> >    * Select the right pool or requested caching state and ttm flags. */
> >   static struct ttm_page_pool *ttm_get_pool(int flags, bool huge,
> > -					  enum ttm_caching_state cstate)
> > +					  enum ttm_caching cstate)
> >   {
> >   	int pool_index;
> > -	if (cstate == tt_cached)
> > +	if (cstate == ttm_cached)
> >   		return NULL;
> > -	if (cstate == tt_wc)
> > +	if (cstate == ttm_write_combined)
> >   		pool_index = 0x0;
> >   	else
> >   		pool_index = 0x1;
> > @@ -441,17 +441,17 @@ static void ttm_pool_mm_shrink_fini(struct ttm_pool_manager *manager)
> >   }
> >   static int ttm_set_pages_caching(struct page **pages,
> > -		enum ttm_caching_state cstate, unsigned cpages)
> > +		enum ttm_caching cstate, unsigned cpages)
> >   {
> >   	int r = 0;
> >   	/* Set page caching */
> >   	switch (cstate) {
> > -	case tt_uncached:
> > +	case ttm_uncached:
> >   		r = ttm_set_pages_array_uc(pages, cpages);
> >   		if (r)
> >   			pr_err("Failed to set %d pages to uc!\n", cpages);
> >   		break;
> > -	case tt_wc:
> > +	case ttm_write_combined:
> >   		r = ttm_set_pages_array_wc(pages, cpages);
> >   		if (r)
> >   			pr_err("Failed to set %d pages to wc!\n", cpages);
> > @@ -486,7 +486,7 @@ static void ttm_handle_caching_failure(struct page **failed_pages,
> >    * pages returned in pages array.
> >    */
> >   static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags,
> > -			       int ttm_flags, enum ttm_caching_state cstate,
> > +			       int ttm_flags, enum ttm_caching cstate,
> >   			       unsigned count, unsigned order)
> >   {
> >   	struct page **caching_array;
> > @@ -566,7 +566,7 @@ static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags,
> >    * pages is small.
> >    */
> >   static void ttm_page_pool_fill_locked(struct ttm_page_pool *pool, int ttm_flags,
> > -				      enum ttm_caching_state cstate,
> > +				      enum ttm_caching cstate,
> >   				      unsigned count, unsigned long *irq_flags)
> >   {
> >   	struct page *p;
> > @@ -626,7 +626,7 @@ static void ttm_page_pool_fill_locked(struct ttm_page_pool *pool, int ttm_flags,
> >   static int ttm_page_pool_get_pages(struct ttm_page_pool *pool,
> >   				   struct list_head *pages,
> >   				   int ttm_flags,
> > -				   enum ttm_caching_state cstate,
> > +				   enum ttm_caching cstate,
> >   				   unsigned count, unsigned order)
> >   {
> >   	unsigned long irq_flags;
> > @@ -703,7 +703,7 @@ static int ttm_page_pool_get_pages(struct ttm_page_pool *pool,
> >   /* Put all pages in pages list to correct pool to wait for reuse */
> >   static void ttm_put_pages(struct page **pages, unsigned npages, int flags,
> > -			  enum ttm_caching_state cstate)
> > +			  enum ttm_caching cstate)
> >   {
> >   	struct ttm_page_pool *pool = ttm_get_pool(flags, false, cstate);
> >   #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> > @@ -821,7 +821,7 @@ static void ttm_put_pages(struct page **pages, unsigned npages, int flags,
> >    * cached pages.
> >    */
> >   static int ttm_get_pages(struct page **pages, unsigned npages, int flags,
> > -			 enum ttm_caching_state cstate)
> > +			 enum ttm_caching cstate)
> >   {
> >   	struct ttm_page_pool *pool = ttm_get_pool(flags, false, cstate);
> >   #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> > @@ -1040,7 +1040,7 @@ ttm_pool_unpopulate_helper(struct ttm_tt *ttm, unsigned mem_count_update)
> >   put_pages:
> >   	ttm_put_pages(ttm->pages, ttm->num_pages, ttm->page_flags,
> > -		      ttm->caching_state);
> > +		      ttm->caching);
> >   	ttm_tt_set_unpopulated(ttm);
> >   }
> > @@ -1057,7 +1057,7 @@ int ttm_pool_populate(struct ttm_tt *ttm, struct ttm_operation_ctx *ctx)
> >   		return -ENOMEM;
> >   	ret = ttm_get_pages(ttm->pages, ttm->num_pages, ttm->page_flags,
> > -			    ttm->caching_state);
> > +			    ttm->caching);
> >   	if (unlikely(ret != 0)) {
> >   		ttm_pool_unpopulate_helper(ttm, 0);
> >   		return ret;
> > diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
> > index 1045a5c26ee3..6625b43f6256 100644
> > --- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
> > +++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
> > @@ -325,15 +325,15 @@ static struct dma_page *__ttm_dma_alloc_page(struct dma_pool *pool)
> >   	}
> >   	return d_page;
> >   }
> > -static enum pool_type ttm_to_type(int flags, enum ttm_caching_state cstate)
> > +static enum pool_type ttm_to_type(int flags, enum ttm_caching cstate)
> >   {
> >   	enum pool_type type = IS_UNDEFINED;
> >   	if (flags & TTM_PAGE_FLAG_DMA32)
> >   		type |= IS_DMA32;
> > -	if (cstate == tt_cached)
> > +	if (cstate == ttm_cached)
> >   		type |= IS_CACHED;
> > -	else if (cstate == tt_uncached)
> > +	else if (cstate == ttm_uncached)
> >   		type |= IS_UC;
> >   	else
> >   		type |= IS_WC;
> > @@ -663,7 +663,7 @@ static struct dma_pool *ttm_dma_find_pool(struct device *dev,
> >    * are pages that have changed their caching state already put them to the
> >    * pool.
> >    */
> > -static void ttm_dma_handle_caching_state_failure(struct dma_pool *pool,
> > +static void ttm_dma_handle_caching_failure(struct dma_pool *pool,
> >   						 struct list_head *d_pages,
> >   						 struct page **failed_pages,
> >   						 unsigned cpages)
> > @@ -734,7 +734,7 @@ static int ttm_dma_pool_alloc_new_pages(struct dma_pool *pool,
> >   				r = ttm_set_pages_caching(pool, caching_array,
> >   							  cpages);
> >   				if (r)
> > -					ttm_dma_handle_caching_state_failure(
> > +					ttm_dma_handle_caching_failure(
> >   						pool, d_pages, caching_array,
> >   						cpages);
> >   			}
> > @@ -760,7 +760,7 @@ static int ttm_dma_pool_alloc_new_pages(struct dma_pool *pool,
> >   				r = ttm_set_pages_caching(pool, caching_array,
> >   							  cpages);
> >   				if (r) {
> > -					ttm_dma_handle_caching_state_failure(
> > +					ttm_dma_handle_caching_failure(
> >   					     pool, d_pages, caching_array,
> >   					     cpages);
> >   					goto out;
> > @@ -773,7 +773,7 @@ static int ttm_dma_pool_alloc_new_pages(struct dma_pool *pool,
> >   	if (cpages) {
> >   		r = ttm_set_pages_caching(pool, caching_array, cpages);
> >   		if (r)
> > -			ttm_dma_handle_caching_state_failure(pool, d_pages,
> > +			ttm_dma_handle_caching_failure(pool, d_pages,
> >   					caching_array, cpages);
> >   	}
> >   out:
> > @@ -904,7 +904,7 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev,
> >   	INIT_LIST_HEAD(&ttm_dma->pages_list);
> >   	i = 0;
> > -	type = ttm_to_type(ttm->page_flags, ttm->caching_state);
> > +	type = ttm_to_type(ttm->page_flags, ttm->caching);
> >   #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> >   	if (ttm->page_flags & TTM_PAGE_FLAG_DMA32)
> > @@ -1000,7 +1000,7 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
> >   	unsigned count, i, npages = 0;
> >   	unsigned long irq_flags;
> > -	type = ttm_to_type(ttm->page_flags, ttm->caching_state);
> > +	type = ttm_to_type(ttm->page_flags, ttm->caching);
> >   #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> >   	pool = ttm_dma_find_pool(dev, type | IS_HUGE);
> > @@ -1032,7 +1032,7 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
> >   		return;
> >   	is_cached = (ttm_dma_find_pool(pool->dev,
> > -		     ttm_to_type(ttm->page_flags, tt_cached)) == pool);
> > +		     ttm_to_type(ttm->page_flags, ttm_cached)) == pool);
> >   	/* make sure pages array match list and count number of pages */
> >   	count = 0;
> > diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
> > index 23e9604bc924..a465f51df027 100644
> > --- a/drivers/gpu/drm/ttm/ttm_tt.c
> > +++ b/drivers/gpu/drm/ttm/ttm_tt.c
> > @@ -114,31 +114,30 @@ static int ttm_sg_tt_alloc_page_directory(struct ttm_dma_tt *ttm)
> >   	return 0;
> >   }
> > -static int ttm_tt_set_caching(struct ttm_tt *ttm,
> > -			      enum ttm_caching_state c_state)
> > +static int ttm_tt_set_caching(struct ttm_tt *ttm, enum ttm_caching caching)
> >   {
> > -	if (ttm->caching_state == c_state)
> > +	if (ttm->caching == caching)
> >   		return 0;
> >   	/* Can't change the caching state after TT is populated */
> >   	if (WARN_ON_ONCE(ttm_tt_is_populated(ttm)))
> >   		return -EINVAL;
> > -	ttm->caching_state = c_state;
> > +	ttm->caching = caching;
> >   	return 0;
> >   }
> >   int ttm_tt_set_placement_caching(struct ttm_tt *ttm, uint32_t placement)
> >   {
> > -	enum ttm_caching_state state;
> > +	enum ttm_caching state;
> >   	if (placement & TTM_PL_FLAG_WC)
> > -		state = tt_wc;
> > +		state = ttm_write_combined;
> >   	else if (placement & TTM_PL_FLAG_UNCACHED)
> > -		state = tt_uncached;
> > +		state = ttm_uncached;
> >   	else
> > -		state = tt_cached;
> > +		state = ttm_cached;
> >   	return ttm_tt_set_caching(ttm, state);
> >   }
> > @@ -162,20 +161,22 @@ void ttm_tt_destroy(struct ttm_bo_device *bdev, struct ttm_tt *ttm)
> >   static void ttm_tt_init_fields(struct ttm_tt *ttm,
> >   			       struct ttm_buffer_object *bo,
> > -			       uint32_t page_flags)
> > +			       uint32_t page_flags,
> > +			       enum ttm_caching caching)
> >   {
> >   	ttm->num_pages = bo->num_pages;
> > -	ttm->caching_state = tt_cached;
> > +	ttm->caching = ttm_cached;
> >   	ttm->page_flags = page_flags;
> >   	ttm_tt_set_unpopulated(ttm);
> >   	ttm->swap_storage = NULL;
> >   	ttm->sg = bo->sg;
> > +	ttm->caching = caching;
> >   }
> >   int ttm_tt_init(struct ttm_tt *ttm, struct ttm_buffer_object *bo,
> > -		uint32_t page_flags)
> > +		uint32_t page_flags, enum ttm_caching caching)
> >   {
> > -	ttm_tt_init_fields(ttm, bo, page_flags);
> > +	ttm_tt_init_fields(ttm, bo, page_flags, caching);
> >   	if (ttm_tt_alloc_page_directory(ttm)) {
> >   		pr_err("Failed allocating page table\n");
> > @@ -193,11 +194,11 @@ void ttm_tt_fini(struct ttm_tt *ttm)
> >   EXPORT_SYMBOL(ttm_tt_fini);
> >   int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_buffer_object *bo,
> > -		    uint32_t page_flags)
> > +		    uint32_t page_flags, enum ttm_caching caching)
> >   {
> >   	struct ttm_tt *ttm = &ttm_dma->ttm;
> > -	ttm_tt_init_fields(ttm, bo, page_flags);
> > +	ttm_tt_init_fields(ttm, bo, page_flags, caching);
> >   	INIT_LIST_HEAD(&ttm_dma->pages_list);
> >   	if (ttm_dma_tt_alloc_page_directory(ttm_dma)) {
> > @@ -209,12 +210,12 @@ int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_buffer_object *bo,
> >   EXPORT_SYMBOL(ttm_dma_tt_init);
> >   int ttm_sg_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_buffer_object *bo,
> > -		   uint32_t page_flags)
> > +		   uint32_t page_flags, enum ttm_caching caching)
> >   {
> >   	struct ttm_tt *ttm = &ttm_dma->ttm;
> >   	int ret;
> > -	ttm_tt_init_fields(ttm, bo, page_flags);
> > +	ttm_tt_init_fields(ttm, bo, page_flags, caching);
> >   	INIT_LIST_HEAD(&ttm_dma->pages_list);
> >   	if (page_flags & TTM_PAGE_FLAG_SG)
> > diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
> > index 7b5fd5288870..1fa7f9438ec4 100644
> > --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
> > +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
> > @@ -647,9 +647,11 @@ static struct ttm_tt *vmw_ttm_tt_create(struct ttm_buffer_object *bo,
> >   	vmw_be->mob = NULL;
> >   	if (vmw_be->dev_priv->map_mode == vmw_dma_alloc_coherent)
> > -		ret = ttm_dma_tt_init(&vmw_be->dma_ttm, bo, page_flags);
> > +		ret = ttm_dma_tt_init(&vmw_be->dma_ttm, bo, page_flags,
> > +				      ttm_cached);
> >   	else
> > -		ret = ttm_tt_init(&vmw_be->dma_ttm.ttm, bo, page_flags);
> > +		ret = ttm_tt_init(&vmw_be->dma_ttm.ttm, bo, page_flags,
> > +				  ttm_cached);
> >   	if (unlikely(ret != 0))
> >   		goto out_no_init;
> > diff --git a/include/drm/ttm/ttm_caching.h b/include/drm/ttm/ttm_caching.h
> > new file mode 100644
> > index 000000000000..161624dcf6be
> > --- /dev/null
> > +++ b/include/drm/ttm/ttm_caching.h
> > @@ -0,0 +1,34 @@
> > +/*
> > + * Copyright 2020 Advanced Micro Devices, Inc.
> > + *
> > + * Permission is hereby granted, free of charge, to any person obtaining a
> > + * copy of this software and associated documentation files (the "Software"),
> > + * to deal in the Software without restriction, including without limitation
> > + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> > + * and/or sell copies of the Software, and to permit persons to whom the
> > + * Software is furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice shall be included in
> > + * all copies or substantial portions of the Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> > + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> > + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> > + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> > + * OTHER DEALINGS IN THE SOFTWARE.
> > + *
> > + * Authors: Christian König
> > + */
> > +
> > +#ifndef _TTM_CACHING_H_
> > +#define _TTM_CACHING_H_
> > +
> > +enum ttm_caching {
> > +	ttm_uncached,
> > +	ttm_write_combined,
> > +	ttm_cached
> > +};
> > +
> > +#endif
> > diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
> > index 5d1835d44084..c39c722d5184 100644
> > --- a/include/drm/ttm/ttm_tt.h
> > +++ b/include/drm/ttm/ttm_tt.h
> > @@ -28,6 +28,7 @@
> >   #define _TTM_TT_H_
> >   #include <linux/types.h>
> > +#include <drm/ttm/ttm_caching.h>
> >   struct ttm_tt;
> >   struct ttm_resource;
> > @@ -42,12 +43,6 @@ struct ttm_operation_ctx;
> >   #define TTM_PAGE_FLAG_PRIV_POPULATED  (1 << 31)
> > -enum ttm_caching_state {
> > -	tt_uncached,
> > -	tt_wc,
> > -	tt_cached
> > -};
> > -
> >   /**
> >    * struct ttm_tt
> >    *
> > @@ -69,7 +64,7 @@ struct ttm_tt {
> >   	unsigned long num_pages;
> >   	struct sg_table *sg; /* for SG objects via dma-buf */
> >   	struct file *swap_storage;
> > -	enum ttm_caching_state caching_state;
> > +	enum ttm_caching caching;
> >   };
> >   static inline bool ttm_tt_is_populated(struct ttm_tt *tt)
> > @@ -121,6 +116,7 @@ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc);
> >    * @ttm: The struct ttm_tt.
> >    * @bo: The buffer object we create the ttm for.
> >    * @page_flags: Page flags as identified by TTM_PAGE_FLAG_XX flags.
> > + * @caching: the desired caching state of the pages
> >    *
> >    * Create a struct ttm_tt to back data with system memory pages.
> >    * No pages are actually allocated.
> > @@ -128,11 +124,11 @@ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc);
> >    * NULL: Out of memory.
> >    */
> >   int ttm_tt_init(struct ttm_tt *ttm, struct ttm_buffer_object *bo,
> > -		uint32_t page_flags);
> > +		uint32_t page_flags, enum ttm_caching caching);
> >   int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_buffer_object *bo,
> > -		    uint32_t page_flags);
> > +		    uint32_t page_flags, enum ttm_caching caching);
> >   int ttm_sg_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_buffer_object *bo,
> > -		   uint32_t page_flags);
> > +		   uint32_t page_flags, enum ttm_caching caching);
> >   /**
> >    * ttm_tt_fini
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/4] drm/ttm: set the tt caching state at creation time
  2020-10-12 14:14   ` Daniel Vetter
@ 2020-10-12 15:22     ` Christian König
  0 siblings, 0 replies; 7+ messages in thread
From: Christian König @ 2020-10-12 15:22 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: dri-devel

Am 12.10.20 um 16:14 schrieb Daniel Vetter:
> On Mon, Oct 12, 2020 at 10:57:57AM +0200, Christian König wrote:
>> Ping? Anybody any more comments on this?
>>
>> Otherwise I'm going to push it to drm-misc-next by tomorrow or so.
> tbh the entire coherency/caching topic is imo a gigantic mess in
> drivers/gpu (mostly because we're half-fighting dma-api all the time). But
> I don't have a clear opinion on where to go, hence *shrug*.

Well, that's exactly why I'm taking the first step here by removing the
illusion that TTM can magically change the caching of a BO :)

Christian.

> -Daniel
>
>> Thanks,
>> Christian.
>>
>> Am 08.10.20 um 11:31 schrieb Christian König:
>>> All drivers can determine the tt caching state at creation time,
>>> no need to do this on the fly during every validation.
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c    |  2 +-
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 11 +++++--
>>>    drivers/gpu/drm/drm_gem_vram_helper.c      |  2 +-
>>>    drivers/gpu/drm/nouveau/nouveau_sgdma.c    | 13 ++++++++-
>>>    drivers/gpu/drm/qxl/qxl_ttm.c              |  2 +-
>>>    drivers/gpu/drm/radeon/radeon_ttm.c        | 16 ++++++++--
>>>    drivers/gpu/drm/ttm/ttm_agp_backend.c      |  2 +-
>>>    drivers/gpu/drm/ttm/ttm_page_alloc.c       | 26 ++++++++---------
>>>    drivers/gpu/drm/ttm/ttm_page_alloc_dma.c   | 20 ++++++-------
>>>    drivers/gpu/drm/ttm/ttm_tt.c               | 33 +++++++++++----------
>>>    drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c |  6 ++--
>>>    include/drm/ttm/ttm_caching.h              | 34 ++++++++++++++++++++++
>>>    include/drm/ttm/ttm_tt.h                   | 16 ++++------
>>>    13 files changed, 123 insertions(+), 60 deletions(-)
>>>    create mode 100644 include/drm/ttm/ttm_caching.h
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>>> index 213ef090bb0e..3c5ad69eff19 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>>> @@ -124,7 +124,7 @@ uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
>>>    	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
>>>    	struct ttm_dma_tt *ttm;
>>> -	if (bo->num_pages != 1 || bo->ttm->caching_state == tt_cached)
>>> +	if (bo->num_pages != 1 || bo->ttm->caching == ttm_cached)
>>>    		return AMDGPU_BO_INVALID_OFFSET;
>>>    	ttm = container_of(bo->ttm, struct ttm_dma_tt, ttm);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> index 399961035ae6..7f41a47e7353 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> @@ -1292,7 +1292,9 @@ static void amdgpu_ttm_backend_destroy(struct ttm_bo_device *bdev,
>>>    static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
>>>    					   uint32_t page_flags)
>>>    {
>>> +	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
>>>    	struct amdgpu_ttm_tt *gtt;
>>> +	enum ttm_caching caching;
>>>    	gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
>>>    	if (gtt == NULL) {
>>> @@ -1300,8 +1302,13 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
>>>    	}
>>>    	gtt->gobj = &bo->base;
>>> +	if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
>>> +		caching = ttm_write_combined;
>>> +	else
>>> +		caching = ttm_cached;
>>> +
>>>    	/* allocate space for the uninitialized page entries */
>>> -	if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags)) {
>>> +	if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags, caching)) {
>>>    		kfree(gtt);
>>>    		return NULL;
>>>    	}
>>> @@ -1525,7 +1532,7 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)
>>>    	if (mem && mem->mem_type == TTM_PL_TT) {
>>>    		flags |= AMDGPU_PTE_SYSTEM;
>>> -		if (ttm->caching_state == tt_cached)
>>> +		if (ttm->caching == ttm_cached)
>>>    			flags |= AMDGPU_PTE_SNOOPED;
>>>    	}
>>> diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c
>>> index 3213429f8444..ad58d0af5141 100644
>>> --- a/drivers/gpu/drm/drm_gem_vram_helper.c
>>> +++ b/drivers/gpu/drm/drm_gem_vram_helper.c
>>> @@ -918,7 +918,7 @@ static struct ttm_tt *bo_driver_ttm_tt_create(struct ttm_buffer_object *bo,
>>>    	if (!tt)
>>>    		return NULL;
>>> -	ret = ttm_tt_init(tt, bo, page_flags);
>>> +	ret = ttm_tt_init(tt, bo, page_flags, ttm_cached);
>>>    	if (ret < 0)
>>>    		goto err_ttm_tt_init;
>>> diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
>>> index 806d9ec310f5..cd6fdebae795 100644
>>> --- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
>>> +++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
>>> @@ -5,6 +5,7 @@
>>>    #include "nouveau_drv.h"
>>>    #include "nouveau_mem.h"
>>>    #include "nouveau_ttm.h"
>>> +#include "nouveau_bo.h"
>>>    struct nouveau_sgdma_be {
>>>    	/* this has to be the first field so populate/unpopulated in
>>> @@ -67,13 +68,23 @@ nouveau_sgdma_unbind(struct ttm_bo_device *bdev, struct ttm_tt *ttm)
>>>    struct ttm_tt *
>>>    nouveau_sgdma_create_ttm(struct ttm_buffer_object *bo, uint32_t page_flags)
>>>    {
>>> +	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
>>> +	struct nouveau_bo *nvbo = nouveau_bo(bo);
>>>    	struct nouveau_sgdma_be *nvbe;
>>> +	enum ttm_caching caching;
>>> +
>>> +	if (nvbo->force_coherent)
>>> +		caching = ttm_uncached;
>>> +	else if (drm->agp.bridge)
>>> +		caching = ttm_write_combined;
>>> +	else
>>> +		caching = ttm_cached;
>>>    	nvbe = kzalloc(sizeof(*nvbe), GFP_KERNEL);
>>>    	if (!nvbe)
>>>    		return NULL;
>>> -	if (ttm_dma_tt_init(&nvbe->ttm, bo, page_flags)) {
>>> +	if (ttm_dma_tt_init(&nvbe->ttm, bo, page_flags, caching)) {
>>>    		kfree(nvbe);
>>>    		return NULL;
>>>    	}
>>> diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
>>> index 669bceb58205..f50863493f64 100644
>>> --- a/drivers/gpu/drm/qxl/qxl_ttm.c
>>> +++ b/drivers/gpu/drm/qxl/qxl_ttm.c
>>> @@ -133,7 +133,7 @@ static struct ttm_tt *qxl_ttm_tt_create(struct ttm_buffer_object *bo,
>>>    	ttm = kzalloc(sizeof(struct ttm_tt), GFP_KERNEL);
>>>    	if (ttm == NULL)
>>>    		return NULL;
>>> -	if (ttm_tt_init(ttm, bo, page_flags)) {
>>> +	if (ttm_tt_init(ttm, bo, page_flags, ttm_cached)) {
>>>    		kfree(ttm);
>>>    		return NULL;
>>>    	}
>>> diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
>>> index 63e38b05a5bc..130a7cea35c3 100644
>>> --- a/drivers/gpu/drm/radeon/radeon_ttm.c
>>> +++ b/drivers/gpu/drm/radeon/radeon_ttm.c
>>> @@ -546,7 +546,7 @@ static int radeon_ttm_backend_bind(struct ttm_bo_device *bdev,
>>>    		WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
>>>    		     ttm->num_pages, bo_mem, ttm);
>>>    	}
>>> -	if (ttm->caching_state == tt_cached)
>>> +	if (ttm->caching == ttm_cached)
>>>    		flags |= RADEON_GART_PAGE_SNOOP;
>>>    	r = radeon_gart_bind(rdev, gtt->offset, ttm->num_pages,
>>>    			     ttm->pages, gtt->ttm.dma_address, flags);
>>> @@ -590,6 +590,10 @@ static struct ttm_tt *radeon_ttm_tt_create(struct ttm_buffer_object *bo,
>>>    {
>>>    	struct radeon_device *rdev;
>>>    	struct radeon_ttm_tt *gtt;
>>> +	enum ttm_caching caching;
>>> +	struct radeon_bo *rbo;
>>> +
>>> +	rbo = container_of(bo, struct radeon_bo, tbo);
>>>    	rdev = radeon_get_rdev(bo->bdev);
>>>    #if IS_ENABLED(CONFIG_AGP)
>>> @@ -603,7 +607,15 @@ static struct ttm_tt *radeon_ttm_tt_create(struct ttm_buffer_object *bo,
>>>    	if (gtt == NULL) {
>>>    		return NULL;
>>>    	}
>>> -	if (ttm_dma_tt_init(&gtt->ttm, bo, page_flags)) {
>>> +
>>> +	if (rbo->flags & RADEON_GEM_GTT_UC)
>>> +		caching = ttm_uncached;
>>> +	else if (rbo->flags & RADEON_GEM_GTT_WC)
>>> +		caching = ttm_write_combined;
>>> +	else
>>> +		caching = ttm_cached;
>>> +
>>> +	if (ttm_dma_tt_init(&gtt->ttm, bo, page_flags, caching)) {
>>>    		kfree(gtt);
>>>    		return NULL;
>>>    	}
>>> diff --git a/drivers/gpu/drm/ttm/ttm_agp_backend.c b/drivers/gpu/drm/ttm/ttm_agp_backend.c
>>> index a98fd795b752..a723062d37e7 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_agp_backend.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_agp_backend.c
>>> @@ -136,7 +136,7 @@ struct ttm_tt *ttm_agp_tt_create(struct ttm_buffer_object *bo,
>>>    	agp_be->mem = NULL;
>>>    	agp_be->bridge = bridge;
>>> -	if (ttm_tt_init(&agp_be->ttm, bo, page_flags)) {
>>> +	if (ttm_tt_init(&agp_be->ttm, bo, page_flags, ttm_write_combined)) {
>>>    		kfree(agp_be);
>>>    		return NULL;
>>>    	}
>>> diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
>>> index 111031cbb6df..c8f6790962b9 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
>>> @@ -220,14 +220,14 @@ static struct ttm_pool_manager *_manager;
>>>    /**
>>>     * Select the right pool or requested caching state and ttm flags. */
>>>    static struct ttm_page_pool *ttm_get_pool(int flags, bool huge,
>>> -					  enum ttm_caching_state cstate)
>>> +					  enum ttm_caching cstate)
>>>    {
>>>    	int pool_index;
>>> -	if (cstate == tt_cached)
>>> +	if (cstate == ttm_cached)
>>>    		return NULL;
>>> -	if (cstate == tt_wc)
>>> +	if (cstate == ttm_write_combined)
>>>    		pool_index = 0x0;
>>>    	else
>>>    		pool_index = 0x1;
>>> @@ -441,17 +441,17 @@ static void ttm_pool_mm_shrink_fini(struct ttm_pool_manager *manager)
>>>    }
>>>    static int ttm_set_pages_caching(struct page **pages,
>>> -		enum ttm_caching_state cstate, unsigned cpages)
>>> +		enum ttm_caching cstate, unsigned cpages)
>>>    {
>>>    	int r = 0;
>>>    	/* Set page caching */
>>>    	switch (cstate) {
>>> -	case tt_uncached:
>>> +	case ttm_uncached:
>>>    		r = ttm_set_pages_array_uc(pages, cpages);
>>>    		if (r)
>>>    			pr_err("Failed to set %d pages to uc!\n", cpages);
>>>    		break;
>>> -	case tt_wc:
>>> +	case ttm_write_combined:
>>>    		r = ttm_set_pages_array_wc(pages, cpages);
>>>    		if (r)
>>>    			pr_err("Failed to set %d pages to wc!\n", cpages);
>>> @@ -486,7 +486,7 @@ static void ttm_handle_caching_failure(struct page **failed_pages,
>>>     * pages returned in pages array.
>>>     */
>>>    static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags,
>>> -			       int ttm_flags, enum ttm_caching_state cstate,
>>> +			       int ttm_flags, enum ttm_caching cstate,
>>>    			       unsigned count, unsigned order)
>>>    {
>>>    	struct page **caching_array;
>>> @@ -566,7 +566,7 @@ static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags,
>>>     * pages is small.
>>>     */
>>>    static void ttm_page_pool_fill_locked(struct ttm_page_pool *pool, int ttm_flags,
>>> -				      enum ttm_caching_state cstate,
>>> +				      enum ttm_caching cstate,
>>>    				      unsigned count, unsigned long *irq_flags)
>>>    {
>>>    	struct page *p;
>>> @@ -626,7 +626,7 @@ static void ttm_page_pool_fill_locked(struct ttm_page_pool *pool, int ttm_flags,
>>>    static int ttm_page_pool_get_pages(struct ttm_page_pool *pool,
>>>    				   struct list_head *pages,
>>>    				   int ttm_flags,
>>> -				   enum ttm_caching_state cstate,
>>> +				   enum ttm_caching cstate,
>>>    				   unsigned count, unsigned order)
>>>    {
>>>    	unsigned long irq_flags;
>>> @@ -703,7 +703,7 @@ static int ttm_page_pool_get_pages(struct ttm_page_pool *pool,
>>>    /* Put all pages in pages list to correct pool to wait for reuse */
>>>    static void ttm_put_pages(struct page **pages, unsigned npages, int flags,
>>> -			  enum ttm_caching_state cstate)
>>> +			  enum ttm_caching cstate)
>>>    {
>>>    	struct ttm_page_pool *pool = ttm_get_pool(flags, false, cstate);
>>>    #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>> @@ -821,7 +821,7 @@ static void ttm_put_pages(struct page **pages, unsigned npages, int flags,
>>>     * cached pages.
>>>     */
>>>    static int ttm_get_pages(struct page **pages, unsigned npages, int flags,
>>> -			 enum ttm_caching_state cstate)
>>> +			 enum ttm_caching cstate)
>>>    {
>>>    	struct ttm_page_pool *pool = ttm_get_pool(flags, false, cstate);
>>>    #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>> @@ -1040,7 +1040,7 @@ ttm_pool_unpopulate_helper(struct ttm_tt *ttm, unsigned mem_count_update)
>>>    put_pages:
>>>    	ttm_put_pages(ttm->pages, ttm->num_pages, ttm->page_flags,
>>> -		      ttm->caching_state);
>>> +		      ttm->caching);
>>>    	ttm_tt_set_unpopulated(ttm);
>>>    }
>>> @@ -1057,7 +1057,7 @@ int ttm_pool_populate(struct ttm_tt *ttm, struct ttm_operation_ctx *ctx)
>>>    		return -ENOMEM;
>>>    	ret = ttm_get_pages(ttm->pages, ttm->num_pages, ttm->page_flags,
>>> -			    ttm->caching_state);
>>> +			    ttm->caching);
>>>    	if (unlikely(ret != 0)) {
>>>    		ttm_pool_unpopulate_helper(ttm, 0);
>>>    		return ret;
>>> diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
>>> index 1045a5c26ee3..6625b43f6256 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
>>> @@ -325,15 +325,15 @@ static struct dma_page *__ttm_dma_alloc_page(struct dma_pool *pool)
>>>    	}
>>>    	return d_page;
>>>    }
>>> -static enum pool_type ttm_to_type(int flags, enum ttm_caching_state cstate)
>>> +static enum pool_type ttm_to_type(int flags, enum ttm_caching cstate)
>>>    {
>>>    	enum pool_type type = IS_UNDEFINED;
>>>    	if (flags & TTM_PAGE_FLAG_DMA32)
>>>    		type |= IS_DMA32;
>>> -	if (cstate == tt_cached)
>>> +	if (cstate == ttm_cached)
>>>    		type |= IS_CACHED;
>>> -	else if (cstate == tt_uncached)
>>> +	else if (cstate == ttm_uncached)
>>>    		type |= IS_UC;
>>>    	else
>>>    		type |= IS_WC;
>>> @@ -663,7 +663,7 @@ static struct dma_pool *ttm_dma_find_pool(struct device *dev,
>>>     * are pages that have changed their caching state already put them to the
>>>     * pool.
>>>     */
>>> -static void ttm_dma_handle_caching_state_failure(struct dma_pool *pool,
>>> +static void ttm_dma_handle_caching_failure(struct dma_pool *pool,
>>>    						 struct list_head *d_pages,
>>>    						 struct page **failed_pages,
>>>    						 unsigned cpages)
>>> @@ -734,7 +734,7 @@ static int ttm_dma_pool_alloc_new_pages(struct dma_pool *pool,
>>>    				r = ttm_set_pages_caching(pool, caching_array,
>>>    							  cpages);
>>>    				if (r)
>>> -					ttm_dma_handle_caching_state_failure(
>>> +					ttm_dma_handle_caching_failure(
>>>    						pool, d_pages, caching_array,
>>>    						cpages);
>>>    			}
>>> @@ -760,7 +760,7 @@ static int ttm_dma_pool_alloc_new_pages(struct dma_pool *pool,
>>>    				r = ttm_set_pages_caching(pool, caching_array,
>>>    							  cpages);
>>>    				if (r) {
>>> -					ttm_dma_handle_caching_state_failure(
>>> +					ttm_dma_handle_caching_failure(
>>>    					     pool, d_pages, caching_array,
>>>    					     cpages);
>>>    					goto out;
>>> @@ -773,7 +773,7 @@ static int ttm_dma_pool_alloc_new_pages(struct dma_pool *pool,
>>>    	if (cpages) {
>>>    		r = ttm_set_pages_caching(pool, caching_array, cpages);
>>>    		if (r)
>>> -			ttm_dma_handle_caching_state_failure(pool, d_pages,
>>> +			ttm_dma_handle_caching_failure(pool, d_pages,
>>>    					caching_array, cpages);
>>>    	}
>>>    out:
>>> @@ -904,7 +904,7 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev,
>>>    	INIT_LIST_HEAD(&ttm_dma->pages_list);
>>>    	i = 0;
>>> -	type = ttm_to_type(ttm->page_flags, ttm->caching_state);
>>> +	type = ttm_to_type(ttm->page_flags, ttm->caching);
>>>    #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>>    	if (ttm->page_flags & TTM_PAGE_FLAG_DMA32)
>>> @@ -1000,7 +1000,7 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
>>>    	unsigned count, i, npages = 0;
>>>    	unsigned long irq_flags;
>>> -	type = ttm_to_type(ttm->page_flags, ttm->caching_state);
>>> +	type = ttm_to_type(ttm->page_flags, ttm->caching);
>>>    #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>>    	pool = ttm_dma_find_pool(dev, type | IS_HUGE);
>>> @@ -1032,7 +1032,7 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
>>>    		return;
>>>    	is_cached = (ttm_dma_find_pool(pool->dev,
>>> -		     ttm_to_type(ttm->page_flags, tt_cached)) == pool);
>>> +		     ttm_to_type(ttm->page_flags, ttm_cached)) == pool);
>>>    	/* make sure pages array match list and count number of pages */
>>>    	count = 0;
>>> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
>>> index 23e9604bc924..a465f51df027 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_tt.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_tt.c
>>> @@ -114,31 +114,30 @@ static int ttm_sg_tt_alloc_page_directory(struct ttm_dma_tt *ttm)
>>>    	return 0;
>>>    }
>>> -static int ttm_tt_set_caching(struct ttm_tt *ttm,
>>> -			      enum ttm_caching_state c_state)
>>> +static int ttm_tt_set_caching(struct ttm_tt *ttm, enum ttm_caching caching)
>>>    {
>>> -	if (ttm->caching_state == c_state)
>>> +	if (ttm->caching == caching)
>>>    		return 0;
>>>    	/* Can't change the caching state after TT is populated */
>>>    	if (WARN_ON_ONCE(ttm_tt_is_populated(ttm)))
>>>    		return -EINVAL;
>>> -	ttm->caching_state = c_state;
>>> +	ttm->caching = caching;
>>>    	return 0;
>>>    }
>>>    int ttm_tt_set_placement_caching(struct ttm_tt *ttm, uint32_t placement)
>>>    {
>>> -	enum ttm_caching_state state;
>>> +	enum ttm_caching state;
>>>    	if (placement & TTM_PL_FLAG_WC)
>>> -		state = tt_wc;
>>> +		state = ttm_write_combined;
>>>    	else if (placement & TTM_PL_FLAG_UNCACHED)
>>> -		state = tt_uncached;
>>> +		state = ttm_uncached;
>>>    	else
>>> -		state = tt_cached;
>>> +		state = ttm_cached;
>>>    	return ttm_tt_set_caching(ttm, state);
>>>    }
>>> @@ -162,20 +161,22 @@ void ttm_tt_destroy(struct ttm_bo_device *bdev, struct ttm_tt *ttm)
>>>    static void ttm_tt_init_fields(struct ttm_tt *ttm,
>>>    			       struct ttm_buffer_object *bo,
>>> -			       uint32_t page_flags)
>>> +			       uint32_t page_flags,
>>> +			       enum ttm_caching caching)
>>>    {
>>>    	ttm->num_pages = bo->num_pages;
>>> -	ttm->caching_state = tt_cached;
>>> +	ttm->caching = ttm_cached;
>>>    	ttm->page_flags = page_flags;
>>>    	ttm_tt_set_unpopulated(ttm);
>>>    	ttm->swap_storage = NULL;
>>>    	ttm->sg = bo->sg;
>>> +	ttm->caching = caching;
>>>    }
>>>    int ttm_tt_init(struct ttm_tt *ttm, struct ttm_buffer_object *bo,
>>> -		uint32_t page_flags)
>>> +		uint32_t page_flags, enum ttm_caching caching)
>>>    {
>>> -	ttm_tt_init_fields(ttm, bo, page_flags);
>>> +	ttm_tt_init_fields(ttm, bo, page_flags, caching);
>>>    	if (ttm_tt_alloc_page_directory(ttm)) {
>>>    		pr_err("Failed allocating page table\n");
>>> @@ -193,11 +194,11 @@ void ttm_tt_fini(struct ttm_tt *ttm)
>>>    EXPORT_SYMBOL(ttm_tt_fini);
>>>    int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_buffer_object *bo,
>>> -		    uint32_t page_flags)
>>> +		    uint32_t page_flags, enum ttm_caching caching)
>>>    {
>>>    	struct ttm_tt *ttm = &ttm_dma->ttm;
>>> -	ttm_tt_init_fields(ttm, bo, page_flags);
>>> +	ttm_tt_init_fields(ttm, bo, page_flags, caching);
>>>    	INIT_LIST_HEAD(&ttm_dma->pages_list);
>>>    	if (ttm_dma_tt_alloc_page_directory(ttm_dma)) {
>>> @@ -209,12 +210,12 @@ int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_buffer_object *bo,
>>>    EXPORT_SYMBOL(ttm_dma_tt_init);
>>>    int ttm_sg_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_buffer_object *bo,
>>> -		   uint32_t page_flags)
>>> +		   uint32_t page_flags, enum ttm_caching caching)
>>>    {
>>>    	struct ttm_tt *ttm = &ttm_dma->ttm;
>>>    	int ret;
>>> -	ttm_tt_init_fields(ttm, bo, page_flags);
>>> +	ttm_tt_init_fields(ttm, bo, page_flags, caching);
>>>    	INIT_LIST_HEAD(&ttm_dma->pages_list);
>>>    	if (page_flags & TTM_PAGE_FLAG_SG)
>>> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
>>> index 7b5fd5288870..1fa7f9438ec4 100644
>>> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
>>> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
>>> @@ -647,9 +647,11 @@ static struct ttm_tt *vmw_ttm_tt_create(struct ttm_buffer_object *bo,
>>>    	vmw_be->mob = NULL;
>>>    	if (vmw_be->dev_priv->map_mode == vmw_dma_alloc_coherent)
>>> -		ret = ttm_dma_tt_init(&vmw_be->dma_ttm, bo, page_flags);
>>> +		ret = ttm_dma_tt_init(&vmw_be->dma_ttm, bo, page_flags,
>>> +				      ttm_cached);
>>>    	else
>>> -		ret = ttm_tt_init(&vmw_be->dma_ttm.ttm, bo, page_flags);
>>> +		ret = ttm_tt_init(&vmw_be->dma_ttm.ttm, bo, page_flags,
>>> +				  ttm_cached);
>>>    	if (unlikely(ret != 0))
>>>    		goto out_no_init;
>>> diff --git a/include/drm/ttm/ttm_caching.h b/include/drm/ttm/ttm_caching.h
>>> new file mode 100644
>>> index 000000000000..161624dcf6be
>>> --- /dev/null
>>> +++ b/include/drm/ttm/ttm_caching.h
>>> @@ -0,0 +1,34 @@
>>> +/*
>>> + * Copyright 2020 Advanced Micro Devices, Inc.
>>> + *
>>> + * Permission is hereby granted, free of charge, to any person obtaining a
>>> + * copy of this software and associated documentation files (the "Software"),
>>> + * to deal in the Software without restriction, including without limitation
>>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>>> + * and/or sell copies of the Software, and to permit persons to whom the
>>> + * Software is furnished to do so, subject to the following conditions:
>>> + *
>>> + * The above copyright notice and this permission notice shall be included in
>>> + * all copies or substantial portions of the Software.
>>> + *
>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>> + *
>>> + * Authors: Christian König
>>> + */
>>> +
>>> +#ifndef _TTM_CACHING_H_
>>> +#define _TTM_CACHING_H_
>>> +
>>> +enum ttm_caching {
>>> +	ttm_uncached,
>>> +	ttm_write_combined,
>>> +	ttm_cached
>>> +};
>>> +
>>> +#endif
>>> diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
>>> index 5d1835d44084..c39c722d5184 100644
>>> --- a/include/drm/ttm/ttm_tt.h
>>> +++ b/include/drm/ttm/ttm_tt.h
>>> @@ -28,6 +28,7 @@
>>>    #define _TTM_TT_H_
>>>    #include <linux/types.h>
>>> +#include <drm/ttm/ttm_caching.h>
>>>    struct ttm_tt;
>>>    struct ttm_resource;
>>> @@ -42,12 +43,6 @@ struct ttm_operation_ctx;
>>>    #define TTM_PAGE_FLAG_PRIV_POPULATED  (1 << 31)
>>> -enum ttm_caching_state {
>>> -	tt_uncached,
>>> -	tt_wc,
>>> -	tt_cached
>>> -};
>>> -
>>>    /**
>>>     * struct ttm_tt
>>>     *
>>> @@ -69,7 +64,7 @@ struct ttm_tt {
>>>    	unsigned long num_pages;
>>>    	struct sg_table *sg; /* for SG objects via dma-buf */
>>>    	struct file *swap_storage;
>>> -	enum ttm_caching_state caching_state;
>>> +	enum ttm_caching caching;
>>>    };
>>>    static inline bool ttm_tt_is_populated(struct ttm_tt *tt)
>>> @@ -121,6 +116,7 @@ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc);
>>>     * @ttm: The struct ttm_tt.
>>>     * @bo: The buffer object we create the ttm for.
>>>     * @page_flags: Page flags as identified by TTM_PAGE_FLAG_XX flags.
>>> + * @caching: the desired caching state of the pages
>>>     *
>>>     * Create a struct ttm_tt to back data with system memory pages.
>>>     * No pages are actually allocated.
>>> @@ -128,11 +124,11 @@ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc);
>>>     * NULL: Out of memory.
>>>     */
>>>    int ttm_tt_init(struct ttm_tt *ttm, struct ttm_buffer_object *bo,
>>> -		uint32_t page_flags);
>>> +		uint32_t page_flags, enum ttm_caching caching);
>>>    int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_buffer_object *bo,
>>> -		    uint32_t page_flags);
>>> +		    uint32_t page_flags, enum ttm_caching caching);
>>>    int ttm_sg_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_buffer_object *bo,
>>> -		   uint32_t page_flags);
>>> +		   uint32_t page_flags, enum ttm_caching caching);
>>>    /**
>>>     * ttm_tt_fini
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2020-10-12 15:22 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-08  9:31 [PATCH 1/4] drm/ttm: set the tt caching state at creation time Christian König
2020-10-08  9:31 ` [PATCH 2/4] drm/ttm: add caching state to ttm_bus_placement Christian König
2020-10-08  9:31 ` [PATCH 3/4] drm/ttm: use caching instead of placement for ttm_io_prot Christian König
2020-10-08  9:31 ` [PATCH 4/4] drm/ttm: nuke caching placement flags Christian König
2020-10-12  8:57 ` [PATCH 1/4] drm/ttm: set the tt caching state at creation time Christian König
2020-10-12 14:14   ` Daniel Vetter
2020-10-12 15:22     ` Christian König

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).