All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/6] drm/ttm: cleanup and optimize ttm_bo_mem_compat v2
@ 2017-03-31  9:47 Christian König
  2017-03-31  9:47 ` [PATCH 2/6] drm/ttm: add io_mem_pfn callback Christian König
                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Christian König @ 2017-03-31  9:47 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

No need to implement the same logic twice. Also check if the busy placements
are identical to the already scanned placements before checking them.

v2: improve check even more as suggested by Michel.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 45 ++++++++++++++++++++++++--------------------
 1 file changed, 25 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 989b98b..348b17e 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1046,29 +1046,17 @@ static int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
 	return ret;
 }
 
-bool ttm_bo_mem_compat(struct ttm_placement *placement,
-		       struct ttm_mem_reg *mem,
-		       uint32_t *new_flags)
+static bool ttm_bo_places_compat(const struct ttm_place *places,
+				 unsigned num_placement,
+				 struct ttm_mem_reg *mem,
+				 uint32_t *new_flags)
 {
-	int i;
+	unsigned i;
 
-	for (i = 0; i < placement->num_placement; i++) {
-		const struct ttm_place *heap = &placement->placement[i];
-		if (mem->mm_node &&
-		    (mem->start < heap->fpfn ||
-		     (heap->lpfn != 0 && (mem->start + mem->num_pages) > heap->lpfn)))
-			continue;
+	for (i = 0; i < num_placement; i++) {
+		const struct ttm_place *heap = &places[i];
 
-		*new_flags = heap->flags;
-		if ((*new_flags & mem->placement & TTM_PL_MASK_CACHING) &&
-		    (*new_flags & mem->placement & TTM_PL_MASK_MEM))
-			return true;
-	}
-
-	for (i = 0; i < placement->num_busy_placement; i++) {
-		const struct ttm_place *heap = &placement->busy_placement[i];
-		if (mem->mm_node &&
-		    (mem->start < heap->fpfn ||
+		if (mem->mm_node && (mem->start < heap->fpfn ||
 		     (heap->lpfn != 0 && (mem->start + mem->num_pages) > heap->lpfn)))
 			continue;
 
@@ -1077,6 +1065,23 @@ bool ttm_bo_mem_compat(struct ttm_placement *placement,
 		    (*new_flags & mem->placement & TTM_PL_MASK_MEM))
 			return true;
 	}
+	return false;
+}
+
+bool ttm_bo_mem_compat(struct ttm_placement *placement,
+		       struct ttm_mem_reg *mem,
+		       uint32_t *new_flags)
+{
+	if (ttm_bo_places_compat(placement->placement, placement->num_placement,
+				 mem, new_flags))
+		return true;
+
+	if ((placement->busy_placement != placement->placement ||
+	     placement->num_busy_placement > placement->num_placement) &&
+	    ttm_bo_places_compat(placement->busy_placement,
+				 placement->num_busy_placement,
+				 mem, new_flags))
+		return true;
 
 	return false;
 }
-- 
2.5.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/6] drm/ttm: add io_mem_pfn callback
  2017-03-31  9:47 [PATCH 1/6] drm/ttm: cleanup and optimize ttm_bo_mem_compat v2 Christian König
@ 2017-03-31  9:47 ` Christian König
  2017-03-31  9:47 ` [PATCH 4/6] drm/amdgpu: drop alpha support Christian König
       [not found] ` <1490953652-3703-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2 siblings, 0 replies; 15+ messages in thread
From: Christian König @ 2017-03-31  9:47 UTC (permalink / raw)
  To: amd-gfx, dri-devel

From: Christian König <christian.koenig@amd.com>

This allows drivers to handle io_mem mappings on their own.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  1 +
 drivers/gpu/drm/ast/ast_ttm.c           |  1 +
 drivers/gpu/drm/bochs/bochs_mm.c        |  1 +
 drivers/gpu/drm/cirrus/cirrus_ttm.c     |  1 +
 drivers/gpu/drm/mgag200/mgag200_ttm.c   |  1 +
 drivers/gpu/drm/nouveau/nouveau_bo.c    |  1 +
 drivers/gpu/drm/qxl/qxl_ttm.c           |  1 +
 drivers/gpu/drm/radeon/radeon_ttm.c     |  1 +
 drivers/gpu/drm/ttm/ttm_bo_vm.c         | 10 +++++++++-
 drivers/gpu/drm/virtio/virtgpu_ttm.c    |  1 +
 drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c  |  1 +
 include/drm/ttm/ttm_bo_api.h            | 11 +++++++++++
 include/drm/ttm/ttm_bo_driver.h         |  9 +++++++++
 13 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 21876ee..7bf5ba7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1089,6 +1089,7 @@ static struct ttm_bo_driver amdgpu_bo_driver = {
 	.fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,
 	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
 	.io_mem_free = &amdgpu_ttm_io_mem_free,
+	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 };
 
 int amdgpu_ttm_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/ast/ast_ttm.c b/drivers/gpu/drm/ast/ast_ttm.c
index 50c910e..e879496 100644
--- a/drivers/gpu/drm/ast/ast_ttm.c
+++ b/drivers/gpu/drm/ast/ast_ttm.c
@@ -236,6 +236,7 @@ struct ttm_bo_driver ast_bo_driver = {
 	.verify_access = ast_bo_verify_access,
 	.io_mem_reserve = &ast_ttm_io_mem_reserve,
 	.io_mem_free = &ast_ttm_io_mem_free,
+	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 };
 
 int ast_mm_init(struct ast_private *ast)
diff --git a/drivers/gpu/drm/bochs/bochs_mm.c b/drivers/gpu/drm/bochs/bochs_mm.c
index e4c1125..804afbc 100644
--- a/drivers/gpu/drm/bochs/bochs_mm.c
+++ b/drivers/gpu/drm/bochs/bochs_mm.c
@@ -205,6 +205,7 @@ struct ttm_bo_driver bochs_bo_driver = {
 	.verify_access = bochs_bo_verify_access,
 	.io_mem_reserve = &bochs_ttm_io_mem_reserve,
 	.io_mem_free = &bochs_ttm_io_mem_free,
+	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 };
 
 int bochs_mm_init(struct bochs_device *bochs)
diff --git a/drivers/gpu/drm/cirrus/cirrus_ttm.c b/drivers/gpu/drm/cirrus/cirrus_ttm.c
index f53aa8f..93dbcd3 100644
--- a/drivers/gpu/drm/cirrus/cirrus_ttm.c
+++ b/drivers/gpu/drm/cirrus/cirrus_ttm.c
@@ -236,6 +236,7 @@ struct ttm_bo_driver cirrus_bo_driver = {
 	.verify_access = cirrus_bo_verify_access,
 	.io_mem_reserve = &cirrus_ttm_io_mem_reserve,
 	.io_mem_free = &cirrus_ttm_io_mem_free,
+	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 };
 
 int cirrus_mm_init(struct cirrus_device *cirrus)
diff --git a/drivers/gpu/drm/mgag200/mgag200_ttm.c b/drivers/gpu/drm/mgag200/mgag200_ttm.c
index 657598b..565a217 100644
--- a/drivers/gpu/drm/mgag200/mgag200_ttm.c
+++ b/drivers/gpu/drm/mgag200/mgag200_ttm.c
@@ -236,6 +236,7 @@ struct ttm_bo_driver mgag200_bo_driver = {
 	.verify_access = mgag200_bo_verify_access,
 	.io_mem_reserve = &mgag200_ttm_io_mem_reserve,
 	.io_mem_free = &mgag200_ttm_io_mem_free,
+	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 };
 
 int mgag200_mm_init(struct mga_device *mdev)
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 3949a74..978a5e7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1570,6 +1570,7 @@ struct ttm_bo_driver nouveau_bo_driver = {
 	.fault_reserve_notify = &nouveau_ttm_fault_reserve_notify,
 	.io_mem_reserve = &nouveau_ttm_io_mem_reserve,
 	.io_mem_free = &nouveau_ttm_io_mem_free,
+	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 };
 
 struct nvkm_vma *
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index 2955f91..28fa56e 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -394,6 +394,7 @@ static struct ttm_bo_driver qxl_bo_driver = {
 	.verify_access = &qxl_verify_access,
 	.io_mem_reserve = &qxl_ttm_io_mem_reserve,
 	.io_mem_free = &qxl_ttm_io_mem_free,
+	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 	.move_notify = &qxl_bo_move_notify,
 };
 
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 6571384..d07ff84 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -873,6 +873,7 @@ static struct ttm_bo_driver radeon_bo_driver = {
 	.fault_reserve_notify = &radeon_bo_fault_reserve_notify,
 	.io_mem_reserve = &radeon_ttm_io_mem_reserve,
 	.io_mem_free = &radeon_ttm_io_mem_free,
+	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 };
 
 int radeon_ttm_init(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 750733a..be84e3dc 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -231,7 +231,7 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	 */
 	for (i = 0; i < TTM_BO_VM_NUM_PREFAULT; ++i) {
 		if (bo->mem.bus.is_iomem)
-			pfn = ((bo->mem.bus.base + bo->mem.bus.offset) >> PAGE_SHIFT) + page_offset;
+			pfn = bdev->driver->io_mem_pfn(bo, page_offset);
 		else {
 			page = ttm->pages[page_offset];
 			if (unlikely(!page && i == 0)) {
@@ -324,6 +324,14 @@ static struct ttm_buffer_object *ttm_bo_vm_lookup(struct ttm_bo_device *bdev,
 	return bo;
 }
 
+unsigned long ttm_bo_default_io_mem_pfn(struct ttm_buffer_object *bo,
+					unsigned long page_offset)
+{
+	return ((bo->mem.bus.base + bo->mem.bus.offset) >> PAGE_SHIFT)
+		+ page_offset;
+}
+EXPORT_SYMBOL(ttm_bo_default_io_mem_pfn);
+
 int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
 		struct ttm_bo_device *bdev)
 {
diff --git a/drivers/gpu/drm/virtio/virtgpu_ttm.c b/drivers/gpu/drm/virtio/virtgpu_ttm.c
index a8875a7..8b76bf9 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ttm.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ttm.c
@@ -432,6 +432,7 @@ static struct ttm_bo_driver virtio_gpu_bo_driver = {
 	.verify_access = &virtio_gpu_verify_access,
 	.io_mem_reserve = &virtio_gpu_ttm_io_mem_reserve,
 	.io_mem_free = &virtio_gpu_ttm_io_mem_free,
+	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 	.move_notify = &virtio_gpu_bo_move_notify,
 	.swap_notify = &virtio_gpu_bo_swap_notify,
 };
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index 1de9669..ba2fa7b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -859,4 +859,5 @@ struct ttm_bo_driver vmw_bo_driver = {
 	.fault_reserve_notify = &vmw_ttm_fault_reserve_notify,
 	.io_mem_reserve = &vmw_ttm_io_mem_reserve,
 	.io_mem_free = &vmw_ttm_io_mem_free,
+	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 };
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 2d0f63e..3b302a5 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -724,6 +724,17 @@ extern int ttm_fbdev_mmap(struct vm_area_struct *vma,
 			  struct ttm_buffer_object *bo);
 
 /**
+ * ttm_bo_default_io_mem_pfn - get a pfn for a page offset
+ *
+ * @bo: the BO we need to look up the pfn for
+ * @page_offset: offset inside the BO to look up.
+ *
+ * Calculate the PFN for iomem based mappings during page fault
+ */
+unsigned long ttm_bo_default_io_mem_pfn(struct ttm_buffer_object *bo,
+				        unsigned long page_offset);
+
+/**
  * ttm_bo_mmap - mmap out of the ttm device address space.
  *
  * @filp:      filp as input from the mmap method.
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 7777d14..cb5072a 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -462,6 +462,15 @@ struct ttm_bo_driver {
 			      struct ttm_mem_reg *mem);
 	void (*io_mem_free)(struct ttm_bo_device *bdev,
 			    struct ttm_mem_reg *mem);
+
+	/**
+	 * Return the pfn for a given page_offset inside the BO.
+	 *
+	 * @bo: the BO to look up the pfn for
+	 * @page_offset: the offset to look up
+	 */
+	unsigned long (*io_mem_pfn)(struct ttm_buffer_object *bo,
+				    unsigned long page_offset);
 };
 
 /**
-- 
2.5.0

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 3/6] drm/ttm: add TTM_PL_FLAG_CONTIGUOUS v2
       [not found] ` <1490953652-3703-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-03-31  9:47   ` Christian König
  2017-04-03 16:27     ` Nicolai Hähnle
  2017-03-31  9:47   ` [PATCH 5/6] drm/amdgpu: use TTM_PL_FLAG_CONTIGUOUS Christian König
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 15+ messages in thread
From: Christian König @ 2017-03-31  9:47 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

This allows drivers to specify if they need a contiguous allocation or not.

v2: use space instead of tab

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c    | 4 +++-
 include/drm/ttm/ttm_placement.h | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 348b17e..da37cdf 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1062,7 +1062,9 @@ static bool ttm_bo_places_compat(const struct ttm_place *places,
 
 		*new_flags = heap->flags;
 		if ((*new_flags & mem->placement & TTM_PL_MASK_CACHING) &&
-		    (*new_flags & mem->placement & TTM_PL_MASK_MEM))
+		    (*new_flags & mem->placement & TTM_PL_MASK_MEM) &&
+		    (!(*new_flags & TTM_PL_FLAG_CONTIGUOUS) ||
+		     (mem->placement & TTM_PL_FLAG_CONTIGUOUS)))
 			return true;
 	}
 	return false;
diff --git a/include/drm/ttm/ttm_placement.h b/include/drm/ttm/ttm_placement.h
index 932be0c..e88a8e3 100644
--- a/include/drm/ttm/ttm_placement.h
+++ b/include/drm/ttm/ttm_placement.h
@@ -63,6 +63,7 @@
 #define TTM_PL_FLAG_CACHED      (1 << 16)
 #define TTM_PL_FLAG_UNCACHED    (1 << 17)
 #define TTM_PL_FLAG_WC          (1 << 18)
+#define TTM_PL_FLAG_CONTIGUOUS  (1 << 19)
 #define TTM_PL_FLAG_NO_EVICT    (1 << 21)
 #define TTM_PL_FLAG_TOPDOWN     (1 << 22)
 
-- 
2.5.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 4/6] drm/amdgpu: drop alpha support
  2017-03-31  9:47 [PATCH 1/6] drm/ttm: cleanup and optimize ttm_bo_mem_compat v2 Christian König
  2017-03-31  9:47 ` [PATCH 2/6] drm/ttm: add io_mem_pfn callback Christian König
@ 2017-03-31  9:47 ` Christian König
       [not found] ` <1490953652-3703-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2 siblings, 0 replies; 15+ messages in thread
From: Christian König @ 2017-03-31  9:47 UTC (permalink / raw)
  To: amd-gfx, dri-devel

From: Christian König <christian.koenig@amd.com>

We will probably never see this combination (an amdgpu device in an Alpha machine).

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 25 -------------------------
 1 file changed, 25 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 7bf5ba7..524abca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -538,31 +538,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_
 			return -EINVAL;
 		mem->bus.base = adev->mc.aper_base;
 		mem->bus.is_iomem = true;
-#ifdef __alpha__
-		/*
-		 * Alpha: use bus.addr to hold the ioremap() return,
-		 * so we can modify bus.base below.
-		 */
-		if (mem->placement & TTM_PL_FLAG_WC)
-			mem->bus.addr =
-				ioremap_wc(mem->bus.base + mem->bus.offset,
-					   mem->bus.size);
-		else
-			mem->bus.addr =
-				ioremap_nocache(mem->bus.base + mem->bus.offset,
-						mem->bus.size);
-		if (!mem->bus.addr)
-			return -ENOMEM;
-
-		/*
-		 * Alpha: Use just the bus offset plus
-		 * the hose/domain memory base for bus.base.
-		 * It then can be used to build PTEs for VRAM
-		 * access, as done in ttm_bo_vm_fault().
-		 */
-		mem->bus.base = (mem->bus.base & 0x0ffffffffUL) +
-			adev->ddev->hose->dense_mem_base;
-#endif
 		break;
 	default:
 		return -EINVAL;
-- 
2.5.0

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 5/6] drm/amdgpu: use TTM_PL_FLAG_CONTIGUOUS
       [not found] ` <1490953652-3703-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-03-31  9:47   ` [PATCH 3/6] drm/ttm: add TTM_PL_FLAG_CONTIGUOUS v2 Christian König
@ 2017-03-31  9:47   ` Christian König
       [not found]     ` <1490953652-3703-5-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-03-31  9:47   ` [PATCH 6/6] drm/amdgpu: handle CPU access for split VRAM buffers Christian König
  2017-04-03 12:37   ` [PATCH 1/6] drm/ttm: cleanup and optimize ttm_bo_mem_compat v2 Christian König
  3 siblings, 1 reply; 15+ messages in thread
From: Christian König @ 2017-03-31  9:47 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

Implement AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS using TTM_PL_FLAG_CONTIGUOUS
instead of a placement limit. That allows us to better handle CPU
accessible placements.

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Michel Dänzer <michel.daenzer@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c   | 11 +++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 14 ++++++++++----
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index d6b2de9..387d190 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -122,20 +122,19 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,
 
 	if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
 		unsigned visible_pfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
-		unsigned lpfn = 0;
-
-		/* This forces a reallocation if the flag wasn't set before */
-		if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
-			lpfn = adev->mc.real_vram_size >> PAGE_SHIFT;
 
 		places[c].fpfn = 0;
-		places[c].lpfn = lpfn;
+		places[c].lpfn = 0;
 		places[c].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
 			TTM_PL_FLAG_VRAM;
+
 		if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
 			places[c].lpfn = visible_pfn;
 		else
 			places[c].flags |= TTM_PL_FLAG_TOPDOWN;
+
+		if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+			places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
 		c++;
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index d710226..af2d172 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -93,7 +93,6 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
 			       const struct ttm_place *place,
 			       struct ttm_mem_reg *mem)
 {
-	struct amdgpu_bo *bo = container_of(tbo, struct amdgpu_bo, tbo);
 	struct amdgpu_vram_mgr *mgr = man->priv;
 	struct drm_mm *mm = &mgr->mm;
 	struct drm_mm_node *nodes;
@@ -107,8 +106,8 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
 	if (!lpfn)
 		lpfn = man->size;
 
-	if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS ||
-	    place->lpfn || amdgpu_vram_page_split == -1) {
+	if (place->flags & TTM_PL_FLAG_CONTIGUOUS ||
+	    amdgpu_vram_page_split == -1) {
 		pages_per_node = ~0ul;
 		num_nodes = 1;
 	} else {
@@ -126,12 +125,14 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
 		aflags = DRM_MM_CREATE_TOP;
 	}
 
+	mem->start = 0;
 	pages_left = mem->num_pages;
 
 	spin_lock(&mgr->lock);
 	for (i = 0; i < num_nodes; ++i) {
 		unsigned long pages = min(pages_left, pages_per_node);
 		uint32_t alignment = mem->page_alignment;
+		unsigned long start;
 
 		if (pages == pages_per_node)
 			alignment = pages_per_node;
@@ -145,11 +146,16 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
 		if (unlikely(r))
 			goto error;
 
+		/*
+		 * Calculate a virtual BO start address to easily check if
+		 * everything is CPU accessible.
+		 */
+		start = nodes[i].start + nodes[i].size - mem->num_pages;
+		mem->start = max(mem->start, start);
 		pages_left -= pages;
 	}
 	spin_unlock(&mgr->lock);
 
-	mem->start = num_nodes == 1 ? nodes[0].start : AMDGPU_BO_INVALID_OFFSET;
 	mem->mm_node = nodes;
 
 	return 0;
-- 
2.5.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 6/6] drm/amdgpu: handle CPU access for split VRAM buffers
       [not found] ` <1490953652-3703-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-03-31  9:47   ` [PATCH 3/6] drm/ttm: add TTM_PL_FLAG_CONTIGUOUS v2 Christian König
  2017-03-31  9:47   ` [PATCH 5/6] drm/amdgpu: use TTM_PL_FLAG_CONTIGUOUS Christian König
@ 2017-03-31  9:47   ` Christian König
  2017-04-03 16:25     ` Nicolai Hähnle
  2017-04-03 12:37   ` [PATCH 1/6] drm/ttm: cleanup and optimize ttm_bo_mem_compat v2 Christian König
  3 siblings, 1 reply; 15+ messages in thread
From: Christian König @ 2017-03-31  9:47 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

This avoids merging them together on page fault.

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Michel Dänzer <michel.daenzer@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  4 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 16 ++++++++++++----
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 387d190..10237a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -927,8 +927,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 	size = bo->mem.num_pages << PAGE_SHIFT;
 	offset = bo->mem.start << PAGE_SHIFT;
 	/* TODO: figure out how to map scattered VRAM to the CPU */
-	if ((offset + size) <= adev->mc.visible_vram_size &&
-	    (abo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS))
+	if ((offset + size) <= adev->mc.visible_vram_size)
 		return 0;
 
 	/* Can't move a pinned BO to visible VRAM */
@@ -936,7 +935,6 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 		return -EINVAL;
 
 	/* hurrah the memory is not visible ! */
-	abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 	amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM);
 	lpfn =	adev->mc.visible_vram_size >> PAGE_SHIFT;
 	for (i = 0; i < abo->placement.num_placement; i++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 524abca..10b793a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -529,9 +529,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_
 	case TTM_PL_TT:
 		break;
 	case TTM_PL_VRAM:
-		if (mem->start == AMDGPU_BO_INVALID_OFFSET)
-			return -EINVAL;
-
 		mem->bus.offset = mem->start << PAGE_SHIFT;
 		/* check if it's visible */
 		if ((mem->bus.offset + mem->bus.size) > adev->mc.visible_vram_size)
@@ -549,6 +546,17 @@ static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
 {
 }
 
+static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
+					   unsigned long page_offset)
+{
+	struct drm_mm_node *mm = bo->mem.mm_node;
+	uint64_t size = mm->size;
+
+	mm += page_offset / size;
+	page_offset %= size;
+	return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start + page_offset;
+}
+
 /*
  * TTM backend functions.
  */
@@ -1064,7 +1072,7 @@ static struct ttm_bo_driver amdgpu_bo_driver = {
 	.fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,
 	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
 	.io_mem_free = &amdgpu_ttm_io_mem_free,
-	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
+	.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
 };
 
 int amdgpu_ttm_init(struct amdgpu_device *adev)
-- 
2.5.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/6] drm/ttm: cleanup and optimize ttm_bo_mem_compat v2
       [not found] ` <1490953652-3703-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
                     ` (2 preceding siblings ...)
  2017-03-31  9:47   ` [PATCH 6/6] drm/amdgpu: handle CPU access for split VRAM buffers Christian König
@ 2017-04-03 12:37   ` Christian König
  3 siblings, 0 replies; 15+ messages in thread
From: Christian König @ 2017-04-03 12:37 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Daenzer, Michel,
	Haehnle, Nicolai, Alex Deucher

Ping! Any more comments on that set or can I push it?

Thanks,
Christian.

Am 31.03.2017 um 11:47 schrieb Christian König:
> From: Christian König <christian.koenig@amd.com>
>
> No need to implement the same logic twice. Also check if the busy placements
> are identical to the already scanned placements before checking them.
>
> v2: improve check even more as suggested by Michel.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/ttm/ttm_bo.c | 45 ++++++++++++++++++++++++--------------------
>   1 file changed, 25 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 989b98b..348b17e 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -1046,29 +1046,17 @@ static int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
>   	return ret;
>   }
>   
> -bool ttm_bo_mem_compat(struct ttm_placement *placement,
> -		       struct ttm_mem_reg *mem,
> -		       uint32_t *new_flags)
> +static bool ttm_bo_places_compat(const struct ttm_place *places,
> +				 unsigned num_placement,
> +				 struct ttm_mem_reg *mem,
> +				 uint32_t *new_flags)
>   {
> -	int i;
> +	unsigned i;
>   
> -	for (i = 0; i < placement->num_placement; i++) {
> -		const struct ttm_place *heap = &placement->placement[i];
> -		if (mem->mm_node &&
> -		    (mem->start < heap->fpfn ||
> -		     (heap->lpfn != 0 && (mem->start + mem->num_pages) > heap->lpfn)))
> -			continue;
> +	for (i = 0; i < num_placement; i++) {
> +		const struct ttm_place *heap = &places[i];
>   
> -		*new_flags = heap->flags;
> -		if ((*new_flags & mem->placement & TTM_PL_MASK_CACHING) &&
> -		    (*new_flags & mem->placement & TTM_PL_MASK_MEM))
> -			return true;
> -	}
> -
> -	for (i = 0; i < placement->num_busy_placement; i++) {
> -		const struct ttm_place *heap = &placement->busy_placement[i];
> -		if (mem->mm_node &&
> -		    (mem->start < heap->fpfn ||
> +		if (mem->mm_node && (mem->start < heap->fpfn ||
>   		     (heap->lpfn != 0 && (mem->start + mem->num_pages) > heap->lpfn)))
>   			continue;
>   
> @@ -1077,6 +1065,23 @@ bool ttm_bo_mem_compat(struct ttm_placement *placement,
>   		    (*new_flags & mem->placement & TTM_PL_MASK_MEM))
>   			return true;
>   	}
> +	return false;
> +}
> +
> +bool ttm_bo_mem_compat(struct ttm_placement *placement,
> +		       struct ttm_mem_reg *mem,
> +		       uint32_t *new_flags)
> +{
> +	if (ttm_bo_places_compat(placement->placement, placement->num_placement,
> +				 mem, new_flags))
> +		return true;
> +
> +	if ((placement->busy_placement != placement->placement ||
> +	     placement->num_busy_placement > placement->num_placement) &&
> +	    ttm_bo_places_compat(placement->busy_placement,
> +				 placement->num_busy_placement,
> +				 mem, new_flags))
> +		return true;
>   
>   	return false;
>   }


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 5/6] drm/amdgpu: use TTM_PL_FLAG_CONTIGUOUS
       [not found]     ` <1490953652-3703-5-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-04-03 16:22       ` Nicolai Hähnle
       [not found]         ` <425dfa9e-c86b-6958-f7ff-31b91a0f9e21-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Nicolai Hähnle @ 2017-04-03 16:22 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 31.03.2017 11:47, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Implement AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS using TTM_PL_FLAG_CONTIGUOUS
> instead of a placement limit. That allows us to better handle CPU
> accessible placements.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Acked-by: Michel Dänzer <michel.daenzer@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c   | 11 +++++------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 14 ++++++++++----
>  2 files changed, 15 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index d6b2de9..387d190 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -122,20 +122,19 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,
>
>  	if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
>  		unsigned visible_pfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
> -		unsigned lpfn = 0;
> -
> -		/* This forces a reallocation if the flag wasn't set before */
> -		if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
> -			lpfn = adev->mc.real_vram_size >> PAGE_SHIFT;
>
>  		places[c].fpfn = 0;
> -		places[c].lpfn = lpfn;
> +		places[c].lpfn = 0;
>  		places[c].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
>  			TTM_PL_FLAG_VRAM;
> +
>  		if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
>  			places[c].lpfn = visible_pfn;
>  		else
>  			places[c].flags |= TTM_PL_FLAG_TOPDOWN;
> +
> +		if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
> +			places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
>  		c++;
>  	}
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> index d710226..af2d172 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> @@ -93,7 +93,6 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
>  			       const struct ttm_place *place,
>  			       struct ttm_mem_reg *mem)
>  {
> -	struct amdgpu_bo *bo = container_of(tbo, struct amdgpu_bo, tbo);
>  	struct amdgpu_vram_mgr *mgr = man->priv;
>  	struct drm_mm *mm = &mgr->mm;
>  	struct drm_mm_node *nodes;
> @@ -107,8 +106,8 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
>  	if (!lpfn)
>  		lpfn = man->size;
>
> -	if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS ||
> -	    place->lpfn || amdgpu_vram_page_split == -1) {
> +	if (place->flags & TTM_PL_FLAG_CONTIGUOUS ||
> +	    amdgpu_vram_page_split == -1) {
>  		pages_per_node = ~0ul;
>  		num_nodes = 1;
>  	} else {
> @@ -126,12 +125,14 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
>  		aflags = DRM_MM_CREATE_TOP;
>  	}
>
> +	mem->start = 0;
>  	pages_left = mem->num_pages;
>
>  	spin_lock(&mgr->lock);
>  	for (i = 0; i < num_nodes; ++i) {
>  		unsigned long pages = min(pages_left, pages_per_node);
>  		uint32_t alignment = mem->page_alignment;
> +		unsigned long start;
>
>  		if (pages == pages_per_node)
>  			alignment = pages_per_node;
> @@ -145,11 +146,16 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
>  		if (unlikely(r))
>  			goto error;
>
> +		/*
> +		 * Calculate a virtual BO start address to easily check if
> +		 * everything is CPU accessible.
> +		 */
> +		start = nodes[i].start + nodes[i].size - mem->num_pages;

This might wrap around (be a signed negative number), completely 
breaking the max() logic below.

> +		mem->start = max(mem->start, start);
>  		pages_left -= pages;
>  	}
>  	spin_unlock(&mgr->lock);
>
> -	mem->start = num_nodes == 1 ? nodes[0].start : AMDGPU_BO_INVALID_OFFSET;

If we're going to abuse mem->start anyway, might I suggest just keeping 
track of max(nodes[i].start + nodes[i].size), and then setting 
mem->start to a magic (macro'd) constant based on whether everything is 
in visible VRAM or not?

Then the check in amdgpu_ttm_io_mem_reserve could be simplified accordingly.

Also, I think patches #6 and #5 should be exchanged, otherwise there's a 
temporary bug in handling split visible VRAM buffers.

Cheers,
Nicolai


>  	mem->mm_node = nodes;
>
>  	return 0;
>


-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 6/6] drm/amdgpu: handle CPU access for split VRAM buffers
  2017-03-31  9:47   ` [PATCH 6/6] drm/amdgpu: handle CPU access for split VRAM buffers Christian König
@ 2017-04-03 16:25     ` Nicolai Hähnle
  2017-04-04 11:30       ` Christian König
  0 siblings, 1 reply; 15+ messages in thread
From: Nicolai Hähnle @ 2017-04-03 16:25 UTC (permalink / raw)
  To: Christian König, amd-gfx, dri-devel

On 31.03.2017 11:47, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> This avoids merging them together on page fault.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Acked-by: Michel Dänzer <michel.daenzer@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  4 +---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 16 ++++++++++++----
>  2 files changed, 13 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 387d190..10237a8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -927,8 +927,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
>  	size = bo->mem.num_pages << PAGE_SHIFT;
>  	offset = bo->mem.start << PAGE_SHIFT;
>  	/* TODO: figure out how to map scattered VRAM to the CPU */
> -	if ((offset + size) <= adev->mc.visible_vram_size &&
> -	    (abo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS))
> +	if ((offset + size) <= adev->mc.visible_vram_size)
>  		return 0;
>
>  	/* Can't move a pinned BO to visible VRAM */
> @@ -936,7 +935,6 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
>  		return -EINVAL;
>
>  	/* hurrah the memory is not visible ! */
> -	abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
>  	amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM);
>  	lpfn =	adev->mc.visible_vram_size >> PAGE_SHIFT;
>  	for (i = 0; i < abo->placement.num_placement; i++) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 524abca..10b793a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -529,9 +529,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_
>  	case TTM_PL_TT:
>  		break;
>  	case TTM_PL_VRAM:
> -		if (mem->start == AMDGPU_BO_INVALID_OFFSET)
> -			return -EINVAL;
> -
>  		mem->bus.offset = mem->start << PAGE_SHIFT;
>  		/* check if it's visible */
>  		if ((mem->bus.offset + mem->bus.size) > adev->mc.visible_vram_size)

I believe the various mem->bus members are now unused, aren't they? 
Unless I missed something, it's best to clean this up and no longer set 
them (or set them to some obvious poison values).

Cheers,
Nicolai


> @@ -549,6 +546,17 @@ static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
>  {
>  }
>
> +static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
> +					   unsigned long page_offset)
> +{
> +	struct drm_mm_node *mm = bo->mem.mm_node;
> +	uint64_t size = mm->size;
> +
> +	mm += page_offset / size;
> +	page_offset %= size;
> +	return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start + page_offset;
> +}
> +
>  /*
>   * TTM backend functions.
>   */
> @@ -1064,7 +1072,7 @@ static struct ttm_bo_driver amdgpu_bo_driver = {
>  	.fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,
>  	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
>  	.io_mem_free = &amdgpu_ttm_io_mem_free,
> -	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
> +	.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
>  };
>
>  int amdgpu_ttm_init(struct amdgpu_device *adev)
>


-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/6] drm/ttm: add TTM_PL_FLAG_CONTIGUOUS v2
  2017-03-31  9:47   ` [PATCH 3/6] drm/ttm: add TTM_PL_FLAG_CONTIGUOUS v2 Christian König
@ 2017-04-03 16:27     ` Nicolai Hähnle
  0 siblings, 0 replies; 15+ messages in thread
From: Nicolai Hähnle @ 2017-04-03 16:27 UTC (permalink / raw)
  To: Christian König, amd-gfx, dri-devel

On 31.03.2017 11:47, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> This allows drivers to specify if they need a contiguous allocation or not.
>
> v2: use space instead of tab
>
> Signed-off-by: Christian König <christian.koenig@amd.com>

Patches 1-3:

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>

Patch 4:

Acked-by: Nicolai Hähnle <nicolai.haehnle@amd.com>


> ---
>  drivers/gpu/drm/ttm/ttm_bo.c    | 4 +++-
>  include/drm/ttm/ttm_placement.h | 1 +
>  2 files changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 348b17e..da37cdf 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -1062,7 +1062,9 @@ static bool ttm_bo_places_compat(const struct ttm_place *places,
>
>  		*new_flags = heap->flags;
>  		if ((*new_flags & mem->placement & TTM_PL_MASK_CACHING) &&
> -		    (*new_flags & mem->placement & TTM_PL_MASK_MEM))
> +		    (*new_flags & mem->placement & TTM_PL_MASK_MEM) &&
> +		    (!(*new_flags & TTM_PL_FLAG_CONTIGUOUS) ||
> +		     (mem->placement & TTM_PL_FLAG_CONTIGUOUS)))
>  			return true;
>  	}
>  	return false;
> diff --git a/include/drm/ttm/ttm_placement.h b/include/drm/ttm/ttm_placement.h
> index 932be0c..e88a8e3 100644
> --- a/include/drm/ttm/ttm_placement.h
> +++ b/include/drm/ttm/ttm_placement.h
> @@ -63,6 +63,7 @@
>  #define TTM_PL_FLAG_CACHED      (1 << 16)
>  #define TTM_PL_FLAG_UNCACHED    (1 << 17)
>  #define TTM_PL_FLAG_WC          (1 << 18)
> +#define TTM_PL_FLAG_CONTIGUOUS  (1 << 19)
>  #define TTM_PL_FLAG_NO_EVICT    (1 << 21)
>  #define TTM_PL_FLAG_TOPDOWN     (1 << 22)
>
>


-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 6/6] drm/amdgpu: handle CPU access for split VRAM buffers
  2017-04-03 16:25     ` Nicolai Hähnle
@ 2017-04-04 11:30       ` Christian König
  0 siblings, 0 replies; 15+ messages in thread
From: Christian König @ 2017-04-04 11:30 UTC (permalink / raw)
  To: Nicolai Hähnle, amd-gfx, dri-devel

Am 03.04.2017 um 18:25 schrieb Nicolai Hähnle:
> On 31.03.2017 11:47, Christian König wrote:
>> From: Christian König <christian.koenig@amd.com>
>> [SNIP]
>>          mem->bus.offset = mem->start << PAGE_SHIFT;
>>          /* check if it's visible */
>>          if ((mem->bus.offset + mem->bus.size) > 
>> adev->mc.visible_vram_size)
>
> I believe the various mem->bus members are now unused, aren't they? 
> Unless I missed something, it's best to clean this up and no longer 
> set them (or set them to some obvious poison values).

No, they are still needed for in-kernel mappings. I only handle split 
mappings from userspace with the new callback.

Regards,
Christian.
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 5/6] drm/amdgpu: use TTM_PL_FLAG_CONTIGUOUS
       [not found]         ` <425dfa9e-c86b-6958-f7ff-31b91a0f9e21-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-04-04 11:33           ` Christian König
  2017-04-04 14:24             ` Nicolai Hähnle
  0 siblings, 1 reply; 15+ messages in thread
From: Christian König @ 2017-04-04 11:33 UTC (permalink / raw)
  To: Nicolai Hähnle, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 03.04.2017 um 18:22 schrieb Nicolai Hähnle:
> On 31.03.2017 11:47, Christian König wrote:
>> From: Christian König <christian.koenig@amd.com>
>>
>> Implement AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS using TTM_PL_FLAG_CONTIGUOUS
>> instead of a placement limit. That allows us to better handle CPU
>> accessible placements.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> Acked-by: Michel Dänzer <michel.daenzer@amd.com>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c   | 11 +++++------
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 14 ++++++++++----
>>  2 files changed, 15 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> index d6b2de9..387d190 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> @@ -122,20 +122,19 @@ static void amdgpu_ttm_placement_init(struct 
>> amdgpu_device *adev,
>>
>>      if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
>>          unsigned visible_pfn = adev->mc.visible_vram_size >> 
>> PAGE_SHIFT;
>> -        unsigned lpfn = 0;
>> -
>> -        /* This forces a reallocation if the flag wasn't set before */
>> -        if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
>> -            lpfn = adev->mc.real_vram_size >> PAGE_SHIFT;
>>
>>          places[c].fpfn = 0;
>> -        places[c].lpfn = lpfn;
>> +        places[c].lpfn = 0;
>>          places[c].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
>>              TTM_PL_FLAG_VRAM;
>> +
>>          if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
>>              places[c].lpfn = visible_pfn;
>>          else
>>              places[c].flags |= TTM_PL_FLAG_TOPDOWN;
>> +
>> +        if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
>> +            places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
>>          c++;
>>      }
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> index d710226..af2d172 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> @@ -93,7 +93,6 @@ static int amdgpu_vram_mgr_new(struct 
>> ttm_mem_type_manager *man,
>>                     const struct ttm_place *place,
>>                     struct ttm_mem_reg *mem)
>>  {
>> -    struct amdgpu_bo *bo = container_of(tbo, struct amdgpu_bo, tbo);
>>      struct amdgpu_vram_mgr *mgr = man->priv;
>>      struct drm_mm *mm = &mgr->mm;
>>      struct drm_mm_node *nodes;
>> @@ -107,8 +106,8 @@ static int amdgpu_vram_mgr_new(struct 
>> ttm_mem_type_manager *man,
>>      if (!lpfn)
>>          lpfn = man->size;
>>
>> -    if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS ||
>> -        place->lpfn || amdgpu_vram_page_split == -1) {
>> +    if (place->flags & TTM_PL_FLAG_CONTIGUOUS ||
>> +        amdgpu_vram_page_split == -1) {
>>          pages_per_node = ~0ul;
>>          num_nodes = 1;
>>      } else {
>> @@ -126,12 +125,14 @@ static int amdgpu_vram_mgr_new(struct 
>> ttm_mem_type_manager *man,
>>          aflags = DRM_MM_CREATE_TOP;
>>      }
>>
>> +    mem->start = 0;
>>      pages_left = mem->num_pages;
>>
>>      spin_lock(&mgr->lock);
>>      for (i = 0; i < num_nodes; ++i) {
>>          unsigned long pages = min(pages_left, pages_per_node);
>>          uint32_t alignment = mem->page_alignment;
>> +        unsigned long start;
>>
>>          if (pages == pages_per_node)
>>              alignment = pages_per_node;
>> @@ -145,11 +146,16 @@ static int amdgpu_vram_mgr_new(struct 
>> ttm_mem_type_manager *man,
>>          if (unlikely(r))
>>              goto error;
>>
>> +        /*
>> +         * Calculate a virtual BO start address to easily check if
>> +         * everything is CPU accessible.
>> +         */
>> +        start = nodes[i].start + nodes[i].size - mem->num_pages;
>
> This might wrap around (be a signed negative number), completely 
> breaking the max() logic below.

Good point, going to fix that.

>
>> +        mem->start = max(mem->start, start);
>>          pages_left -= pages;
>>      }
>>      spin_unlock(&mgr->lock);
>>
>> -    mem->start = num_nodes == 1 ? nodes[0].start : 
>> AMDGPU_BO_INVALID_OFFSET;
>
> If we're going to abuse mem->start anyway, might I suggest just 
> keeping track of max(nodes[i].start + nodes[i].size), and then setting 
> mem->start to a magic (macro'd) constant based on whether everything 
> is in visible VRAM or not?
>

No, that would break in-kernel mappings.

> Then the check in amdgpu_ttm_io_mem_reserve could be simplified 
> accordingly.
>
> Also, I think patches #6 and #5 should be exchanged, otherwise there's 
> a temporary bug in handling split visible VRAM buffers.

Huh? Why? Patch #6 enables the whole thing by not making the contiguous 
flag mandatory for CPU mappings any more.

Switching those would cause problems with detecting when a BO is not in 
visible VRAM.

Regards,
Christian.

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 5/6] drm/amdgpu: use TTM_PL_FLAG_CONTIGUOUS
  2017-04-04 11:33           ` Christian König
@ 2017-04-04 14:24             ` Nicolai Hähnle
  0 siblings, 0 replies; 15+ messages in thread
From: Nicolai Hähnle @ 2017-04-04 14:24 UTC (permalink / raw)
  To: Christian König, amd-gfx, dri-devel

On 04.04.2017 13:33, Christian König wrote:
> Am 03.04.2017 um 18:22 schrieb Nicolai Hähnle:
>> On 31.03.2017 11:47, Christian König wrote:
>>> From: Christian König <christian.koenig@amd.com>
>>>
>>> Implement AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS using TTM_PL_FLAG_CONTIGUOUS
>>> instead of a placement limit. That allows us to better handle CPU
>>> accessible placements.
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> Acked-by: Michel Dänzer <michel.daenzer@amd.com>
>>> ---
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c   | 11 +++++------
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 14 ++++++++++----
>>>  2 files changed, 15 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> index d6b2de9..387d190 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> @@ -122,20 +122,19 @@ static void amdgpu_ttm_placement_init(struct
>>> amdgpu_device *adev,
>>>
>>>      if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
>>>          unsigned visible_pfn = adev->mc.visible_vram_size >>
>>> PAGE_SHIFT;
>>> -        unsigned lpfn = 0;
>>> -
>>> -        /* This forces a reallocation if the flag wasn't set before */
>>> -        if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
>>> -            lpfn = adev->mc.real_vram_size >> PAGE_SHIFT;
>>>
>>>          places[c].fpfn = 0;
>>> -        places[c].lpfn = lpfn;
>>> +        places[c].lpfn = 0;
>>>          places[c].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
>>>              TTM_PL_FLAG_VRAM;
>>> +
>>>          if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
>>>              places[c].lpfn = visible_pfn;
>>>          else
>>>              places[c].flags |= TTM_PL_FLAG_TOPDOWN;
>>> +
>>> +        if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
>>> +            places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
>>>          c++;
>>>      }
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>> index d710226..af2d172 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>> @@ -93,7 +93,6 @@ static int amdgpu_vram_mgr_new(struct
>>> ttm_mem_type_manager *man,
>>>                     const struct ttm_place *place,
>>>                     struct ttm_mem_reg *mem)
>>>  {
>>> -    struct amdgpu_bo *bo = container_of(tbo, struct amdgpu_bo, tbo);
>>>      struct amdgpu_vram_mgr *mgr = man->priv;
>>>      struct drm_mm *mm = &mgr->mm;
>>>      struct drm_mm_node *nodes;
>>> @@ -107,8 +106,8 @@ static int amdgpu_vram_mgr_new(struct
>>> ttm_mem_type_manager *man,
>>>      if (!lpfn)
>>>          lpfn = man->size;
>>>
>>> -    if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS ||
>>> -        place->lpfn || amdgpu_vram_page_split == -1) {
>>> +    if (place->flags & TTM_PL_FLAG_CONTIGUOUS ||
>>> +        amdgpu_vram_page_split == -1) {
>>>          pages_per_node = ~0ul;
>>>          num_nodes = 1;
>>>      } else {
>>> @@ -126,12 +125,14 @@ static int amdgpu_vram_mgr_new(struct
>>> ttm_mem_type_manager *man,
>>>          aflags = DRM_MM_CREATE_TOP;
>>>      }
>>>
>>> +    mem->start = 0;
>>>      pages_left = mem->num_pages;
>>>
>>>      spin_lock(&mgr->lock);
>>>      for (i = 0; i < num_nodes; ++i) {
>>>          unsigned long pages = min(pages_left, pages_per_node);
>>>          uint32_t alignment = mem->page_alignment;
>>> +        unsigned long start;
>>>
>>>          if (pages == pages_per_node)
>>>              alignment = pages_per_node;
>>> @@ -145,11 +146,16 @@ static int amdgpu_vram_mgr_new(struct
>>> ttm_mem_type_manager *man,
>>>          if (unlikely(r))
>>>              goto error;
>>>
>>> +        /*
>>> +         * Calculate a virtual BO start address to easily check if
>>> +         * everything is CPU accessible.
>>> +         */
>>> +        start = nodes[i].start + nodes[i].size - mem->num_pages;
>>
>> This might wrap around (be a signed negative number), completely
>> breaking the max() logic below.
>
> Good point, going to fix that.
>
>>
>>> +        mem->start = max(mem->start, start);
>>>          pages_left -= pages;
>>>      }
>>>      spin_unlock(&mgr->lock);
>>>
>>> -    mem->start = num_nodes == 1 ? nodes[0].start :
>>> AMDGPU_BO_INVALID_OFFSET;
>>
>> If we're going to abuse mem->start anyway, might I suggest just
>> keeping track of max(nodes[i].start + nodes[i].size), and then setting
>> mem->start to a magic (macro'd) constant based on whether everything
>> is in visible VRAM or not?
>>
>
> No, that would break in kernel mappings.
>
>> Then the check in amdgpu_ttm_io_mem_reserve could be simplified
>> accordingly.
>>
>> Also, I think patches #6 and #5 should be exchanged, otherwise there's
>> a temporary bug in handling split visible VRAM buffers.
>
> Hui? Why? Patch #6 enables the whole thing by not making the contiguous
> flag mandatory for CPU mappings any more.

Ah, I missed the fact that it's guarded by the check in 
amdgpu_bo_fault_reserve_notify. You're right, the order of patches is good.

Cheers,
Nicolai


>
> Switching those would cause problems with detecting when a BO is not in
> visible VRAM.
>
> Regards,
> Christian.
>


-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 5/6] drm/amdgpu: use TTM_PL_FLAG_CONTIGUOUS
  2017-03-29 17:43 [PATCH 1/6] drm/ttm: cleanup and optimize ttm_bo_mem_compat Christian König
@ 2017-03-29 17:43 ` Christian König
  0 siblings, 0 replies; 15+ messages in thread
From: Christian König @ 2017-03-29 17:43 UTC (permalink / raw)
  To: dri-devel

From: Christian König <christian.koenig@amd.com>

Implement AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS using TTM_PL_FLAG_CONTIGUOUS
instead of a placement limit. That allows us to better handle CPU
accessible placements.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c   | 11 +++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 14 ++++++++++----
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index d6b2de9..387d190 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -122,20 +122,19 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,
 
 	if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
 		unsigned visible_pfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
-		unsigned lpfn = 0;
-
-		/* This forces a reallocation if the flag wasn't set before */
-		if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
-			lpfn = adev->mc.real_vram_size >> PAGE_SHIFT;
 
 		places[c].fpfn = 0;
-		places[c].lpfn = lpfn;
+		places[c].lpfn = 0;
 		places[c].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
 			TTM_PL_FLAG_VRAM;
+
 		if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
 			places[c].lpfn = visible_pfn;
 		else
 			places[c].flags |= TTM_PL_FLAG_TOPDOWN;
+
+		if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+			places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
 		c++;
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index d710226..af2d172 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -93,7 +93,6 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
 			       const struct ttm_place *place,
 			       struct ttm_mem_reg *mem)
 {
-	struct amdgpu_bo *bo = container_of(tbo, struct amdgpu_bo, tbo);
 	struct amdgpu_vram_mgr *mgr = man->priv;
 	struct drm_mm *mm = &mgr->mm;
 	struct drm_mm_node *nodes;
@@ -107,8 +106,8 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
 	if (!lpfn)
 		lpfn = man->size;
 
-	if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS ||
-	    place->lpfn || amdgpu_vram_page_split == -1) {
+	if (place->flags & TTM_PL_FLAG_CONTIGUOUS ||
+	    amdgpu_vram_page_split == -1) {
 		pages_per_node = ~0ul;
 		num_nodes = 1;
 	} else {
@@ -126,12 +125,14 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
 		aflags = DRM_MM_CREATE_TOP;
 	}
 
+	mem->start = 0;
 	pages_left = mem->num_pages;
 
 	spin_lock(&mgr->lock);
 	for (i = 0; i < num_nodes; ++i) {
 		unsigned long pages = min(pages_left, pages_per_node);
 		uint32_t alignment = mem->page_alignment;
+		unsigned long start;
 
 		if (pages == pages_per_node)
 			alignment = pages_per_node;
@@ -145,11 +146,16 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
 		if (unlikely(r))
 			goto error;
 
+		/*
+		 * Calculate a virtual BO start address to easily check if
+		 * everything is CPU accessible.
+		 */
+		start = nodes[i].start + nodes[i].size - mem->num_pages;
+		mem->start = max(mem->start, start);
 		pages_left -= pages;
 	}
 	spin_unlock(&mgr->lock);
 
-	mem->start = num_nodes == 1 ? nodes[0].start : AMDGPU_BO_INVALID_OFFSET;
 	mem->mm_node = nodes;
 
 	return 0;
-- 
2.5.0

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 5/6] drm/amdgpu: use TTM_PL_FLAG_CONTIGUOUS
       [not found] ` <1490792146-2218-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-03-29 12:55   ` Christian König
  0 siblings, 0 replies; 15+ messages in thread
From: Christian König @ 2017-03-29 12:55 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: maraeo-Re5JQEeQqe8AvxtiuMwx3w

From: Christian König <christian.koenig@amd.com>

Implement AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS using TTM_PL_FLAG_CONTIGUOUS
instead of a placement limit. That allows us to better handle CPU
accessible placements.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c   | 11 +++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 14 ++++++++++----
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index d6b2de9..387d190 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -122,20 +122,19 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,
 
 	if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
 		unsigned visible_pfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
-		unsigned lpfn = 0;
-
-		/* This forces a reallocation if the flag wasn't set before */
-		if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
-			lpfn = adev->mc.real_vram_size >> PAGE_SHIFT;
 
 		places[c].fpfn = 0;
-		places[c].lpfn = lpfn;
+		places[c].lpfn = 0;
 		places[c].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
 			TTM_PL_FLAG_VRAM;
+
 		if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
 			places[c].lpfn = visible_pfn;
 		else
 			places[c].flags |= TTM_PL_FLAG_TOPDOWN;
+
+		if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+			places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
 		c++;
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index d710226..af2d172 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -93,7 +93,6 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
 			       const struct ttm_place *place,
 			       struct ttm_mem_reg *mem)
 {
-	struct amdgpu_bo *bo = container_of(tbo, struct amdgpu_bo, tbo);
 	struct amdgpu_vram_mgr *mgr = man->priv;
 	struct drm_mm *mm = &mgr->mm;
 	struct drm_mm_node *nodes;
@@ -107,8 +106,8 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
 	if (!lpfn)
 		lpfn = man->size;
 
-	if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS ||
-	    place->lpfn || amdgpu_vram_page_split == -1) {
+	if (place->flags & TTM_PL_FLAG_CONTIGUOUS ||
+	    amdgpu_vram_page_split == -1) {
 		pages_per_node = ~0ul;
 		num_nodes = 1;
 	} else {
@@ -126,12 +125,14 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
 		aflags = DRM_MM_CREATE_TOP;
 	}
 
+	mem->start = 0;
 	pages_left = mem->num_pages;
 
 	spin_lock(&mgr->lock);
 	for (i = 0; i < num_nodes; ++i) {
 		unsigned long pages = min(pages_left, pages_per_node);
 		uint32_t alignment = mem->page_alignment;
+		unsigned long start;
 
 		if (pages == pages_per_node)
 			alignment = pages_per_node;
@@ -145,11 +146,16 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
 		if (unlikely(r))
 			goto error;
 
+		/*
+		 * Calculate a virtual BO start address to easily check if
+		 * everything is CPU accessible.
+		 */
+		start = nodes[i].start + nodes[i].size - mem->num_pages;
+		mem->start = max(mem->start, start);
 		pages_left -= pages;
 	}
 	spin_unlock(&mgr->lock);
 
-	mem->start = num_nodes == 1 ? nodes[0].start : AMDGPU_BO_INVALID_OFFSET;
 	mem->mm_node = nodes;
 
 	return 0;
-- 
2.5.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2017-04-04 14:24 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-31  9:47 [PATCH 1/6] drm/ttm: cleanup and optimize ttm_bo_mem_compat v2 Christian König
2017-03-31  9:47 ` [PATCH 2/6] drm/ttm: add io_mem_pfn callback Christian König
2017-03-31  9:47 ` [PATCH 4/6] drm/amdgpu: drop alpha support Christian König
     [not found] ` <1490953652-3703-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-03-31  9:47   ` [PATCH 3/6] drm/ttm: add TTM_PL_FLAG_CONTIGUOUS v2 Christian König
2017-04-03 16:27     ` Nicolai Hähnle
2017-03-31  9:47   ` [PATCH 5/6] drm/amdgpu: use TTM_PL_FLAG_CONTIGUOUS Christian König
     [not found]     ` <1490953652-3703-5-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-04-03 16:22       ` Nicolai Hähnle
     [not found]         ` <425dfa9e-c86b-6958-f7ff-31b91a0f9e21-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-04-04 11:33           ` Christian König
2017-04-04 14:24             ` Nicolai Hähnle
2017-03-31  9:47   ` [PATCH 6/6] drm/amdgpu: handle CPU access for split VRAM buffers Christian König
2017-04-03 16:25     ` Nicolai Hähnle
2017-04-04 11:30       ` Christian König
2017-04-03 12:37   ` [PATCH 1/6] drm/ttm: cleanup and optimize ttm_bo_mem_compat v2 Christian König
  -- strict thread matches above, loose matches on Subject: below --
2017-03-29 17:43 [PATCH 1/6] drm/ttm: cleanup and optimize ttm_bo_mem_compat Christian König
2017-03-29 17:43 ` [PATCH 5/6] drm/amdgpu: use TTM_PL_FLAG_CONTIGUOUS Christian König
2017-03-29 12:55 CPU mapping of split VRAM buffers Christian König
     [not found] ` <1490792146-2218-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-03-29 12:55   ` [PATCH 5/6] drm/amdgpu: use TTM_PL_FLAG_CONTIGUOUS Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.