All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/6] drm/amdgpu: remove unused VM defines
@ 2016-08-09 12:52 Christian König
       [not found] ` <1470747172-2853-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 10+ messages in thread
From: Christian König @ 2016-08-09 12:52 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

Not used for a long time.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c309eaf..fe0ad2a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -830,8 +830,6 @@ struct amdgpu_ring {
 
 /* PTBs (Page Table Blocks) need to be aligned to 32K */
 #define AMDGPU_VM_PTB_ALIGN_SIZE   32768
-#define AMDGPU_VM_PTB_ALIGN_MASK (AMDGPU_VM_PTB_ALIGN_SIZE - 1)
-#define AMDGPU_VM_PTB_ALIGN(a) (((a) + AMDGPU_VM_PTB_ALIGN_MASK) & ~AMDGPU_VM_PTB_ALIGN_MASK)
 
 #define AMDGPU_PTE_VALID	(1 << 0)
 #define AMDGPU_PTE_SYSTEM	(1 << 1)
-- 
2.5.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/6] drm/amdgpu: cleanup VM fragment defines
       [not found] ` <1470747172-2853-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2016-08-09 12:52   ` Christian König
  2016-08-09 12:52   ` [PATCH 3/6] drm/amdgpu: rename amdgpu_vm_update_params Christian König
                     ` (3 subsequent siblings)
  4 siblings, 0 replies; 10+ messages in thread
From: Christian König @ 2016-08-09 12:52 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

We can actually do way more than just the 64KB we currently use as the default.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h    | 8 ++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index fe0ad2a..f1b762d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -831,6 +831,9 @@ struct amdgpu_ring {
 /* PTBs (Page Table Blocks) need to be aligned to 32K */
 #define AMDGPU_VM_PTB_ALIGN_SIZE   32768
 
+/* LOG2 number of contiguous pages for the fragment field */
+#define AMDGPU_LOG2_PAGES_PER_FRAG 4
+
 #define AMDGPU_PTE_VALID	(1 << 0)
 #define AMDGPU_PTE_SYSTEM	(1 << 1)
 #define AMDGPU_PTE_SNOOPED	(1 << 2)
@@ -841,10 +844,7 @@ struct amdgpu_ring {
 #define AMDGPU_PTE_READABLE	(1 << 5)
 #define AMDGPU_PTE_WRITEABLE	(1 << 6)
 
-/* PTE (Page Table Entry) fragment field for different page sizes */
-#define AMDGPU_PTE_FRAG_4KB	(0 << 7)
-#define AMDGPU_PTE_FRAG_64KB	(4 << 7)
-#define AMDGPU_LOG2_PAGES_PER_FRAG 4
+#define AMDGPU_PTE_FRAG(x)	((x & 0x1f) << 7)
 
 /* How to programm VM fault handling */
 #define AMDGPU_VM_FAULT_STOP_NEVER	0
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index e1efe74f..f8cc45d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -738,7 +738,7 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
 	 */
 
 	/* SI and newer are optimized for 64KB */
-	uint64_t frag_flags = AMDGPU_PTE_FRAG_64KB;
+	uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG);
 	uint64_t frag_align = 0x80;
 
 	uint64_t frag_start = ALIGN(pe_start, frag_align);
-- 
2.5.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 3/6] drm/amdgpu: rename amdgpu_vm_update_params
       [not found] ` <1470747172-2853-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2016-08-09 12:52   ` [PATCH 2/6] drm/amdgpu: cleanup VM fragment defines Christian König
@ 2016-08-09 12:52   ` Christian König
  2016-08-09 12:52   ` [PATCH 4/6] drm/amdgpu: add adev to the pte_update_params Christian König
                     ` (2 subsequent siblings)
  4 siblings, 0 replies; 10+ messages in thread
From: Christian König @ 2016-08-09 12:52 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

Well, those are actually page table entry parameters.
This also makes the variable names used a bit shorter.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 91 ++++++++++++++++------------------
 1 file changed, 44 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index f8cc45d..ffba07c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -57,7 +57,7 @@
 /* Local structure. Encapsulate some VM table update parameters to reduce
  * the number of function parameters
  */
-struct amdgpu_vm_update_params {
+struct amdgpu_pte_update_params {
 	/* address where to copy page table entries from */
 	uint64_t src;
 	/* DMA addresses to use for mapping */
@@ -469,7 +469,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
  * amdgpu_vm_update_pages - helper to call the right asic function
  *
  * @adev: amdgpu_device pointer
- * @vm_update_params: see amdgpu_vm_update_params definition
+ * @params: see amdgpu_pte_update_params definition
  * @pe: addr of the page entry
  * @addr: dst addr to write into pe
  * @count: number of page entries to update
@@ -480,29 +480,28 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
  * to setup the page table using the DMA.
  */
 static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
-				   struct amdgpu_vm_update_params
-					*vm_update_params,
+				   struct amdgpu_pte_update_params *params,
 				   uint64_t pe, uint64_t addr,
 				   unsigned count, uint32_t incr,
 				   uint32_t flags)
 {
 	trace_amdgpu_vm_set_page(pe, addr, count, incr, flags);
 
-	if (vm_update_params->src) {
-		amdgpu_vm_copy_pte(adev, vm_update_params->ib,
-			pe, (vm_update_params->src + (addr >> 12) * 8), count);
+	if (params->src) {
+		amdgpu_vm_copy_pte(adev, params->ib,
+			pe, (params->src + (addr >> 12) * 8), count);
 
-	} else if (vm_update_params->pages_addr) {
-		amdgpu_vm_write_pte(adev, vm_update_params->ib,
-			vm_update_params->pages_addr,
+	} else if (params->pages_addr) {
+		amdgpu_vm_write_pte(adev, params->ib,
+			params->pages_addr,
 			pe, addr, count, incr, flags);
 
 	} else if (count < 3) {
-		amdgpu_vm_write_pte(adev, vm_update_params->ib, NULL, pe, addr,
+		amdgpu_vm_write_pte(adev, params->ib, NULL, pe, addr,
 				    count, incr, flags);
 
 	} else {
-		amdgpu_vm_set_pte_pde(adev, vm_update_params->ib, pe, addr,
+		amdgpu_vm_set_pte_pde(adev, params->ib, pe, addr,
 				      count, incr, flags);
 	}
 }
@@ -522,12 +521,12 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 	struct amdgpu_ring *ring;
 	struct fence *fence = NULL;
 	struct amdgpu_job *job;
-	struct amdgpu_vm_update_params vm_update_params;
+	struct amdgpu_pte_update_params params;
 	unsigned entries;
 	uint64_t addr;
 	int r;
 
-	memset(&vm_update_params, 0, sizeof(vm_update_params));
+	memset(&params, 0, sizeof(params));
 	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
 
 	r = reservation_object_reserve_shared(bo->tbo.resv);
@@ -545,8 +544,8 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 	if (r)
 		goto error;
 
-	vm_update_params.ib = &job->ibs[0];
-	amdgpu_vm_update_pages(adev, &vm_update_params, addr, 0, entries,
+	params.ib = &job->ibs[0];
+	amdgpu_vm_update_pages(adev, &params, addr, 0, entries,
 			       0, 0);
 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
 
@@ -619,12 +618,12 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 	uint64_t last_pde = ~0, last_pt = ~0;
 	unsigned count = 0, pt_idx, ndw;
 	struct amdgpu_job *job;
-	struct amdgpu_vm_update_params vm_update_params;
+	struct amdgpu_pte_update_params params;
 	struct fence *fence = NULL;
 
 	int r;
 
-	memset(&vm_update_params, 0, sizeof(vm_update_params));
+	memset(&params, 0, sizeof(params));
 	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
 
 	/* padding, etc. */
@@ -637,7 +636,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 	if (r)
 		return r;
 
-	vm_update_params.ib = &job->ibs[0];
+	params.ib = &job->ibs[0];
 
 	/* walk over the address space and update the page directory */
 	for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
@@ -657,7 +656,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 		    ((last_pt + incr * count) != pt)) {
 
 			if (count) {
-				amdgpu_vm_update_pages(adev, &vm_update_params,
+				amdgpu_vm_update_pages(adev, &params,
 						       last_pde, last_pt,
 						       count, incr,
 						       AMDGPU_PTE_VALID);
@@ -672,15 +671,15 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 	}
 
 	if (count)
-		amdgpu_vm_update_pages(adev, &vm_update_params,
+		amdgpu_vm_update_pages(adev, &params,
 					last_pde, last_pt,
 					count, incr, AMDGPU_PTE_VALID);
 
-	if (vm_update_params.ib->length_dw != 0) {
-		amdgpu_ring_pad_ib(ring, vm_update_params.ib);
+	if (params.ib->length_dw != 0) {
+		amdgpu_ring_pad_ib(ring, params.ib);
 		amdgpu_sync_resv(adev, &job->sync, pd->tbo.resv,
 				 AMDGPU_FENCE_OWNER_VM);
-		WARN_ON(vm_update_params.ib->length_dw > ndw);
+		WARN_ON(params.ib->length_dw > ndw);
 		r = amdgpu_job_submit(job, ring, &vm->entity,
 				      AMDGPU_FENCE_OWNER_VM, &fence);
 		if (r)
@@ -706,15 +705,14 @@ error_free:
  * amdgpu_vm_frag_ptes - add fragment information to PTEs
  *
  * @adev: amdgpu_device pointer
- * @vm_update_params: see amdgpu_vm_update_params definition
+ * @params: see amdgpu_pte_update_params definition
  * @pe_start: first PTE to handle
  * @pe_end: last PTE to handle
  * @addr: addr those PTEs should point to
  * @flags: hw mapping flags
  */
 static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
-				struct amdgpu_vm_update_params
-					*vm_update_params,
+				struct amdgpu_pte_update_params	*params,
 				uint64_t pe_start, uint64_t pe_end,
 				uint64_t addr, uint32_t flags)
 {
@@ -751,11 +749,11 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
 		return;
 
 	/* system pages are non continuously */
-	if (vm_update_params->src || vm_update_params->pages_addr ||
+	if (params->src || params->pages_addr ||
 		!(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) {
 
 		count = (pe_end - pe_start) / 8;
-		amdgpu_vm_update_pages(adev, vm_update_params, pe_start,
+		amdgpu_vm_update_pages(adev, params, pe_start,
 				       addr, count, AMDGPU_GPU_PAGE_SIZE,
 				       flags);
 		return;
@@ -764,21 +762,21 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
 	/* handle the 4K area at the beginning */
 	if (pe_start != frag_start) {
 		count = (frag_start - pe_start) / 8;
-		amdgpu_vm_update_pages(adev, vm_update_params, pe_start, addr,
+		amdgpu_vm_update_pages(adev, params, pe_start, addr,
 				       count, AMDGPU_GPU_PAGE_SIZE, flags);
 		addr += AMDGPU_GPU_PAGE_SIZE * count;
 	}
 
 	/* handle the area in the middle */
 	count = (frag_end - frag_start) / 8;
-	amdgpu_vm_update_pages(adev, vm_update_params, frag_start, addr, count,
+	amdgpu_vm_update_pages(adev, params, frag_start, addr, count,
 			       AMDGPU_GPU_PAGE_SIZE, flags | frag_flags);
 
 	/* handle the 4K area at the end */
 	if (frag_end != pe_end) {
 		addr += AMDGPU_GPU_PAGE_SIZE * count;
 		count = (pe_end - frag_end) / 8;
-		amdgpu_vm_update_pages(adev, vm_update_params, frag_end, addr,
+		amdgpu_vm_update_pages(adev, params, frag_end, addr,
 				       count, AMDGPU_GPU_PAGE_SIZE, flags);
 	}
 }
@@ -787,7 +785,7 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
  * amdgpu_vm_update_ptes - make sure that page tables are valid
  *
  * @adev: amdgpu_device pointer
- * @vm_update_params: see amdgpu_vm_update_params definition
+ * @params: see amdgpu_pte_update_params definition
  * @vm: requested vm
  * @start: start of GPU address range
  * @end: end of GPU address range
@@ -797,8 +795,7 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
  * Update the page tables in the range @start - @end.
  */
 static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
-				  struct amdgpu_vm_update_params
-					*vm_update_params,
+				  struct amdgpu_pte_update_params *params,
 				  struct amdgpu_vm *vm,
 				  uint64_t start, uint64_t end,
 				  uint64_t dst, uint32_t flags)
@@ -851,7 +848,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 			*/
 			cur_pe_end += 8 * nptes;
 		} else {
-			amdgpu_vm_frag_ptes(adev, vm_update_params,
+			amdgpu_vm_frag_ptes(adev, params,
 					    cur_pe_start, cur_pe_end,
 					    cur_dst, flags);
 
@@ -865,7 +862,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
 	}
 
-	amdgpu_vm_frag_ptes(adev, vm_update_params, cur_pe_start,
+	amdgpu_vm_frag_ptes(adev, params, cur_pe_start,
 			    cur_pe_end, cur_dst, flags);
 }
 
@@ -899,14 +896,14 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	void *owner = AMDGPU_FENCE_OWNER_VM;
 	unsigned nptes, ncmds, ndw;
 	struct amdgpu_job *job;
-	struct amdgpu_vm_update_params vm_update_params;
+	struct amdgpu_pte_update_params params;
 	struct fence *f = NULL;
 	int r;
 
 	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
-	memset(&vm_update_params, 0, sizeof(vm_update_params));
-	vm_update_params.src = src;
-	vm_update_params.pages_addr = pages_addr;
+	memset(&params, 0, sizeof(params));
+	params.src = src;
+	params.pages_addr = pages_addr;
 
 	/* sync to everything on unmapping */
 	if (!(flags & AMDGPU_PTE_VALID))
@@ -923,11 +920,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	/* padding, etc. */
 	ndw = 64;
 
-	if (vm_update_params.src) {
+	if (params.src) {
 		/* only copy commands needed */
 		ndw += ncmds * 7;
 
-	} else if (vm_update_params.pages_addr) {
+	} else if (params.pages_addr) {
 		/* header for write data commands */
 		ndw += ncmds * 4;
 
@@ -946,7 +943,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (r)
 		return r;
 
-	vm_update_params.ib = &job->ibs[0];
+	params.ib = &job->ibs[0];
 
 	r = amdgpu_sync_fence(adev, &job->sync, exclusive);
 	if (r)
@@ -961,11 +958,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (r)
 		goto error_free;
 
-	amdgpu_vm_update_ptes(adev, &vm_update_params, vm, start,
+	amdgpu_vm_update_ptes(adev, &params, vm, start,
 			      last + 1, addr, flags);
 
-	amdgpu_ring_pad_ib(ring, vm_update_params.ib);
-	WARN_ON(vm_update_params.ib->length_dw > ndw);
+	amdgpu_ring_pad_ib(ring, params.ib);
+	WARN_ON(params.ib->length_dw > ndw);
 	r = amdgpu_job_submit(job, ring, &vm->entity,
 			      AMDGPU_FENCE_OWNER_VM, &f);
 	if (r)
-- 
2.5.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 4/6] drm/amdgpu: add adev to the pte_update_params
       [not found] ` <1470747172-2853-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2016-08-09 12:52   ` [PATCH 2/6] drm/amdgpu: cleanup VM fragment defines Christian König
  2016-08-09 12:52   ` [PATCH 3/6] drm/amdgpu: rename amdgpu_vm_update_params Christian König
@ 2016-08-09 12:52   ` Christian König
  2016-08-09 12:52   ` [PATCH 5/6] drm/amdgpu: flip frag_ptes and update_pts Christian König
  2016-08-09 12:52   ` [PATCH 6/6] drm/amdgpu: use more than 64KB fragment size if possible Christian König
  4 siblings, 0 replies; 10+ messages in thread
From: Christian König @ 2016-08-09 12:52 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

No need to carry that forward as a separate parameter.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 65 +++++++++++++++-------------------
 1 file changed, 29 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index ffba07c..6d90b2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -58,6 +58,8 @@
  * the number of function parameters
  */
 struct amdgpu_pte_update_params {
+	/* amdgpu device we do this update for */
+	struct amdgpu_device *adev;
 	/* address where to copy page table entries from */
 	uint64_t src;
 	/* DMA addresses to use for mapping */
@@ -468,7 +470,6 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
 /**
  * amdgpu_vm_update_pages - helper to call the right asic function
  *
- * @adev: amdgpu_device pointer
  * @params: see amdgpu_pte_update_params definition
  * @pe: addr of the page entry
  * @addr: dst addr to write into pe
@@ -479,8 +480,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
  * Traces the parameters and calls the right asic functions
  * to setup the page table using the DMA.
  */
-static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
-				   struct amdgpu_pte_update_params *params,
+static void amdgpu_vm_update_pages(struct amdgpu_pte_update_params *params,
 				   uint64_t pe, uint64_t addr,
 				   unsigned count, uint32_t incr,
 				   uint32_t flags)
@@ -488,20 +488,20 @@ static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
 	trace_amdgpu_vm_set_page(pe, addr, count, incr, flags);
 
 	if (params->src) {
-		amdgpu_vm_copy_pte(adev, params->ib,
+		amdgpu_vm_copy_pte(params->adev, params->ib,
 			pe, (params->src + (addr >> 12) * 8), count);
 
 	} else if (params->pages_addr) {
-		amdgpu_vm_write_pte(adev, params->ib,
+		amdgpu_vm_write_pte(params->adev, params->ib,
 			params->pages_addr,
 			pe, addr, count, incr, flags);
 
 	} else if (count < 3) {
-		amdgpu_vm_write_pte(adev, params->ib, NULL, pe, addr,
+		amdgpu_vm_write_pte(params->adev, params->ib, NULL, pe, addr,
 				    count, incr, flags);
 
 	} else {
-		amdgpu_vm_set_pte_pde(adev, params->ib, pe, addr,
+		amdgpu_vm_set_pte_pde(params->adev, params->ib, pe, addr,
 				      count, incr, flags);
 	}
 }
@@ -526,7 +526,6 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 	uint64_t addr;
 	int r;
 
-	memset(&params, 0, sizeof(params));
 	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
 
 	r = reservation_object_reserve_shared(bo->tbo.resv);
@@ -544,9 +543,10 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 	if (r)
 		goto error;
 
+	memset(&params, 0, sizeof(params));
+	params.adev = adev;
 	params.ib = &job->ibs[0];
-	amdgpu_vm_update_pages(adev, &params, addr, 0, entries,
-			       0, 0);
+	amdgpu_vm_update_pages(&params, addr, 0, entries, 0, 0);
 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
 
 	WARN_ON(job->ibs[0].length_dw > 64);
@@ -623,7 +623,6 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 
 	int r;
 
-	memset(&params, 0, sizeof(params));
 	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
 
 	/* padding, etc. */
@@ -636,6 +635,8 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 	if (r)
 		return r;
 
+	memset(&params, 0, sizeof(params));
+	params.adev = adev;
 	params.ib = &job->ibs[0];
 
 	/* walk over the address space and update the page directory */
@@ -656,9 +657,8 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 		    ((last_pt + incr * count) != pt)) {
 
 			if (count) {
-				amdgpu_vm_update_pages(adev, &params,
-						       last_pde, last_pt,
-						       count, incr,
+				amdgpu_vm_update_pages(&params, last_pde,
+						       last_pt, count, incr,
 						       AMDGPU_PTE_VALID);
 			}
 
@@ -671,8 +671,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 	}
 
 	if (count)
-		amdgpu_vm_update_pages(adev, &params,
-					last_pde, last_pt,
+		amdgpu_vm_update_pages(&params, last_pde, last_pt,
 					count, incr, AMDGPU_PTE_VALID);
 
 	if (params.ib->length_dw != 0) {
@@ -704,15 +703,13 @@ error_free:
 /**
  * amdgpu_vm_frag_ptes - add fragment information to PTEs
  *
- * @adev: amdgpu_device pointer
  * @params: see amdgpu_pte_update_params definition
  * @pe_start: first PTE to handle
  * @pe_end: last PTE to handle
  * @addr: addr those PTEs should point to
  * @flags: hw mapping flags
  */
-static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
-				struct amdgpu_pte_update_params	*params,
+static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
 				uint64_t pe_start, uint64_t pe_end,
 				uint64_t addr, uint32_t flags)
 {
@@ -753,38 +750,36 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
 		!(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) {
 
 		count = (pe_end - pe_start) / 8;
-		amdgpu_vm_update_pages(adev, params, pe_start,
-				       addr, count, AMDGPU_GPU_PAGE_SIZE,
-				       flags);
+		amdgpu_vm_update_pages(params, pe_start, addr, count,
+				       AMDGPU_GPU_PAGE_SIZE, flags);
 		return;
 	}
 
 	/* handle the 4K area at the beginning */
 	if (pe_start != frag_start) {
 		count = (frag_start - pe_start) / 8;
-		amdgpu_vm_update_pages(adev, params, pe_start, addr,
-				       count, AMDGPU_GPU_PAGE_SIZE, flags);
+		amdgpu_vm_update_pages(params, pe_start, addr, count,
+				       AMDGPU_GPU_PAGE_SIZE, flags);
 		addr += AMDGPU_GPU_PAGE_SIZE * count;
 	}
 
 	/* handle the area in the middle */
 	count = (frag_end - frag_start) / 8;
-	amdgpu_vm_update_pages(adev, params, frag_start, addr, count,
+	amdgpu_vm_update_pages(params, frag_start, addr, count,
 			       AMDGPU_GPU_PAGE_SIZE, flags | frag_flags);
 
 	/* handle the 4K area at the end */
 	if (frag_end != pe_end) {
 		addr += AMDGPU_GPU_PAGE_SIZE * count;
 		count = (pe_end - frag_end) / 8;
-		amdgpu_vm_update_pages(adev, params, frag_end, addr,
-				       count, AMDGPU_GPU_PAGE_SIZE, flags);
+		amdgpu_vm_update_pages(params, frag_end, addr, count,
+				       AMDGPU_GPU_PAGE_SIZE, flags);
 	}
 }
 
 /**
  * amdgpu_vm_update_ptes - make sure that page tables are valid
  *
- * @adev: amdgpu_device pointer
  * @params: see amdgpu_pte_update_params definition
  * @vm: requested vm
  * @start: start of GPU address range
@@ -794,8 +789,7 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
  *
  * Update the page tables in the range @start - @end.
  */
-static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
-				  struct amdgpu_pte_update_params *params,
+static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 				  struct amdgpu_vm *vm,
 				  uint64_t start, uint64_t end,
 				  uint64_t dst, uint32_t flags)
@@ -848,8 +842,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 			*/
 			cur_pe_end += 8 * nptes;
 		} else {
-			amdgpu_vm_frag_ptes(adev, params,
-					    cur_pe_start, cur_pe_end,
+			amdgpu_vm_frag_ptes(params, cur_pe_start, cur_pe_end,
 					    cur_dst, flags);
 
 			cur_pe_start = next_pe_start;
@@ -862,8 +855,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
 	}
 
-	amdgpu_vm_frag_ptes(adev, params, cur_pe_start,
-			    cur_pe_end, cur_dst, flags);
+	amdgpu_vm_frag_ptes(params, cur_pe_start, cur_pe_end, cur_dst, flags);
 }
 
 /**
@@ -901,7 +893,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	int r;
 
 	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
+
 	memset(&params, 0, sizeof(params));
+	params.adev = adev;
 	params.src = src;
 	params.pages_addr = pages_addr;
 
@@ -958,8 +952,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (r)
 		goto error_free;
 
-	amdgpu_vm_update_ptes(adev, &params, vm, start,
-			      last + 1, addr, flags);
+	amdgpu_vm_update_ptes(&params, vm, start, last + 1, addr, flags);
 
 	amdgpu_ring_pad_ib(ring, params.ib);
 	WARN_ON(params.ib->length_dw > ndw);
-- 
2.5.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 5/6] drm/amdgpu: flip frag_ptes and update_pts
       [not found] ` <1470747172-2853-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
                     ` (2 preceding siblings ...)
  2016-08-09 12:52   ` [PATCH 4/6] drm/amdgpu: add adev to the pte_update_params Christian König
@ 2016-08-09 12:52   ` Christian König
  2016-08-09 12:52   ` [PATCH 6/6] drm/amdgpu: use more than 64KB fragment size if possible Christian König
  4 siblings, 0 replies; 10+ messages in thread
From: Christian König @ 2016-08-09 12:52 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

We can add the fragment params before we split the update for the page tables.
That should save a few CPU cycles for larger updates.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 166 ++++++++++++++++-----------------
 1 file changed, 79 insertions(+), 87 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 6d90b2c..e6c030b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -701,83 +701,6 @@ error_free:
 }
 
 /**
- * amdgpu_vm_frag_ptes - add fragment information to PTEs
- *
- * @params: see amdgpu_pte_update_params definition
- * @pe_start: first PTE to handle
- * @pe_end: last PTE to handle
- * @addr: addr those PTEs should point to
- * @flags: hw mapping flags
- */
-static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
-				uint64_t pe_start, uint64_t pe_end,
-				uint64_t addr, uint32_t flags)
-{
-	/**
-	 * The MC L1 TLB supports variable sized pages, based on a fragment
-	 * field in the PTE. When this field is set to a non-zero value, page
-	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
-	 * flags are considered valid for all PTEs within the fragment range
-	 * and corresponding mappings are assumed to be physically contiguous.
-	 *
-	 * The L1 TLB can store a single PTE for the whole fragment,
-	 * significantly increasing the space available for translation
-	 * caching. This leads to large improvements in throughput when the
-	 * TLB is under pressure.
-	 *
-	 * The L2 TLB distributes small and large fragments into two
-	 * asymmetric partitions. The large fragment cache is significantly
-	 * larger. Thus, we try to use large fragments wherever possible.
-	 * Userspace can support this by aligning virtual base address and
-	 * allocation size to the fragment size.
-	 */
-
-	/* SI and newer are optimized for 64KB */
-	uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG);
-	uint64_t frag_align = 0x80;
-
-	uint64_t frag_start = ALIGN(pe_start, frag_align);
-	uint64_t frag_end = pe_end & ~(frag_align - 1);
-
-	unsigned count;
-
-	/* Abort early if there isn't anything to do */
-	if (pe_start == pe_end)
-		return;
-
-	/* system pages are non continuously */
-	if (params->src || params->pages_addr ||
-		!(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) {
-
-		count = (pe_end - pe_start) / 8;
-		amdgpu_vm_update_pages(params, pe_start, addr, count,
-				       AMDGPU_GPU_PAGE_SIZE, flags);
-		return;
-	}
-
-	/* handle the 4K area at the beginning */
-	if (pe_start != frag_start) {
-		count = (frag_start - pe_start) / 8;
-		amdgpu_vm_update_pages(params, pe_start, addr, count,
-				       AMDGPU_GPU_PAGE_SIZE, flags);
-		addr += AMDGPU_GPU_PAGE_SIZE * count;
-	}
-
-	/* handle the area in the middle */
-	count = (frag_end - frag_start) / 8;
-	amdgpu_vm_update_pages(params, frag_start, addr, count,
-			       AMDGPU_GPU_PAGE_SIZE, flags | frag_flags);
-
-	/* handle the 4K area at the end */
-	if (frag_end != pe_end) {
-		addr += AMDGPU_GPU_PAGE_SIZE * count;
-		count = (pe_end - frag_end) / 8;
-		amdgpu_vm_update_pages(params, frag_end, addr, count,
-				       AMDGPU_GPU_PAGE_SIZE, flags);
-	}
-}
-
-/**
  * amdgpu_vm_update_ptes - make sure that page tables are valid
  *
  * @params: see amdgpu_pte_update_params definition
@@ -796,7 +719,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 {
 	const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
 
-	uint64_t cur_pe_start, cur_pe_end, cur_dst;
+	uint64_t cur_pe_start, cur_nptes, cur_dst;
 	uint64_t addr; /* next GPU address to be updated */
 	uint64_t pt_idx;
 	struct amdgpu_bo *pt;
@@ -815,7 +738,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 
 	cur_pe_start = amdgpu_bo_gpu_offset(pt);
 	cur_pe_start += (addr & mask) * 8;
-	cur_pe_end = cur_pe_start + 8 * nptes;
+	cur_nptes = nptes;
 	cur_dst = dst;
 
 	/* for next ptb*/
@@ -835,18 +758,19 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 		next_pe_start = amdgpu_bo_gpu_offset(pt);
 		next_pe_start += (addr & mask) * 8;
 
-		if (cur_pe_end == next_pe_start) {
+		if ((cur_pe_start + 8 * cur_nptes) == next_pe_start) {
 			/* The next ptb is consecutive to current ptb.
-			 * Don't call amdgpu_vm_frag_ptes now.
+			 * Don't call amdgpu_vm_update_pages now.
 			 * Will update two ptbs together in future.
 			*/
-			cur_pe_end += 8 * nptes;
+			cur_nptes += nptes;
 		} else {
-			amdgpu_vm_frag_ptes(params, cur_pe_start, cur_pe_end,
-					    cur_dst, flags);
+			amdgpu_vm_update_pages(params, cur_pe_start, cur_dst,
+					       cur_nptes, AMDGPU_GPU_PAGE_SIZE,
+					       flags);
 
 			cur_pe_start = next_pe_start;
-			cur_pe_end = next_pe_start + 8 * nptes;
+			cur_nptes = nptes;
 			cur_dst = dst;
 		}
 
@@ -855,7 +779,75 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
 	}
 
-	amdgpu_vm_frag_ptes(params, cur_pe_start, cur_pe_end, cur_dst, flags);
+	amdgpu_vm_update_pages(params, cur_pe_start, cur_dst, cur_nptes,
+			       AMDGPU_GPU_PAGE_SIZE, flags);
+}
+
+/*
+ * amdgpu_vm_frag_ptes - add fragment information to PTEs
+ *
+ * @params: see amdgpu_pte_update_params definition
+ * @vm: requested vm
+ * @start: first PTE to handle
+ * @end: last PTE to handle
+ * @dst: addr those PTEs should point to
+ * @flags: hw mapping flags
+ */
+static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
+				struct amdgpu_vm *vm,
+				uint64_t start, uint64_t end,
+				uint64_t dst, uint32_t flags)
+{
+	/**
+	 * The MC L1 TLB supports variable sized pages, based on a fragment
+	 * field in the PTE. When this field is set to a non-zero value, page
+	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
+	 * flags are considered valid for all PTEs within the fragment range
+	 * and corresponding mappings are assumed to be physically contiguous.
+	 *
+	 * The L1 TLB can store a single PTE for the whole fragment,
+	 * significantly increasing the space available for translation
+	 * caching. This leads to large improvements in throughput when the
+	 * TLB is under pressure.
+	 *
+	 * The L2 TLB distributes small and large fragments into two
+	 * asymmetric partitions. The large fragment cache is significantly
+	 * larger. Thus, we try to use large fragments wherever possible.
+	 * Userspace can support this by aligning virtual base address and
+	 * allocation size to the fragment size.
+	 */
+
+	/* SI and newer are optimized for 64KB */
+	uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG);
+	uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG;
+
+	uint64_t frag_start = ALIGN(start, frag_align);
+	uint64_t frag_end = end & ~(frag_align - 1);
+
+	/* system pages are not contiguous */
+	if (params->src || params->pages_addr || !(flags & AMDGPU_PTE_VALID) ||
+	    (frag_start >= frag_end)) {
+
+		amdgpu_vm_update_ptes(params, vm, start, end, dst, flags);
+		return;
+	}
+
+	/* handle the 4K area at the beginning */
+	if (start != frag_start) {
+		amdgpu_vm_update_ptes(params, vm, start, frag_start,
+				      dst, flags);
+		dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
+	}
+
+	/* handle the area in the middle */
+	amdgpu_vm_update_ptes(params, vm, frag_start, frag_end, dst,
+			      flags | frag_flags);
+
+	/* handle the 4K area at the end */
+	if (frag_end != end) {
+		dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
+		amdgpu_vm_update_ptes(params, vm, frag_end, end, dst, flags);
+	}
 }
 
 /**
@@ -952,7 +944,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (r)
 		goto error_free;
 
-	amdgpu_vm_update_ptes(&params, vm, start, last + 1, addr, flags);
+	amdgpu_vm_frag_ptes(&params, vm, start, last + 1, addr, flags);
 
 	amdgpu_ring_pad_ib(ring, params.ib);
 	WARN_ON(params.ib->length_dw > ndw);
-- 
2.5.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 6/6] drm/amdgpu: use more than 64KB fragment size if possible
       [not found] ` <1470747172-2853-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
                     ` (3 preceding siblings ...)
  2016-08-09 12:52   ` [PATCH 5/6] drm/amdgpu: flip frag_ptes and update_pts Christian König
@ 2016-08-09 12:52   ` Christian König
       [not found]     ` <1470747172-2853-6-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  4 siblings, 1 reply; 10+ messages in thread
From: Christian König @ 2016-08-09 12:52 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

We align to 64KB, but when userspace aligns even more we can easily use more.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index e6c030b..88f4109 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -817,13 +817,13 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
 	 * allocation size to the fragment size.
 	 */
 
-	/* SI and newer are optimized for 64KB */
-	uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG);
-	uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG;
+	const uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG;
 
 	uint64_t frag_start = ALIGN(start, frag_align);
 	uint64_t frag_end = end & ~(frag_align - 1);
 
+	uint32_t frag;
+
 	/* system pages are non continuously */
 	if (params->src || params->pages_addr || !(flags & AMDGPU_PTE_VALID) ||
 	    (frag_start >= frag_end)) {
@@ -832,6 +832,10 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
 		return;
 	}
 
+	/* use more than 64KB fragment size if possible */
+	frag = lower_32_bits(frag_start | frag_end);
+	frag = likely(frag) ? __ffs(frag) : 31;
+
 	/* handle the 4K area at the beginning */
 	if (start != frag_start) {
 		amdgpu_vm_update_ptes(params, vm, start, frag_start,
@@ -841,7 +845,7 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
 
 	/* handle the area in the middle */
 	amdgpu_vm_update_ptes(params, vm, frag_start, frag_end, dst,
-			      flags | frag_flags);
+			      flags | AMDGPU_PTE_FRAG(frag));
 
 	/* handle the 4K area at the end */
 	if (frag_end != end) {
-- 
2.5.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 6/6] drm/amdgpu: use more than 64KB fragment size if possible
       [not found]     ` <1470747172-2853-6-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2016-08-09 15:16       ` Alex Deucher
  2016-08-09 15:49       ` Jay Cornwall
  1 sibling, 0 replies; 10+ messages in thread
From: Alex Deucher @ 2016-08-09 15:16 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx list

On Tue, Aug 9, 2016 at 8:52 AM, Christian König <deathsimple@vodafone.de> wrote:
> From: Christian König <christian.koenig@amd.com>
>
> We align to 64KB, but when userspace aligns even more we can easily use more.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>

For the series:
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 12 ++++++++----
>  1 file changed, 8 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index e6c030b..88f4109 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -817,13 +817,13 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params   *params,
>          * allocation size to the fragment size.
>          */
>
> -       /* SI and newer are optimized for 64KB */
> -       uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG);
> -       uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG;
> +       const uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG;
>
>         uint64_t frag_start = ALIGN(start, frag_align);
>         uint64_t frag_end = end & ~(frag_align - 1);
>
> +       uint32_t frag;
> +
>         /* system pages are non continuously */
>         if (params->src || params->pages_addr || !(flags & AMDGPU_PTE_VALID) ||
>             (frag_start >= frag_end)) {
> @@ -832,6 +832,10 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params    *params,
>                 return;
>         }
>
> +       /* use more than 64KB fragment size if possible */
> +       frag = lower_32_bits(frag_start | frag_end);
> +       frag = likely(frag) ? __ffs(frag) : 31;
> +
>         /* handle the 4K area at the beginning */
>         if (start != frag_start) {
>                 amdgpu_vm_update_ptes(params, vm, start, frag_start,
> @@ -841,7 +845,7 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params     *params,
>
>         /* handle the area in the middle */
>         amdgpu_vm_update_ptes(params, vm, frag_start, frag_end, dst,
> -                             flags | frag_flags);
> +                             flags | AMDGPU_PTE_FRAG(frag));
>
>         /* handle the 4K area at the end */
>         if (frag_end != end) {
> --
> 2.5.0
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 6/6] drm/amdgpu: use more than 64KB fragment size if possible
       [not found]     ` <1470747172-2853-6-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2016-08-09 15:16       ` Alex Deucher
@ 2016-08-09 15:49       ` Jay Cornwall
       [not found]         ` <c355be2d7552630f9707c0529edf4b7e-gJmSnxjMpeIFV7jr3Ov9Ew@public.gmane.org>
  1 sibling, 1 reply; 10+ messages in thread
From: Jay Cornwall @ 2016-08-09 15:49 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 2016-08-09 07:52, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
> 
> We align to 64KB, but when userspace aligns even more we can easily use 
> more.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 12 ++++++++----
>  1 file changed, 8 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index e6c030b..88f4109 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -817,13 +817,13 @@ static void amdgpu_vm_frag_ptes(struct
> amdgpu_pte_update_params	*params,
>  	 * allocation size to the fragment size.
>  	 */
> 
> -	/* SI and newer are optimized for 64KB */
> -	uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG);
> -	uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG;
> +	const uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG;
> 
>  	uint64_t frag_start = ALIGN(start, frag_align);
>  	uint64_t frag_end = end & ~(frag_align - 1);
> 
> +	uint32_t frag;
> +
>  	/* system pages are non continuously */
>  	if (params->src || params->pages_addr || !(flags & AMDGPU_PTE_VALID) 
> ||
>  	    (frag_start >= frag_end)) {
> @@ -832,6 +832,10 @@ static void amdgpu_vm_frag_ptes(struct
> amdgpu_pte_update_params	*params,
>  		return;
>  	}
> 
> +	/* use more than 64KB fragment size if possible */
> +	frag = lower_32_bits(frag_start | frag_end);
> +	frag = likely(frag) ? __ffs(frag) : 31;
> +
>  	/* handle the 4K area at the beginning */
>  	if (start != frag_start) {
>  		amdgpu_vm_update_ptes(params, vm, start, frag_start,
> @@ -841,7 +845,7 @@ static void amdgpu_vm_frag_ptes(struct
> amdgpu_pte_update_params	*params,
> 
>  	/* handle the area in the middle */
>  	amdgpu_vm_update_ptes(params, vm, frag_start, frag_end, dst,
> -			      flags | frag_flags);
> +			      flags | AMDGPU_PTE_FRAG(frag));
> 
>  	/* handle the 4K area at the end */
>  	if (frag_end != end) {

Would this change not direct larger fragments away from the BigK TLB 
partition?

My understanding was VM_L2_CNTL3.L2_CACHE_BIGK_FRAGMENT_SIZE is an exact 
match and not a minimum size. I can't find any immediate documentation 
on that topic to confirm.

-- 
Jay Cornwall
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 6/6] drm/amdgpu: use more than 64KB fragment size if possible
       [not found]         ` <c355be2d7552630f9707c0529edf4b7e-gJmSnxjMpeIFV7jr3Ov9Ew@public.gmane.org>
@ 2016-08-09 16:35           ` Christian König
       [not found]             ` <fd4eb0d0-b7a8-6aee-bc0c-91bd1626c9b9-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 10+ messages in thread
From: Christian König @ 2016-08-09 16:35 UTC (permalink / raw)
  To: Jay Cornwall; +Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 09.08.2016 um 17:49 schrieb Jay Cornwall:
> On 2016-08-09 07:52, Christian König wrote:
>> From: Christian König <christian.koenig@amd.com>
>>
>> We align to 64KB, but when userspace aligns even more we can easily 
>> use more.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 12 ++++++++----
>>  1 file changed, 8 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index e6c030b..88f4109 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -817,13 +817,13 @@ static void amdgpu_vm_frag_ptes(struct
>> amdgpu_pte_update_params    *params,
>>       * allocation size to the fragment size.
>>       */
>>
>> -    /* SI and newer are optimized for 64KB */
>> -    uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG);
>> -    uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG;
>> +    const uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG;
>>
>>      uint64_t frag_start = ALIGN(start, frag_align);
>>      uint64_t frag_end = end & ~(frag_align - 1);
>>
>> +    uint32_t frag;
>> +
>>      /* system pages are non continuously */
>>      if (params->src || params->pages_addr || !(flags & 
>> AMDGPU_PTE_VALID) ||
>>          (frag_start >= frag_end)) {
>> @@ -832,6 +832,10 @@ static void amdgpu_vm_frag_ptes(struct
>> amdgpu_pte_update_params    *params,
>>          return;
>>      }
>>
>> +    /* use more than 64KB fragment size if possible */
>> +    frag = lower_32_bits(frag_start | frag_end);
>> +    frag = likely(frag) ? __ffs(frag) : 31;
>> +
>>      /* handle the 4K area at the beginning */
>>      if (start != frag_start) {
>>          amdgpu_vm_update_ptes(params, vm, start, frag_start,
>> @@ -841,7 +845,7 @@ static void amdgpu_vm_frag_ptes(struct
>> amdgpu_pte_update_params    *params,
>>
>>      /* handle the area in the middle */
>>      amdgpu_vm_update_ptes(params, vm, frag_start, frag_end, dst,
>> -                  flags | frag_flags);
>> +                  flags | AMDGPU_PTE_FRAG(frag));
>>
>>      /* handle the 4K area at the end */
>>      if (frag_end != end) {
>
> Would this change not direct larger fragments away from the BigK TLB 
> partition?
>
> My understanding was VM_L2_CNTL3.L2_CACHE_BIGK_FRAGMENT_SIZE is an 
> exact match and not a minimum size. I can't find any immediate 
> documentation on that topic to confirm.

Yeah I was questioning that myself as well, especially since you wrote 
in the initial patch that SI and later are optimized for 64K.

So I tested it on Tonga and Polaris10 and it seems to work as expected, 
e.g. with a 1MB fragment size the other page table entries really are no 
longer read as soon as it is cached.

But I'm not sure how exactly this partitioning of the L2 works and what 
effect it should have.

Regards,
Christian.
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 6/6] drm/amdgpu: use more than 64KB fragment size if possible
       [not found]             ` <fd4eb0d0-b7a8-6aee-bc0c-91bd1626c9b9-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2016-08-09 16:45               ` Jay Cornwall
  0 siblings, 0 replies; 10+ messages in thread
From: Jay Cornwall @ 2016-08-09 16:45 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 2016-08-09 11:35, Christian König wrote:
> Am 09.08.2016 um 17:49 schrieb Jay Cornwall:
>> On 2016-08-09 07:52, Christian König wrote:
>>> From: Christian König <christian.koenig@amd.com>
>>> 
>>> We align to 64KB, but when userspace aligns even more we can easily 
>>> use more.
>>> 
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> ---
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 12 ++++++++----
>>>  1 file changed, 8 insertions(+), 4 deletions(-)
>>> 
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index e6c030b..88f4109 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -817,13 +817,13 @@ static void amdgpu_vm_frag_ptes(struct
>>> amdgpu_pte_update_params    *params,
>>>       * allocation size to the fragment size.
>>>       */
>>> 
>>> -    /* SI and newer are optimized for 64KB */
>>> -    uint64_t frag_flags = 
>>> AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG);
>>> -    uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG;
>>> +    const uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG;
>>> 
>>>      uint64_t frag_start = ALIGN(start, frag_align);
>>>      uint64_t frag_end = end & ~(frag_align - 1);
>>> 
>>> +    uint32_t frag;
>>> +
>>>      /* system pages are non continuously */
>>>      if (params->src || params->pages_addr || !(flags & 
>>> AMDGPU_PTE_VALID) ||
>>>          (frag_start >= frag_end)) {
>>> @@ -832,6 +832,10 @@ static void amdgpu_vm_frag_ptes(struct
>>> amdgpu_pte_update_params    *params,
>>>          return;
>>>      }
>>> 
>>> +    /* use more than 64KB fragment size if possible */
>>> +    frag = lower_32_bits(frag_start | frag_end);
>>> +    frag = likely(frag) ? __ffs(frag) : 31;
>>> +
>>>      /* handle the 4K area at the beginning */
>>>      if (start != frag_start) {
>>>          amdgpu_vm_update_ptes(params, vm, start, frag_start,
>>> @@ -841,7 +845,7 @@ static void amdgpu_vm_frag_ptes(struct
>>> amdgpu_pte_update_params    *params,
>>> 
>>>      /* handle the area in the middle */
>>>      amdgpu_vm_update_ptes(params, vm, frag_start, frag_end, dst,
>>> -                  flags | frag_flags);
>>> +                  flags | AMDGPU_PTE_FRAG(frag));
>>> 
>>>      /* handle the 4K area at the end */
>>>      if (frag_end != end) {
>> 
>> Would this change not direct larger fragments away from the BigK TLB 
>> partition?
>> 
>> My understanding was VM_L2_CNTL3.L2_CACHE_BIGK_FRAGMENT_SIZE is an 
>> exact match and not a minimum size. I can't find any immediate 
>> documentation on that topic to confirm.
> 
> Yeah I was questioning that myself as well, especially since you wrote
> in the initial patch that SI and later are optimized for 64K.

The 64K figure came from VM documentation. It was otherwise unqualified, 
but my guess is it was based on VidMM's page size (64K), the average 
gaming working set size, and the number of BigK entries. Apparently it's 
still good as we haven't changed it since.

> So I tested it on Tonga and Polaris10 and it seems to work as
> expected, e.g. a 1MB fragment size really results in not reading the
> other page table entries as soon as it is cached.
> 
> But I'm not sure how exactly this partitioning of the L2 works and
> what effect it should have.

OK. As long as there's no regression on e.g. Heaven, where I benchmarked 
the original change at a gain of several percent, then it should be fine.

-- 
Jay Cornwall
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2016-08-09 16:45 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-08-09 12:52 [PATCH 1/6] drm/amdgpu: remove unused VM defines Christian König
     [not found] ` <1470747172-2853-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2016-08-09 12:52   ` [PATCH 2/6] drm/amdgpu: cleanup VM fragment defines Christian König
2016-08-09 12:52   ` [PATCH 3/6] drm/amdgpu: rename amdgpu_vm_update_params Christian König
2016-08-09 12:52   ` [PATCH 4/6] drm/amdgpu: add adev to the pte_update_params Christian König
2016-08-09 12:52   ` [PATCH 5/6] drm/amdgpu: flip frag_ptes and update_pts Christian König
2016-08-09 12:52   ` [PATCH 6/6] drm/amdgpu: use more than 64KB fragment size if possible Christian König
     [not found]     ` <1470747172-2853-6-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2016-08-09 15:16       ` Alex Deucher
2016-08-09 15:49       ` Jay Cornwall
     [not found]         ` <c355be2d7552630f9707c0529edf4b7e-gJmSnxjMpeIFV7jr3Ov9Ew@public.gmane.org>
2016-08-09 16:35           ` Christian König
     [not found]             ` <fd4eb0d0-b7a8-6aee-bc0c-91bd1626c9b9-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2016-08-09 16:45               ` Jay Cornwall

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.