All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/amdgpu: cleanup the VM code a bit more
@ 2017-08-30 13:48 Christian König
       [not found] ` <1504100904-1527-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Christian König @ 2017-08-30 13:48 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

The src isn't used any more after GART hack removal.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 4cdfb70..b08f031 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1475,7 +1475,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
  *
  * @adev: amdgpu_device pointer
  * @exclusive: fence we need to sync to
- * @src: address where to copy page table entries from
  * @pages_addr: DMA addresses to use for mapping
  * @vm: requested vm
  * @start: start of mapped range
@@ -1489,7 +1488,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
  */
 static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 				       struct dma_fence *exclusive,
-				       uint64_t src,
 				       dma_addr_t *pages_addr,
 				       struct amdgpu_vm *vm,
 				       uint64_t start, uint64_t last,
@@ -1507,7 +1505,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	memset(&params, 0, sizeof(params));
 	params.adev = adev;
 	params.vm = vm;
-	params.src = src;
 
 	/* sync to everything on unmapping */
 	if (!(flags & AMDGPU_PTE_VALID))
@@ -1547,13 +1544,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	/* one PDE write for each huge page */
 	ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6;
 
-	if (src) {
-		/* only copy commands needed */
-		ndw += ncmds * 7;
-
-		params.func = amdgpu_vm_do_copy_ptes;
-
-	} else if (pages_addr) {
+	if (pages_addr) {
 		/* copy commands needed */
 		ndw += ncmds * 7;
 
@@ -1578,7 +1569,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 	params.ib = &job->ibs[0];
 
-	if (!src && pages_addr) {
+	if (pages_addr) {
 		uint64_t *pte;
 		unsigned i;
 
@@ -1655,7 +1646,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 				      struct drm_mm_node *nodes,
 				      struct dma_fence **fence)
 {
-	uint64_t pfn, src = 0, start = mapping->start;
+	uint64_t pfn, start = mapping->start;
 	int r;
 
 	/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
@@ -1710,8 +1701,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 		addr += pfn << PAGE_SHIFT;
 
 		last = min((uint64_t)mapping->last, start + max_entries - 1);
-		r = amdgpu_vm_bo_update_mapping(adev, exclusive,
-						src, pages_addr, vm,
+		r = amdgpu_vm_bo_update_mapping(adev, exclusive, pages_addr, vm,
 						start, last, flags, addr,
 						fence);
 		if (r)
@@ -1972,7 +1962,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 		if (vm->pte_support_ats)
 			init_pte_value = AMDGPU_PTE_SYSTEM;
 
-		r = amdgpu_vm_bo_update_mapping(adev, NULL, 0, NULL, vm,
+		r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
 						mapping->start, mapping->last,
 						init_pte_value, 0, &f);
 		amdgpu_vm_free_mapping(adev, vm, mapping, f);
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/2] drm/amdgpu: handle all fragment sizes v3
       [not found] ` <1504100904-1527-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-08-30 13:48   ` Christian König
       [not found]     ` <1504100904-1527-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-08-30 21:02   ` [PATCH 1/2] drm/amdgpu: cleanup the VM code a bit more Felix Kuehling
  2017-08-31  1:56   ` zhoucm1
  2 siblings, 1 reply; 9+ messages in thread
From: Christian König @ 2017-08-30 13:48 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Roger He <Hongbo.He@amd.com>

This can improve performance for some cases.

v2 (chk): handle all sizes, simplify the patch quite a bit
v3 (chk): adjust dw estimation as well

Signed-off-by: Roger He <Hongbo.He@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 74 ++++++++++++++++++++++------------
 1 file changed, 49 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b08f031..1575657 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1415,8 +1415,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
 				uint64_t start, uint64_t end,
 				uint64_t dst, uint64_t flags)
 {
-	int r;
-
 	/**
 	 * The MC L1 TLB supports variable sized pages, based on a fragment
 	 * field in the PTE. When this field is set to a non-zero value, page
@@ -1435,39 +1433,65 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
 	 * Userspace can support this by aligning virtual base address and
 	 * allocation size to the fragment size.
 	 */
-	unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
-	uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
-	uint64_t frag_align = 1 << pages_per_frag;
+	unsigned max_frag = params->adev->vm_manager.fragment_size;
+	uint64_t frag_flags, frag_end;
+	unsigned frag;
 
-	uint64_t frag_start = ALIGN(start, frag_align);
-	uint64_t frag_end = end & ~(frag_align - 1);
+	int r;
 
 	/* system pages are non continuously */
-	if (params->src || !(flags & AMDGPU_PTE_VALID) ||
-	    (frag_start >= frag_end))
+	if (params->src || !(flags & AMDGPU_PTE_VALID))
 		return amdgpu_vm_update_ptes(params, start, end, dst, flags);
 
-	/* handle the 4K area at the beginning */
-	if (start != frag_start) {
-		r = amdgpu_vm_update_ptes(params, start, frag_start,
-					  dst, flags);
+	/* Handle the fragments at the beginning */
+	while (start != end) {
+		/* This intentionally wraps around if no bit is set */
+		frag = min(ffs(start), fls64(end - start)) - 1;
+		if (frag >= max_frag)
+			break;
+
+		frag_flags = AMDGPU_PTE_FRAG(frag);
+		frag_end = start + (1 << frag);
+
+		r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
+					  flags | frag_flags);
 		if (r)
 			return r;
-		dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
+
+		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
+		start = frag_end;
 	}
 
 	/* handle the area in the middle */
-	r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
-				  flags | frag_flags);
-	if (r)
-		return r;
+	if (start != end) {
+		frag_flags = AMDGPU_PTE_FRAG(max_frag);
+		frag_end = end & ~((1 << max_frag) - 1);
 
-	/* handle the 4K area at the end */
-	if (frag_end != end) {
-		dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
-		r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
+		r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
+					  flags | frag_flags);
+		if (r)
+			return r;
+
+		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
+		start = frag_end;
 	}
-	return r;
+
+	/* Handle the fragments at the end */
+	while (start != end) {
+		frag = fls64(end - start) - 1;
+		frag_flags = AMDGPU_PTE_FRAG(frag);
+		frag_end = start + (1 << frag);
+
+		r = amdgpu_vm_update_ptes(params, start, frag_end,
+					  dst, flags | frag_flags);
+		if (r)
+			return r;
+
+		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
+		start = frag_end;
+	}
+
+	return 0;
 }
 
 /**
@@ -1557,8 +1581,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 		/* set page commands needed */
 		ndw += ncmds * 10;
 
-		/* two extra commands for begin/end of fragment */
-		ndw += 2 * 10;
+		/* extra commands for begin/end fragments */
+		ndw += 2 * 10 * adev->vm_manager.fragment_size;
 
 		params.func = amdgpu_vm_do_set_ptes;
 	}
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: cleanup the VM code a bit more
       [not found] ` <1504100904-1527-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-08-30 13:48   ` [PATCH 2/2] drm/amdgpu: handle all fragment sizes v3 Christian König
@ 2017-08-30 21:02   ` Felix Kuehling
  2017-08-31  1:56   ` zhoucm1
  2 siblings, 0 replies; 9+ messages in thread
From: Felix Kuehling @ 2017-08-30 21:02 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

I was just about to send the same patch. Reviewed-by: Felix Kuehling
<Felix.Kuehling@amd.com>


On 2017-08-30 09:48 AM, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> The src isn't used any more after GART hack removal.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 20 +++++---------------
>  1 file changed, 5 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 4cdfb70..b08f031 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1475,7 +1475,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
>   *
>   * @adev: amdgpu_device pointer
>   * @exclusive: fence we need to sync to
> - * @src: address where to copy page table entries from
>   * @pages_addr: DMA addresses to use for mapping
>   * @vm: requested vm
>   * @start: start of mapped range
> @@ -1489,7 +1488,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
>   */
>  static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>  				       struct dma_fence *exclusive,
> -				       uint64_t src,
>  				       dma_addr_t *pages_addr,
>  				       struct amdgpu_vm *vm,
>  				       uint64_t start, uint64_t last,
> @@ -1507,7 +1505,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>  	memset(&params, 0, sizeof(params));
>  	params.adev = adev;
>  	params.vm = vm;
> -	params.src = src;
>  
>  	/* sync to everything on unmapping */
>  	if (!(flags & AMDGPU_PTE_VALID))
> @@ -1547,13 +1544,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>  	/* one PDE write for each huge page */
>  	ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6;
>  
> -	if (src) {
> -		/* only copy commands needed */
> -		ndw += ncmds * 7;
> -
> -		params.func = amdgpu_vm_do_copy_ptes;
> -
> -	} else if (pages_addr) {
> +	if (pages_addr) {
>  		/* copy commands needed */
>  		ndw += ncmds * 7;
>  
> @@ -1578,7 +1569,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>  
>  	params.ib = &job->ibs[0];
>  
> -	if (!src && pages_addr) {
> +	if (pages_addr) {
>  		uint64_t *pte;
>  		unsigned i;
>  
> @@ -1655,7 +1646,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
>  				      struct drm_mm_node *nodes,
>  				      struct dma_fence **fence)
>  {
> -	uint64_t pfn, src = 0, start = mapping->start;
> +	uint64_t pfn, start = mapping->start;
>  	int r;
>  
>  	/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
> @@ -1710,8 +1701,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
>  		addr += pfn << PAGE_SHIFT;
>  
>  		last = min((uint64_t)mapping->last, start + max_entries - 1);
> -		r = amdgpu_vm_bo_update_mapping(adev, exclusive,
> -						src, pages_addr, vm,
> +		r = amdgpu_vm_bo_update_mapping(adev, exclusive, pages_addr, vm,
>  						start, last, flags, addr,
>  						fence);
>  		if (r)
> @@ -1972,7 +1962,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>  		if (vm->pte_support_ats)
>  			init_pte_value = AMDGPU_PTE_SYSTEM;
>  
> -		r = amdgpu_vm_bo_update_mapping(adev, NULL, 0, NULL, vm,
> +		r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
>  						mapping->start, mapping->last,
>  						init_pte_value, 0, &f);
>  		amdgpu_vm_free_mapping(adev, vm, mapping, f);

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: handle all fragment sizes v3
       [not found]     ` <1504100904-1527-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-08-30 23:26       ` Felix Kuehling
       [not found]         ` <be9ee34f-0bac-750b-097d-12802ed94e2e-5C7GfCeVMHo@public.gmane.org>
  2017-08-31  2:00       ` zhoucm1
  1 sibling, 1 reply; 9+ messages in thread
From: Felix Kuehling @ 2017-08-30 23:26 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Christian König

One comment inline. With that fixed, this patch is Reviewed-by: Felix
Kuehling <Felix.Kuehling@amd.com>


On 2017-08-30 09:48 AM, Christian König wrote:
> From: Roger He <Hongbo.He@amd.com>
>
> This can improve performance for some cases.
>
> v2 (chk): handle all sizes, simplify the patch quite a bit
> v3 (chk): adjust dw estimation as well
>
> Signed-off-by: Roger He <Hongbo.He@amd.com>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 74 ++++++++++++++++++++++------------
>  1 file changed, 49 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index b08f031..1575657 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1415,8 +1415,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
>  				uint64_t start, uint64_t end,
>  				uint64_t dst, uint64_t flags)
>  {
> -	int r;
> -
>  	/**
>  	 * The MC L1 TLB supports variable sized pages, based on a fragment
>  	 * field in the PTE. When this field is set to a non-zero value, page
> @@ -1435,39 +1433,65 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
>  	 * Userspace can support this by aligning virtual base address and
>  	 * allocation size to the fragment size.
>  	 */
> -	unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
> -	uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
> -	uint64_t frag_align = 1 << pages_per_frag;
> +	unsigned max_frag = params->adev->vm_manager.fragment_size;
> +	uint64_t frag_flags, frag_end;
> +	unsigned frag;
>  
> -	uint64_t frag_start = ALIGN(start, frag_align);
> -	uint64_t frag_end = end & ~(frag_align - 1);
> +	int r;
>  
>  	/* system pages are non continuously */
> -	if (params->src || !(flags & AMDGPU_PTE_VALID) ||
> -	    (frag_start >= frag_end))
> +	if (params->src || !(flags & AMDGPU_PTE_VALID))
>  		return amdgpu_vm_update_ptes(params, start, end, dst, flags);
>  
> -	/* handle the 4K area at the beginning */
> -	if (start != frag_start) {
> -		r = amdgpu_vm_update_ptes(params, start, frag_start,
> -					  dst, flags);
> +	/* Handle the fragments at the beginning */
> +	while (start != end) {
> +		/* This intentionally wraps around if no bit is set */
> +		frag = min(ffs(start), fls64(end - start)) - 1;
> +		if (frag >= max_frag)
> +			break;
> +
> +		frag_flags = AMDGPU_PTE_FRAG(frag);
> +		frag_end = start + (1 << frag);
> +
> +		r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
> +					  flags | frag_flags);
>  		if (r)
>  			return r;
> -		dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
> +
> +		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
> +		start = frag_end;
>  	}
>  
>  	/* handle the area in the middle */
> -	r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
> -				  flags | frag_flags);
> -	if (r)
> -		return r;
> +	if (start != end) {
> +		frag_flags = AMDGPU_PTE_FRAG(max_frag);
> +		frag_end = end & ~((1 << max_frag) - 1);

You need a cast to uint64_t to make sure your mask is big enough and
doesn't wipe out the high address bits:

    frag_end = end & ~(uint64_t)((1 << max_frag) - 1)

>  
> -	/* handle the 4K area at the end */
> -	if (frag_end != end) {
> -		dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
> -		r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
> +		r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
> +					  flags | frag_flags);
> +		if (r)
> +			return r;
> +
> +		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
> +		start = frag_end;
>  	}
> -	return r;
> +
> +	/* Handle the fragments at the end */
> +	while (start != end) {
> +		frag = fls64(end - start) - 1;
> +		frag_flags = AMDGPU_PTE_FRAG(frag);
> +		frag_end = start + (1 << frag);
> +
> +		r = amdgpu_vm_update_ptes(params, start, frag_end,
> +					  dst, flags | frag_flags);
> +		if (r)
> +			return r;
> +
> +		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
> +		start = frag_end;
> +	}
> +
> +	return 0;
>  }
>  
>  /**
> @@ -1557,8 +1581,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>  		/* set page commands needed */
>  		ndw += ncmds * 10;
>  
> -		/* two extra commands for begin/end of fragment */
> -		ndw += 2 * 10;
> +		/* extra commands for begin/end fragments */
> +		ndw += 2 * 10 * adev->vm_manager.fragment_size;
>  
>  		params.func = amdgpu_vm_do_set_ptes;
>  	}

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: cleanup the VM code a bit more
       [not found] ` <1504100904-1527-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-08-30 13:48   ` [PATCH 2/2] drm/amdgpu: handle all fragment sizes v3 Christian König
  2017-08-30 21:02   ` [PATCH 1/2] drm/amdgpu: cleanup the VM code a bit more Felix Kuehling
@ 2017-08-31  1:56   ` zhoucm1
  2 siblings, 0 replies; 9+ messages in thread
From: zhoucm1 @ 2017-08-31  1:56 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW



On 2017年08月30日 21:48, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> The src isn't used any more after GART hack removal.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 20 +++++---------------
>   1 file changed, 5 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 4cdfb70..b08f031 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1475,7 +1475,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
>    *
>    * @adev: amdgpu_device pointer
>    * @exclusive: fence we need to sync to
> - * @src: address where to copy page table entries from
>    * @pages_addr: DMA addresses to use for mapping
>    * @vm: requested vm
>    * @start: start of mapped range
> @@ -1489,7 +1488,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
>    */
>   static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   				       struct dma_fence *exclusive,
> -				       uint64_t src,
>   				       dma_addr_t *pages_addr,
>   				       struct amdgpu_vm *vm,
>   				       uint64_t start, uint64_t last,
> @@ -1507,7 +1505,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   	memset(&params, 0, sizeof(params));
>   	params.adev = adev;
>   	params.vm = vm;
> -	params.src = src;
>   
>   	/* sync to everything on unmapping */
>   	if (!(flags & AMDGPU_PTE_VALID))
> @@ -1547,13 +1544,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   	/* one PDE write for each huge page */
>   	ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6;
>   
> -	if (src) {
> -		/* only copy commands needed */
> -		ndw += ncmds * 7;
> -
> -		params.func = amdgpu_vm_do_copy_ptes;
> -
> -	} else if (pages_addr) {
> +	if (pages_addr) {
>   		/* copy commands needed */
>   		ndw += ncmds * 7;
>   
> @@ -1578,7 +1569,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   
>   	params.ib = &job->ibs[0];
>   
> -	if (!src && pages_addr) {
> +	if (pages_addr) {
>   		uint64_t *pte;
>   		unsigned i;
>   
> @@ -1655,7 +1646,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
>   				      struct drm_mm_node *nodes,
>   				      struct dma_fence **fence)
>   {
> -	uint64_t pfn, src = 0, start = mapping->start;
> +	uint64_t pfn, start = mapping->start;
>   	int r;
>   
>   	/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
> @@ -1710,8 +1701,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
>   		addr += pfn << PAGE_SHIFT;
>   
>   		last = min((uint64_t)mapping->last, start + max_entries - 1);
> -		r = amdgpu_vm_bo_update_mapping(adev, exclusive,
> -						src, pages_addr, vm,
> +		r = amdgpu_vm_bo_update_mapping(adev, exclusive, pages_addr, vm,
>   						start, last, flags, addr,
>   						fence);
>   		if (r)
> @@ -1972,7 +1962,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>   		if (vm->pte_support_ats)
>   			init_pte_value = AMDGPU_PTE_SYSTEM;
>   
> -		r = amdgpu_vm_bo_update_mapping(adev, NULL, 0, NULL, vm,
> +		r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
>   						mapping->start, mapping->last,
>   						init_pte_value, 0, &f);
>   		amdgpu_vm_free_mapping(adev, vm, mapping, f);

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: handle all fragment sizes v3
       [not found]     ` <1504100904-1527-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-08-30 23:26       ` Felix Kuehling
@ 2017-08-31  2:00       ` zhoucm1
       [not found]         ` <8f3f60fa-6633-4076-4480-220fd3d22a5c-5C7GfCeVMHo@public.gmane.org>
  1 sibling, 1 reply; 9+ messages in thread
From: zhoucm1 @ 2017-08-31  2:00 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW



On 2017年08月30日 21:48, Christian König wrote:
> From: Roger He <Hongbo.He@amd.com>
>
> This can improve performance for some cases.
>
> v2 (chk): handle all sizes, simplify the patch quite a bit
> v3 (chk): adjust dw estimation as well
>
> Signed-off-by: Roger He <Hongbo.He@amd.com>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 74 ++++++++++++++++++++++------------
>   1 file changed, 49 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index b08f031..1575657 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1415,8 +1415,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
>   				uint64_t start, uint64_t end,
>   				uint64_t dst, uint64_t flags)
>   {
> -	int r;
> -
>   	/**
>   	 * The MC L1 TLB supports variable sized pages, based on a fragment
>   	 * field in the PTE. When this field is set to a non-zero value, page
> @@ -1435,39 +1433,65 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
>   	 * Userspace can support this by aligning virtual base address and
>   	 * allocation size to the fragment size.
>   	 */
> -	unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
> -	uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
> -	uint64_t frag_align = 1 << pages_per_frag;
> +	unsigned max_frag = params->adev->vm_manager.fragment_size;
> +	uint64_t frag_flags, frag_end;
> +	unsigned frag;
>   
> -	uint64_t frag_start = ALIGN(start, frag_align);
> -	uint64_t frag_end = end & ~(frag_align - 1);
> +	int r;
>   
>   	/* system pages are non continuously */
> -	if (params->src || !(flags & AMDGPU_PTE_VALID) ||
> -	    (frag_start >= frag_end))
> +	if (params->src || !(flags & AMDGPU_PTE_VALID))
>   		return amdgpu_vm_update_ptes(params, start, end, dst, flags);
>   
> -	/* handle the 4K area at the beginning */
> -	if (start != frag_start) {
> -		r = amdgpu_vm_update_ptes(params, start, frag_start,
> -					  dst, flags);
> +	/* Handle the fragments at the beginning */
> +	while (start != end) {
> +		/* This intentionally wraps around if no bit is set */
> +		frag = min(ffs(start), fls64(end - start)) - 1;
> +		if (frag >= max_frag)
> +			break;
Seem we can simplify more, frag = min(frag, max_frag) instead of break, 
this way, one while will solve all loop.

Regards,
David Zhou
> +
> +		frag_flags = AMDGPU_PTE_FRAG(frag);
> +		frag_end = start + (1 << frag);
> +
> +		r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
> +					  flags | frag_flags);
>   		if (r)
>   			return r;
> -		dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
> +
> +		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
> +		start = frag_end;
>   	}
>   
>   	/* handle the area in the middle */
> -	r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
> -				  flags | frag_flags);
> -	if (r)
> -		return r;
> +	if (start != end) {
> +		frag_flags = AMDGPU_PTE_FRAG(max_frag);
> +		frag_end = end & ~((1 << max_frag) - 1);
>   
> -	/* handle the 4K area at the end */
> -	if (frag_end != end) {
> -		dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
> -		r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
> +		r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
> +					  flags | frag_flags);
> +		if (r)
> +			return r;
> +
> +		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
> +		start = frag_end;
>   	}
> -	return r;
> +
> +	/* Handle the fragments at the end */
> +	while (start != end) {
> +		frag = fls64(end - start) - 1;
> +		frag_flags = AMDGPU_PTE_FRAG(frag);
> +		frag_end = start + (1 << frag);
> +
> +		r = amdgpu_vm_update_ptes(params, start, frag_end,
> +					  dst, flags | frag_flags);
> +		if (r)
> +			return r;
> +
> +		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
> +		start = frag_end;
> +	}
> +
> +	return 0;
>   }
>   
>   /**
> @@ -1557,8 +1581,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   		/* set page commands needed */
>   		ndw += ncmds * 10;
>   
> -		/* two extra commands for begin/end of fragment */
> -		ndw += 2 * 10;
> +		/* extra commands for begin/end fragments */
> +		ndw += 2 * 10 * adev->vm_manager.fragment_size;
>   
>   		params.func = amdgpu_vm_do_set_ptes;
>   	}

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: handle all fragment sizes v3
       [not found]         ` <be9ee34f-0bac-750b-097d-12802ed94e2e-5C7GfCeVMHo@public.gmane.org>
@ 2017-08-31  7:31           ` Christian König
       [not found]             ` <17b3dbe7-8ac9-274a-6539-f948acdee7a3-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Christian König @ 2017-08-31  7:31 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 31.08.2017 um 01:26 schrieb Felix Kuehling:
> One comment inline. With that fixed, this patch is Reviewed-by: Felix
> Kuehling <Felix.Kuehling@amd.com>
>
>
> On 2017-08-30 09:48 AM, Christian König wrote:
>> From: Roger He <Hongbo.He@amd.com>
>>
>> This can improve performance for some cases.
>>
>> v2 (chk): handle all sizes, simplify the patch quite a bit
>> v3 (chk): adjust dw estimation as well
>>
>> Signed-off-by: Roger He <Hongbo.He@amd.com>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 74 ++++++++++++++++++++++------------
>>   1 file changed, 49 insertions(+), 25 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index b08f031..1575657 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -1415,8 +1415,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
>>   				uint64_t start, uint64_t end,
>>   				uint64_t dst, uint64_t flags)
>>   {
>> -	int r;
>> -
>>   	/**
>>   	 * The MC L1 TLB supports variable sized pages, based on a fragment
>>   	 * field in the PTE. When this field is set to a non-zero value, page
>> @@ -1435,39 +1433,65 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
>>   	 * Userspace can support this by aligning virtual base address and
>>   	 * allocation size to the fragment size.
>>   	 */
>> -	unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
>> -	uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
>> -	uint64_t frag_align = 1 << pages_per_frag;
>> +	unsigned max_frag = params->adev->vm_manager.fragment_size;
>> +	uint64_t frag_flags, frag_end;
>> +	unsigned frag;
>>   
>> -	uint64_t frag_start = ALIGN(start, frag_align);
>> -	uint64_t frag_end = end & ~(frag_align - 1);
>> +	int r;
>>   
>>   	/* system pages are non continuously */
>> -	if (params->src || !(flags & AMDGPU_PTE_VALID) ||
>> -	    (frag_start >= frag_end))
>> +	if (params->src || !(flags & AMDGPU_PTE_VALID))
>>   		return amdgpu_vm_update_ptes(params, start, end, dst, flags);
>>   
>> -	/* handle the 4K area at the beginning */
>> -	if (start != frag_start) {
>> -		r = amdgpu_vm_update_ptes(params, start, frag_start,
>> -					  dst, flags);
>> +	/* Handle the fragments at the beginning */
>> +	while (start != end) {
>> +		/* This intentionally wraps around if no bit is set */
>> +		frag = min(ffs(start), fls64(end - start)) - 1;
>> +		if (frag >= max_frag)
>> +			break;
>> +
>> +		frag_flags = AMDGPU_PTE_FRAG(frag);
>> +		frag_end = start + (1 << frag);
>> +
>> +		r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
>> +					  flags | frag_flags);
>>   		if (r)
>>   			return r;
>> -		dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
>> +
>> +		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
>> +		start = frag_end;
>>   	}
>>   
>>   	/* handle the area in the middle */
>> -	r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
>> -				  flags | frag_flags);
>> -	if (r)
>> -		return r;
>> +	if (start != end) {
>> +		frag_flags = AMDGPU_PTE_FRAG(max_frag);
>> +		frag_end = end & ~((1 << max_frag) - 1);
> You need a cast to uint64_t to make sure your mask is big enough and
> doesn't wipe out the high address bits:
>
>      frag_end = end & ~(uint64_t)((1 << max_frag) - 1)

Good point. Using "frag_end = end & ~((1ULL << max_frag) - 1);" should 
work as well, shouldn't it?

Regards,
Christian.

>
>>   
>> -	/* handle the 4K area at the end */
>> -	if (frag_end != end) {
>> -		dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
>> -		r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
>> +		r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
>> +					  flags | frag_flags);
>> +		if (r)
>> +			return r;
>> +
>> +		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
>> +		start = frag_end;
>>   	}
>> -	return r;
>> +
>> +	/* Handle the fragments at the end */
>> +	while (start != end) {
>> +		frag = fls64(end - start) - 1;
>> +		frag_flags = AMDGPU_PTE_FRAG(frag);
>> +		frag_end = start + (1 << frag);
>> +
>> +		r = amdgpu_vm_update_ptes(params, start, frag_end,
>> +					  dst, flags | frag_flags);
>> +		if (r)
>> +			return r;
>> +
>> +		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
>> +		start = frag_end;
>> +	}
>> +
>> +	return 0;
>>   }
>>   
>>   /**
>> @@ -1557,8 +1581,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>   		/* set page commands needed */
>>   		ndw += ncmds * 10;
>>   
>> -		/* two extra commands for begin/end of fragment */
>> -		ndw += 2 * 10;
>> +		/* extra commands for begin/end fragments */
>> +		ndw += 2 * 10 * adev->vm_manager.fragment_size;
>>   
>>   		params.func = amdgpu_vm_do_set_ptes;
>>   	}


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: handle all fragment sizes v3
       [not found]         ` <8f3f60fa-6633-4076-4480-220fd3d22a5c-5C7GfCeVMHo@public.gmane.org>
@ 2017-08-31  8:11           ` Christian König
  0 siblings, 0 replies; 9+ messages in thread
From: Christian König @ 2017-08-31  8:11 UTC (permalink / raw)
  To: zhoucm1, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 31.08.2017 um 04:00 schrieb zhoucm1:
>
>
> On 2017年08月30日 21:48, Christian König wrote:
>> [SNIP]
>> +    while (start != end) {
>> +        /* This intentionally wraps around if no bit is set */
>> +        frag = min(ffs(start), fls64(end - start)) - 1;
>> +        if (frag >= max_frag)
>> +            break;
> Seem we can simplify more, frag = min(frag, max_frag) instead of 
> break, this way, one while will solve all loop.

It's not so easy, but still an interesting idea.

Need to change the handling a bit, but going to give that a try in the v4.

Christian.
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: handle all fragment sizes v3
       [not found]             ` <17b3dbe7-8ac9-274a-6539-f948acdee7a3-5C7GfCeVMHo@public.gmane.org>
@ 2017-08-31 13:35               ` Kuehling, Felix
  0 siblings, 0 replies; 9+ messages in thread
From: Kuehling, Felix @ 2017-08-31 13:35 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

>>> +            frag_end = end & ~((1 << max_frag) - 1);
>> You need a cast to uint64_t to make sure your mask is big enough and
>> doesn't wipe out the high address bits:
>>
>>      frag_end = end & ~(uint64_t)((1 << max_frag) - 1)
>
> Good point. Using "frag_end = end & ~((1ULL << max_frag) - 1);" should
> work as well, shouldn't it?

Yes, that would work too.

Regards,
  Felix
________________________________________
From: Koenig, Christian
Sent: Thursday, August 31, 2017 3:31:36 AM
To: Kuehling, Felix; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/2] drm/amdgpu: handle all fragment sizes v3

Am 31.08.2017 um 01:26 schrieb Felix Kuehling:
> One comment inline. With that fixed, this patch is Reviewed-by: Felix
> Kuehling <Felix.Kuehling@amd.com>
>
>
> On 2017-08-30 09:48 AM, Christian König wrote:
>> From: Roger He <Hongbo.He@amd.com>
>>
>> This can improve performance for some cases.
>>
>> v2 (chk): handle all sizes, simplify the patch quite a bit
>> v3 (chk): adjust dw estimation as well
>>
>> Signed-off-by: Roger He <Hongbo.He@amd.com>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 74 ++++++++++++++++++++++------------
>>   1 file changed, 49 insertions(+), 25 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index b08f031..1575657 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -1415,8 +1415,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
>>                              uint64_t start, uint64_t end,
>>                              uint64_t dst, uint64_t flags)
>>   {
>> -    int r;
>> -
>>      /**
>>       * The MC L1 TLB supports variable sized pages, based on a fragment
>>       * field in the PTE. When this field is set to a non-zero value, page
>> @@ -1435,39 +1433,65 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params       *params,
>>       * Userspace can support this by aligning virtual base address and
>>       * allocation size to the fragment size.
>>       */
>> -    unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
>> -    uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
>> -    uint64_t frag_align = 1 << pages_per_frag;
>> +    unsigned max_frag = params->adev->vm_manager.fragment_size;
>> +    uint64_t frag_flags, frag_end;
>> +    unsigned frag;
>>
>> -    uint64_t frag_start = ALIGN(start, frag_align);
>> -    uint64_t frag_end = end & ~(frag_align - 1);
>> +    int r;
>>
>>      /* system pages are non continuously */
>> -    if (params->src || !(flags & AMDGPU_PTE_VALID) ||
>> -        (frag_start >= frag_end))
>> +    if (params->src || !(flags & AMDGPU_PTE_VALID))
>>              return amdgpu_vm_update_ptes(params, start, end, dst, flags);
>>
>> -    /* handle the 4K area at the beginning */
>> -    if (start != frag_start) {
>> -            r = amdgpu_vm_update_ptes(params, start, frag_start,
>> -                                      dst, flags);
>> +    /* Handle the fragments at the beginning */
>> +    while (start != end) {
>> +            /* This intentionally wraps around if no bit is set */
>> +            frag = min(ffs(start), fls64(end - start)) - 1;
>> +            if (frag >= max_frag)
>> +                    break;
>> +
>> +            frag_flags = AMDGPU_PTE_FRAG(frag);
>> +            frag_end = start + (1 << frag);
>> +
>> +            r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
>> +                                      flags | frag_flags);
>>              if (r)
>>                      return r;
>> -            dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
>> +
>> +            dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
>> +            start = frag_end;
>>      }
>>
>>      /* handle the area in the middle */
>> -    r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
>> -                              flags | frag_flags);
>> -    if (r)
>> -            return r;
>> +    if (start != end) {
>> +            frag_flags = AMDGPU_PTE_FRAG(max_frag);
>> +            frag_end = end & ~((1 << max_frag) - 1);
> You need a cast to uint64_t to make sure your mask is big enough and
> doesn't wipe out the high address bits:
>
>      frag_end = end & ~(uint64_t)((1 << max_frag) - 1)

Good point. Using "frag_end = end & ~((1ULL << max_frag) - 1);" should
work as well, shouldn't it?

Regards,
Christian.

>
>>
>> -    /* handle the 4K area at the end */
>> -    if (frag_end != end) {
>> -            dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
>> -            r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
>> +            r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
>> +                                      flags | frag_flags);
>> +            if (r)
>> +                    return r;
>> +
>> +            dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
>> +            start = frag_end;
>>      }
>> -    return r;
>> +
>> +    /* Handle the fragments at the end */
>> +    while (start != end) {
>> +            frag = fls64(end - start) - 1;
>> +            frag_flags = AMDGPU_PTE_FRAG(frag);
>> +            frag_end = start + (1 << frag);
>> +
>> +            r = amdgpu_vm_update_ptes(params, start, frag_end,
>> +                                      dst, flags | frag_flags);
>> +            if (r)
>> +                    return r;
>> +
>> +            dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
>> +            start = frag_end;
>> +    }
>> +
>> +    return 0;
>>   }
>>
>>   /**
>> @@ -1557,8 +1581,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>              /* set page commands needed */
>>              ndw += ncmds * 10;
>>
>> -            /* two extra commands for begin/end of fragment */
>> -            ndw += 2 * 10;
>> +            /* extra commands for begin/end fragments */
>> +            ndw += 2 * 10 * adev->vm_manager.fragment_size;
>>
>>              params.func = amdgpu_vm_do_set_ptes;
>>      }


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2017-08-31 13:35 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-30 13:48 [PATCH 1/2] drm/amdgpu: cleanup the VM code a bit more Christian König
     [not found] ` <1504100904-1527-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-30 13:48   ` [PATCH 2/2] drm/amdgpu: handle all fragment sizes v3 Christian König
     [not found]     ` <1504100904-1527-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-30 23:26       ` Felix Kuehling
     [not found]         ` <be9ee34f-0bac-750b-097d-12802ed94e2e-5C7GfCeVMHo@public.gmane.org>
2017-08-31  7:31           ` Christian König
     [not found]             ` <17b3dbe7-8ac9-274a-6539-f948acdee7a3-5C7GfCeVMHo@public.gmane.org>
2017-08-31 13:35               ` Kuehling, Felix
2017-08-31  2:00       ` zhoucm1
     [not found]         ` <8f3f60fa-6633-4076-4480-220fd3d22a5c-5C7GfCeVMHo@public.gmane.org>
2017-08-31  8:11           ` Christian König
2017-08-30 21:02   ` [PATCH 1/2] drm/amdgpu: cleanup the VM code a bit more Felix Kuehling
2017-08-31  1:56   ` zhoucm1

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.