All of lore.kernel.org
 help / color / mirror / Atom feed
From: zhoucm1 <david1.zhou-5C7GfCeVMHo@public.gmane.org>
To: "Christian König"
	<deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
Subject: Re: [PATCH 2/2] drm/amdgpu: handle all fragment sizes v3
Date: Thu, 31 Aug 2017 10:00:35 +0800	[thread overview]
Message-ID: <8f3f60fa-6633-4076-4480-220fd3d22a5c@amd.com> (raw)
In-Reply-To: <1504100904-1527-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>



On 2017年08月30日 21:48, Christian König wrote:
> From: Roger He <Hongbo.He@amd.com>
>
> This can improve performance for some cases.
>
> v2 (chk): handle all sizes, simplify the patch quite a bit
> v3 (chk): adjust dw estimation as well
>
> Signed-off-by: Roger He <Hongbo.He@amd.com>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 74 ++++++++++++++++++++++------------
>   1 file changed, 49 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index b08f031..1575657 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1415,8 +1415,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
>   				uint64_t start, uint64_t end,
>   				uint64_t dst, uint64_t flags)
>   {
> -	int r;
> -
>   	/**
>   	 * The MC L1 TLB supports variable sized pages, based on a fragment
>   	 * field in the PTE. When this field is set to a non-zero value, page
> @@ -1435,39 +1433,65 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
>   	 * Userspace can support this by aligning virtual base address and
>   	 * allocation size to the fragment size.
>   	 */
> -	unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
> -	uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
> -	uint64_t frag_align = 1 << pages_per_frag;
> +	unsigned max_frag = params->adev->vm_manager.fragment_size;
> +	uint64_t frag_flags, frag_end;
> +	unsigned frag;
>   
> -	uint64_t frag_start = ALIGN(start, frag_align);
> -	uint64_t frag_end = end & ~(frag_align - 1);
> +	int r;
>   
>   	/* system pages are non continuously */
> -	if (params->src || !(flags & AMDGPU_PTE_VALID) ||
> -	    (frag_start >= frag_end))
> +	if (params->src || !(flags & AMDGPU_PTE_VALID))
>   		return amdgpu_vm_update_ptes(params, start, end, dst, flags);
>   
> -	/* handle the 4K area at the beginning */
> -	if (start != frag_start) {
> -		r = amdgpu_vm_update_ptes(params, start, frag_start,
> -					  dst, flags);
> +	/* Handle the fragments at the beginning */
> +	while (start != end) {
> +		/* This intentionally wraps around if no bit is set */
> +		frag = min(ffs(start), fls64(end - start)) - 1;
> +		if (frag >= max_frag)
> +			break;
It seems we can simplify this further: use frag = min(frag, max_frag) instead
of the break; that way, a single while loop handles all of the cases.

Regards,
David Zhou
> +
> +		frag_flags = AMDGPU_PTE_FRAG(frag);
> +		frag_end = start + (1 << frag);
> +
> +		r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
> +					  flags | frag_flags);
>   		if (r)
>   			return r;
> -		dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
> +
> +		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
> +		start = frag_end;
>   	}
>   
>   	/* handle the area in the middle */
> -	r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
> -				  flags | frag_flags);
> -	if (r)
> -		return r;
> +	if (start != end) {
> +		frag_flags = AMDGPU_PTE_FRAG(max_frag);
> +		frag_end = end & ~((1 << max_frag) - 1);
>   
> -	/* handle the 4K area at the end */
> -	if (frag_end != end) {
> -		dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
> -		r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
> +		r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
> +					  flags | frag_flags);
> +		if (r)
> +			return r;
> +
> +		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
> +		start = frag_end;
>   	}
> -	return r;
> +
> +	/* Handle the fragments at the end */
> +	while (start != end) {
> +		frag = fls64(end - start) - 1;
> +		frag_flags = AMDGPU_PTE_FRAG(frag);
> +		frag_end = start + (1 << frag);
> +
> +		r = amdgpu_vm_update_ptes(params, start, frag_end,
> +					  dst, flags | frag_flags);
> +		if (r)
> +			return r;
> +
> +		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
> +		start = frag_end;
> +	}
> +
> +	return 0;
>   }
>   
>   /**
> @@ -1557,8 +1581,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   		/* set page commands needed */
>   		ndw += ncmds * 10;
>   
> -		/* two extra commands for begin/end of fragment */
> -		ndw += 2 * 10;
> +		/* extra commands for begin/end fragments */
> +		ndw += 2 * 10 * adev->vm_manager.fragment_size;
>   
>   		params.func = amdgpu_vm_do_set_ptes;
>   	}

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

  parent reply	other threads:[~2017-08-31  2:00 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-30 13:48 [PATCH 1/2] drm/amdgpu: cleanup the VM code a bit more Christian König
     [not found] ` <1504100904-1527-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-30 13:48   ` [PATCH 2/2] drm/amdgpu: handle all fragment sizes v3 Christian König
     [not found]     ` <1504100904-1527-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-30 23:26       ` Felix Kuehling
     [not found]         ` <be9ee34f-0bac-750b-097d-12802ed94e2e-5C7GfCeVMHo@public.gmane.org>
2017-08-31  7:31           ` Christian König
     [not found]             ` <17b3dbe7-8ac9-274a-6539-f948acdee7a3-5C7GfCeVMHo@public.gmane.org>
2017-08-31 13:35               ` Kuehling, Felix
2017-08-31  2:00       ` zhoucm1 [this message]
     [not found]         ` <8f3f60fa-6633-4076-4480-220fd3d22a5c-5C7GfCeVMHo@public.gmane.org>
2017-08-31  8:11           ` Christian König
2017-08-30 21:02   ` [PATCH 1/2] drm/amdgpu: cleanup the VM code a bit more Felix Kuehling
2017-08-31  1:56   ` zhoucm1

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=8f3f60fa-6633-4076-4480-220fd3d22a5c@amd.com \
    --to=david1.zhou-5c7gfcevmho@public.gmane.org \
    --cc=amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org \
    --cc=deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.