All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdkfd: handle fault counters on invalid address
@ 2021-07-07 18:34 Philip Yang
  2021-07-07 18:50 ` Felix Kuehling
  0 siblings, 1 reply; 2+ messages in thread
From: Philip Yang @ 2021-07-07 18:34 UTC (permalink / raw)
  To: amd-gfx; +Cc: Philip Yang

prange is NULL if the vm retry fault is on an invalid address. In this
case prange cannot be used to get the pdd, so use adev to get the gpuidx
and then get the pdd from it, then increase the pdd vm fault counter.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 30 +++++++++++++++++-----------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 31f3f24cef6a..e7e99c5070b9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2375,21 +2375,27 @@ static bool svm_range_skip_recover(struct svm_range *prange)
 
 static void
 svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p,
-		      struct svm_range *prange, int32_t gpuidx)
+		      int32_t gpuidx)
 {
 	struct kfd_process_device *pdd;
 
-	if (gpuidx == MAX_GPU_INSTANCE)
-		/* fault is on different page of same range
-		 * or fault is skipped to recover later
-		 */
-		pdd = svm_range_get_pdd_by_adev(prange, adev);
-	else
-		/* fault recovered
-		 * or fault cannot recover because GPU no access on the range
-		 */
-		pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+	/* fault is on different page of same range
+	 * or fault is skipped to recover later
+	 * or fault is on invalid virtual address
+	 */
+	if (gpuidx == MAX_GPU_INSTANCE) {
+		uint32_t gpuid;
+		int r;
 
+		r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx);
+		if (r < 0)
+			return;
+	}
+
+	/* fault is recovered
+	 * or fault cannot recover because GPU no access on the range
+	 */
+	pdd = kfd_process_device_from_gpuidx(p, gpuidx);
 	if (pdd)
 		WRITE_ONCE(pdd->faults, pdd->faults + 1);
 }
@@ -2525,7 +2531,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 	mutex_unlock(&svms->lock);
 	mmap_read_unlock(mm);
 
-	svm_range_count_fault(adev, p, prange, gpuidx);
+	svm_range_count_fault(adev, p, gpuidx);
 
 	mmput(mm);
 out:
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH] drm/amdkfd: handle fault counters on invalid address
  2021-07-07 18:34 [PATCH] drm/amdkfd: handle fault counters on invalid address Philip Yang
@ 2021-07-07 18:50 ` Felix Kuehling
  0 siblings, 0 replies; 2+ messages in thread
From: Felix Kuehling @ 2021-07-07 18:50 UTC (permalink / raw)
  To: Philip Yang, amd-gfx

On 2021-07-07 2:34 p.m., Philip Yang wrote:
> prange is NULL if the vm retry fault is on an invalid address. In this
> case prange cannot be used to get the pdd, so use adev to get the gpuidx
> and then get the pdd from it, then increase the pdd vm fault counter.
>
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>


> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 30 +++++++++++++++++-----------
>   1 file changed, 18 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 31f3f24cef6a..e7e99c5070b9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -2375,21 +2375,27 @@ static bool svm_range_skip_recover(struct svm_range *prange)
>   
>   static void
>   svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p,
> -		      struct svm_range *prange, int32_t gpuidx)
> +		      int32_t gpuidx)
>   {
>   	struct kfd_process_device *pdd;
>   
> -	if (gpuidx == MAX_GPU_INSTANCE)
> -		/* fault is on different page of same range
> -		 * or fault is skipped to recover later
> -		 */
> -		pdd = svm_range_get_pdd_by_adev(prange, adev);
> -	else
> -		/* fault recovered
> -		 * or fault cannot recover because GPU no access on the range
> -		 */
> -		pdd = kfd_process_device_from_gpuidx(p, gpuidx);
> +	/* fault is on different page of same range
> +	 * or fault is skipped to recover later
> +	 * or fault is on invalid virtual address
> +	 */
> +	if (gpuidx == MAX_GPU_INSTANCE) {
> +		uint32_t gpuid;
> +		int r;
>   
> +		r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx);
> +		if (r < 0)
> +			return;
> +	}
> +
> +	/* fault is recovered
> +	 * or fault cannot recover because GPU no access on the range
> +	 */
> +	pdd = kfd_process_device_from_gpuidx(p, gpuidx);
>   	if (pdd)
>   		WRITE_ONCE(pdd->faults, pdd->faults + 1);
>   }
> @@ -2525,7 +2531,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>   	mutex_unlock(&svms->lock);
>   	mmap_read_unlock(mm);
>   
> -	svm_range_count_fault(adev, p, prange, gpuidx);
> +	svm_range_count_fault(adev, p, gpuidx);
>   
>   	mmput(mm);
>   out:
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-07-07 18:50 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-07 18:34 [PATCH] drm/amdkfd: handle fault counters on invalid address Philip Yang
2021-07-07 18:50 ` Felix Kuehling

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.