All of lore.kernel.org
 help / color / mirror / Atom feed
From: Huang Rui <ray.huang-5C7GfCeVMHo@public.gmane.org>
To: "Christian König"
	<ckoenig.leichtzumerken-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
Subject: Re: [PATCH 5/5] drm/amdgpu: fix shadow BO restoring
Date: Mon, 17 Sep 2018 17:10:00 +0800	[thread overview]
Message-ID: <20180917090901.GD25456@hr-amur2> (raw)
In-Reply-To: <20180914134257.2196-5-christian.koenig-5C7GfCeVMHo@public.gmane.org>

On Fri, Sep 14, 2018 at 03:42:57PM +0200, Christian König wrote:
> Don't grab the reservation lock any more and simplify the handling quite
> a bit.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 109 ++++++++---------------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  46 ++++--------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |   8 +--
>  3 files changed, 43 insertions(+), 120 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 899342c6dfad..1cbc372964f8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2954,54 +2954,6 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
>  	return 0;
>  }
>  
> -/**
> - * amdgpu_device_recover_vram_from_shadow - restore shadowed VRAM buffers
> - *
> - * @adev: amdgpu_device pointer
> - * @ring: amdgpu_ring for the engine handling the buffer operations
> - * @bo: amdgpu_bo buffer whose shadow is being restored
> - * @fence: dma_fence associated with the operation
> - *
> - * Restores the VRAM buffer contents from the shadow in GTT.  Used to
> - * restore things like GPUVM page tables after a GPU reset where
> - * the contents of VRAM might be lost.
> - * Returns 0 on success, negative error code on failure.
> - */
> -static int amdgpu_device_recover_vram_from_shadow(struct amdgpu_device *adev,
> -						  struct amdgpu_ring *ring,
> -						  struct amdgpu_bo *bo,
> -						  struct dma_fence **fence)
> -{
> -	uint32_t domain;
> -	int r;
> -
> -	if (!bo->shadow)
> -		return 0;
> -
> -	r = amdgpu_bo_reserve(bo, true);
> -	if (r)
> -		return r;
> -	domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
> -	/* if bo has been evicted, then no need to recover */
> -	if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
> -		r = amdgpu_bo_validate(bo->shadow);
> -		if (r) {
> -			DRM_ERROR("bo validate failed!\n");
> -			goto err;
> -		}
> -
> -		r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
> -						 NULL, fence, true);
> -		if (r) {
> -			DRM_ERROR("recover page table failed!\n");
> -			goto err;
> -		}
> -	}
> -err:
> -	amdgpu_bo_unreserve(bo);
> -	return r;
> -}
> -
>  /**
>   * amdgpu_device_recover_vram - Recover some VRAM contents
>   *
> @@ -3010,16 +2962,15 @@ static int amdgpu_device_recover_vram_from_shadow(struct amdgpu_device *adev,
>   * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
>   * restore things like GPUVM page tables after a GPU reset where
>   * the contents of VRAM might be lost.
> - * Returns 0 on success, 1 on failure.
> + *
> + * Returns:
> + * 0 on success, negative error code on failure.
>   */
>  static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
>  {
> -	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> -	struct amdgpu_bo *bo, *tmp;
>  	struct dma_fence *fence = NULL, *next = NULL;
> -	long r = 1;
> -	int i = 0;
> -	long tmo;
> +	struct amdgpu_bo *shadow;
> +	long r = 1, tmo;
>  
>  	if (amdgpu_sriov_runtime(adev))
>  		tmo = msecs_to_jiffies(8000);
> @@ -3028,44 +2979,40 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
>  
>  	DRM_INFO("recover vram bo from shadow start\n");
>  	mutex_lock(&adev->shadow_list_lock);
> -	list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
> -		next = NULL;
> -		amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next);
> +	list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
> +
> +		/* No need to recover an evicted BO */
> +		if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
> +		    shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
> +			continue;
> +
> +		r = amdgpu_bo_restore_shadow(shadow, &next);
> +		if (r)
> +			break;
> +
>  		if (fence) {
>  			r = dma_fence_wait_timeout(fence, false, tmo);
> -			if (r == 0)
> -				pr_err("wait fence %p[%d] timeout\n", fence, i);
> -			else if (r < 0)
> -				pr_err("wait fence %p[%d] interrupted\n", fence, i);
> -			if (r < 1) {
> -				dma_fence_put(fence);
> -				fence = next;
> +			dma_fence_put(fence);
> +			fence = next;
> +			if (r <= 0)
>  				break;
> -			}
> -			i++;
> +		} else {
> +			fence = next;
>  		}
> -
> -		dma_fence_put(fence);
> -		fence = next;
>  	}
>  	mutex_unlock(&adev->shadow_list_lock);
>  
> -	if (fence) {
> -		r = dma_fence_wait_timeout(fence, false, tmo);
> -		if (r == 0)
> -			pr_err("wait fence %p[%d] timeout\n", fence, i);
> -		else if (r < 0)
> -			pr_err("wait fence %p[%d] interrupted\n", fence, i);
> -
> -	}
> +	if (fence)
> +		tmo = dma_fence_wait_timeout(fence, false, tmo);
>  	dma_fence_put(fence);
>  
> -	if (r > 0)
> -		DRM_INFO("recover vram bo from shadow done\n");
> -	else
> +	if (r <= 0 || tmo <= 0) {
>  		DRM_ERROR("recover vram bo from shadow failed\n");
> +		return -EIO;
> +	}
>  
> -	return (r > 0) ? 0 : 1;
> +	DRM_INFO("recover vram bo from shadow done\n");
> +	return 0;
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 650c45c896f0..ca8a0b7a5ac3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -547,7 +547,7 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
>  	if (!r) {
>  		bo->shadow->parent = amdgpu_bo_ref(bo);
>  		mutex_lock(&adev->shadow_list_lock);
> -		list_add_tail(&bo->shadow_list, &adev->shadow_list);
> +		list_add_tail(&bo->shadow->shadow_list, &adev->shadow_list);
>  		mutex_unlock(&adev->shadow_list_lock);
>  	}
>  
> @@ -679,13 +679,10 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo)
>  }
>  
>  /**
> - * amdgpu_bo_restore_from_shadow - restore an &amdgpu_bo buffer object
> - * @adev: amdgpu device object
> - * @ring: amdgpu_ring for the engine handling the buffer operations
> - * @bo: &amdgpu_bo buffer to be restored
> - * @resv: reservation object with embedded fence
> + * amdgpu_bo_restore_shadow - restore an &amdgpu_bo shadow
> + *
> + * @shadow: &amdgpu_bo shadow to be restored
>   * @fence: dma_fence associated with the operation
> - * @direct: whether to submit the job directly
>   *
>   * Copies a buffer object's shadow content back to the object.
>   * This is used for recovering a buffer from its shadow in case of a gpu
> @@ -694,36 +691,19 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo)
>   * Returns:
>   * 0 for success or a negative error code on failure.
>   */
> -int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
> -				  struct amdgpu_ring *ring,
> -				  struct amdgpu_bo *bo,
> -				  struct reservation_object *resv,
> -				  struct dma_fence **fence,
> -				  bool direct)
> +int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence)
>  
>  {
> -	struct amdgpu_bo *shadow = bo->shadow;
> -	uint64_t bo_addr, shadow_addr;
> -	int r;
> -
> -	if (!shadow)
> -		return -EINVAL;
> -
> -	bo_addr = amdgpu_bo_gpu_offset(bo);
> -	shadow_addr = amdgpu_bo_gpu_offset(bo->shadow);
> -
> -	r = reservation_object_reserve_shared(bo->tbo.resv);
> -	if (r)
> -		goto err;
> +	struct amdgpu_device *adev = amdgpu_ttm_adev(shadow->tbo.bdev);
> +	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> +	uint64_t shadow_addr, parent_addr;
>  

May I know why won't need the reservation_object_reserve_shared() here? Is
it because we only copy buffer from shadow parent bo instead of orignal bo?

Thanks,
Ray

> -	r = amdgpu_copy_buffer(ring, shadow_addr, bo_addr,
> -			       amdgpu_bo_size(bo), resv, fence,
> -			       direct, false);
> -	if (!r)
> -		amdgpu_bo_fence(bo, *fence, true);
> +	shadow_addr = amdgpu_bo_gpu_offset(shadow);
> +	parent_addr = amdgpu_bo_gpu_offset(shadow->parent);
>  
> -err:
> -	return r;
> +	return amdgpu_copy_buffer(ring, shadow_addr, parent_addr,
> +				  amdgpu_bo_size(shadow), NULL, fence,
> +				  true, false);
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 64337ff2ad63..7d3312d0da11 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -273,12 +273,8 @@ int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
>  			       struct reservation_object *resv,
>  			       struct dma_fence **fence, bool direct);
>  int amdgpu_bo_validate(struct amdgpu_bo *bo);
> -int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
> -				  struct amdgpu_ring *ring,
> -				  struct amdgpu_bo *bo,
> -				  struct reservation_object *resv,
> -				  struct dma_fence **fence,
> -				  bool direct);
> +int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,
> +			     struct dma_fence **fence);
>  uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev,
>  					    uint32_t domain);
>  
> -- 
> 2.14.1
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

  parent reply	other threads:[~2018-09-17  9:10 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-14 13:42 [PATCH 1/5] drm/amdgpu: stop pipelining VM PDs/PTs moves Christian König
     [not found] ` <20180914134257.2196-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-09-14 13:42   ` [PATCH 2/5] drm/amdgpu: always enable shadow BOs v2 Christian König
2018-09-14 13:42   ` [PATCH 3/5] drm/amdgpu: shadow BOs don't need any alignment Christian König
2018-09-14 13:42   ` [PATCH 4/5] drm/amdgpu: always recover VRAM during GPU recovery Christian König
2018-09-14 13:42   ` [PATCH 5/5] drm/amdgpu: fix shadow BO restoring Christian König
     [not found]     ` <20180914134257.2196-5-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-09-17  9:10       ` Huang Rui [this message]
2018-09-17 11:42         ` Christian König
     [not found]           ` <e1c78397-4a41-0f74-2049-5c63a279630b-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2018-09-18  8:40             ` Huang Rui
  -- strict thread matches above, loose matches on Subject: below --
2018-09-11  9:55 [PATCH 1/5] drm/amdgpu: stop pipelining VM PDs/PTs moves Christian König
     [not found] ` <20180911095602.10152-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-09-11  9:56   ` [PATCH 5/5] drm/amdgpu: fix shadow BO restoring Christian König
     [not found]     ` <20180911095602.10152-5-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-09-13  9:29       ` Zhang, Jerry(Junwei)
     [not found]         ` <073f24f7-df48-55f6-f4fb-2d250cfd2dd1-5C7GfCeVMHo@public.gmane.org>
2018-09-14 11:54           ` Christian König
     [not found]             ` <fa8fb53f-ff84-fb45-59c7-1d52ec57fba7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2018-09-18  6:15               ` Zhang, Jerry(Junwei)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180917090901.GD25456@hr-amur2 \
    --to=ray.huang-5c7gfcevmho@public.gmane.org \
    --cc=amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org \
    --cc=ckoenig.leichtzumerken-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.