All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/amdgpu: Export function to flush TLB of specific vm hub
@ 2019-08-02 16:04 Zeng, Oak
       [not found] ` <1564761834-19210-1-git-send-email-Oak.Zeng-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Zeng, Oak @ 2019-08-02 16:04 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Kuehling, Felix, Zhou1, Tao, Zeng, Oak, Liu, Shaoyun

This is for kfd to reuse amdgpu TLB invalidation function. There
is already a gmc function flush_gpu_tlb to flush TLB on all vm
hub. On gfx10, kfd only needs to flush TLB on gfx hub but not
on mm hub. So export a function for KFD flush TLB only on gfx
hub.

Change-Id: I58ff00969f88438cfd3dc7e9deb7bff0c1bb4133
Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 4 ++++
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 1 +
 2 files changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 071145a..0bd4a4f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -88,6 +88,9 @@ struct amdgpu_vmhub {
  * GPU MC structures, functions & helpers
  */
 struct amdgpu_gmc_funcs {
+	/* flush vm tlb of specific hub */
+	void (*flush_vm_hub)(struct amdgpu_device *adev, uint32_t vmid,
+				   unsigned int vmhub, uint32_t flush_type);
 	/* flush the vm tlb via mmio */
 	void (*flush_gpu_tlb)(struct amdgpu_device *adev,
 			      uint32_t vmid, uint32_t flush_type);
@@ -180,6 +183,7 @@ struct amdgpu_gmc {
 	struct ras_common_if    *ras_if;
 };
 
+#define amdgpu_gmc_flush_vm_hub(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_vm_hub((adev), (vmid), (vmhub), (type)))
 #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type))
 #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
 #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 4e3ac10..247515d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -416,6 +416,7 @@ static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
 }
 
 static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
+	.flush_vm_hub = gmc_v10_0_flush_vm_hub,
 	.flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
 	.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
 	.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/2] drm/amdkfd/gfx10: Calling amdgpu functions to invalidate TLB
       [not found] ` <1564761834-19210-1-git-send-email-Oak.Zeng-5C7GfCeVMHo@public.gmane.org>
@ 2019-08-02 16:04   ` Zeng, Oak
  2019-08-05  9:37   ` [PATCH 1/2] drm/amdgpu: Export function to flush TLB of specific vm hub Christian König
  1 sibling, 0 replies; 9+ messages in thread
From: Zeng, Oak @ 2019-08-02 16:04 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Kuehling, Felix, Zhou1, Tao, Zeng, Oak, Liu, Shaoyun

Calling amdgpu function to invalidate TLB, instead of using a
kfd implementation. Delete the kfd local TLB invalidation
implementation.

Change-Id: Ia16fcf9091a93b8c0acdaf9981f8a5a1f9a5ca1d
Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 41 ++--------------------
 1 file changed, 3 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 39ffb07..e7ae198 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -802,42 +802,6 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
 	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
 }
 
-static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-	uint32_t req = (1 << vmid) |
-		(0 << GCVM_INVALIDATE_ENG0_REQ__FLUSH_TYPE__SHIFT) |/* legacy */
-		GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PTES_MASK |
-		GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PDE0_MASK |
-		GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PDE1_MASK |
-		GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PDE2_MASK |
-		GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L1_PTES_MASK;
-
-	mutex_lock(&adev->srbm_mutex);
-
-	/* Use light weight invalidation.
-	 *
-	 * TODO 1: agree on the right set of invalidation registers for
-	 * KFD use. Use the last one for now. Invalidate only GCHUB as
-	 * SDMA is now moved to GCHUB
-	 *
-	 * TODO 2: support range-based invalidation, requires kfg2kgd
-	 * interface change
-	 */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32),
-				0xffffffff);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32),
-				0x0000001f);
-
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ), req);
-
-	while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ACK)) &
-					(1 << vmid)))
-		cpu_relax();
-
-	mutex_unlock(&adev->srbm_mutex);
-}
-
 static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
 {
 	signed long r;
@@ -878,7 +842,8 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
 		if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
 			if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
 				== pasid) {
-				write_vmid_invalidate_request(kgd, vmid);
+				amdgpu_gmc_flush_vm_hub(adev, vmid,
+						AMDGPU_GFXHUB_0, 0);
 				break;
 			}
 		}
@@ -896,7 +861,7 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
 		return 0;
 	}
 
-	write_vmid_invalidate_request(kgd, vmid);
+	amdgpu_gmc_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0);
 	return 0;
 }
 
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: Export function to flush TLB of specific vm hub
       [not found] ` <1564761834-19210-1-git-send-email-Oak.Zeng-5C7GfCeVMHo@public.gmane.org>
  2019-08-02 16:04   ` [PATCH 2/2] drm/amdkfd/gfx10: Calling amdgpu functions to invalidate TLB Zeng, Oak
@ 2019-08-05  9:37   ` Christian König
       [not found]     ` <855377a1-69ca-891e-1dad-5b93f57671da-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  1 sibling, 1 reply; 9+ messages in thread
From: Christian König @ 2019-08-05  9:37 UTC (permalink / raw)
  To: Zeng, Oak, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Kuehling, Felix, Zhou1, Tao, Liu, Shaoyun

Am 02.08.19 um 18:04 schrieb Zeng, Oak:
> This is for kfd to reuse amdgpu TLB invalidation function. There
> is already a gmc function flush_gpu_tlb to flush TLB on all vm
> hub. On gfx10, kfd only needs to flush TLB on gfx hub but not
> on mm hub. So export a function for KFD flush TLB only on gfx
> hub.

I would rather go ahead and add another parameter to flush_gpu_tlb to 
note which hub needs flushing.

We can probably easily extend the few callers to flush all hubs needed.

Christian.

>
> Change-Id: I58ff00969f88438cfd3dc7e9deb7bff0c1bb4133
> Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 4 ++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 1 +
>   2 files changed, 5 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index 071145a..0bd4a4f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -88,6 +88,9 @@ struct amdgpu_vmhub {
>    * GPU MC structures, functions & helpers
>    */
>   struct amdgpu_gmc_funcs {
> +	/* flush vm tlb of specific hub */
> +	void (*flush_vm_hub)(struct amdgpu_device *adev, uint32_t vmid,
> +				   unsigned int vmhub, uint32_t flush_type);
>   	/* flush the vm tlb via mmio */
>   	void (*flush_gpu_tlb)(struct amdgpu_device *adev,
>   			      uint32_t vmid, uint32_t flush_type);
> @@ -180,6 +183,7 @@ struct amdgpu_gmc {
>   	struct ras_common_if    *ras_if;
>   };
>   
> +#define amdgpu_gmc_flush_vm_hub(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_vm_hub((adev), (vmid), (vmhub), (type)))
>   #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type))
>   #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
>   #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 4e3ac10..247515d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -416,6 +416,7 @@ static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
>   }
>   
>   static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
> +	.flush_vm_hub = gmc_v10_0_flush_vm_hub,
>   	.flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
>   	.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [PATCH 1/2] drm/amdgpu: Export function to flush TLB of specific vm hub
       [not found]     ` <855377a1-69ca-891e-1dad-5b93f57671da-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2019-08-07  2:40       ` Zeng, Oak
       [not found]         ` <BL0PR12MB2580778BFD71E211AC18CFAF80D40-b4cIHhjg/p/XzH18dTCKOgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Zeng, Oak @ 2019-08-07  2:40 UTC (permalink / raw)
  To: Kuehling, Felix, Koenig, Christian,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Zhang, Hawking,
	Deucher, Alexander
  Cc: Zhou1, Tao, Liu, Shaoyun

Ok, will do it.

BTW, does those codes below really needed, in function gmc_v10_0_flush_gpu_tlb. I think if we have the bug, then before below codes, when we flush TLB of gfxhub through mmio, it has already triggered the bug. Also as we already invalidated tlb on both mm and gfx hub (in the same function gmc_v10_0_flush_gpu_tlb), what is the point of below codes? Does the coded below invalidate TLB on both mm and gfx hub? Also @Zhang, Hawking@Deucher, Alexander

	/* The SDMA on Navi has a bug which can theoretically result in memory
	 * corruption if an invalidation happens at the same time as an VA
	 * translation. Avoid this by doing the invalidation from the SDMA
	 * itself.
	 */
	r = amdgpu_job_alloc_with_ib(adev, 16 * 4, &job);
	if (r)
		goto error_alloc;

	job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
	job->vm_needs_flush = true;
	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	r = amdgpu_job_submit(job, &adev->mman.entity,
			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);

Regards,
Oak

-----Original Message-----
From: Christian König <ckoenig.leichtzumerken@gmail.com> 
Sent: Monday, August 5, 2019 5:37 AM
To: Zeng, Oak <Oak.Zeng@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Kuehling, Felix <Felix.Kuehling@amd.com>; Zhou1, Tao <Tao.Zhou1@amd.com>; Liu, Shaoyun <Shaoyun.Liu@amd.com>
Subject: Re: [PATCH 1/2] drm/amdgpu: Export function to flush TLB of specific vm hub

Am 02.08.19 um 18:04 schrieb Zeng, Oak:
> This is for kfd to reuse amdgpu TLB invalidation function. There is 
> already a gmc function flush_gpu_tlb to flush TLB on all vm hub. On 
> gfx10, kfd only needs to flush TLB on gfx hub but not on mm hub. So 
> export a function for KFD flush TLB only on gfx hub.

I would rather go ahead and add another parameter to flush_gpu_tlb to note which hub needs flushing.

We can probably easily extend the few callers to flush all hubs needed.

Christian.

>
> Change-Id: I58ff00969f88438cfd3dc7e9deb7bff0c1bb4133
> Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 4 ++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 1 +
>   2 files changed, 5 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index 071145a..0bd4a4f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -88,6 +88,9 @@ struct amdgpu_vmhub {
>    * GPU MC structures, functions & helpers
>    */
>   struct amdgpu_gmc_funcs {
> +	/* flush vm tlb of specific hub */
> +	void (*flush_vm_hub)(struct amdgpu_device *adev, uint32_t vmid,
> +				   unsigned int vmhub, uint32_t flush_type);
>   	/* flush the vm tlb via mmio */
>   	void (*flush_gpu_tlb)(struct amdgpu_device *adev,
>   			      uint32_t vmid, uint32_t flush_type); @@ -180,6 +183,7 @@ 
> struct amdgpu_gmc {
>   	struct ras_common_if    *ras_if;
>   };
>   
> +#define amdgpu_gmc_flush_vm_hub(adev, vmid, vmhub, type) 
> +((adev)->gmc.gmc_funcs->flush_vm_hub((adev), (vmid), (vmhub), 
> +(type)))
>   #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type))
>   #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
>   #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) 
> (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 4e3ac10..247515d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -416,6 +416,7 @@ static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
>   }
>   
>   static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
> +	.flush_vm_hub = gmc_v10_0_flush_vm_hub,
>   	.flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
>   	.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: Export function to flush TLB of specific vm hub
       [not found]         ` <BL0PR12MB2580778BFD71E211AC18CFAF80D40-b4cIHhjg/p/XzH18dTCKOgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2019-08-07  8:50           ` Christian König
       [not found]             ` <65784ad0-7693-9c98-82ee-66713c99f49b-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Christian König @ 2019-08-07  8:50 UTC (permalink / raw)
  To: Zeng, Oak, Kuehling, Felix, Koenig, Christian,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Zhang, Hawking,
	Deucher, Alexander
  Cc: Zhou1, Tao, Liu, Shaoyun

> Does the coded below invalidate TLB on both mm and gfx hub?
No, just the gfx hub. The VMHUBs on Navi are unfortunately really buggy, 
so we had to do a lot of workarounds to get them into a state where they 
actually did what was expected from them.

One major issue for example is that you can't do MMIO based VM 
invalidation when the engine is busy. To work around this we do the 
invalidation with the (IIRC) SDMA engine as soon as that one is working.

The is the code you are noting below.

Regards,
Christian.

Am 07.08.19 um 04:40 schrieb Zeng, Oak:
> Ok, will do it.
>
> BTW, does those codes below really needed, in function gmc_v10_0_flush_gpu_tlb. I think if we have the bug, then before below codes, when we flush TLB of gfxhub through mmio, it has already triggered the bug. Also as we already invalidated tlb on both mm and gfx hub (in the same function gmc_v10_0_flush_gpu_tlb), what is the point of below codes? Does the coded below invalidate TLB on both mm and gfx hub? Also @Zhang, Hawking@Deucher, Alexander
>
> 	/* The SDMA on Navi has a bug which can theoretically result in memory
> 	 * corruption if an invalidation happens at the same time as an VA
> 	 * translation. Avoid this by doing the invalidation from the SDMA
> 	 * itself.
> 	 */
> 	r = amdgpu_job_alloc_with_ib(adev, 16 * 4, &job);
> 	if (r)
> 		goto error_alloc;
>
> 	job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
> 	job->vm_needs_flush = true;
> 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
> 	r = amdgpu_job_submit(job, &adev->mman.entity,
> 			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
>
> Regards,
> Oak
>
> -----Original Message-----
> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> Sent: Monday, August 5, 2019 5:37 AM
> To: Zeng, Oak <Oak.Zeng@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Kuehling, Felix <Felix.Kuehling@amd.com>; Zhou1, Tao <Tao.Zhou1@amd.com>; Liu, Shaoyun <Shaoyun.Liu@amd.com>
> Subject: Re: [PATCH 1/2] drm/amdgpu: Export function to flush TLB of specific vm hub
>
> Am 02.08.19 um 18:04 schrieb Zeng, Oak:
>> This is for kfd to reuse amdgpu TLB invalidation function. There is
>> already a gmc function flush_gpu_tlb to flush TLB on all vm hub. On
>> gfx10, kfd only needs to flush TLB on gfx hub but not on mm hub. So
>> export a function for KFD flush TLB only on gfx hub.
> I would rather go ahead and add another parameter to flush_gpu_tlb to note which hub needs flushing.
>
> We can probably easily extend the few callers to flush all hubs needed.
>
> Christian.
>
>> Change-Id: I58ff00969f88438cfd3dc7e9deb7bff0c1bb4133
>> Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
>> ---
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 4 ++++
>>    drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 1 +
>>    2 files changed, 5 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> index 071145a..0bd4a4f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> @@ -88,6 +88,9 @@ struct amdgpu_vmhub {
>>     * GPU MC structures, functions & helpers
>>     */
>>    struct amdgpu_gmc_funcs {
>> +	/* flush vm tlb of specific hub */
>> +	void (*flush_vm_hub)(struct amdgpu_device *adev, uint32_t vmid,
>> +				   unsigned int vmhub, uint32_t flush_type);
>>    	/* flush the vm tlb via mmio */
>>    	void (*flush_gpu_tlb)(struct amdgpu_device *adev,
>>    			      uint32_t vmid, uint32_t flush_type); @@ -180,6 +183,7 @@
>> struct amdgpu_gmc {
>>    	struct ras_common_if    *ras_if;
>>    };
>>    
>> +#define amdgpu_gmc_flush_vm_hub(adev, vmid, vmhub, type)
>> +((adev)->gmc.gmc_funcs->flush_vm_hub((adev), (vmid), (vmhub),
>> +(type)))
>>    #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type))
>>    #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
>>    #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid)
>> (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> index 4e3ac10..247515d 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> @@ -416,6 +416,7 @@ static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
>>    }
>>    
>>    static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
>> +	.flush_vm_hub = gmc_v10_0_flush_vm_hub,
>>    	.flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
>>    	.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
>>    	.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [PATCH 1/2] drm/amdgpu: Export function to flush TLB of specific vm hub
       [not found]             ` <65784ad0-7693-9c98-82ee-66713c99f49b-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2019-08-07 15:17               ` Zeng, Oak
  0 siblings, 0 replies; 9+ messages in thread
From: Zeng, Oak @ 2019-08-07 15:17 UTC (permalink / raw)
  To: Koenig, Christian, Kuehling, Felix,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Zhang, Hawking,
	Deucher, Alexander
  Cc: Zhou1, Tao, Liu, Shaoyun

Thanks. I realized I didn't read the code careful enough...The workaround is only for navi10 and navi12 - I didn't read this correctly and was thinking gfxhub tlb invalidation was done twice.

I understand the codes now. I think the HW SDMA bug has been fixed in navi14 so we don't need that WA for 14.

Regards,
Oak

-----Original Message-----
From: Christian König <ckoenig.leichtzumerken@gmail.com> 
Sent: Wednesday, August 7, 2019 4:51 AM
To: Zeng, Oak <Oak.Zeng@amd.com>; Kuehling, Felix <Felix.Kuehling@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; amd-gfx@lists.freedesktop.org; Zhang, Hawking <Hawking.Zhang@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>
Cc: Zhou1, Tao <Tao.Zhou1@amd.com>; Liu, Shaoyun <Shaoyun.Liu@amd.com>
Subject: Re: [PATCH 1/2] drm/amdgpu: Export function to flush TLB of specific vm hub

> Does the coded below invalidate TLB on both mm and gfx hub?
No, just the gfx hub. The VMHUBs on Navi are unfortunately really buggy, so we had to do a lot of workarounds to get them into a state where they actually did what was expected from them.

One major issue for example is that you can't do MMIO based VM invalidation when the engine is busy. To work around this we do the invalidation with the (IIRC) SDMA engine as soon as that one is working.

The is the code you are noting below.

Regards,
Christian.

Am 07.08.19 um 04:40 schrieb Zeng, Oak:
> Ok, will do it.
>
> BTW, does those codes below really needed, in function 
> gmc_v10_0_flush_gpu_tlb. I think if we have the bug, then before below 
> codes, when we flush TLB of gfxhub through mmio, it has already 
> triggered the bug. Also as we already invalidated tlb on both mm and 
> gfx hub (in the same function gmc_v10_0_flush_gpu_tlb), what is the 
> point of below codes? Does the coded below invalidate TLB on both mm 
> and gfx hub? Also @Zhang, Hawking@Deucher, Alexander
>
> 	/* The SDMA on Navi has a bug which can theoretically result in memory
> 	 * corruption if an invalidation happens at the same time as an VA
> 	 * translation. Avoid this by doing the invalidation from the SDMA
> 	 * itself.
> 	 */
> 	r = amdgpu_job_alloc_with_ib(adev, 16 * 4, &job);
> 	if (r)
> 		goto error_alloc;
>
> 	job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
> 	job->vm_needs_flush = true;
> 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
> 	r = amdgpu_job_submit(job, &adev->mman.entity,
> 			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
>
> Regards,
> Oak
>
> -----Original Message-----
> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> Sent: Monday, August 5, 2019 5:37 AM
> To: Zeng, Oak <Oak.Zeng@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Kuehling, Felix <Felix.Kuehling@amd.com>; Zhou1, Tao 
> <Tao.Zhou1@amd.com>; Liu, Shaoyun <Shaoyun.Liu@amd.com>
> Subject: Re: [PATCH 1/2] drm/amdgpu: Export function to flush TLB of 
> specific vm hub
>
> Am 02.08.19 um 18:04 schrieb Zeng, Oak:
>> This is for kfd to reuse amdgpu TLB invalidation function. There is 
>> already a gmc function flush_gpu_tlb to flush TLB on all vm hub. On 
>> gfx10, kfd only needs to flush TLB on gfx hub but not on mm hub. So 
>> export a function for KFD flush TLB only on gfx hub.
> I would rather go ahead and add another parameter to flush_gpu_tlb to note which hub needs flushing.
>
> We can probably easily extend the few callers to flush all hubs needed.
>
> Christian.
>
>> Change-Id: I58ff00969f88438cfd3dc7e9deb7bff0c1bb4133
>> Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
>> ---
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 4 ++++
>>    drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 1 +
>>    2 files changed, 5 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> index 071145a..0bd4a4f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> @@ -88,6 +88,9 @@ struct amdgpu_vmhub {
>>     * GPU MC structures, functions & helpers
>>     */
>>    struct amdgpu_gmc_funcs {
>> +	/* flush vm tlb of specific hub */
>> +	void (*flush_vm_hub)(struct amdgpu_device *adev, uint32_t vmid,
>> +				   unsigned int vmhub, uint32_t flush_type);
>>    	/* flush the vm tlb via mmio */
>>    	void (*flush_gpu_tlb)(struct amdgpu_device *adev,
>>    			      uint32_t vmid, uint32_t flush_type); @@ -180,6 +183,7 @@ 
>> struct amdgpu_gmc {
>>    	struct ras_common_if    *ras_if;
>>    };
>>    
>> +#define amdgpu_gmc_flush_vm_hub(adev, vmid, vmhub, type) 
>> +((adev)->gmc.gmc_funcs->flush_vm_hub((adev), (vmid), (vmhub),
>> +(type)))
>>    #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type))
>>    #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
>>    #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) 
>> (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) 
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> index 4e3ac10..247515d 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> @@ -416,6 +416,7 @@ static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
>>    }
>>    
>>    static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
>> +	.flush_vm_hub = gmc_v10_0_flush_vm_hub,
>>    	.flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
>>    	.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
>>    	.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: Export function to flush TLB of specific vm hub
       [not found] ` <1565324504-4445-1-git-send-email-Oak.Zeng-5C7GfCeVMHo@public.gmane.org>
  2019-08-13 19:17   ` Zeng, Oak
@ 2019-08-14  8:10   ` Koenig, Christian
  1 sibling, 0 replies; 9+ messages in thread
From: Koenig, Christian @ 2019-08-14  8:10 UTC (permalink / raw)
  To: Zeng, Oak, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Kuehling, Felix, Zhou1, Tao

Am 09.08.19 um 06:21 schrieb Zeng, Oak:
> This is for kfd to reuse amdgpu TLB invalidation function.
> On gfx10, kfd only needs to flush TLB on gfx hub but not
> on mm hub. So export a function for KFD flush TLB only on
> specific hub.
>
> Change-Id: I58ff00969f88438cfd3dc7e9deb7bff0c1bb4133
> Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 12 ++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c          |  9 ++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h           |  6 +-
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c            |  5 +-
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c            | 16 ++++--
>   drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c             |  6 +-
>   drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c             |  6 +-
>   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c             |  6 +-
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c             | 69 ++++++++++++-----------
>   9 files changed, 78 insertions(+), 57 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index 2695925..741a3c5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -670,7 +670,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid,
>   int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> -	int vmid;
> +	int vmid, i;
>   	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>   	uint32_t flush_type = 0;
>   
> @@ -689,8 +689,9 @@ int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
>   		if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
>   			if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
>   				== pasid) {
> -				amdgpu_gmc_flush_gpu_tlb(adev, vmid,
> -							 flush_type);
> +				for (i = 0; i < adev->num_vmhubs; i++)
> +					amdgpu_gmc_flush_gpu_tlb(adev, vmid,
> +								i, flush_type);
>   				break;
>   			}
>   		}
> @@ -702,6 +703,7 @@ int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
>   int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> +	int i;
>   
>   	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
>   		pr_err("non kfd vmid %d\n", vmid);
> @@ -723,7 +725,9 @@ int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
>   	 * TODO 2: support range-based invalidation, requires kfg2kgd
>   	 * interface change
>   	 */
> -	amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
> +	for (i = 0; i < adev->num_vmhubs; i++)
> +		amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
> +
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> index 6d11e17..a67ffff 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> @@ -248,7 +248,9 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>   	}
>   	mb();
>   	amdgpu_asic_flush_hdp(adev, NULL);
> -	amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
> +	for (i = 0; i < adev->num_vmhubs; i++)
> +		amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
> +
>   	return 0;
>   }
>   
> @@ -309,7 +311,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
>   #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
>   	unsigned i,t,p;
>   #endif
> -	int r;
> +	int r, i;
>   
>   	if (!adev->gart.ready) {
>   		WARN(1, "trying to bind memory to uninitialized GART !\n");
> @@ -333,7 +335,8 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
>   
>   	mb();
>   	amdgpu_asic_flush_hdp(adev, NULL);
> -	amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
> +	for (i = 0; i < adev->num_vmhubs; i++)
> +		amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index 071145a..8f2699e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -89,8 +89,8 @@ struct amdgpu_vmhub {
>    */
>   struct amdgpu_gmc_funcs {
>   	/* flush the vm tlb via mmio */
> -	void (*flush_gpu_tlb)(struct amdgpu_device *adev,
> -			      uint32_t vmid, uint32_t flush_type);
> +	void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
> +				uint32_t vmhub, uint32_t flush_type);
>   	/* flush the vm tlb via ring */
>   	uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
>   				       uint64_t pd_addr);
> @@ -180,7 +180,7 @@ struct amdgpu_gmc {
>   	struct ras_common_if    *ras_if;
>   };
>   
> -#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type))
> +#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
>   #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
>   #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
>   #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 9db4c4b..1adde85 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -1735,9 +1735,12 @@ static void gfx_v10_0_init_csb(struct amdgpu_device *adev)
>   
>   static void gfx_v10_0_init_pg(struct amdgpu_device *adev)
>   {
> +	int i;
> +
>   	gfx_v10_0_init_csb(adev);
>   
> -	amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
> +	for (i = 0; i < adev->num_vmhubs; i++)
> +		amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
>   
>   	/* TODO: init power gating */
>   	return;
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 4e3ac10..b8afb12 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -229,8 +229,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
>    *
>    * Flush the TLB for the requested page table.
>    */
> -static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev,
> -				    uint32_t vmid, uint32_t flush_type)
> +static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
> +					uint32_t vmhub, uint32_t flush_type)
>   {
>   	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
>   	struct dma_fence *fence;
> @@ -243,7 +243,14 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev,
>   
>   	mutex_lock(&adev->mman.gtt_window_lock);
>   
> -	gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0);
> +	if (vmhub == AMDGPU_MMHUB_0) {
> +		gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0);
> +		mutex_unlock(&adev->mman.gtt_window_lock);
> +		return;
> +	}
> +
> +	BUG_ON(vmhub != AMDGPU_GFXHUB_0);
> +
>   	if (!adev->mman.buffer_funcs_enabled ||
>   	    !adev->ib_pool_ready ||
>   	    adev->asic_type > CHIP_NAVI14 ||
> @@ -773,7 +780,8 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
>   
>   	gfxhub_v2_0_set_fault_enable_default(adev, value);
>   	mmhub_v2_0_set_fault_enable_default(adev, value);
> -	gmc_v10_0_flush_gpu_tlb(adev, 0, 0);
> +	gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0);
> +	gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0);
>   
>   	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
>   		 (unsigned)(adev->gmc.gart_size >> 20),
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> index b06d876..564fb1c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> @@ -359,8 +359,8 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
>   	return 0;
>   }
>   
> -static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev,
> -				uint32_t vmid, uint32_t flush_type)
> +static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
> +					uint32_t vmhub, uint32_t flush_type)
>   {
>   	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
>   }
> @@ -568,7 +568,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
>   	else
>   		gmc_v6_0_set_fault_enable_default(adev, true);
>   
> -	gmc_v6_0_flush_gpu_tlb(adev, 0, 0);
> +	gmc_v6_0_flush_gpu_tlb(adev, 0, 0, 0);
>   	dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",
>   		 (unsigned)(adev->gmc.gart_size >> 20),
>   		 (unsigned long long)table_addr);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> index 75aa333..9e6a233 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> @@ -430,8 +430,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
>    *
>    * Flush the TLB for the requested page table (CIK).
>    */
> -static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev,
> -				uint32_t vmid, uint32_t flush_type)
> +static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
> +					uint32_t vmhub, uint32_t flush_type)
>   {
>   	/* bits 0-15 are the VM contexts0-15 */
>   	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> @@ -674,7 +674,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
>   		WREG32(mmCHUB_CONTROL, tmp);
>   	}
>   
> -	gmc_v7_0_flush_gpu_tlb(adev, 0, 0);
> +	gmc_v7_0_flush_gpu_tlb(adev, 0, 0, 0);
>   	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
>   		 (unsigned)(adev->gmc.gart_size >> 20),
>   		 (unsigned long long)table_addr);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> index 8bf2ba3..f7d6a07 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> @@ -632,8 +632,8 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
>    *
>    * Flush the TLB for the requested page table (VI).
>    */
> -static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev,
> -				uint32_t vmid, uint32_t flush_type)
> +static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
> +					uint32_t vmhub, uint32_t flush_type)
>   {
>   	/* bits 0-15 are the VM contexts0-15 */
>   	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> @@ -918,7 +918,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
>   	else
>   		gmc_v8_0_set_fault_enable_default(adev, true);
>   
> -	gmc_v8_0_flush_gpu_tlb(adev, 0, 0);
> +	gmc_v8_0_flush_gpu_tlb(adev, 0, 0, 0);
>   	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
>   		 (unsigned)(adev->gmc.gart_size >> 20),
>   		 (unsigned long long)table_addr);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index a2aa35e..f862366 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -448,44 +448,45 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
>    *
>    * Flush the TLB for the requested page table using certain type.
>    */
> -static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
> -				uint32_t vmid, uint32_t flush_type)
> +static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
> +					uint32_t vmhub, uint32_t flush_type)
>   {
>   	const unsigned eng = 17;
> -	unsigned i, j;
> +	u32 j, tmp;
> +	struct amdgpu_vmhub *hub;
>   
> -	for (i = 0; i < adev->num_vmhubs; ++i) {
> -		struct amdgpu_vmhub *hub = &adev->vmhub[i];
> -		u32 tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type);
> +	BUG_ON(vmhub >= adev->num_vmhubs);
>   
> -		/* This is necessary for a HW workaround under SRIOV as well
> -		 * as GFXOFF under bare metal
> -		 */
> -		if (adev->gfx.kiq.ring.sched.ready &&
> -		    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
> -		    !adev->in_gpu_reset) {
> -			uint32_t req = hub->vm_inv_eng0_req + eng;
> -			uint32_t ack = hub->vm_inv_eng0_ack + eng;
> -
> -			amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp,
> -							   1 << vmid);
> -			continue;
> -		}
> +	hub = &adev->vmhub[vmhub];
> +	tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type);
>   
> -		spin_lock(&adev->gmc.invalidate_lock);
> -		WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
> -		for (j = 0; j < adev->usec_timeout; j++) {
> -			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
> -			if (tmp & (1 << vmid))
> -				break;
> -			udelay(1);
> -		}
> -		spin_unlock(&adev->gmc.invalidate_lock);
> -		if (j < adev->usec_timeout)
> -			continue;
> +	/* This is necessary for a HW workaround under SRIOV as well
> +	 * as GFXOFF under bare metal
> +	 */
> +	if (adev->gfx.kiq.ring.sched.ready &&
> +			(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
> +			!adev->in_gpu_reset) {
> +		uint32_t req = hub->vm_inv_eng0_req + eng;
> +		uint32_t ack = hub->vm_inv_eng0_ack + eng;
> +
> +		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp,
> +				1 << vmid);
> +		return;
> +	}
>   
> -		DRM_ERROR("Timeout waiting for VM flush ACK!\n");
> +	spin_lock(&adev->gmc.invalidate_lock);
> +	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
> +	for (j = 0; j < adev->usec_timeout; j++) {
> +		tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
> +		if (tmp & (1 << vmid))
> +			break;
> +		udelay(1);
>   	}
> +	spin_unlock(&adev->gmc.invalidate_lock);
> +	if (j < adev->usec_timeout)
> +		return;
> +
> +	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
>   }
>   
>   static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
> @@ -1239,7 +1240,7 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
>    */
>   static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
>   {
> -	int r;
> +	int r, i;
>   	bool value;
>   	u32 tmp;
>   
> @@ -1295,7 +1296,9 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
>   		mmhub_v9_4_set_fault_enable_default(adev, value);
>   	else
>   		mmhub_v1_0_set_fault_enable_default(adev, value);
> -	gmc_v9_0_flush_gpu_tlb(adev, 0, 0);
> +
> +	for (i = 0; i < adev->num_vmhubs; ++i)
> +		gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0);
>   
>   	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
>   		 (unsigned)(adev->gmc.gart_size >> 20),

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [PATCH 1/2] drm/amdgpu: Export function to flush TLB of specific vm hub
       [not found] ` <1565324504-4445-1-git-send-email-Oak.Zeng-5C7GfCeVMHo@public.gmane.org>
@ 2019-08-13 19:17   ` Zeng, Oak
  2019-08-14  8:10   ` Koenig, Christian
  1 sibling, 0 replies; 9+ messages in thread
From: Zeng, Oak @ 2019-08-13 19:17 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Koenig, Christian
  Cc: Kuehling, Felix, Zhou1, Tao

Ping this series. @Koenig, Christian do you have more comments on this?

Regards,
Oak

-----Original Message-----
From: Zeng, Oak <Oak.Zeng@amd.com> 
Sent: Friday, August 9, 2019 12:22 AM
To: amd-gfx@lists.freedesktop.org
Cc: Kuehling, Felix <Felix.Kuehling@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; Zhou1, Tao <Tao.Zhou1@amd.com>; Zeng, Oak <Oak.Zeng@amd.com>
Subject: [PATCH 1/2] drm/amdgpu: Export function to flush TLB of specific vm hub

This is for kfd to reuse amdgpu TLB invalidation function.
On gfx10, kfd only needs to flush TLB on gfx hub but not on mm hub. So export a function for KFD flush TLB only on specific hub.

Change-Id: I58ff00969f88438cfd3dc7e9deb7bff0c1bb4133
Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 12 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c          |  9 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h           |  6 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c            |  5 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c            | 16 ++++--
 drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c             |  6 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c             |  6 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c             |  6 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c             | 69 ++++++++++++-----------
 9 files changed, 78 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 2695925..741a3c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -670,7 +670,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid,  int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)  {
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-	int vmid;
+	int vmid, i;
 	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
 	uint32_t flush_type = 0;
 
@@ -689,8 +689,9 @@ int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
 		if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
 			if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
 				== pasid) {
-				amdgpu_gmc_flush_gpu_tlb(adev, vmid,
-							 flush_type);
+				for (i = 0; i < adev->num_vmhubs; i++)
+					amdgpu_gmc_flush_gpu_tlb(adev, vmid,
+								i, flush_type);
 				break;
 			}
 		}
@@ -702,6 +703,7 @@ int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)  int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)  {
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+	int i;
 
 	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
 		pr_err("non kfd vmid %d\n", vmid);
@@ -723,7 +725,9 @@ int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
 	 * TODO 2: support range-based invalidation, requires kfg2kgd
 	 * interface change
 	 */
-	amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
+	for (i = 0; i < adev->num_vmhubs; i++)
+		amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 6d11e17..a67ffff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -248,7 +248,9 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
 	}
 	mb();
 	amdgpu_asic_flush_hdp(adev, NULL);
-	amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
+	for (i = 0; i < adev->num_vmhubs; i++)
+		amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
+
 	return 0;
 }
 
@@ -309,7 +311,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,  #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
 	unsigned i,t,p;
 #endif
-	int r;
+	int r, i;
 
 	if (!adev->gart.ready) {
 		WARN(1, "trying to bind memory to uninitialized GART !\n"); @@ -333,7 +335,8 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
 
 	mb();
 	amdgpu_asic_flush_hdp(adev, NULL);
-	amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
+	for (i = 0; i < adev->num_vmhubs; i++)
+		amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 071145a..8f2699e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -89,8 +89,8 @@ struct amdgpu_vmhub {
  */
 struct amdgpu_gmc_funcs {
 	/* flush the vm tlb via mmio */
-	void (*flush_gpu_tlb)(struct amdgpu_device *adev,
-			      uint32_t vmid, uint32_t flush_type);
+	void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
+				uint32_t vmhub, uint32_t flush_type);
 	/* flush the vm tlb via ring */
 	uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
 				       uint64_t pd_addr);
@@ -180,7 +180,7 @@ struct amdgpu_gmc {
 	struct ras_common_if    *ras_if;
 };
 
-#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type))
+#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) 
+((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
 #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))  #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))  #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags)) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 9db4c4b..1adde85 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -1735,9 +1735,12 @@ static void gfx_v10_0_init_csb(struct amdgpu_device *adev)
 
 static void gfx_v10_0_init_pg(struct amdgpu_device *adev)  {
+	int i;
+
 	gfx_v10_0_init_csb(adev);
 
-	amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
+	for (i = 0; i < adev->num_vmhubs; i++)
+		amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
 
 	/* TODO: init power gating */
 	return;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 4e3ac10..b8afb12 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -229,8 +229,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
  *
  * Flush the TLB for the requested page table.
  */
-static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev,
-				    uint32_t vmid, uint32_t flush_type)
+static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+					uint32_t vmhub, uint32_t flush_type)
 {
 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
 	struct dma_fence *fence;
@@ -243,7 +243,14 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev,
 
 	mutex_lock(&adev->mman.gtt_window_lock);
 
-	gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0);
+	if (vmhub == AMDGPU_MMHUB_0) {
+		gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0);
+		mutex_unlock(&adev->mman.gtt_window_lock);
+		return;
+	}
+
+	BUG_ON(vmhub != AMDGPU_GFXHUB_0);
+
 	if (!adev->mman.buffer_funcs_enabled ||
 	    !adev->ib_pool_ready ||
 	    adev->asic_type > CHIP_NAVI14 ||
@@ -773,7 +780,8 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
 
 	gfxhub_v2_0_set_fault_enable_default(adev, value);
 	mmhub_v2_0_set_fault_enable_default(adev, value);
-	gmc_v10_0_flush_gpu_tlb(adev, 0, 0);
+	gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0);
+	gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0);
 
 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
 		 (unsigned)(adev->gmc.gart_size >> 20), diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index b06d876..564fb1c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -359,8 +359,8 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
 	return 0;
 }
 
-static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev,
-				uint32_t vmid, uint32_t flush_type)
+static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+					uint32_t vmhub, uint32_t flush_type)
 {
 	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);  } @@ -568,7 +568,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
 	else
 		gmc_v6_0_set_fault_enable_default(adev, true);
 
-	gmc_v6_0_flush_gpu_tlb(adev, 0, 0);
+	gmc_v6_0_flush_gpu_tlb(adev, 0, 0, 0);
 	dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",
 		 (unsigned)(adev->gmc.gart_size >> 20),
 		 (unsigned long long)table_addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 75aa333..9e6a233 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -430,8 +430,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
  *
  * Flush the TLB for the requested page table (CIK).
  */
-static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev,
-				uint32_t vmid, uint32_t flush_type)
+static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+					uint32_t vmhub, uint32_t flush_type)
 {
 	/* bits 0-15 are the VM contexts0-15 */
 	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); @@ -674,7 +674,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
 		WREG32(mmCHUB_CONTROL, tmp);
 	}
 
-	gmc_v7_0_flush_gpu_tlb(adev, 0, 0);
+	gmc_v7_0_flush_gpu_tlb(adev, 0, 0, 0);
 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
 		 (unsigned)(adev->gmc.gart_size >> 20),
 		 (unsigned long long)table_addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 8bf2ba3..f7d6a07 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -632,8 +632,8 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
  *
  * Flush the TLB for the requested page table (VI).
  */
-static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev,
-				uint32_t vmid, uint32_t flush_type)
+static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+					uint32_t vmhub, uint32_t flush_type)
 {
 	/* bits 0-15 are the VM contexts0-15 */
 	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); @@ -918,7 +918,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
 	else
 		gmc_v8_0_set_fault_enable_default(adev, true);
 
-	gmc_v8_0_flush_gpu_tlb(adev, 0, 0);
+	gmc_v8_0_flush_gpu_tlb(adev, 0, 0, 0);
 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
 		 (unsigned)(adev->gmc.gart_size >> 20),
 		 (unsigned long long)table_addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index a2aa35e..f862366 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -448,44 +448,45 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
  *
  * Flush the TLB for the requested page table using certain type.
  */
-static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
-				uint32_t vmid, uint32_t flush_type)
+static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+					uint32_t vmhub, uint32_t flush_type)
 {
 	const unsigned eng = 17;
-	unsigned i, j;
+	u32 j, tmp;
+	struct amdgpu_vmhub *hub;
 
-	for (i = 0; i < adev->num_vmhubs; ++i) {
-		struct amdgpu_vmhub *hub = &adev->vmhub[i];
-		u32 tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type);
+	BUG_ON(vmhub >= adev->num_vmhubs);
 
-		/* This is necessary for a HW workaround under SRIOV as well
-		 * as GFXOFF under bare metal
-		 */
-		if (adev->gfx.kiq.ring.sched.ready &&
-		    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
-		    !adev->in_gpu_reset) {
-			uint32_t req = hub->vm_inv_eng0_req + eng;
-			uint32_t ack = hub->vm_inv_eng0_ack + eng;
-
-			amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp,
-							   1 << vmid);
-			continue;
-		}
+	hub = &adev->vmhub[vmhub];
+	tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type);
 
-		spin_lock(&adev->gmc.invalidate_lock);
-		WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
-		for (j = 0; j < adev->usec_timeout; j++) {
-			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
-			if (tmp & (1 << vmid))
-				break;
-			udelay(1);
-		}
-		spin_unlock(&adev->gmc.invalidate_lock);
-		if (j < adev->usec_timeout)
-			continue;
+	/* This is necessary for a HW workaround under SRIOV as well
+	 * as GFXOFF under bare metal
+	 */
+	if (adev->gfx.kiq.ring.sched.ready &&
+			(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
+			!adev->in_gpu_reset) {
+		uint32_t req = hub->vm_inv_eng0_req + eng;
+		uint32_t ack = hub->vm_inv_eng0_ack + eng;
+
+		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp,
+				1 << vmid);
+		return;
+	}
 
-		DRM_ERROR("Timeout waiting for VM flush ACK!\n");
+	spin_lock(&adev->gmc.invalidate_lock);
+	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
+	for (j = 0; j < adev->usec_timeout; j++) {
+		tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
+		if (tmp & (1 << vmid))
+			break;
+		udelay(1);
 	}
+	spin_unlock(&adev->gmc.invalidate_lock);
+	if (j < adev->usec_timeout)
+		return;
+
+	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
 }
 
 static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, @@ -1239,7 +1240,7 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
  */
 static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)  {
-	int r;
+	int r, i;
 	bool value;
 	u32 tmp;
 
@@ -1295,7 +1296,9 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
 		mmhub_v9_4_set_fault_enable_default(adev, value);
 	else
 		mmhub_v1_0_set_fault_enable_default(adev, value);
-	gmc_v9_0_flush_gpu_tlb(adev, 0, 0);
+
+	for (i = 0; i < adev->num_vmhubs; ++i)
+		gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0);
 
 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
 		 (unsigned)(adev->gmc.gart_size >> 20),
--
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 1/2] drm/amdgpu: Export function to flush TLB of specific vm hub
@ 2019-08-09  4:21 Zeng, Oak
       [not found] ` <1565324504-4445-1-git-send-email-Oak.Zeng-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Zeng, Oak @ 2019-08-09  4:21 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Kuehling, Felix, Zhou1, Tao, Koenig, Christian, Zeng, Oak

This is for kfd to reuse amdgpu TLB invalidation function.
On gfx10, kfd only needs to flush TLB on gfx hub but not
on mm hub. So export a function for KFD flush TLB only on
specific hub.

Change-Id: I58ff00969f88438cfd3dc7e9deb7bff0c1bb4133
Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 12 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c          |  9 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h           |  6 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c            |  5 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c            | 16 ++++--
 drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c             |  6 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c             |  6 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c             |  6 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c             | 69 ++++++++++++-----------
 9 files changed, 78 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 2695925..741a3c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -670,7 +670,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid,
 int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-	int vmid;
+	int vmid, i;
 	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
 	uint32_t flush_type = 0;
 
@@ -689,8 +689,9 @@ int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
 		if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
 			if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
 				== pasid) {
-				amdgpu_gmc_flush_gpu_tlb(adev, vmid,
-							 flush_type);
+				for (i = 0; i < adev->num_vmhubs; i++)
+					amdgpu_gmc_flush_gpu_tlb(adev, vmid,
+								i, flush_type);
 				break;
 			}
 		}
@@ -702,6 +703,7 @@ int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
 int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+	int i;
 
 	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
 		pr_err("non kfd vmid %d\n", vmid);
@@ -723,7 +725,9 @@ int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
 	 * TODO 2: support range-based invalidation, requires kfg2kgd
 	 * interface change
 	 */
-	amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
+	for (i = 0; i < adev->num_vmhubs; i++)
+		amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 6d11e17..a67ffff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -248,7 +248,9 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
 	}
 	mb();
 	amdgpu_asic_flush_hdp(adev, NULL);
-	amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
+	for (i = 0; i < adev->num_vmhubs; i++)
+		amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
+
 	return 0;
 }
 
@@ -309,7 +311,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
 #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
 	unsigned i,t,p;
 #endif
-	int r;
+	int r, i;
 
 	if (!adev->gart.ready) {
 		WARN(1, "trying to bind memory to uninitialized GART !\n");
@@ -333,7 +335,8 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
 
 	mb();
 	amdgpu_asic_flush_hdp(adev, NULL);
-	amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
+	for (i = 0; i < adev->num_vmhubs; i++)
+		amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 071145a..8f2699e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -89,8 +89,8 @@ struct amdgpu_vmhub {
  */
 struct amdgpu_gmc_funcs {
 	/* flush the vm tlb via mmio */
-	void (*flush_gpu_tlb)(struct amdgpu_device *adev,
-			      uint32_t vmid, uint32_t flush_type);
+	void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
+				uint32_t vmhub, uint32_t flush_type);
 	/* flush the vm tlb via ring */
 	uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
 				       uint64_t pd_addr);
@@ -180,7 +180,7 @@ struct amdgpu_gmc {
 	struct ras_common_if    *ras_if;
 };
 
-#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type))
+#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
 #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
 #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
 #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 9db4c4b..1adde85 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -1735,9 +1735,12 @@ static void gfx_v10_0_init_csb(struct amdgpu_device *adev)
 
 static void gfx_v10_0_init_pg(struct amdgpu_device *adev)
 {
+	int i;
+
 	gfx_v10_0_init_csb(adev);
 
-	amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
+	for (i = 0; i < adev->num_vmhubs; i++)
+		amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
 
 	/* TODO: init power gating */
 	return;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 4e3ac10..b8afb12 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -229,8 +229,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
  *
  * Flush the TLB for the requested page table.
  */
-static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev,
-				    uint32_t vmid, uint32_t flush_type)
+static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+					uint32_t vmhub, uint32_t flush_type)
 {
 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
 	struct dma_fence *fence;
@@ -243,7 +243,14 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev,
 
 	mutex_lock(&adev->mman.gtt_window_lock);
 
-	gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0);
+	if (vmhub == AMDGPU_MMHUB_0) {
+		gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0);
+		mutex_unlock(&adev->mman.gtt_window_lock);
+		return;
+	}
+
+	BUG_ON(vmhub != AMDGPU_GFXHUB_0);
+
 	if (!adev->mman.buffer_funcs_enabled ||
 	    !adev->ib_pool_ready ||
 	    adev->asic_type > CHIP_NAVI14 ||
@@ -773,7 +780,8 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
 
 	gfxhub_v2_0_set_fault_enable_default(adev, value);
 	mmhub_v2_0_set_fault_enable_default(adev, value);
-	gmc_v10_0_flush_gpu_tlb(adev, 0, 0);
+	gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0);
+	gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0);
 
 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
 		 (unsigned)(adev->gmc.gart_size >> 20),
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index b06d876..564fb1c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -359,8 +359,8 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
 	return 0;
 }
 
-static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev,
-				uint32_t vmid, uint32_t flush_type)
+static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+					uint32_t vmhub, uint32_t flush_type)
 {
 	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
 }
@@ -568,7 +568,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
 	else
 		gmc_v6_0_set_fault_enable_default(adev, true);
 
-	gmc_v6_0_flush_gpu_tlb(adev, 0, 0);
+	gmc_v6_0_flush_gpu_tlb(adev, 0, 0, 0);
 	dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",
 		 (unsigned)(adev->gmc.gart_size >> 20),
 		 (unsigned long long)table_addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 75aa333..9e6a233 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -430,8 +430,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
  *
  * Flush the TLB for the requested page table (CIK).
  */
-static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev,
-				uint32_t vmid, uint32_t flush_type)
+static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+					uint32_t vmhub, uint32_t flush_type)
 {
 	/* bits 0-15 are the VM contexts0-15 */
 	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
@@ -674,7 +674,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
 		WREG32(mmCHUB_CONTROL, tmp);
 	}
 
-	gmc_v7_0_flush_gpu_tlb(adev, 0, 0);
+	gmc_v7_0_flush_gpu_tlb(adev, 0, 0, 0);
 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
 		 (unsigned)(adev->gmc.gart_size >> 20),
 		 (unsigned long long)table_addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 8bf2ba3..f7d6a07 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -632,8 +632,8 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
  *
  * Flush the TLB for the requested page table (VI).
  */
-static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev,
-				uint32_t vmid, uint32_t flush_type)
+static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+					uint32_t vmhub, uint32_t flush_type)
 {
 	/* bits 0-15 are the VM contexts0-15 */
 	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
@@ -918,7 +918,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
 	else
 		gmc_v8_0_set_fault_enable_default(adev, true);
 
-	gmc_v8_0_flush_gpu_tlb(adev, 0, 0);
+	gmc_v8_0_flush_gpu_tlb(adev, 0, 0, 0);
 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
 		 (unsigned)(adev->gmc.gart_size >> 20),
 		 (unsigned long long)table_addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index a2aa35e..f862366 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -448,44 +448,45 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
  *
  * Flush the TLB for the requested page table using certain type.
  */
-static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
-				uint32_t vmid, uint32_t flush_type)
+static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+					uint32_t vmhub, uint32_t flush_type)
 {
 	const unsigned eng = 17;
-	unsigned i, j;
+	u32 j, tmp;
+	struct amdgpu_vmhub *hub;
 
-	for (i = 0; i < adev->num_vmhubs; ++i) {
-		struct amdgpu_vmhub *hub = &adev->vmhub[i];
-		u32 tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type);
+	BUG_ON(vmhub >= adev->num_vmhubs);
 
-		/* This is necessary for a HW workaround under SRIOV as well
-		 * as GFXOFF under bare metal
-		 */
-		if (adev->gfx.kiq.ring.sched.ready &&
-		    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
-		    !adev->in_gpu_reset) {
-			uint32_t req = hub->vm_inv_eng0_req + eng;
-			uint32_t ack = hub->vm_inv_eng0_ack + eng;
-
-			amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp,
-							   1 << vmid);
-			continue;
-		}
+	hub = &adev->vmhub[vmhub];
+	tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type);
 
-		spin_lock(&adev->gmc.invalidate_lock);
-		WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
-		for (j = 0; j < adev->usec_timeout; j++) {
-			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
-			if (tmp & (1 << vmid))
-				break;
-			udelay(1);
-		}
-		spin_unlock(&adev->gmc.invalidate_lock);
-		if (j < adev->usec_timeout)
-			continue;
+	/* This is necessary for a HW workaround under SRIOV as well
+	 * as GFXOFF under bare metal
+	 */
+	if (adev->gfx.kiq.ring.sched.ready &&
+			(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
+			!adev->in_gpu_reset) {
+		uint32_t req = hub->vm_inv_eng0_req + eng;
+		uint32_t ack = hub->vm_inv_eng0_ack + eng;
+
+		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp,
+				1 << vmid);
+		return;
+	}
 
-		DRM_ERROR("Timeout waiting for VM flush ACK!\n");
+	spin_lock(&adev->gmc.invalidate_lock);
+	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
+	for (j = 0; j < adev->usec_timeout; j++) {
+		tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
+		if (tmp & (1 << vmid))
+			break;
+		udelay(1);
 	}
+	spin_unlock(&adev->gmc.invalidate_lock);
+	if (j < adev->usec_timeout)
+		return;
+
+	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
 }
 
 static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
@@ -1239,7 +1240,7 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
  */
 static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
 {
-	int r;
+	int r, i;
 	bool value;
 	u32 tmp;
 
@@ -1295,7 +1296,9 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
 		mmhub_v9_4_set_fault_enable_default(adev, value);
 	else
 		mmhub_v1_0_set_fault_enable_default(adev, value);
-	gmc_v9_0_flush_gpu_tlb(adev, 0, 0);
+
+	for (i = 0; i < adev->num_vmhubs; ++i)
+		gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0);
 
 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
 		 (unsigned)(adev->gmc.gart_size >> 20),
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2019-08-14  8:10 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-02 16:04 [PATCH 1/2] drm/amdgpu: Export function to flush TLB of specific vm hub Zeng, Oak
     [not found] ` <1564761834-19210-1-git-send-email-Oak.Zeng-5C7GfCeVMHo@public.gmane.org>
2019-08-02 16:04   ` [PATCH 2/2] drm/amdkfd/gfx10: Calling amdgpu functions to invalidate TLB Zeng, Oak
2019-08-05  9:37   ` [PATCH 1/2] drm/amdgpu: Export function to flush TLB of specific vm hub Christian König
     [not found]     ` <855377a1-69ca-891e-1dad-5b93f57671da-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-08-07  2:40       ` Zeng, Oak
     [not found]         ` <BL0PR12MB2580778BFD71E211AC18CFAF80D40-b4cIHhjg/p/XzH18dTCKOgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2019-08-07  8:50           ` Christian König
     [not found]             ` <65784ad0-7693-9c98-82ee-66713c99f49b-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-08-07 15:17               ` Zeng, Oak
2019-08-09  4:21 Zeng, Oak
     [not found] ` <1565324504-4445-1-git-send-email-Oak.Zeng-5C7GfCeVMHo@public.gmane.org>
2019-08-13 19:17   ` Zeng, Oak
2019-08-14  8:10   ` Koenig, Christian

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.