amd-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/3] drm/amdgpu: Fix TLB invalidation request when using semaphore
@ 2020-01-18  1:37 Felix Kuehling
  2020-01-18  1:37 ` [PATCH 2/3] drm/amdgpu: Use the correct flush_type in flush_gpu_tlb_pasid Felix Kuehling
                   ` (4 more replies)
  0 siblings, 5 replies; 13+ messages in thread
From: Felix Kuehling @ 2020-01-18  1:37 UTC (permalink / raw)
  To: amd-gfx

Use a more meaningful variable name for the invalidation request
that is distinct from the tmp variable that gets overwritten when
acquiring the invalidation semaphore.

Fixes: 00f607f38d82 ("drm/amdgpu: invalidate mmhub semaphore workaround in gmc9/gmc10")
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 5 +++--
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 8 ++++----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 86f4ffe408e7..d914555e1212 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -262,7 +262,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
 {
 	bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
 	struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
-	u32 tmp = gmc_v10_0_get_invalidate_req(vmid, flush_type);
+	u32 inv_req = gmc_v10_0_get_invalidate_req(vmid, flush_type);
+	u32 tmp;
 	/* Use register 17 for GART */
 	const unsigned eng = 17;
 	unsigned int i;
@@ -289,7 +290,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
 			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
 	}
 
-	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
+	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
 
 	/*
 	 * Issue a dummy read to wait for the ACK register to be cleared
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 54bdc1786ab1..6d95de1413c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -476,13 +476,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 {
 	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
 	const unsigned eng = 17;
-	u32 j, tmp;
+	u32 j, inv_req, tmp;
 	struct amdgpu_vmhub *hub;
 
 	BUG_ON(vmhub >= adev->num_vmhubs);
 
 	hub = &adev->vmhub[vmhub];
-	tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type);
+	inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
 
 	/* This is necessary for a HW workaround under SRIOV as well
 	 * as GFXOFF under bare metal
@@ -493,7 +493,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 		uint32_t req = hub->vm_inv_eng0_req + eng;
 		uint32_t ack = hub->vm_inv_eng0_ack + eng;
 
-		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp,
+		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
 				1 << vmid);
 		return;
 	}
@@ -521,7 +521,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
 	}
 
-	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
+	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
 
 	/*
 	 * Issue a dummy read to wait for the ACK register to be cleared
-- 
2.24.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 2/3] drm/amdgpu: Use the correct flush_type in flush_gpu_tlb_pasid
  2020-01-18  1:37 [PATCH 1/3] drm/amdgpu: Fix TLB invalidation request when using semaphore Felix Kuehling
@ 2020-01-18  1:37 ` Felix Kuehling
  2020-01-20 16:37   ` Zeng, Oak
  2020-01-18  1:37 ` [PATCH 3/3] drm/amdgpu: Improve Vega20 XGMI TLB flush workaround Felix Kuehling
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 13+ messages in thread
From: Felix Kuehling @ 2020-01-18  1:37 UTC (permalink / raw)
  To: amd-gfx

The flush_type was incorrectly hard-coded to 0 when falling back
to MMIO-based invalidation in flush_gpu_tlb_pasid.

Fixes: caa5cf78387c ("drm/amdgpu: export function to flush TLB via pasid")
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 4 ++--
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index d914555e1212..a1f7bb42e6b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -443,10 +443,10 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 			if (all_hub) {
 				for (i = 0; i < adev->num_vmhubs; i++)
 					gmc_v10_0_flush_gpu_tlb(adev, vmid,
-							i, 0);
+							i, flush_type);
 			} else {
 				gmc_v10_0_flush_gpu_tlb(adev, vmid,
-						AMDGPU_GFXHUB_0, 0);
+						AMDGPU_GFXHUB_0, flush_type);
 			}
 			break;
 		}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 6d95de1413c4..90216abf14a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -602,10 +602,10 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 			if (all_hub) {
 				for (i = 0; i < adev->num_vmhubs; i++)
 					gmc_v9_0_flush_gpu_tlb(adev, vmid,
-							i, 0);
+							i, flush_type);
 			} else {
 				gmc_v9_0_flush_gpu_tlb(adev, vmid,
-						AMDGPU_GFXHUB_0, 0);
+						AMDGPU_GFXHUB_0, flush_type);
 			}
 			break;
 		}
-- 
2.24.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 3/3] drm/amdgpu: Improve Vega20 XGMI TLB flush workaround
  2020-01-18  1:37 [PATCH 1/3] drm/amdgpu: Fix TLB invalidation request when using semaphore Felix Kuehling
  2020-01-18  1:37 ` [PATCH 2/3] drm/amdgpu: Use the correct flush_type in flush_gpu_tlb_pasid Felix Kuehling
@ 2020-01-18  1:37 ` Felix Kuehling
  2020-01-20 16:41   ` Zeng, Oak
  2020-01-20 17:47   ` shaoyunl
  2020-01-18 13:22 ` [PATCH 1/3] drm/amdgpu: Fix TLB invalidation request when using semaphore Christian König
                   ` (2 subsequent siblings)
  4 siblings, 2 replies; 13+ messages in thread
From: Felix Kuehling @ 2020-01-18  1:37 UTC (permalink / raw)
  To: amd-gfx; +Cc: shaoyun.liu

Using a heavy-weight TLB flush once is not sufficient. Concurrent
memory accesses in the same TLB cache line can re-populate TLB entries
from stale texture cache (TC) entries while the heavy-weight TLB
flush is in progress. To fix this race condition, perform another TLB
flush after the heavy-weight one, when TC is known to be clean.

Move the workaround into the low-level TLB flushing functions. This way
they apply to amdgpu as well, and KIQ-based TLB flush only needs to
synchronize once.

CC: shaoyun.liu@amd.com
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  6 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      | 68 +++++++++++++++++-----
 2 files changed, 53 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 8609287620ea..5325f6b455f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -647,13 +647,9 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
 int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-	uint32_t flush_type = 0;
+	const uint32_t flush_type = 0;
 	bool all_hub = false;
 
-	if (adev->gmc.xgmi.num_physical_nodes &&
-		adev->asic_type == CHIP_VEGA20)
-		flush_type = 2;
-
 	if (adev->family == AMDGPU_FAMILY_AI)
 		all_hub = true;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 90216abf14a4..e2a5e852bdb0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -476,13 +476,26 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 {
 	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
 	const unsigned eng = 17;
-	u32 j, inv_req, tmp;
+	u32 j, inv_req, inv_req2, tmp;
 	struct amdgpu_vmhub *hub;
 
 	BUG_ON(vmhub >= adev->num_vmhubs);
 
 	hub = &adev->vmhub[vmhub];
-	inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
+	if (adev->gmc.xgmi.num_physical_nodes &&
+	    adev->asic_type == CHIP_VEGA20) {
+		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
+		 * heavy-weight TLB flush (type 2), which flushes
+		 * both. Due to a race condition with concurrent
+		 * memory accesses using the same TLB cache line, we
+		 * still need a second TLB flush after this.
+		 */
+		inv_req = gmc_v9_0_get_invalidate_req(vmid, 2);
+		inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
+	} else {
+		inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
+		inv_req2 = 0;
+	}
 
 	/* This is necessary for a HW workaround under SRIOV as well
 	 * as GFXOFF under bare metal
@@ -521,21 +534,27 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
 	}
 
-	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
+	do {
+		WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
 
-	/*
-	 * Issue a dummy read to wait for the ACK register to be cleared
-	 * to avoid a false ACK due to the new fast GRBM interface.
-	 */
-	if (vmhub == AMDGPU_GFXHUB_0)
-		RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng);
+		/*
+		 * Issue a dummy read to wait for the ACK register to
+		 * be cleared to avoid a false ACK due to the new fast
+		 * GRBM interface.
+		 */
+		if (vmhub == AMDGPU_GFXHUB_0)
+			RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng);
 
-	for (j = 0; j < adev->usec_timeout; j++) {
-		tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
-		if (tmp & (1 << vmid))
-			break;
-		udelay(1);
-	}
+		for (j = 0; j < adev->usec_timeout; j++) {
+			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
+			if (tmp & (1 << vmid))
+				break;
+			udelay(1);
+		}
+
+		inv_req = inv_req2;
+		inv_req2 = 0;
+	} while (inv_req);
 
 	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
 	if (use_semaphore)
@@ -577,9 +596,26 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 		return -EIO;
 
 	if (ring->sched.ready) {
+		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
+		 * heavy-weight TLB flush (type 2), which flushes
+		 * both. Due to a race condition with concurrent
+		 * memory accesses using the same TLB cache line, we
+		 * still need a second TLB flush after this.
+		 */
+		bool vega20_xgmi_wa = (adev->gmc.xgmi.num_physical_nodes &&
+				       adev->asic_type == CHIP_VEGA20);
+		/* 2 dwords flush + 8 dwords fence */
+		unsigned int ndw = kiq->pmf->invalidate_tlbs_size + 8;
+
+		if (vega20_xgmi_wa)
+			ndw += kiq->pmf->invalidate_tlbs_size;
+
 		spin_lock(&adev->gfx.kiq.ring_lock);
 		/* 2 dwords flush + 8 dwords fence */
-		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
+		amdgpu_ring_alloc(ring, ndw);
+		if (vega20_xgmi_wa)
+			kiq->pmf->kiq_invalidate_tlbs(ring,
+						      pasid, 2, all_hub);
 		kiq->pmf->kiq_invalidate_tlbs(ring,
 					pasid, flush_type, all_hub);
 		amdgpu_fence_emit_polling(ring, &seq);
-- 
2.24.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/3] drm/amdgpu: Fix TLB invalidation request when using semaphore
  2020-01-18  1:37 [PATCH 1/3] drm/amdgpu: Fix TLB invalidation request when using semaphore Felix Kuehling
  2020-01-18  1:37 ` [PATCH 2/3] drm/amdgpu: Use the correct flush_type in flush_gpu_tlb_pasid Felix Kuehling
  2020-01-18  1:37 ` [PATCH 3/3] drm/amdgpu: Improve Vega20 XGMI TLB flush workaround Felix Kuehling
@ 2020-01-18 13:22 ` Christian König
  2020-01-20 16:20 ` Yong Zhao
  2020-01-21 22:41 ` Felix Kuehling
  4 siblings, 0 replies; 13+ messages in thread
From: Christian König @ 2020-01-18 13:22 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx

Am 18.01.20 um 02:37 schrieb Felix Kuehling:
> Use a more meaningful variable name for the invalidation request
> that is distinct from the tmp variable that gets overwritten when
> acquiring the invalidation semaphore.
>
> Fixes: 00f607f38d82 ("drm/amdgpu: invalidate mmhub semaphore workaround in gmc9/gmc10")
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com> for this one and #2.

Acked-by: Christian König <christian.koenig@amd.com> for #3.

> ---
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 5 +++--
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 8 ++++----
>   2 files changed, 7 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 86f4ffe408e7..d914555e1212 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -262,7 +262,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
>   {
>   	bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
>   	struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
> -	u32 tmp = gmc_v10_0_get_invalidate_req(vmid, flush_type);
> +	u32 inv_req = gmc_v10_0_get_invalidate_req(vmid, flush_type);
> +	u32 tmp;
>   	/* Use register 17 for GART */
>   	const unsigned eng = 17;
>   	unsigned int i;
> @@ -289,7 +290,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
>   			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
>   	}
>   
> -	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
> +	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
>   
>   	/*
>   	 * Issue a dummy read to wait for the ACK register to be cleared
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 54bdc1786ab1..6d95de1413c4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -476,13 +476,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   {
>   	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
>   	const unsigned eng = 17;
> -	u32 j, tmp;
> +	u32 j, inv_req, tmp;
>   	struct amdgpu_vmhub *hub;
>   
>   	BUG_ON(vmhub >= adev->num_vmhubs);
>   
>   	hub = &adev->vmhub[vmhub];
> -	tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type);
> +	inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
>   
>   	/* This is necessary for a HW workaround under SRIOV as well
>   	 * as GFXOFF under bare metal
> @@ -493,7 +493,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   		uint32_t req = hub->vm_inv_eng0_req + eng;
>   		uint32_t ack = hub->vm_inv_eng0_ack + eng;
>   
> -		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp,
> +		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
>   				1 << vmid);
>   		return;
>   	}
> @@ -521,7 +521,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
>   	}
>   
> -	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
> +	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
>   
>   	/*
>   	 * Issue a dummy read to wait for the ACK register to be cleared

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/3] drm/amdgpu: Fix TLB invalidation request when using semaphore
  2020-01-18  1:37 [PATCH 1/3] drm/amdgpu: Fix TLB invalidation request when using semaphore Felix Kuehling
                   ` (2 preceding siblings ...)
  2020-01-18 13:22 ` [PATCH 1/3] drm/amdgpu: Fix TLB invalidation request when using semaphore Christian König
@ 2020-01-20 16:20 ` Yong Zhao
  2020-01-21 22:41 ` Felix Kuehling
  4 siblings, 0 replies; 13+ messages in thread
From: Yong Zhao @ 2020-01-20 16:20 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx

Reviewed-by: Yong Zhao <Yong.Zhao@amd.com>

On 2020-01-17 8:37 p.m., Felix Kuehling wrote:
> Use a more meaningful variable name for the invalidation request
> that is distinct from the tmp variable that gets overwritten when
> acquiring the invalidation semaphore.
>
> Fixes: 00f607f38d82 ("drm/amdgpu: invalidate mmhub semaphore workaround in gmc9/gmc10")
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 5 +++--
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 8 ++++----
>   2 files changed, 7 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 86f4ffe408e7..d914555e1212 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -262,7 +262,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
>   {
>   	bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
>   	struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
> -	u32 tmp = gmc_v10_0_get_invalidate_req(vmid, flush_type);
> +	u32 inv_req = gmc_v10_0_get_invalidate_req(vmid, flush_type);
> +	u32 tmp;
>   	/* Use register 17 for GART */
>   	const unsigned eng = 17;
>   	unsigned int i;
> @@ -289,7 +290,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
>   			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
>   	}
>   
> -	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
> +	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
>   
>   	/*
>   	 * Issue a dummy read to wait for the ACK register to be cleared
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 54bdc1786ab1..6d95de1413c4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -476,13 +476,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   {
>   	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
>   	const unsigned eng = 17;
> -	u32 j, tmp;
> +	u32 j, inv_req, tmp;
>   	struct amdgpu_vmhub *hub;
>   
>   	BUG_ON(vmhub >= adev->num_vmhubs);
>   
>   	hub = &adev->vmhub[vmhub];
> -	tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type);
> +	inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
>   
>   	/* This is necessary for a HW workaround under SRIOV as well
>   	 * as GFXOFF under bare metal
> @@ -493,7 +493,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   		uint32_t req = hub->vm_inv_eng0_req + eng;
>   		uint32_t ack = hub->vm_inv_eng0_ack + eng;
>   
> -		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp,
> +		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
>   				1 << vmid);
>   		return;
>   	}
> @@ -521,7 +521,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
>   	}
>   
> -	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
> +	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
>   
>   	/*
>   	 * Issue a dummy read to wait for the ACK register to be cleared
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* RE: [PATCH 2/3] drm/amdgpu: Use the correct flush_type in flush_gpu_tlb_pasid
  2020-01-18  1:37 ` [PATCH 2/3] drm/amdgpu: Use the correct flush_type in flush_gpu_tlb_pasid Felix Kuehling
@ 2020-01-20 16:37   ` Zeng, Oak
  0 siblings, 0 replies; 13+ messages in thread
From: Zeng, Oak @ 2020-01-20 16:37 UTC (permalink / raw)
  To: Kuehling, Felix, amd-gfx

[AMD Official Use Only - Internal Distribution Only]

Reviewed-by: Oak Zeng <Oak.Zeng@amd.com>

Regards,
Oak

-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Felix Kuehling
Sent: Friday, January 17, 2020 8:38 PM
To: amd-gfx@lists.freedesktop.org
Subject: [PATCH 2/3] drm/amdgpu: Use the correct flush_type in flush_gpu_tlb_pasid

The flush_type was incorrectly hard-coded to 0 when falling back to MMIO-based invalidation in flush_gpu_tlb_pasid.

Fixes: caa5cf78387c ("drm/amdgpu: export function to flush TLB via pasid")
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 4 ++--  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index d914555e1212..a1f7bb42e6b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -443,10 +443,10 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 			if (all_hub) {
 				for (i = 0; i < adev->num_vmhubs; i++)
 					gmc_v10_0_flush_gpu_tlb(adev, vmid,
-							i, 0);
+							i, flush_type);
 			} else {
 				gmc_v10_0_flush_gpu_tlb(adev, vmid,
-						AMDGPU_GFXHUB_0, 0);
+						AMDGPU_GFXHUB_0, flush_type);
 			}
 			break;
 		}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 6d95de1413c4..90216abf14a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -602,10 +602,10 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 			if (all_hub) {
 				for (i = 0; i < adev->num_vmhubs; i++)
 					gmc_v9_0_flush_gpu_tlb(adev, vmid,
-							i, 0);
+							i, flush_type);
 			} else {
 				gmc_v9_0_flush_gpu_tlb(adev, vmid,
-						AMDGPU_GFXHUB_0, 0);
+						AMDGPU_GFXHUB_0, flush_type);
 			}
 			break;
 		}
--
2.24.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=02%7C01%7Coak.zeng%40amd.com%7C21f63466ef6e4931800f08d79bb7185d%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637149083047847133&amp;sdata=zacZX9gCwsZSHopHhk%2Ba72D2Piq6M8%2FMrZlEvJ0Iw70%3D&amp;reserved=0
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* RE: [PATCH 3/3] drm/amdgpu: Improve Vega20 XGMI TLB flush workaround
  2020-01-18  1:37 ` [PATCH 3/3] drm/amdgpu: Improve Vega20 XGMI TLB flush workaround Felix Kuehling
@ 2020-01-20 16:41   ` Zeng, Oak
  2020-01-20 17:47   ` shaoyunl
  1 sibling, 0 replies; 13+ messages in thread
From: Zeng, Oak @ 2020-01-20 16:41 UTC (permalink / raw)
  To: Kuehling, Felix, amd-gfx; +Cc: Liu, Shaoyun

[AMD Official Use Only - Internal Distribution Only]

Hi Felix/Shaoyun,

Is this HW issue fixed on MI100?

Regards,
Oak

-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Felix Kuehling
Sent: Friday, January 17, 2020 8:38 PM
To: amd-gfx@lists.freedesktop.org
Cc: Liu, Shaoyun <Shaoyun.Liu@amd.com>
Subject: [PATCH 3/3] drm/amdgpu: Improve Vega20 XGMI TLB flush workaround

Using a heavy-weight TLB flush once is not sufficient. Concurrent memory accesses in the same TLB cache line can re-populate TLB entries from stale texture cache (TC) entries while the heavy-weight TLB flush is in progress. To fix this race condition, perform another TLB flush after the heavy-weight one, when TC is known to be clean.

Move the workaround into the low-level TLB flushing functions. This way they apply to amdgpu as well, and KIQ-based TLB flush only needs to synchronize once.

CC: shaoyun.liu@amd.com
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  6 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      | 68 +++++++++++++++++-----
 2 files changed, 53 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 8609287620ea..5325f6b455f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -647,13 +647,9 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)  int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid)  {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-	uint32_t flush_type = 0;
+	const uint32_t flush_type = 0;
 	bool all_hub = false;
 
-	if (adev->gmc.xgmi.num_physical_nodes &&
-		adev->asic_type == CHIP_VEGA20)
-		flush_type = 2;
-
 	if (adev->family == AMDGPU_FAMILY_AI)
 		all_hub = true;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 90216abf14a4..e2a5e852bdb0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -476,13 +476,26 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,  {
 	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
 	const unsigned eng = 17;
-	u32 j, inv_req, tmp;
+	u32 j, inv_req, inv_req2, tmp;
 	struct amdgpu_vmhub *hub;
 
 	BUG_ON(vmhub >= adev->num_vmhubs);
 
 	hub = &adev->vmhub[vmhub];
-	inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
+	if (adev->gmc.xgmi.num_physical_nodes &&
+	    adev->asic_type == CHIP_VEGA20) {
+		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
+		 * heavy-weight TLB flush (type 2), which flushes
+		 * both. Due to a race condition with concurrent
+		 * memory accesses using the same TLB cache line, we
+		 * still need a second TLB flush after this.
+		 */
+		inv_req = gmc_v9_0_get_invalidate_req(vmid, 2);
+		inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
+	} else {
+		inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
+		inv_req2 = 0;
+	}
 
 	/* This is necessary for a HW workaround under SRIOV as well
 	 * as GFXOFF under bare metal
@@ -521,21 +534,27 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
 	}
 
-	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
+	do {
+		WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
 
-	/*
-	 * Issue a dummy read to wait for the ACK register to be cleared
-	 * to avoid a false ACK due to the new fast GRBM interface.
-	 */
-	if (vmhub == AMDGPU_GFXHUB_0)
-		RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng);
+		/*
+		 * Issue a dummy read to wait for the ACK register to
+		 * be cleared to avoid a false ACK due to the new fast
+		 * GRBM interface.
+		 */
+		if (vmhub == AMDGPU_GFXHUB_0)
+			RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng);
 
-	for (j = 0; j < adev->usec_timeout; j++) {
-		tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
-		if (tmp & (1 << vmid))
-			break;
-		udelay(1);
-	}
+		for (j = 0; j < adev->usec_timeout; j++) {
+			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
+			if (tmp & (1 << vmid))
+				break;
+			udelay(1);
+		}
+
+		inv_req = inv_req2;
+		inv_req2 = 0;
+	} while (inv_req);
 
 	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
 	if (use_semaphore)
@@ -577,9 +596,26 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 		return -EIO;
 
 	if (ring->sched.ready) {
+		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
+		 * heavy-weight TLB flush (type 2), which flushes
+		 * both. Due to a race condition with concurrent
+		 * memory accesses using the same TLB cache line, we
+		 * still need a second TLB flush after this.
+		 */
+		bool vega20_xgmi_wa = (adev->gmc.xgmi.num_physical_nodes &&
+				       adev->asic_type == CHIP_VEGA20);
+		/* 2 dwords flush + 8 dwords fence */
+		unsigned int ndw = kiq->pmf->invalidate_tlbs_size + 8;
+
+		if (vega20_xgmi_wa)
+			ndw += kiq->pmf->invalidate_tlbs_size;
+
 		spin_lock(&adev->gfx.kiq.ring_lock);
 		/* 2 dwords flush + 8 dwords fence */
-		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
+		amdgpu_ring_alloc(ring, ndw);
+		if (vega20_xgmi_wa)
+			kiq->pmf->kiq_invalidate_tlbs(ring,
+						      pasid, 2, all_hub);
 		kiq->pmf->kiq_invalidate_tlbs(ring,
 					pasid, flush_type, all_hub);
 		amdgpu_fence_emit_polling(ring, &seq);
--
2.24.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=02%7C01%7Coak.zeng%40amd.com%7Cb6e8fc1d4a464f9a3a5e08d79bb71b15%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637149083076774221&amp;sdata=WGOHumpie7M6weZNK3stNKGKFW2HancXQa6%2BEhZfqMo%3D&amp;reserved=0
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/3] drm/amdgpu: Improve Vega20 XGMI TLB flush workaround
  2020-01-18  1:37 ` [PATCH 3/3] drm/amdgpu: Improve Vega20 XGMI TLB flush workaround Felix Kuehling
  2020-01-20 16:41   ` Zeng, Oak
@ 2020-01-20 17:47   ` shaoyunl
  2020-01-20 17:58     ` Felix Kuehling
  1 sibling, 1 reply; 13+ messages in thread
From: shaoyunl @ 2020-01-20 17:47 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx

Comments are inline.

On 2020-01-17 8:37 p.m., Felix Kuehling wrote:
> Using a heavy-weight TLB flush once is not sufficient. Concurrent
> memory accesses in the same TLB cache line can re-populate TLB entries
> from stale texture cache (TC) entries while the heavy-weight TLB
> flush is in progress. To fix this race condition, perform another TLB
> flush after the heavy-weight one, when TC is known to be clean.
>
> Move the workaround into the low-level TLB flushing functions. This way
> they apply to amdgpu as well, and KIQ-based TLB flush only needs to
> synchronize once.
>
> CC: shaoyun.liu@amd.com
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  6 +-
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      | 68 +++++++++++++++++-----
>   2 files changed, 53 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index 8609287620ea..5325f6b455f6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -647,13 +647,9 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
>   int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
> -	uint32_t flush_type = 0;
> +	const uint32_t flush_type = 0;
>   	bool all_hub = false;
>   
> -	if (adev->gmc.xgmi.num_physical_nodes &&
> -		adev->asic_type == CHIP_VEGA20)
> -		flush_type = 2;
> -
>   	if (adev->family == AMDGPU_FAMILY_AI)
>   		all_hub = true;
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 90216abf14a4..e2a5e852bdb0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -476,13 +476,26 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   {
>   	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
>   	const unsigned eng = 17;
> -	u32 j, inv_req, tmp;
> +	u32 j, inv_req, inv_req2, tmp;
>   	struct amdgpu_vmhub *hub;
>   
>   	BUG_ON(vmhub >= adev->num_vmhubs);
>   
>   	hub = &adev->vmhub[vmhub];
> -	inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
> +	if (adev->gmc.xgmi.num_physical_nodes &&
> +	    adev->asic_type == CHIP_VEGA20) {
> +		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
> +		 * heavy-weight TLB flush (type 2), which flushes
> +		 * both. Due to a race condition with concurrent
> +		 * memory accesses using the same TLB cache line, we
> +		 * still need a second TLB flush after this.
> +		 */
> +		inv_req = gmc_v9_0_get_invalidate_req(vmid, 2);
> +		inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);

[shaoyunl]  For the second invalidation in this situation, can we use 0
for the flush type directly? I think no matter what the input
flush_type for this function is, heavy-weight + legacy invalidation
should be enough for all of them.

> +	} else {
> +		inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
> +		inv_req2 = 0;
> +	}
>   
>   	/* This is necessary for a HW workaround under SRIOV as well
>   	 * as GFXOFF under bare metal
> @@ -521,21 +534,27 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
>   	}
>   
> -	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
> +	do {
> +		WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
>   
> -	/*
> -	 * Issue a dummy read to wait for the ACK register to be cleared
> -	 * to avoid a false ACK due to the new fast GRBM interface.
> -	 */
> -	if (vmhub == AMDGPU_GFXHUB_0)
> -		RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng);
> +		/*
> +		 * Issue a dummy read to wait for the ACK register to
> +		 * be cleared to avoid a false ACK due to the new fast
> +		 * GRBM interface.
> +		 */
> +		if (vmhub == AMDGPU_GFXHUB_0)
> +			RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng);
>   
> -	for (j = 0; j < adev->usec_timeout; j++) {
> -		tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
> -		if (tmp & (1 << vmid))
> -			break;
> -		udelay(1);
> -	}
> +		for (j = 0; j < adev->usec_timeout; j++) {
> +			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
> +			if (tmp & (1 << vmid))
> +				break;
> +			udelay(1);
> +		}
> +
> +		inv_req = inv_req2;
> +		inv_req2 = 0;
> +	} while (inv_req);
>   
>   	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
>   	if (use_semaphore)
> @@ -577,9 +596,26 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>   		return -EIO;
>   
>   	if (ring->sched.ready) {
> +		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
> +		 * heavy-weight TLB flush (type 2), which flushes
> +		 * both. Due to a race condition with concurrent
> +		 * memory accesses using the same TLB cache line, we
> +		 * still need a second TLB flush after this.
> +		 */
> +		bool vega20_xgmi_wa = (adev->gmc.xgmi.num_physical_nodes &&
> +				       adev->asic_type == CHIP_VEGA20);
> +		/* 2 dwords flush + 8 dwords fence */
> +		unsigned int ndw = kiq->pmf->invalidate_tlbs_size + 8;
> +
> +		if (vega20_xgmi_wa)
> +			ndw += kiq->pmf->invalidate_tlbs_size;
> +
>   		spin_lock(&adev->gfx.kiq.ring_lock);
>   		/* 2 dwords flush + 8 dwords fence */
> -		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
> +		amdgpu_ring_alloc(ring, ndw);
> +		if (vega20_xgmi_wa)
> +			kiq->pmf->kiq_invalidate_tlbs(ring,
> +						      pasid, 2, all_hub);
>   		kiq->pmf->kiq_invalidate_tlbs(ring,
>   					pasid, flush_type, all_hub);
>   		amdgpu_fence_emit_polling(ring, &seq);
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/3] drm/amdgpu: Improve Vega20 XGMI TLB flush workaround
  2020-01-20 17:47   ` shaoyunl
@ 2020-01-20 17:58     ` Felix Kuehling
  2020-01-20 18:28       ` shaoyunl
  0 siblings, 1 reply; 13+ messages in thread
From: Felix Kuehling @ 2020-01-20 17:58 UTC (permalink / raw)
  To: shaoyunl, amd-gfx

On 2020-01-20 12:47 p.m., shaoyunl wrote:
> comments in line .
>
> On 2020-01-17 8:37 p.m., Felix Kuehling wrote:
>> Using a heavy-weight TLB flush once is not sufficient. Concurrent
>> memory accesses in the same TLB cache line can re-populate TLB entries
>> from stale texture cache (TC) entries while the heavy-weight TLB
>> flush is in progress. To fix this race condition, perform another TLB
>> flush after the heavy-weight one, when TC is known to be clean.
>>
>> Move the workaround into the low-level TLB flushing functions. This way
>> they apply to amdgpu as well, and KIQ-based TLB flush only needs to
>> synchronize once.
>>
>> CC: shaoyun.liu@amd.com
>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  6 +-
>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      | 68 +++++++++++++++++-----
>>   2 files changed, 53 insertions(+), 21 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> index 8609287620ea..5325f6b455f6 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> @@ -647,13 +647,9 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct 
>> kgd_dev *kgd, uint16_t vmid)
>>   int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t 
>> pasid)
>>   {
>>       struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
>> -    uint32_t flush_type = 0;
>> +    const uint32_t flush_type = 0;
>>       bool all_hub = false;
>>   -    if (adev->gmc.xgmi.num_physical_nodes &&
>> -        adev->asic_type == CHIP_VEGA20)
>> -        flush_type = 2;
>> -
>>       if (adev->family == AMDGPU_FAMILY_AI)
>>           all_hub = true;
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index 90216abf14a4..e2a5e852bdb0 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -476,13 +476,26 @@ static void gmc_v9_0_flush_gpu_tlb(struct 
>> amdgpu_device *adev, uint32_t vmid,
>>   {
>>       bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, 
>> vmhub);
>>       const unsigned eng = 17;
>> -    u32 j, inv_req, tmp;
>> +    u32 j, inv_req, inv_req2, tmp;
>>       struct amdgpu_vmhub *hub;
>>         BUG_ON(vmhub >= adev->num_vmhubs);
>>         hub = &adev->vmhub[vmhub];
>> -    inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
>> +    if (adev->gmc.xgmi.num_physical_nodes &&
>> +        adev->asic_type == CHIP_VEGA20) {
>> +        /* Vega20+XGMI caches PTEs in TC and TLB. Add a
>> +         * heavy-weight TLB flush (type 2), which flushes
>> +         * both. Due to a race condition with concurrent
>> +         * memory accesses using the same TLB cache line, we
>> +         * still need a second TLB flush after this.
>> +         */
>> +        inv_req = gmc_v9_0_get_invalidate_req(vmid, 2);
>> +        inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
>
> [shaoyunl]  For the send invalidation in this situation ,can we use 0  
> for the flush type directly ? I think no matter what's the input 
> flush_type for this function , heavy-weight  + legacy invalidation 
> should be enough for all of them .

I'm not sure that's true. In the case of the race condition, there was 
some concurrent memory access during the first heavy-weight 
invalidation. If that is now flushed in the second invalidation, and a 
heavy-weight invalidation was requested, we should also flush any TC 
cache lines associated with that access. So hard-coding flush_type 0 
here is probably not safe for all cases.

Regards,
   Felix


>
>> +    } else {
>> +        inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
>> +        inv_req2 = 0;
>> +    }
>>         /* This is necessary for a HW workaround under SRIOV as well
>>        * as GFXOFF under bare metal
>> @@ -521,21 +534,27 @@ static void gmc_v9_0_flush_gpu_tlb(struct 
>> amdgpu_device *adev, uint32_t vmid,
>>               DRM_ERROR("Timeout waiting for sem acquire in VM 
>> flush!\n");
>>       }
>>   -    WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
>> +    do {
>> +        WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
>>   -    /*
>> -     * Issue a dummy read to wait for the ACK register to be cleared
>> -     * to avoid a false ACK due to the new fast GRBM interface.
>> -     */
>> -    if (vmhub == AMDGPU_GFXHUB_0)
>> -        RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng);
>> +        /*
>> +         * Issue a dummy read to wait for the ACK register to
>> +         * be cleared to avoid a false ACK due to the new fast
>> +         * GRBM interface.
>> +         */
>> +        if (vmhub == AMDGPU_GFXHUB_0)
>> +            RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng);
>>   -    for (j = 0; j < adev->usec_timeout; j++) {
>> -        tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
>> -        if (tmp & (1 << vmid))
>> -            break;
>> -        udelay(1);
>> -    }
>> +        for (j = 0; j < adev->usec_timeout; j++) {
>> +            tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
>> +            if (tmp & (1 << vmid))
>> +                break;
>> +            udelay(1);
>> +        }
>> +
>> +        inv_req = inv_req2;
>> +        inv_req2 = 0;
>> +    } while (inv_req);
>>         /* TODO: It needs to continue working on debugging with 
>> semaphore for GFXHUB as well. */
>>       if (use_semaphore)
>> @@ -577,9 +596,26 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct 
>> amdgpu_device *adev,
>>           return -EIO;
>>         if (ring->sched.ready) {
>> +        /* Vega20+XGMI caches PTEs in TC and TLB. Add a
>> +         * heavy-weight TLB flush (type 2), which flushes
>> +         * both. Due to a race condition with concurrent
>> +         * memory accesses using the same TLB cache line, we
>> +         * still need a second TLB flush after this.
>> +         */
>> +        bool vega20_xgmi_wa = (adev->gmc.xgmi.num_physical_nodes &&
>> +                       adev->asic_type == CHIP_VEGA20);
>> +        /* 2 dwords flush + 8 dwords fence */
>> +        unsigned int ndw = kiq->pmf->invalidate_tlbs_size + 8;
>> +
>> +        if (vega20_xgmi_wa)
>> +            ndw += kiq->pmf->invalidate_tlbs_size;
>> +
>>           spin_lock(&adev->gfx.kiq.ring_lock);
>>           /* 2 dwords flush + 8 dwords fence */
>> -        amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
>> +        amdgpu_ring_alloc(ring, ndw);
>> +        if (vega20_xgmi_wa)
>> +            kiq->pmf->kiq_invalidate_tlbs(ring,
>> +                              pasid, 2, all_hub);
>>           kiq->pmf->kiq_invalidate_tlbs(ring,
>>                       pasid, flush_type, all_hub);
>>           amdgpu_fence_emit_polling(ring, &seq);
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/3] drm/amdgpu: Improve Vega20 XGMI TLB flush workaround
  2020-01-20 17:58     ` Felix Kuehling
@ 2020-01-20 18:28       ` shaoyunl
  2020-01-20 18:40         ` Felix Kuehling
  0 siblings, 1 reply; 13+ messages in thread
From: shaoyunl @ 2020-01-20 18:28 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx


On 2020-01-20 12:58 p.m., Felix Kuehling wrote:
> On 2020-01-20 12:47 p.m., shaoyunl wrote:
>> comments in line .
>>
>> On 2020-01-17 8:37 p.m., Felix Kuehling wrote:
>>> Using a heavy-weight TLB flush once is not sufficient. Concurrent
>>> memory accesses in the same TLB cache line can re-populate TLB entries
>>> from stale texture cache (TC) entries while the heavy-weight TLB
>>> flush is in progress. To fix this race condition, perform another TLB
>>> flush after the heavy-weight one, when TC is known to be clean.
>>>
>>> Move the workaround into the low-level TLB flushing functions. This way
>>> they apply to amdgpu as well, and KIQ-based TLB flush only needs to
>>> synchronize once.
>>>
>>> CC: shaoyun.liu@amd.com
>>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  6 +-
>>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      | 68 
>>> +++++++++++++++++-----
>>>   2 files changed, 53 insertions(+), 21 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>> index 8609287620ea..5325f6b455f6 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>> @@ -647,13 +647,9 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct 
>>> kgd_dev *kgd, uint16_t vmid)
>>>   int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, 
>>> uint16_t pasid)
>>>   {
>>>       struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
>>> -    uint32_t flush_type = 0;
>>> +    const uint32_t flush_type = 0;
>>>       bool all_hub = false;
>>>   -    if (adev->gmc.xgmi.num_physical_nodes &&
>>> -        adev->asic_type == CHIP_VEGA20)
>>> -        flush_type = 2;
>>> -
>>>       if (adev->family == AMDGPU_FAMILY_AI)
>>>           all_hub = true;
>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> index 90216abf14a4..e2a5e852bdb0 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> @@ -476,13 +476,26 @@ static void gmc_v9_0_flush_gpu_tlb(struct 
>>> amdgpu_device *adev, uint32_t vmid,
>>>   {
>>>       bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, 
>>> vmhub);
>>>       const unsigned eng = 17;
>>> -    u32 j, inv_req, tmp;
>>> +    u32 j, inv_req, inv_req2, tmp;
>>>       struct amdgpu_vmhub *hub;
>>>         BUG_ON(vmhub >= adev->num_vmhubs);
>>>         hub = &adev->vmhub[vmhub];
>>> -    inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
>>> +    if (adev->gmc.xgmi.num_physical_nodes &&
>>> +        adev->asic_type == CHIP_VEGA20) {
>>> +        /* Vega20+XGMI caches PTEs in TC and TLB. Add a
>>> +         * heavy-weight TLB flush (type 2), which flushes
>>> +         * both. Due to a race condition with concurrent
>>> +         * memory accesses using the same TLB cache line, we
>>> +         * still need a second TLB flush after this.
>>> +         */
>>> +        inv_req = gmc_v9_0_get_invalidate_req(vmid, 2);
>>> +        inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
>>
>> [shaoyunl]  For the send invalidation in this situation ,can we use 
>> 0  for the flush type directly ? I think no matter what's the input 
>> flush_type for this function , heavy-weight  + legacy invalidation 
>> should be enough for all of them .
>
> I'm not sure that's true. In the case of the race condition, there was 
> some concurrent memory access during the first heavy-weight 
> invalidation. If that is now flushed in the second invalidation, and a 
> heavy-weight invalidation was requested, we should also flush any TC 
> cache lines associated with that access. So hard-coding flush_type 0 
> here is probably not safe for all cases.
>
> Regards,
>   Felix
>
[shaoyunl]   Originally we used the heavy-weight invalidation for XGMI 
here due to a HW issue that always uses NC even for remote GPU 
memory access (this leads the walker to load the TLB directly from TC with 
a stale value). The heavy-weight flush will set the invalidate bit for both TLB 
and TC, so this will make the walker load from main memory. Your 
change is based on the assumption that after the first heavy-weight 
invalidation, the TC is already loaded with correct contents, which seems 
to be true, so in this situation I think a light-weight or even 
legacy invalidation will be enough, since they will load from TC to TLB 
directly.

Regards

shaoyun.liu


>
>>
>>> +    } else {
>>> +        inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
>>> +        inv_req2 = 0;
>>> +    }
>>>         /* This is necessary for a HW workaround under SRIOV as well
>>>        * as GFXOFF under bare metal
>>> @@ -521,21 +534,27 @@ static void gmc_v9_0_flush_gpu_tlb(struct 
>>> amdgpu_device *adev, uint32_t vmid,
>>>               DRM_ERROR("Timeout waiting for sem acquire in VM 
>>> flush!\n");
>>>       }
>>>   -    WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
>>> +    do {
>>> +        WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
>>>   -    /*
>>> -     * Issue a dummy read to wait for the ACK register to be cleared
>>> -     * to avoid a false ACK due to the new fast GRBM interface.
>>> -     */
>>> -    if (vmhub == AMDGPU_GFXHUB_0)
>>> -        RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng);
>>> +        /*
>>> +         * Issue a dummy read to wait for the ACK register to
>>> +         * be cleared to avoid a false ACK due to the new fast
>>> +         * GRBM interface.
>>> +         */
>>> +        if (vmhub == AMDGPU_GFXHUB_0)
>>> +            RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng);
>>>   -    for (j = 0; j < adev->usec_timeout; j++) {
>>> -        tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
>>> -        if (tmp & (1 << vmid))
>>> -            break;
>>> -        udelay(1);
>>> -    }
>>> +        for (j = 0; j < adev->usec_timeout; j++) {
>>> +            tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
>>> +            if (tmp & (1 << vmid))
>>> +                break;
>>> +            udelay(1);
>>> +        }
>>> +
>>> +        inv_req = inv_req2;
>>> +        inv_req2 = 0;
>>> +    } while (inv_req);
>>>         /* TODO: It needs to continue working on debugging with 
>>> semaphore for GFXHUB as well. */
>>>       if (use_semaphore)
>>> @@ -577,9 +596,26 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct 
>>> amdgpu_device *adev,
>>>           return -EIO;
>>>         if (ring->sched.ready) {
>>> +        /* Vega20+XGMI caches PTEs in TC and TLB. Add a
>>> +         * heavy-weight TLB flush (type 2), which flushes
>>> +         * both. Due to a race condition with concurrent
>>> +         * memory accesses using the same TLB cache line, we
>>> +         * still need a second TLB flush after this.
>>> +         */
>>> +        bool vega20_xgmi_wa = (adev->gmc.xgmi.num_physical_nodes &&
>>> +                       adev->asic_type == CHIP_VEGA20);
>>> +        /* 2 dwords flush + 8 dwords fence */
>>> +        unsigned int ndw = kiq->pmf->invalidate_tlbs_size + 8;
>>> +
>>> +        if (vega20_xgmi_wa)
>>> +            ndw += kiq->pmf->invalidate_tlbs_size;
>>> +
>>>           spin_lock(&adev->gfx.kiq.ring_lock);
>>>           /* 2 dwords flush + 8 dwords fence */
>>> -        amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
>>> +        amdgpu_ring_alloc(ring, ndw);
>>> +        if (vega20_xgmi_wa)
>>> +            kiq->pmf->kiq_invalidate_tlbs(ring,
>>> +                              pasid, 2, all_hub);
>>>           kiq->pmf->kiq_invalidate_tlbs(ring,
>>>                       pasid, flush_type, all_hub);
>>>           amdgpu_fence_emit_polling(ring, &seq);
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/3] drm/amdgpu: Improve Vega20 XGMI TLB flush workaround
  2020-01-20 18:28       ` shaoyunl
@ 2020-01-20 18:40         ` Felix Kuehling
  2020-01-20 18:50           ` shaoyunl
  0 siblings, 1 reply; 13+ messages in thread
From: Felix Kuehling @ 2020-01-20 18:40 UTC (permalink / raw)
  To: shaoyunl, amd-gfx

On 2020-01-20 1:28 p.m., shaoyunl wrote:
>
> On 2020-01-20 12:58 p.m., Felix Kuehling wrote:
>> On 2020-01-20 12:47 p.m., shaoyunl wrote:
>>> comments in line .
>>>
>>> On 2020-01-17 8:37 p.m., Felix Kuehling wrote:
>>>> Using a heavy-weight TLB flush once is not sufficient. Concurrent
>>>> memory accesses in the same TLB cache line can re-populate TLB entries
>>>> from stale texture cache (TC) entries while the heavy-weight TLB
>>>> flush is in progress. To fix this race condition, perform another TLB
>>>> flush after the heavy-weight one, when TC is known to be clean.
>>>>
>>>> Move the workaround into the low-level TLB flushing functions. This 
>>>> way
>>>> they apply to amdgpu as well, and KIQ-based TLB flush only needs to
>>>> synchronize once.
>>>>
>>>> CC: shaoyun.liu@amd.com
>>>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>>> ---
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  6 +-
>>>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      | 68 
>>>> +++++++++++++++++-----
>>>>   2 files changed, 53 insertions(+), 21 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>> index 8609287620ea..5325f6b455f6 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>> @@ -647,13 +647,9 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct 
>>>> kgd_dev *kgd, uint16_t vmid)
>>>>   int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, 
>>>> uint16_t pasid)
>>>>   {
>>>>       struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
>>>> -    uint32_t flush_type = 0;
>>>> +    const uint32_t flush_type = 0;
>>>>       bool all_hub = false;
>>>>   -    if (adev->gmc.xgmi.num_physical_nodes &&
>>>> -        adev->asic_type == CHIP_VEGA20)
>>>> -        flush_type = 2;
>>>> -
>>>>       if (adev->family == AMDGPU_FAMILY_AI)
>>>>           all_hub = true;
>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>> index 90216abf14a4..e2a5e852bdb0 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>> @@ -476,13 +476,26 @@ static void gmc_v9_0_flush_gpu_tlb(struct 
>>>> amdgpu_device *adev, uint32_t vmid,
>>>>   {
>>>>       bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, 
>>>> vmhub);
>>>>       const unsigned eng = 17;
>>>> -    u32 j, inv_req, tmp;
>>>> +    u32 j, inv_req, inv_req2, tmp;
>>>>       struct amdgpu_vmhub *hub;
>>>>         BUG_ON(vmhub >= adev->num_vmhubs);
>>>>         hub = &adev->vmhub[vmhub];
>>>> -    inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
>>>> +    if (adev->gmc.xgmi.num_physical_nodes &&
>>>> +        adev->asic_type == CHIP_VEGA20) {
>>>> +        /* Vega20+XGMI caches PTEs in TC and TLB. Add a
>>>> +         * heavy-weight TLB flush (type 2), which flushes
>>>> +         * both. Due to a race condition with concurrent
>>>> +         * memory accesses using the same TLB cache line, we
>>>> +         * still need a second TLB flush after this.
>>>> +         */
>>>> +        inv_req = gmc_v9_0_get_invalidate_req(vmid, 2);
>>>> +        inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
>>>
>>> [shaoyunl]  For the send invalidation in this situation ,can we use 
>>> 0  for the flush type directly ? I think no matter what's the input 
>>> flush_type for this function , heavy-weight + legacy invalidation 
>>> should be enough for all of them .
>>
>> I'm not sure that's true. In the case of the race condition, there 
>> was some concurrent memory access during the first heavy-weight 
>> invalidation. If that is now flushed in the second invalidation, and 
>> a heavy-weight invalidation was requested, we should also flush any 
>> TC cache lines associated with that access. So hard-coding flush_type 
>> 0 here is probably not safe for all cases.
>>
>> Regards,
>>   Felix
>>
> [shaoyunl]   Originally we use the  heavy-weight invalidation for XGMI 
> here is due to the HW issue which always use NC even for remote GPU 
> memory access (this lead walker to load the TLB directly from TC with 
> stale value) . The heavy-weight  will set invalidate bit for both TLB 
> and  TC so this will make the walker to load from main memory . Your 
> change is based on the assumption that after first heavy-weight 
> invalidation , the TC already load with  correct contents which seems  
> should be true , so in this situation I think the light-weight or even 
> legacy invalidation will be  enough since they will load from TC to 
> TLB directly .

With this change, if you request a legacy invalidation (currently we 
always do), you'll get a heavy-weight followed by a legacy invalidation.

I'm working on other changes that will require a heavy-weight TLB flush 
even without this workaround. In this case I believe the second flush 
will need to be heavy-weight as well.

Regards,
   Felix


>
> Regards
>
> shaoyun.liu
>
>
>>
>>>
>>>> +    } else {
>>>> +        inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
>>>> +        inv_req2 = 0;
>>>> +    }
>>>>         /* This is necessary for a HW workaround under SRIOV as well
>>>>        * as GFXOFF under bare metal
>>>> @@ -521,21 +534,27 @@ static void gmc_v9_0_flush_gpu_tlb(struct 
>>>> amdgpu_device *adev, uint32_t vmid,
>>>>               DRM_ERROR("Timeout waiting for sem acquire in VM 
>>>> flush!\n");
>>>>       }
>>>>   -    WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
>>>> +    do {
>>>> +        WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
>>>>   -    /*
>>>> -     * Issue a dummy read to wait for the ACK register to be cleared
>>>> -     * to avoid a false ACK due to the new fast GRBM interface.
>>>> -     */
>>>> -    if (vmhub == AMDGPU_GFXHUB_0)
>>>> -        RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng);
>>>> +        /*
>>>> +         * Issue a dummy read to wait for the ACK register to
>>>> +         * be cleared to avoid a false ACK due to the new fast
>>>> +         * GRBM interface.
>>>> +         */
>>>> +        if (vmhub == AMDGPU_GFXHUB_0)
>>>> +            RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng);
>>>>   -    for (j = 0; j < adev->usec_timeout; j++) {
>>>> -        tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
>>>> -        if (tmp & (1 << vmid))
>>>> -            break;
>>>> -        udelay(1);
>>>> -    }
>>>> +        for (j = 0; j < adev->usec_timeout; j++) {
>>>> +            tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
>>>> +            if (tmp & (1 << vmid))
>>>> +                break;
>>>> +            udelay(1);
>>>> +        }
>>>> +
>>>> +        inv_req = inv_req2;
>>>> +        inv_req2 = 0;
>>>> +    } while (inv_req);
>>>>         /* TODO: It needs to continue working on debugging with 
>>>> semaphore for GFXHUB as well. */
>>>>       if (use_semaphore)
>>>> @@ -577,9 +596,26 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct 
>>>> amdgpu_device *adev,
>>>>           return -EIO;
>>>>         if (ring->sched.ready) {
>>>> +        /* Vega20+XGMI caches PTEs in TC and TLB. Add a
>>>> +         * heavy-weight TLB flush (type 2), which flushes
>>>> +         * both. Due to a race condition with concurrent
>>>> +         * memory accesses using the same TLB cache line, we
>>>> +         * still need a second TLB flush after this.
>>>> +         */
>>>> +        bool vega20_xgmi_wa = (adev->gmc.xgmi.num_physical_nodes &&
>>>> +                       adev->asic_type == CHIP_VEGA20);
>>>> +        /* 2 dwords flush + 8 dwords fence */
>>>> +        unsigned int ndw = kiq->pmf->invalidate_tlbs_size + 8;
>>>> +
>>>> +        if (vega20_xgmi_wa)
>>>> +            ndw += kiq->pmf->invalidate_tlbs_size;
>>>> +
>>>>           spin_lock(&adev->gfx.kiq.ring_lock);
>>>>           /* 2 dwords flush + 8 dwords fence */
>>>> -        amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
>>>> +        amdgpu_ring_alloc(ring, ndw);
>>>> +        if (vega20_xgmi_wa)
>>>> +            kiq->pmf->kiq_invalidate_tlbs(ring,
>>>> +                              pasid, 2, all_hub);
>>>>           kiq->pmf->kiq_invalidate_tlbs(ring,
>>>>                       pasid, flush_type, all_hub);
>>>>           amdgpu_fence_emit_polling(ring, &seq);
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/3] drm/amdgpu: Improve Vega20 XGMI TLB flush workaround
  2020-01-20 18:40         ` Felix Kuehling
@ 2020-01-20 18:50           ` shaoyunl
  0 siblings, 0 replies; 13+ messages in thread
From: shaoyunl @ 2020-01-20 18:50 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx

I see. That makes sense. For this change:

Reviewed-by: shaoyun liu <shaoyun.liu@amd.com>


On 2020-01-20 1:40 p.m., Felix Kuehling wrote:
> On 2020-01-20 1:28 p.m., shaoyunl wrote:
>>
>> On 2020-01-20 12:58 p.m., Felix Kuehling wrote:
>>> On 2020-01-20 12:47 p.m., shaoyunl wrote:
>>>> comments in line .
>>>>
>>>> On 2020-01-17 8:37 p.m., Felix Kuehling wrote:
>>>>> Using a heavy-weight TLB flush once is not sufficient. Concurrent
>>>>> memory accesses in the same TLB cache line can re-populate TLB 
>>>>> entries
>>>>> from stale texture cache (TC) entries while the heavy-weight TLB
>>>>> flush is in progress. To fix this race condition, perform another TLB
>>>>> flush after the heavy-weight one, when TC is known to be clean.
>>>>>
>>>>> Move the workaround into the low-level TLB flushing functions. 
>>>>> This way
>>>>> they apply to amdgpu as well, and KIQ-based TLB flush only needs to
>>>>> synchronize once.
>>>>>
>>>>> CC: shaoyun.liu@amd.com
>>>>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>>>> ---
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  6 +-
>>>>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      | 68 
>>>>> +++++++++++++++++-----
>>>>>   2 files changed, 53 insertions(+), 21 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>>> index 8609287620ea..5325f6b455f6 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>>> @@ -647,13 +647,9 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct 
>>>>> kgd_dev *kgd, uint16_t vmid)
>>>>>   int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, 
>>>>> uint16_t pasid)
>>>>>   {
>>>>>       struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
>>>>> -    uint32_t flush_type = 0;
>>>>> +    const uint32_t flush_type = 0;
>>>>>       bool all_hub = false;
>>>>>   -    if (adev->gmc.xgmi.num_physical_nodes &&
>>>>> -        adev->asic_type == CHIP_VEGA20)
>>>>> -        flush_type = 2;
>>>>> -
>>>>>       if (adev->family == AMDGPU_FAMILY_AI)
>>>>>           all_hub = true;
>>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
>>>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>>> index 90216abf14a4..e2a5e852bdb0 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>>> @@ -476,13 +476,26 @@ static void gmc_v9_0_flush_gpu_tlb(struct 
>>>>> amdgpu_device *adev, uint32_t vmid,
>>>>>   {
>>>>>       bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, 
>>>>> vmhub);
>>>>>       const unsigned eng = 17;
>>>>> -    u32 j, inv_req, tmp;
>>>>> +    u32 j, inv_req, inv_req2, tmp;
>>>>>       struct amdgpu_vmhub *hub;
>>>>>         BUG_ON(vmhub >= adev->num_vmhubs);
>>>>>         hub = &adev->vmhub[vmhub];
>>>>> -    inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
>>>>> +    if (adev->gmc.xgmi.num_physical_nodes &&
>>>>> +        adev->asic_type == CHIP_VEGA20) {
>>>>> +        /* Vega20+XGMI caches PTEs in TC and TLB. Add a
>>>>> +         * heavy-weight TLB flush (type 2), which flushes
>>>>> +         * both. Due to a race condition with concurrent
>>>>> +         * memory accesses using the same TLB cache line, we
>>>>> +         * still need a second TLB flush after this.
>>>>> +         */
>>>>> +        inv_req = gmc_v9_0_get_invalidate_req(vmid, 2);
>>>>> +        inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
>>>>
>>>> [shaoyunl]  For the second invalidation in this situation, can we use
>>>> 0 for the flush type directly? I think no matter what the input
>>>> flush_type for this function is, heavy-weight + legacy invalidation
>>>> should be enough for all of them.
>>>
>>> I'm not sure that's true. In the case of the race condition, there 
>>> was some concurrent memory access during the first heavy-weight 
>>> invalidation. If that is now flushed in the second invalidation, and 
>>> a heavy-weight invalidation was requested, we should also flush any 
>>> TC cache lines associated with that access. So hard-coding 
>>> flush_type 0 here is probably not safe for all cases.
>>>
>>> Regards,
>>>   Felix
>>>
>> [shaoyunl]   Originally we used the heavy-weight invalidation for
>> XGMI here due to the HW issue which always uses NC even for remote
>> GPU memory access (this leads the walker to load the TLB directly
>> from TC with a stale value). The heavy-weight flush will set the
>> invalidate bit for both TLB and TC, so this will make the walker
>> load from main memory. Your change is based on the assumption that
>> after the first heavy-weight invalidation, the TC is already loaded
>> with correct contents, which seems like it should be true, so in this
>> situation I think the light-weight or even legacy invalidation will
>> be enough since they will load from TC to TLB directly.
>
> With this change, if you request a legacy invalidation (currently we 
> always do), you'll get a heavy-weight followed by a legacy invalidation.
>
> I'm working on other changes that will require a heavy-weight TLB 
> flush even without this workaround. In this case I believe the second 
> flush will need to be heavy-weight as well.
>
> Regards,
>   Felix
>
>
>>
>> Regards
>>
>> shaoyun.liu
>>
>>
>>>
>>>>
>>>>> +    } else {
>>>>> +        inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
>>>>> +        inv_req2 = 0;
>>>>> +    }
>>>>>         /* This is necessary for a HW workaround under SRIOV as well
>>>>>        * as GFXOFF under bare metal
>>>>> @@ -521,21 +534,27 @@ static void gmc_v9_0_flush_gpu_tlb(struct 
>>>>> amdgpu_device *adev, uint32_t vmid,
>>>>>               DRM_ERROR("Timeout waiting for sem acquire in VM 
>>>>> flush!\n");
>>>>>       }
>>>>>   -    WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
>>>>> +    do {
>>>>> +        WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
>>>>>   -    /*
>>>>> -     * Issue a dummy read to wait for the ACK register to be cleared
>>>>> -     * to avoid a false ACK due to the new fast GRBM interface.
>>>>> -     */
>>>>> -    if (vmhub == AMDGPU_GFXHUB_0)
>>>>> -        RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng);
>>>>> +        /*
>>>>> +         * Issue a dummy read to wait for the ACK register to
>>>>> +         * be cleared to avoid a false ACK due to the new fast
>>>>> +         * GRBM interface.
>>>>> +         */
>>>>> +        if (vmhub == AMDGPU_GFXHUB_0)
>>>>> +            RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng);
>>>>>   -    for (j = 0; j < adev->usec_timeout; j++) {
>>>>> -        tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
>>>>> -        if (tmp & (1 << vmid))
>>>>> -            break;
>>>>> -        udelay(1);
>>>>> -    }
>>>>> +        for (j = 0; j < adev->usec_timeout; j++) {
>>>>> +            tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
>>>>> +            if (tmp & (1 << vmid))
>>>>> +                break;
>>>>> +            udelay(1);
>>>>> +        }
>>>>> +
>>>>> +        inv_req = inv_req2;
>>>>> +        inv_req2 = 0;
>>>>> +    } while (inv_req);
>>>>>         /* TODO: It needs to continue working on debugging with 
>>>>> semaphore for GFXHUB as well. */
>>>>>       if (use_semaphore)
>>>>> @@ -577,9 +596,26 @@ static int 
>>>>> gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>>>>>           return -EIO;
>>>>>         if (ring->sched.ready) {
>>>>> +        /* Vega20+XGMI caches PTEs in TC and TLB. Add a
>>>>> +         * heavy-weight TLB flush (type 2), which flushes
>>>>> +         * both. Due to a race condition with concurrent
>>>>> +         * memory accesses using the same TLB cache line, we
>>>>> +         * still need a second TLB flush after this.
>>>>> +         */
>>>>> +        bool vega20_xgmi_wa = (adev->gmc.xgmi.num_physical_nodes &&
>>>>> +                       adev->asic_type == CHIP_VEGA20);
>>>>> +        /* 2 dwords flush + 8 dwords fence */
>>>>> +        unsigned int ndw = kiq->pmf->invalidate_tlbs_size + 8;
>>>>> +
>>>>> +        if (vega20_xgmi_wa)
>>>>> +            ndw += kiq->pmf->invalidate_tlbs_size;
>>>>> +
>>>>>           spin_lock(&adev->gfx.kiq.ring_lock);
>>>>>           /* 2 dwords flush + 8 dwords fence */
>>>>> -        amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
>>>>> +        amdgpu_ring_alloc(ring, ndw);
>>>>> +        if (vega20_xgmi_wa)
>>>>> +            kiq->pmf->kiq_invalidate_tlbs(ring,
>>>>> +                              pasid, 2, all_hub);
>>>>>           kiq->pmf->kiq_invalidate_tlbs(ring,
>>>>>                       pasid, flush_type, all_hub);
>>>>>           amdgpu_fence_emit_polling(ring, &seq);
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/3] drm/amdgpu: Fix TLB invalidation request when using semaphore
  2020-01-18  1:37 [PATCH 1/3] drm/amdgpu: Fix TLB invalidation request when using semaphore Felix Kuehling
                   ` (3 preceding siblings ...)
  2020-01-20 16:20 ` Yong Zhao
@ 2020-01-21 22:41 ` Felix Kuehling
  4 siblings, 0 replies; 13+ messages in thread
From: Felix Kuehling @ 2020-01-21 22:41 UTC (permalink / raw)
  To: amd-gfx, Yong Zhao, Zeng, Oak, Shaoyun Liu, Christian König

Thanks for the reviews. I pushed patches 1 and 2 because they're obvious 
fixes. They have passed local testing on a Vega10. I'm still waiting for 
an opportunity to test the re-worked workaround in patch 3 on a 
problematic system with Vega20 and XGMI.

Regards,
   Felix

On 2020-01-17 8:37 p.m., Felix Kuehling wrote:
> Use a more meaningful variable name for the invalidation request
> that is distinct from the tmp variable that gets overwritten when
> acquiring the invalidation semaphore.
>
> Fixes: 00f607f38d82 ("drm/amdgpu: invalidate mmhub semaphore workaround in gmc9/gmc10")
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 5 +++--
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 8 ++++----
>   2 files changed, 7 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 86f4ffe408e7..d914555e1212 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -262,7 +262,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
>   {
>   	bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
>   	struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
> -	u32 tmp = gmc_v10_0_get_invalidate_req(vmid, flush_type);
> +	u32 inv_req = gmc_v10_0_get_invalidate_req(vmid, flush_type);
> +	u32 tmp;
>   	/* Use register 17 for GART */
>   	const unsigned eng = 17;
>   	unsigned int i;
> @@ -289,7 +290,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
>   			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
>   	}
>   
> -	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
> +	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
>   
>   	/*
>   	 * Issue a dummy read to wait for the ACK register to be cleared
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 54bdc1786ab1..6d95de1413c4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -476,13 +476,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   {
>   	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
>   	const unsigned eng = 17;
> -	u32 j, tmp;
> +	u32 j, inv_req, tmp;
>   	struct amdgpu_vmhub *hub;
>   
>   	BUG_ON(vmhub >= adev->num_vmhubs);
>   
>   	hub = &adev->vmhub[vmhub];
> -	tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type);
> +	inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
>   
>   	/* This is necessary for a HW workaround under SRIOV as well
>   	 * as GFXOFF under bare metal
> @@ -493,7 +493,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   		uint32_t req = hub->vm_inv_eng0_req + eng;
>   		uint32_t ack = hub->vm_inv_eng0_ack + eng;
>   
> -		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp,
> +		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
>   				1 << vmid);
>   		return;
>   	}
> @@ -521,7 +521,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
>   	}
>   
> -	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
> +	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
>   
>   	/*
>   	 * Issue a dummy read to wait for the ACK register to be cleared
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2020-01-21 22:42 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-18  1:37 [PATCH 1/3] drm/amdgpu: Fix TLB invalidation request when using semaphore Felix Kuehling
2020-01-18  1:37 ` [PATCH 2/3] drm/amdgpu: Use the correct flush_type in flush_gpu_tlb_pasid Felix Kuehling
2020-01-20 16:37   ` Zeng, Oak
2020-01-18  1:37 ` [PATCH 3/3] drm/amdgpu: Improve Vega20 XGMI TLB flush workaround Felix Kuehling
2020-01-20 16:41   ` Zeng, Oak
2020-01-20 17:47   ` shaoyunl
2020-01-20 17:58     ` Felix Kuehling
2020-01-20 18:28       ` shaoyunl
2020-01-20 18:40         ` Felix Kuehling
2020-01-20 18:50           ` shaoyunl
2020-01-18 13:22 ` [PATCH 1/3] drm/amdgpu: Fix TLB invalidation request when using semaphore Christian König
2020-01-20 16:20 ` Yong Zhao
2020-01-21 22:41 ` Felix Kuehling

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).