All of lore.kernel.org
 help / color / mirror / Atom feed
* Rework flushing changes to the TLB
@ 2023-09-05  6:04 Christian König
  2023-09-05  6:04 ` [PATCH 01/11] drm/amdgpu: fix and cleanup gmc_v9_0_flush_gpu_tlb Christian König
                   ` (10 more replies)
  0 siblings, 11 replies; 38+ messages in thread
From: Christian König @ 2023-09-05  6:04 UTC (permalink / raw)
  To: amd-gfx; +Cc: shashank.sharma

Hi guys,

as discussed internally the MES and KFD need some form of TLB fence
which signals when flushing VM updates out to the hardware is completed
and resources can be freed.

As prerequisite to this we need to rework all the different workarounds
and approaches around TLB flushing to be at a higher level.

While at it fix a bunch of bugs in that code which could trigger in
certain situations.

Please review,
Christian.



^ permalink raw reply	[flat|nested] 38+ messages in thread

* [PATCH 01/11] drm/amdgpu: fix and cleanup gmc_v9_0_flush_gpu_tlb
  2023-09-05  6:04 Rework flushing changes to the TLB Christian König
@ 2023-09-05  6:04 ` Christian König
  2023-09-05 20:45   ` Alex Deucher
  2023-09-08 18:58   ` Felix Kuehling
  2023-09-05  6:04 ` [PATCH 02/11] drm/amdgpu: rework gmc_v10_0_flush_gpu_tlb Christian König
                   ` (9 subsequent siblings)
  10 siblings, 2 replies; 38+ messages in thread
From: Christian König @ 2023-09-05  6:04 UTC (permalink / raw)
  To: amd-gfx; +Cc: shashank.sharma

The KIQ code path was ignoring the second flush. Also avoid long lines and
re-calculating the register offsets over and over again.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 29 +++++++++++++++++----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 0673cda547bb..4f6990ba71cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -814,13 +814,17 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 					uint32_t vmhub, uint32_t flush_type)
 {
 	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
+	u32 j, inv_req, inv_req2, tmp, sem, req, ack;
 	const unsigned int eng = 17;
-	u32 j, inv_req, inv_req2, tmp;
 	struct amdgpu_vmhub *hub;
 
 	BUG_ON(vmhub >= AMDGPU_MAX_VMHUBS);
 
 	hub = &adev->vmhub[vmhub];
+	sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
+	req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+	ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
 	if (adev->gmc.xgmi.num_physical_nodes &&
 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0)) {
 		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
@@ -852,6 +856,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 
 		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
 						   1 << vmid);
+		if (inv_req2)
+			amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack,
+							   inv_req2, 1 << vmid);
+
 		up_read(&adev->reset_domain->sem);
 		return;
 	}
@@ -870,9 +878,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 		for (j = 0; j < adev->usec_timeout; j++) {
 			/* a read return value of 1 means semaphore acquire */
 			if (vmhub >= AMDGPU_MMHUB0(0))
-				tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng);
+				tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem);
 			else
-				tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng);
+				tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem);
 			if (tmp & 0x1)
 				break;
 			udelay(1);
@@ -884,9 +892,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 
 	do {
 		if (vmhub >= AMDGPU_MMHUB0(0))
-			WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
+			WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req);
 		else
-			WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
+			WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req);
 
 		/*
 		 * Issue a dummy read to wait for the ACK register to
@@ -895,14 +903,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 		 */
 		if ((vmhub == AMDGPU_GFXHUB(0)) &&
 		    (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
-			RREG32_NO_KIQ(hub->vm_inv_eng0_req +
-				      hub->eng_distance * eng);
+			RREG32_NO_KIQ(req);
 
 		for (j = 0; j < adev->usec_timeout; j++) {
 			if (vmhub >= AMDGPU_MMHUB0(0))
-				tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_ack + hub->eng_distance * eng);
+				tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack);
 			else
-				tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_ack + hub->eng_distance * eng);
+				tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack);
 			if (tmp & (1 << vmid))
 				break;
 			udelay(1);
@@ -919,9 +926,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 		 * write with 0 means semaphore release
 		 */
 		if (vmhub >= AMDGPU_MMHUB0(0))
-			WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0);
+			WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0);
 		else
-			WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0);
+			WREG32_SOC15_IP_NO_KIQ(GC, sem, 0);
 	}
 
 	spin_unlock(&adev->gmc.invalidate_lock);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH 02/11] drm/amdgpu: rework gmc_v10_0_flush_gpu_tlb
  2023-09-05  6:04 Rework flushing changes to the TLB Christian König
  2023-09-05  6:04 ` [PATCH 01/11] drm/amdgpu: fix and cleanup gmc_v9_0_flush_gpu_tlb Christian König
@ 2023-09-05  6:04 ` Christian König
  2023-09-05 20:52   ` Alex Deucher
  2023-09-08 19:30   ` Felix Kuehling
  2023-09-05  6:04 ` [PATCH 03/11] drm/amdgpu: cleanup gmc_v11_0_flush_gpu_tlb Christian König
                   ` (8 subsequent siblings)
  10 siblings, 2 replies; 38+ messages in thread
From: Christian König @ 2023-09-05  6:04 UTC (permalink / raw)
  To: amd-gfx; +Cc: shashank.sharma

Move the SDMA workaround necessary for Navi 1x into a higher layer.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  |  48 +++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h  |   5 +-
 drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c |   3 +
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   | 159 ++++++-----------------
 4 files changed, 97 insertions(+), 118 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index d78bd9732543..857051093900 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -575,6 +575,54 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
 	return 0;
 }
 
+void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+			      uint32_t vmhub, uint32_t flush_type)
+{
+	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+	struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
+	struct dma_fence *fence;
+	struct amdgpu_job *job;
+	int r;
+
+	if (!hub->sdma_invalidation_workaround || vmid ||
+	    !adev->mman.buffer_funcs_enabled ||
+	    !adev->ib_pool_ready || amdgpu_in_reset(adev) ||
+	    !ring->sched.ready) {
+		adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, vmhub,
+						   flush_type);
+		return;
+	}
+
+	/* The SDMA on Navi 1x has a bug which can theoretically result in memory
+	 * corruption if an invalidation happens at the same time as an VA
+	 * translation. Avoid this by doing the invalidation from the SDMA
+	 * itself at least for GART.
+	 */
+	mutex_lock(&adev->mman.gtt_window_lock);
+	r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
+				     AMDGPU_FENCE_OWNER_UNDEFINED,
+				     16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
+				     &job);
+	if (r)
+		goto error_alloc;
+
+	job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
+	job->vm_needs_flush = true;
+	job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
+	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+	fence = amdgpu_job_submit(job);
+	mutex_unlock(&adev->mman.gtt_window_lock);
+
+	dma_fence_wait(fence, false);
+	dma_fence_put(fence);
+
+	return;
+
+error_alloc:
+	mutex_unlock(&adev->mman.gtt_window_lock);
+	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
+}
+
 /**
  * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ
  * @adev: amdgpu_device pointer
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index fdc25cd559b6..9e7df2f69123 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -117,6 +117,8 @@ struct amdgpu_vmhub {
 
 	uint32_t	vm_contexts_disable;
 
+	bool		sdma_invalidation_workaround;
+
 	const struct amdgpu_vmhub_funcs *vmhub_funcs;
 };
 
@@ -335,7 +337,6 @@ struct amdgpu_gmc {
 	u64 noretry_flags;
 };
 
-#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
 #define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub, inst) \
 	((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
 	((adev), (pasid), (type), (allhub), (inst)))
@@ -401,6 +402,8 @@ int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev);
 int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
 void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
 int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
+void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+			      uint32_t vmhub, uint32_t flush_type);
 
 extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev);
 extern void amdgpu_gmc_noretry_set(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
index a041c6c970e1..8521c45e8f38 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
@@ -471,6 +471,9 @@ static void gfxhub_v2_0_init(struct amdgpu_device *adev)
 		GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
 		GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
 
+	/* TODO: This is only needed on some Navi 1x revisions */
+	hub->sdma_invalidation_workaround = true;
+
 	hub->vmhub_funcs = &gfxhub_v2_0_vmhub_funcs;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index fa87a85e1017..1f70c57bcd69 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -230,20 +230,49 @@ static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
  * by the amdgpu vm/hsa code.
  */
 
-static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
-				   unsigned int vmhub, uint32_t flush_type)
+/**
+ * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: vm instance to flush
+ * @vmhub: vmhub type
+ * @flush_type: the flush type
+ *
+ * Flush the TLB for the requested page table.
+ */
+static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+					uint32_t vmhub, uint32_t flush_type)
 {
 	bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
 	struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
 	u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
-	u32 tmp;
 	/* Use register 17 for GART */
 	const unsigned int eng = 17;
-	unsigned int i;
 	unsigned char hub_ip = 0;
+	u32 sem, req, ack;
+	unsigned int i;
+	u32 tmp;
 
-	hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ?
-		   GC_HWIP : MMHUB_HWIP;
+	sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
+	req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+	ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
+	/* flush hdp cache */
+	adev->hdp.funcs->flush_hdp(adev, NULL);
+
+	/* For SRIOV run time, driver shouldn't access the register through MMIO
+	 * Directly use kiq to do the vm invalidation instead
+	 */
+	if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
+	    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
+	    down_read_trylock(&adev->reset_domain->sem)) {
+		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
+				1 << vmid);
+		up_read(&adev->reset_domain->sem);
+		return;
+	}
+
+	hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP;
 
 	spin_lock(&adev->gmc.invalidate_lock);
 	/*
@@ -257,9 +286,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
 	if (use_semaphore) {
 		for (i = 0; i < adev->usec_timeout; i++) {
 			/* a read return value of 1 means semaphore acuqire */
-			tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
-					 hub->eng_distance * eng, hub_ip);
-
+			tmp = RREG32_RLC_NO_KIQ(sem, hub_ip);
 			if (tmp & 0x1)
 				break;
 			udelay(1);
@@ -269,9 +296,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
 			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
 	}
 
-	WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
-			  hub->eng_distance * eng,
-			  inv_req, hub_ip);
+	WREG32_RLC_NO_KIQ(req, inv_req, hub_ip);
 
 	/*
 	 * Issue a dummy read to wait for the ACK register to be cleared
@@ -279,14 +304,11 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
 	 */
 	if ((vmhub == AMDGPU_GFXHUB(0)) &&
 	    (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0)))
-		RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
-				  hub->eng_distance * eng, hub_ip);
+		RREG32_RLC_NO_KIQ(req, hub_ip);
 
 	/* Wait for ACK with a delay.*/
 	for (i = 0; i < adev->usec_timeout; i++) {
-		tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
-				  hub->eng_distance * eng, hub_ip);
-
+		tmp = RREG32_RLC_NO_KIQ(ack, hub_ip);
 		tmp &= 1 << vmid;
 		if (tmp)
 			break;
@@ -296,109 +318,12 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
 
 	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
 	if (use_semaphore)
-		/*
-		 * add semaphore release after invalidation,
-		 * write with 0 means semaphore release
-		 */
-		WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
-				  hub->eng_distance * eng, 0, hub_ip);
+		WREG32_RLC_NO_KIQ(sem, 0, hub_ip);
 
 	spin_unlock(&adev->gmc.invalidate_lock);
 
-	if (i < adev->usec_timeout)
-		return;
-
-	DRM_ERROR("Timeout waiting for VM flush hub: %d!\n", vmhub);
-}
-
-/**
- * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
- *
- * @adev: amdgpu_device pointer
- * @vmid: vm instance to flush
- * @vmhub: vmhub type
- * @flush_type: the flush type
- *
- * Flush the TLB for the requested page table.
- */
-static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
-					uint32_t vmhub, uint32_t flush_type)
-{
-	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
-	struct dma_fence *fence;
-	struct amdgpu_job *job;
-
-	int r;
-
-	/* flush hdp cache */
-	adev->hdp.funcs->flush_hdp(adev, NULL);
-
-	/* For SRIOV run time, driver shouldn't access the register through MMIO
-	 * Directly use kiq to do the vm invalidation instead
-	 */
-	if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
-	    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
-	    down_read_trylock(&adev->reset_domain->sem)) {
-		struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
-		const unsigned int eng = 17;
-		u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
-		u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
-		u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
-
-		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
-				1 << vmid);
-
-		up_read(&adev->reset_domain->sem);
-		return;
-	}
-
-	mutex_lock(&adev->mman.gtt_window_lock);
-
-	if (vmhub == AMDGPU_MMHUB0(0)) {
-		gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB0(0), 0);
-		mutex_unlock(&adev->mman.gtt_window_lock);
-		return;
-	}
-
-	BUG_ON(vmhub != AMDGPU_GFXHUB(0));
-
-	if (!adev->mman.buffer_funcs_enabled ||
-	    !adev->ib_pool_ready ||
-	    amdgpu_in_reset(adev) ||
-	    ring->sched.ready == false) {
-		gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB(0), 0);
-		mutex_unlock(&adev->mman.gtt_window_lock);
-		return;
-	}
-
-	/* The SDMA on Navi has a bug which can theoretically result in memory
-	 * corruption if an invalidation happens at the same time as an VA
-	 * translation. Avoid this by doing the invalidation from the SDMA
-	 * itself.
-	 */
-	r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
-				     AMDGPU_FENCE_OWNER_UNDEFINED,
-				     16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
-				     &job);
-	if (r)
-		goto error_alloc;
-
-	job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
-	job->vm_needs_flush = true;
-	job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
-	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
-	fence = amdgpu_job_submit(job);
-
-	mutex_unlock(&adev->mman.gtt_window_lock);
-
-	dma_fence_wait(fence, false);
-	dma_fence_put(fence);
-
-	return;
-
-error_alloc:
-	mutex_unlock(&adev->mman.gtt_window_lock);
-	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
+	if (i >= adev->usec_timeout)
+		DRM_ERROR("Timeout waiting for VM flush hub: %d!\n", vmhub);
 }
 
 /**
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH 03/11] drm/amdgpu: cleanup gmc_v11_0_flush_gpu_tlb
  2023-09-05  6:04 Rework flushing changes to the TLB Christian König
  2023-09-05  6:04 ` [PATCH 01/11] drm/amdgpu: fix and cleanup gmc_v9_0_flush_gpu_tlb Christian König
  2023-09-05  6:04 ` [PATCH 02/11] drm/amdgpu: rework gmc_v10_0_flush_gpu_tlb Christian König
@ 2023-09-05  6:04 ` Christian König
  2023-09-05 20:56   ` Alex Deucher
  2023-09-05  6:04 ` [PATCH 04/11] drm/amdgpu: fix and cleanup gmc_v7_0_flush_gpu_tlb_pasid Christian König
                   ` (7 subsequent siblings)
  10 siblings, 1 reply; 38+ messages in thread
From: Christian König @ 2023-09-05  6:04 UTC (permalink / raw)
  To: amd-gfx; +Cc: shashank.sharma

Remove leftovers from copying this from the gmc v10 code.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 108 ++++++++++---------------
 1 file changed, 41 insertions(+), 67 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index dcbba981462e..3c3ad3f17c6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -186,27 +186,50 @@ static bool gmc_v11_0_get_vmid_pasid_mapping_info(
 	return !!(*p_pasid);
 }
 
-/*
- * GART
- * VMID 0 is the physical GPU addresses as used by the kernel.
- * VMIDs 1-15 are used for userspace clients and are handled
- * by the amdgpu vm/hsa code.
+/**
+ * gmc_v11_0_flush_gpu_tlb - gart tlb flush callback
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: vm instance to flush
+ * @vmhub: which hub to flush
+ * @flush_type: the flush type
+ *
+ * Flush the TLB for the requested page table.
  */
-
-static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
-				   unsigned int vmhub, uint32_t flush_type)
+static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+					uint32_t vmhub, uint32_t flush_type)
 {
 	bool use_semaphore = gmc_v11_0_use_invalidate_semaphore(adev, vmhub);
 	struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
 	u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
-	u32 tmp;
 	/* Use register 17 for GART */
 	const unsigned int eng = 17;
+	unsigned char hub_ip;
+	u32 sem, req, ack;
 	unsigned int i;
-	unsigned char hub_ip = 0;
+	u32 tmp;
+
+	if ((vmhub == AMDGPU_GFXHUB(0)) && !adev->gfx.is_poweron)
+		return;
 
-	hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ?
-		   GC_HWIP : MMHUB_HWIP;
+	sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
+	req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+	ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
+	/* flush hdp cache */
+	adev->hdp.funcs->flush_hdp(adev, NULL);
+
+	/* For SRIOV run time, driver shouldn't access the register through MMIO
+	 * Directly use kiq to do the vm invalidation instead
+	 */
+	if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) &&
+	    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
+		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
+				1 << vmid);
+		return;
+	}
+
+	hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP;
 
 	spin_lock(&adev->gmc.invalidate_lock);
 	/*
@@ -220,8 +243,7 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
 	if (use_semaphore) {
 		for (i = 0; i < adev->usec_timeout; i++) {
 			/* a read return value of 1 means semaphore acuqire */
-			tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
-					    hub->eng_distance * eng, hub_ip);
+			tmp = RREG32_RLC_NO_KIQ(sem, hub_ip);
 			if (tmp & 0x1)
 				break;
 			udelay(1);
@@ -231,12 +253,11 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
 			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
 	}
 
-	WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req, hub_ip);
+	WREG32_RLC_NO_KIQ(req, inv_req, hub_ip);
 
 	/* Wait for ACK with a delay.*/
 	for (i = 0; i < adev->usec_timeout; i++) {
-		tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
-				    hub->eng_distance * eng, hub_ip);
+		tmp = RREG32_RLC_NO_KIQ(ack, hub_ip);
 		tmp &= 1 << vmid;
 		if (tmp)
 			break;
@@ -246,12 +267,7 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
 
 	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
 	if (use_semaphore)
-		/*
-		 * add semaphore release after invalidation,
-		 * write with 0 means semaphore release
-		 */
-		WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
-			      hub->eng_distance * eng, 0, hub_ip);
+		WREG32_RLC_NO_KIQ(sem, 0, hub_ip);
 
 	/* Issue additional private vm invalidation to MMHUB */
 	if ((vmhub != AMDGPU_GFXHUB(0)) &&
@@ -268,50 +284,8 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
 
 	spin_unlock(&adev->gmc.invalidate_lock);
 
-	if (i < adev->usec_timeout)
-		return;
-
-	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
-}
-
-/**
- * gmc_v11_0_flush_gpu_tlb - gart tlb flush callback
- *
- * @adev: amdgpu_device pointer
- * @vmid: vm instance to flush
- * @vmhub: which hub to flush
- * @flush_type: the flush type
- *
- * Flush the TLB for the requested page table.
- */
-static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
-					uint32_t vmhub, uint32_t flush_type)
-{
-	if ((vmhub == AMDGPU_GFXHUB(0)) && !adev->gfx.is_poweron)
-		return;
-
-	/* flush hdp cache */
-	adev->hdp.funcs->flush_hdp(adev, NULL);
-
-	/* For SRIOV run time, driver shouldn't access the register through MMIO
-	 * Directly use kiq to do the vm invalidation instead
-	 */
-	if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) &&
-	    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
-		struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
-		const unsigned int eng = 17;
-		u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
-		u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
-		u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
-
-		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
-				1 << vmid);
-		return;
-	}
-
-	mutex_lock(&adev->mman.gtt_window_lock);
-	gmc_v11_0_flush_vm_hub(adev, vmid, vmhub, 0);
-	mutex_unlock(&adev->mman.gtt_window_lock);
+	if (i >= adev->usec_timeout)
+		DRM_ERROR("Timeout waiting for VM flush ACK!\n");
 }
 
 /**
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH 04/11] drm/amdgpu: fix and cleanup gmc_v7_0_flush_gpu_tlb_pasid
  2023-09-05  6:04 Rework flushing changes to the TLB Christian König
                   ` (2 preceding siblings ...)
  2023-09-05  6:04 ` [PATCH 03/11] drm/amdgpu: cleanup gmc_v11_0_flush_gpu_tlb Christian König
@ 2023-09-05  6:04 ` Christian König
  2023-09-05 22:39   ` Alex Deucher
                     ` (2 more replies)
  2023-09-05  6:04 ` [PATCH 05/11] drm/amdgpu: fix and cleanup gmc_v8_0_flush_gpu_tlb_pasid Christian König
                   ` (6 subsequent siblings)
  10 siblings, 3 replies; 38+ messages in thread
From: Christian König @ 2023-09-05  6:04 UTC (permalink / raw)
  To: amd-gfx; +Cc: shashank.sharma

Testing for reset is pointless since the reset can start right after the
test. Grab the reset semaphore instead.

The same PASID can be used by more than one VMID. Build a mask of VMIDs
to reset instead of just resetting the first one.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 6a6929ac2748..9e19a752f94b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -33,6 +33,7 @@
 #include "amdgpu_ucode.h"
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_gem.h"
+#include "amdgpu_reset.h"
 
 #include "bif/bif_4_1_d.h"
 #include "bif/bif_4_1_sh_mask.h"
@@ -426,23 +427,23 @@ static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 					uint16_t pasid, uint32_t flush_type,
 					bool all_hub, uint32_t inst)
 {
+	u32 mask = 0x0;
 	int vmid;
-	unsigned int tmp;
 
-	if (amdgpu_in_reset(adev))
-		return -EIO;
+	if(!down_read_trylock(&adev->reset_domain->sem))
+		return 0;
 
 	for (vmid = 1; vmid < 16; vmid++) {
+		u32 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
 
-		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
 		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
-			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
-			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
-			RREG32(mmVM_INVALIDATE_RESPONSE);
-			break;
-		}
+		    (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid)
+			mask |= 1 << vmid;
 	}
 
+	WREG32(mmVM_INVALIDATE_REQUEST, mask);
+	RREG32(mmVM_INVALIDATE_RESPONSE);
+	up_read(&adev->reset_domain->sem);
 	return 0;
 }
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH 05/11] drm/amdgpu: fix and cleanup gmc_v8_0_flush_gpu_tlb_pasid
  2023-09-05  6:04 Rework flushing changes to the TLB Christian König
                   ` (3 preceding siblings ...)
  2023-09-05  6:04 ` [PATCH 04/11] drm/amdgpu: fix and cleanup gmc_v7_0_flush_gpu_tlb_pasid Christian König
@ 2023-09-05  6:04 ` Christian König
  2023-09-05 22:40   ` Alex Deucher
                     ` (2 more replies)
  2023-09-05  6:04 ` [PATCH 06/11] drm/amdgpu: fix and cleanup gmc_v9_0_flush_gpu_tlb_pasid Christian König
                   ` (5 subsequent siblings)
  10 siblings, 3 replies; 38+ messages in thread
From: Christian König @ 2023-09-05  6:04 UTC (permalink / raw)
  To: amd-gfx; +Cc: shashank.sharma

Testing for reset is pointless since the reset can start right after the
test. Grab the reset semaphore instead.

The same PASID can be used by more than one VMID. Build a mask of VMIDs
to reset instead of just resetting the first one.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 5af235202513..2d51531a1f2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -31,6 +31,7 @@
 #include "amdgpu_ucode.h"
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_gem.h"
+#include "amdgpu_reset.h"
 
 #include "gmc/gmc_8_1_d.h"
 #include "gmc/gmc_8_1_sh_mask.h"
@@ -616,25 +617,24 @@ static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 					uint16_t pasid, uint32_t flush_type,
 					bool all_hub, uint32_t inst)
 {
+	u32 mask = 0x0;
 	int vmid;
-	unsigned int tmp;
 
-	if (amdgpu_in_reset(adev))
-		return -EIO;
+	if(!down_read_trylock(&adev->reset_domain->sem))
+		return 0;
 
 	for (vmid = 1; vmid < 16; vmid++) {
+		u32 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
 
-		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
 		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
-			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
-			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
-			RREG32(mmVM_INVALIDATE_RESPONSE);
-			break;
-		}
+		    (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid)
+			mask |= 1 << vmid;
 	}
 
+	WREG32(mmVM_INVALIDATE_REQUEST, mask);
+	RREG32(mmVM_INVALIDATE_RESPONSE);
+	up_read(&adev->reset_domain->sem);
 	return 0;
-
 }
 
 /*
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH 06/11] drm/amdgpu: fix and cleanup gmc_v9_0_flush_gpu_tlb_pasid
  2023-09-05  6:04 Rework flushing changes to the TLB Christian König
                   ` (4 preceding siblings ...)
  2023-09-05  6:04 ` [PATCH 05/11] drm/amdgpu: fix and cleanup gmc_v8_0_flush_gpu_tlb_pasid Christian König
@ 2023-09-05  6:04 ` Christian König
  2023-09-05 22:45   ` Deucher, Alexander
  2023-09-08 21:13   ` Felix Kuehling
  2023-09-05  6:04 ` [PATCH 07/11] drm/amdgpu: cleanup gmc_v10_0_flush_gpu_tlb_pasid Christian König
                   ` (4 subsequent siblings)
  10 siblings, 2 replies; 38+ messages in thread
From: Christian König @ 2023-09-05  6:04 UTC (permalink / raw)
  To: amd-gfx; +Cc: shashank.sharma

Testing for reset is pointless since the reset can start right after the
test.

The same PASID can be used by more than one VMID, reset each of them.

Move the KIQ and all the workaround handling into common GMC code.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c |  60 +++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  10 ++-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 109 ++++++++----------------
 3 files changed, 102 insertions(+), 77 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 857051093900..b5f1a1218725 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -32,6 +32,7 @@
 #include "amdgpu.h"
 #include "amdgpu_gmc.h"
 #include "amdgpu_ras.h"
+#include "amdgpu_reset.h"
 #include "amdgpu_xgmi.h"
 
 #include <drm/drm_drv.h>
@@ -623,6 +624,65 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
 }
 
+int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
+				   uint32_t flush_type, bool all_hub,
+				   uint32_t inst)
+{
+	u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT :
+		adev->usec_timeout;
+	struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring;
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
+	unsigned int ndw;
+	signed long r;
+	uint32_t seq;
+
+	if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready ||
+	    !down_read_trylock(&adev->reset_domain->sem)) {
+		return adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
+								flush_type,
+								all_hub, inst);
+	}
+
+	/* 2 dwords flush + 8 dwords fence */
+	ndw = kiq->pmf->invalidate_tlbs_size + 8;
+
+	if (adev->gmc.flush_tlb_needs_extra_type_2)
+		ndw += kiq->pmf->invalidate_tlbs_size;
+
+	if (adev->gmc.flush_tlb_needs_extra_type_0)
+		ndw += kiq->pmf->invalidate_tlbs_size;
+
+	spin_lock(&adev->gfx.kiq[inst].ring_lock);
+	amdgpu_ring_alloc(ring, ndw);
+	if (adev->gmc.flush_tlb_needs_extra_type_2)
+		kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub);
+
+	if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0)
+		kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub);
+
+	kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub);
+	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+	if (r) {
+		amdgpu_ring_undo(ring);
+		spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+		goto error_unlock_reset;
+	}
+
+	amdgpu_ring_commit(ring);
+	spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+	r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
+	if (r < 1) {
+		dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
+		r = -ETIME;
+		goto error_unlock_reset;
+	}
+	r = 0;
+
+error_unlock_reset:
+	up_read(&adev->reset_domain->sem);
+	return r;
+}
+
 /**
  * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ
  * @adev: amdgpu_device pointer
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 9e7df2f69123..7732d4ef845e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -335,11 +335,12 @@ struct amdgpu_gmc {
 	u64 MC_VM_MX_L1_TLB_CNTL;
 
 	u64 noretry_flags;
+
+	bool flush_tlb_needs_extra_type_0;
+	bool flush_tlb_needs_extra_type_2;
+	bool flush_pasid_uses_kiq;
 };
 
-#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub, inst) \
-	((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
-	((adev), (pasid), (type), (allhub), (inst)))
 #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
 #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
 #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
@@ -404,6 +405,9 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
 int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
 void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 			      uint32_t vmhub, uint32_t flush_type);
+int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
+				   uint32_t flush_type, bool all_hub,
+				   uint32_t inst);
 
 extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev);
 extern void amdgpu_gmc_noretry_set(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 4f6990ba71cb..39016b6900d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -954,87 +954,30 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 					uint16_t pasid, uint32_t flush_type,
 					bool all_hub, uint32_t inst)
 {
-	int vmid, i;
-	signed long r;
-	uint32_t seq;
-	uint16_t queried_pasid;
-	bool ret;
-	u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
-	struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring;
-	struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
-
-	if (amdgpu_in_reset(adev))
-		return -EIO;
-
-	if (ring->sched.ready && down_read_trylock(&adev->reset_domain->sem)) {
-		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
-		 * heavy-weight TLB flush (type 2), which flushes
-		 * both. Due to a race condition with concurrent
-		 * memory accesses using the same TLB cache line, we
-		 * still need a second TLB flush after this.
-		 */
-		bool vega20_xgmi_wa = (adev->gmc.xgmi.num_physical_nodes &&
-				       adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0));
-		/* 2 dwords flush + 8 dwords fence */
-		unsigned int ndw = kiq->pmf->invalidate_tlbs_size + 8;
-
-		if (vega20_xgmi_wa)
-			ndw += kiq->pmf->invalidate_tlbs_size;
-
-		spin_lock(&adev->gfx.kiq[inst].ring_lock);
-		/* 2 dwords flush + 8 dwords fence */
-		amdgpu_ring_alloc(ring, ndw);
-		if (vega20_xgmi_wa)
-			kiq->pmf->kiq_invalidate_tlbs(ring,
-						      pasid, 2, all_hub);
-
-		if (flush_type == 2 &&
-		    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) &&
-		    adev->rev_id == 0)
-			kiq->pmf->kiq_invalidate_tlbs(ring,
-						pasid, 0, all_hub);
-
-		kiq->pmf->kiq_invalidate_tlbs(ring,
-					pasid, flush_type, all_hub);
-		r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
-		if (r) {
-			amdgpu_ring_undo(ring);
-			spin_unlock(&adev->gfx.kiq[inst].ring_lock);
-			up_read(&adev->reset_domain->sem);
-			return -ETIME;
-		}
-
-		amdgpu_ring_commit(ring);
-		spin_unlock(&adev->gfx.kiq[inst].ring_lock);
-		r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
-		if (r < 1) {
-			dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
-			up_read(&adev->reset_domain->sem);
-			return -ETIME;
-		}
-		up_read(&adev->reset_domain->sem);
-		return 0;
-	}
+	uint16_t queried;
+	int i, vmid;
 
 	for (vmid = 1; vmid < 16; vmid++) {
+		bool valid;
 
-		ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
-				&queried_pasid);
-		if (ret && queried_pasid == pasid) {
-			if (all_hub) {
-				for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
-					gmc_v9_0_flush_gpu_tlb(adev, vmid,
-							i, flush_type);
-			} else {
-				gmc_v9_0_flush_gpu_tlb(adev, vmid,
-						AMDGPU_GFXHUB(0), flush_type);
-			}
-			break;
+		valid = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+								 &queried);
+		if (!valid || queried != pasid)
+			continue;
+
+		if (all_hub) {
+			for_each_set_bit(i, adev->vmhubs_mask,
+					 AMDGPU_MAX_VMHUBS)
+				gmc_v9_0_flush_gpu_tlb(adev, vmid, i,
+						       flush_type);
+		} else {
+			gmc_v9_0_flush_gpu_tlb(adev, vmid,
+					       AMDGPU_GFXHUB(0),
+					       flush_type);
 		}
 	}
 
 	return 0;
-
 }
 
 static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
@@ -2335,6 +2278,24 @@ static int gmc_v9_0_hw_init(void *handle)
 	bool value;
 	int i, r;
 
+	adev->gmc.flush_pasid_uses_kiq = true;
+
+	/* Vega20+XGMI caches PTEs in TC and TLB. Add a heavy-weight TLB flush
+	 * (type 2), which flushes both. Due to a race condition with
+	 * concurrent memory accesses using the same TLB cache line, we still
+	 * need a second TLB flush after this.
+	 */
+	adev->gmc.flush_tlb_needs_extra_type_2 =
+		adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0) &&
+		adev->gmc.xgmi.num_physical_nodes;
+	/*
+	 * TODO: This workaround is badly documented and had a buggy
+	 * implementation. We should probably verify what we do here.
+	 */
+	adev->gmc.flush_tlb_needs_extra_type_0 =
+		adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) &&
+		adev->rev_id == 0;
+
 	/* The sequence of these two function calls matters.*/
 	gmc_v9_0_init_golden_registers(adev);
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH 07/11] drm/amdgpu: cleanup gmc_v10_0_flush_gpu_tlb_pasid
  2023-09-05  6:04 Rework flushing changes to the TLB Christian König
                   ` (5 preceding siblings ...)
  2023-09-05  6:04 ` [PATCH 06/11] drm/amdgpu: fix and cleanup gmc_v9_0_flush_gpu_tlb_pasid Christian König
@ 2023-09-05  6:04 ` Christian König
  2023-09-05 22:46   ` Alex Deucher
  2023-09-08 21:13   ` Felix Kuehling
  2023-09-05  6:04 ` [PATCH 08/11] drm/amdgpu: fix and cleanup gmc_v11_0_flush_gpu_tlb_pasid Christian König
                   ` (3 subsequent siblings)
  10 siblings, 2 replies; 38+ messages in thread
From: Christian König @ 2023-09-05  6:04 UTC (permalink / raw)
  To: amd-gfx; +Cc: shashank.sharma

The same PASID can be used by more than one VMID, reset each of them.

Use the common KIQ handling.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 66 ++++++++------------------
 1 file changed, 19 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 1f70c57bcd69..407ddb926941 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -341,57 +341,27 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 					uint16_t pasid, uint32_t flush_type,
 					bool all_hub, uint32_t inst)
 {
+	uint16_t queried;
 	int vmid, i;
-	signed long r;
-	uint32_t seq;
-	uint16_t queried_pasid;
-	bool ret;
-	u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
-	struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring;
-	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
-
-	if (amdgpu_emu_mode == 0 && ring->sched.ready) {
-		spin_lock(&adev->gfx.kiq[0].ring_lock);
-		/* 2 dwords flush + 8 dwords fence */
-		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
-		kiq->pmf->kiq_invalidate_tlbs(ring,
-					pasid, flush_type, all_hub);
-		r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
-		if (r) {
-			amdgpu_ring_undo(ring);
-			spin_unlock(&adev->gfx.kiq[0].ring_lock);
-			return -ETIME;
-		}
-
-		amdgpu_ring_commit(ring);
-		spin_unlock(&adev->gfx.kiq[0].ring_lock);
-		r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
-		if (r < 1) {
-			dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
-			return -ETIME;
-		}
-
-		return 0;
-	}
 
 	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
-
-		ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
-				&queried_pasid);
-		if (ret	&& queried_pasid == pasid) {
-			if (all_hub) {
-				for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
-					gmc_v10_0_flush_gpu_tlb(adev, vmid,
-							i, flush_type);
-			} else {
-				gmc_v10_0_flush_gpu_tlb(adev, vmid,
-						AMDGPU_GFXHUB(0), flush_type);
-			}
-			if (!adev->enable_mes)
-				break;
+		bool valid;
+
+		valid = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+								  &queried);
+		if (!valid || queried != pasid)
+			continue;
+
+		if (all_hub) {
+			for_each_set_bit(i, adev->vmhubs_mask,
+					 AMDGPU_MAX_VMHUBS)
+				gmc_v10_0_flush_gpu_tlb(adev, vmid, i,
+							flush_type);
+		} else {
+			gmc_v10_0_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0),
+						flush_type);
 		}
 	}
-
 	return 0;
 }
 
@@ -1009,8 +979,10 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
 
 static int gmc_v10_0_hw_init(void *handle)
 {
-	int r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	int r;
+
+	adev->gmc.flush_pasid_uses_kiq = !amdgpu_emu_mode;
 
 	/* The sequence of these two function calls matters.*/
 	gmc_v10_0_init_golden_registers(adev);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH 08/11] drm/amdgpu: fix and cleanup gmc_v11_0_flush_gpu_tlb_pasid
  2023-09-05  6:04 Rework flushing changes to the TLB Christian König
                   ` (6 preceding siblings ...)
  2023-09-05  6:04 ` [PATCH 07/11] drm/amdgpu: cleanup gmc_v10_0_flush_gpu_tlb_pasid Christian König
@ 2023-09-05  6:04 ` Christian König
  2023-09-05 22:47   ` Alex Deucher
  2023-09-05  6:04 ` [PATCH 09/11] drm/amdgpu: drop error return from flush_gpu_tlb_pasid Christian König
                   ` (2 subsequent siblings)
  10 siblings, 1 reply; 38+ messages in thread
From: Christian König @ 2023-09-05  6:04 UTC (permalink / raw)
  To: amd-gfx; +Cc: shashank.sharma

The same PASID can be used by more than one VMID, reset each of them.

Use the common KIQ handling.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 63 ++++++++------------------
 1 file changed, 19 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 3c3ad3f17c6a..aa39c1087e44 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -303,54 +303,27 @@ static int gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 					uint16_t pasid, uint32_t flush_type,
 					bool all_hub, uint32_t inst)
 {
+	uint16_t queried;
 	int vmid, i;
-	signed long r;
-	uint32_t seq;
-	uint16_t queried_pasid;
-	bool ret;
-	struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring;
-	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
-
-	if (amdgpu_emu_mode == 0 && ring->sched.ready) {
-		spin_lock(&adev->gfx.kiq[0].ring_lock);
-		/* 2 dwords flush + 8 dwords fence */
-		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
-		kiq->pmf->kiq_invalidate_tlbs(ring,
-					pasid, flush_type, all_hub);
-		r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
-		if (r) {
-			amdgpu_ring_undo(ring);
-			spin_unlock(&adev->gfx.kiq[0].ring_lock);
-			return -ETIME;
-		}
-
-		amdgpu_ring_commit(ring);
-		spin_unlock(&adev->gfx.kiq[0].ring_lock);
-		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
-		if (r < 1) {
-			dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
-			return -ETIME;
-		}
-
-		return 0;
-	}
 
 	for (vmid = 1; vmid < 16; vmid++) {
-
-		ret = gmc_v11_0_get_vmid_pasid_mapping_info(adev, vmid,
-				&queried_pasid);
-		if (ret	&& queried_pasid == pasid) {
-			if (all_hub) {
-				for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
-					gmc_v11_0_flush_gpu_tlb(adev, vmid,
-							i, flush_type);
-			} else {
-				gmc_v11_0_flush_gpu_tlb(adev, vmid,
-						AMDGPU_GFXHUB(0), flush_type);
-			}
+		bool valid;
+
+		valid = gmc_v11_0_get_vmid_pasid_mapping_info(adev, vmid,
+							      &queried);
+		if (!valid || queried != pasid)
+			continue;
+
+		if (all_hub) {
+			for_each_set_bit(i, adev->vmhubs_mask,
+					 AMDGPU_MAX_VMHUBS)
+				gmc_v11_0_flush_gpu_tlb(adev, vmid, i,
+							flush_type);
+		} else {
+			gmc_v11_0_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0),
+						flush_type);
 		}
 	}
-
 	return 0;
 }
 
@@ -918,8 +891,10 @@ static int gmc_v11_0_gart_enable(struct amdgpu_device *adev)
 
 static int gmc_v11_0_hw_init(void *handle)
 {
-	int r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	int r;
+
+	adev->gmc.flush_pasid_uses_kiq = !amdgpu_emu_mode;
 
 	/* The sequence of these two function calls matters.*/
 	gmc_v11_0_init_golden_registers(adev);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH 09/11] drm/amdgpu: drop error return from flush_gpu_tlb_pasid
  2023-09-05  6:04 Rework flushing changes to the TLB Christian König
                   ` (7 preceding siblings ...)
  2023-09-05  6:04 ` [PATCH 08/11] drm/amdgpu: fix and cleanup gmc_v11_0_flush_gpu_tlb_pasid Christian König
@ 2023-09-05  6:04 ` Christian König
  2023-09-05 22:48   ` Alex Deucher
  2023-09-05  6:04 ` [PATCH 10/11] drm/amdgpu: rework lock handling for flush_tlb Christian König
  2023-09-05  6:04 ` [PATCH 11/11] drm/amdgpu: further move TLB hw workarounds a layer up Christian König
  10 siblings, 1 reply; 38+ messages in thread
From: Christian König @ 2023-09-05  6:04 UTC (permalink / raw)
  To: amd-gfx; +Cc: shashank.sharma

That function never fails, drop the error return.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 7 ++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 +++---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 7 +++----
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c  | 7 +++----
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   | 9 ++++-----
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   | 9 ++++-----
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 8 +++-----
 7 files changed, 24 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index b5f1a1218725..15814cb801e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -638,9 +638,10 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
 
 	if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready ||
 	    !down_read_trylock(&adev->reset_domain->sem)) {
-		return adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
-								flush_type,
-								all_hub, inst);
+		adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
+							 flush_type, all_hub,
+							 inst);
+		return 0;
 	}
 
 	/* 2 dwords flush + 8 dwords fence */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 7732d4ef845e..dd0ede75e5d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -130,9 +130,9 @@ struct amdgpu_gmc_funcs {
 	void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
 				uint32_t vmhub, uint32_t flush_type);
 	/* flush the vm tlb via pasid */
-	int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
-					uint32_t flush_type, bool all_hub,
-					uint32_t inst);
+	void (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
+				    uint32_t flush_type, bool all_hub,
+				    uint32_t inst);
 	/* flush the vm tlb via ring */
 	uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
 				       uint64_t pd_addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 407ddb926941..40d432d46469 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -337,9 +337,9 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
  *
  * Flush the TLB for the requested pasid.
  */
-static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
-					uint16_t pasid, uint32_t flush_type,
-					bool all_hub, uint32_t inst)
+static void gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					  uint16_t pasid, uint32_t flush_type,
+					  bool all_hub, uint32_t inst)
 {
 	uint16_t queried;
 	int vmid, i;
@@ -362,7 +362,6 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 						flush_type);
 		}
 	}
-	return 0;
 }
 
 static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index aa39c1087e44..50bc5f151038 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -299,9 +299,9 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
  *
  * Flush the TLB for the requested pasid.
  */
-static int gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
-					uint16_t pasid, uint32_t flush_type,
-					bool all_hub, uint32_t inst)
+static void gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					  uint16_t pasid, uint32_t flush_type,
+					  bool all_hub, uint32_t inst)
 {
 	uint16_t queried;
 	int vmid, i;
@@ -324,7 +324,6 @@ static int gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 						flush_type);
 		}
 	}
-	return 0;
 }
 
 static uint64_t gmc_v11_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 9e19a752f94b..fa3586efacd2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -423,15 +423,15 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
  *
  * Flush the TLB for the requested pasid.
  */
-static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
-					uint16_t pasid, uint32_t flush_type,
-					bool all_hub, uint32_t inst)
+static void gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					 uint16_t pasid, uint32_t flush_type,
+					 bool all_hub, uint32_t inst)
 {
 	u32 mask = 0x0;
 	int vmid;
 
 	if(!down_read_trylock(&adev->reset_domain->sem))
-		return 0;
+		return;
 
 	for (vmid = 1; vmid < 16; vmid++) {
 		u32 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
@@ -444,7 +444,6 @@ static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 	WREG32(mmVM_INVALIDATE_REQUEST, mask);
 	RREG32(mmVM_INVALIDATE_RESPONSE);
 	up_read(&adev->reset_domain->sem);
-	return 0;
 }
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 2d51531a1f2d..ffcd79d28b9a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -613,15 +613,15 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
  *
  * Flush the TLB for the requested pasid.
  */
-static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
-					uint16_t pasid, uint32_t flush_type,
-					bool all_hub, uint32_t inst)
+static void gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					 uint16_t pasid, uint32_t flush_type,
+					 bool all_hub, uint32_t inst)
 {
 	u32 mask = 0x0;
 	int vmid;
 
 	if(!down_read_trylock(&adev->reset_domain->sem))
-		return 0;
+		return;
 
 	for (vmid = 1; vmid < 16; vmid++) {
 		u32 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
@@ -634,7 +634,6 @@ static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 	WREG32(mmVM_INVALIDATE_REQUEST, mask);
 	RREG32(mmVM_INVALIDATE_RESPONSE);
 	up_read(&adev->reset_domain->sem);
-	return 0;
 }
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 39016b6900d3..94ba16536fc2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -950,9 +950,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
  *
  * Flush the TLB for the requested pasid.
  */
-static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
-					uint16_t pasid, uint32_t flush_type,
-					bool all_hub, uint32_t inst)
+static void gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					 uint16_t pasid, uint32_t flush_type,
+					 bool all_hub, uint32_t inst)
 {
 	uint16_t queried;
 	int i, vmid;
@@ -976,8 +976,6 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 					       flush_type);
 		}
 	}
-
-	return 0;
 }
 
 static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH 10/11] drm/amdgpu: rework lock handling for flush_tlb
  2023-09-05  6:04 Rework flushing changes to the TLB Christian König
                   ` (8 preceding siblings ...)
  2023-09-05  6:04 ` [PATCH 09/11] drm/amdgpu: drop error return from flush_gpu_tlb_pasid Christian König
@ 2023-09-05  6:04 ` Christian König
  2023-09-05 22:49   ` Alex Deucher
  2023-09-05  6:04 ` [PATCH 11/11] drm/amdgpu: further move TLB hw workarounds a layer up Christian König
  10 siblings, 1 reply; 38+ messages in thread
From: Christian König @ 2023-09-05  6:04 UTC (permalink / raw)
  To: amd-gfx; +Cc: shashank.sharma

Instead of each implementation doing this more or less correctly
move taking the reset lock at a higher level.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 9 +++++++++
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 6 +-----
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   | 5 -----
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   | 5 -----
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 6 +-----
 5 files changed, 11 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 15814cb801e7..c24252304d48 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -589,8 +589,17 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	    !adev->mman.buffer_funcs_enabled ||
 	    !adev->ib_pool_ready || amdgpu_in_reset(adev) ||
 	    !ring->sched.ready) {
+
+		/*
+		 * A GPU reset should flush all TLBs anyway, so no need to do
+		 * this while one is ongoing.
+		 */
+		if (!down_read_trylock(&adev->reset_domain->sem))
+			return;
+
 		adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, vmhub,
 						   flush_type);
+		up_read(&adev->reset_domain->sem);
 		return;
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 40d432d46469..302279497d67 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -51,8 +51,6 @@
 #include "athub_v2_0.h"
 #include "athub_v2_1.h"
 
-#include "amdgpu_reset.h"
-
 static int gmc_v10_0_ecc_interrupt_state(struct amdgpu_device *adev,
 					 struct amdgpu_irq_src *src,
 					 unsigned int type,
@@ -264,11 +262,9 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	 * Directly use kiq to do the vm invalidation instead
 	 */
 	if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
-	    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
-	    down_read_trylock(&adev->reset_domain->sem)) {
+	    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
 		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
 				1 << vmid);
-		up_read(&adev->reset_domain->sem);
 		return;
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index fa3586efacd2..998f6ee60b78 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -33,7 +33,6 @@
 #include "amdgpu_ucode.h"
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_gem.h"
-#include "amdgpu_reset.h"
 
 #include "bif/bif_4_1_d.h"
 #include "bif/bif_4_1_sh_mask.h"
@@ -430,9 +429,6 @@ static void gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 	u32 mask = 0x0;
 	int vmid;
 
-	if(!down_read_trylock(&adev->reset_domain->sem))
-		return;
-
 	for (vmid = 1; vmid < 16; vmid++) {
 		u32 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
 
@@ -443,7 +439,6 @@ static void gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 
 	WREG32(mmVM_INVALIDATE_REQUEST, mask);
 	RREG32(mmVM_INVALIDATE_RESPONSE);
-	up_read(&adev->reset_domain->sem);
 }
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index ffcd79d28b9a..8dcd9b13673c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -31,7 +31,6 @@
 #include "amdgpu_ucode.h"
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_gem.h"
-#include "amdgpu_reset.h"
 
 #include "gmc/gmc_8_1_d.h"
 #include "gmc/gmc_8_1_sh_mask.h"
@@ -620,9 +619,6 @@ static void gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 	u32 mask = 0x0;
 	int vmid;
 
-	if(!down_read_trylock(&adev->reset_domain->sem))
-		return;
-
 	for (vmid = 1; vmid < 16; vmid++) {
 		u32 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
 
@@ -633,7 +629,6 @@ static void gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 
 	WREG32(mmVM_INVALIDATE_REQUEST, mask);
 	RREG32(mmVM_INVALIDATE_RESPONSE);
-	up_read(&adev->reset_domain->sem);
 }
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 94ba16536fc2..c5df8f052f3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -64,8 +64,6 @@
 #include "amdgpu_ras.h"
 #include "amdgpu_xgmi.h"
 
-#include "amdgpu_reset.h"
-
 /* add these here since we already include dce12 headers and these are for DCN */
 #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION                                                          0x055d
 #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX                                                 2
@@ -849,8 +847,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	 * as GFXOFF under bare metal
 	 */
 	if (adev->gfx.kiq[0].ring.sched.ready &&
-	    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
-	    down_read_trylock(&adev->reset_domain->sem)) {
+	    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
 		uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
 		uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
 
@@ -860,7 +857,6 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 			amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack,
 							   inv_req2, 1 << vmid);
 
-		up_read(&adev->reset_domain->sem);
 		return;
 	}
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH 11/11] drm/amdgpu: further move TLB hw workarounds a layer up
  2023-09-05  6:04 Rework flushing changes to the TLB Christian König
                   ` (9 preceding siblings ...)
  2023-09-05  6:04 ` [PATCH 10/11] drm/amdgpu: rework lock handling for flush_tlb Christian König
@ 2023-09-05  6:04 ` Christian König
  2023-09-05 22:51   ` Alex Deucher
  10 siblings, 1 reply; 38+ messages in thread
From: Christian König @ 2023-09-05  6:04 UTC (permalink / raw)
  To: amd-gfx; +Cc: shashank.sharma

For the PASID flushing we already handled that at a higher layer, apply
those workarounds to the standard flush as well.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 19 +++++++
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 74 ++++++++-----------------
 2 files changed, 42 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index c24252304d48..8a5381ca7713 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -597,6 +597,14 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 		if(!down_read_trylock(&adev->reset_domain->sem))
 			return;
 
+		if (adev->gmc.flush_tlb_needs_extra_type_2)
+			adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid,
+							   vmhub, 2);
+
+		if (adev->gmc.flush_tlb_needs_extra_type_0 && flush_type == 2)
+			adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid,
+							   vmhub, 0);
+
 		adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, vmhub,
 						   flush_type);
 		up_read(&adev->reset_domain->sem);
@@ -647,6 +655,17 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
 
 	if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready ||
 	    !down_read_trylock(&adev->reset_domain->sem)) {
+
+		if (adev->gmc.flush_tlb_needs_extra_type_2)
+			adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
+								 2, all_hub,
+								 inst);
+
+		if (adev->gmc.flush_tlb_needs_extra_type_0 && flush_type == 2)
+			adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
+								 0, all_hub,
+								 inst);
+
 		adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
 							 flush_type, all_hub,
 							 inst);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index c5df8f052f3f..a1a6f4b63208 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -812,37 +812,18 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 					uint32_t vmhub, uint32_t flush_type)
 {
 	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
-	u32 j, inv_req, inv_req2, tmp, sem, req, ack;
+	u32 j, inv_req, tmp, sem, req, ack;
 	const unsigned int eng = 17;
 	struct amdgpu_vmhub *hub;
 
 	BUG_ON(vmhub >= AMDGPU_MAX_VMHUBS);
 
 	hub = &adev->vmhub[vmhub];
+	inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
 	sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
 	req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
 	ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
 
-	if (adev->gmc.xgmi.num_physical_nodes &&
-	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0)) {
-		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
-		 * heavy-weight TLB flush (type 2), which flushes
-		 * both. Due to a race condition with concurrent
-		 * memory accesses using the same TLB cache line, we
-		 * still need a second TLB flush after this.
-		 */
-		inv_req = gmc_v9_0_get_invalidate_req(vmid, 2);
-		inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
-	} else if (flush_type == 2 &&
-		   adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) &&
-		   adev->rev_id == 0) {
-		inv_req = gmc_v9_0_get_invalidate_req(vmid, 0);
-		inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
-	} else {
-		inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
-		inv_req2 = 0;
-	}
-
 	/* This is necessary for a HW workaround under SRIOV as well
 	 * as GFXOFF under bare metal
 	 */
@@ -853,10 +834,6 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 
 		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
 						   1 << vmid);
-		if (inv_req2)
-			amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack,
-							   inv_req2, 1 << vmid);
-
 		return;
 	}
 
@@ -886,34 +863,29 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
 	}
 
-	do {
-		if (vmhub >= AMDGPU_MMHUB0(0))
-			WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req);
-		else
-			WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req);
-
-		/*
-		 * Issue a dummy read to wait for the ACK register to
-		 * be cleared to avoid a false ACK due to the new fast
-		 * GRBM interface.
-		 */
-		if ((vmhub == AMDGPU_GFXHUB(0)) &&
-		    (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
-			RREG32_NO_KIQ(req);
+	if (vmhub >= AMDGPU_MMHUB0(0))
+		WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req);
+	else
+		WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req);
 
-		for (j = 0; j < adev->usec_timeout; j++) {
-			if (vmhub >= AMDGPU_MMHUB0(0))
-				tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack);
-			else
-				tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack);
-			if (tmp & (1 << vmid))
-				break;
-			udelay(1);
-		}
+	/*
+	 * Issue a dummy read to wait for the ACK register to
+	 * be cleared to avoid a false ACK due to the new fast
+	 * GRBM interface.
+	 */
+	if ((vmhub == AMDGPU_GFXHUB(0)) &&
+	    (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
+		RREG32_NO_KIQ(req);
 
-		inv_req = inv_req2;
-		inv_req2 = 0;
-	} while (inv_req);
+	for (j = 0; j < adev->usec_timeout; j++) {
+		if (vmhub >= AMDGPU_MMHUB0(0))
+			tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack);
+		else
+			tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack);
+		if (tmp & (1 << vmid))
+			break;
+		udelay(1);
+	}
 
 	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
 	if (use_semaphore) {
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* Re: [PATCH 01/11] drm/amdgpu: fix and cleanup gmc_v9_0_flush_gpu_tlb
  2023-09-05  6:04 ` [PATCH 01/11] drm/amdgpu: fix and cleanup gmc_v9_0_flush_gpu_tlb Christian König
@ 2023-09-05 20:45   ` Alex Deucher
  2023-09-06  8:50     ` Christian König
  2023-09-08 18:58   ` Felix Kuehling
  1 sibling, 1 reply; 38+ messages in thread
From: Alex Deucher @ 2023-09-05 20:45 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx, shashank.sharma

On Tue, Sep 5, 2023 at 3:00 AM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> The KIQ code path was ignoring the second flush. Also avoid long lines and
> re-calculating the register offsets over and over again.

I'd split this into two patches, one for the code cleanup and one to
fix the missing flush.

Alex

>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 29 +++++++++++++++++----------
>  1 file changed, 18 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 0673cda547bb..4f6990ba71cb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -814,13 +814,17 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>                                         uint32_t vmhub, uint32_t flush_type)
>  {
>         bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
> +       u32 j, inv_req, inv_req2, tmp, sem, req, ack;
>         const unsigned int eng = 17;
> -       u32 j, inv_req, inv_req2, tmp;
>         struct amdgpu_vmhub *hub;
>
>         BUG_ON(vmhub >= AMDGPU_MAX_VMHUBS);
>
>         hub = &adev->vmhub[vmhub];
> +       sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
> +       req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
> +       ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
> +
>         if (adev->gmc.xgmi.num_physical_nodes &&
>             adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0)) {
>                 /* Vega20+XGMI caches PTEs in TC and TLB. Add a
> @@ -852,6 +856,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>
>                 amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
>                                                    1 << vmid);
> +               if (inv_req2)
> +                       amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack,
> +                                                          inv_req2, 1 << vmid);
> +
>                 up_read(&adev->reset_domain->sem);
>                 return;
>         }
> @@ -870,9 +878,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>                 for (j = 0; j < adev->usec_timeout; j++) {
>                         /* a read return value of 1 means semaphore acquire */
>                         if (vmhub >= AMDGPU_MMHUB0(0))
> -                               tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng);
> +                               tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem);
>                         else
> -                               tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng);
> +                               tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem);
>                         if (tmp & 0x1)
>                                 break;
>                         udelay(1);
> @@ -884,9 +892,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>
>         do {
>                 if (vmhub >= AMDGPU_MMHUB0(0))
> -                       WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
> +                       WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req);
>                 else
> -                       WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
> +                       WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req);
>
>                 /*
>                  * Issue a dummy read to wait for the ACK register to
> @@ -895,14 +903,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>                  */
>                 if ((vmhub == AMDGPU_GFXHUB(0)) &&
>                     (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
> -                       RREG32_NO_KIQ(hub->vm_inv_eng0_req +
> -                                     hub->eng_distance * eng);
> +                       RREG32_NO_KIQ(req);
>
>                 for (j = 0; j < adev->usec_timeout; j++) {
>                         if (vmhub >= AMDGPU_MMHUB0(0))
> -                               tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_ack + hub->eng_distance * eng);
> +                               tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack);
>                         else
> -                               tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_ack + hub->eng_distance * eng);
> +                               tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack);
>                         if (tmp & (1 << vmid))
>                                 break;
>                         udelay(1);
> @@ -919,9 +926,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>                  * write with 0 means semaphore release
>                  */
>                 if (vmhub >= AMDGPU_MMHUB0(0))
> -                       WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0);
> +                       WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0);
>                 else
> -                       WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0);
> +                       WREG32_SOC15_IP_NO_KIQ(GC, sem, 0);
>         }
>
>         spin_unlock(&adev->gmc.invalidate_lock);
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 02/11] drm/amdgpu: rework gmc_v10_0_flush_gpu_tlb
  2023-09-05  6:04 ` [PATCH 02/11] drm/amdgpu: rework gmc_v10_0_flush_gpu_tlb Christian König
@ 2023-09-05 20:52   ` Alex Deucher
  2023-09-08 19:30   ` Felix Kuehling
  1 sibling, 0 replies; 38+ messages in thread
From: Alex Deucher @ 2023-09-05 20:52 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx, shashank.sharma

On Tue, Sep 5, 2023 at 2:20 AM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Move the SDMA workaround necessary for Navi 1x into a higher layer.

You could split out the register offsets code cleanup into a separate
patch.  Either way, the patch is:
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  |  48 +++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h  |   5 +-
>  drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c |   3 +
>  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   | 159 ++++++-----------------
>  4 files changed, 97 insertions(+), 118 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index d78bd9732543..857051093900 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -575,6 +575,54 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
>         return 0;
>  }
>
> +void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
> +                             uint32_t vmhub, uint32_t flush_type)
> +{
> +       struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> +       struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
> +       struct dma_fence *fence;
> +       struct amdgpu_job *job;
> +       int r;
> +
> +       if (!hub->sdma_invalidation_workaround || vmid ||
> +           !adev->mman.buffer_funcs_enabled ||
> +           !adev->ib_pool_ready || amdgpu_in_reset(adev) ||
> +           !ring->sched.ready) {
> +               adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, vmhub,
> +                                                  flush_type);
> +               return;
> +       }
> +
> +       /* The SDMA on Navi 1x has a bug which can theoretically result in memory
> +        * corruption if an invalidation happens at the same time as an VA
> +        * translation. Avoid this by doing the invalidation from the SDMA
> +        * itself at least for GART.
> +        */
> +       mutex_lock(&adev->mman.gtt_window_lock);
> +       r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
> +                                    AMDGPU_FENCE_OWNER_UNDEFINED,
> +                                    16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
> +                                    &job);
> +       if (r)
> +               goto error_alloc;
> +
> +       job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
> +       job->vm_needs_flush = true;
> +       job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
> +       amdgpu_ring_pad_ib(ring, &job->ibs[0]);
> +       fence = amdgpu_job_submit(job);
> +       mutex_unlock(&adev->mman.gtt_window_lock);
> +
> +       dma_fence_wait(fence, false);
> +       dma_fence_put(fence);
> +
> +       return;
> +
> +error_alloc:
> +       mutex_unlock(&adev->mman.gtt_window_lock);
> +       DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
> +}
> +
>  /**
>   * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ
>   * @adev: amdgpu_device pointer
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index fdc25cd559b6..9e7df2f69123 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -117,6 +117,8 @@ struct amdgpu_vmhub {
>
>         uint32_t        vm_contexts_disable;
>
> +       bool            sdma_invalidation_workaround;
> +
>         const struct amdgpu_vmhub_funcs *vmhub_funcs;
>  };
>
> @@ -335,7 +337,6 @@ struct amdgpu_gmc {
>         u64 noretry_flags;
>  };
>
> -#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
>  #define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub, inst) \
>         ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
>         ((adev), (pasid), (type), (allhub), (inst)))
> @@ -401,6 +402,8 @@ int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev);
>  int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
>  void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
>  int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
> +void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
> +                             uint32_t vmhub, uint32_t flush_type);
>
>  extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev);
>  extern void amdgpu_gmc_noretry_set(struct amdgpu_device *adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
> index a041c6c970e1..8521c45e8f38 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
> @@ -471,6 +471,9 @@ static void gfxhub_v2_0_init(struct amdgpu_device *adev)
>                 GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
>                 GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
>
> +       /* TODO: This is only needed on some Navi 1x revisions */
> +       hub->sdma_invalidation_workaround = true;
> +
>         hub->vmhub_funcs = &gfxhub_v2_0_vmhub_funcs;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index fa87a85e1017..1f70c57bcd69 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -230,20 +230,49 @@ static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
>   * by the amdgpu vm/hsa code.
>   */
>
> -static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
> -                                  unsigned int vmhub, uint32_t flush_type)
> +/**
> + * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
> + *
> + * @adev: amdgpu_device pointer
> + * @vmid: vm instance to flush
> + * @vmhub: vmhub type
> + * @flush_type: the flush type
> + *
> + * Flush the TLB for the requested page table.
> + */
> +static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
> +                                       uint32_t vmhub, uint32_t flush_type)
>  {
>         bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
>         struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
>         u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
> -       u32 tmp;
>         /* Use register 17 for GART */
>         const unsigned int eng = 17;
> -       unsigned int i;
>         unsigned char hub_ip = 0;
> +       u32 sem, req, ack;
> +       unsigned int i;
> +       u32 tmp;
>
> -       hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ?
> -                  GC_HWIP : MMHUB_HWIP;
> +       sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
> +       req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
> +       ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
> +
> +       /* flush hdp cache */
> +       adev->hdp.funcs->flush_hdp(adev, NULL);
> +
> +       /* For SRIOV run time, driver shouldn't access the register through MMIO
> +        * Directly use kiq to do the vm invalidation instead
> +        */
> +       if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
> +           (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
> +           down_read_trylock(&adev->reset_domain->sem)) {
> +               amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
> +                               1 << vmid);
> +               up_read(&adev->reset_domain->sem);
> +               return;
> +       }
> +
> +       hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP;
>
>         spin_lock(&adev->gmc.invalidate_lock);
>         /*
> @@ -257,9 +286,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
>         if (use_semaphore) {
>                 for (i = 0; i < adev->usec_timeout; i++) {
>                         /* a read return value of 1 means semaphore acuqire */
> -                       tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
> -                                        hub->eng_distance * eng, hub_ip);
> -
> +                       tmp = RREG32_RLC_NO_KIQ(sem, hub_ip);
>                         if (tmp & 0x1)
>                                 break;
>                         udelay(1);
> @@ -269,9 +296,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
>                         DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
>         }
>
> -       WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
> -                         hub->eng_distance * eng,
> -                         inv_req, hub_ip);
> +       WREG32_RLC_NO_KIQ(req, inv_req, hub_ip);
>
>         /*
>          * Issue a dummy read to wait for the ACK register to be cleared
> @@ -279,14 +304,11 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
>          */
>         if ((vmhub == AMDGPU_GFXHUB(0)) &&
>             (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0)))
> -               RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
> -                                 hub->eng_distance * eng, hub_ip);
> +               RREG32_RLC_NO_KIQ(req, hub_ip);
>
>         /* Wait for ACK with a delay.*/
>         for (i = 0; i < adev->usec_timeout; i++) {
> -               tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
> -                                 hub->eng_distance * eng, hub_ip);
> -
> +               tmp = RREG32_RLC_NO_KIQ(ack, hub_ip);
>                 tmp &= 1 << vmid;
>                 if (tmp)
>                         break;
> @@ -296,109 +318,12 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
>
>         /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
>         if (use_semaphore)
> -               /*
> -                * add semaphore release after invalidation,
> -                * write with 0 means semaphore release
> -                */
> -               WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
> -                                 hub->eng_distance * eng, 0, hub_ip);
> +               WREG32_RLC_NO_KIQ(sem, 0, hub_ip);
>
>         spin_unlock(&adev->gmc.invalidate_lock);
>
> -       if (i < adev->usec_timeout)
> -               return;
> -
> -       DRM_ERROR("Timeout waiting for VM flush hub: %d!\n", vmhub);
> -}
> -
> -/**
> - * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
> - *
> - * @adev: amdgpu_device pointer
> - * @vmid: vm instance to flush
> - * @vmhub: vmhub type
> - * @flush_type: the flush type
> - *
> - * Flush the TLB for the requested page table.
> - */
> -static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
> -                                       uint32_t vmhub, uint32_t flush_type)
> -{
> -       struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> -       struct dma_fence *fence;
> -       struct amdgpu_job *job;
> -
> -       int r;
> -
> -       /* flush hdp cache */
> -       adev->hdp.funcs->flush_hdp(adev, NULL);
> -
> -       /* For SRIOV run time, driver shouldn't access the register through MMIO
> -        * Directly use kiq to do the vm invalidation instead
> -        */
> -       if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
> -           (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
> -           down_read_trylock(&adev->reset_domain->sem)) {
> -               struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
> -               const unsigned int eng = 17;
> -               u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
> -               u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
> -               u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
> -
> -               amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
> -                               1 << vmid);
> -
> -               up_read(&adev->reset_domain->sem);
> -               return;
> -       }
> -
> -       mutex_lock(&adev->mman.gtt_window_lock);
> -
> -       if (vmhub == AMDGPU_MMHUB0(0)) {
> -               gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB0(0), 0);
> -               mutex_unlock(&adev->mman.gtt_window_lock);
> -               return;
> -       }
> -
> -       BUG_ON(vmhub != AMDGPU_GFXHUB(0));
> -
> -       if (!adev->mman.buffer_funcs_enabled ||
> -           !adev->ib_pool_ready ||
> -           amdgpu_in_reset(adev) ||
> -           ring->sched.ready == false) {
> -               gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB(0), 0);
> -               mutex_unlock(&adev->mman.gtt_window_lock);
> -               return;
> -       }
> -
> -       /* The SDMA on Navi has a bug which can theoretically result in memory
> -        * corruption if an invalidation happens at the same time as an VA
> -        * translation. Avoid this by doing the invalidation from the SDMA
> -        * itself.
> -        */
> -       r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
> -                                    AMDGPU_FENCE_OWNER_UNDEFINED,
> -                                    16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
> -                                    &job);
> -       if (r)
> -               goto error_alloc;
> -
> -       job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
> -       job->vm_needs_flush = true;
> -       job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
> -       amdgpu_ring_pad_ib(ring, &job->ibs[0]);
> -       fence = amdgpu_job_submit(job);
> -
> -       mutex_unlock(&adev->mman.gtt_window_lock);
> -
> -       dma_fence_wait(fence, false);
> -       dma_fence_put(fence);
> -
> -       return;
> -
> -error_alloc:
> -       mutex_unlock(&adev->mman.gtt_window_lock);
> -       DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
> +       if (i >= adev->usec_timeout)
> +               DRM_ERROR("Timeout waiting for VM flush hub: %d!\n", vmhub);
>  }
>
>  /**
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 03/11] drm/amdgpu: cleanup gmc_v11_0_flush_gpu_tlb
  2023-09-05  6:04 ` [PATCH 03/11] drm/amdgpu: cleanup gmc_v11_0_flush_gpu_tlb Christian König
@ 2023-09-05 20:56   ` Alex Deucher
  0 siblings, 0 replies; 38+ messages in thread
From: Alex Deucher @ 2023-09-05 20:56 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx, shashank.sharma

On Tue, Sep 5, 2023 at 2:30 AM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Remove leftovers from copying this from the gmc v10 code.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 108 ++++++++++---------------
>  1 file changed, 41 insertions(+), 67 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> index dcbba981462e..3c3ad3f17c6a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> @@ -186,27 +186,50 @@ static bool gmc_v11_0_get_vmid_pasid_mapping_info(
>         return !!(*p_pasid);
>  }
>
> -/*
> - * GART
> - * VMID 0 is the physical GPU addresses as used by the kernel.
> - * VMIDs 1-15 are used for userspace clients and are handled
> - * by the amdgpu vm/hsa code.
> +/**
> + * gmc_v11_0_flush_gpu_tlb - gart tlb flush callback
> + *
> + * @adev: amdgpu_device pointer
> + * @vmid: vm instance to flush
> + * @vmhub: which hub to flush
> + * @flush_type: the flush type
> + *
> + * Flush the TLB for the requested page table.
>   */
> -
> -static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
> -                                  unsigned int vmhub, uint32_t flush_type)
> +static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
> +                                       uint32_t vmhub, uint32_t flush_type)
>  {
>         bool use_semaphore = gmc_v11_0_use_invalidate_semaphore(adev, vmhub);
>         struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
>         u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
> -       u32 tmp;
>         /* Use register 17 for GART */
>         const unsigned int eng = 17;
> +       unsigned char hub_ip;
> +       u32 sem, req, ack;
>         unsigned int i;
> -       unsigned char hub_ip = 0;
> +       u32 tmp;
> +
> +       if ((vmhub == AMDGPU_GFXHUB(0)) && !adev->gfx.is_poweron)
> +               return;
>
> -       hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ?
> -                  GC_HWIP : MMHUB_HWIP;
> +       sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
> +       req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
> +       ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
> +
> +       /* flush hdp cache */
> +       adev->hdp.funcs->flush_hdp(adev, NULL);
> +
> +       /* For SRIOV run time, driver shouldn't access the register through MMIO
> +        * Directly use kiq to do the vm invalidation instead
> +        */
> +       if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) &&
> +           (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
> +               amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
> +                               1 << vmid);
> +               return;
> +       }
> +
> +       hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP;
>
>         spin_lock(&adev->gmc.invalidate_lock);
>         /*
> @@ -220,8 +243,7 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
>         if (use_semaphore) {
>                 for (i = 0; i < adev->usec_timeout; i++) {
>                         /* a read return value of 1 means semaphore acuqire */
> -                       tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
> -                                           hub->eng_distance * eng, hub_ip);
> +                       tmp = RREG32_RLC_NO_KIQ(sem, hub_ip);
>                         if (tmp & 0x1)
>                                 break;
>                         udelay(1);
> @@ -231,12 +253,11 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
>                         DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
>         }
>
> -       WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req, hub_ip);
> +       WREG32_RLC_NO_KIQ(req, inv_req, hub_ip);
>
>         /* Wait for ACK with a delay.*/
>         for (i = 0; i < adev->usec_timeout; i++) {
> -               tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
> -                                   hub->eng_distance * eng, hub_ip);
> +               tmp = RREG32_RLC_NO_KIQ(ack, hub_ip);
>                 tmp &= 1 << vmid;
>                 if (tmp)
>                         break;
> @@ -246,12 +267,7 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
>
>         /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
>         if (use_semaphore)
> -               /*
> -                * add semaphore release after invalidation,
> -                * write with 0 means semaphore release
> -                */
> -               WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
> -                             hub->eng_distance * eng, 0, hub_ip);
> +               WREG32_RLC_NO_KIQ(sem, 0, hub_ip);
>
>         /* Issue additional private vm invalidation to MMHUB */
>         if ((vmhub != AMDGPU_GFXHUB(0)) &&
> @@ -268,50 +284,8 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
>
>         spin_unlock(&adev->gmc.invalidate_lock);
>
> -       if (i < adev->usec_timeout)
> -               return;
> -
> -       DRM_ERROR("Timeout waiting for VM flush ACK!\n");
> -}
> -
> -/**
> - * gmc_v11_0_flush_gpu_tlb - gart tlb flush callback
> - *
> - * @adev: amdgpu_device pointer
> - * @vmid: vm instance to flush
> - * @vmhub: which hub to flush
> - * @flush_type: the flush type
> - *
> - * Flush the TLB for the requested page table.
> - */
> -static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
> -                                       uint32_t vmhub, uint32_t flush_type)
> -{
> -       if ((vmhub == AMDGPU_GFXHUB(0)) && !adev->gfx.is_poweron)
> -               return;
> -
> -       /* flush hdp cache */
> -       adev->hdp.funcs->flush_hdp(adev, NULL);
> -
> -       /* For SRIOV run time, driver shouldn't access the register through MMIO
> -        * Directly use kiq to do the vm invalidation instead
> -        */
> -       if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) &&
> -           (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
> -               struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
> -               const unsigned int eng = 17;
> -               u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
> -               u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
> -               u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
> -
> -               amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
> -                               1 << vmid);
> -               return;
> -       }
> -
> -       mutex_lock(&adev->mman.gtt_window_lock);
> -       gmc_v11_0_flush_vm_hub(adev, vmid, vmhub, 0);
> -       mutex_unlock(&adev->mman.gtt_window_lock);
> +       if (i >= adev->usec_timeout)
> +               DRM_ERROR("Timeout waiting for VM flush ACK!\n");

While you are at it, maybe switch this to use dev_err so we can better
tell which GPU is affected in the multi-GPU case.  Same comment for the
other patches.  Either way:
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

Alex

>  }
>
>  /**
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 04/11] drm/amdgpu: fix and cleanup gmc_v7_0_flush_gpu_tlb_pasid
  2023-09-05  6:04 ` [PATCH 04/11] drm/amdgpu: fix and cleanup gmc_v7_0_flush_gpu_tlb_pasid Christian König
@ 2023-09-05 22:39   ` Alex Deucher
  2023-09-06 14:25   ` Shashank Sharma
  2023-09-08 20:43   ` Felix Kuehling
  2 siblings, 0 replies; 38+ messages in thread
From: Alex Deucher @ 2023-09-05 22:39 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx, shashank.sharma

On Tue, Sep 5, 2023 at 7:30 AM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Testing for reset is pointless since the reset can start right after the
> test. Grab the reset semaphore instead.
>
> The same PASID can be used by more than one VMID; build a mask of VMIDs
> to reset instead of just resetting the first one.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 19 ++++++++++---------
>  1 file changed, 10 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> index 6a6929ac2748..9e19a752f94b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> @@ -33,6 +33,7 @@
>  #include "amdgpu_ucode.h"
>  #include "amdgpu_amdkfd.h"
>  #include "amdgpu_gem.h"
> +#include "amdgpu_reset.h"
>
>  #include "bif/bif_4_1_d.h"
>  #include "bif/bif_4_1_sh_mask.h"
> @@ -426,23 +427,23 @@ static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>                                         uint16_t pasid, uint32_t flush_type,
>                                         bool all_hub, uint32_t inst)
>  {
> +       u32 mask = 0x0;
>         int vmid;
> -       unsigned int tmp;
>
> -       if (amdgpu_in_reset(adev))
> -               return -EIO;
> +       if(!down_read_trylock(&adev->reset_domain->sem))
> +               return 0;
>
>         for (vmid = 1; vmid < 16; vmid++) {
> +               u32 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
>
> -               tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
>                 if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> -                       (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> -                       WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> -                       RREG32(mmVM_INVALIDATE_RESPONSE);
> -                       break;
> -               }
> +                   (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid)
> +                       mask |= 1 << vmid;
>         }
>
> +       WREG32(mmVM_INVALIDATE_REQUEST, mask);
> +       RREG32(mmVM_INVALIDATE_RESPONSE);
> +       up_read(&adev->reset_domain->sem);
>         return 0;
>  }
>
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 05/11] drm/amdgpu: fix and cleanup gmc_v8_0_flush_gpu_tlb_pasid
  2023-09-05  6:04 ` [PATCH 05/11] drm/amdgpu: fix and cleanup gmc_v8_0_flush_gpu_tlb_pasid Christian König
@ 2023-09-05 22:40   ` Alex Deucher
  2023-09-06 14:26   ` Shashank Sharma
  2023-09-08 20:44   ` Felix Kuehling
  2 siblings, 0 replies; 38+ messages in thread
From: Alex Deucher @ 2023-09-05 22:40 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx, shashank.sharma

On Tue, Sep 5, 2023 at 3:00 AM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Testing for reset is pointless since the reset can start right after the
> test. Grab the reset semaphore instead.
>
> The same PASID can be used by more than one VMID, build a mask of VMIDs
> to reset instead of just resetting the first one.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 20 ++++++++++----------
>  1 file changed, 10 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> index 5af235202513..2d51531a1f2d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> @@ -31,6 +31,7 @@
>  #include "amdgpu_ucode.h"
>  #include "amdgpu_amdkfd.h"
>  #include "amdgpu_gem.h"
> +#include "amdgpu_reset.h"
>
>  #include "gmc/gmc_8_1_d.h"
>  #include "gmc/gmc_8_1_sh_mask.h"
> @@ -616,25 +617,24 @@ static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>                                         uint16_t pasid, uint32_t flush_type,
>                                         bool all_hub, uint32_t inst)
>  {
> +       u32 mask = 0x0;
>         int vmid;
> -       unsigned int tmp;
>
> -       if (amdgpu_in_reset(adev))
> -               return -EIO;
> +       if(!down_read_trylock(&adev->reset_domain->sem))
> +               return 0;
>
>         for (vmid = 1; vmid < 16; vmid++) {
> +               u32 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
>
> -               tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
>                 if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> -                       (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> -                       WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> -                       RREG32(mmVM_INVALIDATE_RESPONSE);
> -                       break;
> -               }
> +                   (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid)
> +                       mask |= 1 << vmid;
>         }
>
> +       WREG32(mmVM_INVALIDATE_REQUEST, mask);
> +       RREG32(mmVM_INVALIDATE_RESPONSE);
> +       up_read(&adev->reset_domain->sem);
>         return 0;
> -
>  }
>
>  /*
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 38+ messages in thread

* RE: [PATCH 06/11] drm/amdgpu: fix and cleanup gmc_v9_0_flush_gpu_tlb_pasid
  2023-09-05  6:04 ` [PATCH 06/11] drm/amdgpu: fix and cleanup gmc_v9_0_flush_gpu_tlb_pasid Christian König
@ 2023-09-05 22:45   ` Deucher, Alexander
  2023-09-08 21:13   ` Felix Kuehling
  1 sibling, 0 replies; 38+ messages in thread
From: Deucher, Alexander @ 2023-09-05 22:45 UTC (permalink / raw)
  To: Christian König, amd-gfx; +Cc: Sharma, Shashank

[Public]

> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
> Christian König
> Sent: Tuesday, September 5, 2023 2:04 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Sharma, Shashank <Shashank.Sharma@amd.com>
> Subject: [PATCH 06/11] drm/amdgpu: fix and cleanup
> gmc_v9_0_flush_gpu_tlb_pasid
>
> Testing for reset is pointless since the reset can start right after the test.
>
> The same PASID can be used by more than one VMID, reset each of them.
>
> Move the KIQ and all the workaround handling into common GMC code.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c |  60 +++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  10 ++-
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 109 ++++++++----------------
>  3 files changed, 102 insertions(+), 77 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index 857051093900..b5f1a1218725 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -32,6 +32,7 @@
>  #include "amdgpu.h"
>  #include "amdgpu_gmc.h"
>  #include "amdgpu_ras.h"
> +#include "amdgpu_reset.h"
>  #include "amdgpu_xgmi.h"
>
>  #include <drm/drm_drv.h>
> @@ -623,6 +624,65 @@ void amdgpu_gmc_flush_gpu_tlb(struct
> amdgpu_device *adev, uint32_t vmid,
>       DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);  }
>
> +int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> uint16_t pasid,
> +                                uint32_t flush_type, bool all_hub,
> +                                uint32_t inst)
> +{
> +     u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT
> :
> +             adev->usec_timeout;
> +     struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring;
> +     struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
> +     unsigned int ndw;
> +     signed long r;
> +     uint32_t seq;
> +
> +     if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready ||
> +         !down_read_trylock(&adev->reset_domain->sem)) {
> +             return adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev,
> pasid,
> +                                                             flush_type,
> +                                                             all_hub, inst);
> +     }
> +
> +     /* 2 dwords flush + 8 dwords fence */
> +     ndw = kiq->pmf->invalidate_tlbs_size + 8;
> +
> +     if (adev->gmc.flush_tlb_needs_extra_type_2)
> +             ndw += kiq->pmf->invalidate_tlbs_size;
> +
> +     if (adev->gmc.flush_tlb_needs_extra_type_0)
> +             ndw += kiq->pmf->invalidate_tlbs_size;
> +
> +     spin_lock(&adev->gfx.kiq[inst].ring_lock);
> +     amdgpu_ring_alloc(ring, ndw);
> +     if (adev->gmc.flush_tlb_needs_extra_type_2)
> +             kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub);
> +
> +     if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0)
> +             kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub);
> +
> +     kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub);
> +     r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
> +     if (r) {
> +             amdgpu_ring_undo(ring);
> +             spin_unlock(&adev->gfx.kiq[inst].ring_lock);
> +             goto error_unlock_reset;
> +     }
> +
> +     amdgpu_ring_commit(ring);
> +     spin_unlock(&adev->gfx.kiq[inst].ring_lock);
> +     r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
> +     if (r < 1) {
> +             dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
> +             r = -ETIME;
> +             goto error_unlock_reset;
> +     }
> +     r = 0;
> +
> +error_unlock_reset:
> +     up_read(&adev->reset_domain->sem);
> +     return r;
> +}
> +
>  /**
>   * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ
>   * @adev: amdgpu_device pointer
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index 9e7df2f69123..7732d4ef845e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -335,11 +335,12 @@ struct amdgpu_gmc {
>       u64 MC_VM_MX_L1_TLB_CNTL;
>
>       u64 noretry_flags;
> +
> +     bool flush_tlb_needs_extra_type_0;
> +     bool flush_tlb_needs_extra_type_2;
> +     bool flush_pasid_uses_kiq;
>  };
>
> -#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub, inst) \
> -     ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
> -     ((adev), (pasid), (type), (allhub), (inst)))
>  #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev-
> >gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))  #define
> amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev-
> >gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))  #define
> amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs-
> >map_mtype((adev),(flags))
> @@ -404,6 +405,9 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device
> *adev);  int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device
> *adev);  void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev,
> uint32_t vmid,
>                             uint32_t vmhub, uint32_t flush_type);
> +int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> uint16_t pasid,
> +                                uint32_t flush_type, bool all_hub,
> +                                uint32_t inst);
>
>  extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev);  extern
> void amdgpu_gmc_noretry_set(struct amdgpu_device *adev); diff --git
> a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 4f6990ba71cb..39016b6900d3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -954,87 +954,30 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct
> amdgpu_device *adev,
>                                       uint16_t pasid, uint32_t flush_type,
>                                       bool all_hub, uint32_t inst)
>  {
> -     int vmid, i;
> -     signed long r;
> -     uint32_t seq;
> -     uint16_t queried_pasid;
> -     bool ret;
> -     u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT
> : adev->usec_timeout;
> -     struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring;
> -     struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
> -
> -     if (amdgpu_in_reset(adev))
> -             return -EIO;
> -
> -     if (ring->sched.ready && down_read_trylock(&adev->reset_domain-
> >sem)) {
> -             /* Vega20+XGMI caches PTEs in TC and TLB. Add a
> -              * heavy-weight TLB flush (type 2), which flushes
> -              * both. Due to a race condition with concurrent
> -              * memory accesses using the same TLB cache line, we
> -              * still need a second TLB flush after this.
> -              */
> -             bool vega20_xgmi_wa = (adev-
> >gmc.xgmi.num_physical_nodes &&
> -                                    adev->ip_versions[GC_HWIP][0] ==
> IP_VERSION(9, 4, 0));
> -             /* 2 dwords flush + 8 dwords fence */
> -             unsigned int ndw = kiq->pmf->invalidate_tlbs_size + 8;
> -
> -             if (vega20_xgmi_wa)
> -                     ndw += kiq->pmf->invalidate_tlbs_size;
> -
> -             spin_lock(&adev->gfx.kiq[inst].ring_lock);
> -             /* 2 dwords flush + 8 dwords fence */
> -             amdgpu_ring_alloc(ring, ndw);
> -             if (vega20_xgmi_wa)
> -                     kiq->pmf->kiq_invalidate_tlbs(ring,
> -                                                   pasid, 2, all_hub);
> -
> -             if (flush_type == 2 &&
> -                 adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) &&
> -                 adev->rev_id == 0)
> -                     kiq->pmf->kiq_invalidate_tlbs(ring,
> -                                             pasid, 0, all_hub);
> -
> -             kiq->pmf->kiq_invalidate_tlbs(ring,
> -                                     pasid, flush_type, all_hub);
> -             r = amdgpu_fence_emit_polling(ring, &seq,
> MAX_KIQ_REG_WAIT);
> -             if (r) {
> -                     amdgpu_ring_undo(ring);
> -                     spin_unlock(&adev->gfx.kiq[inst].ring_lock);
> -                     up_read(&adev->reset_domain->sem);
> -                     return -ETIME;
> -             }
> -
> -             amdgpu_ring_commit(ring);
> -             spin_unlock(&adev->gfx.kiq[inst].ring_lock);
> -             r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
> -             if (r < 1) {
> -                     dev_err(adev->dev, "wait for kiq fence error: %ld.\n",
> r);
> -                     up_read(&adev->reset_domain->sem);
> -                     return -ETIME;
> -             }
> -             up_read(&adev->reset_domain->sem);
> -             return 0;
> -     }
> +     uint16_t queried;
> +     int i, vmid;
>
>       for (vmid = 1; vmid < 16; vmid++) {
> +             bool valid;
>
> -             ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev,
> vmid,
> -                             &queried_pasid);
> -             if (ret && queried_pasid == pasid) {
> -                     if (all_hub) {
> -                             for_each_set_bit(i, adev->vmhubs_mask,
> AMDGPU_MAX_VMHUBS)
> -                                     gmc_v9_0_flush_gpu_tlb(adev, vmid,
> -                                                     i, flush_type);
> -                     } else {
> -                             gmc_v9_0_flush_gpu_tlb(adev, vmid,
> -                                             AMDGPU_GFXHUB(0),
> flush_type);
> -                     }
> -                     break;
> +             valid = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev,
> vmid,
> +                                                              &queried);
> +             if (!valid || queried != pasid)
> +                     continue;
> +
> +             if (all_hub) {
> +                     for_each_set_bit(i, adev->vmhubs_mask,
> +                                      AMDGPU_MAX_VMHUBS)
> +                             gmc_v9_0_flush_gpu_tlb(adev, vmid, i,
> +                                                    flush_type);
> +             } else {
> +                     gmc_v9_0_flush_gpu_tlb(adev, vmid,
> +                                            AMDGPU_GFXHUB(0),
> +                                            flush_type);
>               }
>       }
>
>       return 0;
> -
>  }
>
>  static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, @@
> -2335,6 +2278,24 @@ static int gmc_v9_0_hw_init(void *handle)
>       bool value;
>       int i, r;
>
> +     adev->gmc.flush_pasid_uses_kiq = true;
> +
> +     /* Vega20+XGMI caches PTEs in TC and TLB. Add a heavy-weight TLB
> flush
> +      * (type 2), which flushes both. Due to a race condition with
> +      * concurrent memory accesses using the same TLB cache line, we still
> +      * need a second TLB flush after this.
> +      */
> +     adev->gmc.flush_tlb_needs_extra_type_2 =
> +             adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0) &&
> +             adev->gmc.xgmi.num_physical_nodes;
> +     /*
> +      * TODO: This workaround is badly documented and had a buggy
> +      * implementation. We should probably verify what we do here.
> +      */
> +     adev->gmc.flush_tlb_needs_extra_type_0 =
> +             adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) &&
> +             adev->rev_id == 0;
> +
>       /* The sequence of these two function calls matters.*/
>       gmc_v9_0_init_golden_registers(adev);
>
> --
> 2.34.1


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 07/11] drm/amdgpu: cleanup gmc_v10_0_flush_gpu_tlb_pasid
  2023-09-05  6:04 ` [PATCH 07/11] drm/amdgpu: cleanup gmc_v10_0_flush_gpu_tlb_pasid Christian König
@ 2023-09-05 22:46   ` Alex Deucher
  2023-09-08 21:13   ` Felix Kuehling
  1 sibling, 0 replies; 38+ messages in thread
From: Alex Deucher @ 2023-09-05 22:46 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx, shashank.sharma

On Tue, Sep 5, 2023 at 2:15 AM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> The same PASID can be used by more than one VMID, reset each of them.
>
> Use the common KIQ handling.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 66 ++++++++------------------
>  1 file changed, 19 insertions(+), 47 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 1f70c57bcd69..407ddb926941 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -341,57 +341,27 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>                                         uint16_t pasid, uint32_t flush_type,
>                                         bool all_hub, uint32_t inst)
>  {
> +       uint16_t queried;
>         int vmid, i;
> -       signed long r;
> -       uint32_t seq;
> -       uint16_t queried_pasid;
> -       bool ret;
> -       u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
> -       struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring;
> -       struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
> -
> -       if (amdgpu_emu_mode == 0 && ring->sched.ready) {
> -               spin_lock(&adev->gfx.kiq[0].ring_lock);
> -               /* 2 dwords flush + 8 dwords fence */
> -               amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
> -               kiq->pmf->kiq_invalidate_tlbs(ring,
> -                                       pasid, flush_type, all_hub);
> -               r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
> -               if (r) {
> -                       amdgpu_ring_undo(ring);
> -                       spin_unlock(&adev->gfx.kiq[0].ring_lock);
> -                       return -ETIME;
> -               }
> -
> -               amdgpu_ring_commit(ring);
> -               spin_unlock(&adev->gfx.kiq[0].ring_lock);
> -               r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
> -               if (r < 1) {
> -                       dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
> -                       return -ETIME;
> -               }
> -
> -               return 0;
> -       }
>
>         for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
> -
> -               ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
> -                               &queried_pasid);
> -               if (ret && queried_pasid == pasid) {
> -                       if (all_hub) {
> -                               for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
> -                                       gmc_v10_0_flush_gpu_tlb(adev, vmid,
> -                                                       i, flush_type);
> -                       } else {
> -                               gmc_v10_0_flush_gpu_tlb(adev, vmid,
> -                                               AMDGPU_GFXHUB(0), flush_type);
> -                       }
> -                       if (!adev->enable_mes)
> -                               break;
> +               bool valid;
> +
> +               valid = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
> +                                                                 &queried);
> +               if (!valid || queried != pasid)
> +                       continue;
> +
> +               if (all_hub) {
> +                       for_each_set_bit(i, adev->vmhubs_mask,
> +                                        AMDGPU_MAX_VMHUBS)
> +                               gmc_v10_0_flush_gpu_tlb(adev, vmid, i,
> +                                                       flush_type);
> +               } else {
> +                       gmc_v10_0_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0),
> +                                               flush_type);
>                 }
>         }
> -
>         return 0;
>  }
>
> @@ -1009,8 +979,10 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
>
>  static int gmc_v10_0_hw_init(void *handle)
>  {
> -       int r;
>         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> +       int r;
> +
> +       adev->gmc.flush_pasid_uses_kiq = !amdgpu_emu_mode;
>
>         /* The sequence of these two function calls matters.*/
>         gmc_v10_0_init_golden_registers(adev);
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 08/11] drm/amdgpu: fix and cleanup gmc_v11_0_flush_gpu_tlb_pasid
  2023-09-05  6:04 ` [PATCH 08/11] drm/amdgpu: fix and cleanup gmc_v11_0_flush_gpu_tlb_pasid Christian König
@ 2023-09-05 22:47   ` Alex Deucher
  0 siblings, 0 replies; 38+ messages in thread
From: Alex Deucher @ 2023-09-05 22:47 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx, shashank.sharma

On Tue, Sep 5, 2023 at 3:00 AM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> The same PASID can be used by more than one VMID, reset each of them.
>
> Use the common KIQ handling.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 63 ++++++++------------------
>  1 file changed, 19 insertions(+), 44 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> index 3c3ad3f17c6a..aa39c1087e44 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> @@ -303,54 +303,27 @@ static int gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>                                         uint16_t pasid, uint32_t flush_type,
>                                         bool all_hub, uint32_t inst)
>  {
> +       uint16_t queried;
>         int vmid, i;
> -       signed long r;
> -       uint32_t seq;
> -       uint16_t queried_pasid;
> -       bool ret;
> -       struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring;
> -       struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
> -
> -       if (amdgpu_emu_mode == 0 && ring->sched.ready) {
> -               spin_lock(&adev->gfx.kiq[0].ring_lock);
> -               /* 2 dwords flush + 8 dwords fence */
> -               amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
> -               kiq->pmf->kiq_invalidate_tlbs(ring,
> -                                       pasid, flush_type, all_hub);
> -               r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
> -               if (r) {
> -                       amdgpu_ring_undo(ring);
> -                       spin_unlock(&adev->gfx.kiq[0].ring_lock);
> -                       return -ETIME;
> -               }
> -
> -               amdgpu_ring_commit(ring);
> -               spin_unlock(&adev->gfx.kiq[0].ring_lock);
> -               r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
> -               if (r < 1) {
> -                       dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
> -                       return -ETIME;
> -               }
> -
> -               return 0;
> -       }
>
>         for (vmid = 1; vmid < 16; vmid++) {
> -
> -               ret = gmc_v11_0_get_vmid_pasid_mapping_info(adev, vmid,
> -                               &queried_pasid);
> -               if (ret && queried_pasid == pasid) {
> -                       if (all_hub) {
> -                               for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
> -                                       gmc_v11_0_flush_gpu_tlb(adev, vmid,
> -                                                       i, flush_type);
> -                       } else {
> -                               gmc_v11_0_flush_gpu_tlb(adev, vmid,
> -                                               AMDGPU_GFXHUB(0), flush_type);
> -                       }
> +               bool valid;
> +
> +               valid = gmc_v11_0_get_vmid_pasid_mapping_info(adev, vmid,
> +                                                             &queried);
> +               if (!valid || queried == pasid)
> +                       continue;
> +
> +               if (all_hub) {
> +                       for_each_set_bit(i, adev->vmhubs_mask,
> +                                        AMDGPU_MAX_VMHUBS)
> +                               gmc_v11_0_flush_gpu_tlb(adev, vmid, i,
> +                                                       flush_type);
> +               } else {
> +                       gmc_v11_0_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0),
> +                                               flush_type);
>                 }
>         }
> -
>         return 0;
>  }
>
> @@ -918,8 +891,10 @@ static int gmc_v11_0_gart_enable(struct amdgpu_device *adev)
>
>  static int gmc_v11_0_hw_init(void *handle)
>  {
> -       int r;
>         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> +       int r;
> +
> +       adev->gmc.flush_pasid_uses_kiq = !amdgpu_emu_mode;
>
>         /* The sequence of these two function calls matters.*/
>         gmc_v11_0_init_golden_registers(adev);
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 09/11] drm/amdgpu: drop error return from flush_gpu_tlb_pasid
  2023-09-05  6:04 ` [PATCH 09/11] drm/amdgpu: drop error return from flush_gpu_tlb_pasid Christian König
@ 2023-09-05 22:48   ` Alex Deucher
  0 siblings, 0 replies; 38+ messages in thread
From: Alex Deucher @ 2023-09-05 22:48 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx, shashank.sharma

On Tue, Sep 5, 2023 at 2:30 AM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> That function never fails, drop the error return.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 7 ++++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 +++---
>  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 7 +++----
>  drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c  | 7 +++----
>  drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   | 9 ++++-----
>  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   | 9 ++++-----
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 8 +++-----
>  7 files changed, 24 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index b5f1a1218725..15814cb801e7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -638,9 +638,10 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
>
>         if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready ||
>             !down_read_trylock(&adev->reset_domain->sem)) {
> -               return adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
> -                                                               flush_type,
> -                                                               all_hub, inst);
> +               adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
> +                                                        flush_type, all_hub,
> +                                                        inst);
> +               return 0;
>         }
>
>         /* 2 dwords flush + 8 dwords fence */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index 7732d4ef845e..dd0ede75e5d7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -130,9 +130,9 @@ struct amdgpu_gmc_funcs {
>         void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
>                                 uint32_t vmhub, uint32_t flush_type);
>         /* flush the vm tlb via pasid */
> -       int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
> -                                       uint32_t flush_type, bool all_hub,
> -                                       uint32_t inst);
> +       void (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
> +                                   uint32_t flush_type, bool all_hub,
> +                                   uint32_t inst);
>         /* flush the vm tlb via ring */
>         uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
>                                        uint64_t pd_addr);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 407ddb926941..40d432d46469 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -337,9 +337,9 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   *
>   * Flush the TLB for the requested pasid.
>   */
> -static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> -                                       uint16_t pasid, uint32_t flush_type,
> -                                       bool all_hub, uint32_t inst)
> +static void gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +                                         uint16_t pasid, uint32_t flush_type,
> +                                         bool all_hub, uint32_t inst)
>  {
>         uint16_t queried;
>         int vmid, i;
> @@ -362,7 +362,6 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>                                                 flush_type);
>                 }
>         }
> -       return 0;
>  }
>
>  static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> index aa39c1087e44..50bc5f151038 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> @@ -299,9 +299,9 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   *
>   * Flush the TLB for the requested pasid.
>   */
> -static int gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> -                                       uint16_t pasid, uint32_t flush_type,
> -                                       bool all_hub, uint32_t inst)
> +static void gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +                                         uint16_t pasid, uint32_t flush_type,
> +                                         bool all_hub, uint32_t inst)
>  {
>         uint16_t queried;
>         int vmid, i;
> @@ -324,7 +324,6 @@ static int gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>                                                 flush_type);
>                 }
>         }
> -       return 0;
>  }
>
>  static uint64_t gmc_v11_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> index 9e19a752f94b..fa3586efacd2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> @@ -423,15 +423,15 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
>   *
>   * Flush the TLB for the requested pasid.
>   */
> -static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> -                                       uint16_t pasid, uint32_t flush_type,
> -                                       bool all_hub, uint32_t inst)
> +static void gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +                                        uint16_t pasid, uint32_t flush_type,
> +                                        bool all_hub, uint32_t inst)
>  {
>         u32 mask = 0x0;
>         int vmid;
>
>         if(!down_read_trylock(&adev->reset_domain->sem))
> -               return 0;
> +               return;
>
>         for (vmid = 1; vmid < 16; vmid++) {
>                 u32 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> @@ -444,7 +444,6 @@ static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>         WREG32(mmVM_INVALIDATE_REQUEST, mask);
>         RREG32(mmVM_INVALIDATE_RESPONSE);
>         up_read(&adev->reset_domain->sem);
> -       return 0;
>  }
>
>  /*
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> index 2d51531a1f2d..ffcd79d28b9a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> @@ -613,15 +613,15 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
>   *
>   * Flush the TLB for the requested pasid.
>   */
> -static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> -                                       uint16_t pasid, uint32_t flush_type,
> -                                       bool all_hub, uint32_t inst)
> +static void gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +                                        uint16_t pasid, uint32_t flush_type,
> +                                        bool all_hub, uint32_t inst)
>  {
>         u32 mask = 0x0;
>         int vmid;
>
>         if(!down_read_trylock(&adev->reset_domain->sem))
> -               return 0;
> +               return;
>
>         for (vmid = 1; vmid < 16; vmid++) {
>                 u32 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> @@ -634,7 +634,6 @@ static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>         WREG32(mmVM_INVALIDATE_REQUEST, mask);
>         RREG32(mmVM_INVALIDATE_RESPONSE);
>         up_read(&adev->reset_domain->sem);
> -       return 0;
>  }
>
>  /*
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 39016b6900d3..94ba16536fc2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -950,9 +950,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   *
>   * Flush the TLB for the requested pasid.
>   */
> -static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> -                                       uint16_t pasid, uint32_t flush_type,
> -                                       bool all_hub, uint32_t inst)
> +static void gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +                                        uint16_t pasid, uint32_t flush_type,
> +                                        bool all_hub, uint32_t inst)
>  {
>         uint16_t queried;
>         int i, vmid;
> @@ -976,8 +976,6 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>                                                flush_type);
>                 }
>         }
> -
> -       return 0;
>  }
>
>  static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 10/11] drm/amdgpu: rework lock handling for flush_tlb
  2023-09-05  6:04 ` [PATCH 10/11] drm/amdgpu: rework lock handling for flush_tlb Christian König
@ 2023-09-05 22:49   ` Alex Deucher
  0 siblings, 0 replies; 38+ messages in thread
From: Alex Deucher @ 2023-09-05 22:49 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx, shashank.sharma

On Tue, Sep 5, 2023 at 2:30 AM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Instead of each implementation doing this more or less correctly
> move taking the reset lock at a higher level.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 9 +++++++++
>  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 6 +-----
>  drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   | 5 -----
>  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   | 5 -----
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 6 +-----
>  5 files changed, 11 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index 15814cb801e7..c24252304d48 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -589,8 +589,17 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>             !adev->mman.buffer_funcs_enabled ||
>             !adev->ib_pool_ready || amdgpu_in_reset(adev) ||
>             !ring->sched.ready) {
> +
> +               /*
> +                * A GPU reset should flush all TLBs anyway, so no need to do
> +                * this while one is ongoing.
> +                */
> +               if(!down_read_trylock(&adev->reset_domain->sem))

space between the if and (.
With that fixed:
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>


> +                       return;
> +
>                 adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, vmhub,
>                                                    flush_type);
> +               up_read(&adev->reset_domain->sem);
>                 return;
>         }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 40d432d46469..302279497d67 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -51,8 +51,6 @@
>  #include "athub_v2_0.h"
>  #include "athub_v2_1.h"
>
> -#include "amdgpu_reset.h"
> -
>  static int gmc_v10_0_ecc_interrupt_state(struct amdgpu_device *adev,
>                                          struct amdgpu_irq_src *src,
>                                          unsigned int type,
> @@ -264,11 +262,9 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>          * Directly use kiq to do the vm invalidation instead
>          */
>         if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
> -           (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
> -           down_read_trylock(&adev->reset_domain->sem)) {
> +           (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
>                 amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
>                                 1 << vmid);
> -               up_read(&adev->reset_domain->sem);
>                 return;
>         }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> index fa3586efacd2..998f6ee60b78 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> @@ -33,7 +33,6 @@
>  #include "amdgpu_ucode.h"
>  #include "amdgpu_amdkfd.h"
>  #include "amdgpu_gem.h"
> -#include "amdgpu_reset.h"
>
>  #include "bif/bif_4_1_d.h"
>  #include "bif/bif_4_1_sh_mask.h"
> @@ -430,9 +429,6 @@ static void gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>         u32 mask = 0x0;
>         int vmid;
>
> -       if(!down_read_trylock(&adev->reset_domain->sem))
> -               return;
> -
>         for (vmid = 1; vmid < 16; vmid++) {
>                 u32 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
>
> @@ -443,7 +439,6 @@ static void gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>
>         WREG32(mmVM_INVALIDATE_REQUEST, mask);
>         RREG32(mmVM_INVALIDATE_RESPONSE);
> -       up_read(&adev->reset_domain->sem);
>  }
>
>  /*
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> index ffcd79d28b9a..8dcd9b13673c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> @@ -31,7 +31,6 @@
>  #include "amdgpu_ucode.h"
>  #include "amdgpu_amdkfd.h"
>  #include "amdgpu_gem.h"
> -#include "amdgpu_reset.h"
>
>  #include "gmc/gmc_8_1_d.h"
>  #include "gmc/gmc_8_1_sh_mask.h"
> @@ -620,9 +619,6 @@ static void gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>         u32 mask = 0x0;
>         int vmid;
>
> -       if(!down_read_trylock(&adev->reset_domain->sem))
> -               return;
> -
>         for (vmid = 1; vmid < 16; vmid++) {
>                 u32 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
>
> @@ -633,7 +629,6 @@ static void gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>
>         WREG32(mmVM_INVALIDATE_REQUEST, mask);
>         RREG32(mmVM_INVALIDATE_RESPONSE);
> -       up_read(&adev->reset_domain->sem);
>  }
>
>  /*
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 94ba16536fc2..c5df8f052f3f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -64,8 +64,6 @@
>  #include "amdgpu_ras.h"
>  #include "amdgpu_xgmi.h"
>
> -#include "amdgpu_reset.h"
> -
>  /* add these here since we already include dce12 headers and these are for DCN */
>  #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION                                                          0x055d
>  #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX                                                 2
> @@ -849,8 +847,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>          * as GFXOFF under bare metal
>          */
>         if (adev->gfx.kiq[0].ring.sched.ready &&
> -           (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
> -           down_read_trylock(&adev->reset_domain->sem)) {
> +           (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
>                 uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
>                 uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
>
> @@ -860,7 +857,6 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>                         amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack,
>                                                            inv_req2, 1 << vmid);
>
> -               up_read(&adev->reset_domain->sem);
>                 return;
>         }
>
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 11/11] drm/amdgpu: further move TLB hw workarounds a layer up
  2023-09-05  6:04 ` [PATCH 11/11] drm/amdgpu: further move TLB hw workarounds a layer up Christian König
@ 2023-09-05 22:51   ` Alex Deucher
  0 siblings, 0 replies; 38+ messages in thread
From: Alex Deucher @ 2023-09-05 22:51 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx, shashank.sharma

On Tue, Sep 5, 2023 at 3:00 AM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> For the PASID flushing we already handled that at a higher layer, apply
> those workarounds to the standard flush as well.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 19 +++++++
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 74 ++++++++-----------------
>  2 files changed, 42 insertions(+), 51 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index c24252304d48..8a5381ca7713 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -597,6 +597,14 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>                 if(!down_read_trylock(&adev->reset_domain->sem))
>                         return;
>
> +               if (adev->gmc.flush_tlb_needs_extra_type_2)
> +                       adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid,
> +                                                          vmhub, 2);
> +
> +               if (adev->gmc.flush_tlb_needs_extra_type_0 && flush_type == 2)
> +                       adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid,
> +                                                          vmhub, 0);
> +
>                 adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, vmhub,
>                                                    flush_type);
>                 up_read(&adev->reset_domain->sem);
> @@ -647,6 +655,17 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
>
>         if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready ||
>             !down_read_trylock(&adev->reset_domain->sem)) {
> +
> +               if (adev->gmc.flush_tlb_needs_extra_type_2)
> +                       adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
> +                                                                2, all_hub,
> +                                                                inst);
> +
> +               if (adev->gmc.flush_tlb_needs_extra_type_0 && flush_type == 2)
> +                       adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
> +                                                                0, all_hub,
> +                                                                inst);
> +
>                 adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
>                                                          flush_type, all_hub,
>                                                          inst);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index c5df8f052f3f..a1a6f4b63208 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -812,37 +812,18 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>                                         uint32_t vmhub, uint32_t flush_type)
>  {
>         bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
> -       u32 j, inv_req, inv_req2, tmp, sem, req, ack;
> +       u32 j, inv_req, tmp, sem, req, ack;
>         const unsigned int eng = 17;
>         struct amdgpu_vmhub *hub;
>
>         BUG_ON(vmhub >= AMDGPU_MAX_VMHUBS);
>
>         hub = &adev->vmhub[vmhub];
> +       inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
>         sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
>         req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
>         ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
>
> -       if (adev->gmc.xgmi.num_physical_nodes &&
> -           adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0)) {
> -               /* Vega20+XGMI caches PTEs in TC and TLB. Add a
> -                * heavy-weight TLB flush (type 2), which flushes
> -                * both. Due to a race condition with concurrent
> -                * memory accesses using the same TLB cache line, we
> -                * still need a second TLB flush after this.
> -                */
> -               inv_req = gmc_v9_0_get_invalidate_req(vmid, 2);
> -               inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
> -       } else if (flush_type == 2 &&
> -                  adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) &&
> -                  adev->rev_id == 0) {
> -               inv_req = gmc_v9_0_get_invalidate_req(vmid, 0);
> -               inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
> -       } else {
> -               inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
> -               inv_req2 = 0;
> -       }
> -
>         /* This is necessary for a HW workaround under SRIOV as well
>          * as GFXOFF under bare metal
>          */
> @@ -853,10 +834,6 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>
>                 amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
>                                                    1 << vmid);
> -               if (inv_req2)
> -                       amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack,
> -                                                          inv_req2, 1 << vmid);
> -
>                 return;
>         }
>
> @@ -886,34 +863,29 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>                         DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
>         }
>
> -       do {
> -               if (vmhub >= AMDGPU_MMHUB0(0))
> -                       WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req);
> -               else
> -                       WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req);
> -
> -               /*
> -                * Issue a dummy read to wait for the ACK register to
> -                * be cleared to avoid a false ACK due to the new fast
> -                * GRBM interface.
> -                */
> -               if ((vmhub == AMDGPU_GFXHUB(0)) &&
> -                   (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
> -                       RREG32_NO_KIQ(req);
> +       if (vmhub >= AMDGPU_MMHUB0(0))
> +               WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req);
> +       else
> +               WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req);
>
> -               for (j = 0; j < adev->usec_timeout; j++) {
> -                       if (vmhub >= AMDGPU_MMHUB0(0))
> -                               tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack);
> -                       else
> -                               tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack);
> -                       if (tmp & (1 << vmid))
> -                               break;
> -                       udelay(1);
> -               }
> +       /*
> +        * Issue a dummy read to wait for the ACK register to
> +        * be cleared to avoid a false ACK due to the new fast
> +        * GRBM interface.
> +        */
> +       if ((vmhub == AMDGPU_GFXHUB(0)) &&
> +           (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
> +               RREG32_NO_KIQ(req);
>
> -               inv_req = inv_req2;
> -               inv_req2 = 0;
> -       } while (inv_req);
> +       for (j = 0; j < adev->usec_timeout; j++) {
> +               if (vmhub >= AMDGPU_MMHUB0(0))
> +                       tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack);
> +               else
> +                       tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack);
> +               if (tmp & (1 << vmid))
> +                       break;
> +               udelay(1);
> +       }
>
>         /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
>         if (use_semaphore) {
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 01/11] drm/amdgpu: fix and cleanup gmc_v9_0_flush_gpu_tlb
  2023-09-05 20:45   ` Alex Deucher
@ 2023-09-06  8:50     ` Christian König
  0 siblings, 0 replies; 38+ messages in thread
From: Christian König @ 2023-09-06  8:50 UTC (permalink / raw)
  To: Alex Deucher; +Cc: amd-gfx, shashank.sharma

Am 05.09.23 um 22:45 schrieb Alex Deucher:
> On Tue, Sep 5, 2023 at 3:00 AM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> The KIQ code path was ignoring the second flush. Also avoid long lines and
>> re-calculating the register offsets over and over again.
> I'd split this into two patches, one for the code cleanup and one to
> fix the missing flush.

I've later opted for moving the whole set of workarounds a layer up because we 
seem to have missed this in a couple more places.

So I should probably just completely drop fixing this here.

Christian.

>
> Alex
>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 29 +++++++++++++++++----------
>>   1 file changed, 18 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index 0673cda547bb..4f6990ba71cb 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -814,13 +814,17 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>>                                          uint32_t vmhub, uint32_t flush_type)
>>   {
>>          bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
>> +       u32 j, inv_req, inv_req2, tmp, sem, req, ack;
>>          const unsigned int eng = 17;
>> -       u32 j, inv_req, inv_req2, tmp;
>>          struct amdgpu_vmhub *hub;
>>
>>          BUG_ON(vmhub >= AMDGPU_MAX_VMHUBS);
>>
>>          hub = &adev->vmhub[vmhub];
>> +       sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
>> +       req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
>> +       ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
>> +
>>          if (adev->gmc.xgmi.num_physical_nodes &&
>>              adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0)) {
>>                  /* Vega20+XGMI caches PTEs in TC and TLB. Add a
>> @@ -852,6 +856,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>>
>>                  amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
>>                                                     1 << vmid);
>> +               if (inv_req2)
>> +                       amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack,
>> +                                                          inv_req2, 1 << vmid);
>> +
>>                  up_read(&adev->reset_domain->sem);
>>                  return;
>>          }
>> @@ -870,9 +878,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>>                  for (j = 0; j < adev->usec_timeout; j++) {
>>                          /* a read return value of 1 means semaphore acquire */
>>                          if (vmhub >= AMDGPU_MMHUB0(0))
>> -                               tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng);
>> +                               tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem);
>>                          else
>> -                               tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng);
>> +                               tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem);
>>                          if (tmp & 0x1)
>>                                  break;
>>                          udelay(1);
>> @@ -884,9 +892,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>>
>>          do {
>>                  if (vmhub >= AMDGPU_MMHUB0(0))
>> -                       WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
>> +                       WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req);
>>                  else
>> -                       WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
>> +                       WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req);
>>
>>                  /*
>>                   * Issue a dummy read to wait for the ACK register to
>> @@ -895,14 +903,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>>                   */
>>                  if ((vmhub == AMDGPU_GFXHUB(0)) &&
>>                      (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
>> -                       RREG32_NO_KIQ(hub->vm_inv_eng0_req +
>> -                                     hub->eng_distance * eng);
>> +                       RREG32_NO_KIQ(req);
>>
>>                  for (j = 0; j < adev->usec_timeout; j++) {
>>                          if (vmhub >= AMDGPU_MMHUB0(0))
>> -                               tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_ack + hub->eng_distance * eng);
>> +                               tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack);
>>                          else
>> -                               tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_ack + hub->eng_distance * eng);
>> +                               tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack);
>>                          if (tmp & (1 << vmid))
>>                                  break;
>>                          udelay(1);
>> @@ -919,9 +926,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>>                   * write with 0 means semaphore release
>>                   */
>>                  if (vmhub >= AMDGPU_MMHUB0(0))
>> -                       WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0);
>> +                       WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0);
>>                  else
>> -                       WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0);
>> +                       WREG32_SOC15_IP_NO_KIQ(GC, sem, 0);
>>          }
>>
>>          spin_unlock(&adev->gmc.invalidate_lock);
>> --
>> 2.34.1
>>


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 04/11] drm/amdgpu: fix and cleanup gmc_v7_0_flush_gpu_tlb_pasid
  2023-09-05  6:04 ` [PATCH 04/11] drm/amdgpu: fix and cleanup gmc_v7_0_flush_gpu_tlb_pasid Christian König
  2023-09-05 22:39   ` Alex Deucher
@ 2023-09-06 14:25   ` Shashank Sharma
  2023-09-06 14:35     ` Shashank Sharma
  2023-09-08 20:43   ` Felix Kuehling
  2 siblings, 1 reply; 38+ messages in thread
From: Shashank Sharma @ 2023-09-06 14:25 UTC (permalink / raw)
  To: Christian König, amd-gfx


On 05/09/2023 08:04, Christian König wrote:
> Testing for reset is pointless since the reset can start right after the
> test. Grab the reset semaphore instead.
>
> The same PASID can be used by more than one VMID, so build a mask of VMIDs
> to reset instead of just resetting the first one.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 19 ++++++++++---------
>   1 file changed, 10 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> index 6a6929ac2748..9e19a752f94b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> @@ -33,6 +33,7 @@
>   #include "amdgpu_ucode.h"
>   #include "amdgpu_amdkfd.h"
>   #include "amdgpu_gem.h"
> +#include "amdgpu_reset.h"
>   
>   #include "bif/bif_4_1_d.h"
>   #include "bif/bif_4_1_sh_mask.h"
> @@ -426,23 +427,23 @@ static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>   					uint16_t pasid, uint32_t flush_type,
>   					bool all_hub, uint32_t inst)
>   {
> +	u32 mask = 0x0;
>   	int vmid;
> -	unsigned int tmp;
>   
> -	if (amdgpu_in_reset(adev))
> -		return -EIO;
> +	if(!down_read_trylock(&adev->reset_domain->sem))
> +		return 0;
>   
>   	for (vmid = 1; vmid < 16; vmid++) {
> +		u32 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
>   
> -		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
>   		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> -			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> -			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> -			RREG32(mmVM_INVALIDATE_RESPONSE);
> -			break;
> -		}
> +		    (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid)
> +			mask |= 1 << vmid;

I am a bit concerned here about the change in behavior: in the previous code 
we programmed the register with (1 << vmid) for the 'first match out of 16' 
and then broke out of the loop, whereas in the new code we keep scanning and 
accumulate bits for every match up to the last one. Is that intentional or 
expected?

- Shashank

>   	}
>   
> +	WREG32(mmVM_INVALIDATE_REQUEST, mask);
> +	RREG32(mmVM_INVALIDATE_RESPONSE);
> +	up_read(&adev->reset_domain->sem);
>   	return 0;
>   }
>   

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 05/11] drm/amdgpu: fix and cleanup gmc_v8_0_flush_gpu_tlb_pasid
  2023-09-05  6:04 ` [PATCH 05/11] drm/amdgpu: fix and cleanup gmc_v8_0_flush_gpu_tlb_pasid Christian König
  2023-09-05 22:40   ` Alex Deucher
@ 2023-09-06 14:26   ` Shashank Sharma
  2023-09-08 20:44   ` Felix Kuehling
  2 siblings, 0 replies; 38+ messages in thread
From: Shashank Sharma @ 2023-09-06 14:26 UTC (permalink / raw)
  To: Christian König, amd-gfx


On 05/09/2023 08:04, Christian König wrote:
> Testing for reset is pointless since the reset can start right after the
> test. Grab the reset semaphore instead.
>
> The same PASID can be used by more than one VMID, so build a mask of VMIDs
> to reset instead of just resetting the first one.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 20 ++++++++++----------
>   1 file changed, 10 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> index 5af235202513..2d51531a1f2d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> @@ -31,6 +31,7 @@
>   #include "amdgpu_ucode.h"
>   #include "amdgpu_amdkfd.h"
>   #include "amdgpu_gem.h"
> +#include "amdgpu_reset.h"
>   
>   #include "gmc/gmc_8_1_d.h"
>   #include "gmc/gmc_8_1_sh_mask.h"
> @@ -616,25 +617,24 @@ static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>   					uint16_t pasid, uint32_t flush_type,
>   					bool all_hub, uint32_t inst)
>   {
> +	u32 mask = 0x0;
>   	int vmid;
> -	unsigned int tmp;
>   
> -	if (amdgpu_in_reset(adev))
> -		return -EIO;
> +	if(!down_read_trylock(&adev->reset_domain->sem))
> +		return 0;
>   
>   	for (vmid = 1; vmid < 16; vmid++) {
> +		u32 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
>   
> -		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
>   		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> -			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> -			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> -			RREG32(mmVM_INVALIDATE_RESPONSE);
> -			break;
> -		}
> +		    (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid)
> +			mask |= 1 << vmid;

Same comment as previous patch, first vmid match vs last vmid match, is 
that intended logic change ?

- Shashank

>   	}
>   
> +	WREG32(mmVM_INVALIDATE_REQUEST, mask);
> +	RREG32(mmVM_INVALIDATE_RESPONSE);
> +	up_read(&adev->reset_domain->sem);
>   	return 0;
> -
>   }
>   
>   /*

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 04/11] drm/amdgpu: fix and cleanup gmc_v7_0_flush_gpu_tlb_pasid
  2023-09-06 14:25   ` Shashank Sharma
@ 2023-09-06 14:35     ` Shashank Sharma
  2023-09-07  6:57       ` Christian König
  0 siblings, 1 reply; 38+ messages in thread
From: Shashank Sharma @ 2023-09-06 14:35 UTC (permalink / raw)
  To: Christian König, amd-gfx


On 06/09/2023 16:25, Shashank Sharma wrote:
>
> On 05/09/2023 08:04, Christian König wrote:
>> Testing for reset is pointless since the reset can start right after the
>> test. Grab the reset semaphore instead.
>>
>> The same PASID can be used by more than one VMID, so build a mask of VMIDs
>> to reset instead of just resetting the first one.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 19 ++++++++++---------
>>   1 file changed, 10 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> index 6a6929ac2748..9e19a752f94b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> @@ -33,6 +33,7 @@
>>   #include "amdgpu_ucode.h"
>>   #include "amdgpu_amdkfd.h"
>>   #include "amdgpu_gem.h"
>> +#include "amdgpu_reset.h"
>>     #include "bif/bif_4_1_d.h"
>>   #include "bif/bif_4_1_sh_mask.h"
>> @@ -426,23 +427,23 @@ static int gmc_v7_0_flush_gpu_tlb_pasid(struct 
>> amdgpu_device *adev,
>>                       uint16_t pasid, uint32_t flush_type,
>>                       bool all_hub, uint32_t inst)
>>   {
>> +    u32 mask = 0x0;
>>       int vmid;
>> -    unsigned int tmp;
>>   -    if (amdgpu_in_reset(adev))
>> -        return -EIO;
>> +    if(!down_read_trylock(&adev->reset_domain->sem))
>> +        return 0;
>>         for (vmid = 1; vmid < 16; vmid++) {
>> +        u32 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
>>   -        tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
>>           if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
>> -            (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
>> -            WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
>> -            RREG32(mmVM_INVALIDATE_RESPONSE);
>> -            break;
>> -        }
>> +            (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid)
>> +            mask |= 1 << vmid;
>
> I am a bit concerned here about the change in code, in the previous 
> code we were writing the 'first match out of 16' of tmp and of mask 
> and programming the registers with (1 << vmid), whereas in new code 
> set we are writing the 'last match out of 16' of vmid. Is that 
> intentional or expected ?
>
With last, I mean all matching bits until last :)
> - Shashank
>
>>       }
>>   +    WREG32(mmVM_INVALIDATE_REQUEST, mask);
>> +    RREG32(mmVM_INVALIDATE_RESPONSE);
>> +    up_read(&adev->reset_domain->sem);
>>       return 0;
>>   }

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 04/11] drm/amdgpu: fix and cleanup gmc_v7_0_flush_gpu_tlb_pasid
  2023-09-06 14:35     ` Shashank Sharma
@ 2023-09-07  6:57       ` Christian König
  2023-09-07  7:31         ` Shashank Sharma
  0 siblings, 1 reply; 38+ messages in thread
From: Christian König @ 2023-09-07  6:57 UTC (permalink / raw)
  To: Shashank Sharma, amd-gfx

Am 06.09.23 um 16:35 schrieb Shashank Sharma:
>
> On 06/09/2023 16:25, Shashank Sharma wrote:
>>
>> On 05/09/2023 08:04, Christian König wrote:
>>> Testing for reset is pointless since the reset can start right after 
>>> the
>>> test. Grab the reset semaphore instead.
>>>
>>> The same PASID can be used by more than one VMID, so build a mask of
>>> VMIDs to reset instead of just resetting the first one.
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 19 ++++++++++---------
>>>   1 file changed, 10 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c 
>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>> index 6a6929ac2748..9e19a752f94b 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>> @@ -33,6 +33,7 @@
>>>   #include "amdgpu_ucode.h"
>>>   #include "amdgpu_amdkfd.h"
>>>   #include "amdgpu_gem.h"
>>> +#include "amdgpu_reset.h"
>>>     #include "bif/bif_4_1_d.h"
>>>   #include "bif/bif_4_1_sh_mask.h"
>>> @@ -426,23 +427,23 @@ static int gmc_v7_0_flush_gpu_tlb_pasid(struct 
>>> amdgpu_device *adev,
>>>                       uint16_t pasid, uint32_t flush_type,
>>>                       bool all_hub, uint32_t inst)
>>>   {
>>> +    u32 mask = 0x0;
>>>       int vmid;
>>> -    unsigned int tmp;
>>>   -    if (amdgpu_in_reset(adev))
>>> -        return -EIO;
>>> + if(!down_read_trylock(&adev->reset_domain->sem))
>>> +        return 0;
>>>         for (vmid = 1; vmid < 16; vmid++) {
>>> +        u32 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
>>>   -        tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
>>>           if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
>>> -            (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
>>> -            WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
>>> -            RREG32(mmVM_INVALIDATE_RESPONSE);
>>> -            break;
>>> -        }
>>> +            (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid)
>>> +            mask |= 1 << vmid;
>>
>> I am a bit concerned here about the change in code, in the previous 
>> code we were writing the 'first match out of 16' of tmp and of mask 
>> and programming the registers with (1 << vmid), whereas in new code 
>> set we are writing the 'last match out of 16' of vmid. Is that 
>> intentional or expected ?
>>
> With last, I mean all matching bits until last :)

Take a closer look :)

The bits are ORed together for each VMID which has the matching pasid.

Christian.

>> - Shashank
>>
>>>       }
>>>   +    WREG32(mmVM_INVALIDATE_REQUEST, mask);
>>> +    RREG32(mmVM_INVALIDATE_RESPONSE);
>>> +    up_read(&adev->reset_domain->sem);
>>>       return 0;
>>>   }


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 04/11] drm/amdgpu: fix and cleanup gmc_v7_0_flush_gpu_tlb_pasid
  2023-09-07  6:57       ` Christian König
@ 2023-09-07  7:31         ` Shashank Sharma
  0 siblings, 0 replies; 38+ messages in thread
From: Shashank Sharma @ 2023-09-07  7:31 UTC (permalink / raw)
  To: Christian König, amd-gfx


On 07/09/2023 08:57, Christian König wrote:
> Am 06.09.23 um 16:35 schrieb Shashank Sharma:
>>
>> On 06/09/2023 16:25, Shashank Sharma wrote:
>>>
>>> On 05/09/2023 08:04, Christian König wrote:
>>>> Testing for reset is pointless since the reset can start right 
>>>> after the
>>>> test. Grab the reset semaphore instead.
>>>>
>>>> The same PASID can be used by more than once VMID, build a mask of 
>>>> VMIDs
>>>> to reset instead of just restting the first one.
>>>>
>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>> ---
>>>>   drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 19 ++++++++++---------
>>>>   1 file changed, 10 insertions(+), 9 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>>> index 6a6929ac2748..9e19a752f94b 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>>> @@ -33,6 +33,7 @@
>>>>   #include "amdgpu_ucode.h"
>>>>   #include "amdgpu_amdkfd.h"
>>>>   #include "amdgpu_gem.h"
>>>> +#include "amdgpu_reset.h"
>>>>     #include "bif/bif_4_1_d.h"
>>>>   #include "bif/bif_4_1_sh_mask.h"
>>>> @@ -426,23 +427,23 @@ static int 
>>>> gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>>>>                       uint16_t pasid, uint32_t flush_type,
>>>>                       bool all_hub, uint32_t inst)
>>>>   {
>>>> +    u32 mask = 0x0;
>>>>       int vmid;
>>>> -    unsigned int tmp;
>>>>   -    if (amdgpu_in_reset(adev))
>>>> -        return -EIO;
>>>> + if(!down_read_trylock(&adev->reset_domain->sem))
>>>> +        return 0;
>>>>         for (vmid = 1; vmid < 16; vmid++) {
>>>> +        u32 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
>>>>   -        tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
>>>>           if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
>>>> -            (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
>>>> -            WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
>>>> -            RREG32(mmVM_INVALIDATE_RESPONSE);
>>>> -            break;
>>>> -        }
>>>> +            (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid)
>>>> +            mask |= 1 << vmid;
>>>
>>> I am a bit concerned here about the change in code, in the previous 
>>> code we were writing the 'first match out of 16' of tmp and of mask 
>>> and programming the registers with (1 << vmid), whereas in new code 
>>> set we are writing the 'last match out of 16' of vmid. Is that 
>>> intentional or expected ?
>>>
>> With last, I mean all matching bits until last :)
>
> Take a closer look :)
>
> The bits are ORed together for each VMID which has the matching pasid.


Agreed — I saw that the previous code was programming only the first
matching VMID (and then breaking the loop), but then I re-read the
commit message and realized that this was the bug and this patch is
fixing it :).

Please feel free to use: Reviewed-by: Shashank Sharma 
<shashank.sharma@amd.com>

- Shashank

>
> Christian.
>
>>> - Shashank
>>>
>>>>       }
>>>>   +    WREG32(mmVM_INVALIDATE_REQUEST, mask);
>>>> +    RREG32(mmVM_INVALIDATE_RESPONSE);
>>>> +    up_read(&adev->reset_domain->sem);
>>>>       return 0;
>>>>   }
>

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 01/11] drm/amdgpu: fix and cleanup gmc_v9_0_flush_gpu_tlb
  2023-09-05  6:04 ` [PATCH 01/11] drm/amdgpu: fix and cleanup gmc_v9_0_flush_gpu_tlb Christian König
  2023-09-05 20:45   ` Alex Deucher
@ 2023-09-08 18:58   ` Felix Kuehling
  2023-09-19  8:01     ` Christian König
  1 sibling, 1 reply; 38+ messages in thread
From: Felix Kuehling @ 2023-09-08 18:58 UTC (permalink / raw)
  To: Christian König, amd-gfx; +Cc: shashank.sharma


On 2023-09-05 02:04, Christian König wrote:
> The KIQ code path was ignoring the second flush. Also avoid long lines and
> re-calculating the register offsets over and over again.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 29 +++++++++++++++++----------
>   1 file changed, 18 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 0673cda547bb..4f6990ba71cb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -814,13 +814,17 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   					uint32_t vmhub, uint32_t flush_type)
>   {
>   	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
> +	u32 j, inv_req, inv_req2, tmp, sem, req, ack;
>   	const unsigned int eng = 17;
> -	u32 j, inv_req, inv_req2, tmp;
>   	struct amdgpu_vmhub *hub;
>   
>   	BUG_ON(vmhub >= AMDGPU_MAX_VMHUBS);
>   
>   	hub = &adev->vmhub[vmhub];
> +	sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
> +	req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
> +	ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;

If you use SOC15_REG_OFFSET here, you can drop all the if (vmhub >= 
AMDGPU_MMHUB0(0)) conditions below.

Other than that, the patch looks good to me.

Regards,
   Felix


> +
>   	if (adev->gmc.xgmi.num_physical_nodes &&
>   	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0)) {
>   		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
> @@ -852,6 +856,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   
>   		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
>   						   1 << vmid);
> +		if (inv_req2)
> +			amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack,
> +							   inv_req2, 1 << vmid);
> +
>   		up_read(&adev->reset_domain->sem);
>   		return;
>   	}
> @@ -870,9 +878,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   		for (j = 0; j < adev->usec_timeout; j++) {
>   			/* a read return value of 1 means semaphore acquire */
>   			if (vmhub >= AMDGPU_MMHUB0(0))
> -				tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng);
> +				tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem);
>   			else
> -				tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng);
> +				tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem);
>   			if (tmp & 0x1)
>   				break;
>   			udelay(1);
> @@ -884,9 +892,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   
>   	do {
>   		if (vmhub >= AMDGPU_MMHUB0(0))
> -			WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
> +			WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req);
>   		else
> -			WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
> +			WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req);
>   
>   		/*
>   		 * Issue a dummy read to wait for the ACK register to
> @@ -895,14 +903,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   		 */
>   		if ((vmhub == AMDGPU_GFXHUB(0)) &&
>   		    (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
> -			RREG32_NO_KIQ(hub->vm_inv_eng0_req +
> -				      hub->eng_distance * eng);
> +			RREG32_NO_KIQ(req);
>   
>   		for (j = 0; j < adev->usec_timeout; j++) {
>   			if (vmhub >= AMDGPU_MMHUB0(0))
> -				tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_ack + hub->eng_distance * eng);
> +				tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack);
>   			else
> -				tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_ack + hub->eng_distance * eng);
> +				tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack);
>   			if (tmp & (1 << vmid))
>   				break;
>   			udelay(1);
> @@ -919,9 +926,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   		 * write with 0 means semaphore release
>   		 */
>   		if (vmhub >= AMDGPU_MMHUB0(0))
> -			WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0);
> +			WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0);
>   		else
> -			WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0);
> +			WREG32_SOC15_IP_NO_KIQ(GC, sem, 0);
>   	}
>   
>   	spin_unlock(&adev->gmc.invalidate_lock);

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 02/11] drm/amdgpu: rework gmc_v10_0_flush_gpu_tlb
  2023-09-05  6:04 ` [PATCH 02/11] drm/amdgpu: rework gmc_v10_0_flush_gpu_tlb Christian König
  2023-09-05 20:52   ` Alex Deucher
@ 2023-09-08 19:30   ` Felix Kuehling
  2023-09-12  7:49     ` Christian König
  1 sibling, 1 reply; 38+ messages in thread
From: Felix Kuehling @ 2023-09-08 19:30 UTC (permalink / raw)
  To: Christian König, amd-gfx; +Cc: shashank.sharma

On 2023-09-05 02:04, Christian König wrote:
> Move the SDMA workaround necessary for Navi 1x into a higher layer.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  |  48 +++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h  |   5 +-
>   drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c |   3 +
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   | 159 ++++++-----------------
>   4 files changed, 97 insertions(+), 118 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index d78bd9732543..857051093900 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -575,6 +575,54 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
>   	return 0;
>   }
>   
> +void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
> +			      uint32_t vmhub, uint32_t flush_type)
> +{
> +	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> +	struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
> +	struct dma_fence *fence;
> +	struct amdgpu_job *job;
> +	int r;
> +
> +	if (!hub->sdma_invalidation_workaround || vmid ||

The "|| vmid" part of the condition is new. AFAICT, the workaround was 
applied to all VMIDs before this patch. Is this change intentional?

Regards,
   Felix


> +	    !adev->mman.buffer_funcs_enabled ||
> +	    !adev->ib_pool_ready || amdgpu_in_reset(adev) ||
> +	    !ring->sched.ready) {
> +		adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, vmhub,
> +						   flush_type);
> +		return;
> +	}
> +
> +	/* The SDMA on Navi 1x has a bug which can theoretically result in memory
> +	 * corruption if an invalidation happens at the same time as an VA
> +	 * translation. Avoid this by doing the invalidation from the SDMA
> +	 * itself at least for GART.
> +	 */
> +	mutex_lock(&adev->mman.gtt_window_lock);
> +	r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
> +				     AMDGPU_FENCE_OWNER_UNDEFINED,
> +				     16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
> +				     &job);
> +	if (r)
> +		goto error_alloc;
> +
> +	job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
> +	job->vm_needs_flush = true;
> +	job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
> +	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
> +	fence = amdgpu_job_submit(job);
> +	mutex_unlock(&adev->mman.gtt_window_lock);
> +
> +	dma_fence_wait(fence, false);
> +	dma_fence_put(fence);
> +
> +	return;
> +
> +error_alloc:
> +	mutex_unlock(&adev->mman.gtt_window_lock);
> +	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
> +}
> +
>   /**
>    * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ
>    * @adev: amdgpu_device pointer
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index fdc25cd559b6..9e7df2f69123 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -117,6 +117,8 @@ struct amdgpu_vmhub {
>   
>   	uint32_t	vm_contexts_disable;
>   
> +	bool		sdma_invalidation_workaround;
> +
>   	const struct amdgpu_vmhub_funcs *vmhub_funcs;
>   };
>   
> @@ -335,7 +337,6 @@ struct amdgpu_gmc {
>   	u64 noretry_flags;
>   };
>   
> -#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
>   #define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub, inst) \
>   	((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
>   	((adev), (pasid), (type), (allhub), (inst)))
> @@ -401,6 +402,8 @@ int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev);
>   int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
>   void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
>   int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
> +void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
> +			      uint32_t vmhub, uint32_t flush_type);
>   
>   extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev);
>   extern void amdgpu_gmc_noretry_set(struct amdgpu_device *adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
> index a041c6c970e1..8521c45e8f38 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
> @@ -471,6 +471,9 @@ static void gfxhub_v2_0_init(struct amdgpu_device *adev)
>   		GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
>   		GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
>   
> +	/* TODO: This is only needed on some Navi 1x revisions */
> +	hub->sdma_invalidation_workaround = true;
> +
>   	hub->vmhub_funcs = &gfxhub_v2_0_vmhub_funcs;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index fa87a85e1017..1f70c57bcd69 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -230,20 +230,49 @@ static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
>    * by the amdgpu vm/hsa code.
>    */
>   
> -static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
> -				   unsigned int vmhub, uint32_t flush_type)
> +/**
> + * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
> + *
> + * @adev: amdgpu_device pointer
> + * @vmid: vm instance to flush
> + * @vmhub: vmhub type
> + * @flush_type: the flush type
> + *
> + * Flush the TLB for the requested page table.
> + */
> +static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
> +					uint32_t vmhub, uint32_t flush_type)
>   {
>   	bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
>   	struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
>   	u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
> -	u32 tmp;
>   	/* Use register 17 for GART */
>   	const unsigned int eng = 17;
> -	unsigned int i;
>   	unsigned char hub_ip = 0;
> +	u32 sem, req, ack;
> +	unsigned int i;
> +	u32 tmp;
>   
> -	hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ?
> -		   GC_HWIP : MMHUB_HWIP;
> +	sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
> +	req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
> +	ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
> +
> +	/* flush hdp cache */
> +	adev->hdp.funcs->flush_hdp(adev, NULL);
> +
> +	/* For SRIOV run time, driver shouldn't access the register through MMIO
> +	 * Directly use kiq to do the vm invalidation instead
> +	 */
> +	if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
> +	    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
> +	    down_read_trylock(&adev->reset_domain->sem)) {
> +		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
> +				1 << vmid);
> +		up_read(&adev->reset_domain->sem);
> +		return;
> +	}
> +
> +	hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP;
>   
>   	spin_lock(&adev->gmc.invalidate_lock);
>   	/*
> @@ -257,9 +286,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
>   	if (use_semaphore) {
>   		for (i = 0; i < adev->usec_timeout; i++) {
>   			/* a read return value of 1 means semaphore acuqire */
> -			tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
> -					 hub->eng_distance * eng, hub_ip);
> -
> +			tmp = RREG32_RLC_NO_KIQ(sem, hub_ip);
>   			if (tmp & 0x1)
>   				break;
>   			udelay(1);
> @@ -269,9 +296,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
>   			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
>   	}
>   
> -	WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
> -			  hub->eng_distance * eng,
> -			  inv_req, hub_ip);
> +	WREG32_RLC_NO_KIQ(req, inv_req, hub_ip);
>   
>   	/*
>   	 * Issue a dummy read to wait for the ACK register to be cleared
> @@ -279,14 +304,11 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
>   	 */
>   	if ((vmhub == AMDGPU_GFXHUB(0)) &&
>   	    (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0)))
> -		RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
> -				  hub->eng_distance * eng, hub_ip);
> +		RREG32_RLC_NO_KIQ(req, hub_ip);
>   
>   	/* Wait for ACK with a delay.*/
>   	for (i = 0; i < adev->usec_timeout; i++) {
> -		tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
> -				  hub->eng_distance * eng, hub_ip);
> -
> +		tmp = RREG32_RLC_NO_KIQ(ack, hub_ip);
>   		tmp &= 1 << vmid;
>   		if (tmp)
>   			break;
> @@ -296,109 +318,12 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
>   
>   	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
>   	if (use_semaphore)
> -		/*
> -		 * add semaphore release after invalidation,
> -		 * write with 0 means semaphore release
> -		 */
> -		WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
> -				  hub->eng_distance * eng, 0, hub_ip);
> +		WREG32_RLC_NO_KIQ(sem, 0, hub_ip);
>   
>   	spin_unlock(&adev->gmc.invalidate_lock);
>   
> -	if (i < adev->usec_timeout)
> -		return;
> -
> -	DRM_ERROR("Timeout waiting for VM flush hub: %d!\n", vmhub);
> -}
> -
> -/**
> - * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
> - *
> - * @adev: amdgpu_device pointer
> - * @vmid: vm instance to flush
> - * @vmhub: vmhub type
> - * @flush_type: the flush type
> - *
> - * Flush the TLB for the requested page table.
> - */
> -static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
> -					uint32_t vmhub, uint32_t flush_type)
> -{
> -	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> -	struct dma_fence *fence;
> -	struct amdgpu_job *job;
> -
> -	int r;
> -
> -	/* flush hdp cache */
> -	adev->hdp.funcs->flush_hdp(adev, NULL);
> -
> -	/* For SRIOV run time, driver shouldn't access the register through MMIO
> -	 * Directly use kiq to do the vm invalidation instead
> -	 */
> -	if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
> -	    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
> -	    down_read_trylock(&adev->reset_domain->sem)) {
> -		struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
> -		const unsigned int eng = 17;
> -		u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
> -		u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
> -		u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
> -
> -		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
> -				1 << vmid);
> -
> -		up_read(&adev->reset_domain->sem);
> -		return;
> -	}
> -
> -	mutex_lock(&adev->mman.gtt_window_lock);
> -
> -	if (vmhub == AMDGPU_MMHUB0(0)) {
> -		gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB0(0), 0);
> -		mutex_unlock(&adev->mman.gtt_window_lock);
> -		return;
> -	}
> -
> -	BUG_ON(vmhub != AMDGPU_GFXHUB(0));
> -
> -	if (!adev->mman.buffer_funcs_enabled ||
> -	    !adev->ib_pool_ready ||
> -	    amdgpu_in_reset(adev) ||
> -	    ring->sched.ready == false) {
> -		gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB(0), 0);
> -		mutex_unlock(&adev->mman.gtt_window_lock);
> -		return;
> -	}
> -
> -	/* The SDMA on Navi has a bug which can theoretically result in memory
> -	 * corruption if an invalidation happens at the same time as an VA
> -	 * translation. Avoid this by doing the invalidation from the SDMA
> -	 * itself.
> -	 */
> -	r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
> -				     AMDGPU_FENCE_OWNER_UNDEFINED,
> -				     16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
> -				     &job);
> -	if (r)
> -		goto error_alloc;
> -
> -	job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
> -	job->vm_needs_flush = true;
> -	job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
> -	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
> -	fence = amdgpu_job_submit(job);
> -
> -	mutex_unlock(&adev->mman.gtt_window_lock);
> -
> -	dma_fence_wait(fence, false);
> -	dma_fence_put(fence);
> -
> -	return;
> -
> -error_alloc:
> -	mutex_unlock(&adev->mman.gtt_window_lock);
> -	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
> +	if (i >= adev->usec_timeout)
> +		DRM_ERROR("Timeout waiting for VM flush hub: %d!\n", vmhub);
>   }
>   
>   /**

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 04/11] drm/amdgpu: fix and cleanup gmc_v7_0_flush_gpu_tlb_pasid
  2023-09-05  6:04 ` [PATCH 04/11] drm/amdgpu: fix and cleanup gmc_v7_0_flush_gpu_tlb_pasid Christian König
  2023-09-05 22:39   ` Alex Deucher
  2023-09-06 14:25   ` Shashank Sharma
@ 2023-09-08 20:43   ` Felix Kuehling
  2 siblings, 0 replies; 38+ messages in thread
From: Felix Kuehling @ 2023-09-08 20:43 UTC (permalink / raw)
  To: Christian König, amd-gfx; +Cc: shashank.sharma

I think you mean "VMIDs to invalidate", not "VMIDs to reset". With that 
fixed, the patch is

Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>


On 2023-09-05 02:04, Christian König wrote:
> Testing for reset is pointless since the reset can start right after the
> test. Grab the reset semaphore instead.
>
> The same PASID can be used by more than once VMID, build a mask of VMIDs
> to reset instead of just restting the first one.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 19 ++++++++++---------
>   1 file changed, 10 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> index 6a6929ac2748..9e19a752f94b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> @@ -33,6 +33,7 @@
>   #include "amdgpu_ucode.h"
>   #include "amdgpu_amdkfd.h"
>   #include "amdgpu_gem.h"
> +#include "amdgpu_reset.h"
>   
>   #include "bif/bif_4_1_d.h"
>   #include "bif/bif_4_1_sh_mask.h"
> @@ -426,23 +427,23 @@ static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>   					uint16_t pasid, uint32_t flush_type,
>   					bool all_hub, uint32_t inst)
>   {
> +	u32 mask = 0x0;
>   	int vmid;
> -	unsigned int tmp;
>   
> -	if (amdgpu_in_reset(adev))
> -		return -EIO;
> +	if(!down_read_trylock(&adev->reset_domain->sem))
> +		return 0;
>   
>   	for (vmid = 1; vmid < 16; vmid++) {
> +		u32 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
>   
> -		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
>   		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> -			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> -			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> -			RREG32(mmVM_INVALIDATE_RESPONSE);
> -			break;
> -		}
> +		    (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid)
> +			mask |= 1 << vmid;
>   	}
>   
> +	WREG32(mmVM_INVALIDATE_REQUEST, mask);
> +	RREG32(mmVM_INVALIDATE_RESPONSE);
> +	up_read(&adev->reset_domain->sem);
>   	return 0;
>   }
>   

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 05/11] drm/amdgpu: fix and cleanup gmc_v8_0_flush_gpu_tlb_pasid
  2023-09-05  6:04 ` [PATCH 05/11] drm/amdgpu: fix and cleanup gmc_v8_0_flush_gpu_tlb_pasid Christian König
  2023-09-05 22:40   ` Alex Deucher
  2023-09-06 14:26   ` Shashank Sharma
@ 2023-09-08 20:44   ` Felix Kuehling
  2 siblings, 0 replies; 38+ messages in thread
From: Felix Kuehling @ 2023-09-08 20:44 UTC (permalink / raw)
  To: amd-gfx


On 2023-09-05 02:04, Christian König wrote:
> Testing for reset is pointless since the reset can start right after the
> test. Grab the reset semaphore instead.
>
> The same PASID can be used by more than once VMID, build a mask of VMIDs
> to reset instead of just restting the first one.

I think you mean "VMIDs to invalidate", not "VMIDs to reset". With that 
fixed, the patch is

Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>


>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 20 ++++++++++----------
>   1 file changed, 10 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> index 5af235202513..2d51531a1f2d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> @@ -31,6 +31,7 @@
>   #include "amdgpu_ucode.h"
>   #include "amdgpu_amdkfd.h"
>   #include "amdgpu_gem.h"
> +#include "amdgpu_reset.h"
>   
>   #include "gmc/gmc_8_1_d.h"
>   #include "gmc/gmc_8_1_sh_mask.h"
> @@ -616,25 +617,24 @@ static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>   					uint16_t pasid, uint32_t flush_type,
>   					bool all_hub, uint32_t inst)
>   {
> +	u32 mask = 0x0;
>   	int vmid;
> -	unsigned int tmp;
>   
> -	if (amdgpu_in_reset(adev))
> -		return -EIO;
> +	if(!down_read_trylock(&adev->reset_domain->sem))
> +		return 0;
>   
>   	for (vmid = 1; vmid < 16; vmid++) {
> +		u32 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
>   
> -		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
>   		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> -			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> -			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> -			RREG32(mmVM_INVALIDATE_RESPONSE);
> -			break;
> -		}
> +		    (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid)
> +			mask |= 1 << vmid;
>   	}
>   
> +	WREG32(mmVM_INVALIDATE_REQUEST, mask);
> +	RREG32(mmVM_INVALIDATE_RESPONSE);
> +	up_read(&adev->reset_domain->sem);
>   	return 0;
> -
>   }
>   
>   /*

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 06/11] drm/amdgpu: fix and cleanup gmc_v9_0_flush_gpu_tlb_pasid
  2023-09-05  6:04 ` [PATCH 06/11] drm/amdgpu: fix and cleanup gmc_v9_0_flush_gpu_tlb_pasid Christian König
  2023-09-05 22:45   ` Deucher, Alexander
@ 2023-09-08 21:13   ` Felix Kuehling
  1 sibling, 0 replies; 38+ messages in thread
From: Felix Kuehling @ 2023-09-08 21:13 UTC (permalink / raw)
  To: Christian König, amd-gfx; +Cc: shashank.sharma


On 2023-09-05 02:04, Christian König wrote:
> Testing for reset is pointless since the reset can start right after the
> test.
>
> The same PASID can be used by more than one VMID, reset each of them.
>
> Move the KIQ and all the workaround handling into common GMC code.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
reset -> invalidate.

With that fixed the patch is

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>


> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c |  60 +++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  10 ++-
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 109 ++++++++----------------
>   3 files changed, 102 insertions(+), 77 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index 857051093900..b5f1a1218725 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -32,6 +32,7 @@
>   #include "amdgpu.h"
>   #include "amdgpu_gmc.h"
>   #include "amdgpu_ras.h"
> +#include "amdgpu_reset.h"
>   #include "amdgpu_xgmi.h"
>   
>   #include <drm/drm_drv.h>
> @@ -623,6 +624,65 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
>   }
>   
> +int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
> +				   uint32_t flush_type, bool all_hub,
> +				   uint32_t inst)
> +{
> +	u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT :
> +		adev->usec_timeout;
> +	struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring;
> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
> +	unsigned int ndw;
> +	signed long r;
> +	uint32_t seq;
> +
> +	if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready ||
> +	    !down_read_trylock(&adev->reset_domain->sem)) {
> +		return adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
> +								flush_type,
> +								all_hub, inst);
> +	}
> +
> +	/* 2 dwords flush + 8 dwords fence */
> +	ndw = kiq->pmf->invalidate_tlbs_size + 8;
> +
> +	if (adev->gmc.flush_tlb_needs_extra_type_2)
> +		ndw += kiq->pmf->invalidate_tlbs_size;
> +
> +	if (adev->gmc.flush_tlb_needs_extra_type_0)
> +		ndw += kiq->pmf->invalidate_tlbs_size;
> +
> +	spin_lock(&adev->gfx.kiq[inst].ring_lock);
> +	amdgpu_ring_alloc(ring, ndw);
> +	if (adev->gmc.flush_tlb_needs_extra_type_2)
> +		kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub);
> +
> +	if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0)
> +		kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub);
> +
> +	kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub);
> +	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
> +	if (r) {
> +		amdgpu_ring_undo(ring);
> +		spin_unlock(&adev->gfx.kiq[inst].ring_lock);
> +		goto error_unlock_reset;
> +	}
> +
> +	amdgpu_ring_commit(ring);
> +	spin_unlock(&adev->gfx.kiq[inst].ring_lock);
> +	r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
> +	if (r < 1) {
> +		dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
> +		r = -ETIME;
> +		goto error_unlock_reset;
> +	}
> +	r = 0;
> +
> +error_unlock_reset:
> +	up_read(&adev->reset_domain->sem);
> +	return r;
> +}
> +
>   /**
>    * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ
>    * @adev: amdgpu_device pointer
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index 9e7df2f69123..7732d4ef845e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -335,11 +335,12 @@ struct amdgpu_gmc {
>   	u64 MC_VM_MX_L1_TLB_CNTL;
>   
>   	u64 noretry_flags;
> +
> +	bool flush_tlb_needs_extra_type_0;
> +	bool flush_tlb_needs_extra_type_2;
> +	bool flush_pasid_uses_kiq;
>   };
>   
> -#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub, inst) \
> -	((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
> -	((adev), (pasid), (type), (allhub), (inst)))
>   #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
>   #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
>   #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
> @@ -404,6 +405,9 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
>   int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
>   void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   			      uint32_t vmhub, uint32_t flush_type);
> +int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
> +				   uint32_t flush_type, bool all_hub,
> +				   uint32_t inst);
>   
>   extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev);
>   extern void amdgpu_gmc_noretry_set(struct amdgpu_device *adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 4f6990ba71cb..39016b6900d3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -954,87 +954,30 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>   					uint16_t pasid, uint32_t flush_type,
>   					bool all_hub, uint32_t inst)
>   {
> -	int vmid, i;
> -	signed long r;
> -	uint32_t seq;
> -	uint16_t queried_pasid;
> -	bool ret;
> -	u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
> -	struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring;
> -	struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
> -
> -	if (amdgpu_in_reset(adev))
> -		return -EIO;
> -
> -	if (ring->sched.ready && down_read_trylock(&adev->reset_domain->sem)) {
> -		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
> -		 * heavy-weight TLB flush (type 2), which flushes
> -		 * both. Due to a race condition with concurrent
> -		 * memory accesses using the same TLB cache line, we
> -		 * still need a second TLB flush after this.
> -		 */
> -		bool vega20_xgmi_wa = (adev->gmc.xgmi.num_physical_nodes &&
> -				       adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0));
> -		/* 2 dwords flush + 8 dwords fence */
> -		unsigned int ndw = kiq->pmf->invalidate_tlbs_size + 8;
> -
> -		if (vega20_xgmi_wa)
> -			ndw += kiq->pmf->invalidate_tlbs_size;
> -
> -		spin_lock(&adev->gfx.kiq[inst].ring_lock);
> -		/* 2 dwords flush + 8 dwords fence */
> -		amdgpu_ring_alloc(ring, ndw);
> -		if (vega20_xgmi_wa)
> -			kiq->pmf->kiq_invalidate_tlbs(ring,
> -						      pasid, 2, all_hub);
> -
> -		if (flush_type == 2 &&
> -		    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) &&
> -		    adev->rev_id == 0)
> -			kiq->pmf->kiq_invalidate_tlbs(ring,
> -						pasid, 0, all_hub);
> -
> -		kiq->pmf->kiq_invalidate_tlbs(ring,
> -					pasid, flush_type, all_hub);
> -		r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
> -		if (r) {
> -			amdgpu_ring_undo(ring);
> -			spin_unlock(&adev->gfx.kiq[inst].ring_lock);
> -			up_read(&adev->reset_domain->sem);
> -			return -ETIME;
> -		}
> -
> -		amdgpu_ring_commit(ring);
> -		spin_unlock(&adev->gfx.kiq[inst].ring_lock);
> -		r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
> -		if (r < 1) {
> -			dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
> -			up_read(&adev->reset_domain->sem);
> -			return -ETIME;
> -		}
> -		up_read(&adev->reset_domain->sem);
> -		return 0;
> -	}
> +	uint16_t queried;
> +	int i, vmid;
>   
>   	for (vmid = 1; vmid < 16; vmid++) {
> +		bool valid;
>   
> -		ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
> -				&queried_pasid);
> -		if (ret && queried_pasid == pasid) {
> -			if (all_hub) {
> -				for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
> -					gmc_v9_0_flush_gpu_tlb(adev, vmid,
> -							i, flush_type);
> -			} else {
> -				gmc_v9_0_flush_gpu_tlb(adev, vmid,
> -						AMDGPU_GFXHUB(0), flush_type);
> -			}
> -			break;
> +		valid = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
> +								 &queried);
> +		if (!valid || queried != pasid)
> +			continue;
> +
> +		if (all_hub) {
> +			for_each_set_bit(i, adev->vmhubs_mask,
> +					 AMDGPU_MAX_VMHUBS)
> +				gmc_v9_0_flush_gpu_tlb(adev, vmid, i,
> +						       flush_type);
> +		} else {
> +			gmc_v9_0_flush_gpu_tlb(adev, vmid,
> +					       AMDGPU_GFXHUB(0),
> +					       flush_type);
>   		}
>   	}
>   
>   	return 0;
> -
>   }
>   
>   static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
> @@ -2335,6 +2278,24 @@ static int gmc_v9_0_hw_init(void *handle)
>   	bool value;
>   	int i, r;
>   
> +	adev->gmc.flush_pasid_uses_kiq = true;
> +
> +	/* Vega20+XGMI caches PTEs in TC and TLB. Add a heavy-weight TLB flush
> +	 * (type 2), which flushes both. Due to a race condition with
> +	 * concurrent memory accesses using the same TLB cache line, we still
> +	 * need a second TLB flush after this.
> +	 */
> +	adev->gmc.flush_tlb_needs_extra_type_2 =
> +		adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0) &&
> +		adev->gmc.xgmi.num_physical_nodes;
> +	/*
> +	 * TODO: This workaround is badly documented and had a buggy
> +	 * implementation. We should probably verify what we do here.
> +	 */
> +	adev->gmc.flush_tlb_needs_extra_type_0 =
> +		adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) &&
> +		adev->rev_id == 0;
> +
>   	/* The sequence of these two function calls matters.*/
>   	gmc_v9_0_init_golden_registers(adev);
>   

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 07/11] drm/amdgpu: cleanup gmc_v10_0_flush_gpu_tlb_pasid
  2023-09-05  6:04 ` [PATCH 07/11] drm/amdgpu: cleanup gmc_v10_0_flush_gpu_tlb_pasid Christian König
  2023-09-05 22:46   ` Alex Deucher
@ 2023-09-08 21:13   ` Felix Kuehling
  1 sibling, 0 replies; 38+ messages in thread
From: Felix Kuehling @ 2023-09-08 21:13 UTC (permalink / raw)
  To: Christian König, amd-gfx; +Cc: shashank.sharma

Patches 7-11 are

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>


On 2023-09-05 02:04, Christian König wrote:
> The same PASID can be used by more than one VMID, reset each of them.
>
> Use the common KIQ handling.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 66 ++++++++------------------
>   1 file changed, 19 insertions(+), 47 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 1f70c57bcd69..407ddb926941 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -341,57 +341,27 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>   					uint16_t pasid, uint32_t flush_type,
>   					bool all_hub, uint32_t inst)
>   {
> +	uint16_t queried;
>   	int vmid, i;
> -	signed long r;
> -	uint32_t seq;
> -	uint16_t queried_pasid;
> -	bool ret;
> -	u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
> -	struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring;
> -	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
> -
> -	if (amdgpu_emu_mode == 0 && ring->sched.ready) {
> -		spin_lock(&adev->gfx.kiq[0].ring_lock);
> -		/* 2 dwords flush + 8 dwords fence */
> -		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
> -		kiq->pmf->kiq_invalidate_tlbs(ring,
> -					pasid, flush_type, all_hub);
> -		r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
> -		if (r) {
> -			amdgpu_ring_undo(ring);
> -			spin_unlock(&adev->gfx.kiq[0].ring_lock);
> -			return -ETIME;
> -		}
> -
> -		amdgpu_ring_commit(ring);
> -		spin_unlock(&adev->gfx.kiq[0].ring_lock);
> -		r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
> -		if (r < 1) {
> -			dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
> -			return -ETIME;
> -		}
> -
> -		return 0;
> -	}
>   
>   	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
> -
> -		ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
> -				&queried_pasid);
> -		if (ret	&& queried_pasid == pasid) {
> -			if (all_hub) {
> -				for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
> -					gmc_v10_0_flush_gpu_tlb(adev, vmid,
> -							i, flush_type);
> -			} else {
> -				gmc_v10_0_flush_gpu_tlb(adev, vmid,
> -						AMDGPU_GFXHUB(0), flush_type);
> -			}
> -			if (!adev->enable_mes)
> -				break;
> +		bool valid;
> +
> +		valid = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
> +								  &queried);
> +		if (!valid || queried != pasid)
> +			continue;
> +
> +		if (all_hub) {
> +			for_each_set_bit(i, adev->vmhubs_mask,
> +					 AMDGPU_MAX_VMHUBS)
> +				gmc_v10_0_flush_gpu_tlb(adev, vmid, i,
> +							flush_type);
> +		} else {
> +			gmc_v10_0_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0),
> +						flush_type);
>   		}
>   	}
> -
>   	return 0;
>   }
>   
> @@ -1009,8 +979,10 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
>   
>   static int gmc_v10_0_hw_init(void *handle)
>   {
> -	int r;
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> +	int r;
> +
> +	adev->gmc.flush_pasid_uses_kiq = !amdgpu_emu_mode;
>   
>   	/* The sequence of these two function calls matters.*/
>   	gmc_v10_0_init_golden_registers(adev);

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 02/11] drm/amdgpu: rework gmc_v10_0_flush_gpu_tlb
  2023-09-08 19:30   ` Felix Kuehling
@ 2023-09-12  7:49     ` Christian König
  2023-09-12 14:10       ` Felix Kuehling
  0 siblings, 1 reply; 38+ messages in thread
From: Christian König @ 2023-09-12  7:49 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx; +Cc: shashank.sharma

Am 08.09.23 um 21:30 schrieb Felix Kuehling:
> On 2023-09-05 02:04, Christian König wrote:
>> Move the SDMA workaround necessary for Navi 1x into a higher layer.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  |  48 +++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h  |   5 +-
>>   drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c |   3 +
>>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   | 159 ++++++-----------------
>>   4 files changed, 97 insertions(+), 118 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>> index d78bd9732543..857051093900 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>> @@ -575,6 +575,54 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct 
>> amdgpu_device *adev)
>>       return 0;
>>   }
>>   +void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t 
>> vmid,
>> +                  uint32_t vmhub, uint32_t flush_type)
>> +{
>> +    struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
>> +    struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
>> +    struct dma_fence *fence;
>> +    struct amdgpu_job *job;
>> +    int r;
>> +
>> +    if (!hub->sdma_invalidation_workaround || vmid ||
>
> The "|| vmid" part of the condition is new. AFAICT, the workaround was 
> applied to all VMIDs before this patch. Is this change intentional?

Yes, applying the workaround to anything other than VMID0 never worked in 
the first place.

Always using the KIQ on Navi 1x looked a bit like avoiding that problem.

Regards,
Christian.

>
> Regards,
>   Felix
>
>
>> + !adev->mman.buffer_funcs_enabled ||
>> +        !adev->ib_pool_ready || amdgpu_in_reset(adev) ||
>> +        !ring->sched.ready) {
>> +        adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, vmhub,
>> +                           flush_type);
>> +        return;
>> +    }
>> +
>> +    /* The SDMA on Navi 1x has a bug which can theoretically result 
>> in memory
>> +     * corruption if an invalidation happens at the same time as an VA
>> +     * translation. Avoid this by doing the invalidation from the SDMA
>> +     * itself at least for GART.
>> +     */
>> +    mutex_lock(&adev->mman.gtt_window_lock);
>> +    r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
>> +                     AMDGPU_FENCE_OWNER_UNDEFINED,
>> +                     16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
>> +                     &job);
>> +    if (r)
>> +        goto error_alloc;
>> +
>> +    job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
>> +    job->vm_needs_flush = true;
>> +    job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
>> +    amdgpu_ring_pad_ib(ring, &job->ibs[0]);
>> +    fence = amdgpu_job_submit(job);
>> +    mutex_unlock(&adev->mman.gtt_window_lock);
>> +
>> +    dma_fence_wait(fence, false);
>> +    dma_fence_put(fence);
>> +
>> +    return;
>> +
>> +error_alloc:
>> +    mutex_unlock(&adev->mman.gtt_window_lock);
>> +    DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
>> +}
>> +
>>   /**
>>    * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ
>>    * @adev: amdgpu_device pointer
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> index fdc25cd559b6..9e7df2f69123 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> @@ -117,6 +117,8 @@ struct amdgpu_vmhub {
>>         uint32_t    vm_contexts_disable;
>>   +    bool        sdma_invalidation_workaround;
>> +
>>       const struct amdgpu_vmhub_funcs *vmhub_funcs;
>>   };
>>   @@ -335,7 +337,6 @@ struct amdgpu_gmc {
>>       u64 noretry_flags;
>>   };
>>   -#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) 
>> ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
>>   #define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub, 
>> inst) \
>>       ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
>>       ((adev), (pasid), (type), (allhub), (inst)))
>> @@ -401,6 +402,8 @@ int amdgpu_gmc_ras_sw_init(struct amdgpu_device 
>> *adev);
>>   int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
>>   void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
>>   int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
>> +void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t 
>> vmid,
>> +                  uint32_t vmhub, uint32_t flush_type);
>>     extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev);
>>   extern void amdgpu_gmc_noretry_set(struct amdgpu_device *adev);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
>> index a041c6c970e1..8521c45e8f38 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
>> @@ -471,6 +471,9 @@ static void gfxhub_v2_0_init(struct amdgpu_device 
>> *adev)
>> GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
>> GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
>>   +    /* TODO: This is only needed on some Navi 1x revisions */
>> +    hub->sdma_invalidation_workaround = true;
>> +
>>       hub->vmhub_funcs = &gfxhub_v2_0_vmhub_funcs;
>>   }
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> index fa87a85e1017..1f70c57bcd69 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> @@ -230,20 +230,49 @@ static bool 
>> gmc_v10_0_get_atc_vmid_pasid_mapping_info(
>>    * by the amdgpu vm/hsa code.
>>    */
>>   -static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, 
>> uint32_t vmid,
>> -                   unsigned int vmhub, uint32_t flush_type)
>> +/**
>> + * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
>> + *
>> + * @adev: amdgpu_device pointer
>> + * @vmid: vm instance to flush
>> + * @vmhub: vmhub type
>> + * @flush_type: the flush type
>> + *
>> + * Flush the TLB for the requested page table.
>> + */
>> +static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, 
>> uint32_t vmid,
>> +                    uint32_t vmhub, uint32_t flush_type)
>>   {
>>       bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, 
>> vmhub);
>>       struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
>>       u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, 
>> flush_type);
>> -    u32 tmp;
>>       /* Use register 17 for GART */
>>       const unsigned int eng = 17;
>> -    unsigned int i;
>>       unsigned char hub_ip = 0;
>> +    u32 sem, req, ack;
>> +    unsigned int i;
>> +    u32 tmp;
>>   -    hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ?
>> -           GC_HWIP : MMHUB_HWIP;
>> +    sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
>> +    req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
>> +    ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
>> +
>> +    /* flush hdp cache */
>> +    adev->hdp.funcs->flush_hdp(adev, NULL);
>> +
>> +    /* For SRIOV run time, driver shouldn't access the register 
>> through MMIO
>> +     * Directly use kiq to do the vm invalidation instead
>> +     */
>> +    if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
>> +        (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
>> +        down_read_trylock(&adev->reset_domain->sem)) {
>> +        amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
>> +                1 << vmid);
>> +        up_read(&adev->reset_domain->sem);
>> +        return;
>> +    }
>> +
>> +    hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP;
>>         spin_lock(&adev->gmc.invalidate_lock);
>>       /*
>> @@ -257,9 +286,7 @@ static void gmc_v10_0_flush_vm_hub(struct 
>> amdgpu_device *adev, uint32_t vmid,
>>       if (use_semaphore) {
>>           for (i = 0; i < adev->usec_timeout; i++) {
>>               /* a read return value of 1 means semaphore acuqire */
>> -            tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
>> -                     hub->eng_distance * eng, hub_ip);
>> -
>> +            tmp = RREG32_RLC_NO_KIQ(sem, hub_ip);
>>               if (tmp & 0x1)
>>                   break;
>>               udelay(1);
>> @@ -269,9 +296,7 @@ static void gmc_v10_0_flush_vm_hub(struct 
>> amdgpu_device *adev, uint32_t vmid,
>>               DRM_ERROR("Timeout waiting for sem acquire in VM 
>> flush!\n");
>>       }
>>   -    WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
>> -              hub->eng_distance * eng,
>> -              inv_req, hub_ip);
>> +    WREG32_RLC_NO_KIQ(req, inv_req, hub_ip);
>>         /*
>>        * Issue a dummy read to wait for the ACK register to be cleared
>> @@ -279,14 +304,11 @@ static void gmc_v10_0_flush_vm_hub(struct 
>> amdgpu_device *adev, uint32_t vmid,
>>        */
>>       if ((vmhub == AMDGPU_GFXHUB(0)) &&
>>           (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0)))
>> -        RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
>> -                  hub->eng_distance * eng, hub_ip);
>> +        RREG32_RLC_NO_KIQ(req, hub_ip);
>>         /* Wait for ACK with a delay.*/
>>       for (i = 0; i < adev->usec_timeout; i++) {
>> -        tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
>> -                  hub->eng_distance * eng, hub_ip);
>> -
>> +        tmp = RREG32_RLC_NO_KIQ(ack, hub_ip);
>>           tmp &= 1 << vmid;
>>           if (tmp)
>>               break;
>> @@ -296,109 +318,12 @@ static void gmc_v10_0_flush_vm_hub(struct 
>> amdgpu_device *adev, uint32_t vmid,
>>         /* TODO: It needs to continue working on debugging with 
>> semaphore for GFXHUB as well. */
>>       if (use_semaphore)
>> -        /*
>> -         * add semaphore release after invalidation,
>> -         * write with 0 means semaphore release
>> -         */
>> -        WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
>> -                  hub->eng_distance * eng, 0, hub_ip);
>> +        WREG32_RLC_NO_KIQ(sem, 0, hub_ip);
>>         spin_unlock(&adev->gmc.invalidate_lock);
>>   -    if (i < adev->usec_timeout)
>> -        return;
>> -
>> -    DRM_ERROR("Timeout waiting for VM flush hub: %d!\n", vmhub);
>> -}
>> -
>> -/**
>> - * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
>> - *
>> - * @adev: amdgpu_device pointer
>> - * @vmid: vm instance to flush
>> - * @vmhub: vmhub type
>> - * @flush_type: the flush type
>> - *
>> - * Flush the TLB for the requested page table.
>> - */
>> -static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, 
>> uint32_t vmid,
>> -                    uint32_t vmhub, uint32_t flush_type)
>> -{
>> -    struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
>> -    struct dma_fence *fence;
>> -    struct amdgpu_job *job;
>> -
>> -    int r;
>> -
>> -    /* flush hdp cache */
>> -    adev->hdp.funcs->flush_hdp(adev, NULL);
>> -
>> -    /* For SRIOV run time, driver shouldn't access the register 
>> through MMIO
>> -     * Directly use kiq to do the vm invalidation instead
>> -     */
>> -    if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
>> -        (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
>> -        down_read_trylock(&adev->reset_domain->sem)) {
>> -        struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
>> -        const unsigned int eng = 17;
>> -        u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, 
>> flush_type);
>> -        u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
>> -        u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
>> -
>> -        amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
>> -                1 << vmid);
>> -
>> -        up_read(&adev->reset_domain->sem);
>> -        return;
>> -    }
>> -
>> -    mutex_lock(&adev->mman.gtt_window_lock);
>> -
>> -    if (vmhub == AMDGPU_MMHUB0(0)) {
>> -        gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB0(0), 0);
>> -        mutex_unlock(&adev->mman.gtt_window_lock);
>> -        return;
>> -    }
>> -
>> -    BUG_ON(vmhub != AMDGPU_GFXHUB(0));
>> -
>> -    if (!adev->mman.buffer_funcs_enabled ||
>> -        !adev->ib_pool_ready ||
>> -        amdgpu_in_reset(adev) ||
>> -        ring->sched.ready == false) {
>> -        gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB(0), 0);
>> -        mutex_unlock(&adev->mman.gtt_window_lock);
>> -        return;
>> -    }
>> -
>> -    /* The SDMA on Navi has a bug which can theoretically result in 
>> memory
>> -     * corruption if an invalidation happens at the same time as an VA
>> -     * translation. Avoid this by doing the invalidation from the SDMA
>> -     * itself.
>> -     */
>> -    r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
>> -                     AMDGPU_FENCE_OWNER_UNDEFINED,
>> -                     16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
>> -                     &job);
>> -    if (r)
>> -        goto error_alloc;
>> -
>> -    job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
>> -    job->vm_needs_flush = true;
>> -    job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
>> -    amdgpu_ring_pad_ib(ring, &job->ibs[0]);
>> -    fence = amdgpu_job_submit(job);
>> -
>> -    mutex_unlock(&adev->mman.gtt_window_lock);
>> -
>> -    dma_fence_wait(fence, false);
>> -    dma_fence_put(fence);
>> -
>> -    return;
>> -
>> -error_alloc:
>> -    mutex_unlock(&adev->mman.gtt_window_lock);
>> -    DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
>> +    if (i >= adev->usec_timeout)
>> +        DRM_ERROR("Timeout waiting for VM flush hub: %d!\n", vmhub);
>>   }
>>     /**


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 02/11] drm/amdgpu: rework gmc_v10_0_flush_gpu_tlb
  2023-09-12  7:49     ` Christian König
@ 2023-09-12 14:10       ` Felix Kuehling
  0 siblings, 0 replies; 38+ messages in thread
From: Felix Kuehling @ 2023-09-12 14:10 UTC (permalink / raw)
  To: Christian König, amd-gfx; +Cc: shashank.sharma

On 2023-09-12 3:49, Christian König wrote:
> Am 08.09.23 um 21:30 schrieb Felix Kuehling:
>> On 2023-09-05 02:04, Christian König wrote:
>>> Move the SDMA workaround necessary for Navi 1x into a higher layer.
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  |  48 +++++++
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h  |   5 +-
>>>   drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c |   3 +
>>>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   | 159 
>>> ++++++-----------------
>>>   4 files changed, 97 insertions(+), 118 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>>> index d78bd9732543..857051093900 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>>> @@ -575,6 +575,54 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct 
>>> amdgpu_device *adev)
>>>       return 0;
>>>   }
>>>   +void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, 
>>> uint32_t vmid,
>>> +                  uint32_t vmhub, uint32_t flush_type)
>>> +{
>>> +    struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
>>> +    struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
>>> +    struct dma_fence *fence;
>>> +    struct amdgpu_job *job;
>>> +    int r;
>>> +
>>> +    if (!hub->sdma_invalidation_workaround || vmid ||
>>
>> The "|| vmid" part of the condition is new. AFAICT, the workaround 
>> was applied to all VMIDs before this patch. Is this change intentional?
>
> Yes, applying the workaround to anything else than VMID0 never worked 
> in the first place.
>
> Always using the KIQ on Navi 1x looked a bit like avoiding that problem.

OK. The patch is

Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>


>
> Regards,
> Christian.
>
>>
>> Regards,
>>   Felix
>>
>>
>>> + !adev->mman.buffer_funcs_enabled ||
>>> +        !adev->ib_pool_ready || amdgpu_in_reset(adev) ||
>>> +        !ring->sched.ready) {
>>> +        adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, vmhub,
>>> +                           flush_type);
>>> +        return;
>>> +    }
>>> +
>>> +    /* The SDMA on Navi 1x has a bug which can theoretically result 
>>> in memory
>>> +     * corruption if an invalidation happens at the same time as an VA
>>> +     * translation. Avoid this by doing the invalidation from the SDMA
>>> +     * itself at least for GART.
>>> +     */
>>> +    mutex_lock(&adev->mman.gtt_window_lock);
>>> +    r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
>>> +                     AMDGPU_FENCE_OWNER_UNDEFINED,
>>> +                     16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
>>> +                     &job);
>>> +    if (r)
>>> +        goto error_alloc;
>>> +
>>> +    job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
>>> +    job->vm_needs_flush = true;
>>> +    job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
>>> +    amdgpu_ring_pad_ib(ring, &job->ibs[0]);
>>> +    fence = amdgpu_job_submit(job);
>>> +    mutex_unlock(&adev->mman.gtt_window_lock);
>>> +
>>> +    dma_fence_wait(fence, false);
>>> +    dma_fence_put(fence);
>>> +
>>> +    return;
>>> +
>>> +error_alloc:
>>> +    mutex_unlock(&adev->mman.gtt_window_lock);
>>> +    DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
>>> +}
>>> +
>>>   /**
>>>    * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ
>>>    * @adev: amdgpu_device pointer
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>>> index fdc25cd559b6..9e7df2f69123 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>>> @@ -117,6 +117,8 @@ struct amdgpu_vmhub {
>>>         uint32_t    vm_contexts_disable;
>>>   +    bool        sdma_invalidation_workaround;
>>> +
>>>       const struct amdgpu_vmhub_funcs *vmhub_funcs;
>>>   };
>>>   @@ -335,7 +337,6 @@ struct amdgpu_gmc {
>>>       u64 noretry_flags;
>>>   };
>>>   -#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) 
>>> ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
>>>   #define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub, 
>>> inst) \
>>>       ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
>>>       ((adev), (pasid), (type), (allhub), (inst)))
>>> @@ -401,6 +402,8 @@ int amdgpu_gmc_ras_sw_init(struct amdgpu_device 
>>> *adev);
>>>   int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
>>>   void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
>>>   int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
>>> +void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t 
>>> vmid,
>>> +                  uint32_t vmhub, uint32_t flush_type);
>>>     extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev);
>>>   extern void amdgpu_gmc_noretry_set(struct amdgpu_device *adev);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c 
>>> b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
>>> index a041c6c970e1..8521c45e8f38 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
>>> @@ -471,6 +471,9 @@ static void gfxhub_v2_0_init(struct 
>>> amdgpu_device *adev)
>>> GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
>>> GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
>>>   +    /* TODO: This is only needed on some Navi 1x revisions */
>>> +    hub->sdma_invalidation_workaround = true;
>>> +
>>>       hub->vmhub_funcs = &gfxhub_v2_0_vmhub_funcs;
>>>   }
>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>> index fa87a85e1017..1f70c57bcd69 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>> @@ -230,20 +230,49 @@ static bool 
>>> gmc_v10_0_get_atc_vmid_pasid_mapping_info(
>>>    * by the amdgpu vm/hsa code.
>>>    */
>>>   -static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, 
>>> uint32_t vmid,
>>> -                   unsigned int vmhub, uint32_t flush_type)
>>> +/**
>>> + * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
>>> + *
>>> + * @adev: amdgpu_device pointer
>>> + * @vmid: vm instance to flush
>>> + * @vmhub: vmhub type
>>> + * @flush_type: the flush type
>>> + *
>>> + * Flush the TLB for the requested page table.
>>> + */
>>> +static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, 
>>> uint32_t vmid,
>>> +                    uint32_t vmhub, uint32_t flush_type)
>>>   {
>>>       bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, 
>>> vmhub);
>>>       struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
>>>       u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, 
>>> flush_type);
>>> -    u32 tmp;
>>>       /* Use register 17 for GART */
>>>       const unsigned int eng = 17;
>>> -    unsigned int i;
>>>       unsigned char hub_ip = 0;
>>> +    u32 sem, req, ack;
>>> +    unsigned int i;
>>> +    u32 tmp;
>>>   -    hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ?
>>> -           GC_HWIP : MMHUB_HWIP;
>>> +    sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
>>> +    req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
>>> +    ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
>>> +
>>> +    /* flush hdp cache */
>>> +    adev->hdp.funcs->flush_hdp(adev, NULL);
>>> +
>>> +    /* For SRIOV run time, driver shouldn't access the register 
>>> through MMIO
>>> +     * Directly use kiq to do the vm invalidation instead
>>> +     */
>>> +    if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
>>> +        (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
>>> + down_read_trylock(&adev->reset_domain->sem)) {
>>> +        amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
>>> +                1 << vmid);
>>> +        up_read(&adev->reset_domain->sem);
>>> +        return;
>>> +    }
>>> +
>>> +    hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP;
>>>         spin_lock(&adev->gmc.invalidate_lock);
>>>       /*
>>> @@ -257,9 +286,7 @@ static void gmc_v10_0_flush_vm_hub(struct 
>>> amdgpu_device *adev, uint32_t vmid,
>>>       if (use_semaphore) {
>>>           for (i = 0; i < adev->usec_timeout; i++) {
>>>               /* a read return value of 1 means semaphore acuqire */
>>> -            tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
>>> -                     hub->eng_distance * eng, hub_ip);
>>> -
>>> +            tmp = RREG32_RLC_NO_KIQ(sem, hub_ip);
>>>               if (tmp & 0x1)
>>>                   break;
>>>               udelay(1);
>>> @@ -269,9 +296,7 @@ static void gmc_v10_0_flush_vm_hub(struct 
>>> amdgpu_device *adev, uint32_t vmid,
>>>               DRM_ERROR("Timeout waiting for sem acquire in VM 
>>> flush!\n");
>>>       }
>>>   -    WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
>>> -              hub->eng_distance * eng,
>>> -              inv_req, hub_ip);
>>> +    WREG32_RLC_NO_KIQ(req, inv_req, hub_ip);
>>>         /*
>>>        * Issue a dummy read to wait for the ACK register to be cleared
>>> @@ -279,14 +304,11 @@ static void gmc_v10_0_flush_vm_hub(struct 
>>> amdgpu_device *adev, uint32_t vmid,
>>>        */
>>>       if ((vmhub == AMDGPU_GFXHUB(0)) &&
>>>           (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0)))
>>> -        RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
>>> -                  hub->eng_distance * eng, hub_ip);
>>> +        RREG32_RLC_NO_KIQ(req, hub_ip);
>>>         /* Wait for ACK with a delay.*/
>>>       for (i = 0; i < adev->usec_timeout; i++) {
>>> -        tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
>>> -                  hub->eng_distance * eng, hub_ip);
>>> -
>>> +        tmp = RREG32_RLC_NO_KIQ(ack, hub_ip);
>>>           tmp &= 1 << vmid;
>>>           if (tmp)
>>>               break;
>>> @@ -296,109 +318,12 @@ static void gmc_v10_0_flush_vm_hub(struct 
>>> amdgpu_device *adev, uint32_t vmid,
>>>         /* TODO: It needs to continue working on debugging with 
>>> semaphore for GFXHUB as well. */
>>>       if (use_semaphore)
>>> -        /*
>>> -         * add semaphore release after invalidation,
>>> -         * write with 0 means semaphore release
>>> -         */
>>> -        WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
>>> -                  hub->eng_distance * eng, 0, hub_ip);
>>> +        WREG32_RLC_NO_KIQ(sem, 0, hub_ip);
>>>         spin_unlock(&adev->gmc.invalidate_lock);
>>>   -    if (i < adev->usec_timeout)
>>> -        return;
>>> -
>>> -    DRM_ERROR("Timeout waiting for VM flush hub: %d!\n", vmhub);
>>> -}
>>> -
>>> -/**
>>> - * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
>>> - *
>>> - * @adev: amdgpu_device pointer
>>> - * @vmid: vm instance to flush
>>> - * @vmhub: vmhub type
>>> - * @flush_type: the flush type
>>> - *
>>> - * Flush the TLB for the requested page table.
>>> - */
>>> -static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, 
>>> uint32_t vmid,
>>> -                    uint32_t vmhub, uint32_t flush_type)
>>> -{
>>> -    struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
>>> -    struct dma_fence *fence;
>>> -    struct amdgpu_job *job;
>>> -
>>> -    int r;
>>> -
>>> -    /* flush hdp cache */
>>> -    adev->hdp.funcs->flush_hdp(adev, NULL);
>>> -
>>> -    /* For SRIOV run time, driver shouldn't access the register 
>>> through MMIO
>>> -     * Directly use kiq to do the vm invalidation instead
>>> -     */
>>> -    if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
>>> -        (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
>>> - down_read_trylock(&adev->reset_domain->sem)) {
>>> -        struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
>>> -        const unsigned int eng = 17;
>>> -        u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, 
>>> flush_type);
>>> -        u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
>>> -        u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
>>> -
>>> -        amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
>>> -                1 << vmid);
>>> -
>>> -        up_read(&adev->reset_domain->sem);
>>> -        return;
>>> -    }
>>> -
>>> -    mutex_lock(&adev->mman.gtt_window_lock);
>>> -
>>> -    if (vmhub == AMDGPU_MMHUB0(0)) {
>>> -        gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB0(0), 0);
>>> -        mutex_unlock(&adev->mman.gtt_window_lock);
>>> -        return;
>>> -    }
>>> -
>>> -    BUG_ON(vmhub != AMDGPU_GFXHUB(0));
>>> -
>>> -    if (!adev->mman.buffer_funcs_enabled ||
>>> -        !adev->ib_pool_ready ||
>>> -        amdgpu_in_reset(adev) ||
>>> -        ring->sched.ready == false) {
>>> -        gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB(0), 0);
>>> -        mutex_unlock(&adev->mman.gtt_window_lock);
>>> -        return;
>>> -    }
>>> -
>>> -    /* The SDMA on Navi has a bug which can theoretically result in 
>>> memory
>>> -     * corruption if an invalidation happens at the same time as an VA
>>> -     * translation. Avoid this by doing the invalidation from the SDMA
>>> -     * itself.
>>> -     */
>>> -    r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
>>> -                     AMDGPU_FENCE_OWNER_UNDEFINED,
>>> -                     16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
>>> -                     &job);
>>> -    if (r)
>>> -        goto error_alloc;
>>> -
>>> -    job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
>>> -    job->vm_needs_flush = true;
>>> -    job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
>>> -    amdgpu_ring_pad_ib(ring, &job->ibs[0]);
>>> -    fence = amdgpu_job_submit(job);
>>> -
>>> -    mutex_unlock(&adev->mman.gtt_window_lock);
>>> -
>>> -    dma_fence_wait(fence, false);
>>> -    dma_fence_put(fence);
>>> -
>>> -    return;
>>> -
>>> -error_alloc:
>>> -    mutex_unlock(&adev->mman.gtt_window_lock);
>>> -    DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
>>> +    if (i >= adev->usec_timeout)
>>> +        DRM_ERROR("Timeout waiting for VM flush hub: %d!\n", vmhub);
>>>   }
>>>     /**
>

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 01/11] drm/amdgpu: fix and cleanup gmc_v9_0_flush_gpu_tlb
  2023-09-08 18:58   ` Felix Kuehling
@ 2023-09-19  8:01     ` Christian König
  0 siblings, 0 replies; 38+ messages in thread
From: Christian König @ 2023-09-19  8:01 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx; +Cc: shashank.sharma

Am 08.09.23 um 20:58 schrieb Felix Kuehling:
>
> On 2023-09-05 02:04, Christian König wrote:
>> The KIQ code path was ignoring the second flush. Also avoid long 
>> lines and
>> re-calculating the register offsets over and over again.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 29 +++++++++++++++++----------
>>   1 file changed, 18 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index 0673cda547bb..4f6990ba71cb 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -814,13 +814,17 @@ static void gmc_v9_0_flush_gpu_tlb(struct 
>> amdgpu_device *adev, uint32_t vmid,
>>                       uint32_t vmhub, uint32_t flush_type)
>>   {
>>       bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, 
>> vmhub);
>> +    u32 j, inv_req, inv_req2, tmp, sem, req, ack;
>>       const unsigned int eng = 17;
>> -    u32 j, inv_req, inv_req2, tmp;
>>       struct amdgpu_vmhub *hub;
>>         BUG_ON(vmhub >= AMDGPU_MAX_VMHUBS);
>>         hub = &adev->vmhub[vmhub];
>> +    sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
>> +    req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
>> +    ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
>
> If you use SOC15_REG_OFFSET here, you can drop all the if (vmhub >= 
> AMDGPU_MMHUB0(0)) conditions below.

I thought about that as well, but that won't work since we don't know the 
register name.

Regards,
Christian.

>
> Other than that, the patch looks good to me.
>
> Regards,
>   Felix
>
>
>> +
>>       if (adev->gmc.xgmi.num_physical_nodes &&
>>           adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0)) {
>>           /* Vega20+XGMI caches PTEs in TC and TLB. Add a
>> @@ -852,6 +856,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct 
>> amdgpu_device *adev, uint32_t vmid,
>>             amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
>>                              1 << vmid);
>> +        if (inv_req2)
>> +            amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack,
>> +                               inv_req2, 1 << vmid);
>> +
>>           up_read(&adev->reset_domain->sem);
>>           return;
>>       }
>> @@ -870,9 +878,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct 
>> amdgpu_device *adev, uint32_t vmid,
>>           for (j = 0; j < adev->usec_timeout; j++) {
>>               /* a read return value of 1 means semaphore acquire */
>>               if (vmhub >= AMDGPU_MMHUB0(0))
>> -                tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, 
>> hub->vm_inv_eng0_sem + hub->eng_distance * eng);
>> +                tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem);
>>               else
>> -                tmp = RREG32_SOC15_IP_NO_KIQ(GC, 
>> hub->vm_inv_eng0_sem + hub->eng_distance * eng);
>> +                tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem);
>>               if (tmp & 0x1)
>>                   break;
>>               udelay(1);
>> @@ -884,9 +892,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct 
>> amdgpu_device *adev, uint32_t vmid,
>>         do {
>>           if (vmhub >= AMDGPU_MMHUB0(0))
>> -            WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + 
>> hub->eng_distance * eng, inv_req);
>> +            WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req);
>>           else
>> -            WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + 
>> hub->eng_distance * eng, inv_req);
>> +            WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req);
>>             /*
>>            * Issue a dummy read to wait for the ACK register to
>> @@ -895,14 +903,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct 
>> amdgpu_device *adev, uint32_t vmid,
>>            */
>>           if ((vmhub == AMDGPU_GFXHUB(0)) &&
>>               (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
>> -            RREG32_NO_KIQ(hub->vm_inv_eng0_req +
>> -                      hub->eng_distance * eng);
>> +            RREG32_NO_KIQ(req);
>>             for (j = 0; j < adev->usec_timeout; j++) {
>>               if (vmhub >= AMDGPU_MMHUB0(0))
>> -                tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, 
>> hub->vm_inv_eng0_ack + hub->eng_distance * eng);
>> +                tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack);
>>               else
>> -                tmp = RREG32_SOC15_IP_NO_KIQ(GC, 
>> hub->vm_inv_eng0_ack + hub->eng_distance * eng);
>> +                tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack);
>>               if (tmp & (1 << vmid))
>>                   break;
>>               udelay(1);
>> @@ -919,9 +926,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct 
>> amdgpu_device *adev, uint32_t vmid,
>>            * write with 0 means semaphore release
>>            */
>>           if (vmhub >= AMDGPU_MMHUB0(0))
>> -            WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + 
>> hub->eng_distance * eng, 0);
>> +            WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0);
>>           else
>> -            WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + 
>> hub->eng_distance * eng, 0);
>> +            WREG32_SOC15_IP_NO_KIQ(GC, sem, 0);
>>       }
>>         spin_unlock(&adev->gmc.invalidate_lock);


^ permalink raw reply	[flat|nested] 38+ messages in thread

end of thread, other threads:[~2023-09-19  8:01 UTC | newest]

Thread overview: 38+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-05  6:04 Rework flushing changes to the TLB Christian König
2023-09-05  6:04 ` [PATCH 01/11] drm/amdgpu: fix and cleanup gmc_v9_0_flush_gpu_tlb Christian König
2023-09-05 20:45   ` Alex Deucher
2023-09-06  8:50     ` Christian König
2023-09-08 18:58   ` Felix Kuehling
2023-09-19  8:01     ` Christian König
2023-09-05  6:04 ` [PATCH 02/11] drm/amdgpu: rework gmc_v10_0_flush_gpu_tlb Christian König
2023-09-05 20:52   ` Alex Deucher
2023-09-08 19:30   ` Felix Kuehling
2023-09-12  7:49     ` Christian König
2023-09-12 14:10       ` Felix Kuehling
2023-09-05  6:04 ` [PATCH 03/11] drm/amdgpu: cleanup gmc_v11_0_flush_gpu_tlb Christian König
2023-09-05 20:56   ` Alex Deucher
2023-09-05  6:04 ` [PATCH 04/11] drm/amdgpu: fix and cleanup gmc_v7_0_flush_gpu_tlb_pasid Christian König
2023-09-05 22:39   ` Alex Deucher
2023-09-06 14:25   ` Shashank Sharma
2023-09-06 14:35     ` Shashank Sharma
2023-09-07  6:57       ` Christian König
2023-09-07  7:31         ` Shashank Sharma
2023-09-08 20:43   ` Felix Kuehling
2023-09-05  6:04 ` [PATCH 05/11] drm/amdgpu: fix and cleanup gmc_v8_0_flush_gpu_tlb_pasid Christian König
2023-09-05 22:40   ` Alex Deucher
2023-09-06 14:26   ` Shashank Sharma
2023-09-08 20:44   ` Felix Kuehling
2023-09-05  6:04 ` [PATCH 06/11] drm/amdgpu: fix and cleanup gmc_v9_0_flush_gpu_tlb_pasid Christian König
2023-09-05 22:45   ` Deucher, Alexander
2023-09-08 21:13   ` Felix Kuehling
2023-09-05  6:04 ` [PATCH 07/11] drm/amdgpu: cleanup gmc_v10_0_flush_gpu_tlb_pasid Christian König
2023-09-05 22:46   ` Alex Deucher
2023-09-08 21:13   ` Felix Kuehling
2023-09-05  6:04 ` [PATCH 08/11] drm/amdgpu: fix and cleanup gmc_v11_0_flush_gpu_tlb_pasid Christian König
2023-09-05 22:47   ` Alex Deucher
2023-09-05  6:04 ` [PATCH 09/11] drm/amdgpu: drop error return from flush_gpu_tlb_pasid Christian König
2023-09-05 22:48   ` Alex Deucher
2023-09-05  6:04 ` [PATCH 10/11] drm/amdgpu: rework lock handling for flush_tlb Christian König
2023-09-05 22:49   ` Alex Deucher
2023-09-05  6:04 ` [PATCH 11/11] drm/amdgpu: further move TLB hw workarounds a layer up Christian König
2023-09-05 22:51   ` Alex Deucher

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.