All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10
@ 2020-01-13 20:26 Alex Sierra
  2020-01-13 20:26 ` [PATCH 4/7] drm/amdgpu: replace kcq enable/disable functions on gfx_v9 Alex Sierra
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Alex Sierra @ 2020-01-13 20:26 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Sierra

Add a TLB invalidate function pointer to the kiq_pm4_funcs struct.
This way, a TLB flush can be done through the kiq member.
TLB invalidation is implemented for gfx9 and gfx10.

Change-Id: I1b77b364f3ae0038ff3e70e869be5f2ef6e6d293
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  4 ++++
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 15 +++++++++++++++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 14 ++++++++++++++
 3 files changed, 33 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 8e88e0411662..af4bd279f42f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -76,11 +76,15 @@ struct kiq_pm4_funcs {
 					struct amdgpu_ring *ring,
 					u64 addr,
 					u64 seq);
+	void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring,
+				uint16_t pasid, uint32_t flush_type,
+				bool all_hub);
 	/* Packet sizes */
 	int set_resources_size;
 	int map_queues_size;
 	int unmap_queues_size;
 	int query_status_size;
+	int invalidate_tlbs_size;
 };
 
 struct amdgpu_kiq {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 379e46c1b7f6..d72b60f997c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -40,6 +40,7 @@
 #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
 
 #include "soc15.h"
+#include "soc15d.h"
 #include "soc15_common.h"
 #include "clearstate_gfx10.h"
 #include "v10_structs.h"
@@ -346,15 +347,29 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
 }
 
+static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+				uint16_t pasid, uint32_t flush_type,
+				bool all_hub)
+{
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+	amdgpu_ring_write(kiq_ring,
+			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
 static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
 	.kiq_set_resources = gfx10_kiq_set_resources,
 	.kiq_map_queues = gfx10_kiq_map_queues,
 	.kiq_unmap_queues = gfx10_kiq_unmap_queues,
 	.kiq_query_status = gfx10_kiq_query_status,
+	.kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
 	.set_resources_size = 8,
 	.map_queues_size = 7,
 	.unmap_queues_size = 6,
 	.query_status_size = 7,
+	.invalidate_tlbs_size = 12,
 };
 
 static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index ad0179ea2cc5..b8759386dcbb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -837,15 +837,29 @@ static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
 }
 
+static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+				uint16_t pasid, uint32_t flush_type,
+				bool all_hub)
+{
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+	amdgpu_ring_write(kiq_ring,
+			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
 	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
 	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
 	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
 	.kiq_query_status = gfx_v9_0_kiq_query_status,
+	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
 	.set_resources_size = 8,
 	.map_queues_size = 7,
 	.unmap_queues_size = 6,
 	.query_status_size = 7,
+	.invalidate_tlbs_size = 12,
 };
 
 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 4/7] drm/amdgpu: replace kcq enable/disable functions on gfx_v9
  2020-01-13 20:26 [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra
@ 2020-01-13 20:26 ` Alex Sierra
  2020-01-13 20:26 ` [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid Alex Sierra
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 12+ messages in thread
From: Alex Sierra @ 2020-01-13 20:26 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Sierra

[Why]
There are HW-independent functions that enable and disable kcq. These functions use
the kiq_pm4_funcs implementation.

[How]
Remove the local kcq enable and disable functions and replace them with the generic
kcq enable and disable functions under amdgpu_gfx.

Change-Id: I7709bdba93742c234941a5936c82eb67e346077c
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 102 +-------------------------
 1 file changed, 2 insertions(+), 100 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index b8759386dcbb..44cdb6fc92ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3234,74 +3234,6 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
 }
 
-static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
-{
-	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
-	uint64_t queue_mask = 0;
-	int r, i;
-
-	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
-		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
-			continue;
-
-		/* This situation may be hit in the future if a new HW
-		 * generation exposes more than 64 queues. If so, the
-		 * definition of queue_mask needs updating */
-		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
-			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
-			break;
-		}
-
-		queue_mask |= (1ull << i);
-	}
-
-	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
-	if (r) {
-		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
-		return r;
-	}
-
-	/* set resources */
-	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
-	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
-			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
-	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
-	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
-	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
-	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
-	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
-	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
-		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
-		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-
-		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
-		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
-		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
-				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
-				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
-				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
-				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
-				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
-				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
-				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
-				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
-				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
-		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
-		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
-		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
-		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
-		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
-	}
-
-	r = amdgpu_ring_test_helper(kiq_ring);
-	if (r)
-		DRM_ERROR("KCQ enable failed\n");
-
-	return r;
-}
-
 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -3708,7 +3640,7 @@ static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
 			goto done;
 	}
 
-	r = gfx_v9_0_kiq_kcq_enable(adev);
+	r = amdgpu_gfx_enable_kcq(adev);
 done:
 	return r;
 }
@@ -3812,36 +3744,6 @@ static int gfx_v9_0_hw_init(void *handle)
 	return r;
 }
 
-static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
-{
-	int r, i;
-	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
-
-	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
-	if (r)
-		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
-
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
-
-		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
-		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
-						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
-						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
-						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
-						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
-		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
-		amdgpu_ring_write(kiq_ring, 0);
-		amdgpu_ring_write(kiq_ring, 0);
-		amdgpu_ring_write(kiq_ring, 0);
-	}
-	r = amdgpu_ring_test_helper(kiq_ring);
-	if (r)
-		DRM_ERROR("KCQ disable failed\n");
-
-	return r;
-}
-
 static int gfx_v9_0_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -3853,7 +3755,7 @@ static int gfx_v9_0_hw_fini(void *handle)
 	/* DF freeze and kcq disable will fail */
 	if (!amdgpu_ras_intr_triggered())
 		/* disable KCQ to avoid CPC touch memory not valid anymore */
-		gfx_v9_0_kcq_disable(adev);
+		amdgpu_gfx_disable_kcq(adev);
 
 	if (amdgpu_sriov_vf(adev)) {
 		gfx_v9_0_cp_gfx_enable(adev, false);
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid
  2020-01-13 20:26 [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra
  2020-01-13 20:26 ` [PATCH 4/7] drm/amdgpu: replace kcq enable/disable functions on gfx_v9 Alex Sierra
@ 2020-01-13 20:26 ` Alex Sierra
  2020-01-14  0:34   ` Felix Kuehling
  2020-01-13 22:16 ` [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Felix Kuehling
  2020-01-14  0:45 ` Felix Kuehling
  3 siblings, 1 reply; 12+ messages in thread
From: Alex Sierra @ 2020-01-13 20:26 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Sierra

This can be used directly from amdgpu and amdkfd to invalidate
TLB through pasid.
It supports gmc v7, v8, v9 and v10.

Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  6 ++
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 74 ++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   | 33 +++++++++++
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   | 34 +++++++++++
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 76 +++++++++++++++++++++++++
 5 files changed, 223 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index c91dd602d5f1..d3c27a3c43f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs {
 	/* flush the vm tlb via mmio */
 	void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
 				uint32_t vmhub, uint32_t flush_type);
+	/* flush the vm tlb via pasid */
+	int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
+					uint32_t flush_type, bool all_hub);
 	/* flush the vm tlb via ring */
 	uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
 				       uint64_t pd_addr);
@@ -216,6 +219,9 @@ struct amdgpu_gmc {
 };
 
 #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
+#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \
+	((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
+	((adev), (pasid), (type), (allhub)))
 #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
 #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
 #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 5ad89bb6f3ba..8afd05834714 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -30,6 +30,8 @@
 #include "hdp/hdp_5_0_0_sh_mask.h"
 #include "gc/gc_10_1_0_sh_mask.h"
 #include "mmhub/mmhub_2_0_0_sh_mask.h"
+#include "athub/athub_2_0_0_sh_mask.h"
+#include "athub/athub_2_0_0_offset.h"
 #include "dcn/dcn_2_0_0_offset.h"
 #include "dcn/dcn_2_0_0_sh_mask.h"
 #include "oss/osssys_5_0_0_offset.h"
@@ -37,6 +39,7 @@
 #include "navi10_enum.h"
 
 #include "soc15.h"
+#include "soc15d.h"
 #include "soc15_common.h"
 
 #include "nbio_v2_3.h"
@@ -234,6 +237,19 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
 		(!amdgpu_sriov_vf(adev)));
 }
 
+static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
+					struct amdgpu_device *adev,
+					uint8_t vmid, uint16_t *p_pasid)
+{
+	uint32_t value;
+
+	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+		     + vmid);
+	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
 /*
  * GART
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -380,6 +396,63 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
 }
 
+/**
+ * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flushed
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					uint16_t pasid, uint32_t flush_type,
+					bool all_hub)
+{
+	int vmid, i;
+	signed long r;
+	uint32_t seq;
+	uint16_t queried_pasid;
+	bool ret;
+	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+
+	if (amdgpu_emu_mode == 0 && ring->sched.ready) {
+		spin_lock(&adev->gfx.kiq.ring_lock);
+		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size);
+		kiq->pmf->kiq_invalidate_tlbs(ring,
+					pasid, flush_type, all_hub);
+		amdgpu_fence_emit_polling(ring, &seq);
+		amdgpu_ring_commit(ring);
+		spin_unlock(&adev->gfx.kiq.ring_lock);
+		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+		if (r < 1) {
+			DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+			return -ETIME;
+		}
+
+		return 0;
+	}
+
+	for (vmid = 1; vmid < 16; vmid++) {
+
+		ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+				&queried_pasid);
+		if (ret	&& queried_pasid == pasid) {
+			if (all_hub) {
+				for (i = 0; i < adev->num_vmhubs; i++)
+					gmc_v10_0_flush_gpu_tlb(adev, vmid,
+							i, 0);
+			} else {
+				gmc_v10_0_flush_gpu_tlb(adev, vmid,
+						AMDGPU_GFXHUB_0, 0);
+			}
+			break;
+		}
+	}
+
+	return 0;
+}
+
 static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 					     unsigned vmid, uint64_t pd_addr)
 {
@@ -531,6 +604,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
 
 static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
 	.flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
+	.flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
 	.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
 	.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
 	.map_mtype = gmc_v10_0_map_mtype,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index f08e5330642d..19d5b133e1d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -418,6 +418,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
 	return 0;
 }
 
+/**
+ * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flushed
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					uint16_t pasid, uint32_t flush_type,
+					bool all_hub)
+{
+	int vmid;
+	unsigned int tmp;
+
+	if (adev->in_gpu_reset)
+		return -EIO;
+
+	for (vmid = 1; vmid < 16; vmid++) {
+
+		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
+			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
+			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+			RREG32(mmVM_INVALIDATE_RESPONSE);
+			break;
+		}
+	}
+
+	return 0;
+}
+
 /*
  * GART
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -1333,6 +1365,7 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = {
 
 static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
 	.flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb,
+	.flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid,
 	.emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb,
 	.emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping,
 	.set_prt = gmc_v7_0_set_prt,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 6d96d40fbcb8..27d83204fa2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
 	return 0;
 }
 
+/**
+ * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flushed
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					uint16_t pasid, uint32_t flush_type,
+					bool all_hub)
+{
+	int vmid;
+	unsigned int tmp;
+
+	if (adev->in_gpu_reset)
+		return -EIO;
+
+	for (vmid = 1; vmid < 16; vmid++) {
+
+		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
+			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
+			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+			RREG32(mmVM_INVALIDATE_RESPONSE);
+			break;
+		}
+	}
+
+	return 0;
+
+}
+
 /*
  * GART
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -1700,6 +1733,7 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
 
 static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
 	.flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb,
+	.flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid,
 	.emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb,
 	.emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping,
 	.set_prt = gmc_v8_0_set_prt,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index b83c8d745f42..40a496804356 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -38,10 +38,12 @@
 #include "dce/dce_12_0_sh_mask.h"
 #include "vega10_enum.h"
 #include "mmhub/mmhub_1_0_offset.h"
+#include "athub/athub_1_0_sh_mask.h"
 #include "athub/athub_1_0_offset.h"
 #include "oss/osssys_4_0_offset.h"
 
 #include "soc15.h"
+#include "soc15d.h"
 #include "soc15_common.h"
 #include "umc/umc_6_0_sh_mask.h"
 
@@ -441,6 +443,18 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
 		   adev->pdev->device == 0x15d8)));
 }
 
+static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
+					uint8_t vmid, uint16_t *p_pasid)
+{
+	uint32_t value;
+
+	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+		     + vmid);
+	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
 /*
  * GART
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -539,6 +553,67 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
 }
 
+/**
+ * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flushed
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					uint16_t pasid, uint32_t flush_type,
+					bool all_hub)
+{
+	int vmid, i;
+	signed long r;
+	uint32_t seq;
+	uint16_t queried_pasid;
+	bool ret;
+	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+
+	if (adev->in_gpu_reset)
+		return -EIO;
+
+	if (ring->sched.ready) {
+		spin_lock(&adev->gfx.kiq.ring_lock);
+		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size);
+		kiq->pmf->kiq_invalidate_tlbs(ring,
+					pasid, flush_type, all_hub);
+		amdgpu_fence_emit_polling(ring, &seq);
+		amdgpu_ring_commit(ring);
+		spin_unlock(&adev->gfx.kiq.ring_lock);
+		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+		if (r < 1) {
+			DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+			return -ETIME;
+		}
+
+		return 0;
+	}
+
+	for (vmid = 1; vmid < 16; vmid++) {
+
+		ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+				&queried_pasid);
+		if (ret && queried_pasid == pasid) {
+			if (all_hub) {
+				for (i = 0; i < adev->num_vmhubs; i++)
+					gmc_v9_0_flush_gpu_tlb(adev, vmid,
+							i, 0);
+			} else {
+				gmc_v9_0_flush_gpu_tlb(adev, vmid,
+						AMDGPU_GFXHUB_0, 0);
+			}
+			break;
+		}
+	}
+
+	return 0;
+
+}
+
 static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 					    unsigned vmid, uint64_t pd_addr)
 {
@@ -700,6 +775,7 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
 
 static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
 	.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
+	.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
 	.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
 	.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
 	.map_mtype = gmc_v9_0_map_mtype,
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10
  2020-01-13 20:26 [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra
  2020-01-13 20:26 ` [PATCH 4/7] drm/amdgpu: replace kcq enable/disable functions on gfx_v9 Alex Sierra
  2020-01-13 20:26 ` [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid Alex Sierra
@ 2020-01-13 22:16 ` Felix Kuehling
  2020-01-14  0:45 ` Felix Kuehling
  3 siblings, 0 replies; 12+ messages in thread
From: Felix Kuehling @ 2020-01-13 22:16 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx

The series is

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>


On 2020-01-13 3:26 p.m., Alex Sierra wrote:
> tlbs invalidate pointer function added to kiq_pm4_funcs struct.
> This way, tlb flush can be done through kiq member.
> TLBs invalidatation implemented for gfx9 and gfx10.
>
> Change-Id: I1b77b364f3ae0038ff3e70e869be5f2ef6e6d293
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  4 ++++
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 15 +++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 14 ++++++++++++++
>   3 files changed, 33 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 8e88e0411662..af4bd279f42f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -76,11 +76,15 @@ struct kiq_pm4_funcs {
>   					struct amdgpu_ring *ring,
>   					u64 addr,
>   					u64 seq);
> +	void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring,
> +				uint16_t pasid, uint32_t flush_type,
> +				bool all_hub);
>   	/* Packet sizes */
>   	int set_resources_size;
>   	int map_queues_size;
>   	int unmap_queues_size;
>   	int query_status_size;
> +	int invalidate_tlbs_size;
>   };
>   
>   struct amdgpu_kiq {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 379e46c1b7f6..d72b60f997c8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -40,6 +40,7 @@
>   #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
>   
>   #include "soc15.h"
> +#include "soc15d.h"
>   #include "soc15_common.h"
>   #include "clearstate_gfx10.h"
>   #include "v10_structs.h"
> @@ -346,15 +347,29 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
>   	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
>   }
>   
> +static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
> +				uint16_t pasid, uint32_t flush_type,
> +				bool all_hub)
> +{
> +	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> +	amdgpu_ring_write(kiq_ring,
> +			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> +			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> +			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> +			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> +}
> +
>   static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
>   	.kiq_set_resources = gfx10_kiq_set_resources,
>   	.kiq_map_queues = gfx10_kiq_map_queues,
>   	.kiq_unmap_queues = gfx10_kiq_unmap_queues,
>   	.kiq_query_status = gfx10_kiq_query_status,
> +	.kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
>   	.set_resources_size = 8,
>   	.map_queues_size = 7,
>   	.unmap_queues_size = 6,
>   	.query_status_size = 7,
> +	.invalidate_tlbs_size = 12,
>   };
>   
>   static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index ad0179ea2cc5..b8759386dcbb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -837,15 +837,29 @@ static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
>   	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
>   }
>   
> +static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
> +				uint16_t pasid, uint32_t flush_type,
> +				bool all_hub)
> +{
> +	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> +	amdgpu_ring_write(kiq_ring,
> +			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> +			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> +			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> +			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> +}
> +
>   static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
>   	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
>   	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
>   	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
>   	.kiq_query_status = gfx_v9_0_kiq_query_status,
> +	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
>   	.set_resources_size = 8,
>   	.map_queues_size = 7,
>   	.unmap_queues_size = 6,
>   	.query_status_size = 7,
> +	.invalidate_tlbs_size = 12,
>   };
>   
>   static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid
  2020-01-13 20:26 ` [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid Alex Sierra
@ 2020-01-14  0:34   ` Felix Kuehling
  2020-01-14  0:48     ` Sierra Guiza, Alejandro (Alex)
  0 siblings, 1 reply; 12+ messages in thread
From: Felix Kuehling @ 2020-01-14  0:34 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx

Sorry, I already gave my Reviewed-by, but realized there was one more 
problem. If you haven't submitted yet, please fix that first. Otherwise, 
please make it a follow-up patch. See inline ...

On 2020-01-13 3:26 p.m., Alex Sierra wrote:
> This can be used directly from amdgpu and amdkfd to invalidate
> TLB through pasid.
> It supports gmc v7, v8, v9 and v10.
>
> Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  6 ++
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 74 ++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   | 33 +++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   | 34 +++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 76 +++++++++++++++++++++++++
>   5 files changed, 223 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index c91dd602d5f1..d3c27a3c43f6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs {
>   	/* flush the vm tlb via mmio */
>   	void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
>   				uint32_t vmhub, uint32_t flush_type);
> +	/* flush the vm tlb via pasid */
> +	int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
> +					uint32_t flush_type, bool all_hub);
>   	/* flush the vm tlb via ring */
>   	uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
>   				       uint64_t pd_addr);
> @@ -216,6 +219,9 @@ struct amdgpu_gmc {
>   };
>   
>   #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
> +#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \
> +	((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
> +	((adev), (pasid), (type), (allhub)))
>   #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
>   #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
>   #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 5ad89bb6f3ba..8afd05834714 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -30,6 +30,8 @@
>   #include "hdp/hdp_5_0_0_sh_mask.h"
>   #include "gc/gc_10_1_0_sh_mask.h"
>   #include "mmhub/mmhub_2_0_0_sh_mask.h"
> +#include "athub/athub_2_0_0_sh_mask.h"
> +#include "athub/athub_2_0_0_offset.h"
>   #include "dcn/dcn_2_0_0_offset.h"
>   #include "dcn/dcn_2_0_0_sh_mask.h"
>   #include "oss/osssys_5_0_0_offset.h"
> @@ -37,6 +39,7 @@
>   #include "navi10_enum.h"
>   
>   #include "soc15.h"
> +#include "soc15d.h"
>   #include "soc15_common.h"
>   
>   #include "nbio_v2_3.h"
> @@ -234,6 +237,19 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
>   		(!amdgpu_sriov_vf(adev)));
>   }
>   
> +static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
> +					struct amdgpu_device *adev,
> +					uint8_t vmid, uint16_t *p_pasid)
> +{
> +	uint32_t value;
> +
> +	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
> +		     + vmid);
> +	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
> +
> +	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -380,6 +396,63 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
>   }
>   
> +/**
> + * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flush
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	int vmid, i;
> +	signed long r;
> +	uint32_t seq;
> +	uint16_t queried_pasid;
> +	bool ret;
> +	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> +
> +	if (amdgpu_emu_mode == 0 && ring->sched.ready) {
> +		spin_lock(&adev->gfx.kiq.ring_lock);
> +		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size);

You need to allocate more space here for amdgpu_fence_emit_polling. 
Looks like gfx_v10_0_ring_emit_fence needs 8 dwords.


> +		kiq->pmf->kiq_invalidate_tlbs(ring,
> +					pasid, flush_type, all_hub);
> +		amdgpu_fence_emit_polling(ring, &seq);
> +		amdgpu_ring_commit(ring);
> +		spin_unlock(&adev->gfx.kiq.ring_lock);
> +		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
> +		if (r < 1) {
> +			DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> +			return -ETIME;
> +		}
> +
> +		return 0;
> +	}
> +
> +	for (vmid = 1; vmid < 16; vmid++) {
> +
> +		ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
> +				&queried_pasid);
> +		if (ret	&& queried_pasid == pasid) {
> +			if (all_hub) {
> +				for (i = 0; i < adev->num_vmhubs; i++)
> +					gmc_v10_0_flush_gpu_tlb(adev, vmid,
> +							i, 0);
> +			} else {
> +				gmc_v10_0_flush_gpu_tlb(adev, vmid,
> +						AMDGPU_GFXHUB_0, 0);
> +			}
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>   static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>   					     unsigned vmid, uint64_t pd_addr)
>   {
> @@ -531,6 +604,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
>   
>   static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
>   	.flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
> +	.flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
>   	.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
>   	.map_mtype = gmc_v10_0_map_mtype,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> index f08e5330642d..19d5b133e1d7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> @@ -418,6 +418,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
>   	return 0;
>   }
>   
> +/**
> + * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flush
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	int vmid;
> +	unsigned int tmp;
> +
> +	if (adev->in_gpu_reset)
> +		return -EIO;
> +
> +	for (vmid = 1; vmid < 16; vmid++) {
> +
> +		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> +		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> +			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> +			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> +			RREG32(mmVM_INVALIDATE_RESPONSE);
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -1333,6 +1365,7 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = {
>   
>   static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
>   	.flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb,
> +	.flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid,
>   	.emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping,
>   	.set_prt = gmc_v7_0_set_prt,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> index 6d96d40fbcb8..27d83204fa2b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> @@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
>   	return 0;
>   }
>   
> +/**
> + * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flush
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	int vmid;
> +	unsigned int tmp;
> +
> +	if (adev->in_gpu_reset)
> +		return -EIO;
> +
> +	for (vmid = 1; vmid < 16; vmid++) {
> +
> +		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> +		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> +			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> +			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> +			RREG32(mmVM_INVALIDATE_RESPONSE);
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -1700,6 +1733,7 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
>   
>   static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
>   	.flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb,
> +	.flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid,
>   	.emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping,
>   	.set_prt = gmc_v8_0_set_prt,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index b83c8d745f42..40a496804356 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -38,10 +38,12 @@
>   #include "dce/dce_12_0_sh_mask.h"
>   #include "vega10_enum.h"
>   #include "mmhub/mmhub_1_0_offset.h"
> +#include "athub/athub_1_0_sh_mask.h"
>   #include "athub/athub_1_0_offset.h"
>   #include "oss/osssys_4_0_offset.h"
>   
>   #include "soc15.h"
> +#include "soc15d.h"
>   #include "soc15_common.h"
>   #include "umc/umc_6_0_sh_mask.h"
>   
> @@ -441,6 +443,18 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
>   		   adev->pdev->device == 0x15d8)));
>   }
>   
> +static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
> +					uint8_t vmid, uint16_t *p_pasid)
> +{
> +	uint32_t value;
> +
> +	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
> +		     + vmid);
> +	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
> +
> +	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -539,6 +553,67 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
>   }
>   
> +/**
> + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flush
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	int vmid, i;
> +	signed long r;
> +	uint32_t seq;
> +	uint16_t queried_pasid;
> +	bool ret;
> +	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> +
> +	if (adev->in_gpu_reset)
> +		return -EIO;
> +
> +	if (ring->sched.ready) {
> +		spin_lock(&adev->gfx.kiq.ring_lock);
> +		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size);

Same as above.

Regards,
   Felix

> +		kiq->pmf->kiq_invalidate_tlbs(ring,
> +					pasid, flush_type, all_hub);
> +		amdgpu_fence_emit_polling(ring, &seq);
> +		amdgpu_ring_commit(ring);
> +		spin_unlock(&adev->gfx.kiq.ring_lock);
> +		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
> +		if (r < 1) {
> +			DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> +			return -ETIME;
> +		}
> +
> +		return 0;
> +	}
> +
> +	for (vmid = 1; vmid < 16; vmid++) {
> +
> +		ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
> +				&queried_pasid);
> +		if (ret && queried_pasid == pasid) {
> +			if (all_hub) {
> +				for (i = 0; i < adev->num_vmhubs; i++)
> +					gmc_v9_0_flush_gpu_tlb(adev, vmid,
> +							i, 0);
> +			} else {
> +				gmc_v9_0_flush_gpu_tlb(adev, vmid,
> +						AMDGPU_GFXHUB_0, 0);
> +			}
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +
> +}
> +
>   static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>   					    unsigned vmid, uint64_t pd_addr)
>   {
> @@ -700,6 +775,7 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
>   
>   static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
>   	.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
> +	.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
>   	.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
>   	.map_mtype = gmc_v9_0_map_mtype,
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10
  2020-01-13 20:26 [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra
                   ` (2 preceding siblings ...)
  2020-01-13 22:16 ` [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Felix Kuehling
@ 2020-01-14  0:45 ` Felix Kuehling
  3 siblings, 0 replies; 12+ messages in thread
From: Felix Kuehling @ 2020-01-14  0:45 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx

On 2020-01-13 3:26 p.m., Alex Sierra wrote:
> tlbs invalidate pointer function added to kiq_pm4_funcs struct.
> This way, tlb flush can be done through kiq member.
> TLBs invalidatation implemented for gfx9 and gfx10.
>
> Change-Id: I1b77b364f3ae0038ff3e70e869be5f2ef6e6d293
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  4 ++++
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 15 +++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 14 ++++++++++++++
>   3 files changed, 33 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 8e88e0411662..af4bd279f42f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -76,11 +76,15 @@ struct kiq_pm4_funcs {
>   					struct amdgpu_ring *ring,
>   					u64 addr,
>   					u64 seq);
> +	void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring,
> +				uint16_t pasid, uint32_t flush_type,
> +				bool all_hub);
>   	/* Packet sizes */
>   	int set_resources_size;
>   	int map_queues_size;
>   	int unmap_queues_size;
>   	int query_status_size;
> +	int invalidate_tlbs_size;
>   };
>   
>   struct amdgpu_kiq {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 379e46c1b7f6..d72b60f997c8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -40,6 +40,7 @@
>   #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
>   
>   #include "soc15.h"
> +#include "soc15d.h"
>   #include "soc15_common.h"
>   #include "clearstate_gfx10.h"
>   #include "v10_structs.h"
> @@ -346,15 +347,29 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
>   	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
>   }
>   
> +static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
> +				uint16_t pasid, uint32_t flush_type,
> +				bool all_hub)
> +{
> +	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> +	amdgpu_ring_write(kiq_ring,
> +			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> +			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> +			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> +			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> +}
> +
>   static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
>   	.kiq_set_resources = gfx10_kiq_set_resources,
>   	.kiq_map_queues = gfx10_kiq_map_queues,
>   	.kiq_unmap_queues = gfx10_kiq_unmap_queues,
>   	.kiq_query_status = gfx10_kiq_query_status,
> +	.kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
>   	.set_resources_size = 8,
>   	.map_queues_size = 7,
>   	.unmap_queues_size = 6,
>   	.query_status_size = 7,
> +	.invalidate_tlbs_size = 12,

This looks like it was copied from the function that emits both flush 
and fence. Now that the function only emits the flush, this number 
should be smaller. Only 2 dwords. And it seems like 12 was 
over-estimated, because the fence is only 8 dwords.

Regards,
   Felix


>   };
>   
>   static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index ad0179ea2cc5..b8759386dcbb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -837,15 +837,29 @@ static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
>   	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
>   }
>   
> +static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
> +				uint16_t pasid, uint32_t flush_type,
> +				bool all_hub)
> +{
> +	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> +	amdgpu_ring_write(kiq_ring,
> +			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> +			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> +			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> +			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> +}
> +
>   static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
>   	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
>   	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
>   	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
>   	.kiq_query_status = gfx_v9_0_kiq_query_status,
> +	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
>   	.set_resources_size = 8,
>   	.map_queues_size = 7,
>   	.unmap_queues_size = 6,
>   	.query_status_size = 7,
> +	.invalidate_tlbs_size = 12,
>   };
>   
>   static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid
  2020-01-14  0:34   ` Felix Kuehling
@ 2020-01-14  0:48     ` Sierra Guiza, Alejandro (Alex)
  2020-01-14  0:55       ` Felix Kuehling
  0 siblings, 1 reply; 12+ messages in thread
From: Sierra Guiza, Alejandro (Alex) @ 2020-01-14  0:48 UTC (permalink / raw)
  To: Kuehling, Felix, amd-gfx

[AMD Official Use Only - Internal Distribution Only]

I just pushed the series, but I'll go ahead and create a new patch for this.
Is .invalidate_tlbs_size counted in dwords? It is currently 12; should I drop it to 8 then?

-----Original Message-----
From: Kuehling, Felix <Felix.Kuehling@amd.com> 
Sent: Monday, January 13, 2020 6:34 PM
To: Sierra Guiza, Alejandro (Alex) <Alex.Sierra@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid

Sorry, I already said, Reviewed-by, but realized there was one more problem. If you haven't submitted yet, please fix that first. Otherwise, please make it a follow-up patch. See inline ...

On 2020-01-13 3:26 p.m., Alex Sierra wrote:
> This can be used directly from amdgpu and amdkfd to invalidate TLB 
> through pasid.
> It supports gmc v7, v8, v9 and v10.
>
> Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  6 ++
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 74 ++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   | 33 +++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   | 34 +++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 76 +++++++++++++++++++++++++
>   5 files changed, 223 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index c91dd602d5f1..d3c27a3c43f6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs {
>   	/* flush the vm tlb via mmio */
>   	void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
>   				uint32_t vmhub, uint32_t flush_type);
> +	/* flush the vm tlb via pasid */
> +	int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
> +					uint32_t flush_type, bool all_hub);
>   	/* flush the vm tlb via ring */
>   	uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
>   				       uint64_t pd_addr);
> @@ -216,6 +219,9 @@ struct amdgpu_gmc {
>   };
>   
>   #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) 
> ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), 
> (type)))
> +#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \
> +	((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
> +	((adev), (pasid), (type), (allhub)))
>   #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
>   #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
>   #define amdgpu_gmc_map_mtype(adev, flags) 
> (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 5ad89bb6f3ba..8afd05834714 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -30,6 +30,8 @@
>   #include "hdp/hdp_5_0_0_sh_mask.h"
>   #include "gc/gc_10_1_0_sh_mask.h"
>   #include "mmhub/mmhub_2_0_0_sh_mask.h"
> +#include "athub/athub_2_0_0_sh_mask.h"
> +#include "athub/athub_2_0_0_offset.h"
>   #include "dcn/dcn_2_0_0_offset.h"
>   #include "dcn/dcn_2_0_0_sh_mask.h"
>   #include "oss/osssys_5_0_0_offset.h"
> @@ -37,6 +39,7 @@
>   #include "navi10_enum.h"
>   
>   #include "soc15.h"
> +#include "soc15d.h"
>   #include "soc15_common.h"
>   
>   #include "nbio_v2_3.h"
> @@ -234,6 +237,19 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
>   		(!amdgpu_sriov_vf(adev)));
>   }
>   
> +static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
> +					struct amdgpu_device *adev,
> +					uint8_t vmid, uint16_t *p_pasid) {
> +	uint32_t value;
> +
> +	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
> +		     + vmid);
> +	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
> +
> +	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -380,6 +396,63 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
>   }
>   
> +/**
> + * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flush
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	int vmid, i;
> +	signed long r;
> +	uint32_t seq;
> +	uint16_t queried_pasid;
> +	bool ret;
> +	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> +
> +	if (amdgpu_emu_mode == 0 && ring->sched.ready) {
> +		spin_lock(&adev->gfx.kiq.ring_lock);
> +		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size);

You need to allocate more space here for amdgpu_fence_emit_polling. 
Looks like gfx_v10_0_ring_emit_fence needs 8 dwords.


> +		kiq->pmf->kiq_invalidate_tlbs(ring,
> +					pasid, flush_type, all_hub);
> +		amdgpu_fence_emit_polling(ring, &seq);
> +		amdgpu_ring_commit(ring);
> +		spin_unlock(&adev->gfx.kiq.ring_lock);
> +		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
> +		if (r < 1) {
> +			DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> +			return -ETIME;
> +		}
> +
> +		return 0;
> +	}
> +
> +	for (vmid = 1; vmid < 16; vmid++) {
> +
> +		ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
> +				&queried_pasid);
> +		if (ret	&& queried_pasid == pasid) {
> +			if (all_hub) {
> +				for (i = 0; i < adev->num_vmhubs; i++)
> +					gmc_v10_0_flush_gpu_tlb(adev, vmid,
> +							i, 0);
> +			} else {
> +				gmc_v10_0_flush_gpu_tlb(adev, vmid,
> +						AMDGPU_GFXHUB_0, 0);
> +			}
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>   static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>   					     unsigned vmid, uint64_t pd_addr)
>   {
> @@ -531,6 +604,7 @@ static void gmc_v10_0_get_vm_pte(struct 
> amdgpu_device *adev,
>   
>   static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
>   	.flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
> +	.flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
>   	.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
>   	.map_mtype = gmc_v10_0_map_mtype,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> index f08e5330642d..19d5b133e1d7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> @@ -418,6 +418,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
>   	return 0;
>   }
>   
> +/**
> + * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flush
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	int vmid;
> +	unsigned int tmp;
> +
> +	if (adev->in_gpu_reset)
> +		return -EIO;
> +
> +	for (vmid = 1; vmid < 16; vmid++) {
> +
> +		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> +		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> +			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> +			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> +			RREG32(mmVM_INVALIDATE_RESPONSE);
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -1333,6 +1365,7 @@ static const struct amd_ip_funcs 
> gmc_v7_0_ip_funcs = {
>   
>   static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
>   	.flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb,
> +	.flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid,
>   	.emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping,
>   	.set_prt = gmc_v7_0_set_prt,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> index 6d96d40fbcb8..27d83204fa2b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> @@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
>   	return 0;
>   }
>   
> +/**
> + * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flush
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	int vmid;
> +	unsigned int tmp;
> +
> +	if (adev->in_gpu_reset)
> +		return -EIO;
> +
> +	for (vmid = 1; vmid < 16; vmid++) {
> +
> +		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> +		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> +			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> +			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> +			RREG32(mmVM_INVALIDATE_RESPONSE);
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -1700,6 +1733,7 @@ static const struct amd_ip_funcs 
> gmc_v8_0_ip_funcs = {
>   
>   static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
>   	.flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb,
> +	.flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid,
>   	.emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping,
>   	.set_prt = gmc_v8_0_set_prt,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index b83c8d745f42..40a496804356 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -38,10 +38,12 @@
>   #include "dce/dce_12_0_sh_mask.h"
>   #include "vega10_enum.h"
>   #include "mmhub/mmhub_1_0_offset.h"
> +#include "athub/athub_1_0_sh_mask.h"
>   #include "athub/athub_1_0_offset.h"
>   #include "oss/osssys_4_0_offset.h"
>   
>   #include "soc15.h"
> +#include "soc15d.h"
>   #include "soc15_common.h"
>   #include "umc/umc_6_0_sh_mask.h"
>   
> @@ -441,6 +443,18 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
>   		   adev->pdev->device == 0x15d8)));
>   }
>   
> +static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
> +					uint8_t vmid, uint16_t *p_pasid) {
> +	uint32_t value;
> +
> +	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
> +		     + vmid);
> +	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
> +
> +	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -539,6 +553,67 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
>   }
>   
> +/**
> + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flush
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	int vmid, i;
> +	signed long r;
> +	uint32_t seq;
> +	uint16_t queried_pasid;
> +	bool ret;
> +	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> +
> +	if (adev->in_gpu_reset)
> +		return -EIO;
> +
> +	if (ring->sched.ready) {
> +		spin_lock(&adev->gfx.kiq.ring_lock);
> +		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size);

Same as above.

Regards,
   Felix

> +		kiq->pmf->kiq_invalidate_tlbs(ring,
> +					pasid, flush_type, all_hub);
> +		amdgpu_fence_emit_polling(ring, &seq);
> +		amdgpu_ring_commit(ring);
> +		spin_unlock(&adev->gfx.kiq.ring_lock);
> +		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
> +		if (r < 1) {
> +			DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> +			return -ETIME;
> +		}
> +
> +		return 0;
> +	}
> +
> +	for (vmid = 1; vmid < 16; vmid++) {
> +
> +		ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
> +				&queried_pasid);
> +		if (ret && queried_pasid == pasid) {
> +			if (all_hub) {
> +				for (i = 0; i < adev->num_vmhubs; i++)
> +					gmc_v9_0_flush_gpu_tlb(adev, vmid,
> +							i, 0);
> +			} else {
> +				gmc_v9_0_flush_gpu_tlb(adev, vmid,
> +						AMDGPU_GFXHUB_0, 0);
> +			}
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +
> +}
> +
>   static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>   					    unsigned vmid, uint64_t pd_addr)
>   {
> @@ -700,6 +775,7 @@ static void gmc_v9_0_get_vm_pte(struct 
> amdgpu_device *adev,
>   
>   static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
>   	.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
> +	.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
>   	.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
>   	.map_mtype = gmc_v9_0_map_mtype,
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid
  2020-01-14  0:48     ` Sierra Guiza, Alejandro (Alex)
@ 2020-01-14  0:55       ` Felix Kuehling
  2020-01-14  3:21         ` Sierra Guiza, Alejandro (Alex)
  0 siblings, 1 reply; 12+ messages in thread
From: Felix Kuehling @ 2020-01-14  0:55 UTC (permalink / raw)
  To: Sierra Guiza, Alejandro (Alex), amd-gfx

I noticed that the invalidate_tlbs_size in patch 3 was also wrong. That 
should only be 2 dwords, not 12. The code here should do

     amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);

I think 12 was too much in the original code. Flush + fence should only 
be 10 dwords, unless I missed something or counted wrong.

Regards,
   Felix

On 2020-01-13 7:48 p.m., Sierra Guiza, Alejandro (Alex) wrote:
> [AMD Official Use Only - Internal Distribution Only]
>
> I just pushed the series, but I'll go ahead and create a new patch for this.
> Is .invalidate_tlbs_size measured in dwords? It is currently 12; should I drop it to 8 then?
>
> -----Original Message-----
> From: Kuehling, Felix <Felix.Kuehling@amd.com>
> Sent: Monday, January 13, 2020 6:34 PM
> To: Sierra Guiza, Alejandro (Alex) <Alex.Sierra@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid
>
> Sorry, I already said, Reviewed-by, but realized there was one more problem. If you haven't submitted yet, please fix that first. Otherwise, please make it a follow-up patch. See inline ...
>
> On 2020-01-13 3:26 p.m., Alex Sierra wrote:
>> This can be used directly from amdgpu and amdkfd to invalidate TLB
>> through pasid.
>> It supports gmc v7, v8, v9 and v10.
>>
>> Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490
>> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
>> ---
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  6 ++
>>    drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 74 ++++++++++++++++++++++++
>>    drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   | 33 +++++++++++
>>    drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   | 34 +++++++++++
>>    drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 76 +++++++++++++++++++++++++
>>    5 files changed, 223 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> index c91dd602d5f1..d3c27a3c43f6 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> @@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs {
>>    	/* flush the vm tlb via mmio */
>>    	void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
>>    				uint32_t vmhub, uint32_t flush_type);
>> +	/* flush the vm tlb via pasid */
>> +	int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
>> +					uint32_t flush_type, bool all_hub);
>>    	/* flush the vm tlb via ring */
>>    	uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
>>    				       uint64_t pd_addr);
>> @@ -216,6 +219,9 @@ struct amdgpu_gmc {
>>    };
>>    
>>    #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type)
>> ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub),
>> (type)))
>> +#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \
>> +	((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
>> +	((adev), (pasid), (type), (allhub)))
>>    #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
>>    #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
>>    #define amdgpu_gmc_map_mtype(adev, flags)
>> (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> index 5ad89bb6f3ba..8afd05834714 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> @@ -30,6 +30,8 @@
>>    #include "hdp/hdp_5_0_0_sh_mask.h"
>>    #include "gc/gc_10_1_0_sh_mask.h"
>>    #include "mmhub/mmhub_2_0_0_sh_mask.h"
>> +#include "athub/athub_2_0_0_sh_mask.h"
>> +#include "athub/athub_2_0_0_offset.h"
>>    #include "dcn/dcn_2_0_0_offset.h"
>>    #include "dcn/dcn_2_0_0_sh_mask.h"
>>    #include "oss/osssys_5_0_0_offset.h"
>> @@ -37,6 +39,7 @@
>>    #include "navi10_enum.h"
>>    
>>    #include "soc15.h"
>> +#include "soc15d.h"
>>    #include "soc15_common.h"
>>    
>>    #include "nbio_v2_3.h"
>> @@ -234,6 +237,19 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
>>    		(!amdgpu_sriov_vf(adev)));
>>    }
>>    
>> +static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
>> +					struct amdgpu_device *adev,
>> +					uint8_t vmid, uint16_t *p_pasid) {
>> +	uint32_t value;
>> +
>> +	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
>> +		     + vmid);
>> +	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
>> +
>> +	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
>> +}
>> +
>>    /*
>>     * GART
>>     * VMID 0 is the physical GPU addresses as used by the kernel.
>> @@ -380,6 +396,63 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>>    	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
>>    }
>>    
>> +/**
>> + * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid
>> + *
>> + * @adev: amdgpu_device pointer
>> + * @pasid: pasid to be flush
>> + *
>> + * Flush the TLB for the requested pasid.
>> + */
>> +static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>> +					uint16_t pasid, uint32_t flush_type,
>> +					bool all_hub)
>> +{
>> +	int vmid, i;
>> +	signed long r;
>> +	uint32_t seq;
>> +	uint16_t queried_pasid;
>> +	bool ret;
>> +	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>> +
>> +	if (amdgpu_emu_mode == 0 && ring->sched.ready) {
>> +		spin_lock(&adev->gfx.kiq.ring_lock);
>> +		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size);
> You need to allocate more space here for amdgpu_fence_emit_polling.
> Looks like gfx_v10_0_ring_emit_fence needs 8 dwords.
>
>
>> +		kiq->pmf->kiq_invalidate_tlbs(ring,
>> +					pasid, flush_type, all_hub);
>> +		amdgpu_fence_emit_polling(ring, &seq);
>> +		amdgpu_ring_commit(ring);
>> +		spin_unlock(&adev->gfx.kiq.ring_lock);
>> +		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
>> +		if (r < 1) {
>> +			DRM_ERROR("wait for kiq fence error: %ld.\n", r);
>> +			return -ETIME;
>> +		}
>> +
>> +		return 0;
>> +	}
>> +
>> +	for (vmid = 1; vmid < 16; vmid++) {
>> +
>> +		ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
>> +				&queried_pasid);
>> +		if (ret	&& queried_pasid == pasid) {
>> +			if (all_hub) {
>> +				for (i = 0; i < adev->num_vmhubs; i++)
>> +					gmc_v10_0_flush_gpu_tlb(adev, vmid,
>> +							i, 0);
>> +			} else {
>> +				gmc_v10_0_flush_gpu_tlb(adev, vmid,
>> +						AMDGPU_GFXHUB_0, 0);
>> +			}
>> +			break;
>> +		}
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>>    static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>>    					     unsigned vmid, uint64_t pd_addr)
>>    {
>> @@ -531,6 +604,7 @@ static void gmc_v10_0_get_vm_pte(struct
>> amdgpu_device *adev,
>>    
>>    static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
>>    	.flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
>> +	.flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
>>    	.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
>>    	.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
>>    	.map_mtype = gmc_v10_0_map_mtype,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> index f08e5330642d..19d5b133e1d7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> @@ -418,6 +418,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
>>    	return 0;
>>    }
>>    
>> +/**
>> + * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid
>> + *
>> + * @adev: amdgpu_device pointer
>> + * @pasid: pasid to be flush
>> + *
>> + * Flush the TLB for the requested pasid.
>> + */
>> +static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>> +					uint16_t pasid, uint32_t flush_type,
>> +					bool all_hub)
>> +{
>> +	int vmid;
>> +	unsigned int tmp;
>> +
>> +	if (adev->in_gpu_reset)
>> +		return -EIO;
>> +
>> +	for (vmid = 1; vmid < 16; vmid++) {
>> +
>> +		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
>> +		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
>> +			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
>> +			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
>> +			RREG32(mmVM_INVALIDATE_RESPONSE);
>> +			break;
>> +		}
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>>    /*
>>     * GART
>>     * VMID 0 is the physical GPU addresses as used by the kernel.
>> @@ -1333,6 +1365,7 @@ static const struct amd_ip_funcs
>> gmc_v7_0_ip_funcs = {
>>    
>>    static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
>>    	.flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb,
>> +	.flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid,
>>    	.emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb,
>>    	.emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping,
>>    	.set_prt = gmc_v7_0_set_prt,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> index 6d96d40fbcb8..27d83204fa2b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> @@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
>>    	return 0;
>>    }
>>    
>> +/**
>> + * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid
>> + *
>> + * @adev: amdgpu_device pointer
>> + * @pasid: pasid to be flush
>> + *
>> + * Flush the TLB for the requested pasid.
>> + */
>> +static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>> +					uint16_t pasid, uint32_t flush_type,
>> +					bool all_hub)
>> +{
>> +	int vmid;
>> +	unsigned int tmp;
>> +
>> +	if (adev->in_gpu_reset)
>> +		return -EIO;
>> +
>> +	for (vmid = 1; vmid < 16; vmid++) {
>> +
>> +		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
>> +		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
>> +			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
>> +			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
>> +			RREG32(mmVM_INVALIDATE_RESPONSE);
>> +			break;
>> +		}
>> +	}
>> +
>> +	return 0;
>> +
>> +}
>> +
>>    /*
>>     * GART
>>     * VMID 0 is the physical GPU addresses as used by the kernel.
>> @@ -1700,6 +1733,7 @@ static const struct amd_ip_funcs
>> gmc_v8_0_ip_funcs = {
>>    
>>    static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
>>    	.flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb,
>> +	.flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid,
>>    	.emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb,
>>    	.emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping,
>>    	.set_prt = gmc_v8_0_set_prt,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index b83c8d745f42..40a496804356 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -38,10 +38,12 @@
>>    #include "dce/dce_12_0_sh_mask.h"
>>    #include "vega10_enum.h"
>>    #include "mmhub/mmhub_1_0_offset.h"
>> +#include "athub/athub_1_0_sh_mask.h"
>>    #include "athub/athub_1_0_offset.h"
>>    #include "oss/osssys_4_0_offset.h"
>>    
>>    #include "soc15.h"
>> +#include "soc15d.h"
>>    #include "soc15_common.h"
>>    #include "umc/umc_6_0_sh_mask.h"
>>    
>> @@ -441,6 +443,18 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
>>    		   adev->pdev->device == 0x15d8)));
>>    }
>>    
>> +static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
>> +					uint8_t vmid, uint16_t *p_pasid) {
>> +	uint32_t value;
>> +
>> +	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
>> +		     + vmid);
>> +	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
>> +
>> +	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
>> +}
>> +
>>    /*
>>     * GART
>>     * VMID 0 is the physical GPU addresses as used by the kernel.
>> @@ -539,6 +553,67 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>>    	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
>>    }
>>    
>> +/**
>> + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
>> + *
>> + * @adev: amdgpu_device pointer
>> + * @pasid: pasid to be flush
>> + *
>> + * Flush the TLB for the requested pasid.
>> + */
>> +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>> +					uint16_t pasid, uint32_t flush_type,
>> +					bool all_hub)
>> +{
>> +	int vmid, i;
>> +	signed long r;
>> +	uint32_t seq;
>> +	uint16_t queried_pasid;
>> +	bool ret;
>> +	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>> +
>> +	if (adev->in_gpu_reset)
>> +		return -EIO;
>> +
>> +	if (ring->sched.ready) {
>> +		spin_lock(&adev->gfx.kiq.ring_lock);
>> +		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size);
> Same as above.
>
> Regards,
>     Felix
>
>> +		kiq->pmf->kiq_invalidate_tlbs(ring,
>> +					pasid, flush_type, all_hub);
>> +		amdgpu_fence_emit_polling(ring, &seq);
>> +		amdgpu_ring_commit(ring);
>> +		spin_unlock(&adev->gfx.kiq.ring_lock);
>> +		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
>> +		if (r < 1) {
>> +			DRM_ERROR("wait for kiq fence error: %ld.\n", r);
>> +			return -ETIME;
>> +		}
>> +
>> +		return 0;
>> +	}
>> +
>> +	for (vmid = 1; vmid < 16; vmid++) {
>> +
>> +		ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
>> +				&queried_pasid);
>> +		if (ret && queried_pasid == pasid) {
>> +			if (all_hub) {
>> +				for (i = 0; i < adev->num_vmhubs; i++)
>> +					gmc_v9_0_flush_gpu_tlb(adev, vmid,
>> +							i, 0);
>> +			} else {
>> +				gmc_v9_0_flush_gpu_tlb(adev, vmid,
>> +						AMDGPU_GFXHUB_0, 0);
>> +			}
>> +			break;
>> +		}
>> +	}
>> +
>> +	return 0;
>> +
>> +}
>> +
>>    static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>>    					    unsigned vmid, uint64_t pd_addr)
>>    {
>> @@ -700,6 +775,7 @@ static void gmc_v9_0_get_vm_pte(struct
>> amdgpu_device *adev,
>>    
>>    static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
>>    	.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
>> +	.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
>>    	.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
>>    	.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
>>    	.map_mtype = gmc_v9_0_map_mtype,
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid
  2020-01-14  0:55       ` Felix Kuehling
@ 2020-01-14  3:21         ` Sierra Guiza, Alejandro (Alex)
  0 siblings, 0 replies; 12+ messages in thread
From: Sierra Guiza, Alejandro (Alex) @ 2020-01-14  3:21 UTC (permalink / raw)
  To: Kuehling, Felix, amd-gfx

[AMD Official Use Only - Internal Distribution Only]

Yes, both gfx_v10_0_ring_emit_fence and gfx_v9_0_ring_emit_fence perform 8 ring writes each. Adding the 2 dwords for the flush gives 10 dwords in total.

Regards,
Alejandro S.
-----Original Message-----
From: Kuehling, Felix <Felix.Kuehling@amd.com> 
Sent: Monday, January 13, 2020 6:55 PM
To: Sierra Guiza, Alejandro (Alex) <Alex.Sierra@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid

I noticed that the invalidate_tlbs_size in patch 3 was also wrong. That should only be 2 dwords, not 12. The code here should do

     amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);

I think 12 was too much in the original code. Flush + fence should only be 10 dwords, unless I missed something or counted wrong.

Regards,
   Felix

On 2020-01-13 7:48 p.m., Sierra Guiza, Alejandro (Alex) wrote:
> [AMD Official Use Only - Internal Distribution Only]
>
> I just pushed the series, but I'll go ahead and create a new patch for this.
> Is .invalidate_tlbs_size measured in dwords? It is currently 12; should I drop it to 8 then?
>
> -----Original Message-----
> From: Kuehling, Felix <Felix.Kuehling@amd.com>
> Sent: Monday, January 13, 2020 6:34 PM
> To: Sierra Guiza, Alejandro (Alex) <Alex.Sierra@amd.com>; 
> amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 5/7] drm/amdgpu: export function to flush TLB via 
> pasid
>
> Sorry, I already said, Reviewed-by, but realized there was one more problem. If you haven't submitted yet, please fix that first. Otherwise, please make it a follow-up patch. See inline ...
>
> On 2020-01-13 3:26 p.m., Alex Sierra wrote:
>> This can be used directly from amdgpu and amdkfd to invalidate TLB 
>> through pasid.
>> It supports gmc v7, v8, v9 and v10.
>>
>> Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490
>> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
>> ---
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  6 ++
>>    drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 74 ++++++++++++++++++++++++
>>    drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   | 33 +++++++++++
>>    drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   | 34 +++++++++++
>>    drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 76 +++++++++++++++++++++++++
>>    5 files changed, 223 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> index c91dd602d5f1..d3c27a3c43f6 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> @@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs {
>>    	/* flush the vm tlb via mmio */
>>    	void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
>>    				uint32_t vmhub, uint32_t flush_type);
>> +	/* flush the vm tlb via pasid */
>> +	int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
>> +					uint32_t flush_type, bool all_hub);
>>    	/* flush the vm tlb via ring */
>>    	uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
>>    				       uint64_t pd_addr);
>> @@ -216,6 +219,9 @@ struct amdgpu_gmc {
>>    };
>>    
>>    #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) 
>> ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub),
>> (type)))
>> +#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \
>> +	((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
>> +	((adev), (pasid), (type), (allhub)))
>>    #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
>>    #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
>>    #define amdgpu_gmc_map_mtype(adev, flags)
>> (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> index 5ad89bb6f3ba..8afd05834714 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> @@ -30,6 +30,8 @@
>>    #include "hdp/hdp_5_0_0_sh_mask.h"
>>    #include "gc/gc_10_1_0_sh_mask.h"
>>    #include "mmhub/mmhub_2_0_0_sh_mask.h"
>> +#include "athub/athub_2_0_0_sh_mask.h"
>> +#include "athub/athub_2_0_0_offset.h"
>>    #include "dcn/dcn_2_0_0_offset.h"
>>    #include "dcn/dcn_2_0_0_sh_mask.h"
>>    #include "oss/osssys_5_0_0_offset.h"
>> @@ -37,6 +39,7 @@
>>    #include "navi10_enum.h"
>>    
>>    #include "soc15.h"
>> +#include "soc15d.h"
>>    #include "soc15_common.h"
>>    
>>    #include "nbio_v2_3.h"
>> @@ -234,6 +237,19 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
>>    		(!amdgpu_sriov_vf(adev)));
>>    }
>>    
>> +static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
>> +					struct amdgpu_device *adev,
>> +					uint8_t vmid, uint16_t *p_pasid) {
>> +	uint32_t value;
>> +
>> +	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
>> +		     + vmid);
>> +	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
>> +
>> +	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
>> +}
>> +
>>    /*
>>     * GART
>>     * VMID 0 is the physical GPU addresses as used by the kernel.
>> @@ -380,6 +396,63 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>>    	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
>>    }
>>    
>> +/**
>> + * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid
>> + *
>> + * @adev: amdgpu_device pointer
>> + * @pasid: pasid to be flush
>> + *
>> + * Flush the TLB for the requested pasid.
>> + */
>> +static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>> +					uint16_t pasid, uint32_t flush_type,
>> +					bool all_hub)
>> +{
>> +	int vmid, i;
>> +	signed long r;
>> +	uint32_t seq;
>> +	uint16_t queried_pasid;
>> +	bool ret;
>> +	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>> +
>> +	if (amdgpu_emu_mode == 0 && ring->sched.ready) {
>> +		spin_lock(&adev->gfx.kiq.ring_lock);
>> +		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size);
> You need to allocate more space here for amdgpu_fence_emit_polling.
> Looks like gfx_v10_0_ring_emit_fence needs 8 dwords.
>
>
>> +		kiq->pmf->kiq_invalidate_tlbs(ring,
>> +					pasid, flush_type, all_hub);
>> +		amdgpu_fence_emit_polling(ring, &seq);
>> +		amdgpu_ring_commit(ring);
>> +		spin_unlock(&adev->gfx.kiq.ring_lock);
>> +		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
>> +		if (r < 1) {
>> +			DRM_ERROR("wait for kiq fence error: %ld.\n", r);
>> +			return -ETIME;
>> +		}
>> +
>> +		return 0;
>> +	}
>> +
>> +	for (vmid = 1; vmid < 16; vmid++) {
>> +
>> +		ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
>> +				&queried_pasid);
>> +		if (ret	&& queried_pasid == pasid) {
>> +			if (all_hub) {
>> +				for (i = 0; i < adev->num_vmhubs; i++)
>> +					gmc_v10_0_flush_gpu_tlb(adev, vmid,
>> +							i, 0);
>> +			} else {
>> +				gmc_v10_0_flush_gpu_tlb(adev, vmid,
>> +						AMDGPU_GFXHUB_0, 0);
>> +			}
>> +			break;
>> +		}
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>>    static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>>    					     unsigned vmid, uint64_t pd_addr)
>>    {
>> @@ -531,6 +604,7 @@ static void gmc_v10_0_get_vm_pte(struct 
>> amdgpu_device *adev,
>>    
>>    static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
>>    	.flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
>> +	.flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
>>    	.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
>>    	.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
>>    	.map_mtype = gmc_v10_0_map_mtype, diff --git 
>> a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> index f08e5330642d..19d5b133e1d7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> @@ -418,6 +418,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
>>    	return 0;
>>    }
>>    
>> +/**
>> + * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid
>> + *
>> + * @adev: amdgpu_device pointer
>> + * @pasid: pasid to be flush
>> + *
>> + * Flush the TLB for the requested pasid.
>> + */
>> +static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>> +					uint16_t pasid, uint32_t flush_type,
>> +					bool all_hub)
>> +{
>> +	int vmid;
>> +	unsigned int tmp;
>> +
>> +	if (adev->in_gpu_reset)
>> +		return -EIO;
>> +
>> +	for (vmid = 1; vmid < 16; vmid++) {
>> +
>> +		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
>> +		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
>> +			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
>> +			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
>> +			RREG32(mmVM_INVALIDATE_RESPONSE);
>> +			break;
>> +		}
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>>    /*
>>     * GART
>>     * VMID 0 is the physical GPU addresses as used by the kernel.
>> @@ -1333,6 +1365,7 @@ static const struct amd_ip_funcs 
>> gmc_v7_0_ip_funcs = {
>>    
>>    static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
>>    	.flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb,
>> +	.flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid,
>>    	.emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb,
>>    	.emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping,
>>    	.set_prt = gmc_v7_0_set_prt,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> index 6d96d40fbcb8..27d83204fa2b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> @@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
>>    	return 0;
>>    }
>>    
>> +/**
>> + * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid
>> + *
>> + * @adev: amdgpu_device pointer
>> + * @pasid: pasid to be flush
>> + *
>> + * Flush the TLB for the requested pasid.
>> + */
>> +static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>> +					uint16_t pasid, uint32_t flush_type,
>> +					bool all_hub)
>> +{
>> +	int vmid;
>> +	unsigned int tmp;
>> +
>> +	if (adev->in_gpu_reset)
>> +		return -EIO;
>> +
>> +	for (vmid = 1; vmid < 16; vmid++) {
>> +
>> +		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
>> +		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
>> +			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
>> +			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
>> +			RREG32(mmVM_INVALIDATE_RESPONSE);
>> +			break;
>> +		}
>> +	}
>> +
>> +	return 0;
>> +
>> +}
>> +
>>    /*
>>     * GART
>>     * VMID 0 is the physical GPU addresses as used by the kernel.
>> @@ -1700,6 +1733,7 @@ static const struct amd_ip_funcs 
>> gmc_v8_0_ip_funcs = {
>>    
>>    static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
>>    	.flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb,
>> +	.flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid,
>>    	.emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb,
>>    	.emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping,
>>    	.set_prt = gmc_v8_0_set_prt,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index b83c8d745f42..40a496804356 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -38,10 +38,12 @@
>>    #include "dce/dce_12_0_sh_mask.h"
>>    #include "vega10_enum.h"
>>    #include "mmhub/mmhub_1_0_offset.h"
>> +#include "athub/athub_1_0_sh_mask.h"
>>    #include "athub/athub_1_0_offset.h"
>>    #include "oss/osssys_4_0_offset.h"
>>    
>>    #include "soc15.h"
>> +#include "soc15d.h"
>>    #include "soc15_common.h"
>>    #include "umc/umc_6_0_sh_mask.h"
>>    
>> @@ -441,6 +443,18 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
>>    		   adev->pdev->device == 0x15d8)));
>>    }
>>    
>> +static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
>> +					uint8_t vmid, uint16_t *p_pasid) {
>> +	uint32_t value;
>> +
>> +	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
>> +		     + vmid);
>> +	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
>> +
>> +	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
>> +}
>> +
>>    /*
>>     * GART
>>     * VMID 0 is the physical GPU addresses as used by the kernel.
>> @@ -539,6 +553,67 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>>    	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
>>    }
>>    
>> +/**
>> + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
>> + *
>> + * @adev: amdgpu_device pointer
>> + * @pasid: pasid to be flush
>> + *
>> + * Flush the TLB for the requested pasid.
>> + */
>> +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>> +					uint16_t pasid, uint32_t flush_type,
>> +					bool all_hub)
>> +{
>> +	int vmid, i;
>> +	signed long r;
>> +	uint32_t seq;
>> +	uint16_t queried_pasid;
>> +	bool ret;
>> +	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>> +
>> +	if (adev->in_gpu_reset)
>> +		return -EIO;
>> +
>> +	if (ring->sched.ready) {
>> +		spin_lock(&adev->gfx.kiq.ring_lock);
>> +		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size);
> Same as above.
>
> Regards,
>     Felix
>
>> +		kiq->pmf->kiq_invalidate_tlbs(ring,
>> +					pasid, flush_type, all_hub);
>> +		amdgpu_fence_emit_polling(ring, &seq);
>> +		amdgpu_ring_commit(ring);
>> +		spin_unlock(&adev->gfx.kiq.ring_lock);
>> +		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
>> +		if (r < 1) {
>> +			DRM_ERROR("wait for kiq fence error: %ld.\n", r);
>> +			return -ETIME;
>> +		}
>> +
>> +		return 0;
>> +	}
>> +
>> +	for (vmid = 1; vmid < 16; vmid++) {
>> +
>> +		ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
>> +				&queried_pasid);
>> +		if (ret && queried_pasid == pasid) {
>> +			if (all_hub) {
>> +				for (i = 0; i < adev->num_vmhubs; i++)
>> +					gmc_v9_0_flush_gpu_tlb(adev, vmid,
>> +							i, 0);
>> +			} else {
>> +				gmc_v9_0_flush_gpu_tlb(adev, vmid,
>> +						AMDGPU_GFXHUB_0, 0);
>> +			}
>> +			break;
>> +		}
>> +	}
>> +
>> +	return 0;
>> +
>> +}
>> +
>>    static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>>    					    unsigned vmid, uint64_t pd_addr)
>>    {
>> @@ -700,6 +775,7 @@ static void gmc_v9_0_get_vm_pte(struct 
>> amdgpu_device *adev,
>>    
>>    static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
>>    	.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
>> +	.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
>>    	.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
>>    	.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
>>    	.map_mtype = gmc_v9_0_map_mtype,
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10
  2020-01-11 18:39 ` [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra
  2020-01-13 12:12   ` Christian König
@ 2020-01-13 16:36   ` Felix Kuehling
  1 sibling, 0 replies; 12+ messages in thread
From: Felix Kuehling @ 2020-01-13 16:36 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx

On 2020-01-11 1:39 p.m., Alex Sierra wrote:
> tlbs invalidate pointer function added to kiq_pm4_funcs struct.
> This way, tlb flush can be done through kiq member.
> TLBs invalidation implemented for gfx9 and gfx10.
>
> Change-Id: I1b77b364f3ae0038ff3e70e869be5f2ef6e6d293
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  4 +++
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 33 +++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 32 ++++++++++++++++++++++++
>   3 files changed, 69 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 8e88e0411662..2927837bd401 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -76,11 +76,15 @@ struct kiq_pm4_funcs {
>   					struct amdgpu_ring *ring,
>   					u64 addr,
>   					u64 seq);
> +	int (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring,
> +				uint16_t pasid, uint32_t flush_type,
> +				bool all_hub);
>   	/* Packet sizes */
>   	int set_resources_size;
>   	int map_queues_size;
>   	int unmap_queues_size;
>   	int query_status_size;
> +	int invalidate_tlbs_size;
>   };
>   
>   struct amdgpu_kiq {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 379e46c1b7f6..2e82213f57eb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -40,6 +40,7 @@
>   #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
>   
>   #include "soc15.h"
> +#include "soc15d.h"
>   #include "soc15_common.h"
>   #include "clearstate_gfx10.h"
>   #include "v10_structs.h"
> @@ -346,15 +347,47 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
>   	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
>   }
>   
> +static int gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
> +				uint16_t pasid, uint32_t flush_type,
> +				bool all_hub)
> +{
> +	signed long r;
> +	uint32_t seq;
> +	struct amdgpu_device *adev = kiq_ring->adev;
> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> +
> +	spin_lock(&adev->gfx.kiq.ring_lock);
> +	amdgpu_ring_alloc(kiq_ring, kiq->pmf->invalidate_tlbs_size);
> +	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> +	amdgpu_ring_write(kiq_ring,
> +			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> +			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> +			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> +			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> +	amdgpu_fence_emit_polling(kiq_ring, &seq);
> +	amdgpu_ring_commit(kiq_ring);
> +	spin_unlock(&adev->gfx.kiq.ring_lock);

The other KIQ functions don't include the emit_polling, commit and 
locking. I think the way the KIQ-funcs interface is meant to be used, 
all that should be outside the IP-version-specific functions. For 
consistency all you should do here is the amdgpu_ring_write calls with 
IP-version-specific packets.

Regards,
   Felix


> +
> +	r = amdgpu_fence_wait_polling(kiq_ring, seq, adev->usec_timeout);
> +	if (r < 1) {
> +		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> +		return -ETIME;
> +	}
> +
> +	return 0;
> +}
> +
>   static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
>   	.kiq_set_resources = gfx10_kiq_set_resources,
>   	.kiq_map_queues = gfx10_kiq_map_queues,
>   	.kiq_unmap_queues = gfx10_kiq_unmap_queues,
>   	.kiq_query_status = gfx10_kiq_query_status,
> +	.kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
>   	.set_resources_size = 8,
>   	.map_queues_size = 7,
>   	.unmap_queues_size = 6,
>   	.query_status_size = 7,
> +	.invalidate_tlbs_size = 12,
>   };
>   
>   static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index ad0179ea2cc5..5be6fab55b73 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -837,15 +837,47 @@ static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
>   	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
>   }
>   
> +static int gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
> +				uint16_t pasid, uint32_t flush_type,
> +				bool all_hub)
> +{
> +	signed long r;
> +	uint32_t seq;
> +	struct amdgpu_device *adev = kiq_ring->adev;
> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> +
> +	spin_lock(&adev->gfx.kiq.ring_lock);
> +	amdgpu_ring_alloc(kiq_ring, kiq->pmf->invalidate_tlbs_size);
> +	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> +	amdgpu_ring_write(kiq_ring,
> +			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> +			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> +			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> +			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> +	amdgpu_fence_emit_polling(kiq_ring, &seq);
> +	amdgpu_ring_commit(kiq_ring);
> +	spin_unlock(&adev->gfx.kiq.ring_lock);
> +
> +	r = amdgpu_fence_wait_polling(kiq_ring, seq, adev->usec_timeout);
> +	if (r < 1) {
> +		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> +		return -ETIME;
> +	}
> +
> +	return 0;
> +}
> +
>   static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
>   	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
>   	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
>   	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
>   	.kiq_query_status = gfx_v9_0_kiq_query_status,
> +	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
>   	.set_resources_size = 8,
>   	.map_queues_size = 7,
>   	.unmap_queues_size = 6,
>   	.query_status_size = 7,
> +	.invalidate_tlbs_size = 12,
>   };
>   
>   static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10
  2020-01-11 18:39 ` [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra
@ 2020-01-13 12:12   ` Christian König
  2020-01-13 16:36   ` Felix Kuehling
  1 sibling, 0 replies; 12+ messages in thread
From: Christian König @ 2020-01-13 12:12 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx

Am 11.01.20 um 19:39 schrieb Alex Sierra:
> tlbs invalidate pointer function added to kiq_pm4_funcs struct.
> This way, tlb flush can be done through kiq member.
> TLBs invalidation implemented for gfx9 and gfx10.
>
> Change-Id: I1b77b364f3ae0038ff3e70e869be5f2ef6e6d293
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>

Please note that I can't judge the correctness of the PM4 packets, but 
the interface looks really nice and clean now.

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  4 +++
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 33 +++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 32 ++++++++++++++++++++++++
>   3 files changed, 69 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 8e88e0411662..2927837bd401 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -76,11 +76,15 @@ struct kiq_pm4_funcs {
>   					struct amdgpu_ring *ring,
>   					u64 addr,
>   					u64 seq);
> +	int (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring,
> +				uint16_t pasid, uint32_t flush_type,
> +				bool all_hub);
>   	/* Packet sizes */
>   	int set_resources_size;
>   	int map_queues_size;
>   	int unmap_queues_size;
>   	int query_status_size;
> +	int invalidate_tlbs_size;
>   };
>   
>   struct amdgpu_kiq {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 379e46c1b7f6..2e82213f57eb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -40,6 +40,7 @@
>   #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
>   
>   #include "soc15.h"
> +#include "soc15d.h"
>   #include "soc15_common.h"
>   #include "clearstate_gfx10.h"
>   #include "v10_structs.h"
> @@ -346,15 +347,47 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
>   	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
>   }
>   
> +static int gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
> +				uint16_t pasid, uint32_t flush_type,
> +				bool all_hub)
> +{
> +	signed long r;
> +	uint32_t seq;
> +	struct amdgpu_device *adev = kiq_ring->adev;
> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> +
> +	spin_lock(&adev->gfx.kiq.ring_lock);
> +	amdgpu_ring_alloc(kiq_ring, kiq->pmf->invalidate_tlbs_size);
> +	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> +	amdgpu_ring_write(kiq_ring,
> +			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> +			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> +			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> +			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> +	amdgpu_fence_emit_polling(kiq_ring, &seq);
> +	amdgpu_ring_commit(kiq_ring);
> +	spin_unlock(&adev->gfx.kiq.ring_lock);
> +
> +	r = amdgpu_fence_wait_polling(kiq_ring, seq, adev->usec_timeout);
> +	if (r < 1) {
> +		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> +		return -ETIME;
> +	}
> +
> +	return 0;
> +}
> +
>   static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
>   	.kiq_set_resources = gfx10_kiq_set_resources,
>   	.kiq_map_queues = gfx10_kiq_map_queues,
>   	.kiq_unmap_queues = gfx10_kiq_unmap_queues,
>   	.kiq_query_status = gfx10_kiq_query_status,
> +	.kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
>   	.set_resources_size = 8,
>   	.map_queues_size = 7,
>   	.unmap_queues_size = 6,
>   	.query_status_size = 7,
> +	.invalidate_tlbs_size = 12,
>   };
>   
>   static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index ad0179ea2cc5..5be6fab55b73 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -837,15 +837,47 @@ static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
>   	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
>   }
>   
> +static int gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
> +				uint16_t pasid, uint32_t flush_type,
> +				bool all_hub)
> +{
> +	signed long r;
> +	uint32_t seq;
> +	struct amdgpu_device *adev = kiq_ring->adev;
> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> +
> +	spin_lock(&adev->gfx.kiq.ring_lock);
> +	amdgpu_ring_alloc(kiq_ring, kiq->pmf->invalidate_tlbs_size);
> +	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> +	amdgpu_ring_write(kiq_ring,
> +			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> +			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> +			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> +			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> +	amdgpu_fence_emit_polling(kiq_ring, &seq);
> +	amdgpu_ring_commit(kiq_ring);
> +	spin_unlock(&adev->gfx.kiq.ring_lock);
> +
> +	r = amdgpu_fence_wait_polling(kiq_ring, seq, adev->usec_timeout);
> +	if (r < 1) {
> +		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> +		return -ETIME;
> +	}
> +
> +	return 0;
> +}
> +
>   static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
>   	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
>   	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
>   	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
>   	.kiq_query_status = gfx_v9_0_kiq_query_status,
> +	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
>   	.set_resources_size = 8,
>   	.map_queues_size = 7,
>   	.unmap_queues_size = 6,
>   	.query_status_size = 7,
> +	.invalidate_tlbs_size = 12,
>   };
>   
>   static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10
  2020-01-11 18:39 [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock Alex Sierra
@ 2020-01-11 18:39 ` Alex Sierra
  2020-01-13 12:12   ` Christian König
  2020-01-13 16:36   ` Felix Kuehling
  0 siblings, 2 replies; 12+ messages in thread
From: Alex Sierra @ 2020-01-11 18:39 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Sierra

tlbs invalidate pointer function added to kiq_pm4_funcs struct.
This way, tlb flush can be done through kiq member.
TLBs invalidation implemented for gfx9 and gfx10.

Change-Id: I1b77b364f3ae0038ff3e70e869be5f2ef6e6d293
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  4 +++
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 33 +++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 32 ++++++++++++++++++++++++
 3 files changed, 69 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 8e88e0411662..2927837bd401 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -76,11 +76,15 @@ struct kiq_pm4_funcs {
 					struct amdgpu_ring *ring,
 					u64 addr,
 					u64 seq);
+	int (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring,
+				uint16_t pasid, uint32_t flush_type,
+				bool all_hub);
 	/* Packet sizes */
 	int set_resources_size;
 	int map_queues_size;
 	int unmap_queues_size;
 	int query_status_size;
+	int invalidate_tlbs_size;
 };
 
 struct amdgpu_kiq {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 379e46c1b7f6..2e82213f57eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -40,6 +40,7 @@
 #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
 
 #include "soc15.h"
+#include "soc15d.h"
 #include "soc15_common.h"
 #include "clearstate_gfx10.h"
 #include "v10_structs.h"
@@ -346,15 +347,47 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
 }
 
+static int gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+				uint16_t pasid, uint32_t flush_type,
+				bool all_hub)
+{
+	signed long r;
+	uint32_t seq;
+	struct amdgpu_device *adev = kiq_ring->adev;
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+
+	spin_lock(&adev->gfx.kiq.ring_lock);
+	amdgpu_ring_alloc(kiq_ring, kiq->pmf->invalidate_tlbs_size);
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+	amdgpu_ring_write(kiq_ring,
+			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+	amdgpu_fence_emit_polling(kiq_ring, &seq);
+	amdgpu_ring_commit(kiq_ring);
+	spin_unlock(&adev->gfx.kiq.ring_lock);
+
+	r = amdgpu_fence_wait_polling(kiq_ring, seq, adev->usec_timeout);
+	if (r < 1) {
+		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+		return -ETIME;
+	}
+
+	return 0;
+}
+
 static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
 	.kiq_set_resources = gfx10_kiq_set_resources,
 	.kiq_map_queues = gfx10_kiq_map_queues,
 	.kiq_unmap_queues = gfx10_kiq_unmap_queues,
 	.kiq_query_status = gfx10_kiq_query_status,
+	.kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
 	.set_resources_size = 8,
 	.map_queues_size = 7,
 	.unmap_queues_size = 6,
 	.query_status_size = 7,
+	.invalidate_tlbs_size = 12,
 };
 
 static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index ad0179ea2cc5..5be6fab55b73 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -837,15 +837,47 @@ static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
 }
 
+static int gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+				uint16_t pasid, uint32_t flush_type,
+				bool all_hub)
+{
+	signed long r;
+	uint32_t seq;
+	struct amdgpu_device *adev = kiq_ring->adev;
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+
+	spin_lock(&adev->gfx.kiq.ring_lock);
+	amdgpu_ring_alloc(kiq_ring, kiq->pmf->invalidate_tlbs_size);
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+	amdgpu_ring_write(kiq_ring,
+			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+	amdgpu_fence_emit_polling(kiq_ring, &seq);
+	amdgpu_ring_commit(kiq_ring);
+	spin_unlock(&adev->gfx.kiq.ring_lock);
+
+	r = amdgpu_fence_wait_polling(kiq_ring, seq, adev->usec_timeout);
+	if (r < 1) {
+		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+		return -ETIME;
+	}
+
+	return 0;
+}
+
 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
 	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
 	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
 	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
 	.kiq_query_status = gfx_v9_0_kiq_query_status,
+	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
 	.set_resources_size = 8,
 	.map_queues_size = 7,
 	.unmap_queues_size = 6,
 	.query_status_size = 7,
+	.invalidate_tlbs_size = 12,
 };
 
 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2020-01-14  3:21 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-13 20:26 [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra
2020-01-13 20:26 ` [PATCH 4/7] drm/amdgpu: replace kcq enable/disable functions on gfx_v9 Alex Sierra
2020-01-13 20:26 ` [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid Alex Sierra
2020-01-14  0:34   ` Felix Kuehling
2020-01-14  0:48     ` Sierra Guiza, Alejandro (Alex)
2020-01-14  0:55       ` Felix Kuehling
2020-01-14  3:21         ` Sierra Guiza, Alejandro (Alex)
2020-01-13 22:16 ` [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Felix Kuehling
2020-01-14  0:45 ` Felix Kuehling
  -- strict thread matches above, loose matches on Subject: below --
2020-01-11 18:39 [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock Alex Sierra
2020-01-11 18:39 ` [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra
2020-01-13 12:12   ` Christian König
2020-01-13 16:36   ` Felix Kuehling

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.