All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock
@ 2020-01-11 18:39 Alex Sierra
  2020-01-11 18:39 ` [PATCH 2/7] drm/amdgpu: kiq pm4 function implementation for gfx_v9 Alex Sierra
                   ` (6 more replies)
  0 siblings, 7 replies; 19+ messages in thread
From: Alex Sierra @ 2020-01-11 18:39 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Sierra

[Why]
Avoid filesystem reclaim while the eviction lock is held, because the
lock can also be taken from an MMU notifier.

[How]
Set the PF_MEMALLOC_NOFS flag while the eviction mutex is locked,
using the memalloc_nofs_save() / memalloc_nofs_restore() API.

Change-Id: I5531c9337836e7d4a430df3f16dcc82888e8018c
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 40 +++++++++++++++++++++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  6 +++-
 2 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b999b67ff57a..d6aba4f9df74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -82,6 +82,32 @@ struct amdgpu_prt_cb {
 	struct dma_fence_cb cb;
 };
 
+/**
+ * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
+ * happens while holding this lock anywhere to prevent deadlocks when
+ * an MMU notifier runs in reclaim-FS context.
+ */
+static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
+{
+	mutex_lock(&vm->eviction_lock);
+	vm->saved_flags = memalloc_nofs_save();
+}
+
+static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
+{
+	if (mutex_trylock(&vm->eviction_lock)) {
+		vm->saved_flags = memalloc_nofs_save();
+		return 1;
+	}
+	return 0;
+}
+
+static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
+{
+	memalloc_nofs_restore(vm->saved_flags);
+	mutex_unlock(&vm->eviction_lock);
+}
+
 /**
  * amdgpu_vm_level_shift - return the addr shift for each level
  *
@@ -678,9 +704,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		}
 	}
 
-	mutex_lock(&vm->eviction_lock);
+	amdgpu_vm_eviction_lock(vm);
 	vm->evicting = false;
-	mutex_unlock(&vm->eviction_lock);
+	amdgpu_vm_eviction_unlock(vm);
 
 	return 0;
 }
@@ -1559,7 +1585,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (!(flags & AMDGPU_PTE_VALID))
 		owner = AMDGPU_FENCE_OWNER_KFD;
 
-	mutex_lock(&vm->eviction_lock);
+	amdgpu_vm_eviction_lock(vm);
 	if (vm->evicting) {
 		r = -EBUSY;
 		goto error_unlock;
@@ -1576,7 +1602,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	r = vm->update_funcs->commit(&params, fence);
 
 error_unlock:
-	mutex_unlock(&vm->eviction_lock);
+	amdgpu_vm_eviction_unlock(vm);
 	return r;
 }
 
@@ -2537,18 +2563,18 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
 		return false;
 
 	/* Try to block ongoing updates */
-	if (!mutex_trylock(&bo_base->vm->eviction_lock))
+	if (!amdgpu_vm_eviction_trylock(bo_base->vm))
 		return false;
 
 	/* Don't evict VM page tables while they are updated */
 	if (!dma_fence_is_signaled(bo_base->vm->last_direct) ||
 	    !dma_fence_is_signaled(bo_base->vm->last_delayed)) {
-		mutex_unlock(&bo_base->vm->eviction_lock);
+		amdgpu_vm_eviction_unlock(bo_base->vm);
 		return false;
 	}
 
 	bo_base->vm->evicting = true;
-	mutex_unlock(&bo_base->vm->eviction_lock);
+	amdgpu_vm_eviction_unlock(bo_base->vm);
 	return true;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 100547f094ff..c21a36bebc0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -30,6 +30,7 @@
 #include <drm/gpu_scheduler.h>
 #include <drm/drm_file.h>
 #include <drm/ttm/ttm_bo_driver.h>
+#include <linux/sched/mm.h>
 
 #include "amdgpu_sync.h"
 #include "amdgpu_ring.h"
@@ -242,9 +243,12 @@ struct amdgpu_vm {
 	/* tree of virtual addresses mapped */
 	struct rb_root_cached	va;
 
-	/* Lock to prevent eviction while we are updating page tables */
+	/* Lock to prevent eviction while we are updating page tables
+	 * use amdgpu_vm_eviction_lock/unlock(vm)
+	 */
 	struct mutex		eviction_lock;
 	bool			evicting;
+	unsigned int		saved_flags;
 
 	/* BOs who needs a validation */
 	struct list_head	evicted;
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 2/7] drm/amdgpu: kiq pm4 function implementation for gfx_v9
  2020-01-11 18:39 [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock Alex Sierra
@ 2020-01-11 18:39 ` Alex Sierra
  2020-01-13 12:10   ` Christian König
  2020-01-13 16:57   ` Felix Kuehling
  2020-01-11 18:39 ` [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra
                   ` (5 subsequent siblings)
  6 siblings, 2 replies; 19+ messages in thread
From: Alex Sierra @ 2020-01-11 18:39 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Sierra

Implement the kiq_pm4_funcs struct members (callbacks and packet
sizes) for gfx_v9.

Change-Id: I8fd3e160c4bd58f19d35d29e39517db967063afe
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 115 ++++++++++++++++++++++++++
 1 file changed, 115 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index e3d466bd5c4e..ad0179ea2cc5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -739,6 +739,120 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
 				     void *inject_if);
 
+static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
+				uint64_t queue_mask)
+{
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+	amdgpu_ring_write(kiq_ring,
+		PACKET3_SET_RESOURCES_VMID_MASK(0) |
+		/* vmid_mask:0 queue_type:0 (KIQ) */
+		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
+	amdgpu_ring_write(kiq_ring,
+			lower_32_bits(queue_mask));	/* queue mask lo */
+	amdgpu_ring_write(kiq_ring,
+			upper_32_bits(queue_mask));	/* queue mask hi */
+	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
+	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
+	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
+	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
+}
+
+static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
+				 struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = kiq_ring->adev;
+	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
+	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
+			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
+			 /*queue_type: normal compute queue */
+			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
+			 /* alloc format: all_on_one_pipe */
+			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
+			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
+			 /* num_queues: must be 1 */
+			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
+	amdgpu_ring_write(kiq_ring,
+			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
+	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+}
+
+static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
+				   struct amdgpu_ring *ring,
+				   enum amdgpu_unmap_queues_action action,
+				   u64 gpu_addr, u64 seq)
+{
+	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+			  PACKET3_UNMAP_QUEUES_ACTION(action) |
+			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
+			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
+			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
+	amdgpu_ring_write(kiq_ring,
+			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+
+	if (action == PREEMPT_QUEUES_NO_UNMAP) {
+		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
+		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
+		amdgpu_ring_write(kiq_ring, seq);
+	} else {
+		amdgpu_ring_write(kiq_ring, 0);
+		amdgpu_ring_write(kiq_ring, 0);
+		amdgpu_ring_write(kiq_ring, 0);
+	}
+}
+
+static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
+				   struct amdgpu_ring *ring,
+				   u64 addr,
+				   u64 seq)
+{
+	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
+	amdgpu_ring_write(kiq_ring,
+			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
+			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
+			  PACKET3_QUERY_STATUS_COMMAND(2));
+	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+	amdgpu_ring_write(kiq_ring,
+			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
+			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
+	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
+	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
+	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
+	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
+}
+
+static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
+	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
+	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
+	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
+	.kiq_query_status = gfx_v9_0_kiq_query_status,
+	.set_resources_size = 8,
+	.map_queues_size = 7,
+	.unmap_queues_size = 6,
+	.query_status_size = 7,
+};
+
+static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
+{
+	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
+}
+
 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
 {
 	switch (adev->asic_type) {
@@ -4260,6 +4374,7 @@ static int gfx_v9_0_early_init(void *handle)
 	else
 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
 	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+	gfx_v9_0_set_kiq_pm4_funcs(adev);
 	gfx_v9_0_set_ring_funcs(adev);
 	gfx_v9_0_set_irq_funcs(adev);
 	gfx_v9_0_set_gds_init(adev);
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10
  2020-01-11 18:39 [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock Alex Sierra
  2020-01-11 18:39 ` [PATCH 2/7] drm/amdgpu: kiq pm4 function implementation for gfx_v9 Alex Sierra
@ 2020-01-11 18:39 ` Alex Sierra
  2020-01-13 12:12   ` Christian König
  2020-01-13 16:36   ` Felix Kuehling
  2020-01-11 18:39 ` [PATCH 4/7] drm/amdgpu: replace kcq enable function on gfx_v9 Alex Sierra
                   ` (4 subsequent siblings)
  6 siblings, 2 replies; 19+ messages in thread
From: Alex Sierra @ 2020-01-11 18:39 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Sierra

Add a TLB invalidate function pointer to the kiq_pm4_funcs struct.
This way, a TLB flush can be done through the KIQ.
TLB invalidation is implemented for gfx9 and gfx10.

Change-Id: I1b77b364f3ae0038ff3e70e869be5f2ef6e6d293
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  4 +++
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 33 +++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 32 ++++++++++++++++++++++++
 3 files changed, 69 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 8e88e0411662..2927837bd401 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -76,11 +76,15 @@ struct kiq_pm4_funcs {
 					struct amdgpu_ring *ring,
 					u64 addr,
 					u64 seq);
+	int (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring,
+				uint16_t pasid, uint32_t flush_type,
+				bool all_hub);
 	/* Packet sizes */
 	int set_resources_size;
 	int map_queues_size;
 	int unmap_queues_size;
 	int query_status_size;
+	int invalidate_tlbs_size;
 };
 
 struct amdgpu_kiq {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 379e46c1b7f6..2e82213f57eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -40,6 +40,7 @@
 #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
 
 #include "soc15.h"
+#include "soc15d.h"
 #include "soc15_common.h"
 #include "clearstate_gfx10.h"
 #include "v10_structs.h"
@@ -346,15 +347,47 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
 }
 
+static int gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+				uint16_t pasid, uint32_t flush_type,
+				bool all_hub)
+{
+	signed long r;
+	uint32_t seq;
+	struct amdgpu_device *adev = kiq_ring->adev;
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+
+	spin_lock(&adev->gfx.kiq.ring_lock);
+	amdgpu_ring_alloc(kiq_ring, kiq->pmf->invalidate_tlbs_size);
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+	amdgpu_ring_write(kiq_ring,
+			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+	amdgpu_fence_emit_polling(kiq_ring, &seq);
+	amdgpu_ring_commit(kiq_ring);
+	spin_unlock(&adev->gfx.kiq.ring_lock);
+
+	r = amdgpu_fence_wait_polling(kiq_ring, seq, adev->usec_timeout);
+	if (r < 1) {
+		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+		return -ETIME;
+	}
+
+	return 0;
+}
+
 static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
 	.kiq_set_resources = gfx10_kiq_set_resources,
 	.kiq_map_queues = gfx10_kiq_map_queues,
 	.kiq_unmap_queues = gfx10_kiq_unmap_queues,
 	.kiq_query_status = gfx10_kiq_query_status,
+	.kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
 	.set_resources_size = 8,
 	.map_queues_size = 7,
 	.unmap_queues_size = 6,
 	.query_status_size = 7,
+	.invalidate_tlbs_size = 12,
 };
 
 static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index ad0179ea2cc5..5be6fab55b73 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -837,15 +837,47 @@ static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
 }
 
+static int gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+				uint16_t pasid, uint32_t flush_type,
+				bool all_hub)
+{
+	signed long r;
+	uint32_t seq;
+	struct amdgpu_device *adev = kiq_ring->adev;
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+
+	spin_lock(&adev->gfx.kiq.ring_lock);
+	amdgpu_ring_alloc(kiq_ring, kiq->pmf->invalidate_tlbs_size);
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+	amdgpu_ring_write(kiq_ring,
+			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+	amdgpu_fence_emit_polling(kiq_ring, &seq);
+	amdgpu_ring_commit(kiq_ring);
+	spin_unlock(&adev->gfx.kiq.ring_lock);
+
+	r = amdgpu_fence_wait_polling(kiq_ring, seq, adev->usec_timeout);
+	if (r < 1) {
+		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+		return -ETIME;
+	}
+
+	return 0;
+}
+
 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
 	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
 	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
 	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
 	.kiq_query_status = gfx_v9_0_kiq_query_status,
+	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
 	.set_resources_size = 8,
 	.map_queues_size = 7,
 	.unmap_queues_size = 6,
 	.query_status_size = 7,
+	.invalidate_tlbs_size = 12,
 };
 
 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 4/7] drm/amdgpu: replace kcq enable function on gfx_v9
  2020-01-11 18:39 [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock Alex Sierra
  2020-01-11 18:39 ` [PATCH 2/7] drm/amdgpu: kiq pm4 function implementation for gfx_v9 Alex Sierra
  2020-01-11 18:39 ` [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra
@ 2020-01-11 18:39 ` Alex Sierra
  2020-01-13 12:12   ` Christian König
  2020-01-13 16:53   ` Felix Kuehling
  2020-01-11 18:39 ` [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid Alex Sierra
                   ` (3 subsequent siblings)
  6 siblings, 2 replies; 19+ messages in thread
From: Alex Sierra @ 2020-01-11 18:39 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Sierra

[Why]
There's a HW-independent function that enables kcq. This function uses
the kiq_pm4_funcs implementation.

[How]
Remove the local kcq enable function and replace it with the generic
kcq enable function under amdgpu_gfx (amdgpu_gfx_enable_kcq).

Change-Id: I7709bdba93742c234941a5936c82eb67e346077c
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 70 +--------------------------
 1 file changed, 1 insertion(+), 69 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 5be6fab55b73..7219eacad9ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3252,74 +3252,6 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
 }
 
-static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
-{
-	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
-	uint64_t queue_mask = 0;
-	int r, i;
-
-	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
-		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
-			continue;
-
-		/* This situation may be hit in the future if a new HW
-		 * generation exposes more than 64 queues. If so, the
-		 * definition of queue_mask needs updating */
-		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
-			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
-			break;
-		}
-
-		queue_mask |= (1ull << i);
-	}
-
-	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
-	if (r) {
-		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
-		return r;
-	}
-
-	/* set resources */
-	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
-	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
-			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
-	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
-	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
-	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
-	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
-	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
-	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
-		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
-		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-
-		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
-		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
-		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
-				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
-				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
-				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
-				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
-				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
-				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
-				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
-				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
-				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
-		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
-		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
-		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
-		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
-		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
-	}
-
-	r = amdgpu_ring_test_helper(kiq_ring);
-	if (r)
-		DRM_ERROR("KCQ enable failed\n");
-
-	return r;
-}
-
 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -3726,7 +3658,7 @@ static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
 			goto done;
 	}
 
-	r = gfx_v9_0_kiq_kcq_enable(adev);
+	r = amdgpu_gfx_enable_kcq(adev);
 done:
 	return r;
 }
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid
  2020-01-11 18:39 [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock Alex Sierra
                   ` (2 preceding siblings ...)
  2020-01-11 18:39 ` [PATCH 4/7] drm/amdgpu: replace kcq enable function on gfx_v9 Alex Sierra
@ 2020-01-11 18:39 ` Alex Sierra
  2020-01-13 12:15   ` Christian König
  2020-01-13 16:49   ` Felix Kuehling
  2020-01-11 18:39 ` [PATCH 6/7] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd Alex Sierra
                   ` (2 subsequent siblings)
  6 siblings, 2 replies; 19+ messages in thread
From: Alex Sierra @ 2020-01-11 18:39 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Sierra

This can be used directly from amdgpu and amdkfd to invalidate
TLB through pasid.
It supports gmc v7, v8, v9 and v10.

Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  6 +++
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 59 ++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   | 33 +++++++++++++
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   | 34 ++++++++++++++
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 61 +++++++++++++++++++++++++
 5 files changed, 193 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index c91dd602d5f1..d3c27a3c43f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs {
 	/* flush the vm tlb via mmio */
 	void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
 				uint32_t vmhub, uint32_t flush_type);
+	/* flush the vm tlb via pasid */
+	int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
+					uint32_t flush_type, bool all_hub);
 	/* flush the vm tlb via ring */
 	uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
 				       uint64_t pd_addr);
@@ -216,6 +219,9 @@ struct amdgpu_gmc {
 };
 
 #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
+#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \
+	((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
+	((adev), (pasid), (type), (allhub)))
 #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
 #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
 #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 5ad89bb6f3ba..09408b8b390f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -30,6 +30,8 @@
 #include "hdp/hdp_5_0_0_sh_mask.h"
 #include "gc/gc_10_1_0_sh_mask.h"
 #include "mmhub/mmhub_2_0_0_sh_mask.h"
+#include "athub/athub_2_0_0_sh_mask.h"
+#include "athub/athub_2_0_0_offset.h"
 #include "dcn/dcn_2_0_0_offset.h"
 #include "dcn/dcn_2_0_0_sh_mask.h"
 #include "oss/osssys_5_0_0_offset.h"
@@ -37,6 +39,7 @@
 #include "navi10_enum.h"
 
 #include "soc15.h"
+#include "soc15d.h"
 #include "soc15_common.h"
 
 #include "nbio_v2_3.h"
@@ -234,6 +237,19 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
 		(!amdgpu_sriov_vf(adev)));
 }
 
+static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
+					struct amdgpu_device *adev,
+					uint8_t vmid, uint16_t *p_pasid)
+{
+	uint32_t value;
+
+	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+		     + vmid);
+	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
 /*
  * GART
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -380,6 +396,48 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
 }
 
+/**
+ * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flushed
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					uint16_t pasid, uint32_t flush_type,
+					bool all_hub)
+{
+	int vmid, i;
+	uint16_t queried_pasid;
+	bool ret;
+	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+
+	if (amdgpu_emu_mode == 0 && ring->sched.ready)
+		return kiq->pmf->kiq_invalidate_tlbs(ring,
+						pasid, flush_type, all_hub);
+
+	for (vmid = 1; vmid < 16; vmid++) {
+
+		ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+				&queried_pasid);
+		if (ret	&& queried_pasid == pasid) {
+			if (all_hub) {
+				for (i = 0; i < adev->num_vmhubs; i++)
+					gmc_v10_0_flush_gpu_tlb(adev, vmid,
+							i, 0);
+			} else {
+				gmc_v10_0_flush_gpu_tlb(adev, vmid,
+						AMDGPU_GFXHUB_0, 0);
+			}
+			break;
+		}
+	}
+
+	return 0;
+}
+
 static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 					     unsigned vmid, uint64_t pd_addr)
 {
@@ -531,6 +589,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
 
 static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
 	.flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
+	.flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
 	.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
 	.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
 	.map_mtype = gmc_v10_0_map_mtype,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index f08e5330642d..19d5b133e1d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -418,6 +418,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
 	return 0;
 }
 
+/**
+ * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flushed
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					uint16_t pasid, uint32_t flush_type,
+					bool all_hub)
+{
+	int vmid;
+	unsigned int tmp;
+
+	if (adev->in_gpu_reset)
+		return -EIO;
+
+	for (vmid = 1; vmid < 16; vmid++) {
+
+		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
+			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
+			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+			RREG32(mmVM_INVALIDATE_RESPONSE);
+			break;
+		}
+	}
+
+	return 0;
+}
+
 /*
  * GART
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -1333,6 +1365,7 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = {
 
 static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
 	.flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb,
+	.flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid,
 	.emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb,
 	.emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping,
 	.set_prt = gmc_v7_0_set_prt,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 6d96d40fbcb8..27d83204fa2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
 	return 0;
 }
 
+/**
+ * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flushed
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					uint16_t pasid, uint32_t flush_type,
+					bool all_hub)
+{
+	int vmid;
+	unsigned int tmp;
+
+	if (adev->in_gpu_reset)
+		return -EIO;
+
+	for (vmid = 1; vmid < 16; vmid++) {
+
+		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
+			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
+			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+			RREG32(mmVM_INVALIDATE_RESPONSE);
+			break;
+		}
+	}
+
+	return 0;
+
+}
+
 /*
  * GART
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -1700,6 +1733,7 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
 
 static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
 	.flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb,
+	.flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid,
 	.emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb,
 	.emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping,
 	.set_prt = gmc_v8_0_set_prt,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index b83c8d745f42..95cce54999b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -38,10 +38,12 @@
 #include "dce/dce_12_0_sh_mask.h"
 #include "vega10_enum.h"
 #include "mmhub/mmhub_1_0_offset.h"
+#include "athub/athub_1_0_sh_mask.h"
 #include "athub/athub_1_0_offset.h"
 #include "oss/osssys_4_0_offset.h"
 
 #include "soc15.h"
+#include "soc15d.h"
 #include "soc15_common.h"
 #include "umc/umc_6_0_sh_mask.h"
 
@@ -441,6 +443,18 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
 		   adev->pdev->device == 0x15d8)));
 }
 
+static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
+					uint8_t vmid, uint16_t *p_pasid)
+{
+	uint32_t value;
+
+	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+		     + vmid);
+	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
 /*
  * GART
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -539,6 +553,52 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
 }
 
+/**
+ * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flushed
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					uint16_t pasid, uint32_t flush_type,
+					bool all_hub)
+{
+	int vmid, i;
+	uint16_t queried_pasid;
+	bool ret;
+	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+
+	if (adev->in_gpu_reset)
+		return -EIO;
+
+	if (ring->sched.ready)
+		return kiq->pmf->kiq_invalidate_tlbs(ring,
+						pasid, flush_type, all_hub);
+
+	for (vmid = 1; vmid < 16; vmid++) {
+
+		ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+				&queried_pasid);
+		if (ret && queried_pasid == pasid) {
+			if (all_hub) {
+				for (i = 0; i < adev->num_vmhubs; i++)
+					gmc_v9_0_flush_gpu_tlb(adev, vmid,
+							i, 0);
+			} else {
+				gmc_v9_0_flush_gpu_tlb(adev, vmid,
+						AMDGPU_GFXHUB_0, 0);
+			}
+			break;
+		}
+	}
+
+	return 0;
+
+}
+
 static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 					    unsigned vmid, uint64_t pd_addr)
 {
@@ -700,6 +760,7 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
 
 static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
 	.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
+	.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
 	.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
 	.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
 	.map_mtype = gmc_v9_0_map_mtype,
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 6/7] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd
  2020-01-11 18:39 [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock Alex Sierra
                   ` (3 preceding siblings ...)
  2020-01-11 18:39 ` [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid Alex Sierra
@ 2020-01-11 18:39 ` Alex Sierra
  2020-01-13 12:16   ` Christian König
  2020-01-13 16:58   ` Felix Kuehling
  2020-01-11 18:39 ` [PATCH 7/7] drm/amdgpu: flush TLB functions removal from kfd2kgd interface Alex Sierra
  2020-01-13 12:10 ` [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock Christian König
  6 siblings, 2 replies; 19+ messages in thread
From: Alex Sierra @ 2020-01-11 18:39 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Sierra

[Why]
The TLB flush methods of the kfd2kgd interface have been deprecated.
Their implementation now lives in the amdgpu_amdkfd API.

[How]
TLB flush functions are now implemented in amdgpu_amdkfd.

Change-Id: Ic51cccdfe6e71288d78da772b6e1b6ced72f8ef7
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 32 ++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  2 ++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c   |  8 ++++--
 3 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 88e10b956413..8609287620ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -628,6 +628,38 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
 	return false;
 }
 
+int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	if (adev->family == AMDGPU_FAMILY_AI) {
+		int i;
+
+		for (i = 0; i < adev->num_vmhubs; i++)
+			amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
+	} else {
+		amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
+	}
+
+	return 0;
+}
+
+int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	uint32_t flush_type = 0;
+	bool all_hub = false;
+
+	if (adev->gmc.xgmi.num_physical_nodes &&
+		adev->asic_type == CHIP_VEGA20)
+		flush_type = 2;
+
+	if (adev->family == AMDGPU_FAMILY_AI)
+		all_hub = true;
+
+	return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
+}
+
 bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 069d5d230810..47b0f2957d1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -136,6 +136,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
 				uint32_t *ib_cmd, uint32_t ib_len);
 void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
 bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd);
+int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid);
+int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid);
 
 bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 536a153ac9a4..25b90f70aecd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -32,6 +32,7 @@
 #include <linux/mman.h>
 #include <linux/file.h>
 #include "amdgpu_amdkfd.h"
+#include "amdgpu.h"
 
 struct mm_struct;
 
@@ -1152,16 +1153,17 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
 void kfd_flush_tlb(struct kfd_process_device *pdd)
 {
 	struct kfd_dev *dev = pdd->dev;
-	const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
 
 	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
 		/* Nothing to flush until a VMID is assigned, which
 		 * only happens when the first queue is created.
 		 */
 		if (pdd->qpd.vmid)
-			f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
+			amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
+							pdd->qpd.vmid);
 	} else {
-		f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
+		amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
+						pdd->process->pasid);
 	}
 }
 
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 7/7] drm/amdgpu: flush TLB functions removal from kfd2kgd interface
  2020-01-11 18:39 [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock Alex Sierra
                   ` (4 preceding siblings ...)
  2020-01-11 18:39 ` [PATCH 6/7] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd Alex Sierra
@ 2020-01-11 18:39 ` Alex Sierra
  2020-01-13 12:17   ` Christian König
  2020-01-13 12:10 ` [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock Christian König
  6 siblings, 1 reply; 19+ messages in thread
From: Alex Sierra @ 2020-01-11 18:39 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Sierra

[Why]
The kfd2kgd interface will be deprecated. This removal only covers the TLB
invalidation functions for now; they have been replaced by the amdgpu_amdkfd API.

[How]
The TLB invalidate functions are removed from the different amdkfd_gfx_v*
versions.

Change-Id: Ic2c7d4a0d19fe1e884dee1ff10a520d31252afee
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  2 -
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c    | 67 -------------
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 41 --------
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 41 --------
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 96 -------------------
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  2 -
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |  2 -
 7 files changed, 251 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index f9011a07cb90..562e7a7f51a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -317,7 +317,5 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
 			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
 	.get_tile_config = kgd_gfx_v9_get_tile_config,
 	.set_vm_context_page_table_base = kgd_set_vm_context_page_table_base,
-	.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
-	.invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid,
 	.get_hive_id = amdgpu_amdkfd_get_hive_id,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 61cd707158e4..6132b4874498 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -686,71 +686,6 @@ static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
 	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
 }
 
-static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
-{
-	signed long r;
-	uint32_t seq;
-	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-
-	spin_lock(&adev->gfx.kiq.ring_lock);
-	amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
-	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
-	amdgpu_ring_write(ring,
-			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
-			PACKET3_INVALIDATE_TLBS_PASID(pasid));
-	amdgpu_fence_emit_polling(ring, &seq);
-	amdgpu_ring_commit(ring);
-	spin_unlock(&adev->gfx.kiq.ring_lock);
-
-	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
-	if (r < 1) {
-		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
-		return -ETIME;
-	}
-
-	return 0;
-}
-
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-	int vmid;
-	uint16_t queried_pasid;
-	bool ret;
-	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-
-	if (amdgpu_emu_mode == 0 && ring->sched.ready)
-		return invalidate_tlbs_with_kiq(adev, pasid);
-
-	for (vmid = 0; vmid < 16; vmid++) {
-		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
-			continue;
-
-		ret = get_atc_vmid_pasid_mapping_info(kgd, vmid,
-				&queried_pasid);
-		if (ret	&& queried_pasid == pasid) {
-			amdgpu_gmc_flush_gpu_tlb(adev, vmid,
-					AMDGPU_GFXHUB_0, 0);
-			break;
-		}
-	}
-
-	return 0;
-}
-
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
-	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
-		pr_err("non kfd vmid %d\n", vmid);
-		return 0;
-	}
-
-	amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
-	return 0;
-}
-
 static int kgd_address_watch_disable(struct kgd_dev *kgd)
 {
 	return 0;
@@ -832,7 +767,5 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
 			get_atc_vmid_pasid_mapping_info,
 	.get_tile_config = amdgpu_amdkfd_get_tile_config,
 	.set_vm_context_page_table_base = set_vm_context_page_table_base,
-	.invalidate_tlbs = invalidate_tlbs,
-	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
 	.get_hive_id = amdgpu_amdkfd_get_hive_id,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 6e6f0a99ec06..8f052e98a3c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -696,45 +696,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
 		lower_32_bits(page_table_base));
 }
 
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-	int vmid;
-	unsigned int tmp;
-
-	if (adev->in_gpu_reset)
-		return -EIO;
-
-	for (vmid = 0; vmid < 16; vmid++) {
-		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
-			continue;
-
-		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
-		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
-			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
-			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
-			RREG32(mmVM_INVALIDATE_RESPONSE);
-			break;
-		}
-	}
-
-	return 0;
-}
-
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
-	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
-		pr_err("non kfd vmid\n");
-		return 0;
-	}
-
-	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
-	RREG32(mmVM_INVALIDATE_RESPONSE);
-	return 0;
-}
-
  /**
   * read_vmid_from_vmfault_reg - read vmid from register
   *
@@ -771,7 +732,5 @@ const struct kfd2kgd_calls gfx_v7_kfd2kgd = {
 	.set_scratch_backing_va = set_scratch_backing_va,
 	.get_tile_config = get_tile_config,
 	.set_vm_context_page_table_base = set_vm_context_page_table_base,
-	.invalidate_tlbs = invalidate_tlbs,
-	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
 	.read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index bfbddedb2380..19a10db93d68 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -657,45 +657,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
 			lower_32_bits(page_table_base));
 }
 
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-	int vmid;
-	unsigned int tmp;
-
-	if (adev->in_gpu_reset)
-		return -EIO;
-
-	for (vmid = 0; vmid < 16; vmid++) {
-		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
-			continue;
-
-		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
-		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
-			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
-			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
-			RREG32(mmVM_INVALIDATE_RESPONSE);
-			break;
-		}
-	}
-
-	return 0;
-}
-
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
-	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
-		pr_err("non kfd vmid %d\n", vmid);
-		return -EINVAL;
-	}
-
-	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
-	RREG32(mmVM_INVALIDATE_RESPONSE);
-	return 0;
-}
-
 const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
 	.program_sh_mem_settings = kgd_program_sh_mem_settings,
 	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
@@ -717,6 +678,4 @@ const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
 	.set_scratch_backing_va = set_scratch_backing_va,
 	.get_tile_config = get_tile_config,
 	.set_vm_context_page_table_base = set_vm_context_page_table_base,
-	.invalidate_tlbs = invalidate_tlbs,
-	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index e7861f0ef415..932ae85d97e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -617,100 +617,6 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
 	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
 }
 
-static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid,
-			uint32_t flush_type)
-{
-	signed long r;
-	uint32_t seq;
-	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-
-	spin_lock(&adev->gfx.kiq.ring_lock);
-	amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
-	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
-	amdgpu_ring_write(ring,
-			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
-			PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
-			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
-			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
-	amdgpu_fence_emit_polling(ring, &seq);
-	amdgpu_ring_commit(ring);
-	spin_unlock(&adev->gfx.kiq.ring_lock);
-
-	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
-	if (r < 1) {
-		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
-		return -ETIME;
-	}
-
-	return 0;
-}
-
-int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-	int vmid, i;
-	uint16_t queried_pasid;
-	bool ret;
-	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-	uint32_t flush_type = 0;
-
-	if (adev->in_gpu_reset)
-		return -EIO;
-	if (adev->gmc.xgmi.num_physical_nodes &&
-		adev->asic_type == CHIP_VEGA20)
-		flush_type = 2;
-
-	if (ring->sched.ready)
-		return invalidate_tlbs_with_kiq(adev, pasid, flush_type);
-
-	for (vmid = 0; vmid < 16; vmid++) {
-		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
-			continue;
-
-		ret = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(kgd, vmid,
-				&queried_pasid);
-		if (ret && queried_pasid == pasid) {
-			for (i = 0; i < adev->num_vmhubs; i++)
-				amdgpu_gmc_flush_gpu_tlb(adev, vmid,
-							i, flush_type);
-			break;
-		}
-	}
-
-	return 0;
-}
-
-int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-	int i;
-
-	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
-		pr_err("non kfd vmid %d\n", vmid);
-		return 0;
-	}
-
-	/* Use legacy mode tlb invalidation.
-	 *
-	 * Currently on Raven the code below is broken for anything but
-	 * legacy mode due to a MMHUB power gating problem. A workaround
-	 * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
-	 * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
-	 * bit.
-	 *
-	 * TODO 1: agree on the right set of invalidation registers for
-	 * KFD use. Use the last one for now. Invalidate both GC and
-	 * MMHUB.
-	 *
-	 * TODO 2: support range-based invalidation, requires kfg2kgd
-	 * interface change
-	 */
-	for (i = 0; i < adev->num_vmhubs; i++)
-		amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
-
-	return 0;
-}
-
 int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd)
 {
 	return 0;
@@ -793,7 +699,5 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
 			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
 	.get_tile_config = kgd_gfx_v9_get_tile_config,
 	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
-	.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
-	.invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid,
 	.get_hive_id = amdgpu_amdkfd_get_hive_id,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
index 02b1426d17d1..dfafa28b7559 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -57,7 +57,5 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,
 
 bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
 					uint8_t vmid, uint16_t *p_pasid);
-int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
-int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
 int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd,
 		struct tile_config *config);
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 2cd217e60125..a01ef836ad58 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -307,8 +307,6 @@ struct kfd2kgd_calls {
 
 	void (*set_vm_context_page_table_base)(struct kgd_dev *kgd,
 			uint32_t vmid, uint64_t page_table_base);
-	int (*invalidate_tlbs)(struct kgd_dev *kgd, uint16_t pasid);
-	int (*invalidate_tlbs_vmid)(struct kgd_dev *kgd, uint16_t vmid);
 	uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd);
 	uint64_t (*get_hive_id)(struct kgd_dev *kgd);
 
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock
  2020-01-11 18:39 [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock Alex Sierra
                   ` (5 preceding siblings ...)
  2020-01-11 18:39 ` [PATCH 7/7] drm/amdgpu: flush TLB functions removal from kfd2kgd interface Alex Sierra
@ 2020-01-13 12:10 ` Christian König
  6 siblings, 0 replies; 19+ messages in thread
From: Christian König @ 2020-01-13 12:10 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx

Am 11.01.20 um 19:39 schrieb Alex Sierra:
> [Why]
> Avoid reclaim filesystem while eviction lock is held called from
> MMU notifier.
>
> [How]
> Setting PF_MEMALLOC_NOFS flags while eviction mutex is locked.
> Using memalloc_nofs_save / memalloc_nofs_restore API.
>
> Change-Id: I5531c9337836e7d4a430df3f16dcc82888e8018c
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 40 +++++++++++++++++++++-----
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  6 +++-
>   2 files changed, 38 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index b999b67ff57a..d6aba4f9df74 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -82,6 +82,32 @@ struct amdgpu_prt_cb {
>   	struct dma_fence_cb cb;
>   };
>   
> +/**
> + * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
> + * happens while holding this lock anywhere to prevent deadlocks when
> + * an MMU notifier runs in reclaim-FS context.
> + */
> +static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
> +{
> +	mutex_lock(&vm->eviction_lock);
> +	vm->saved_flags = memalloc_nofs_save();
> +}
> +
> +static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
> +{
> +	if (mutex_trylock(&vm->eviction_lock)) {
> +		vm->saved_flags = memalloc_nofs_save();
> +		return 1;
> +	}
> +	return 0;
> +}
> +
> +static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
> +{
> +	memalloc_nofs_restore(vm->saved_flags);
> +	mutex_unlock(&vm->eviction_lock);
> +}
> +
>   /**
>    * amdgpu_vm_level_shift - return the addr shift for each level
>    *
> @@ -678,9 +704,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   		}
>   	}
>   
> -	mutex_lock(&vm->eviction_lock);
> +	amdgpu_vm_eviction_lock(vm);
>   	vm->evicting = false;
> -	mutex_unlock(&vm->eviction_lock);
> +	amdgpu_vm_eviction_unlock(vm);
>   
>   	return 0;
>   }
> @@ -1559,7 +1585,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   	if (!(flags & AMDGPU_PTE_VALID))
>   		owner = AMDGPU_FENCE_OWNER_KFD;
>   
> -	mutex_lock(&vm->eviction_lock);
> +	amdgpu_vm_eviction_lock(vm);
>   	if (vm->evicting) {
>   		r = -EBUSY;
>   		goto error_unlock;
> @@ -1576,7 +1602,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   	r = vm->update_funcs->commit(&params, fence);
>   
>   error_unlock:
> -	mutex_unlock(&vm->eviction_lock);
> +	amdgpu_vm_eviction_unlock(vm);
>   	return r;
>   }
>   
> @@ -2537,18 +2563,18 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
>   		return false;
>   
>   	/* Try to block ongoing updates */
> -	if (!mutex_trylock(&bo_base->vm->eviction_lock))
> +	if (!amdgpu_vm_eviction_trylock(bo_base->vm))
>   		return false;
>   
>   	/* Don't evict VM page tables while they are updated */
>   	if (!dma_fence_is_signaled(bo_base->vm->last_direct) ||
>   	    !dma_fence_is_signaled(bo_base->vm->last_delayed)) {
> -		mutex_unlock(&bo_base->vm->eviction_lock);
> +		amdgpu_vm_eviction_unlock(bo_base->vm);
>   		return false;
>   	}
>   
>   	bo_base->vm->evicting = true;
> -	mutex_unlock(&bo_base->vm->eviction_lock);
> +	amdgpu_vm_eviction_unlock(bo_base->vm);
>   	return true;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 100547f094ff..c21a36bebc0c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -30,6 +30,7 @@
>   #include <drm/gpu_scheduler.h>
>   #include <drm/drm_file.h>
>   #include <drm/ttm/ttm_bo_driver.h>
> +#include <linux/sched/mm.h>
>   
>   #include "amdgpu_sync.h"
>   #include "amdgpu_ring.h"
> @@ -242,9 +243,12 @@ struct amdgpu_vm {
>   	/* tree of virtual addresses mapped */
>   	struct rb_root_cached	va;
>   
> -	/* Lock to prevent eviction while we are updating page tables */
> +	/* Lock to prevent eviction while we are updating page tables
> +	 * use vm_eviction_lock/unlock(vm)
> +	 */
>   	struct mutex		eviction_lock;
>   	bool			evicting;
> +	unsigned int		saved_flags;
>   
>   	/* BOs who needs a validation */
>   	struct list_head	evicted;

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/7] drm/amdgpu: kiq pm4 function implementation for gfx_v9
  2020-01-11 18:39 ` [PATCH 2/7] drm/amdgpu: kiq pm4 function implementation for gfx_v9 Alex Sierra
@ 2020-01-13 12:10   ` Christian König
  2020-01-13 16:57   ` Felix Kuehling
  1 sibling, 0 replies; 19+ messages in thread
From: Christian König @ 2020-01-13 12:10 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx

Am 11.01.20 um 19:39 schrieb Alex Sierra:
> Functions implemented from kiq_pm4_funcs struct members
> for gfx_v9 version.
>
> Change-Id: I8fd3e160c4bd58f19d35d29e39517db967063afe
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>

Acked-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 115 ++++++++++++++++++++++++++
>   1 file changed, 115 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index e3d466bd5c4e..ad0179ea2cc5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -739,6 +739,120 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
>   static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
>   				     void *inject_if);
>   
> +static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
> +				uint64_t queue_mask)
> +{
> +	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
> +	amdgpu_ring_write(kiq_ring,
> +		PACKET3_SET_RESOURCES_VMID_MASK(0) |
> +		/* vmid_mask:0* queue_type:0 (KIQ) */
> +		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
> +	amdgpu_ring_write(kiq_ring,
> +			lower_32_bits(queue_mask));	/* queue mask lo */
> +	amdgpu_ring_write(kiq_ring,
> +			upper_32_bits(queue_mask));	/* queue mask hi */
> +	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
> +	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
> +	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
> +	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
> +}
> +
> +static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
> +				 struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_device *adev = kiq_ring->adev;
> +	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
> +	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
> +	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
> +
> +	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
> +	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
> +	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
> +			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
> +			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
> +			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
> +			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
> +			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
> +			 /*queue_type: normal compute queue */
> +			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
> +			 /* alloc format: all_on_one_pipe */
> +			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
> +			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
> +			 /* num_queues: must be 1 */
> +			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
> +	amdgpu_ring_write(kiq_ring,
> +			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
> +	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
> +	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
> +	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
> +	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
> +}
> +
> +static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
> +				   struct amdgpu_ring *ring,
> +				   enum amdgpu_unmap_queues_action action,
> +				   u64 gpu_addr, u64 seq)
> +{
> +	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
> +
> +	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
> +	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
> +			  PACKET3_UNMAP_QUEUES_ACTION(action) |
> +			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
> +			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
> +			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
> +	amdgpu_ring_write(kiq_ring,
> +			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
> +
> +	if (action == PREEMPT_QUEUES_NO_UNMAP) {
> +		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
> +		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
> +		amdgpu_ring_write(kiq_ring, seq);
> +	} else {
> +		amdgpu_ring_write(kiq_ring, 0);
> +		amdgpu_ring_write(kiq_ring, 0);
> +		amdgpu_ring_write(kiq_ring, 0);
> +	}
> +}
> +
> +static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
> +				   struct amdgpu_ring *ring,
> +				   u64 addr,
> +				   u64 seq)
> +{
> +	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
> +
> +	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
> +	amdgpu_ring_write(kiq_ring,
> +			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
> +			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
> +			  PACKET3_QUERY_STATUS_COMMAND(2));
> +	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
> +	amdgpu_ring_write(kiq_ring,
> +			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
> +			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
> +	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
> +	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
> +	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
> +	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
> +}
> +
> +static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
> +	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
> +	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
> +	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
> +	.kiq_query_status = gfx_v9_0_kiq_query_status,
> +	.set_resources_size = 8,
> +	.map_queues_size = 7,
> +	.unmap_queues_size = 6,
> +	.query_status_size = 7,
> +};
> +
> +static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
> +{
> +	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
> +}
> +
>   static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
>   {
>   	switch (adev->asic_type) {
> @@ -4260,6 +4374,7 @@ static int gfx_v9_0_early_init(void *handle)
>   	else
>   		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
>   	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> +	gfx_v9_0_set_kiq_pm4_funcs(adev);
>   	gfx_v9_0_set_ring_funcs(adev);
>   	gfx_v9_0_set_irq_funcs(adev);
>   	gfx_v9_0_set_gds_init(adev);

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10
  2020-01-11 18:39 ` [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra
@ 2020-01-13 12:12   ` Christian König
  2020-01-13 16:36   ` Felix Kuehling
  1 sibling, 0 replies; 19+ messages in thread
From: Christian König @ 2020-01-13 12:12 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx

Am 11.01.20 um 19:39 schrieb Alex Sierra:
> TLB invalidate function pointer added to kiq_pm4_funcs struct.
> This way, a TLB flush can be done through the kiq member.
> TLB invalidation implemented for gfx9 and gfx10.
>
> Change-Id: I1b77b364f3ae0038ff3e70e869be5f2ef6e6d293
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>

Please note that I can't judge the correctness of the PM4 packets, but 
the interface looks really nice and clean now.

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  4 +++
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 33 +++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 32 ++++++++++++++++++++++++
>   3 files changed, 69 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 8e88e0411662..2927837bd401 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -76,11 +76,15 @@ struct kiq_pm4_funcs {
>   					struct amdgpu_ring *ring,
>   					u64 addr,
>   					u64 seq);
> +	int (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring,
> +				uint16_t pasid, uint32_t flush_type,
> +				bool all_hub);
>   	/* Packet sizes */
>   	int set_resources_size;
>   	int map_queues_size;
>   	int unmap_queues_size;
>   	int query_status_size;
> +	int invalidate_tlbs_size;
>   };
>   
>   struct amdgpu_kiq {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 379e46c1b7f6..2e82213f57eb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -40,6 +40,7 @@
>   #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
>   
>   #include "soc15.h"
> +#include "soc15d.h"
>   #include "soc15_common.h"
>   #include "clearstate_gfx10.h"
>   #include "v10_structs.h"
> @@ -346,15 +347,47 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
>   	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
>   }
>   
> +static int gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
> +				uint16_t pasid, uint32_t flush_type,
> +				bool all_hub)
> +{
> +	signed long r;
> +	uint32_t seq;
> +	struct amdgpu_device *adev = kiq_ring->adev;
> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> +
> +	spin_lock(&adev->gfx.kiq.ring_lock);
> +	amdgpu_ring_alloc(kiq_ring, kiq->pmf->invalidate_tlbs_size);
> +	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> +	amdgpu_ring_write(kiq_ring,
> +			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> +			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> +			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> +			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> +	amdgpu_fence_emit_polling(kiq_ring, &seq);
> +	amdgpu_ring_commit(kiq_ring);
> +	spin_unlock(&adev->gfx.kiq.ring_lock);
> +
> +	r = amdgpu_fence_wait_polling(kiq_ring, seq, adev->usec_timeout);
> +	if (r < 1) {
> +		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> +		return -ETIME;
> +	}
> +
> +	return 0;
> +}
> +
>   static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
>   	.kiq_set_resources = gfx10_kiq_set_resources,
>   	.kiq_map_queues = gfx10_kiq_map_queues,
>   	.kiq_unmap_queues = gfx10_kiq_unmap_queues,
>   	.kiq_query_status = gfx10_kiq_query_status,
> +	.kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
>   	.set_resources_size = 8,
>   	.map_queues_size = 7,
>   	.unmap_queues_size = 6,
>   	.query_status_size = 7,
> +	.invalidate_tlbs_size = 12,
>   };
>   
>   static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index ad0179ea2cc5..5be6fab55b73 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -837,15 +837,47 @@ static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
>   	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
>   }
>   
> +static int gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
> +				uint16_t pasid, uint32_t flush_type,
> +				bool all_hub)
> +{
> +	signed long r;
> +	uint32_t seq;
> +	struct amdgpu_device *adev = kiq_ring->adev;
> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> +
> +	spin_lock(&adev->gfx.kiq.ring_lock);
> +	amdgpu_ring_alloc(kiq_ring, kiq->pmf->invalidate_tlbs_size);
> +	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> +	amdgpu_ring_write(kiq_ring,
> +			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> +			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> +			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> +			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> +	amdgpu_fence_emit_polling(kiq_ring, &seq);
> +	amdgpu_ring_commit(kiq_ring);
> +	spin_unlock(&adev->gfx.kiq.ring_lock);
> +
> +	r = amdgpu_fence_wait_polling(kiq_ring, seq, adev->usec_timeout);
> +	if (r < 1) {
> +		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> +		return -ETIME;
> +	}
> +
> +	return 0;
> +}
> +
>   static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
>   	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
>   	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
>   	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
>   	.kiq_query_status = gfx_v9_0_kiq_query_status,
> +	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
>   	.set_resources_size = 8,
>   	.map_queues_size = 7,
>   	.unmap_queues_size = 6,
>   	.query_status_size = 7,
> +	.invalidate_tlbs_size = 12,
>   };
>   
>   static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 4/7] drm/amdgpu: replace kcq enable function on gfx_v9
  2020-01-11 18:39 ` [PATCH 4/7] drm/amdgpu: replace kcq enable function on gfx_v9 Alex Sierra
@ 2020-01-13 12:12   ` Christian König
  2020-01-13 16:53   ` Felix Kuehling
  1 sibling, 0 replies; 19+ messages in thread
From: Christian König @ 2020-01-13 12:12 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx

Am 11.01.20 um 19:39 schrieb Alex Sierra:
> [Why]
> There's a HW-independent function that enables kcq. This function uses
> the kiq_pm4_funcs implementation.
>
> [How]
> Local kcq enable function removed and replaced by the generic kcq
> enable under amdgpu_gfx
>
> Change-Id: I7709bdba93742c234941a5936c82eb67e346077c
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>

Acked-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 70 +--------------------------
>   1 file changed, 1 insertion(+), 69 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 5be6fab55b73..7219eacad9ce 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -3252,74 +3252,6 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
>   	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
>   }
>   
> -static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
> -{
> -	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
> -	uint64_t queue_mask = 0;
> -	int r, i;
> -
> -	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
> -		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
> -			continue;
> -
> -		/* This situation may be hit in the future if a new HW
> -		 * generation exposes more than 64 queues. If so, the
> -		 * definition of queue_mask needs updating */
> -		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
> -			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
> -			break;
> -		}
> -
> -		queue_mask |= (1ull << i);
> -	}
> -
> -	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
> -	if (r) {
> -		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
> -		return r;
> -	}
> -
> -	/* set resources */
> -	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
> -	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
> -			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
> -	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
> -	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
> -	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
> -	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
> -	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
> -	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
> -	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
> -		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
> -		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
> -		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
> -
> -		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
> -		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
> -		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
> -				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
> -				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
> -				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
> -				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
> -				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
> -				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
> -				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
> -				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
> -				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
> -		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
> -		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
> -		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
> -		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
> -		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
> -	}
> -
> -	r = amdgpu_ring_test_helper(kiq_ring);
> -	if (r)
> -		DRM_ERROR("KCQ enable failed\n");
> -
> -	return r;
> -}
> -
>   static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
>   {
>   	struct amdgpu_device *adev = ring->adev;
> @@ -3726,7 +3658,7 @@ static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
>   			goto done;
>   	}
>   
> -	r = gfx_v9_0_kiq_kcq_enable(adev);
> +	r = amdgpu_gfx_enable_kcq(adev);
>   done:
>   	return r;
>   }

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid
  2020-01-11 18:39 ` [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid Alex Sierra
@ 2020-01-13 12:15   ` Christian König
  2020-01-13 16:49   ` Felix Kuehling
  1 sibling, 0 replies; 19+ messages in thread
From: Christian König @ 2020-01-13 12:15 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx

Am 11.01.20 um 19:39 schrieb Alex Sierra:
> This can be used directly from amdgpu and amdkfd to invalidate
> TLB through pasid.
> It supports gmc v7, v8, v9 and v10.
>
> Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>

Flushing by pasid is in principle racy, but I don't see a way to avoid that.

The worst thing that could happen is that we flush a VMID while we 
wouldn't have to.

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  6 +++
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 59 ++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   | 33 +++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   | 34 ++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 61 +++++++++++++++++++++++++
>   5 files changed, 193 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index c91dd602d5f1..d3c27a3c43f6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs {
>   	/* flush the vm tlb via mmio */
>   	void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
>   				uint32_t vmhub, uint32_t flush_type);
> +	/* flush the vm tlb via pasid */
> +	int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
> +					uint32_t flush_type, bool all_hub);
>   	/* flush the vm tlb via ring */
>   	uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
>   				       uint64_t pd_addr);
> @@ -216,6 +219,9 @@ struct amdgpu_gmc {
>   };
>   
>   #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
> +#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \
> +	((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
> +	((adev), (pasid), (type), (allhub)))
>   #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
>   #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
>   #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 5ad89bb6f3ba..09408b8b390f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -30,6 +30,8 @@
>   #include "hdp/hdp_5_0_0_sh_mask.h"
>   #include "gc/gc_10_1_0_sh_mask.h"
>   #include "mmhub/mmhub_2_0_0_sh_mask.h"
> +#include "athub/athub_2_0_0_sh_mask.h"
> +#include "athub/athub_2_0_0_offset.h"
>   #include "dcn/dcn_2_0_0_offset.h"
>   #include "dcn/dcn_2_0_0_sh_mask.h"
>   #include "oss/osssys_5_0_0_offset.h"
> @@ -37,6 +39,7 @@
>   #include "navi10_enum.h"
>   
>   #include "soc15.h"
> +#include "soc15d.h"
>   #include "soc15_common.h"
>   
>   #include "nbio_v2_3.h"
> @@ -234,6 +237,19 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
>   		(!amdgpu_sriov_vf(adev)));
>   }
>   
> +static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
> +					struct amdgpu_device *adev,
> +					uint8_t vmid, uint16_t *p_pasid)
> +{
> +	uint32_t value;
> +
> +	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
> +		     + vmid);
> +	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
> +
> +	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -380,6 +396,48 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
>   }
>   
> +/**
> + * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flushed
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	int vmid, i;
> +	uint16_t queried_pasid;
> +	bool ret;
> +	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> +
> +	if (amdgpu_emu_mode == 0 && ring->sched.ready)
> +		return kiq->pmf->kiq_invalidate_tlbs(ring,
> +						pasid, flush_type, all_hub);
> +
> +	for (vmid = 1; vmid < 16; vmid++) {
> +
> +		ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
> +				&queried_pasid);
> +		if (ret	&& queried_pasid == pasid) {
> +			if (all_hub) {
> +				for (i = 0; i < adev->num_vmhubs; i++)
> +					gmc_v10_0_flush_gpu_tlb(adev, vmid,
> +							i, 0);
> +			} else {
> +				gmc_v10_0_flush_gpu_tlb(adev, vmid,
> +						AMDGPU_GFXHUB_0, 0);
> +			}
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>   static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>   					     unsigned vmid, uint64_t pd_addr)
>   {
> @@ -531,6 +589,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
>   
>   static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
>   	.flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
> +	.flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
>   	.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
>   	.map_mtype = gmc_v10_0_map_mtype,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> index f08e5330642d..19d5b133e1d7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> @@ -418,6 +418,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
>   	return 0;
>   }
>   
> +/**
> + * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flushed
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	int vmid;
> +	unsigned int tmp;
> +
> +	if (adev->in_gpu_reset)
> +		return -EIO;
> +
> +	for (vmid = 1; vmid < 16; vmid++) {
> +
> +		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> +		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> +			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> +			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> +			RREG32(mmVM_INVALIDATE_RESPONSE);
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -1333,6 +1365,7 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = {
>   
>   static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
>   	.flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb,
> +	.flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid,
>   	.emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping,
>   	.set_prt = gmc_v7_0_set_prt,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> index 6d96d40fbcb8..27d83204fa2b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> @@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
>   	return 0;
>   }
>   
> +/**
> + * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flushed
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	int vmid;
> +	unsigned int tmp;
> +
> +	if (adev->in_gpu_reset)
> +		return -EIO;
> +
> +	for (vmid = 1; vmid < 16; vmid++) {
> +
> +		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> +		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> +			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> +			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> +			RREG32(mmVM_INVALIDATE_RESPONSE);
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -1700,6 +1733,7 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
>   
>   static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
>   	.flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb,
> +	.flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid,
>   	.emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping,
>   	.set_prt = gmc_v8_0_set_prt,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index b83c8d745f42..95cce54999b7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -38,10 +38,12 @@
>   #include "dce/dce_12_0_sh_mask.h"
>   #include "vega10_enum.h"
>   #include "mmhub/mmhub_1_0_offset.h"
> +#include "athub/athub_1_0_sh_mask.h"
>   #include "athub/athub_1_0_offset.h"
>   #include "oss/osssys_4_0_offset.h"
>   
>   #include "soc15.h"
> +#include "soc15d.h"
>   #include "soc15_common.h"
>   #include "umc/umc_6_0_sh_mask.h"
>   
> @@ -441,6 +443,18 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
>   		   adev->pdev->device == 0x15d8)));
>   }
>   
> +static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
> +					uint8_t vmid, uint16_t *p_pasid)
> +{
> +	uint32_t value;
> +
> +	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
> +		     + vmid);
> +	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
> +
> +	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -539,6 +553,52 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
>   }
>   
> +/**
> + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flushed
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	int vmid, i;
> +	uint16_t queried_pasid;
> +	bool ret;
> +	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> +
> +	if (adev->in_gpu_reset)
> +		return -EIO;
> +
> +	if (ring->sched.ready)
> +		return kiq->pmf->kiq_invalidate_tlbs(ring,
> +						pasid, flush_type, all_hub);
> +
> +	for (vmid = 1; vmid < 16; vmid++) {
> +
> +		ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
> +				&queried_pasid);
> +		if (ret && queried_pasid == pasid) {
> +			if (all_hub) {
> +				for (i = 0; i < adev->num_vmhubs; i++)
> +					gmc_v9_0_flush_gpu_tlb(adev, vmid,
> +							i, 0);
> +			} else {
> +				gmc_v9_0_flush_gpu_tlb(adev, vmid,
> +						AMDGPU_GFXHUB_0, 0);
> +			}
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +
> +}
> +
>   static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>   					    unsigned vmid, uint64_t pd_addr)
>   {
> @@ -700,6 +760,7 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
>   
>   static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
>   	.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
> +	.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
>   	.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
>   	.map_mtype = gmc_v9_0_map_mtype,

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 6/7] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd
  2020-01-11 18:39 ` [PATCH 6/7] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd Alex Sierra
@ 2020-01-13 12:16   ` Christian König
  2020-01-13 16:58   ` Felix Kuehling
  1 sibling, 0 replies; 19+ messages in thread
From: Christian König @ 2020-01-13 12:16 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx

Am 11.01.20 um 19:39 schrieb Alex Sierra:
> [Why]
> The TLB flush method using the kfd2kgd interface has been deprecated.
> This implementation is now in the amdgpu_amdkfd API.
>
> [How]
> TLB flush functions now implemented in amdgpu_amdkfd.
>
> Change-Id: Ic51cccdfe6e71288d78da772b6e1b6ced72f8ef7
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>

Acked-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 32 ++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  2 ++
>   drivers/gpu/drm/amd/amdkfd/kfd_process.c   |  8 ++++--
>   3 files changed, 39 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index 88e10b956413..8609287620ea 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -628,6 +628,38 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
>   	return false;
>   }
>   
> +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
> +{
> +	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
> +
> +	if (adev->family == AMDGPU_FAMILY_AI) {
> +		int i;
> +
> +		for (i = 0; i < adev->num_vmhubs; i++)
> +			amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
> +	} else {
> +		amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
> +	}
> +
> +	return 0;
> +}
> +
> +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid)
> +{
> +	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
> +	uint32_t flush_type = 0;
> +	bool all_hub = false;
> +
> +	if (adev->gmc.xgmi.num_physical_nodes &&
> +		adev->asic_type == CHIP_VEGA20)
> +		flush_type = 2;
> +
> +	if (adev->family == AMDGPU_FAMILY_AI)
> +		all_hub = true;
> +
> +	return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
> +}
> +
>   bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 069d5d230810..47b0f2957d1f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -136,6 +136,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
>   				uint32_t *ib_cmd, uint32_t ib_len);
>   void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
>   bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd);
> +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid);
> +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid);
>   
>   bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 536a153ac9a4..25b90f70aecd 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -32,6 +32,7 @@
>   #include <linux/mman.h>
>   #include <linux/file.h>
>   #include "amdgpu_amdkfd.h"
> +#include "amdgpu.h"
>   
>   struct mm_struct;
>   
> @@ -1152,16 +1153,17 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
>   void kfd_flush_tlb(struct kfd_process_device *pdd)
>   {
>   	struct kfd_dev *dev = pdd->dev;
> -	const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
>   
>   	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
>   		/* Nothing to flush until a VMID is assigned, which
>   		 * only happens when the first queue is created.
>   		 */
>   		if (pdd->qpd.vmid)
> -			f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
> +			amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
> +							pdd->qpd.vmid);
>   	} else {
> -		f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
> +		amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
> +						pdd->process->pasid);
>   	}
>   }
>   

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 7/7] drm/amdgpu: flush TLB functions removal from kfd2kgd interface
  2020-01-11 18:39 ` [PATCH 7/7] drm/amdgpu: flush TLB functions removal from kfd2kgd interface Alex Sierra
@ 2020-01-13 12:17   ` Christian König
  0 siblings, 0 replies; 19+ messages in thread
From: Christian König @ 2020-01-13 12:17 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx

Am 11.01.20 um 19:39 schrieb Alex Sierra:
> [Why]
> kfd2kgd interface will be deprecated. This removal only covers TLB
> invalidation for now. The removed functions have been replaced in the
> amdgpu_amdkfd API.
>
> [How]
> TLB invalidate functions removed from the different amdkfd_gfx_v*
> versions.
>
> Change-Id: Ic2c7d4a0d19fe1e884dee1ff10a520d31252afee
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>

Nice work for the patch set. Acked-by: Christian König 
<christian.koenig@amd.com>

> ---
>   .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  2 -
>   .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c    | 67 -------------
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 41 --------
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 41 --------
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 96 -------------------
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  2 -
>   .../gpu/drm/amd/include/kgd_kfd_interface.h   |  2 -
>   7 files changed, 251 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> index f9011a07cb90..562e7a7f51a8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> @@ -317,7 +317,5 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
>   			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
>   	.get_tile_config = kgd_gfx_v9_get_tile_config,
>   	.set_vm_context_page_table_base = kgd_set_vm_context_page_table_base,
> -	.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
> -	.invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid,
>   	.get_hive_id = amdgpu_amdkfd_get_hive_id,
>   };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index 61cd707158e4..6132b4874498 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -686,71 +686,6 @@ static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
>   	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
>   }
>   
> -static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
> -{
> -	signed long r;
> -	uint32_t seq;
> -	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> -
> -	spin_lock(&adev->gfx.kiq.ring_lock);
> -	amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
> -	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> -	amdgpu_ring_write(ring,
> -			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> -			PACKET3_INVALIDATE_TLBS_PASID(pasid));
> -	amdgpu_fence_emit_polling(ring, &seq);
> -	amdgpu_ring_commit(ring);
> -	spin_unlock(&adev->gfx.kiq.ring_lock);
> -
> -	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
> -	if (r < 1) {
> -		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> -		return -ETIME;
> -	}
> -
> -	return 0;
> -}
> -
> -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
> -{
> -	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> -	int vmid;
> -	uint16_t queried_pasid;
> -	bool ret;
> -	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> -
> -	if (amdgpu_emu_mode == 0 && ring->sched.ready)
> -		return invalidate_tlbs_with_kiq(adev, pasid);
> -
> -	for (vmid = 0; vmid < 16; vmid++) {
> -		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
> -			continue;
> -
> -		ret = get_atc_vmid_pasid_mapping_info(kgd, vmid,
> -				&queried_pasid);
> -		if (ret	&& queried_pasid == pasid) {
> -			amdgpu_gmc_flush_gpu_tlb(adev, vmid,
> -					AMDGPU_GFXHUB_0, 0);
> -			break;
> -		}
> -	}
> -
> -	return 0;
> -}
> -
> -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
> -{
> -	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> -
> -	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
> -		pr_err("non kfd vmid %d\n", vmid);
> -		return 0;
> -	}
> -
> -	amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
> -	return 0;
> -}
> -
>   static int kgd_address_watch_disable(struct kgd_dev *kgd)
>   {
>   	return 0;
> @@ -832,7 +767,5 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
>   			get_atc_vmid_pasid_mapping_info,
>   	.get_tile_config = amdgpu_amdkfd_get_tile_config,
>   	.set_vm_context_page_table_base = set_vm_context_page_table_base,
> -	.invalidate_tlbs = invalidate_tlbs,
> -	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
>   	.get_hive_id = amdgpu_amdkfd_get_hive_id,
>   };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> index 6e6f0a99ec06..8f052e98a3c6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> @@ -696,45 +696,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
>   		lower_32_bits(page_table_base));
>   }
>   
> -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
> -{
> -	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> -	int vmid;
> -	unsigned int tmp;
> -
> -	if (adev->in_gpu_reset)
> -		return -EIO;
> -
> -	for (vmid = 0; vmid < 16; vmid++) {
> -		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
> -			continue;
> -
> -		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> -		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> -			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> -			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> -			RREG32(mmVM_INVALIDATE_RESPONSE);
> -			break;
> -		}
> -	}
> -
> -	return 0;
> -}
> -
> -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
> -{
> -	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> -
> -	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
> -		pr_err("non kfd vmid\n");
> -		return 0;
> -	}
> -
> -	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> -	RREG32(mmVM_INVALIDATE_RESPONSE);
> -	return 0;
> -}
> -
>    /**
>     * read_vmid_from_vmfault_reg - read vmid from register
>     *
> @@ -771,7 +732,5 @@ const struct kfd2kgd_calls gfx_v7_kfd2kgd = {
>   	.set_scratch_backing_va = set_scratch_backing_va,
>   	.get_tile_config = get_tile_config,
>   	.set_vm_context_page_table_base = set_vm_context_page_table_base,
> -	.invalidate_tlbs = invalidate_tlbs,
> -	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
>   	.read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
>   };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> index bfbddedb2380..19a10db93d68 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> @@ -657,45 +657,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
>   			lower_32_bits(page_table_base));
>   }
>   
> -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
> -{
> -	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> -	int vmid;
> -	unsigned int tmp;
> -
> -	if (adev->in_gpu_reset)
> -		return -EIO;
> -
> -	for (vmid = 0; vmid < 16; vmid++) {
> -		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
> -			continue;
> -
> -		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> -		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> -			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> -			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> -			RREG32(mmVM_INVALIDATE_RESPONSE);
> -			break;
> -		}
> -	}
> -
> -	return 0;
> -}
> -
> -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
> -{
> -	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> -
> -	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
> -		pr_err("non kfd vmid %d\n", vmid);
> -		return -EINVAL;
> -	}
> -
> -	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> -	RREG32(mmVM_INVALIDATE_RESPONSE);
> -	return 0;
> -}
> -
>   const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
>   	.program_sh_mem_settings = kgd_program_sh_mem_settings,
>   	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
> @@ -717,6 +678,4 @@ const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
>   	.set_scratch_backing_va = set_scratch_backing_va,
>   	.get_tile_config = get_tile_config,
>   	.set_vm_context_page_table_base = set_vm_context_page_table_base,
> -	.invalidate_tlbs = invalidate_tlbs,
> -	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
>   };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index e7861f0ef415..932ae85d97e2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -617,100 +617,6 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
>   	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
>   }
>   
> -static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid,
> -			uint32_t flush_type)
> -{
> -	signed long r;
> -	uint32_t seq;
> -	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> -
> -	spin_lock(&adev->gfx.kiq.ring_lock);
> -	amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
> -	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> -	amdgpu_ring_write(ring,
> -			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> -			PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
> -			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> -			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> -	amdgpu_fence_emit_polling(ring, &seq);
> -	amdgpu_ring_commit(ring);
> -	spin_unlock(&adev->gfx.kiq.ring_lock);
> -
> -	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
> -	if (r < 1) {
> -		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> -		return -ETIME;
> -	}
> -
> -	return 0;
> -}
> -
> -int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
> -{
> -	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> -	int vmid, i;
> -	uint16_t queried_pasid;
> -	bool ret;
> -	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> -	uint32_t flush_type = 0;
> -
> -	if (adev->in_gpu_reset)
> -		return -EIO;
> -	if (adev->gmc.xgmi.num_physical_nodes &&
> -		adev->asic_type == CHIP_VEGA20)
> -		flush_type = 2;
> -
> -	if (ring->sched.ready)
> -		return invalidate_tlbs_with_kiq(adev, pasid, flush_type);
> -
> -	for (vmid = 0; vmid < 16; vmid++) {
> -		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
> -			continue;
> -
> -		ret = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(kgd, vmid,
> -				&queried_pasid);
> -		if (ret && queried_pasid == pasid) {
> -			for (i = 0; i < adev->num_vmhubs; i++)
> -				amdgpu_gmc_flush_gpu_tlb(adev, vmid,
> -							i, flush_type);
> -			break;
> -		}
> -	}
> -
> -	return 0;
> -}
> -
> -int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
> -{
> -	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> -	int i;
> -
> -	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
> -		pr_err("non kfd vmid %d\n", vmid);
> -		return 0;
> -	}
> -
> -	/* Use legacy mode tlb invalidation.
> -	 *
> -	 * Currently on Raven the code below is broken for anything but
> -	 * legacy mode due to a MMHUB power gating problem. A workaround
> -	 * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
> -	 * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
> -	 * bit.
> -	 *
> -	 * TODO 1: agree on the right set of invalidation registers for
> -	 * KFD use. Use the last one for now. Invalidate both GC and
> -	 * MMHUB.
> -	 *
> -	 * TODO 2: support range-based invalidation, requires kfg2kgd
> -	 * interface change
> -	 */
> -	for (i = 0; i < adev->num_vmhubs; i++)
> -		amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
> -
> -	return 0;
> -}
> -
>   int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd)
>   {
>   	return 0;
> @@ -793,7 +699,5 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
>   			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
>   	.get_tile_config = kgd_gfx_v9_get_tile_config,
>   	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
> -	.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
> -	.invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid,
>   	.get_hive_id = amdgpu_amdkfd_get_hive_id,
>   };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> index 02b1426d17d1..dfafa28b7559 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> @@ -57,7 +57,5 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,
>   
>   bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
>   					uint8_t vmid, uint16_t *p_pasid);
> -int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
> -int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
>   int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd,
>   		struct tile_config *config);
> diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> index 2cd217e60125..a01ef836ad58 100644
> --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> @@ -307,8 +307,6 @@ struct kfd2kgd_calls {
>   
>   	void (*set_vm_context_page_table_base)(struct kgd_dev *kgd,
>   			uint32_t vmid, uint64_t page_table_base);
> -	int (*invalidate_tlbs)(struct kgd_dev *kgd, uint16_t pasid);
> -	int (*invalidate_tlbs_vmid)(struct kgd_dev *kgd, uint16_t vmid);
>   	uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd);
>   	uint64_t (*get_hive_id)(struct kgd_dev *kgd);
>   

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10
  2020-01-11 18:39 ` [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra
  2020-01-13 12:12   ` Christian König
@ 2020-01-13 16:36   ` Felix Kuehling
  1 sibling, 0 replies; 19+ messages in thread
From: Felix Kuehling @ 2020-01-13 16:36 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx

On 2020-01-11 1:39 p.m., Alex Sierra wrote:
> tlbs invalidate pointer function added to kiq_pm4_funcs struct.
> This way, tlb flush can be done through kiq member.
> TLB invalidation implemented for gfx9 and gfx10.
>
> Change-Id: I1b77b364f3ae0038ff3e70e869be5f2ef6e6d293
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  4 +++
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 33 +++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 32 ++++++++++++++++++++++++
>   3 files changed, 69 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 8e88e0411662..2927837bd401 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -76,11 +76,15 @@ struct kiq_pm4_funcs {
>   					struct amdgpu_ring *ring,
>   					u64 addr,
>   					u64 seq);
> +	int (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring,
> +				uint16_t pasid, uint32_t flush_type,
> +				bool all_hub);
>   	/* Packet sizes */
>   	int set_resources_size;
>   	int map_queues_size;
>   	int unmap_queues_size;
>   	int query_status_size;
> +	int invalidate_tlbs_size;
>   };
>   
>   struct amdgpu_kiq {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 379e46c1b7f6..2e82213f57eb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -40,6 +40,7 @@
>   #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
>   
>   #include "soc15.h"
> +#include "soc15d.h"
>   #include "soc15_common.h"
>   #include "clearstate_gfx10.h"
>   #include "v10_structs.h"
> @@ -346,15 +347,47 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
>   	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
>   }
>   
> +static int gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
> +				uint16_t pasid, uint32_t flush_type,
> +				bool all_hub)
> +{
> +	signed long r;
> +	uint32_t seq;
> +	struct amdgpu_device *adev = kiq_ring->adev;
> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> +
> +	spin_lock(&adev->gfx.kiq.ring_lock);
> +	amdgpu_ring_alloc(kiq_ring, kiq->pmf->invalidate_tlbs_size);
> +	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> +	amdgpu_ring_write(kiq_ring,
> +			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> +			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> +			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> +			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> +	amdgpu_fence_emit_polling(kiq_ring, &seq);
> +	amdgpu_ring_commit(kiq_ring);
> +	spin_unlock(&adev->gfx.kiq.ring_lock);

The other KIQ functions don't include the emit_polling, commit and 
locking. I think the way the KIQ-funcs interface is meant to be used, 
all that should be outside the IP-version-specific functions. For 
consistency all you should do here is the amdgpu_ring_write calls with 
IP-version-specific packets.

Regards,
   Felix


> +
> +	r = amdgpu_fence_wait_polling(kiq_ring, seq, adev->usec_timeout);
> +	if (r < 1) {
> +		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> +		return -ETIME;
> +	}
> +
> +	return 0;
> +}
> +
>   static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
>   	.kiq_set_resources = gfx10_kiq_set_resources,
>   	.kiq_map_queues = gfx10_kiq_map_queues,
>   	.kiq_unmap_queues = gfx10_kiq_unmap_queues,
>   	.kiq_query_status = gfx10_kiq_query_status,
> +	.kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
>   	.set_resources_size = 8,
>   	.map_queues_size = 7,
>   	.unmap_queues_size = 6,
>   	.query_status_size = 7,
> +	.invalidate_tlbs_size = 12,
>   };
>   
>   static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index ad0179ea2cc5..5be6fab55b73 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -837,15 +837,47 @@ static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
>   	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
>   }
>   
> +static int gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
> +				uint16_t pasid, uint32_t flush_type,
> +				bool all_hub)
> +{
> +	signed long r;
> +	uint32_t seq;
> +	struct amdgpu_device *adev = kiq_ring->adev;
> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> +
> +	spin_lock(&adev->gfx.kiq.ring_lock);
> +	amdgpu_ring_alloc(kiq_ring, kiq->pmf->invalidate_tlbs_size);
> +	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> +	amdgpu_ring_write(kiq_ring,
> +			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> +			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> +			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> +			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> +	amdgpu_fence_emit_polling(kiq_ring, &seq);
> +	amdgpu_ring_commit(kiq_ring);
> +	spin_unlock(&adev->gfx.kiq.ring_lock);
> +
> +	r = amdgpu_fence_wait_polling(kiq_ring, seq, adev->usec_timeout);
> +	if (r < 1) {
> +		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> +		return -ETIME;
> +	}
> +
> +	return 0;
> +}
> +
>   static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
>   	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
>   	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
>   	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
>   	.kiq_query_status = gfx_v9_0_kiq_query_status,
> +	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
>   	.set_resources_size = 8,
>   	.map_queues_size = 7,
>   	.unmap_queues_size = 6,
>   	.query_status_size = 7,
> +	.invalidate_tlbs_size = 12,
>   };
>   
>   static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid
  2020-01-11 18:39 ` [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid Alex Sierra
  2020-01-13 12:15   ` Christian König
@ 2020-01-13 16:49   ` Felix Kuehling
  1 sibling, 0 replies; 19+ messages in thread
From: Felix Kuehling @ 2020-01-13 16:49 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx

On 2020-01-11 1:39 p.m., Alex Sierra wrote:
> This can be used directly from amdgpu and amdkfd to invalidate
> TLB through pasid.
> It supports gmc v7, v8, v9 and v10.
>
> Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  6 +++
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 59 ++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   | 33 +++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   | 34 ++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 61 +++++++++++++++++++++++++
>   5 files changed, 193 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index c91dd602d5f1..d3c27a3c43f6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs {
>   	/* flush the vm tlb via mmio */
>   	void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
>   				uint32_t vmhub, uint32_t flush_type);
> +	/* flush the vm tlb via pasid */
> +	int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
> +					uint32_t flush_type, bool all_hub);
>   	/* flush the vm tlb via ring */
>   	uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
>   				       uint64_t pd_addr);
> @@ -216,6 +219,9 @@ struct amdgpu_gmc {
>   };
>   
>   #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
> +#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \
> +	((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
> +	((adev), (pasid), (type), (allhub)))
>   #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
>   #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
>   #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 5ad89bb6f3ba..09408b8b390f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -30,6 +30,8 @@
>   #include "hdp/hdp_5_0_0_sh_mask.h"
>   #include "gc/gc_10_1_0_sh_mask.h"
>   #include "mmhub/mmhub_2_0_0_sh_mask.h"
> +#include "athub/athub_2_0_0_sh_mask.h"
> +#include "athub/athub_2_0_0_offset.h"
>   #include "dcn/dcn_2_0_0_offset.h"
>   #include "dcn/dcn_2_0_0_sh_mask.h"
>   #include "oss/osssys_5_0_0_offset.h"
> @@ -37,6 +39,7 @@
>   #include "navi10_enum.h"
>   
>   #include "soc15.h"
> +#include "soc15d.h"
>   #include "soc15_common.h"
>   
>   #include "nbio_v2_3.h"
> @@ -234,6 +237,19 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
>   		(!amdgpu_sriov_vf(adev)));
>   }
>   
> +static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
> +					struct amdgpu_device *adev,
> +					uint8_t vmid, uint16_t *p_pasid)
> +{
> +	uint32_t value;
> +
> +	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
> +		     + vmid);
> +	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
> +
> +	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -380,6 +396,48 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
>   }
>   
> +/**
> + * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flush
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	int vmid, i;
> +	uint16_t queried_pasid;
> +	bool ret;
> +	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> +
> +	if (amdgpu_emu_mode == 0 && ring->sched.ready)
> +		return kiq->pmf->kiq_invalidate_tlbs(ring,
> +						pasid, flush_type, all_hub);

This is where you should do the locking, ring alloc, and commit.

Regards,
   Felix


> +
> +	for (vmid = 1; vmid < 16; vmid++) {
> +
> +		ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
> +				&queried_pasid);
> +		if (ret	&& queried_pasid == pasid) {
> +			if (all_hub) {
> +				for (i = 0; i < adev->num_vmhubs; i++)
> +					gmc_v10_0_flush_gpu_tlb(adev, vmid,
> +							i, 0);
> +			} else {
> +				gmc_v10_0_flush_gpu_tlb(adev, vmid,
> +						AMDGPU_GFXHUB_0, 0);
> +			}
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>   static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>   					     unsigned vmid, uint64_t pd_addr)
>   {
> @@ -531,6 +589,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
>   
>   static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
>   	.flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
> +	.flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
>   	.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
>   	.map_mtype = gmc_v10_0_map_mtype,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> index f08e5330642d..19d5b133e1d7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> @@ -418,6 +418,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
>   	return 0;
>   }
>   
> +/**
> + * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flush
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	int vmid;
> +	unsigned int tmp;
> +
> +	if (adev->in_gpu_reset)
> +		return -EIO;
> +
> +	for (vmid = 1; vmid < 16; vmid++) {
> +
> +		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> +		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> +			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> +			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> +			RREG32(mmVM_INVALIDATE_RESPONSE);
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -1333,6 +1365,7 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = {
>   
>   static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
>   	.flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb,
> +	.flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid,
>   	.emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping,
>   	.set_prt = gmc_v7_0_set_prt,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> index 6d96d40fbcb8..27d83204fa2b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> @@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
>   	return 0;
>   }
>   
> +/**
> + * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flush
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	int vmid;
> +	unsigned int tmp;
> +
> +	if (adev->in_gpu_reset)
> +		return -EIO;
> +
> +	for (vmid = 1; vmid < 16; vmid++) {
> +
> +		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> +		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> +			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> +			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> +			RREG32(mmVM_INVALIDATE_RESPONSE);
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -1700,6 +1733,7 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
>   
>   static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
>   	.flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb,
> +	.flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid,
>   	.emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping,
>   	.set_prt = gmc_v8_0_set_prt,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index b83c8d745f42..95cce54999b7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -38,10 +38,12 @@
>   #include "dce/dce_12_0_sh_mask.h"
>   #include "vega10_enum.h"
>   #include "mmhub/mmhub_1_0_offset.h"
> +#include "athub/athub_1_0_sh_mask.h"
>   #include "athub/athub_1_0_offset.h"
>   #include "oss/osssys_4_0_offset.h"
>   
>   #include "soc15.h"
> +#include "soc15d.h"
>   #include "soc15_common.h"
>   #include "umc/umc_6_0_sh_mask.h"
>   
> @@ -441,6 +443,18 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
>   		   adev->pdev->device == 0x15d8)));
>   }
>   
> +static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
> +					uint8_t vmid, uint16_t *p_pasid)
> +{
> +	uint32_t value;
> +
> +	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
> +		     + vmid);
> +	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
> +
> +	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -539,6 +553,52 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
>   }
>   
> +/**
> + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flush
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	int vmid, i;
> +	uint16_t queried_pasid;
> +	bool ret;
> +	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> +
> +	if (adev->in_gpu_reset)
> +		return -EIO;
> +
> +	if (ring->sched.ready)
> +		return kiq->pmf->kiq_invalidate_tlbs(ring,
> +						pasid, flush_type, all_hub);
> +
> +	for (vmid = 1; vmid < 16; vmid++) {
> +
> +		ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
> +				&queried_pasid);
> +		if (ret && queried_pasid == pasid) {
> +			if (all_hub) {
> +				for (i = 0; i < adev->num_vmhubs; i++)
> +					gmc_v9_0_flush_gpu_tlb(adev, vmid,
> +							i, 0);
> +			} else {
> +				gmc_v9_0_flush_gpu_tlb(adev, vmid,
> +						AMDGPU_GFXHUB_0, 0);
> +			}
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +
> +}
> +
>   static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>   					    unsigned vmid, uint64_t pd_addr)
>   {
> @@ -700,6 +760,7 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
>   
>   static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
>   	.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
> +	.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
>   	.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
>   	.map_mtype = gmc_v9_0_map_mtype,
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 4/7] drm/amdgpu: replace kcq enable function on gfx_v9
  2020-01-11 18:39 ` [PATCH 4/7] drm/amdgpu: replace kcq enable function on gfx_v9 Alex Sierra
  2020-01-13 12:12   ` Christian König
@ 2020-01-13 16:53   ` Felix Kuehling
  1 sibling, 0 replies; 19+ messages in thread
From: Felix Kuehling @ 2020-01-13 16:53 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx

Can you include kcq_disable in the patch as well?

Thanks,
   Felix

On 2020-01-11 1:39 p.m., Alex Sierra wrote:
> [Why]
> There's a HW-independent function that enables kcq. This function uses
> the kiq_pm4_funcs implementation.
>
> [How]
> Local kcq enable function removed and replaced by the generic kcq
> enable function under amdgpu_gfx.
>
> Change-Id: I7709bdba93742c234941a5936c82eb67e346077c
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 70 +--------------------------
>   1 file changed, 1 insertion(+), 69 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 5be6fab55b73..7219eacad9ce 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -3252,74 +3252,6 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
>   	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
>   }
>   
> -static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
> -{
> -	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
> -	uint64_t queue_mask = 0;
> -	int r, i;
> -
> -	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
> -		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
> -			continue;
> -
> -		/* This situation may be hit in the future if a new HW
> -		 * generation exposes more than 64 queues. If so, the
> -		 * definition of queue_mask needs updating */
> -		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
> -			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
> -			break;
> -		}
> -
> -		queue_mask |= (1ull << i);
> -	}
> -
> -	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
> -	if (r) {
> -		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
> -		return r;
> -	}
> -
> -	/* set resources */
> -	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
> -	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
> -			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
> -	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
> -	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
> -	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
> -	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
> -	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
> -	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
> -	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
> -		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
> -		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
> -		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
> -
> -		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
> -		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
> -		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
> -				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
> -				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
> -				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
> -				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
> -				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
> -				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
> -				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
> -				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
> -				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
> -		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
> -		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
> -		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
> -		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
> -		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
> -	}
> -
> -	r = amdgpu_ring_test_helper(kiq_ring);
> -	if (r)
> -		DRM_ERROR("KCQ enable failed\n");
> -
> -	return r;
> -}
> -
>   static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
>   {
>   	struct amdgpu_device *adev = ring->adev;
> @@ -3726,7 +3658,7 @@ static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
>   			goto done;
>   	}
>   
> -	r = gfx_v9_0_kiq_kcq_enable(adev);
> +	r = amdgpu_gfx_enable_kcq(adev);
>   done:
>   	return r;
>   }
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/7] drm/amdgpu: kiq pm4 function implementation for gfx_v9
  2020-01-11 18:39 ` [PATCH 2/7] drm/amdgpu: kiq pm4 function implementation for gfx_v9 Alex Sierra
  2020-01-13 12:10   ` Christian König
@ 2020-01-13 16:57   ` Felix Kuehling
  1 sibling, 0 replies; 19+ messages in thread
From: Felix Kuehling @ 2020-01-13 16:57 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx

On 2020-01-11 1:39 p.m., Alex Sierra wrote:
> Functions implemented from kiq_pm4_funcs struct members
> for gfx_v9 version.
>
> Change-Id: I8fd3e160c4bd58f19d35d29e39517db967063afe
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>


> ---
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 115 ++++++++++++++++++++++++++
>   1 file changed, 115 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index e3d466bd5c4e..ad0179ea2cc5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -739,6 +739,120 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
>   static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
>   				     void *inject_if);
>   
> +static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
> +				uint64_t queue_mask)
> +{
> +	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
> +	amdgpu_ring_write(kiq_ring,
> +		PACKET3_SET_RESOURCES_VMID_MASK(0) |
> +		/* vmid_mask:0 queue_type:0 (KIQ) */
> +		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
> +	amdgpu_ring_write(kiq_ring,
> +			lower_32_bits(queue_mask));	/* queue mask lo */
> +	amdgpu_ring_write(kiq_ring,
> +			upper_32_bits(queue_mask));	/* queue mask hi */
> +	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
> +	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
> +	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
> +	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
> +}
> +
> +static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
> +				 struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_device *adev = kiq_ring->adev;
> +	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
> +	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
> +	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
> +
> +	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
> +	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
> +	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
> +			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
> +			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
> +			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
> +			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
> +			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
> +			 /*queue_type: normal compute queue */
> +			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
> +			 /* alloc format: all_on_one_pipe */
> +			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
> +			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
> +			 /* num_queues: must be 1 */
> +			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
> +	amdgpu_ring_write(kiq_ring,
> +			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
> +	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
> +	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
> +	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
> +	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
> +}
> +
> +static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
> +				   struct amdgpu_ring *ring,
> +				   enum amdgpu_unmap_queues_action action,
> +				   u64 gpu_addr, u64 seq)
> +{
> +	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
> +
> +	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
> +	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
> +			  PACKET3_UNMAP_QUEUES_ACTION(action) |
> +			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
> +			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
> +			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
> +	amdgpu_ring_write(kiq_ring,
> +			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
> +
> +	if (action == PREEMPT_QUEUES_NO_UNMAP) {
> +		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
> +		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
> +		amdgpu_ring_write(kiq_ring, seq);
> +	} else {
> +		amdgpu_ring_write(kiq_ring, 0);
> +		amdgpu_ring_write(kiq_ring, 0);
> +		amdgpu_ring_write(kiq_ring, 0);
> +	}
> +}
> +
> +static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
> +				   struct amdgpu_ring *ring,
> +				   u64 addr,
> +				   u64 seq)
> +{
> +	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
> +
> +	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
> +	amdgpu_ring_write(kiq_ring,
> +			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
> +			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
> +			  PACKET3_QUERY_STATUS_COMMAND(2));
> +	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
> +	amdgpu_ring_write(kiq_ring,
> +			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
> +			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
> +	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
> +	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
> +	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
> +	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
> +}
> +
> +static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
> +	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
> +	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
> +	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
> +	.kiq_query_status = gfx_v9_0_kiq_query_status,
> +	.set_resources_size = 8,
> +	.map_queues_size = 7,
> +	.unmap_queues_size = 6,
> +	.query_status_size = 7,
> +};
> +
> +static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
> +{
> +	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
> +}
> +
>   static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
>   {
>   	switch (adev->asic_type) {
> @@ -4260,6 +4374,7 @@ static int gfx_v9_0_early_init(void *handle)
>   	else
>   		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
>   	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> +	gfx_v9_0_set_kiq_pm4_funcs(adev);
>   	gfx_v9_0_set_ring_funcs(adev);
>   	gfx_v9_0_set_irq_funcs(adev);
>   	gfx_v9_0_set_gds_init(adev);
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 6/7] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd
  2020-01-11 18:39 ` [PATCH 6/7] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd Alex Sierra
  2020-01-13 12:16   ` Christian König
@ 2020-01-13 16:58   ` Felix Kuehling
  1 sibling, 0 replies; 19+ messages in thread
From: Felix Kuehling @ 2020-01-13 16:58 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx

On 2020-01-11 1:39 p.m., Alex Sierra wrote:
> [Why]
> The TLB flush method using the kfd2kgd interface has been deprecated.
> The implementation now lives in the amdgpu_amdkfd API.
>
> [How]
> TLB flush functions now implemented in amdgpu_amdkfd.
>
> Change-Id: Ic51cccdfe6e71288d78da772b6e1b6ced72f8ef7
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>


> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 32 ++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  2 ++
>   drivers/gpu/drm/amd/amdkfd/kfd_process.c   |  8 ++++--
>   3 files changed, 39 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index 88e10b956413..8609287620ea 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -628,6 +628,38 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
>   	return false;
>   }
>   
> +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
> +{
> +	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
> +
> +	if (adev->family == AMDGPU_FAMILY_AI) {
> +		int i;
> +
> +		for (i = 0; i < adev->num_vmhubs; i++)
> +			amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
> +	} else {
> +		amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
> +	}
> +
> +	return 0;
> +}
> +
> +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid)
> +{
> +	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
> +	uint32_t flush_type = 0;
> +	bool all_hub = false;
> +
> +	if (adev->gmc.xgmi.num_physical_nodes &&
> +		adev->asic_type == CHIP_VEGA20)
> +		flush_type = 2;
> +
> +	if (adev->family == AMDGPU_FAMILY_AI)
> +		all_hub = true;
> +
> +	return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
> +}
> +
>   bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 069d5d230810..47b0f2957d1f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -136,6 +136,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
>   				uint32_t *ib_cmd, uint32_t ib_len);
>   void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
>   bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd);
> +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid);
> +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid);
>   
>   bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 536a153ac9a4..25b90f70aecd 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -32,6 +32,7 @@
>   #include <linux/mman.h>
>   #include <linux/file.h>
>   #include "amdgpu_amdkfd.h"
> +#include "amdgpu.h"
>   
>   struct mm_struct;
>   
> @@ -1152,16 +1153,17 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
>   void kfd_flush_tlb(struct kfd_process_device *pdd)
>   {
>   	struct kfd_dev *dev = pdd->dev;
> -	const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
>   
>   	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
>   		/* Nothing to flush until a VMID is assigned, which
>   		 * only happens when the first queue is created.
>   		 */
>   		if (pdd->qpd.vmid)
> -			f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
> +			amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
> +							pdd->qpd.vmid);
>   	} else {
> -		f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
> +		amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
> +						pdd->process->pasid);
>   	}
>   }
>   
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2020-01-13 16:58 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-11 18:39 [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock Alex Sierra
2020-01-11 18:39 ` [PATCH 2/7] drm/amdgpu: kiq pm4 function implementation for gfx_v9 Alex Sierra
2020-01-13 12:10   ` Christian König
2020-01-13 16:57   ` Felix Kuehling
2020-01-11 18:39 ` [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra
2020-01-13 12:12   ` Christian König
2020-01-13 16:36   ` Felix Kuehling
2020-01-11 18:39 ` [PATCH 4/7] drm/amdgpu: replace kcq enable function on gfx_v9 Alex Sierra
2020-01-13 12:12   ` Christian König
2020-01-13 16:53   ` Felix Kuehling
2020-01-11 18:39 ` [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid Alex Sierra
2020-01-13 12:15   ` Christian König
2020-01-13 16:49   ` Felix Kuehling
2020-01-11 18:39 ` [PATCH 6/7] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd Alex Sierra
2020-01-13 12:16   ` Christian König
2020-01-13 16:58   ` Felix Kuehling
2020-01-11 18:39 ` [PATCH 7/7] drm/amdgpu: flush TLB functions removal from kfd2kgd interface Alex Sierra
2020-01-13 12:17   ` Christian König
2020-01-13 12:10 ` [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.