All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/5] drm/amdgpu: Avoid reclaim fs while eviction lock
@ 2019-12-20  6:24 Alex Sierra
  2019-12-20  6:24 ` [PATCH 2/5] drm/amdgpu: export function to flush TLB via pasid Alex Sierra
                   ` (6 more replies)
  0 siblings, 7 replies; 24+ messages in thread
From: Alex Sierra @ 2019-12-20  6:24 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Sierra

[Why]
Avoid reclaim filesystem while eviction lock is held called from
MMU notifier.

[How]
Setting PF_MEMALLOC_NOFS flags while eviction mutex is locked.
Using memalloc_nofs_save / memalloc_nofs_restore API.

Change-Id: I5531c9337836e7d4a430df3f16dcc82888e8018c
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 14 ++++++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 28 +++++++++++++++++++++++++-
 2 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b999b67ff57a..b36daa6230fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -678,9 +678,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		}
 	}
 
-	mutex_lock(&vm->eviction_lock);
+	vm_eviction_lock(vm);
 	vm->evicting = false;
-	mutex_unlock(&vm->eviction_lock);
+	vm_eviction_unlock(vm);
 
 	return 0;
 }
@@ -1559,7 +1559,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (!(flags & AMDGPU_PTE_VALID))
 		owner = AMDGPU_FENCE_OWNER_KFD;
 
-	mutex_lock(&vm->eviction_lock);
+	vm_eviction_lock(vm);
 	if (vm->evicting) {
 		r = -EBUSY;
 		goto error_unlock;
@@ -1576,7 +1576,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	r = vm->update_funcs->commit(&params, fence);
 
 error_unlock:
-	mutex_unlock(&vm->eviction_lock);
+	vm_eviction_unlock(vm);
 	return r;
 }
 
@@ -2537,18 +2537,18 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
 		return false;
 
 	/* Try to block ongoing updates */
-	if (!mutex_trylock(&bo_base->vm->eviction_lock))
+	if (!vm_eviction_trylock(bo_base->vm))
 		return false;
 
 	/* Don't evict VM page tables while they are updated */
 	if (!dma_fence_is_signaled(bo_base->vm->last_direct) ||
 	    !dma_fence_is_signaled(bo_base->vm->last_delayed)) {
-		mutex_unlock(&bo_base->vm->eviction_lock);
+		vm_eviction_unlock(bo_base->vm);
 		return false;
 	}
 
 	bo_base->vm->evicting = true;
-	mutex_unlock(&bo_base->vm->eviction_lock);
+	vm_eviction_unlock(bo_base->vm);
 	return true;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 100547f094ff..d35aa76469ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -30,6 +30,7 @@
 #include <drm/gpu_scheduler.h>
 #include <drm/drm_file.h>
 #include <drm/ttm/ttm_bo_driver.h>
+#include <linux/sched/mm.h>
 
 #include "amdgpu_sync.h"
 #include "amdgpu_ring.h"
@@ -242,9 +243,12 @@ struct amdgpu_vm {
 	/* tree of virtual addresses mapped */
 	struct rb_root_cached	va;
 
-	/* Lock to prevent eviction while we are updating page tables */
+	/* Lock to prevent eviction while we are updating page tables
+	 * use vm_eviction_lock/unlock(vm)
+	 */
 	struct mutex		eviction_lock;
 	bool			evicting;
+	unsigned int            saved_flags;
 
 	/* BOs who needs a validation */
 	struct list_head	evicted;
@@ -436,4 +440,26 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
 				struct amdgpu_vm *vm);
 void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo);
 
+/* vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
+ * happens while holding this lock anywhere to prevent deadlocks when
+ * an MMU notifier runs in reclaim-FS context.
+ */
+static inline void vm_eviction_lock(struct amdgpu_vm *vm)
+{
+	mutex_lock(&vm->eviction_lock);
+	vm->saved_flags = memalloc_nofs_save();
+}
+static inline int vm_eviction_trylock(struct amdgpu_vm *vm)
+{
+	if (mutex_trylock(&vm->eviction_lock)) {
+		vm->saved_flags = memalloc_nofs_save();
+		return 1;
+	}
+	return 0;
+}
+static inline void vm_eviction_unlock(struct amdgpu_vm *vm)
+{
+	memalloc_nofs_restore(vm->saved_flags);
+	mutex_unlock(&vm->eviction_lock);
+}
 #endif
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 2/5] drm/amdgpu: export function to flush TLB via pasid
  2019-12-20  6:24 [PATCH 1/5] drm/amdgpu: Avoid reclaim fs while eviction lock Alex Sierra
@ 2019-12-20  6:24 ` Alex Sierra
  2019-12-20 21:32   ` Felix Kuehling
                     ` (2 more replies)
  2019-12-20  6:24 ` [PATCH 3/5] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd Alex Sierra
                   ` (5 subsequent siblings)
  6 siblings, 3 replies; 24+ messages in thread
From: Alex Sierra @ 2019-12-20  6:24 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Sierra

This can be used directly from amdgpu and amdkfd to invalidate
TLB through pasid.
It supports gmc v7, v8, v9 and v10.

Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  6 ++
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 81 ++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   | 33 ++++++++++
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   | 34 ++++++++++
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 84 +++++++++++++++++++++++++
 5 files changed, 238 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index b499a3de8bb6..b6413a56f546 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs {
 	/* flush the vm tlb via mmio */
 	void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
 				uint32_t vmhub, uint32_t flush_type);
+	/* flush the vm tlb via pasid */
+	int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
+					uint32_t flush_type, bool all_hub);
 	/* flush the vm tlb via ring */
 	uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
 				       uint64_t pd_addr);
@@ -216,6 +219,9 @@ struct amdgpu_gmc {
 };
 
 #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
+#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \
+	((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
+	((adev), (pasid), (type), (allhub)))
 #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
 #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
 #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index f5725336a5f2..b1a5408a8d7e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -30,6 +30,8 @@
 #include "hdp/hdp_5_0_0_sh_mask.h"
 #include "gc/gc_10_1_0_sh_mask.h"
 #include "mmhub/mmhub_2_0_0_sh_mask.h"
+#include "athub/athub_2_0_0_sh_mask.h"
+#include "athub/athub_2_0_0_offset.h"
 #include "dcn/dcn_2_0_0_offset.h"
 #include "dcn/dcn_2_0_0_sh_mask.h"
 #include "oss/osssys_5_0_0_offset.h"
@@ -37,6 +39,7 @@
 #include "navi10_enum.h"
 
 #include "soc15.h"
+#include "soc15d.h"
 #include "soc15_common.h"
 
 #include "nbio_v2_3.h"
@@ -234,6 +237,48 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
 		(!amdgpu_sriov_vf(adev)));
 }
 
+static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
+					struct amdgpu_device *adev,
+					uint8_t vmid, uint16_t *p_pasid)
+{
+	uint32_t value;
+
+	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+		     + vmid);
+	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
+static int gmc_v10_0_invalidate_tlbs_with_kiq(struct amdgpu_device *adev,
+					uint16_t pasid, uint32_t flush_type,
+					bool all_hub)
+{
+	signed long r;
+	uint32_t seq;
+	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+
+	spin_lock(&adev->gfx.kiq.ring_lock);
+	amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
+	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+	amdgpu_ring_write(ring,
+			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+	amdgpu_fence_emit_polling(ring, &seq);
+	amdgpu_ring_commit(ring);
+	spin_unlock(&adev->gfx.kiq.ring_lock);
+
+	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+	if (r < 1) {
+		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+		return -ETIME;
+	}
+
+	return 0;
+}
+
 /*
  * GART
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -380,6 +425,41 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
 }
 
+/**
+ * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flush
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					uint16_t pasid, uint32_t flush_type,
+					bool all_hub)
+{
+	int vmid;
+	uint16_t queried_pasid;
+	bool ret;
+	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+
+	if (amdgpu_emu_mode == 0 && ring->sched.ready)
+		return gmc_v10_0_invalidate_tlbs_with_kiq(adev,
+						pasid, flush_type, all_hub);
+
+	for (vmid = 1; vmid < 16; vmid++) {
+
+		ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+				&queried_pasid);
+		if (ret	&& queried_pasid == pasid) {
+			amdgpu_gmc_flush_gpu_tlb(adev, vmid,
+					AMDGPU_GFXHUB_0, 0);
+			break;
+		}
+	}
+
+	return 0;
+}
+
 static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 					     unsigned vmid, uint64_t pd_addr)
 {
@@ -531,6 +611,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
 
 static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
 	.flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
+	.flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
 	.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
 	.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
 	.map_mtype = gmc_v10_0_map_mtype,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index f08e5330642d..19d5b133e1d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -418,6 +418,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
 	return 0;
 }
 
+/**
+ * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flush
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					uint16_t pasid, uint32_t flush_type,
+					bool all_hub)
+{
+	int vmid;
+	unsigned int tmp;
+
+	if (adev->in_gpu_reset)
+		return -EIO;
+
+	for (vmid = 1; vmid < 16; vmid++) {
+
+		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
+			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
+			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+			RREG32(mmVM_INVALIDATE_RESPONSE);
+			break;
+		}
+	}
+
+	return 0;
+}
+
 /*
  * GART
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -1333,6 +1365,7 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = {
 
 static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
 	.flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb,
+	.flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid,
 	.emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb,
 	.emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping,
 	.set_prt = gmc_v7_0_set_prt,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 6d96d40fbcb8..27d83204fa2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
 	return 0;
 }
 
+/**
+ * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flush
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					uint16_t pasid, uint32_t flush_type,
+					bool all_hub)
+{
+	int vmid;
+	unsigned int tmp;
+
+	if (adev->in_gpu_reset)
+		return -EIO;
+
+	for (vmid = 1; vmid < 16; vmid++) {
+
+		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
+			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
+			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+			RREG32(mmVM_INVALIDATE_RESPONSE);
+			break;
+		}
+	}
+
+	return 0;
+
+}
+
 /*
  * GART
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -1700,6 +1733,7 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
 
 static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
 	.flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb,
+	.flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid,
 	.emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb,
 	.emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping,
 	.set_prt = gmc_v8_0_set_prt,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index fa025ceeea0f..eb1e64bd56ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -38,10 +38,12 @@
 #include "dce/dce_12_0_sh_mask.h"
 #include "vega10_enum.h"
 #include "mmhub/mmhub_1_0_offset.h"
+#include "athub/athub_1_0_sh_mask.h"
 #include "athub/athub_1_0_offset.h"
 #include "oss/osssys_4_0_offset.h"
 
 #include "soc15.h"
+#include "soc15d.h"
 #include "soc15_common.h"
 #include "umc/umc_6_0_sh_mask.h"
 
@@ -434,6 +436,47 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
 		   adev->pdev->device == 0x15d8)));
 }
 
+static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
+					uint8_t vmid, uint16_t *p_pasid)
+{
+	uint32_t value;
+
+	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+		     + vmid);
+	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
+static int gmc_v9_0_invalidate_tlbs_with_kiq(struct amdgpu_device *adev,
+				uint16_t pasid, uint32_t flush_type,
+				bool all_hub)
+{
+	signed long r;
+	uint32_t seq;
+	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+
+	spin_lock(&adev->gfx.kiq.ring_lock);
+	amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
+	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+	amdgpu_ring_write(ring,
+			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+	amdgpu_fence_emit_polling(ring, &seq);
+	amdgpu_ring_commit(ring);
+	spin_unlock(&adev->gfx.kiq.ring_lock);
+
+	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+	if (r < 1) {
+		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+		return -ETIME;
+	}
+
+	return 0;
+}
+
 /*
  * GART
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -532,6 +575,46 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
 }
 
+/**
+ * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flush
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					uint16_t pasid, uint32_t flush_type,
+					bool all_hub)
+{
+	int vmid, i;
+	uint16_t queried_pasid;
+	bool ret;
+	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+
+	if (adev->in_gpu_reset)
+		return -EIO;
+
+	if (ring->sched.ready)
+		return gmc_v9_0_invalidate_tlbs_with_kiq(adev,
+						pasid, flush_type, all_hub);
+
+	for (vmid = 1; vmid < 16; vmid++) {
+
+		ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+				&queried_pasid);
+		if (ret && queried_pasid == pasid) {
+			for (i = 0; i < adev->num_vmhubs; i++)
+				amdgpu_gmc_flush_gpu_tlb(adev, vmid,
+							i, flush_type);
+			break;
+		}
+	}
+
+	return 0;
+
+}
+
 static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 					    unsigned vmid, uint64_t pd_addr)
 {
@@ -693,6 +776,7 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
 
 static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
 	.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
+	.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
 	.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
 	.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
 	.map_mtype = gmc_v9_0_map_mtype,
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 3/5] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd
  2019-12-20  6:24 [PATCH 1/5] drm/amdgpu: Avoid reclaim fs while eviction lock Alex Sierra
  2019-12-20  6:24 ` [PATCH 2/5] drm/amdgpu: export function to flush TLB via pasid Alex Sierra
@ 2019-12-20  6:24 ` Alex Sierra
  2019-12-20 21:35   ` Felix Kuehling
  2019-12-20  6:24 ` [PATCH 4/5] drm/amdgpu: flush TLB functions removal from kfd2kgd interface Alex Sierra
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 24+ messages in thread
From: Alex Sierra @ 2019-12-20  6:24 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Sierra

[Why]
TLB flush method has been deprecated using kfd2kgd interface.
This implementation is now on the amdgpu_amdkfd API.

[How]
TLB flush functions now implemented in amdgpu_amdkfd.

Change-Id: Ic51cccdfe6e71288d78da772b6e1b6ced72f8ef7
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 32 ++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  2 ++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c   |  8 ++++--
 3 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index d3da9dde4ee1..b7f6e70c5762 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -634,6 +634,38 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
 	return false;
 }
 
+int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	/* TODO: condition missing for FAMILY above NV */
+	if (adev->family == AMDGPU_FAMILY_AI) {
+		int i;
+
+		for (i = 0; i < adev->num_vmhubs; i++)
+			amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
+	} else {
+		amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
+	}
+
+	return 0;
+}
+
+int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	uint32_t flush_type = 0;
+	bool all_hub = false;
+
+	if (adev->gmc.xgmi.num_physical_nodes &&
+		adev->asic_type == CHIP_VEGA20)
+		flush_type = 2;
+
+	if (adev->family == AMDGPU_FAMILY_AI)
+		all_hub = true;
+
+	return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
+}
+
 bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 069d5d230810..47b0f2957d1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -136,6 +136,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
 				uint32_t *ib_cmd, uint32_t ib_len);
 void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
 bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd);
+int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid);
+int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid);
 
 bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 536a153ac9a4..25b90f70aecd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -32,6 +32,7 @@
 #include <linux/mman.h>
 #include <linux/file.h>
 #include "amdgpu_amdkfd.h"
+#include "amdgpu.h"
 
 struct mm_struct;
 
@@ -1152,16 +1153,17 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
 void kfd_flush_tlb(struct kfd_process_device *pdd)
 {
 	struct kfd_dev *dev = pdd->dev;
-	const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
 
 	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
 		/* Nothing to flush until a VMID is assigned, which
 		 * only happens when the first queue is created.
 		 */
 		if (pdd->qpd.vmid)
-			f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
+			amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
+							pdd->qpd.vmid);
 	} else {
-		f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
+		amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
+						pdd->process->pasid);
 	}
 }
 
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 4/5] drm/amdgpu: flush TLB functions removal from kfd2kgd interface
  2019-12-20  6:24 [PATCH 1/5] drm/amdgpu: Avoid reclaim fs while eviction lock Alex Sierra
  2019-12-20  6:24 ` [PATCH 2/5] drm/amdgpu: export function to flush TLB via pasid Alex Sierra
  2019-12-20  6:24 ` [PATCH 3/5] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd Alex Sierra
@ 2019-12-20  6:24 ` Alex Sierra
  2019-12-20 21:38   ` Felix Kuehling
  2019-12-20  6:24 ` [PATCH 5/5] drm/amdgpu: invalidate BO during userptr mapping Alex Sierra
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 24+ messages in thread
From: Alex Sierra @ 2019-12-20  6:24 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Sierra

[Why]
kfd2kgd interface will be deprecated. This removal only covers TLB
invalidation for now. They have been replaced in amdgpu_amdkfd API.

[How]
TLB invalidate functions removed from the different amdkfd_gfx_v*
versions.

Change-Id: Ic2c7d4a0d19fe1e884dee1ff10a520d31252afee
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  2 -
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c    | 67 -------------
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 41 --------
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 41 --------
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 96 -------------------
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  2 -
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |  2 -
 7 files changed, 251 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 3c119407dc34..82e80b92e6ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -296,7 +296,5 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
 			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
 	.get_tile_config = kgd_gfx_v9_get_tile_config,
 	.set_vm_context_page_table_base = kgd_set_vm_context_page_table_base,
-	.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
-	.invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid,
 	.get_hive_id = amdgpu_amdkfd_get_hive_id,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 61cd707158e4..6132b4874498 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -686,71 +686,6 @@ static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
 	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
 }
 
-static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
-{
-	signed long r;
-	uint32_t seq;
-	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-
-	spin_lock(&adev->gfx.kiq.ring_lock);
-	amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
-	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
-	amdgpu_ring_write(ring,
-			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
-			PACKET3_INVALIDATE_TLBS_PASID(pasid));
-	amdgpu_fence_emit_polling(ring, &seq);
-	amdgpu_ring_commit(ring);
-	spin_unlock(&adev->gfx.kiq.ring_lock);
-
-	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
-	if (r < 1) {
-		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
-		return -ETIME;
-	}
-
-	return 0;
-}
-
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-	int vmid;
-	uint16_t queried_pasid;
-	bool ret;
-	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-
-	if (amdgpu_emu_mode == 0 && ring->sched.ready)
-		return invalidate_tlbs_with_kiq(adev, pasid);
-
-	for (vmid = 0; vmid < 16; vmid++) {
-		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
-			continue;
-
-		ret = get_atc_vmid_pasid_mapping_info(kgd, vmid,
-				&queried_pasid);
-		if (ret	&& queried_pasid == pasid) {
-			amdgpu_gmc_flush_gpu_tlb(adev, vmid,
-					AMDGPU_GFXHUB_0, 0);
-			break;
-		}
-	}
-
-	return 0;
-}
-
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
-	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
-		pr_err("non kfd vmid %d\n", vmid);
-		return 0;
-	}
-
-	amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
-	return 0;
-}
-
 static int kgd_address_watch_disable(struct kgd_dev *kgd)
 {
 	return 0;
@@ -832,7 +767,5 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
 			get_atc_vmid_pasid_mapping_info,
 	.get_tile_config = amdgpu_amdkfd_get_tile_config,
 	.set_vm_context_page_table_base = set_vm_context_page_table_base,
-	.invalidate_tlbs = invalidate_tlbs,
-	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
 	.get_hive_id = amdgpu_amdkfd_get_hive_id,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 6e6f0a99ec06..8f052e98a3c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -696,45 +696,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
 		lower_32_bits(page_table_base));
 }
 
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-	int vmid;
-	unsigned int tmp;
-
-	if (adev->in_gpu_reset)
-		return -EIO;
-
-	for (vmid = 0; vmid < 16; vmid++) {
-		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
-			continue;
-
-		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
-		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
-			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
-			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
-			RREG32(mmVM_INVALIDATE_RESPONSE);
-			break;
-		}
-	}
-
-	return 0;
-}
-
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
-	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
-		pr_err("non kfd vmid\n");
-		return 0;
-	}
-
-	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
-	RREG32(mmVM_INVALIDATE_RESPONSE);
-	return 0;
-}
-
  /**
   * read_vmid_from_vmfault_reg - read vmid from register
   *
@@ -771,7 +732,5 @@ const struct kfd2kgd_calls gfx_v7_kfd2kgd = {
 	.set_scratch_backing_va = set_scratch_backing_va,
 	.get_tile_config = get_tile_config,
 	.set_vm_context_page_table_base = set_vm_context_page_table_base,
-	.invalidate_tlbs = invalidate_tlbs,
-	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
 	.read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index bfbddedb2380..19a10db93d68 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -657,45 +657,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
 			lower_32_bits(page_table_base));
 }
 
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-	int vmid;
-	unsigned int tmp;
-
-	if (adev->in_gpu_reset)
-		return -EIO;
-
-	for (vmid = 0; vmid < 16; vmid++) {
-		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
-			continue;
-
-		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
-		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
-			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
-			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
-			RREG32(mmVM_INVALIDATE_RESPONSE);
-			break;
-		}
-	}
-
-	return 0;
-}
-
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
-	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
-		pr_err("non kfd vmid %d\n", vmid);
-		return -EINVAL;
-	}
-
-	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
-	RREG32(mmVM_INVALIDATE_RESPONSE);
-	return 0;
-}
-
 const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
 	.program_sh_mem_settings = kgd_program_sh_mem_settings,
 	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
@@ -717,6 +678,4 @@ const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
 	.set_scratch_backing_va = set_scratch_backing_va,
 	.get_tile_config = get_tile_config,
 	.set_vm_context_page_table_base = set_vm_context_page_table_base,
-	.invalidate_tlbs = invalidate_tlbs,
-	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index e7861f0ef415..932ae85d97e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -617,100 +617,6 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
 	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
 }
 
-static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid,
-			uint32_t flush_type)
-{
-	signed long r;
-	uint32_t seq;
-	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-
-	spin_lock(&adev->gfx.kiq.ring_lock);
-	amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
-	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
-	amdgpu_ring_write(ring,
-			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
-			PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
-			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
-			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
-	amdgpu_fence_emit_polling(ring, &seq);
-	amdgpu_ring_commit(ring);
-	spin_unlock(&adev->gfx.kiq.ring_lock);
-
-	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
-	if (r < 1) {
-		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
-		return -ETIME;
-	}
-
-	return 0;
-}
-
-int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-	int vmid, i;
-	uint16_t queried_pasid;
-	bool ret;
-	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-	uint32_t flush_type = 0;
-
-	if (adev->in_gpu_reset)
-		return -EIO;
-	if (adev->gmc.xgmi.num_physical_nodes &&
-		adev->asic_type == CHIP_VEGA20)
-		flush_type = 2;
-
-	if (ring->sched.ready)
-		return invalidate_tlbs_with_kiq(adev, pasid, flush_type);
-
-	for (vmid = 0; vmid < 16; vmid++) {
-		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
-			continue;
-
-		ret = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(kgd, vmid,
-				&queried_pasid);
-		if (ret && queried_pasid == pasid) {
-			for (i = 0; i < adev->num_vmhubs; i++)
-				amdgpu_gmc_flush_gpu_tlb(adev, vmid,
-							i, flush_type);
-			break;
-		}
-	}
-
-	return 0;
-}
-
-int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-	int i;
-
-	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
-		pr_err("non kfd vmid %d\n", vmid);
-		return 0;
-	}
-
-	/* Use legacy mode tlb invalidation.
-	 *
-	 * Currently on Raven the code below is broken for anything but
-	 * legacy mode due to a MMHUB power gating problem. A workaround
-	 * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
-	 * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
-	 * bit.
-	 *
-	 * TODO 1: agree on the right set of invalidation registers for
-	 * KFD use. Use the last one for now. Invalidate both GC and
-	 * MMHUB.
-	 *
-	 * TODO 2: support range-based invalidation, requires kfg2kgd
-	 * interface change
-	 */
-	for (i = 0; i < adev->num_vmhubs; i++)
-		amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
-
-	return 0;
-}
-
 int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd)
 {
 	return 0;
@@ -793,7 +699,5 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
 			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
 	.get_tile_config = kgd_gfx_v9_get_tile_config,
 	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
-	.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
-	.invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid,
 	.get_hive_id = amdgpu_amdkfd_get_hive_id,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
index 02b1426d17d1..dfafa28b7559 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -57,7 +57,5 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,
 
 bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
 					uint8_t vmid, uint16_t *p_pasid);
-int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
-int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
 int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd,
 		struct tile_config *config);
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 2cd217e60125..a01ef836ad58 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -307,8 +307,6 @@ struct kfd2kgd_calls {
 
 	void (*set_vm_context_page_table_base)(struct kgd_dev *kgd,
 			uint32_t vmid, uint64_t page_table_base);
-	int (*invalidate_tlbs)(struct kgd_dev *kgd, uint16_t pasid);
-	int (*invalidate_tlbs_vmid)(struct kgd_dev *kgd, uint16_t vmid);
 	uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd);
 	uint64_t (*get_hive_id)(struct kgd_dev *kgd);
 
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 5/5] drm/amdgpu: invalidate BO during userptr mapping
  2019-12-20  6:24 [PATCH 1/5] drm/amdgpu: Avoid reclaim fs while eviction lock Alex Sierra
                   ` (2 preceding siblings ...)
  2019-12-20  6:24 ` [PATCH 4/5] drm/amdgpu: flush TLB functions removal from kfd2kgd interface Alex Sierra
@ 2019-12-20  6:24 ` Alex Sierra
  2019-12-20 21:40   ` Felix Kuehling
  2019-12-20 21:39 ` [PATCH 1/5] drm/amdgpu: Avoid reclaim fs while eviction lock Felix Kuehling
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 24+ messages in thread
From: Alex Sierra @ 2019-12-20  6:24 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Sierra

This is required for HMM functionality only on GFXv9 GPU, which supports
recoverable page faults.

[Why]
Instead of stopping all user mode queues during a userptr mapping.
The GFXv9 recoverable page fault is used to revalidate userptr mappings.
Now, this will be done on the page fault handler.

[How]
Invalidate buffer objects that correspond to the specific address range
on the mmu notifier.

Change-Id: I94b8fee8d88ff240b619cba1c5458aba98b17736
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 56 ++++++++++++++++++++++++--
 1 file changed, 52 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 392300f77b13..06415d8ad3c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -187,6 +187,45 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
 	}
 }
 
+/**
+ * amdgpu_mn_invalidate_bo - invalidate a BO
+ *
+ * @bo: amdgpu buffer object to invalidate
+ * @adev: amdgpu device pointer
+ *
+ * Block for operations on BO while is cleared.
+ */
+static int amdgpu_mn_invalidate_bo(struct amdgpu_device *adev,
+				     struct amdgpu_bo *bo)
+{
+	struct amdgpu_vm_bo_base *bo_base;
+	struct amdgpu_bo_va *bo_va;
+	struct kgd_dev *kgd = (struct kgd_dev *)adev;
+	long r = 0;
+	long tmo;
+
+	tmo = msecs_to_jiffies(100);
+	for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
+		bo_va = container_of(bo_base, struct amdgpu_bo_va, base);
+		r = amdgpu_vm_bo_update(adev, bo_va, true);
+		if (r)
+			break;
+
+		r = dma_fence_wait_timeout(bo_va->last_pt_update, false, tmo);
+		if (r <= 0) {
+			if (r == 0)
+				r = -ETIMEDOUT;
+
+			break;
+		} else {
+			r = 0;
+		}
+
+		amdgpu_amdkfd_flush_gpu_tlb_pasid(kgd, bo_base->vm->pasid);
+	}
+	return r;
+}
+
 /**
  * amdgpu_mn_sync_pagetables_gfx - callback to notify about mm change
  *
@@ -250,6 +289,7 @@ amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
 			      const struct mmu_notifier_range *update)
 {
 	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
+	struct amdgpu_device *adev = amn->adev;
 	unsigned long start = update->start;
 	unsigned long end = update->end;
 	bool blockable = mmu_notifier_range_blockable(update);
@@ -275,11 +315,19 @@ amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
 		it = interval_tree_iter_next(it, start, end);
 
 		list_for_each_entry(bo, &node->bos, mn_list) {
-			struct kgd_mem *mem = bo->kfd_bo;
 
-			if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
-							 start, end))
-				amdgpu_amdkfd_evict_userptr(mem, amn->mm);
+			if (amdgpu_ttm_tt_affect_userptr(
+						bo->tbo.ttm, start, end)){
+				if (!amdgpu_noretry &&
+				adev->family >= AMDGPU_FAMILY_AI) {
+					amdgpu_mn_invalidate_bo(adev, bo);
+				} else {
+					struct kgd_mem *mem = bo->kfd_bo;
+
+					amdgpu_amdkfd_evict_userptr(mem,
+								amn->mm);
+				}
+			}
 		}
 	}
 
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/5] drm/amdgpu: export function to flush TLB via pasid
  2019-12-20  6:24 ` [PATCH 2/5] drm/amdgpu: export function to flush TLB via pasid Alex Sierra
@ 2019-12-20 21:32   ` Felix Kuehling
  2019-12-20 23:51   ` Yong Zhao
  2020-01-05 15:41   ` Christian König
  2 siblings, 0 replies; 24+ messages in thread
From: Felix Kuehling @ 2019-12-20 21:32 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx

On 2019-12-20 1:24, Alex Sierra wrote:
> This can be used directly from amdgpu and amdkfd to invalidate
> TLB through pasid.
> It supports gmc v7, v8, v9 and v10.

Two small corrections inline to make the behaviour between KIQ and 
MMIO-based flushing consistent. Looks good otherwise.


>
> Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  6 ++
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 81 ++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   | 33 ++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   | 34 ++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 84 +++++++++++++++++++++++++
>   5 files changed, 238 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index b499a3de8bb6..b6413a56f546 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs {
>   	/* flush the vm tlb via mmio */
>   	void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
>   				uint32_t vmhub, uint32_t flush_type);
> +	/* flush the vm tlb via pasid */
> +	int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
> +					uint32_t flush_type, bool all_hub);
>   	/* flush the vm tlb via ring */
>   	uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
>   				       uint64_t pd_addr);
> @@ -216,6 +219,9 @@ struct amdgpu_gmc {
>   };
>   
>   #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
> +#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \
> +	((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
> +	((adev), (pasid), (type), (allhub)))
>   #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
>   #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
>   #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index f5725336a5f2..b1a5408a8d7e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -30,6 +30,8 @@
>   #include "hdp/hdp_5_0_0_sh_mask.h"
>   #include "gc/gc_10_1_0_sh_mask.h"
>   #include "mmhub/mmhub_2_0_0_sh_mask.h"
> +#include "athub/athub_2_0_0_sh_mask.h"
> +#include "athub/athub_2_0_0_offset.h"
>   #include "dcn/dcn_2_0_0_offset.h"
>   #include "dcn/dcn_2_0_0_sh_mask.h"
>   #include "oss/osssys_5_0_0_offset.h"
> @@ -37,6 +39,7 @@
>   #include "navi10_enum.h"
>   
>   #include "soc15.h"
> +#include "soc15d.h"
>   #include "soc15_common.h"
>   
>   #include "nbio_v2_3.h"
> @@ -234,6 +237,48 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
>   		(!amdgpu_sriov_vf(adev)));
>   }
>   
> +static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
> +					struct amdgpu_device *adev,
> +					uint8_t vmid, uint16_t *p_pasid)
> +{
> +	uint32_t value;
> +
> +	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
> +		     + vmid);
> +	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
> +
> +	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
> +}
> +
> +static int gmc_v10_0_invalidate_tlbs_with_kiq(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	signed long r;
> +	uint32_t seq;
> +	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> +
> +	spin_lock(&adev->gfx.kiq.ring_lock);
> +	amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
> +	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> +	amdgpu_ring_write(ring,
> +			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> +			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> +			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> +			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> +	amdgpu_fence_emit_polling(ring, &seq);
> +	amdgpu_ring_commit(ring);
> +	spin_unlock(&adev->gfx.kiq.ring_lock);
> +
> +	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
> +	if (r < 1) {
> +		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> +		return -ETIME;
> +	}
> +
> +	return 0;
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -380,6 +425,41 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
>   }
>   
> +/**
> + * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flush
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	int vmid;
> +	uint16_t queried_pasid;
> +	bool ret;
> +	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> +
> +	if (amdgpu_emu_mode == 0 && ring->sched.ready)
> +		return gmc_v10_0_invalidate_tlbs_with_kiq(adev,
> +						pasid, flush_type, all_hub);
> +
> +	for (vmid = 1; vmid < 16; vmid++) {
> +
> +		ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
> +				&queried_pasid);
> +		if (ret	&& queried_pasid == pasid) {
> +			amdgpu_gmc_flush_gpu_tlb(adev, vmid,
> +					AMDGPU_GFXHUB_0, 0);

This should honor the all_hub flag.

Also, this calls the function through the function pointer, which is 
unnecessary. You know that you need the gfx_10 version of the function, 
so you can call gmc_v10_0_flush_gpu_tlb directly here.


> +			break;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>   static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>   					     unsigned vmid, uint64_t pd_addr)
>   {
> @@ -531,6 +611,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
>   
>   static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
>   	.flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
> +	.flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
>   	.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
>   	.map_mtype = gmc_v10_0_map_mtype,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> index f08e5330642d..19d5b133e1d7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> @@ -418,6 +418,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
>   	return 0;
>   }
>   
> +/**
> + * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flush
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	int vmid;
> +	unsigned int tmp;
> +
> +	if (adev->in_gpu_reset)
> +		return -EIO;
> +
> +	for (vmid = 1; vmid < 16; vmid++) {
> +
> +		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> +		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> +			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> +			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> +			RREG32(mmVM_INVALIDATE_RESPONSE);
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -1333,6 +1365,7 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = {
>   
>   static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
>   	.flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb,
> +	.flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid,
>   	.emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping,
>   	.set_prt = gmc_v7_0_set_prt,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> index 6d96d40fbcb8..27d83204fa2b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> @@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
>   	return 0;
>   }
>   
> +/**
> + * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flush
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	int vmid;
> +	unsigned int tmp;
> +
> +	if (adev->in_gpu_reset)
> +		return -EIO;
> +
> +	for (vmid = 1; vmid < 16; vmid++) {
> +
> +		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> +		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> +			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> +			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> +			RREG32(mmVM_INVALIDATE_RESPONSE);
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -1700,6 +1733,7 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
>   
>   static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
>   	.flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb,
> +	.flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid,
>   	.emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping,
>   	.set_prt = gmc_v8_0_set_prt,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index fa025ceeea0f..eb1e64bd56ed 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -38,10 +38,12 @@
>   #include "dce/dce_12_0_sh_mask.h"
>   #include "vega10_enum.h"
>   #include "mmhub/mmhub_1_0_offset.h"
> +#include "athub/athub_1_0_sh_mask.h"
>   #include "athub/athub_1_0_offset.h"
>   #include "oss/osssys_4_0_offset.h"
>   
>   #include "soc15.h"
> +#include "soc15d.h"
>   #include "soc15_common.h"
>   #include "umc/umc_6_0_sh_mask.h"
>   
> @@ -434,6 +436,47 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
>   		   adev->pdev->device == 0x15d8)));
>   }
>   
> +static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
> +					uint8_t vmid, uint16_t *p_pasid)
> +{
> +	uint32_t value;
> +
> +	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
> +		     + vmid);
> +	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
> +
> +	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
> +}
> +
> +static int gmc_v9_0_invalidate_tlbs_with_kiq(struct amdgpu_device *adev,
> +				uint16_t pasid, uint32_t flush_type,
> +				bool all_hub)
> +{
> +	signed long r;
> +	uint32_t seq;
> +	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> +
> +	spin_lock(&adev->gfx.kiq.ring_lock);
> +	amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
> +	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> +	amdgpu_ring_write(ring,
> +			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> +			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> +			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> +			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> +	amdgpu_fence_emit_polling(ring, &seq);
> +	amdgpu_ring_commit(ring);
> +	spin_unlock(&adev->gfx.kiq.ring_lock);
> +
> +	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
> +	if (r < 1) {
> +		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> +		return -ETIME;
> +	}
> +
> +	return 0;
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -532,6 +575,46 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
>   }
>   
> +/**
> + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flush
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	int vmid, i;
> +	uint16_t queried_pasid;
> +	bool ret;
> +	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> +
> +	if (adev->in_gpu_reset)
> +		return -EIO;
> +
> +	if (ring->sched.ready)
> +		return gmc_v9_0_invalidate_tlbs_with_kiq(adev,
> +						pasid, flush_type, all_hub);
> +
> +	for (vmid = 1; vmid < 16; vmid++) {
> +
> +		ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
> +				&queried_pasid);
> +		if (ret && queried_pasid == pasid) {
> +			for (i = 0; i < adev->num_vmhubs; i++)
> +				amdgpu_gmc_flush_gpu_tlb(adev, vmid,
> +							i, flush_type);

This unconditionally flushes all hubs. It should honor the all_hubs flag.

As above, you can call gmc_v9_0_flush_gpu_tlb directly here.

Regards,
   Felix

> +			break;
> +		}
> +	}
> +
> +	return 0;
> +
> +}
> +
>   static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>   					    unsigned vmid, uint64_t pd_addr)
>   {
> @@ -693,6 +776,7 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
>   
>   static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
>   	.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
> +	.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
>   	.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
>   	.map_mtype = gmc_v9_0_map_mtype,
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/5] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd
  2019-12-20  6:24 ` [PATCH 3/5] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd Alex Sierra
@ 2019-12-20 21:35   ` Felix Kuehling
  2019-12-20 23:50     ` Yong Zhao
  0 siblings, 1 reply; 24+ messages in thread
From: Felix Kuehling @ 2019-12-20 21:35 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx

On 2019-12-20 1:24, Alex Sierra wrote:
> [Why]
> TLB flush method has been deprecated using kfd2kgd interface.
> This implementation is now on the amdgpu_amdkfd API.
>
> [How]
> TLB flush functions now implemented in amdgpu_amdkfd.
>
> Change-Id: Ic51cccdfe6e71288d78da772b6e1b6ced72f8ef7
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>

Looks good to me. See my comment about the TODO inline.


> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 32 ++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  2 ++
>   drivers/gpu/drm/amd/amdkfd/kfd_process.c   |  8 ++++--
>   3 files changed, 39 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index d3da9dde4ee1..b7f6e70c5762 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -634,6 +634,38 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
>   	return false;
>   }
>   
> +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
> +{
> +	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
> +	/* TODO: condition missing for FAMILY above NV */

I'm not sure what's missing here. NV and above don't need any special 
treatment. Since SDMA is connected to GFXHUB on NV, only the GFXHUB 
needs to be flushed.

Regards,
   Felix


> +	if (adev->family == AMDGPU_FAMILY_AI) {
> +		int i;
> +
> +		for (i = 0; i < adev->num_vmhubs; i++)
> +			amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
> +	} else {
> +		amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
> +	}
> +
> +	return 0;
> +}
> +
> +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid)
> +{
> +	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
> +	uint32_t flush_type = 0;
> +	bool all_hub = false;
> +
> +	if (adev->gmc.xgmi.num_physical_nodes &&
> +		adev->asic_type == CHIP_VEGA20)
> +		flush_type = 2;
> +
> +	if (adev->family == AMDGPU_FAMILY_AI)
> +		all_hub = true;
> +
> +	return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
> +}
> +
>   bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 069d5d230810..47b0f2957d1f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -136,6 +136,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
>   				uint32_t *ib_cmd, uint32_t ib_len);
>   void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
>   bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd);
> +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid);
> +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid);
>   
>   bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 536a153ac9a4..25b90f70aecd 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -32,6 +32,7 @@
>   #include <linux/mman.h>
>   #include <linux/file.h>
>   #include "amdgpu_amdkfd.h"
> +#include "amdgpu.h"
>   
>   struct mm_struct;
>   
> @@ -1152,16 +1153,17 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
>   void kfd_flush_tlb(struct kfd_process_device *pdd)
>   {
>   	struct kfd_dev *dev = pdd->dev;
> -	const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
>   
>   	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
>   		/* Nothing to flush until a VMID is assigned, which
>   		 * only happens when the first queue is created.
>   		 */
>   		if (pdd->qpd.vmid)
> -			f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
> +			amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
> +							pdd->qpd.vmid);
>   	} else {
> -		f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
> +		amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
> +						pdd->process->pasid);
>   	}
>   }
>   
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 4/5] drm/amdgpu: flush TLB functions removal from kfd2kgd interface
  2019-12-20  6:24 ` [PATCH 4/5] drm/amdgpu: flush TLB functions removal from kfd2kgd interface Alex Sierra
@ 2019-12-20 21:38   ` Felix Kuehling
  0 siblings, 0 replies; 24+ messages in thread
From: Felix Kuehling @ 2019-12-20 21:38 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx

On 2019-12-20 1:24, Alex Sierra wrote:
> [Why]
> kfd2kgd interface will be deprecated. This removal only covers TLB
> invalidation for now. They have been replaced in amdgpu_amdkfd API.
>
> [How]
> TLB invalidate functions removed from the different amdkfd_gfx_v*
> versions.
>
> Change-Id: Ic2c7d4a0d19fe1e884dee1ff10a520d31252afee
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>

This patch is

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>

> ---
>   .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  2 -
>   .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c    | 67 -------------
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 41 --------
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 41 --------
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 96 -------------------
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  2 -
>   .../gpu/drm/amd/include/kgd_kfd_interface.h   |  2 -
>   7 files changed, 251 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> index 3c119407dc34..82e80b92e6ce 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> @@ -296,7 +296,5 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
>   			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
>   	.get_tile_config = kgd_gfx_v9_get_tile_config,
>   	.set_vm_context_page_table_base = kgd_set_vm_context_page_table_base,
> -	.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
> -	.invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid,
>   	.get_hive_id = amdgpu_amdkfd_get_hive_id,
>   };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index 61cd707158e4..6132b4874498 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -686,71 +686,6 @@ static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
>   	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
>   }
>   
> -static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
> -{
> -	signed long r;
> -	uint32_t seq;
> -	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> -
> -	spin_lock(&adev->gfx.kiq.ring_lock);
> -	amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
> -	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> -	amdgpu_ring_write(ring,
> -			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> -			PACKET3_INVALIDATE_TLBS_PASID(pasid));
> -	amdgpu_fence_emit_polling(ring, &seq);
> -	amdgpu_ring_commit(ring);
> -	spin_unlock(&adev->gfx.kiq.ring_lock);
> -
> -	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
> -	if (r < 1) {
> -		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> -		return -ETIME;
> -	}
> -
> -	return 0;
> -}
> -
> -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
> -{
> -	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> -	int vmid;
> -	uint16_t queried_pasid;
> -	bool ret;
> -	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> -
> -	if (amdgpu_emu_mode == 0 && ring->sched.ready)
> -		return invalidate_tlbs_with_kiq(adev, pasid);
> -
> -	for (vmid = 0; vmid < 16; vmid++) {
> -		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
> -			continue;
> -
> -		ret = get_atc_vmid_pasid_mapping_info(kgd, vmid,
> -				&queried_pasid);
> -		if (ret	&& queried_pasid == pasid) {
> -			amdgpu_gmc_flush_gpu_tlb(adev, vmid,
> -					AMDGPU_GFXHUB_0, 0);
> -			break;
> -		}
> -	}
> -
> -	return 0;
> -}
> -
> -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
> -{
> -	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> -
> -	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
> -		pr_err("non kfd vmid %d\n", vmid);
> -		return 0;
> -	}
> -
> -	amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
> -	return 0;
> -}
> -
>   static int kgd_address_watch_disable(struct kgd_dev *kgd)
>   {
>   	return 0;
> @@ -832,7 +767,5 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
>   			get_atc_vmid_pasid_mapping_info,
>   	.get_tile_config = amdgpu_amdkfd_get_tile_config,
>   	.set_vm_context_page_table_base = set_vm_context_page_table_base,
> -	.invalidate_tlbs = invalidate_tlbs,
> -	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
>   	.get_hive_id = amdgpu_amdkfd_get_hive_id,
>   };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> index 6e6f0a99ec06..8f052e98a3c6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> @@ -696,45 +696,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
>   		lower_32_bits(page_table_base));
>   }
>   
> -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
> -{
> -	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> -	int vmid;
> -	unsigned int tmp;
> -
> -	if (adev->in_gpu_reset)
> -		return -EIO;
> -
> -	for (vmid = 0; vmid < 16; vmid++) {
> -		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
> -			continue;
> -
> -		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> -		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> -			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> -			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> -			RREG32(mmVM_INVALIDATE_RESPONSE);
> -			break;
> -		}
> -	}
> -
> -	return 0;
> -}
> -
> -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
> -{
> -	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> -
> -	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
> -		pr_err("non kfd vmid\n");
> -		return 0;
> -	}
> -
> -	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> -	RREG32(mmVM_INVALIDATE_RESPONSE);
> -	return 0;
> -}
> -
>    /**
>     * read_vmid_from_vmfault_reg - read vmid from register
>     *
> @@ -771,7 +732,5 @@ const struct kfd2kgd_calls gfx_v7_kfd2kgd = {
>   	.set_scratch_backing_va = set_scratch_backing_va,
>   	.get_tile_config = get_tile_config,
>   	.set_vm_context_page_table_base = set_vm_context_page_table_base,
> -	.invalidate_tlbs = invalidate_tlbs,
> -	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
>   	.read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
>   };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> index bfbddedb2380..19a10db93d68 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> @@ -657,45 +657,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
>   			lower_32_bits(page_table_base));
>   }
>   
> -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
> -{
> -	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> -	int vmid;
> -	unsigned int tmp;
> -
> -	if (adev->in_gpu_reset)
> -		return -EIO;
> -
> -	for (vmid = 0; vmid < 16; vmid++) {
> -		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
> -			continue;
> -
> -		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> -		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> -			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> -			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> -			RREG32(mmVM_INVALIDATE_RESPONSE);
> -			break;
> -		}
> -	}
> -
> -	return 0;
> -}
> -
> -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
> -{
> -	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> -
> -	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
> -		pr_err("non kfd vmid %d\n", vmid);
> -		return -EINVAL;
> -	}
> -
> -	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> -	RREG32(mmVM_INVALIDATE_RESPONSE);
> -	return 0;
> -}
> -
>   const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
>   	.program_sh_mem_settings = kgd_program_sh_mem_settings,
>   	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
> @@ -717,6 +678,4 @@ const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
>   	.set_scratch_backing_va = set_scratch_backing_va,
>   	.get_tile_config = get_tile_config,
>   	.set_vm_context_page_table_base = set_vm_context_page_table_base,
> -	.invalidate_tlbs = invalidate_tlbs,
> -	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
>   };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index e7861f0ef415..932ae85d97e2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -617,100 +617,6 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
>   	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
>   }
>   
> -static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid,
> -			uint32_t flush_type)
> -{
> -	signed long r;
> -	uint32_t seq;
> -	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> -
> -	spin_lock(&adev->gfx.kiq.ring_lock);
> -	amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
> -	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> -	amdgpu_ring_write(ring,
> -			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> -			PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
> -			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> -			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> -	amdgpu_fence_emit_polling(ring, &seq);
> -	amdgpu_ring_commit(ring);
> -	spin_unlock(&adev->gfx.kiq.ring_lock);
> -
> -	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
> -	if (r < 1) {
> -		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> -		return -ETIME;
> -	}
> -
> -	return 0;
> -}
> -
> -int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
> -{
> -	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> -	int vmid, i;
> -	uint16_t queried_pasid;
> -	bool ret;
> -	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> -	uint32_t flush_type = 0;
> -
> -	if (adev->in_gpu_reset)
> -		return -EIO;
> -	if (adev->gmc.xgmi.num_physical_nodes &&
> -		adev->asic_type == CHIP_VEGA20)
> -		flush_type = 2;
> -
> -	if (ring->sched.ready)
> -		return invalidate_tlbs_with_kiq(adev, pasid, flush_type);
> -
> -	for (vmid = 0; vmid < 16; vmid++) {
> -		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
> -			continue;
> -
> -		ret = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(kgd, vmid,
> -				&queried_pasid);
> -		if (ret && queried_pasid == pasid) {
> -			for (i = 0; i < adev->num_vmhubs; i++)
> -				amdgpu_gmc_flush_gpu_tlb(adev, vmid,
> -							i, flush_type);
> -			break;
> -		}
> -	}
> -
> -	return 0;
> -}
> -
> -int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
> -{
> -	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> -	int i;
> -
> -	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
> -		pr_err("non kfd vmid %d\n", vmid);
> -		return 0;
> -	}
> -
> -	/* Use legacy mode tlb invalidation.
> -	 *
> -	 * Currently on Raven the code below is broken for anything but
> -	 * legacy mode due to a MMHUB power gating problem. A workaround
> -	 * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
> -	 * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
> -	 * bit.
> -	 *
> -	 * TODO 1: agree on the right set of invalidation registers for
> -	 * KFD use. Use the last one for now. Invalidate both GC and
> -	 * MMHUB.
> -	 *
> -	 * TODO 2: support range-based invalidation, requires kfg2kgd
> -	 * interface change
> -	 */
> -	for (i = 0; i < adev->num_vmhubs; i++)
> -		amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
> -
> -	return 0;
> -}
> -
>   int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd)
>   {
>   	return 0;
> @@ -793,7 +699,5 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
>   			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
>   	.get_tile_config = kgd_gfx_v9_get_tile_config,
>   	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
> -	.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
> -	.invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid,
>   	.get_hive_id = amdgpu_amdkfd_get_hive_id,
>   };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> index 02b1426d17d1..dfafa28b7559 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> @@ -57,7 +57,5 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,
>   
>   bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
>   					uint8_t vmid, uint16_t *p_pasid);
> -int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
> -int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
>   int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd,
>   		struct tile_config *config);
> diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> index 2cd217e60125..a01ef836ad58 100644
> --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> @@ -307,8 +307,6 @@ struct kfd2kgd_calls {
>   
>   	void (*set_vm_context_page_table_base)(struct kgd_dev *kgd,
>   			uint32_t vmid, uint64_t page_table_base);
> -	int (*invalidate_tlbs)(struct kgd_dev *kgd, uint16_t pasid);
> -	int (*invalidate_tlbs_vmid)(struct kgd_dev *kgd, uint16_t vmid);
>   	uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd);
>   	uint64_t (*get_hive_id)(struct kgd_dev *kgd);
>   
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/5] drm/amdgpu: Avoid reclaim fs while eviction lock
  2019-12-20  6:24 [PATCH 1/5] drm/amdgpu: Avoid reclaim fs while eviction lock Alex Sierra
                   ` (3 preceding siblings ...)
  2019-12-20  6:24 ` [PATCH 5/5] drm/amdgpu: invalidate BO during userptr mapping Alex Sierra
@ 2019-12-20 21:39 ` Felix Kuehling
  2019-12-20 23:28 ` Yong Zhao
  2020-01-02  9:16 ` Christian König
  6 siblings, 0 replies; 24+ messages in thread
From: Felix Kuehling @ 2019-12-20 21:39 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx

On 2019-12-20 1:24, Alex Sierra wrote:
> [Why]
> Avoid reclaim filesystem while eviction lock is held called from
> MMU notifier.
>
> [How]
> Setting PF_MEMALLOC_NOFS flags while eviction mutex is locked.
> Using memalloc_nofs_save / memalloc_nofs_restore API.
>
> Change-Id: I5531c9337836e7d4a430df3f16dcc82888e8018c
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
This patch is

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 14 ++++++-------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 28 +++++++++++++++++++++++++-
>   2 files changed, 34 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index b999b67ff57a..b36daa6230fb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -678,9 +678,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   		}
>   	}
>   
> -	mutex_lock(&vm->eviction_lock);
> +	vm_eviction_lock(vm);
>   	vm->evicting = false;
> -	mutex_unlock(&vm->eviction_lock);
> +	vm_eviction_unlock(vm);
>   
>   	return 0;
>   }
> @@ -1559,7 +1559,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   	if (!(flags & AMDGPU_PTE_VALID))
>   		owner = AMDGPU_FENCE_OWNER_KFD;
>   
> -	mutex_lock(&vm->eviction_lock);
> +	vm_eviction_lock(vm);
>   	if (vm->evicting) {
>   		r = -EBUSY;
>   		goto error_unlock;
> @@ -1576,7 +1576,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   	r = vm->update_funcs->commit(&params, fence);
>   
>   error_unlock:
> -	mutex_unlock(&vm->eviction_lock);
> +	vm_eviction_unlock(vm);
>   	return r;
>   }
>   
> @@ -2537,18 +2537,18 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
>   		return false;
>   
>   	/* Try to block ongoing updates */
> -	if (!mutex_trylock(&bo_base->vm->eviction_lock))
> +	if (!vm_eviction_trylock(bo_base->vm))
>   		return false;
>   
>   	/* Don't evict VM page tables while they are updated */
>   	if (!dma_fence_is_signaled(bo_base->vm->last_direct) ||
>   	    !dma_fence_is_signaled(bo_base->vm->last_delayed)) {
> -		mutex_unlock(&bo_base->vm->eviction_lock);
> +		vm_eviction_unlock(bo_base->vm);
>   		return false;
>   	}
>   
>   	bo_base->vm->evicting = true;
> -	mutex_unlock(&bo_base->vm->eviction_lock);
> +	vm_eviction_unlock(bo_base->vm);
>   	return true;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 100547f094ff..d35aa76469ec 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -30,6 +30,7 @@
>   #include <drm/gpu_scheduler.h>
>   #include <drm/drm_file.h>
>   #include <drm/ttm/ttm_bo_driver.h>
> +#include <linux/sched/mm.h>
>   
>   #include "amdgpu_sync.h"
>   #include "amdgpu_ring.h"
> @@ -242,9 +243,12 @@ struct amdgpu_vm {
>   	/* tree of virtual addresses mapped */
>   	struct rb_root_cached	va;
>   
> -	/* Lock to prevent eviction while we are updating page tables */
> +	/* Lock to prevent eviction while we are updating page tables
> +	 * use vm_eviction_lock/unlock(vm)
> +	 */
>   	struct mutex		eviction_lock;
>   	bool			evicting;
> +	unsigned int            saved_flags;
>   
>   	/* BOs who needs a validation */
>   	struct list_head	evicted;
> @@ -436,4 +440,26 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>   				struct amdgpu_vm *vm);
>   void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo);
>   
> +/* vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
> + * happens while holding this lock anywhere to prevent deadlocks when
> + * an MMU notifier runs in reclaim-FS context.
> + */
> +static inline void vm_eviction_lock(struct amdgpu_vm *vm)
> +{
> +	mutex_lock(&vm->eviction_lock);
> +	vm->saved_flags = memalloc_nofs_save();
> +}
> +static inline int vm_eviction_trylock(struct amdgpu_vm *vm)
> +{
> +	if (mutex_trylock(&vm->eviction_lock)) {
> +		vm->saved_flags = memalloc_nofs_save();
> +		return 1;
> +	}
> +	return 0;
> +}
> +static inline void vm_eviction_unlock(struct amdgpu_vm *vm)
> +{
> +	memalloc_nofs_restore(vm->saved_flags);
> +	mutex_unlock(&vm->eviction_lock);
> +}
>   #endif
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 5/5] drm/amdgpu: invalidate BO during userptr mapping
  2019-12-20  6:24 ` [PATCH 5/5] drm/amdgpu: invalidate BO during userptr mapping Alex Sierra
@ 2019-12-20 21:40   ` Felix Kuehling
  0 siblings, 0 replies; 24+ messages in thread
From: Felix Kuehling @ 2019-12-20 21:40 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx

I think this patch is just a proof of concept for now. It should not be 
submitted because there are still some known locking issues that need to 
be solved, and we don't have the code yet that handles the recoverable 
page faults resulting from this.

Regards,
   Felix

On 2019-12-20 1:24, Alex Sierra wrote:
> This is required for HMM functionality only on GFXv9 GPU, which supports
> recoverable page faults.
>
> [Why]
> Instead of stopping all user mode queues during a userptr mapping.
> The GFXv9 recoverable page fault is used to revalidate userptr mappings.
> Now, this will be done on the page fault handler.
>
> [How]
> Invalidate buffer objects that correspond to the specific address range
> on the mmu notifier.
>
> Change-Id: I94b8fee8d88ff240b619cba1c5458aba98b17736
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 56 ++++++++++++++++++++++++--
>   1 file changed, 52 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> index 392300f77b13..06415d8ad3c1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> @@ -187,6 +187,45 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
>   	}
>   }
>   
> +/**
> + * amdgpu_mn_invalidate_bo - invalidate a BO
> + *
> + * @bo: amdgpu buffer object to invalidate
> + * @adev: amdgpu device pointer
> + *
> + * Block for operations on BO while is cleared.
> + */
> +static int amdgpu_mn_invalidate_bo(struct amdgpu_device *adev,
> +				     struct amdgpu_bo *bo)
> +{
> +	struct amdgpu_vm_bo_base *bo_base;
> +	struct amdgpu_bo_va *bo_va;
> +	struct kgd_dev *kgd = (struct kgd_dev *)adev;
> +	long r = 0;
> +	long tmo;
> +
> +	tmo = msecs_to_jiffies(100);
> +	for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
> +		bo_va = container_of(bo_base, struct amdgpu_bo_va, base);
> +		r = amdgpu_vm_bo_update(adev, bo_va, true);
> +		if (r)
> +			break;
> +
> +		r = dma_fence_wait_timeout(bo_va->last_pt_update, false, tmo);
> +		if (r <= 0) {
> +			if (r == 0)
> +				r = -ETIMEDOUT;
> +
> +			break;
> +		} else {
> +			r = 0;
> +		}
> +
> +		amdgpu_amdkfd_flush_gpu_tlb_pasid(kgd, bo_base->vm->pasid);
> +	}
> +	return r;
> +}
> +
>   /**
>    * amdgpu_mn_sync_pagetables_gfx - callback to notify about mm change
>    *
> @@ -250,6 +289,7 @@ amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
>   			      const struct mmu_notifier_range *update)
>   {
>   	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
> +	struct amdgpu_device *adev = amn->adev;
>   	unsigned long start = update->start;
>   	unsigned long end = update->end;
>   	bool blockable = mmu_notifier_range_blockable(update);
> @@ -275,11 +315,19 @@ amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
>   		it = interval_tree_iter_next(it, start, end);
>   
>   		list_for_each_entry(bo, &node->bos, mn_list) {
> -			struct kgd_mem *mem = bo->kfd_bo;
>   
> -			if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
> -							 start, end))
> -				amdgpu_amdkfd_evict_userptr(mem, amn->mm);
> +			if (amdgpu_ttm_tt_affect_userptr(
> +						bo->tbo.ttm, start, end)){
> +				if (!amdgpu_noretry &&
> +				adev->family >= AMDGPU_FAMILY_AI) {
> +					amdgpu_mn_invalidate_bo(adev, bo);
> +				} else {
> +					struct kgd_mem *mem = bo->kfd_bo;
> +
> +					amdgpu_amdkfd_evict_userptr(mem,
> +								amn->mm);
> +				}
> +			}
>   		}
>   	}
>   
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/5] drm/amdgpu: Avoid reclaim fs while eviction lock
  2019-12-20  6:24 [PATCH 1/5] drm/amdgpu: Avoid reclaim fs while eviction lock Alex Sierra
                   ` (4 preceding siblings ...)
  2019-12-20 21:39 ` [PATCH 1/5] drm/amdgpu: Avoid reclaim fs while eviction lock Felix Kuehling
@ 2019-12-20 23:28 ` Yong Zhao
  2020-01-02  9:16 ` Christian König
  6 siblings, 0 replies; 24+ messages in thread
From: Yong Zhao @ 2019-12-20 23:28 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx

One style comment inline.

Yong

On 2019-12-20 1:24 a.m., Alex Sierra wrote:
> [Why]
> Avoid reclaim filesystem while eviction lock is held called from
> MMU notifier.
>
> [How]
> Setting PF_MEMALLOC_NOFS flags while eviction mutex is locked.
> Using memalloc_nofs_save / memalloc_nofs_restore API.
>
> Change-Id: I5531c9337836e7d4a430df3f16dcc82888e8018c
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 14 ++++++-------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 28 +++++++++++++++++++++++++-
>   2 files changed, 34 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index b999b67ff57a..b36daa6230fb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -678,9 +678,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   		}
>   	}
>   
> -	mutex_lock(&vm->eviction_lock);
> +	vm_eviction_lock(vm);
>   	vm->evicting = false;
> -	mutex_unlock(&vm->eviction_lock);
> +	vm_eviction_unlock(vm);
>   
>   	return 0;
>   }
> @@ -1559,7 +1559,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   	if (!(flags & AMDGPU_PTE_VALID))
>   		owner = AMDGPU_FENCE_OWNER_KFD;
>   
> -	mutex_lock(&vm->eviction_lock);
> +	vm_eviction_lock(vm);
>   	if (vm->evicting) {
>   		r = -EBUSY;
>   		goto error_unlock;
> @@ -1576,7 +1576,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   	r = vm->update_funcs->commit(&params, fence);
>   
>   error_unlock:
> -	mutex_unlock(&vm->eviction_lock);
> +	vm_eviction_unlock(vm);
>   	return r;
>   }
>   
> @@ -2537,18 +2537,18 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
>   		return false;
>   
>   	/* Try to block ongoing updates */
> -	if (!mutex_trylock(&bo_base->vm->eviction_lock))
> +	if (!vm_eviction_trylock(bo_base->vm))
>   		return false;
>   
>   	/* Don't evict VM page tables while they are updated */
>   	if (!dma_fence_is_signaled(bo_base->vm->last_direct) ||
>   	    !dma_fence_is_signaled(bo_base->vm->last_delayed)) {
> -		mutex_unlock(&bo_base->vm->eviction_lock);
> +		vm_eviction_unlock(bo_base->vm);
>   		return false;
>   	}
>   
>   	bo_base->vm->evicting = true;
> -	mutex_unlock(&bo_base->vm->eviction_lock);
> +	vm_eviction_unlock(bo_base->vm);
>   	return true;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 100547f094ff..d35aa76469ec 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -30,6 +30,7 @@
>   #include <drm/gpu_scheduler.h>
>   #include <drm/drm_file.h>
>   #include <drm/ttm/ttm_bo_driver.h>
> +#include <linux/sched/mm.h>
>   
>   #include "amdgpu_sync.h"
>   #include "amdgpu_ring.h"
> @@ -242,9 +243,12 @@ struct amdgpu_vm {
>   	/* tree of virtual addresses mapped */
>   	struct rb_root_cached	va;
>   
> -	/* Lock to prevent eviction while we are updating page tables */
> +	/* Lock to prevent eviction while we are updating page tables
> +	 * use vm_eviction_lock/unlock(vm)
> +	 */
>   	struct mutex		eviction_lock;
>   	bool			evicting;
> +	unsigned int            saved_flags;
[yz] The tabs should be used here instead of spaces.
>   
>   	/* BOs who needs a validation */
>   	struct list_head	evicted;
> @@ -436,4 +440,26 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>   				struct amdgpu_vm *vm);
>   void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo);
>   
> +/* vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
> + * happens while holding this lock anywhere to prevent deadlocks when
> + * an MMU notifier runs in reclaim-FS context.
> + */
> +static inline void vm_eviction_lock(struct amdgpu_vm *vm)
> +{
> +	mutex_lock(&vm->eviction_lock);
> +	vm->saved_flags = memalloc_nofs_save();
> +}
> +static inline int vm_eviction_trylock(struct amdgpu_vm *vm)
> +{
> +	if (mutex_trylock(&vm->eviction_lock)) {
> +		vm->saved_flags = memalloc_nofs_save();
> +		return 1;
> +	}
> +	return 0;
> +}
> +static inline void vm_eviction_unlock(struct amdgpu_vm *vm)
> +{
> +	memalloc_nofs_restore(vm->saved_flags);
> +	mutex_unlock(&vm->eviction_lock);
> +}
>   #endif
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/5] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd
  2019-12-20 21:35   ` Felix Kuehling
@ 2019-12-20 23:50     ` Yong Zhao
  2019-12-21  0:01       ` Yong Zhao
  0 siblings, 1 reply; 24+ messages in thread
From: Yong Zhao @ 2019-12-20 23:50 UTC (permalink / raw)
  To: Felix Kuehling, Alex Sierra, amd-gfx

Inline.

On 2019-12-20 4:35 p.m., Felix Kuehling wrote:
> On 2019-12-20 1:24, Alex Sierra wrote:
>> [Why]
>> TLB flush method has been deprecated using kfd2kgd interface.
>> This implementation is now on the amdgpu_amdkfd API.
>>
>> [How]
>> TLB flush functions now implemented in amdgpu_amdkfd.
>>
>> Change-Id: Ic51cccdfe6e71288d78da772b6e1b6ced72f8ef7
>> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
>
> Looks good to me. See my comment about the TODO inline.
>
>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 32 ++++++++++++++++++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  2 ++
>>   drivers/gpu/drm/amd/amdkfd/kfd_process.c   |  8 ++++--
>>   3 files changed, 39 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> index d3da9dde4ee1..b7f6e70c5762 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> @@ -634,6 +634,38 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct 
>> amdgpu_device *adev, u32 vmid)
>>       return false;
>>   }
>>   +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t 
>> vmid)
>> +{
>> +    struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
>> +    /* TODO: condition missing for FAMILY above NV */
>
> I'm not sure what's missing here. NV and above don't need any special 
> treatment. Since SDMA is connected to GFXHUB on NV, only the GFXHUB 
> needs to be flushed.
>
> Regards,
>   Felix
>
>
>> +    if (adev->family == AMDGPU_FAMILY_AI) {
>> +        int i;
>> +
>> +        for (i = 0; i < adev->num_vmhubs; i++)
>> +            amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
>> +    } else {
>> +        amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
>> +    }

This if else can be unified by

for (i = 0; i < adev->num_vmhubs; i++)

     amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);

>> +
>> +    return 0;
>> +}
>> +
>> +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t 
>> pasid)
>> +{
>> +    struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
>> +    uint32_t flush_type = 0;
>> +    bool all_hub = false;
>> +
>> +    if (adev->gmc.xgmi.num_physical_nodes &&
>> +        adev->asic_type == CHIP_VEGA20)
>> +        flush_type = 2;
>> +
>> +    if (adev->family == AMDGPU_FAMILY_AI)
>> +        all_hub = true;
>> +
>> +    return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, 
>> all_hub);
>> +}
>> +
>>   bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
>>   {
>>       struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> index 069d5d230810..47b0f2957d1f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> @@ -136,6 +136,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, 
>> enum kgd_engine_type engine,
>>                   uint32_t *ib_cmd, uint32_t ib_len);
>>   void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
>>   bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd);
>> +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t 
>> vmid);
>> +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t 
>> pasid);
>>     bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 
>> vmid);
>>   diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> index 536a153ac9a4..25b90f70aecd 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> @@ -32,6 +32,7 @@
>>   #include <linux/mman.h>
>>   #include <linux/file.h>
>>   #include "amdgpu_amdkfd.h"
>> +#include "amdgpu.h"
>>     struct mm_struct;
>>   @@ -1152,16 +1153,17 @@ int kfd_reserved_mem_mmap(struct kfd_dev 
>> *dev, struct kfd_process *process,
>>   void kfd_flush_tlb(struct kfd_process_device *pdd)
>>   {
>>       struct kfd_dev *dev = pdd->dev;
>> -    const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
>>         if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
>>           /* Nothing to flush until a VMID is assigned, which
>>            * only happens when the first queue is created.
>>            */
>>           if (pdd->qpd.vmid)
>> -            f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
>> +            amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
>> +                            pdd->qpd.vmid);
>>       } else {
>> -        f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
>> +        amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
>> +                        pdd->process->pasid);
>>       }
>>   }
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=02%7C01%7Cyong.zhao%40amd.com%7C196afdae9b69425e58aa08d78594927a%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637124745521063472&amp;sdata=5ZZNtq%2FyOZX%2BdQrG1ydoidOU90ZrbWGz9tnuycEg4F4%3D&amp;reserved=0 
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/5] drm/amdgpu: export function to flush TLB via pasid
  2019-12-20  6:24 ` [PATCH 2/5] drm/amdgpu: export function to flush TLB via pasid Alex Sierra
  2019-12-20 21:32   ` Felix Kuehling
@ 2019-12-20 23:51   ` Yong Zhao
  2020-01-05 15:41   ` Christian König
  2 siblings, 0 replies; 24+ messages in thread
From: Yong Zhao @ 2019-12-20 23:51 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx


On 2019-12-20 1:24 a.m., Alex Sierra wrote:
> This can be used directly from amdgpu and amdkfd to invalidate
> TLB through pasid.
> It supports gmc v7, v8, v9 and v10.
>
> Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  6 ++
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 81 ++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   | 33 ++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   | 34 ++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 84 +++++++++++++++++++++++++
>   5 files changed, 238 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index b499a3de8bb6..b6413a56f546 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs {
>   	/* flush the vm tlb via mmio */
>   	void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
>   				uint32_t vmhub, uint32_t flush_type);
> +	/* flush the vm tlb via pasid */
> +	int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
> +					uint32_t flush_type, bool all_hub);
>   	/* flush the vm tlb via ring */
>   	uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
>   				       uint64_t pd_addr);
> @@ -216,6 +219,9 @@ struct amdgpu_gmc {
>   };
>   
>   #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
> +#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \
> +	((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
> +	((adev), (pasid), (type), (allhub)))
>   #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
>   #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
>   #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index f5725336a5f2..b1a5408a8d7e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -30,6 +30,8 @@
>   #include "hdp/hdp_5_0_0_sh_mask.h"
>   #include "gc/gc_10_1_0_sh_mask.h"
>   #include "mmhub/mmhub_2_0_0_sh_mask.h"
> +#include "athub/athub_2_0_0_sh_mask.h"
> +#include "athub/athub_2_0_0_offset.h"
>   #include "dcn/dcn_2_0_0_offset.h"
>   #include "dcn/dcn_2_0_0_sh_mask.h"
>   #include "oss/osssys_5_0_0_offset.h"
> @@ -37,6 +39,7 @@
>   #include "navi10_enum.h"
>   
>   #include "soc15.h"
> +#include "soc15d.h"
>   #include "soc15_common.h"
>   
>   #include "nbio_v2_3.h"
> @@ -234,6 +237,48 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
>   		(!amdgpu_sriov_vf(adev)));
>   }
>   
> +static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
> +					struct amdgpu_device *adev,
> +					uint8_t vmid, uint16_t *p_pasid)
> +{
> +	uint32_t value;
> +
> +	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
> +		     + vmid);
> +	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
> +
> +	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
> +}
> +
> +static int gmc_v10_0_invalidate_tlbs_with_kiq(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	signed long r;
> +	uint32_t seq;
> +	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> +
> +	spin_lock(&adev->gfx.kiq.ring_lock);
> +	amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
> +	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> +	amdgpu_ring_write(ring,
> +			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> +			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> +			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> +			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> +	amdgpu_fence_emit_polling(ring, &seq);
> +	amdgpu_ring_commit(ring);
> +	spin_unlock(&adev->gfx.kiq.ring_lock);
> +
> +	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
> +	if (r < 1) {
> +		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> +		return -ETIME;
> +	}
> +
> +	return 0;
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -380,6 +425,41 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
>   }
>   
> +/**
> + * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flush
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	int vmid;
> +	uint16_t queried_pasid;
> +	bool ret;
> +	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> +
> +	if (amdgpu_emu_mode == 0 && ring->sched.ready)
> +		return gmc_v10_0_invalidate_tlbs_with_kiq(adev,
> +						pasid, flush_type, all_hub);
> +
> +	for (vmid = 1; vmid < 16; vmid++) {
> +
> +		ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
> +				&queried_pasid);
> +		if (ret	&& queried_pasid == pasid) {
[yz] The space should be used between ret and &&.
> +			amdgpu_gmc_flush_gpu_tlb(adev, vmid,
> +					AMDGPU_GFXHUB_0, 0);
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>   static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>   					     unsigned vmid, uint64_t pd_addr)
>   {
> @@ -531,6 +611,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
>   
>   static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
>   	.flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
> +	.flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
>   	.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
>   	.map_mtype = gmc_v10_0_map_mtype,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> index f08e5330642d..19d5b133e1d7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> @@ -418,6 +418,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
>   	return 0;
>   }
>   
> +/**
> + * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flush
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	int vmid;
> +	unsigned int tmp;
> +
> +	if (adev->in_gpu_reset)
> +		return -EIO;
> +
> +	for (vmid = 1; vmid < 16; vmid++) {
> +
> +		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> +		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> +			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> +			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> +			RREG32(mmVM_INVALIDATE_RESPONSE);
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -1333,6 +1365,7 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = {
>   
>   static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
>   	.flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb,
> +	.flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid,
>   	.emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping,
>   	.set_prt = gmc_v7_0_set_prt,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> index 6d96d40fbcb8..27d83204fa2b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> @@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
>   	return 0;
>   }
>   
> +/**
> + * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flush
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	int vmid;
> +	unsigned int tmp;
> +
> +	if (adev->in_gpu_reset)
> +		return -EIO;
> +
> +	for (vmid = 1; vmid < 16; vmid++) {
> +
> +		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> +		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> +			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> +			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> +			RREG32(mmVM_INVALIDATE_RESPONSE);
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -1700,6 +1733,7 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
>   
>   static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
>   	.flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb,
> +	.flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid,
>   	.emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping,
>   	.set_prt = gmc_v8_0_set_prt,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index fa025ceeea0f..eb1e64bd56ed 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -38,10 +38,12 @@
>   #include "dce/dce_12_0_sh_mask.h"
>   #include "vega10_enum.h"
>   #include "mmhub/mmhub_1_0_offset.h"
> +#include "athub/athub_1_0_sh_mask.h"
>   #include "athub/athub_1_0_offset.h"
>   #include "oss/osssys_4_0_offset.h"
>   
>   #include "soc15.h"
> +#include "soc15d.h"
>   #include "soc15_common.h"
>   #include "umc/umc_6_0_sh_mask.h"
>   
> @@ -434,6 +436,47 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
>   		   adev->pdev->device == 0x15d8)));
>   }
>   
> +static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
> +					uint8_t vmid, uint16_t *p_pasid)
> +{
> +	uint32_t value;
> +
> +	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
> +		     + vmid);
> +	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
> +
> +	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
> +}
> +
> +static int gmc_v9_0_invalidate_tlbs_with_kiq(struct amdgpu_device *adev,
> +				uint16_t pasid, uint32_t flush_type,
> +				bool all_hub)
> +{
> +	signed long r;
> +	uint32_t seq;
> +	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> +
> +	spin_lock(&adev->gfx.kiq.ring_lock);
> +	amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
> +	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> +	amdgpu_ring_write(ring,
> +			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> +			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> +			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> +			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> +	amdgpu_fence_emit_polling(ring, &seq);
> +	amdgpu_ring_commit(ring);
> +	spin_unlock(&adev->gfx.kiq.ring_lock);
> +
> +	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
> +	if (r < 1) {
> +		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> +		return -ETIME;
> +	}
> +
> +	return 0;
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -532,6 +575,46 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
>   }
>   
> +/**
> + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flush
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	int vmid, i;
> +	uint16_t queried_pasid;
> +	bool ret;
> +	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> +
> +	if (adev->in_gpu_reset)
> +		return -EIO;
> +
> +	if (ring->sched.ready)
> +		return gmc_v9_0_invalidate_tlbs_with_kiq(adev,
> +						pasid, flush_type, all_hub);
> +
> +	for (vmid = 1; vmid < 16; vmid++) {
> +
> +		ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
> +				&queried_pasid);
> +		if (ret && queried_pasid == pasid) {
> +			for (i = 0; i < adev->num_vmhubs; i++)
> +				amdgpu_gmc_flush_gpu_tlb(adev, vmid,
> +							i, flush_type);
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +
> +}
> +
>   static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>   					    unsigned vmid, uint64_t pd_addr)
>   {
> @@ -693,6 +776,7 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
>   
>   static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
>   	.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
> +	.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
>   	.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
>   	.map_mtype = gmc_v9_0_map_mtype,
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/5] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd
  2019-12-20 23:50     ` Yong Zhao
@ 2019-12-21  0:01       ` Yong Zhao
  2019-12-23 19:34         ` Felix Kuehling
  0 siblings, 1 reply; 24+ messages in thread
From: Yong Zhao @ 2019-12-21  0:01 UTC (permalink / raw)
  To: Felix Kuehling, Alex Sierra, amd-gfx


On 2019-12-20 6:50 p.m., Yong Zhao wrote:
> Inline.
>
> On 2019-12-20 4:35 p.m., Felix Kuehling wrote:
>> On 2019-12-20 1:24, Alex Sierra wrote:
>>> [Why]
>>> TLB flush method has been deprecated using kfd2kgd interface.
>>> This implementation is now on the amdgpu_amdkfd API.
>>>
>>> [How]
>>> TLB flush functions now implemented in amdgpu_amdkfd.
>>>
>>> Change-Id: Ic51cccdfe6e71288d78da772b6e1b6ced72f8ef7
>>> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
>>
>> Looks good to me. See my comment about the TODO inline.
>>
>>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 32 
>>> ++++++++++++++++++++++
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  2 ++
>>>   drivers/gpu/drm/amd/amdkfd/kfd_process.c   |  8 ++++--
>>>   3 files changed, 39 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>> index d3da9dde4ee1..b7f6e70c5762 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>> @@ -634,6 +634,38 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct 
>>> amdgpu_device *adev, u32 vmid)
>>>       return false;
>>>   }
>>>   +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, 
>>> uint16_t vmid)
>>> +{
>>> +    struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
>>> +    /* TODO: condition missing for FAMILY above NV */
>>
>> I'm not sure what's missing here. NV and above don't need any special 
>> treatment. Since SDMA is connected to GFXHUB on NV, only the GFXHUB 
>> needs to be flushed.
>>
>> Regards,
>>   Felix
>>
>>
>>> +    if (adev->family == AMDGPU_FAMILY_AI) {
>>> +        int i;
>>> +
>>> +        for (i = 0; i < adev->num_vmhubs; i++)
>>> +            amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
>>> +    } else {
>>> +        amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
>>> +    }
>
> This if else can be unified by
>
> for (i = 0; i < adev->num_vmhubs; i++)
>
>     amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
>
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t 
>>> pasid)
>>> +{
>>> +    struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
>>> +    uint32_t flush_type = 0;
>>> +    bool all_hub = false;
>>> +
>>> +    if (adev->gmc.xgmi.num_physical_nodes &&
>>> +        adev->asic_type == CHIP_VEGA20)
>>> +        flush_type = 2;
>>> +
>>> +    if (adev->family == AMDGPU_FAMILY_AI)
>>> +        all_hub = true;
>>> +
>>> +    return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, 
>>> all_hub);
The all_hub parameter can be inferred from num_vmhubs in 
flush_gpu_tlb_pasid(), so it can be optimized out here.
>>> +}
>>> +
>>>   bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
>>>   {
>>>       struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> index 069d5d230810..47b0f2957d1f 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> @@ -136,6 +136,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, 
>>> enum kgd_engine_type engine,
>>>                   uint32_t *ib_cmd, uint32_t ib_len);
>>>   void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
>>>   bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd);
>>> +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t 
>>> vmid);
>>> +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t 
>>> pasid);
>>>     bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 
>>> vmid);
>>>   diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> index 536a153ac9a4..25b90f70aecd 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> @@ -32,6 +32,7 @@
>>>   #include <linux/mman.h>
>>>   #include <linux/file.h>
>>>   #include "amdgpu_amdkfd.h"
>>> +#include "amdgpu.h"
>>>     struct mm_struct;
>>>   @@ -1152,16 +1153,17 @@ int kfd_reserved_mem_mmap(struct kfd_dev 
>>> *dev, struct kfd_process *process,
>>>   void kfd_flush_tlb(struct kfd_process_device *pdd)
>>>   {
>>>       struct kfd_dev *dev = pdd->dev;
>>> -    const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
>>>         if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
>>>           /* Nothing to flush until a VMID is assigned, which
>>>            * only happens when the first queue is created.
>>>            */
>>>           if (pdd->qpd.vmid)
>>> -            f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
>>> +            amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
>>> +                            pdd->qpd.vmid);
>>>       } else {
>>> -        f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
>>> +        amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
>>> +                        pdd->process->pasid);
>>>       }
>>>   }
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=02%7C01%7Cyong.zhao%40amd.com%7C3a33649d2a804998d00408d785a7776f%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637124827007059728&amp;sdata=fNTunmAJObxfgbJBlNWWXucyH9ezedLv%2BrqnMTz3Ai4%3D&amp;reserved=0 
>>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=02%7C01%7Cyong.zhao%40amd.com%7C3a33649d2a804998d00408d785a7776f%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637124827007059728&amp;sdata=fNTunmAJObxfgbJBlNWWXucyH9ezedLv%2BrqnMTz3Ai4%3D&amp;reserved=0 
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/5] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd
  2019-12-21  0:01       ` Yong Zhao
@ 2019-12-23 19:34         ` Felix Kuehling
  2019-12-23 19:44           ` Zhao, Yong
  0 siblings, 1 reply; 24+ messages in thread
From: Felix Kuehling @ 2019-12-23 19:34 UTC (permalink / raw)
  To: Yong Zhao, Alex Sierra, amd-gfx


On 2019-12-20 7:01 p.m., Yong Zhao wrote:
>
> On 2019-12-20 6:50 p.m., Yong Zhao wrote:
>> Inline.
>>
>> On 2019-12-20 4:35 p.m., Felix Kuehling wrote:
>>> On 2019-12-20 1:24, Alex Sierra wrote:
>>>> [Why]
>>>> TLB flush method has been deprecated using kfd2kgd interface.
>>>> This implementation is now on the amdgpu_amdkfd API.
>>>>
>>>> [How]
>>>> TLB flush functions now implemented in amdgpu_amdkfd.
>>>>
>>>> Change-Id: Ic51cccdfe6e71288d78da772b6e1b6ced72f8ef7
>>>> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
>>>
>>> Looks good to me. See my comment about the TODO inline.
>>>
>>>
>>>> ---
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 32 
>>>> ++++++++++++++++++++++
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  2 ++
>>>>   drivers/gpu/drm/amd/amdkfd/kfd_process.c   |  8 ++++--
>>>>   3 files changed, 39 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>> index d3da9dde4ee1..b7f6e70c5762 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>> @@ -634,6 +634,38 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct 
>>>> amdgpu_device *adev, u32 vmid)
>>>>       return false;
>>>>   }
>>>>   +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, 
>>>> uint16_t vmid)
>>>> +{
>>>> +    struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
>>>> +    /* TODO: condition missing for FAMILY above NV */
>>>
>>> I'm not sure what's missing here. NV and above don't need any 
>>> special treatment. Since SDMA is connected to GFXHUB on NV, only the 
>>> GFXHUB needs to be flushed.
>>>
>>> Regards,
>>>   Felix
>>>
>>>
>>>> +    if (adev->family == AMDGPU_FAMILY_AI) {
>>>> +        int i;
>>>> +
>>>> +        for (i = 0; i < adev->num_vmhubs; i++)
>>>> +            amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
>>>> +    } else {
>>>> +        amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
>>>> +    }
>>
>> This if else can be unified by
>>
>> for (i = 0; i < adev->num_vmhubs; i++)
>>
>>     amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
>>
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, 
>>>> uint16_t pasid)
>>>> +{
>>>> +    struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
>>>> +    uint32_t flush_type = 0;
>>>> +    bool all_hub = false;
>>>> +
>>>> +    if (adev->gmc.xgmi.num_physical_nodes &&
>>>> +        adev->asic_type == CHIP_VEGA20)
>>>> +        flush_type = 2;
>>>> +
>>>> +    if (adev->family == AMDGPU_FAMILY_AI)
>>>> +        all_hub = true;
>>>> +
>>>> +    return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, 
>>>> all_hub);
> The all_hub parameter can be inferred from num_vmhubs in 
> flush_gpu_tlb_pasid(), so it can be optimized out here.

Hi Yong,

This is incorrect. NV has two VM hubs: GFXHUB and MMHUB. But KFD doesn't 
care about MMHUB on Navi because SDMA is connected to the GFXHUB. 
Therefore the all_hub parameter should not be based on the num_vmhubs. 
We need a special case for NV.

Or rather the special case could be AI, where SDMA is not connected to 
GFXHUB. So only on AI we need to flush all hubs for KFD VMs.

Regards,
   Felix

>>>> +}
>>>> +
>>>>   bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
>>>>   {
>>>>       struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>> index 069d5d230810..47b0f2957d1f 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>> @@ -136,6 +136,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev 
>>>> *kgd, enum kgd_engine_type engine,
>>>>                   uint32_t *ib_cmd, uint32_t ib_len);
>>>>   void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
>>>>   bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd);
>>>> +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t 
>>>> vmid);
>>>> +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, 
>>>> uint16_t pasid);
>>>>     bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 
>>>> vmid);
>>>>   diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>>> index 536a153ac9a4..25b90f70aecd 100644
>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>>> @@ -32,6 +32,7 @@
>>>>   #include <linux/mman.h>
>>>>   #include <linux/file.h>
>>>>   #include "amdgpu_amdkfd.h"
>>>> +#include "amdgpu.h"
>>>>     struct mm_struct;
>>>>   @@ -1152,16 +1153,17 @@ int kfd_reserved_mem_mmap(struct kfd_dev 
>>>> *dev, struct kfd_process *process,
>>>>   void kfd_flush_tlb(struct kfd_process_device *pdd)
>>>>   {
>>>>       struct kfd_dev *dev = pdd->dev;
>>>> -    const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
>>>>         if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
>>>>           /* Nothing to flush until a VMID is assigned, which
>>>>            * only happens when the first queue is created.
>>>>            */
>>>>           if (pdd->qpd.vmid)
>>>> -            f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
>>>> +            amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
>>>> +                            pdd->qpd.vmid);
>>>>       } else {
>>>> -        f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
>>>> +        amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
>>>> +                        pdd->process->pasid);
>>>>       }
>>>>   }
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=02%7C01%7Cyong.zhao%40amd.com%7C3a33649d2a804998d00408d785a7776f%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637124827007059728&amp;sdata=fNTunmAJObxfgbJBlNWWXucyH9ezedLv%2BrqnMTz3Ai4%3D&amp;reserved=0 
>>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=02%7C01%7Cyong.zhao%40amd.com%7C3a33649d2a804998d00408d785a7776f%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637124827007059728&amp;sdata=fNTunmAJObxfgbJBlNWWXucyH9ezedLv%2BrqnMTz3Ai4%3D&amp;reserved=0 
>>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/5] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd
  2019-12-23 19:34         ` Felix Kuehling
@ 2019-12-23 19:44           ` Zhao, Yong
  0 siblings, 0 replies; 24+ messages in thread
From: Zhao, Yong @ 2019-12-23 19:44 UTC (permalink / raw)
  To: Kuehling, Felix, Sierra Guiza, Alejandro (Alex), amd-gfx


[-- Attachment #1.1: Type: text/plain, Size: 7165 bytes --]

[AMD Official Use Only - Internal Distribution Only]

True. There indeed are two vmhubs on Navi. So my two comments are not useful here.

Yong
________________________________
From: Kuehling, Felix <Felix.Kuehling@amd.com>
Sent: Monday, December 23, 2019 2:34 PM
To: Zhao, Yong <Yong.Zhao@amd.com>; Sierra Guiza, Alejandro (Alex) <Alex.Sierra@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 3/5] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd


On 2019-12-20 7:01 p.m., Yong Zhao wrote:
>
> On 2019-12-20 6:50 p.m., Yong Zhao wrote:
>> Inline.
>>
>> On 2019-12-20 4:35 p.m., Felix Kuehling wrote:
>>> On 2019-12-20 1:24, Alex Sierra wrote:
>>>> [Why]
>>>> TLB flush method has been deprecated using kfd2kgd interface.
>>>> This implementation is now on the amdgpu_amdkfd API.
>>>>
>>>> [How]
>>>> TLB flush functions now implemented in amdgpu_amdkfd.
>>>>
>>>> Change-Id: Ic51cccdfe6e71288d78da772b6e1b6ced72f8ef7
>>>> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
>>>
>>> Looks good to me. See my comment about the TODO inline.
>>>
>>>
>>>> ---
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 32
>>>> ++++++++++++++++++++++
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  2 ++
>>>>   drivers/gpu/drm/amd/amdkfd/kfd_process.c   |  8 ++++--
>>>>   3 files changed, 39 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>> index d3da9dde4ee1..b7f6e70c5762 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>> @@ -634,6 +634,38 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct
>>>> amdgpu_device *adev, u32 vmid)
>>>>       return false;
>>>>   }
>>>>   +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd,
>>>> uint16_t vmid)
>>>> +{
>>>> +    struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
>>>> +    /* TODO: condition missing for FAMILY above NV */
>>>
>>> I'm not sure what's missing here. NV and above don't need any
>>> special treatment. Since SDMA is connected to GFXHUB on NV, only the
>>> GFXHUB needs to be flushed.
>>>
>>> Regards,
>>>   Felix
>>>
>>>
>>>> +    if (adev->family == AMDGPU_FAMILY_AI) {
>>>> +        int i;
>>>> +
>>>> +        for (i = 0; i < adev->num_vmhubs; i++)
>>>> +            amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
>>>> +    } else {
>>>> +        amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
>>>> +    }
>>
>> This if else can be unified by
>>
>> for (i = 0; i < adev->num_vmhubs; i++)
>>
>>     amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
>>
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd,
>>>> uint16_t pasid)
>>>> +{
>>>> +    struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
>>>> +    uint32_t flush_type = 0;
>>>> +    bool all_hub = false;
>>>> +
>>>> +    if (adev->gmc.xgmi.num_physical_nodes &&
>>>> +        adev->asic_type == CHIP_VEGA20)
>>>> +        flush_type = 2;
>>>> +
>>>> +    if (adev->family == AMDGPU_FAMILY_AI)
>>>> +        all_hub = true;
>>>> +
>>>> +    return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type,
>>>> all_hub);
> The all_hub parameter can be inferred from num_vmhubs in
> flush_gpu_tlb_pasid(), so it can be optimized out here.

Hi Yong,

This is incorrect. NV has two VM hubs: GFXHUB and MMHUB. But KFD doesn't
care about MMHUB on Navi because SDMA is connected to the GFXHUB.
Therefore the all_hub parameter should not be based on the num_vmhubs.
We need a special case for NV.

Or rather the special case could be AI, where SDMA is not connected to
GFXHUB. So only on AI we need to flush all hubs for KFD VMs.

Regards,
   Felix

>>>> +}
>>>> +
>>>>   bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
>>>>   {
>>>>       struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>> index 069d5d230810..47b0f2957d1f 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>> @@ -136,6 +136,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev
>>>> *kgd, enum kgd_engine_type engine,
>>>>                   uint32_t *ib_cmd, uint32_t ib_len);
>>>>   void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
>>>>   bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd);
>>>> +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t
>>>> vmid);
>>>> +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd,
>>>> uint16_t pasid);
>>>>     bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32
>>>> vmid);
>>>>   diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>>> index 536a153ac9a4..25b90f70aecd 100644
>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>>> @@ -32,6 +32,7 @@
>>>>   #include <linux/mman.h>
>>>>   #include <linux/file.h>
>>>>   #include "amdgpu_amdkfd.h"
>>>> +#include "amdgpu.h"
>>>>     struct mm_struct;
>>>>   @@ -1152,16 +1153,17 @@ int kfd_reserved_mem_mmap(struct kfd_dev
>>>> *dev, struct kfd_process *process,
>>>>   void kfd_flush_tlb(struct kfd_process_device *pdd)
>>>>   {
>>>>       struct kfd_dev *dev = pdd->dev;
>>>> -    const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
>>>>         if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
>>>>           /* Nothing to flush until a VMID is assigned, which
>>>>            * only happens when the first queue is created.
>>>>            */
>>>>           if (pdd->qpd.vmid)
>>>> -            f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
>>>> +            amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
>>>> +                            pdd->qpd.vmid);
>>>>       } else {
>>>> -        f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
>>>> +        amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
>>>> +                        pdd->process->pasid);
>>>>       }
>>>>   }
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=02%7C01%7Cyong.zhao%40amd.com%7C3a33649d2a804998d00408d785a7776f%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637124827007059728&amp;sdata=fNTunmAJObxfgbJBlNWWXucyH9ezedLv%2BrqnMTz3Ai4%3D&amp;reserved=0
>>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=02%7C01%7Cyong.zhao%40amd.com%7C3a33649d2a804998d00408d785a7776f%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637124827007059728&amp;sdata=fNTunmAJObxfgbJBlNWWXucyH9ezedLv%2BrqnMTz3Ai4%3D&amp;reserved=0
>>

[-- Attachment #1.2: Type: text/html, Size: 13182 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/5] drm/amdgpu: Avoid reclaim fs while eviction lock
  2019-12-20  6:24 [PATCH 1/5] drm/amdgpu: Avoid reclaim fs while eviction lock Alex Sierra
                   ` (5 preceding siblings ...)
  2019-12-20 23:28 ` Yong Zhao
@ 2020-01-02  9:16 ` Christian König
  6 siblings, 0 replies; 24+ messages in thread
From: Christian König @ 2020-01-02  9:16 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx

Am 20.12.19 um 07:24 schrieb Alex Sierra:
> [Why]
> Avoid reclaim filesystem while eviction lock is held called from
> MMU notifier.
>
> [How]
> Setting PF_MEMALLOC_NOFS flags while eviction mutex is locked.
> Using memalloc_nofs_save / memalloc_nofs_restore API.
>
> Change-Id: I5531c9337836e7d4a430df3f16dcc82888e8018c
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 14 ++++++-------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 28 +++++++++++++++++++++++++-
>   2 files changed, 34 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index b999b67ff57a..b36daa6230fb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -678,9 +678,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   		}
>   	}
>   
> -	mutex_lock(&vm->eviction_lock);
> +	vm_eviction_lock(vm);
>   	vm->evicting = false;
> -	mutex_unlock(&vm->eviction_lock);
> +	vm_eviction_unlock(vm);
>   
>   	return 0;
>   }
> @@ -1559,7 +1559,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   	if (!(flags & AMDGPU_PTE_VALID))
>   		owner = AMDGPU_FENCE_OWNER_KFD;
>   
> -	mutex_lock(&vm->eviction_lock);
> +	vm_eviction_lock(vm);
>   	if (vm->evicting) {
>   		r = -EBUSY;
>   		goto error_unlock;
> @@ -1576,7 +1576,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   	r = vm->update_funcs->commit(&params, fence);
>   
>   error_unlock:
> -	mutex_unlock(&vm->eviction_lock);
> +	vm_eviction_unlock(vm);
>   	return r;
>   }
>   
> @@ -2537,18 +2537,18 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
>   		return false;
>   
>   	/* Try to block ongoing updates */
> -	if (!mutex_trylock(&bo_base->vm->eviction_lock))
> +	if (!vm_eviction_trylock(bo_base->vm))
>   		return false;
>   
>   	/* Don't evict VM page tables while they are updated */
>   	if (!dma_fence_is_signaled(bo_base->vm->last_direct) ||
>   	    !dma_fence_is_signaled(bo_base->vm->last_delayed)) {
> -		mutex_unlock(&bo_base->vm->eviction_lock);
> +		vm_eviction_unlock(bo_base->vm);
>   		return false;
>   	}
>   
>   	bo_base->vm->evicting = true;
> -	mutex_unlock(&bo_base->vm->eviction_lock);
> +	vm_eviction_unlock(bo_base->vm);
>   	return true;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 100547f094ff..d35aa76469ec 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -30,6 +30,7 @@
>   #include <drm/gpu_scheduler.h>
>   #include <drm/drm_file.h>
>   #include <drm/ttm/ttm_bo_driver.h>
> +#include <linux/sched/mm.h>
>   
>   #include "amdgpu_sync.h"
>   #include "amdgpu_ring.h"
> @@ -242,9 +243,12 @@ struct amdgpu_vm {
>   	/* tree of virtual addresses mapped */
>   	struct rb_root_cached	va;
>   
> -	/* Lock to prevent eviction while we are updating page tables */
> +	/* Lock to prevent eviction while we are updating page tables
> +	 * use vm_eviction_lock/unlock(vm)
> +	 */
>   	struct mutex		eviction_lock;
>   	bool			evicting;
> +	unsigned int            saved_flags;
>   
>   	/* BOs who needs a validation */
>   	struct list_head	evicted;
> @@ -436,4 +440,26 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>   				struct amdgpu_vm *vm);
>   void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo);
>   
> +/* vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
> + * happens while holding this lock anywhere to prevent deadlocks when
> + * an MMU notifier runs in reclaim-FS context.
> + */
> +static inline void vm_eviction_lock(struct amdgpu_vm *vm)

Please add a proper amdgpu_ prefix to the function names.

Additional to that please don't put local static functions into the 
header, those shouldn't be used outside of the VM code.

Christian.

> +{
> +	mutex_lock(&vm->eviction_lock);
> +	vm->saved_flags = memalloc_nofs_save();
> +}
> +static inline int vm_eviction_trylock(struct amdgpu_vm *vm)
> +{
> +	if (mutex_trylock(&vm->eviction_lock)) {
> +		vm->saved_flags = memalloc_nofs_save();
> +		return 1;
> +	}
> +	return 0;
> +}
> +static inline void vm_eviction_unlock(struct amdgpu_vm *vm)
> +{
> +	memalloc_nofs_restore(vm->saved_flags);
> +	mutex_unlock(&vm->eviction_lock);
> +}
>   #endif

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/5] drm/amdgpu: export function to flush TLB via pasid
  2019-12-20  6:24 ` [PATCH 2/5] drm/amdgpu: export function to flush TLB via pasid Alex Sierra
  2019-12-20 21:32   ` Felix Kuehling
  2019-12-20 23:51   ` Yong Zhao
@ 2020-01-05 15:41   ` Christian König
  2020-01-06 16:04     ` Felix Kuehling
  2 siblings, 1 reply; 24+ messages in thread
From: Christian König @ 2020-01-05 15:41 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx

Am 20.12.19 um 07:24 schrieb Alex Sierra:
> This can be used directly from amdgpu and amdkfd to invalidate
> TLB through pasid.
> It supports gmc v7, v8, v9 and v10.
>
> Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  6 ++
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 81 ++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   | 33 ++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   | 34 ++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 84 +++++++++++++++++++++++++
>   5 files changed, 238 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index b499a3de8bb6..b6413a56f546 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs {
>   	/* flush the vm tlb via mmio */
>   	void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
>   				uint32_t vmhub, uint32_t flush_type);
> +	/* flush the vm tlb via pasid */
> +	int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
> +					uint32_t flush_type, bool all_hub);
>   	/* flush the vm tlb via ring */
>   	uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
>   				       uint64_t pd_addr);
> @@ -216,6 +219,9 @@ struct amdgpu_gmc {
>   };
>   
>   #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
> +#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \
> +	((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
> +	((adev), (pasid), (type), (allhub)))
>   #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
>   #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
>   #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index f5725336a5f2..b1a5408a8d7e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -30,6 +30,8 @@
>   #include "hdp/hdp_5_0_0_sh_mask.h"
>   #include "gc/gc_10_1_0_sh_mask.h"
>   #include "mmhub/mmhub_2_0_0_sh_mask.h"
> +#include "athub/athub_2_0_0_sh_mask.h"
> +#include "athub/athub_2_0_0_offset.h"
>   #include "dcn/dcn_2_0_0_offset.h"
>   #include "dcn/dcn_2_0_0_sh_mask.h"
>   #include "oss/osssys_5_0_0_offset.h"
> @@ -37,6 +39,7 @@
>   #include "navi10_enum.h"
>   
>   #include "soc15.h"
> +#include "soc15d.h"
>   #include "soc15_common.h"
>   
>   #include "nbio_v2_3.h"
> @@ -234,6 +237,48 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
>   		(!amdgpu_sriov_vf(adev)));
>   }
>   
> +static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
> +					struct amdgpu_device *adev,
> +					uint8_t vmid, uint16_t *p_pasid)
> +{
> +	uint32_t value;
> +
> +	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
> +		     + vmid);
> +	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
> +
> +	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
> +}
> +
> +static int gmc_v10_0_invalidate_tlbs_with_kiq(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	signed long r;
> +	uint32_t seq;
> +	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> +
> +	spin_lock(&adev->gfx.kiq.ring_lock);
> +	amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
> +	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> +	amdgpu_ring_write(ring,
> +			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> +			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> +			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> +			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> +	amdgpu_fence_emit_polling(ring, &seq);
> +	amdgpu_ring_commit(ring);
> +	spin_unlock(&adev->gfx.kiq.ring_lock);
> +
> +	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
> +	if (r < 1) {
> +		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> +		return -ETIME;
> +	}
> +
> +	return 0;
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -380,6 +425,41 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
>   }
>   
> +/**
> + * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flush
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	int vmid;
> +	uint16_t queried_pasid;
> +	bool ret;
> +	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> +
> +	if (amdgpu_emu_mode == 0 && ring->sched.ready)
> +		return gmc_v10_0_invalidate_tlbs_with_kiq(adev,
> +						pasid, flush_type, all_hub);
> +
> +	for (vmid = 1; vmid < 16; vmid++) {
> +
> +		ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
> +				&queried_pasid);
> +		if (ret	&& queried_pasid == pasid) {
> +			amdgpu_gmc_flush_gpu_tlb(adev, vmid,
> +					AMDGPU_GFXHUB_0, 0);
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>   static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>   					     unsigned vmid, uint64_t pd_addr)
>   {
> @@ -531,6 +611,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
>   
>   static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
>   	.flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
> +	.flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
>   	.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
>   	.map_mtype = gmc_v10_0_map_mtype,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> index f08e5330642d..19d5b133e1d7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> @@ -418,6 +418,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
>   	return 0;
>   }
>   
> +/**
> + * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flush
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	int vmid;
> +	unsigned int tmp;
> +
> +	if (adev->in_gpu_reset)
> +		return -EIO;
> +
> +	for (vmid = 1; vmid < 16; vmid++) {
> +
> +		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> +		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> +			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> +			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> +			RREG32(mmVM_INVALIDATE_RESPONSE);
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -1333,6 +1365,7 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = {
>   
>   static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
>   	.flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb,
> +	.flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid,
>   	.emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping,
>   	.set_prt = gmc_v7_0_set_prt,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> index 6d96d40fbcb8..27d83204fa2b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> @@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
>   	return 0;
>   }
>   
> +/**
> + * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flush
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	int vmid;
> +	unsigned int tmp;
> +
> +	if (adev->in_gpu_reset)
> +		return -EIO;
> +
> +	for (vmid = 1; vmid < 16; vmid++) {
> +
> +		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> +		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> +			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> +			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> +			RREG32(mmVM_INVALIDATE_RESPONSE);
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -1700,6 +1733,7 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
>   
>   static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
>   	.flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb,
> +	.flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid,
>   	.emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping,
>   	.set_prt = gmc_v8_0_set_prt,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index fa025ceeea0f..eb1e64bd56ed 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -38,10 +38,12 @@
>   #include "dce/dce_12_0_sh_mask.h"
>   #include "vega10_enum.h"
>   #include "mmhub/mmhub_1_0_offset.h"
> +#include "athub/athub_1_0_sh_mask.h"
>   #include "athub/athub_1_0_offset.h"
>   #include "oss/osssys_4_0_offset.h"
>   
>   #include "soc15.h"
> +#include "soc15d.h"
>   #include "soc15_common.h"
>   #include "umc/umc_6_0_sh_mask.h"
>   
> @@ -434,6 +436,47 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
>   		   adev->pdev->device == 0x15d8)));
>   }
>   
> +static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
> +					uint8_t vmid, uint16_t *p_pasid)
> +{
> +	uint32_t value;
> +
> +	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
> +		     + vmid);
> +	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
> +
> +	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
> +}

Probably better to expose just this function in the GMC interface.

> +
> +static int gmc_v9_0_invalidate_tlbs_with_kiq(struct amdgpu_device *adev,
> +				uint16_t pasid, uint32_t flush_type,
> +				bool all_hub)
> +{
> +	signed long r;
> +	uint32_t seq;
> +	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> +
> +	spin_lock(&adev->gfx.kiq.ring_lock);
> +	amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
> +	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> +	amdgpu_ring_write(ring,
> +			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> +			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> +			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> +			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));

That stuff is completely unrelated to the GMC and shouldn't be added here.

Christian.

> +	amdgpu_fence_emit_polling(ring, &seq);
> +	amdgpu_ring_commit(ring);
> +	spin_unlock(&adev->gfx.kiq.ring_lock);
> +
> +	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
> +	if (r < 1) {
> +		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> +		return -ETIME;
> +	}
> +
> +	return 0;
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -532,6 +575,46 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
>   }
>   
> +/**
> + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flush
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> +					uint16_t pasid, uint32_t flush_type,
> +					bool all_hub)
> +{
> +	int vmid, i;
> +	uint16_t queried_pasid;
> +	bool ret;
> +	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> +
> +	if (adev->in_gpu_reset)
> +		return -EIO;
> +
> +	if (ring->sched.ready)
> +		return gmc_v9_0_invalidate_tlbs_with_kiq(adev,
> +						pasid, flush_type, all_hub);
> +
> +	for (vmid = 1; vmid < 16; vmid++) {
> +
> +		ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
> +				&queried_pasid);
> +		if (ret && queried_pasid == pasid) {
> +			for (i = 0; i < adev->num_vmhubs; i++)
> +				amdgpu_gmc_flush_gpu_tlb(adev, vmid,
> +							i, flush_type);
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +
> +}
> +
>   static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>   					    unsigned vmid, uint64_t pd_addr)
>   {
> @@ -693,6 +776,7 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
>   
>   static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
>   	.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
> +	.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
>   	.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
>   	.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
>   	.map_mtype = gmc_v9_0_map_mtype,

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/5] drm/amdgpu: export function to flush TLB via pasid
  2020-01-05 15:41   ` Christian König
@ 2020-01-06 16:04     ` Felix Kuehling
  2020-01-06 16:23       ` Christian König
  0 siblings, 1 reply; 24+ messages in thread
From: Felix Kuehling @ 2020-01-06 16:04 UTC (permalink / raw)
  To: amd-gfx, Christian König, Sierra Guiza, Alejandro (Alex)

On 2020-01-05 10:41 a.m., Christian König wrote:
> Am 20.12.19 um 07:24 schrieb Alex Sierra:
>> This can be used directly from amdgpu and amdkfd to invalidate
>> TLB through pasid.
>> It supports gmc v7, v8, v9 and v10.
>>
>> Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490
>> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  6 ++
>>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 81 ++++++++++++++++++++++++
>>   drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   | 33 ++++++++++
>>   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   | 34 ++++++++++
>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 84 +++++++++++++++++++++++++
>>   5 files changed, 238 insertions(+)
[snip]
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index fa025ceeea0f..eb1e64bd56ed 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -38,10 +38,12 @@
>>   #include "dce/dce_12_0_sh_mask.h"
>>   #include "vega10_enum.h"
>>   #include "mmhub/mmhub_1_0_offset.h"
>> +#include "athub/athub_1_0_sh_mask.h"
>>   #include "athub/athub_1_0_offset.h"
>>   #include "oss/osssys_4_0_offset.h"
>>     #include "soc15.h"
>> +#include "soc15d.h"
>>   #include "soc15_common.h"
>>   #include "umc/umc_6_0_sh_mask.h"
>>   @@ -434,6 +436,47 @@ static bool 
>> gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
>>              adev->pdev->device == 0x15d8)));
>>   }
>>   +static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct 
>> amdgpu_device *adev,
>> +                    uint8_t vmid, uint16_t *p_pasid)
>> +{
>> +    uint32_t value;
>> +
>> +    value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, 
>> mmATC_VMID0_PASID_MAPPING)
>> +             + vmid);
>> +    *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
>> +
>> +    return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
>> +}
>
> Probably better to expose just this function in the GMC interface.

This is called below in gmc_v9_0_flush_gpu_tlb_pasid in the case that 
the KIQ is not ready. It is not needed outside this file, so no need to 
expose it in the GMC interface.


>
>> +
>> +static int gmc_v9_0_invalidate_tlbs_with_kiq(struct amdgpu_device 
>> *adev,
>> +                uint16_t pasid, uint32_t flush_type,
>> +                bool all_hub)
>> +{
>> +    signed long r;
>> +    uint32_t seq;
>> +    struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>> +
>> +    spin_lock(&adev->gfx.kiq.ring_lock);
>> +    amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
>> +    amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
>> +    amdgpu_ring_write(ring,
>> +            PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
>> +            PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
>> +            PACKET3_INVALIDATE_TLBS_PASID(pasid) |
>> +            PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
>
> That stuff is completely unrelated to the GMC and shouldn't be added 
> here.

Where else should it go? To me TLB flushing is very much something that 
belongs in this file.

Maybe you'd rather add "flush_tlbs_with_kiq" to amdgpu_ring_funcs and 
implement it in the gfx_v*.c GFX-IP code? I'm not sure that makes much 
sense either because it's only available on the KIQ ring.


>
> Christian.
>
>> +    amdgpu_fence_emit_polling(ring, &seq);
>> +    amdgpu_ring_commit(ring);
>> +    spin_unlock(&adev->gfx.kiq.ring_lock);
>> +
>> +    r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
>> +    if (r < 1) {
>> +        DRM_ERROR("wait for kiq fence error: %ld.\n", r);
>> +        return -ETIME;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>>   /*
>>    * GART
>>    * VMID 0 is the physical GPU addresses as used by the kernel.
>> @@ -532,6 +575,46 @@ static void gmc_v9_0_flush_gpu_tlb(struct 
>> amdgpu_device *adev, uint32_t vmid,
>>       DRM_ERROR("Timeout waiting for VM flush ACK!\n");
>>   }
>>   +/**
>> + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
>> + *
>> + * @adev: amdgpu_device pointer
>> + * @pasid: pasid to be flush
>> + *
>> + * Flush the TLB for the requested pasid.
>> + */
>> +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>> +                    uint16_t pasid, uint32_t flush_type,
>> +                    bool all_hub)

Christian, do you agree that this function belongs in the GMC interface? 
If not here, where do you suggest moving it?

Regards,
   Felix


>> +{
>> +    int vmid, i;
>> +    uint16_t queried_pasid;
>> +    bool ret;
>> +    struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>> +
>> +    if (adev->in_gpu_reset)
>> +        return -EIO;
>> +
>> +    if (ring->sched.ready)
>> +        return gmc_v9_0_invalidate_tlbs_with_kiq(adev,
>> +                        pasid, flush_type, all_hub);
>> +
>> +    for (vmid = 1; vmid < 16; vmid++) {
>> +
>> +        ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
>> +                &queried_pasid);
>> +        if (ret && queried_pasid == pasid) {
>> +            for (i = 0; i < adev->num_vmhubs; i++)
>> +                amdgpu_gmc_flush_gpu_tlb(adev, vmid,
>> +                            i, flush_type);
>> +            break;
>> +        }
>> +    }
>> +
>> +    return 0;
>> +
>> +}
>> +
>>   static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>>                           unsigned vmid, uint64_t pd_addr)
>>   {
>> @@ -693,6 +776,7 @@ static void gmc_v9_0_get_vm_pte(struct 
>> amdgpu_device *adev,
>>     static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
>>       .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
>> +    .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
>>       .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
>>       .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
>>       .map_mtype = gmc_v9_0_map_mtype,
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=02%7C01%7Cfelix.kuehling%40amd.com%7C0ebb82d62d1044ea57b608d791f5b021%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637138356784407460&amp;sdata=K8zhHT2YYFj8LXdD3YiihtNkbKNwa0ml6nQZ74zF828%3D&amp;reserved=0 
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/5] drm/amdgpu: export function to flush TLB via pasid
  2020-01-06 16:04     ` Felix Kuehling
@ 2020-01-06 16:23       ` Christian König
  2020-01-07  1:09         ` Sierra Guiza, Alejandro (Alex)
  0 siblings, 1 reply; 24+ messages in thread
From: Christian König @ 2020-01-06 16:23 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx, Sierra Guiza, Alejandro (Alex)

Am 06.01.20 um 17:04 schrieb Felix Kuehling:
> On 2020-01-05 10:41 a.m., Christian König wrote:
>> Am 20.12.19 um 07:24 schrieb Alex Sierra:
>>> This can be used directly from amdgpu and amdkfd to invalidate
>>> TLB through pasid.
>>> It supports gmc v7, v8, v9 and v10.
>>>
>>> Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490
>>> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  6 ++
>>>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 81 ++++++++++++++++++++++++
>>>   drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   | 33 ++++++++++
>>>   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   | 34 ++++++++++
>>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 84 
>>> +++++++++++++++++++++++++
>>>   5 files changed, 238 insertions(+)
> [snip]
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> index fa025ceeea0f..eb1e64bd56ed 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> @@ -38,10 +38,12 @@
>>>   #include "dce/dce_12_0_sh_mask.h"
>>>   #include "vega10_enum.h"
>>>   #include "mmhub/mmhub_1_0_offset.h"
>>> +#include "athub/athub_1_0_sh_mask.h"
>>>   #include "athub/athub_1_0_offset.h"
>>>   #include "oss/osssys_4_0_offset.h"
>>>     #include "soc15.h"
>>> +#include "soc15d.h"
>>>   #include "soc15_common.h"
>>>   #include "umc/umc_6_0_sh_mask.h"
>>>   @@ -434,6 +436,47 @@ static bool 
>>> gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
>>>              adev->pdev->device == 0x15d8)));
>>>   }
>>>   +static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct 
>>> amdgpu_device *adev,
>>> +                    uint8_t vmid, uint16_t *p_pasid)
>>> +{
>>> +    uint32_t value;
>>> +
>>> +    value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, 
>>> mmATC_VMID0_PASID_MAPPING)
>>> +             + vmid);
>>> +    *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
>>> +
>>> +    return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
>>> +}
>>
>> Probably better to expose just this function in the GMC interface.
>
> This is called below in gmc_v9_0_flush_gpu_tlb_pasid in the case that 
> the KIQ is not ready. It is not needed outside this file, so no need 
> to expose it in the GMC interface.
>
>
>>
>>> +
>>> +static int gmc_v9_0_invalidate_tlbs_with_kiq(struct amdgpu_device 
>>> *adev,
>>> +                uint16_t pasid, uint32_t flush_type,
>>> +                bool all_hub)
>>> +{
>>> +    signed long r;
>>> +    uint32_t seq;
>>> +    struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>>> +
>>> +    spin_lock(&adev->gfx.kiq.ring_lock);
>>> +    amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
>>> +    amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
>>> +    amdgpu_ring_write(ring,
>>> +            PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
>>> +            PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
>>> +            PACKET3_INVALIDATE_TLBS_PASID(pasid) |
>>> +            PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
>>
>> That stuff is completely unrelated to the GMC and shouldn't be added 
>> here.
>
> Where else should it go? To me TLB flushing is very much something 
> that belongs in this file.
>
> Maybe you'd rather add "flush_tlbs_with_kiq" to amdgpu_ring_funcs and 
> implement it in the gfx_v*.c GFX-IP code? I'm not sure that makes much 
> sense either because it's only available on the KIQ ring.

Yes, something like this. We should probably add a amdgpu_kiq_funcs and 
expose the functions needed by the GMC code there.

See the amdgpu_virt_kiq_reg_write_reg_wait() case for reference, it is 
very similar and should probably be added to that function table as well.

Christian.

>
>
>>
>> Christian.
>>
>>> +    amdgpu_fence_emit_polling(ring, &seq);
>>> +    amdgpu_ring_commit(ring);
>>> +    spin_unlock(&adev->gfx.kiq.ring_lock);
>>> +
>>> +    r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
>>> +    if (r < 1) {
>>> +        DRM_ERROR("wait for kiq fence error: %ld.\n", r);
>>> +        return -ETIME;
>>> +    }
>>> +
>>> +    return 0;
>>> +}
>>> +
>>>   /*
>>>    * GART
>>>    * VMID 0 is the physical GPU addresses as used by the kernel.
>>> @@ -532,6 +575,46 @@ static void gmc_v9_0_flush_gpu_tlb(struct 
>>> amdgpu_device *adev, uint32_t vmid,
>>>       DRM_ERROR("Timeout waiting for VM flush ACK!\n");
>>>   }
>>>   +/**
>>> + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
>>> + *
>>> + * @adev: amdgpu_device pointer
>>> + * @pasid: pasid to be flush
>>> + *
>>> + * Flush the TLB for the requested pasid.
>>> + */
>>> +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>>> +                    uint16_t pasid, uint32_t flush_type,
>>> +                    bool all_hub)
>
> Christian, do you agree that this function belongs in the GMC 
> interface? If not here, where do you suggest moving it?
>
> Regards,
>   Felix
>
>
>>> +{
>>> +    int vmid, i;
>>> +    uint16_t queried_pasid;
>>> +    bool ret;
>>> +    struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>>> +
>>> +    if (adev->in_gpu_reset)
>>> +        return -EIO;
>>> +
>>> +    if (ring->sched.ready)
>>> +        return gmc_v9_0_invalidate_tlbs_with_kiq(adev,
>>> +                        pasid, flush_type, all_hub);
>>> +
>>> +    for (vmid = 1; vmid < 16; vmid++) {
>>> +
>>> +        ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
>>> +                &queried_pasid);
>>> +        if (ret && queried_pasid == pasid) {
>>> +            for (i = 0; i < adev->num_vmhubs; i++)
>>> +                amdgpu_gmc_flush_gpu_tlb(adev, vmid,
>>> +                            i, flush_type);
>>> +            break;
>>> +        }
>>> +    }
>>> +
>>> +    return 0;
>>> +
>>> +}
>>> +
>>>   static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>>>                           unsigned vmid, uint64_t pd_addr)
>>>   {
>>> @@ -693,6 +776,7 @@ static void gmc_v9_0_get_vm_pte(struct 
>>> amdgpu_device *adev,
>>>     static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
>>>       .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
>>> +    .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
>>>       .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
>>>       .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
>>>       .map_mtype = gmc_v9_0_map_mtype,
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=02%7C01%7Cfelix.kuehling%40amd.com%7C0ebb82d62d1044ea57b608d791f5b021%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637138356784407460&amp;sdata=K8zhHT2YYFj8LXdD3YiihtNkbKNwa0ml6nQZ74zF828%3D&amp;reserved=0 
>>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* RE: [PATCH 2/5] drm/amdgpu: export function to flush TLB via pasid
  2020-01-06 16:23       ` Christian König
@ 2020-01-07  1:09         ` Sierra Guiza, Alejandro (Alex)
  2020-01-07 12:37           ` Christian König
  0 siblings, 1 reply; 24+ messages in thread
From: Sierra Guiza, Alejandro (Alex) @ 2020-01-07  1:09 UTC (permalink / raw)
  To: Koenig, Christian, Kuehling, Felix, amd-gfx

[AMD Official Use Only - Internal Distribution Only]



-----Original Message-----
From: Koenig, Christian <Christian.Koenig@amd.com> 
Sent: Monday, January 6, 2020 10:23 AM
To: Kuehling, Felix <Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org; Sierra Guiza, Alejandro (Alex) <Alex.Sierra@amd.com>
Subject: Re: [PATCH 2/5] drm/amdgpu: export function to flush TLB via pasid

Am 06.01.20 um 17:04 schrieb Felix Kuehling:
> On 2020-01-05 10:41 a.m., Christian König wrote:
>> Am 20.12.19 um 07:24 schrieb Alex Sierra:
>>> This can be used directly from amdgpu and amdkfd to invalidate TLB 
>>> through pasid.
>>> It supports gmc v7, v8, v9 and v10.
>>>
>>> Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490
>>> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  6 ++
>>>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 81 
>>> ++++++++++++++++++++++++
>>>   drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   | 33 ++++++++++
>>>   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   | 34 ++++++++++
>>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 84
>>> +++++++++++++++++++++++++
>>>   5 files changed, 238 insertions(+)
> [snip]
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> index fa025ceeea0f..eb1e64bd56ed 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> @@ -38,10 +38,12 @@
>>>   #include "dce/dce_12_0_sh_mask.h"
>>>   #include "vega10_enum.h"
>>>   #include "mmhub/mmhub_1_0_offset.h"
>>> +#include "athub/athub_1_0_sh_mask.h"
>>>   #include "athub/athub_1_0_offset.h"
>>>   #include "oss/osssys_4_0_offset.h"
>>>     #include "soc15.h"
>>> +#include "soc15d.h"
>>>   #include "soc15_common.h"
>>>   #include "umc/umc_6_0_sh_mask.h"
>>>   @@ -434,6 +436,47 @@ static bool
>>> gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
>>>              adev->pdev->device == 0x15d8)));
>>>   }
>>>   +static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct
>>> amdgpu_device *adev,
>>> +                    uint8_t vmid, uint16_t *p_pasid) {
>>> +    uint32_t value;
>>> +
>>> +    value = RREG32(SOC15_REG_OFFSET(ATHUB, 0,
>>> mmATC_VMID0_PASID_MAPPING)
>>> +             + vmid);
>>> +    *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
>>> +
>>> +    return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
>>> +}
>>
>> Probably better to expose just this function in the GMC interface.
>
> This is called below in gmc_v9_0_flush_gpu_tlb_pasid in the case that 
> the KIQ is not ready. It is not needed outside this file, so no need 
> to expose it in the GMC interface.
>
>
>>
>>> +
>>> +static int gmc_v9_0_invalidate_tlbs_with_kiq(struct amdgpu_device
>>> *adev,
>>> +                uint16_t pasid, uint32_t flush_type,
>>> +                bool all_hub)
>>> +{
>>> +    signed long r;
>>> +    uint32_t seq;
>>> +    struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>>> +
>>> +    spin_lock(&adev->gfx.kiq.ring_lock);
>>> +    amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs 
>>> +package*/
>>> +    amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
>>> +    amdgpu_ring_write(ring,
>>> +            PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
>>> +            PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
>>> +            PACKET3_INVALIDATE_TLBS_PASID(pasid) |
>>> +            PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
>>
>>> That stuff is completely unrelated to the GMC and shouldn't be added
>>> here.
>>
>> Where else should it go? To me TLB flushing is very much something 
>> that belongs in this file.
>>
>> Maybe you'd rather add "flush_tlbs_with_kiq" to amdgpu_ring_funcs and
>> implement it in the gfx_v*.c GFX-IP code? I'm not sure that makes much 
>> sense either because it's only available on the KIQ ring.

>Yes, something like this. We should probably add a amdgpu_kiq_funcs and expose the functions needed by the GMC code there.

>See the amdgpu_virt_kiq_reg_write_reg_wait() case for reference, it is very similar and should probably be added to that function table as well.

>Christian.
To summarize: 
1.- The idea is to add a new function pointer to the amdgpu_ring_funcs to flush the tlbs with kiq. 
2.- This function pointer should be pointed and implemented on each of the gfx_v*.c under the gfx_*_ring_funcs_kiq struct. If this is true, Im not seeing this struct on the gfx_v10.c file.
3.- Use the amdgpu_virt_kiq_reg_write_reg_wait as a reference for the implementation.

>>
>>
>>>
>>> Christian.
>>>
>>> +    amdgpu_fence_emit_polling(ring, &seq);
>>> +    amdgpu_ring_commit(ring);
>>> +    spin_unlock(&adev->gfx.kiq.ring_lock);
>>> +
>>> +    r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
>>> +    if (r < 1) {
>>> +        DRM_ERROR("wait for kiq fence error: %ld.\n", r);
>>> +        return -ETIME;
>>> +    }
>>> +
>>> +    return 0;
>>> +}
>>> +
>>>   /*
>>>    * GART
>>>    * VMID 0 is the physical GPU addresses as used by the kernel.
>>> @@ -532,6 +575,46 @@ static void gmc_v9_0_flush_gpu_tlb(struct 
>>> amdgpu_device *adev, uint32_t vmid,
>>>       DRM_ERROR("Timeout waiting for VM flush ACK!\n");
>>>   }
>>>   +/**
>>> + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
>>> + *
>>> + * @adev: amdgpu_device pointer
>>> + * @pasid: pasid to be flush
>>> + *
>>> + * Flush the TLB for the requested pasid.
>>> + */
>>> +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>>> +                    uint16_t pasid, uint32_t flush_type,
>>> +                    bool all_hub)
>
> Christian, do you agree that this function belongs in the GMC 
> interface? If not here, where do you suggest moving it?
>
> Regards,
>   Felix
>
>
>>> +{
>>> +    int vmid, i;
>>> +    uint16_t queried_pasid;
>>> +    bool ret;
>>> +    struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>>> +
>>> +    if (adev->in_gpu_reset)
>>> +        return -EIO;
>>> +
>>> +    if (ring->sched.ready)
>>> +        return gmc_v9_0_invalidate_tlbs_with_kiq(adev,
>>> +                        pasid, flush_type, all_hub);
>>> +
>>> +    for (vmid = 1; vmid < 16; vmid++) {
>>> +
>>> +        ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
>>> +                &queried_pasid);
>>> +        if (ret && queried_pasid == pasid) {
>>> +            for (i = 0; i < adev->num_vmhubs; i++)
>>> +                amdgpu_gmc_flush_gpu_tlb(adev, vmid,
>>> +                            i, flush_type);
>>> +            break;
>>> +        }
>>> +    }
>>> +
>>> +    return 0;
>>> +
>>> +}
>>> +
>>>   static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring 
>>> *ring,
>>>                           unsigned vmid, uint64_t pd_addr)
>>>   {
>>> @@ -693,6 +776,7 @@ static void gmc_v9_0_get_vm_pte(struct 
>>> amdgpu_device *adev,
>>>     static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
>>>       .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
>>> +    .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
>>>       .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
>>>       .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
>>>       .map_mtype = gmc_v9_0_map_mtype,
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flis
>> ts.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=02%7C01%7C
>> felix.kuehling%40amd.com%7C0ebb82d62d1044ea57b608d791f5b021%7C3dd8961
>> fe4884e608e11a82d994e183d%7C0%7C0%7C637138356784407460&amp;sdata=K8zh
>> HT2YYFj8LXdD3YiihtNkbKNwa0ml6nQZ74zF828%3D&amp;reserved=0
>>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/5] drm/amdgpu: export function to flush TLB via pasid
  2020-01-07  1:09         ` Sierra Guiza, Alejandro (Alex)
@ 2020-01-07 12:37           ` Christian König
  0 siblings, 0 replies; 24+ messages in thread
From: Christian König @ 2020-01-07 12:37 UTC (permalink / raw)
  To: Sierra Guiza, Alejandro (Alex),
	Koenig, Christian, Kuehling, Felix, amd-gfx

Am 07.01.20 um 02:09 schrieb Sierra Guiza, Alejandro (Alex):
> [AMD Official Use Only - Internal Distribution Only]
>
>
>
> -----Original Message-----
> From: Koenig, Christian <Christian.Koenig@amd.com>
> Sent: Monday, January 6, 2020 10:23 AM
> To: Kuehling, Felix <Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org; Sierra Guiza, Alejandro (Alex) <Alex.Sierra@amd.com>
> Subject: Re: [PATCH 2/5] drm/amdgpu: export function to flush TLB via pasid
>
> Am 06.01.20 um 17:04 schrieb Felix Kuehling:
>> On 2020-01-05 10:41 a.m., Christian König wrote:
>>> Am 20.12.19 um 07:24 schrieb Alex Sierra:
>>>> This can be used directly from amdgpu and amdkfd to invalidate TLB
>>>> through pasid.
>>>> It supports gmc v7, v8, v9 and v10.
>>>>
>>>> Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490
>>>> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  6 ++
>>>>    drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 81
>>>> ++++++++++++++++++++++++
>>>>    drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   | 33 ++++++++++
>>>>    drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   | 34 ++++++++++
>>>>    drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 84
>>>> +++++++++++++++++++++++++
>>>>    5 files changed, 238 insertions(+)
>> [snip]
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>> index fa025ceeea0f..eb1e64bd56ed 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>> @@ -38,10 +38,12 @@
>>>>    #include "dce/dce_12_0_sh_mask.h"
>>>>    #include "vega10_enum.h"
>>>>    #include "mmhub/mmhub_1_0_offset.h"
>>>> +#include "athub/athub_1_0_sh_mask.h"
>>>>    #include "athub/athub_1_0_offset.h"
>>>>    #include "oss/osssys_4_0_offset.h"
>>>>      #include "soc15.h"
>>>> +#include "soc15d.h"
>>>>    #include "soc15_common.h"
>>>>    #include "umc/umc_6_0_sh_mask.h"
>>>>    @@ -434,6 +436,47 @@ static bool
>>>> gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
>>>>               adev->pdev->device == 0x15d8)));
>>>>    }
>>>>    +static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct
>>>> amdgpu_device *adev,
>>>> +                    uint8_t vmid, uint16_t *p_pasid) {
>>>> +    uint32_t value;
>>>> +
>>>> +    value = RREG32(SOC15_REG_OFFSET(ATHUB, 0,
>>>> mmATC_VMID0_PASID_MAPPING)
>>>> +             + vmid);
>>>> +    *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
>>>> +
>>>> +    return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
>>>> +}
>>> Probably better to expose just this function in the GMC interface.
>> This is called below in gmc_v9_0_flush_gpu_tlb_pasid in the case that
>> the KIQ is not ready. It is not needed outside this file, so no need
>> to expose it in the GMC interface.
>>
>>
>>>> +
>>>> +static int gmc_v9_0_invalidate_tlbs_with_kiq(struct amdgpu_device
>>>> *adev,
>>>> +                uint16_t pasid, uint32_t flush_type,
>>>> +                bool all_hub)
>>>> +{
>>>> +    signed long r;
>>>> +    uint32_t seq;
>>>> +    struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>>>> +
>>>> +    spin_lock(&adev->gfx.kiq.ring_lock);
>>>> +    amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs
>>>> +package*/
>>>> +    amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
>>>> +    amdgpu_ring_write(ring,
>>>> +            PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
>>>> +            PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
>>>> +            PACKET3_INVALIDATE_TLBS_PASID(pasid) |
>>>> +            PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
>>>> That stuff is completely unrelated to the GMC and shouldn't be added
>>>> here.
>>> Where else should it go? To me TLB flushing is very much something
>>> that belongs in this file.
>>>
>>> Maybe you'd rather add "flush_tlbs_with_kiq" to amdgpu_ring_funcs and
>>> implement it in the gfx_v*.c GFX-IP code? I'm not sure that makes much
>>> sense either because it's only available on the KIQ ring.
>> Yes, something like this. We should probably add a amdgpu_kiq_funcs and expose the functions needed by the GMC code there.
>> See the amdgpu_virt_kiq_reg_write_reg_wait() case for reference, it is very similar and should probably be added to that function table as well.
>> Christian.
> To summarize:
> 1.- The idea is to add a new function pointer to the amdgpu_ring_funcs to flush the tlbs with kiq.

I would add a new amdgpu_kiq_funcs structure for that.

> 2.- This function pointer should be pointed and implemented on each of the gfx_v*.c under the gfx_*_ring_funcs_kiq struct. If this is true, Im not seeing this struct on the gfx_v10.c file.
> 3.- Use the amdgpu_virt_kiq_reg_write_reg_wait as a reference for the implementation.

Well yes and no, the amdgpu_virt_kiq_reg_write_reg_wait() was just an 
example of a similar case.

The function amdgpu_virt_kiq_reg_write_reg_wait() should probably be 
cleaned up and moved into the gfx_*.c files as well.

Regards,
Christian.

>
>>>
>>>> Christian.
>>>>
>>>> +    amdgpu_fence_emit_polling(ring, &seq);
>>>> +    amdgpu_ring_commit(ring);
>>>> +    spin_unlock(&adev->gfx.kiq.ring_lock);
>>>> +
>>>> +    r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
>>>> +    if (r < 1) {
>>>> +        DRM_ERROR("wait for kiq fence error: %ld.\n", r);
>>>> +        return -ETIME;
>>>> +    }
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>>    /*
>>>>     * GART
>>>>     * VMID 0 is the physical GPU addresses as used by the kernel.
>>>> @@ -532,6 +575,46 @@ static void gmc_v9_0_flush_gpu_tlb(struct
>>>> amdgpu_device *adev, uint32_t vmid,
>>>>        DRM_ERROR("Timeout waiting for VM flush ACK!\n");
>>>>    }
>>>>    +/**
>>>> + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
>>>> + *
>>>> + * @adev: amdgpu_device pointer
>>>> + * @pasid: pasid to be flush
>>>> + *
>>>> + * Flush the TLB for the requested pasid.
>>>> + */
>>>> +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>>>> +                    uint16_t pasid, uint32_t flush_type,
>>>> +                    bool all_hub)
>> Christian, do you agree that this function belongs in the GMC
>> interface? If not here, where do you suggest moving it?
>>
>> Regards,
>>    Felix
>>
>>
>>>> +{
>>>> +    int vmid, i;
>>>> +    uint16_t queried_pasid;
>>>> +    bool ret;
>>>> +    struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>>>> +
>>>> +    if (adev->in_gpu_reset)
>>>> +        return -EIO;
>>>> +
>>>> +    if (ring->sched.ready)
>>>> +        return gmc_v9_0_invalidate_tlbs_with_kiq(adev,
>>>> +                        pasid, flush_type, all_hub);
>>>> +
>>>> +    for (vmid = 1; vmid < 16; vmid++) {
>>>> +
>>>> +        ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
>>>> +                &queried_pasid);
>>>> +        if (ret && queried_pasid == pasid) {
>>>> +            for (i = 0; i < adev->num_vmhubs; i++)
>>>> +                amdgpu_gmc_flush_gpu_tlb(adev, vmid,
>>>> +                            i, flush_type);
>>>> +            break;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    return 0;
>>>> +
>>>> +}
>>>> +
>>>>    static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring
>>>> *ring,
>>>>                            unsigned vmid, uint64_t pd_addr)
>>>>    {
>>>> @@ -693,6 +776,7 @@ static void gmc_v9_0_get_vm_pte(struct
>>>> amdgpu_device *adev,
>>>>      static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
>>>>        .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
>>>> +    .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
>>>>        .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
>>>>        .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
>>>>        .map_mtype = gmc_v9_0_map_mtype,
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flis
>>> ts.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=02%7C01%7C
>>> felix.kuehling%40amd.com%7C0ebb82d62d1044ea57b608d791f5b021%7C3dd8961
>>> fe4884e608e11a82d994e183d%7C0%7C0%7C637138356784407460&amp;sdata=K8zh
>>> HT2YYFj8LXdD3YiihtNkbKNwa0ml6nQZ74zF828%3D&amp;reserved=0
>>>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/5] drm/amdgpu: Avoid reclaim fs while eviction lock
  2020-01-02 21:11 Alex Sierra
@ 2020-01-02 21:41 ` Yong Zhao
  0 siblings, 0 replies; 24+ messages in thread
From: Yong Zhao @ 2020-01-02 21:41 UTC (permalink / raw)
  To: amd-gfx

One comment inline.

On 2020-01-02 4:11 p.m., Alex Sierra wrote:
> [Why]
> Avoid reclaim filesystem while eviction lock is held called from
> MMU notifier.
>
> [How]
> Setting PF_MEMALLOC_NOFS flags while eviction mutex is locked.
> Using memalloc_nofs_save / memalloc_nofs_restore API.
>
> Change-Id: I5531c9337836e7d4a430df3f16dcc82888e8018c
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 40 +++++++++++++++++++++-----
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  6 +++-
>   2 files changed, 38 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index b999b67ff57a..d6aba4f9df74 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -82,6 +82,32 @@ struct amdgpu_prt_cb {
>   	struct dma_fence_cb cb;
>   };
>   
> +/**
> + * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
> + * happens while holding this lock anywhere to prevent deadlocks when
> + * an MMU notifier runs in reclaim-FS context.
> + */
> +static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
> +{
> +	mutex_lock(&vm->eviction_lock);
> +	vm->saved_flags = memalloc_nofs_save();
[yz] I feel memalloc_nofs_save() should be called before mutex_lock(). 
Not too sure though.
> +}
> +
> +static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
> +{
> +	if (mutex_trylock(&vm->eviction_lock)) {
> +		vm->saved_flags = memalloc_nofs_save();
> +		return 1;
> +	}
> +	return 0;
> +}
> +
> +static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
> +{
> +	memalloc_nofs_restore(vm->saved_flags);
> +	mutex_unlock(&vm->eviction_lock);
> +}
> +
>   /**
>    * amdgpu_vm_level_shift - return the addr shift for each level
>    *
> @@ -678,9 +704,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   		}
>   	}
>   
> -	mutex_lock(&vm->eviction_lock);
> +	amdgpu_vm_eviction_lock(vm);
>   	vm->evicting = false;
> -	mutex_unlock(&vm->eviction_lock);
> +	amdgpu_vm_eviction_unlock(vm);
>   
>   	return 0;
>   }
> @@ -1559,7 +1585,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   	if (!(flags & AMDGPU_PTE_VALID))
>   		owner = AMDGPU_FENCE_OWNER_KFD;
>   
> -	mutex_lock(&vm->eviction_lock);
> +	amdgpu_vm_eviction_lock(vm);
>   	if (vm->evicting) {
>   		r = -EBUSY;
>   		goto error_unlock;
> @@ -1576,7 +1602,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   	r = vm->update_funcs->commit(&params, fence);
>   
>   error_unlock:
> -	mutex_unlock(&vm->eviction_lock);
> +	amdgpu_vm_eviction_unlock(vm);
>   	return r;
>   }
>   
> @@ -2537,18 +2563,18 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
>   		return false;
>   
>   	/* Try to block ongoing updates */
> -	if (!mutex_trylock(&bo_base->vm->eviction_lock))
> +	if (!amdgpu_vm_eviction_trylock(bo_base->vm))
>   		return false;
>   
>   	/* Don't evict VM page tables while they are updated */
>   	if (!dma_fence_is_signaled(bo_base->vm->last_direct) ||
>   	    !dma_fence_is_signaled(bo_base->vm->last_delayed)) {
> -		mutex_unlock(&bo_base->vm->eviction_lock);
> +		amdgpu_vm_eviction_unlock(bo_base->vm);
>   		return false;
>   	}
>   
>   	bo_base->vm->evicting = true;
> -	mutex_unlock(&bo_base->vm->eviction_lock);
> +	amdgpu_vm_eviction_unlock(bo_base->vm);
>   	return true;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 100547f094ff..c21a36bebc0c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -30,6 +30,7 @@
>   #include <drm/gpu_scheduler.h>
>   #include <drm/drm_file.h>
>   #include <drm/ttm/ttm_bo_driver.h>
> +#include <linux/sched/mm.h>
>   
>   #include "amdgpu_sync.h"
>   #include "amdgpu_ring.h"
> @@ -242,9 +243,12 @@ struct amdgpu_vm {
>   	/* tree of virtual addresses mapped */
>   	struct rb_root_cached	va;
>   
> -	/* Lock to prevent eviction while we are updating page tables */
> +	/* Lock to prevent eviction while we are updating page tables
> +	 * use vm_eviction_lock/unlock(vm)
> +	 */
>   	struct mutex		eviction_lock;
>   	bool			evicting;
> +	unsigned int		saved_flags;
>   
>   	/* BOs who needs a validation */
>   	struct list_head	evicted;
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 1/5] drm/amdgpu: Avoid reclaim fs while eviction lock
@ 2020-01-02 21:11 Alex Sierra
  2020-01-02 21:41 ` Yong Zhao
  0 siblings, 1 reply; 24+ messages in thread
From: Alex Sierra @ 2020-01-02 21:11 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Sierra

[Why]
Avoid reclaim filesystem while eviction lock is held called from
MMU notifier.

[How]
Setting PF_MEMALLOC_NOFS flags while eviction mutex is locked.
Using memalloc_nofs_save / memalloc_nofs_restore API.

Change-Id: I5531c9337836e7d4a430df3f16dcc82888e8018c
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 40 +++++++++++++++++++++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  6 +++-
 2 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b999b67ff57a..d6aba4f9df74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -82,6 +82,32 @@ struct amdgpu_prt_cb {
 	struct dma_fence_cb cb;
 };
 
+/**
+ * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
+ * happens while holding this lock anywhere to prevent deadlocks when
+ * an MMU notifier runs in reclaim-FS context.
+ */
+static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
+{
+	mutex_lock(&vm->eviction_lock);
+	vm->saved_flags = memalloc_nofs_save();
+}
+
+static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
+{
+	if (mutex_trylock(&vm->eviction_lock)) {
+		vm->saved_flags = memalloc_nofs_save();
+		return 1;
+	}
+	return 0;
+}
+
+static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
+{
+	memalloc_nofs_restore(vm->saved_flags);
+	mutex_unlock(&vm->eviction_lock);
+}
+
 /**
  * amdgpu_vm_level_shift - return the addr shift for each level
  *
@@ -678,9 +704,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		}
 	}
 
-	mutex_lock(&vm->eviction_lock);
+	amdgpu_vm_eviction_lock(vm);
 	vm->evicting = false;
-	mutex_unlock(&vm->eviction_lock);
+	amdgpu_vm_eviction_unlock(vm);
 
 	return 0;
 }
@@ -1559,7 +1585,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (!(flags & AMDGPU_PTE_VALID))
 		owner = AMDGPU_FENCE_OWNER_KFD;
 
-	mutex_lock(&vm->eviction_lock);
+	amdgpu_vm_eviction_lock(vm);
 	if (vm->evicting) {
 		r = -EBUSY;
 		goto error_unlock;
@@ -1576,7 +1602,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	r = vm->update_funcs->commit(&params, fence);
 
 error_unlock:
-	mutex_unlock(&vm->eviction_lock);
+	amdgpu_vm_eviction_unlock(vm);
 	return r;
 }
 
@@ -2537,18 +2563,18 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
 		return false;
 
 	/* Try to block ongoing updates */
-	if (!mutex_trylock(&bo_base->vm->eviction_lock))
+	if (!amdgpu_vm_eviction_trylock(bo_base->vm))
 		return false;
 
 	/* Don't evict VM page tables while they are updated */
 	if (!dma_fence_is_signaled(bo_base->vm->last_direct) ||
 	    !dma_fence_is_signaled(bo_base->vm->last_delayed)) {
-		mutex_unlock(&bo_base->vm->eviction_lock);
+		amdgpu_vm_eviction_unlock(bo_base->vm);
 		return false;
 	}
 
 	bo_base->vm->evicting = true;
-	mutex_unlock(&bo_base->vm->eviction_lock);
+	amdgpu_vm_eviction_unlock(bo_base->vm);
 	return true;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 100547f094ff..c21a36bebc0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -30,6 +30,7 @@
 #include <drm/gpu_scheduler.h>
 #include <drm/drm_file.h>
 #include <drm/ttm/ttm_bo_driver.h>
+#include <linux/sched/mm.h>
 
 #include "amdgpu_sync.h"
 #include "amdgpu_ring.h"
@@ -242,9 +243,12 @@ struct amdgpu_vm {
 	/* tree of virtual addresses mapped */
 	struct rb_root_cached	va;
 
-	/* Lock to prevent eviction while we are updating page tables */
+	/* Lock to prevent eviction while we are updating page tables
+	 * use vm_eviction_lock/unlock(vm)
+	 */
 	struct mutex		eviction_lock;
 	bool			evicting;
+	unsigned int		saved_flags;
 
 	/* BOs who needs a validation */
 	struct list_head	evicted;
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2020-01-07 12:37 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-12-20  6:24 [PATCH 1/5] drm/amdgpu: Avoid reclaim fs while eviction lock Alex Sierra
2019-12-20  6:24 ` [PATCH 2/5] drm/amdgpu: export function to flush TLB via pasid Alex Sierra
2019-12-20 21:32   ` Felix Kuehling
2019-12-20 23:51   ` Yong Zhao
2020-01-05 15:41   ` Christian König
2020-01-06 16:04     ` Felix Kuehling
2020-01-06 16:23       ` Christian König
2020-01-07  1:09         ` Sierra Guiza, Alejandro (Alex)
2020-01-07 12:37           ` Christian König
2019-12-20  6:24 ` [PATCH 3/5] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd Alex Sierra
2019-12-20 21:35   ` Felix Kuehling
2019-12-20 23:50     ` Yong Zhao
2019-12-21  0:01       ` Yong Zhao
2019-12-23 19:34         ` Felix Kuehling
2019-12-23 19:44           ` Zhao, Yong
2019-12-20  6:24 ` [PATCH 4/5] drm/amdgpu: flush TLB functions removal from kfd2kgd interface Alex Sierra
2019-12-20 21:38   ` Felix Kuehling
2019-12-20  6:24 ` [PATCH 5/5] drm/amdgpu: invalidate BO during userptr mapping Alex Sierra
2019-12-20 21:40   ` Felix Kuehling
2019-12-20 21:39 ` [PATCH 1/5] drm/amdgpu: Avoid reclaim fs while eviction lock Felix Kuehling
2019-12-20 23:28 ` Yong Zhao
2020-01-02  9:16 ` Christian König
2020-01-02 21:11 Alex Sierra
2020-01-02 21:41 ` Yong Zhao

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.