* [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock
@ 2020-01-11 18:39 Alex Sierra
2020-01-11 18:39 ` [PATCH 2/7] drm/amdgpu: kiq pm4 function implementation for gfx_v9 Alex Sierra
` (6 more replies)
0 siblings, 7 replies; 22+ messages in thread
From: Alex Sierra @ 2020-01-11 18:39 UTC (permalink / raw)
To: amd-gfx; +Cc: Alex Sierra
[Why]
Avoid filesystem reclaim while the eviction lock is held, because the
lock can also be taken from an MMU notifier.
[How]
Set the PF_MEMALLOC_NOFS flag while the eviction mutex is locked,
using the memalloc_nofs_save / memalloc_nofs_restore API.
Change-Id: I5531c9337836e7d4a430df3f16dcc82888e8018c
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 40 +++++++++++++++++++++-----
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 6 +++-
2 files changed, 38 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b999b67ff57a..d6aba4f9df74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -82,6 +82,32 @@ struct amdgpu_prt_cb {
struct dma_fence_cb cb;
};
+/**
+ * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
+ * happens while holding this lock anywhere to prevent deadlocks when
+ * an MMU notifier runs in reclaim-FS context.
+ */
+static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
+{
+ mutex_lock(&vm->eviction_lock);
+ vm->saved_flags = memalloc_nofs_save();
+}
+
+static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
+{
+ if (mutex_trylock(&vm->eviction_lock)) {
+ vm->saved_flags = memalloc_nofs_save();
+ return 1;
+ }
+ return 0;
+}
+
+static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
+{
+ memalloc_nofs_restore(vm->saved_flags);
+ mutex_unlock(&vm->eviction_lock);
+}
+
/**
* amdgpu_vm_level_shift - return the addr shift for each level
*
@@ -678,9 +704,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
}
}
- mutex_lock(&vm->eviction_lock);
+ amdgpu_vm_eviction_lock(vm);
vm->evicting = false;
- mutex_unlock(&vm->eviction_lock);
+ amdgpu_vm_eviction_unlock(vm);
return 0;
}
@@ -1559,7 +1585,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
if (!(flags & AMDGPU_PTE_VALID))
owner = AMDGPU_FENCE_OWNER_KFD;
- mutex_lock(&vm->eviction_lock);
+ amdgpu_vm_eviction_lock(vm);
if (vm->evicting) {
r = -EBUSY;
goto error_unlock;
@@ -1576,7 +1602,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
r = vm->update_funcs->commit(&params, fence);
error_unlock:
- mutex_unlock(&vm->eviction_lock);
+ amdgpu_vm_eviction_unlock(vm);
return r;
}
@@ -2537,18 +2563,18 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
return false;
/* Try to block ongoing updates */
- if (!mutex_trylock(&bo_base->vm->eviction_lock))
+ if (!amdgpu_vm_eviction_trylock(bo_base->vm))
return false;
/* Don't evict VM page tables while they are updated */
if (!dma_fence_is_signaled(bo_base->vm->last_direct) ||
!dma_fence_is_signaled(bo_base->vm->last_delayed)) {
- mutex_unlock(&bo_base->vm->eviction_lock);
+ amdgpu_vm_eviction_unlock(bo_base->vm);
return false;
}
bo_base->vm->evicting = true;
- mutex_unlock(&bo_base->vm->eviction_lock);
+ amdgpu_vm_eviction_unlock(bo_base->vm);
return true;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 100547f094ff..c21a36bebc0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -30,6 +30,7 @@
#include <drm/gpu_scheduler.h>
#include <drm/drm_file.h>
#include <drm/ttm/ttm_bo_driver.h>
+#include <linux/sched/mm.h>
#include "amdgpu_sync.h"
#include "amdgpu_ring.h"
@@ -242,9 +243,12 @@ struct amdgpu_vm {
/* tree of virtual addresses mapped */
struct rb_root_cached va;
- /* Lock to prevent eviction while we are updating page tables */
+ /* Lock to prevent eviction while we are updating page tables
+ * use amdgpu_vm_eviction_lock/unlock(vm)
+ */
struct mutex eviction_lock;
bool evicting;
+ unsigned int saved_flags;
/* BOs who needs a validation */
struct list_head evicted;
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 2/7] drm/amdgpu: kiq pm4 function implementation for gfx_v9
2020-01-11 18:39 [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock Alex Sierra
@ 2020-01-11 18:39 ` Alex Sierra
2020-01-13 12:10 ` Christian König
2020-01-13 16:57 ` Felix Kuehling
2020-01-11 18:39 ` [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra
` (5 subsequent siblings)
6 siblings, 2 replies; 22+ messages in thread
From: Alex Sierra @ 2020-01-11 18:39 UTC (permalink / raw)
To: amd-gfx; +Cc: Alex Sierra
Implement the kiq_pm4_funcs struct member functions
for gfx_v9.
Change-Id: I8fd3e160c4bd58f19d35d29e39517db967063afe
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
---
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 115 ++++++++++++++++++++++++++
1 file changed, 115 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index e3d466bd5c4e..ad0179ea2cc5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -739,6 +739,120 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
void *inject_if);
+static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
+ uint64_t queue_mask)
+{
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_SET_RESOURCES_VMID_MASK(0) |
+ /* vmid_mask:0 queue_type:0 (KIQ) */
+ PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
+ amdgpu_ring_write(kiq_ring,
+ lower_32_bits(queue_mask)); /* queue mask lo */
+ amdgpu_ring_write(kiq_ring,
+ upper_32_bits(queue_mask)); /* queue mask hi */
+ amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
+ amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
+ amdgpu_ring_write(kiq_ring, 0); /* oac mask */
+ amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
+}
+
+static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+ uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+ PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+ PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
+ /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
+ /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
+ PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
+ /* num_queues: must be 1 */
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+}
+
+static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq)
+{
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_UNMAP_QUEUES_ACTION(action) |
+ PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
+ PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+
+ if (action == PREEMPT_QUEUES_NO_UNMAP) {
+ amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, seq);
+ } else {
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ }
+}
+
+static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ u64 addr,
+ u64 seq)
+{
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
+ PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
+ PACKET3_QUERY_STATUS_COMMAND(2));
+ /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
+ PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
+}
+
+static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
+ .kiq_set_resources = gfx_v9_0_kiq_set_resources,
+ .kiq_map_queues = gfx_v9_0_kiq_map_queues,
+ .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
+ .kiq_query_status = gfx_v9_0_kiq_query_status,
+ .set_resources_size = 8,
+ .map_queues_size = 7,
+ .unmap_queues_size = 6,
+ .query_status_size = 7,
+};
+
+static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
+}
+
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
@@ -4260,6 +4374,7 @@ static int gfx_v9_0_early_init(void *handle)
else
adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+ gfx_v9_0_set_kiq_pm4_funcs(adev);
gfx_v9_0_set_ring_funcs(adev);
gfx_v9_0_set_irq_funcs(adev);
gfx_v9_0_set_gds_init(adev);
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10
2020-01-11 18:39 [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock Alex Sierra
2020-01-11 18:39 ` [PATCH 2/7] drm/amdgpu: kiq pm4 function implementation for gfx_v9 Alex Sierra
@ 2020-01-11 18:39 ` Alex Sierra
2020-01-13 12:12 ` Christian König
2020-01-13 16:36 ` Felix Kuehling
2020-01-11 18:39 ` [PATCH 4/7] drm/amdgpu: replace kcq enable function on gfx_v9 Alex Sierra
` (4 subsequent siblings)
6 siblings, 2 replies; 22+ messages in thread
From: Alex Sierra @ 2020-01-11 18:39 UTC (permalink / raw)
To: amd-gfx; +Cc: Alex Sierra
Add a TLB invalidate function pointer to the kiq_pm4_funcs struct.
This way, a TLB flush can be done through the kiq member.
TLB invalidation is implemented for gfx9 and gfx10.
Change-Id: I1b77b364f3ae0038ff3e70e869be5f2ef6e6d293
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 +++
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 33 +++++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 32 ++++++++++++++++++++++++
3 files changed, 69 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 8e88e0411662..2927837bd401 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -76,11 +76,15 @@ struct kiq_pm4_funcs {
struct amdgpu_ring *ring,
u64 addr,
u64 seq);
+ int (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub);
/* Packet sizes */
int set_resources_size;
int map_queues_size;
int unmap_queues_size;
int query_status_size;
+ int invalidate_tlbs_size;
};
struct amdgpu_kiq {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 379e46c1b7f6..2e82213f57eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -40,6 +40,7 @@
#include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
#include "soc15.h"
+#include "soc15d.h"
#include "soc15_common.h"
#include "clearstate_gfx10.h"
#include "v10_structs.h"
@@ -346,15 +347,47 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}
+static int gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ signed long r;
+ uint32_t seq;
+ struct amdgpu_device *adev = kiq_ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+
+ spin_lock(&adev->gfx.kiq.ring_lock);
+ amdgpu_ring_alloc(kiq_ring, kiq->pmf->invalidate_tlbs_size);
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+ amdgpu_fence_emit_polling(kiq_ring, &seq);
+ amdgpu_ring_commit(kiq_ring);
+ spin_unlock(&adev->gfx.kiq.ring_lock);
+
+ r = amdgpu_fence_wait_polling(kiq_ring, seq, adev->usec_timeout);
+ if (r < 1) {
+ DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+ return -ETIME;
+ }
+
+ return 0;
+}
+
static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
.kiq_set_resources = gfx10_kiq_set_resources,
.kiq_map_queues = gfx10_kiq_map_queues,
.kiq_unmap_queues = gfx10_kiq_unmap_queues,
.kiq_query_status = gfx10_kiq_query_status,
+ .kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
.set_resources_size = 8,
.map_queues_size = 7,
.unmap_queues_size = 6,
.query_status_size = 7,
+ .invalidate_tlbs_size = 12,
};
static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index ad0179ea2cc5..5be6fab55b73 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -837,15 +837,47 @@ static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}
+static int gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ signed long r;
+ uint32_t seq;
+ struct amdgpu_device *adev = kiq_ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+
+ spin_lock(&adev->gfx.kiq.ring_lock);
+ amdgpu_ring_alloc(kiq_ring, kiq->pmf->invalidate_tlbs_size);
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+ amdgpu_fence_emit_polling(kiq_ring, &seq);
+ amdgpu_ring_commit(kiq_ring);
+ spin_unlock(&adev->gfx.kiq.ring_lock);
+
+ r = amdgpu_fence_wait_polling(kiq_ring, seq, adev->usec_timeout);
+ if (r < 1) {
+ DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+ return -ETIME;
+ }
+
+ return 0;
+}
+
static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
.kiq_set_resources = gfx_v9_0_kiq_set_resources,
.kiq_map_queues = gfx_v9_0_kiq_map_queues,
.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
.kiq_query_status = gfx_v9_0_kiq_query_status,
+ .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
.set_resources_size = 8,
.map_queues_size = 7,
.unmap_queues_size = 6,
.query_status_size = 7,
+ .invalidate_tlbs_size = 12,
};
static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 4/7] drm/amdgpu: replace kcq enable function on gfx_v9
2020-01-11 18:39 [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock Alex Sierra
2020-01-11 18:39 ` [PATCH 2/7] drm/amdgpu: kiq pm4 function implementation for gfx_v9 Alex Sierra
2020-01-11 18:39 ` [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra
@ 2020-01-11 18:39 ` Alex Sierra
2020-01-13 12:12 ` Christian König
2020-01-13 16:53 ` Felix Kuehling
2020-01-11 18:39 ` [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid Alex Sierra
` (3 subsequent siblings)
6 siblings, 2 replies; 22+ messages in thread
From: Alex Sierra @ 2020-01-11 18:39 UTC (permalink / raw)
To: amd-gfx; +Cc: Alex Sierra
[Why]
There's a HW-independent function that enables kcq. This function uses
the kiq_pm4_funcs implementation.
[How]
Remove the local kcq enable function and replace it with the generic
kcq enable function under amdgpu_gfx.
Change-Id: I7709bdba93742c234941a5936c82eb67e346077c
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
---
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 70 +--------------------------
1 file changed, 1 insertion(+), 69 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 5be6fab55b73..7219eacad9ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3252,74 +3252,6 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
}
-static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
-{
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
- uint64_t queue_mask = 0;
- int r, i;
-
- for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
- if (!test_bit(i, adev->gfx.mec.queue_bitmap))
- continue;
-
- /* This situation may be hit in the future if a new HW
- * generation exposes more than 64 queues. If so, the
- * definition of queue_mask needs updating */
- if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
- DRM_ERROR("Invalid KCQ enabled: %d\n", i);
- break;
- }
-
- queue_mask |= (1ull << i);
- }
-
- r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
- if (r) {
- DRM_ERROR("Failed to lock KIQ (%d).\n", r);
- return r;
- }
-
- /* set resources */
- amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
- amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
- PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
- amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
- amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
- amdgpu_ring_write(kiq_ring, 0); /* oac mask */
- amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
- for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
- uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
- uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-
- amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
- /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
- amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
- PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
- PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
- PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
- PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
- PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
- PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
- PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
- PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
- PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
- amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
- amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
- amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
- amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
- amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
- }
-
- r = amdgpu_ring_test_helper(kiq_ring);
- if (r)
- DRM_ERROR("KCQ enable failed\n");
-
- return r;
-}
-
static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
@@ -3726,7 +3658,7 @@ static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
goto done;
}
- r = gfx_v9_0_kiq_kcq_enable(adev);
+ r = amdgpu_gfx_enable_kcq(adev);
done:
return r;
}
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid
2020-01-11 18:39 [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock Alex Sierra
` (2 preceding siblings ...)
2020-01-11 18:39 ` [PATCH 4/7] drm/amdgpu: replace kcq enable function on gfx_v9 Alex Sierra
@ 2020-01-11 18:39 ` Alex Sierra
2020-01-13 12:15 ` Christian König
2020-01-13 16:49 ` Felix Kuehling
2020-01-11 18:39 ` [PATCH 6/7] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd Alex Sierra
` (2 subsequent siblings)
6 siblings, 2 replies; 22+ messages in thread
From: Alex Sierra @ 2020-01-11 18:39 UTC (permalink / raw)
To: amd-gfx; +Cc: Alex Sierra
This can be used directly from amdgpu and amdkfd to invalidate
TLB through pasid.
It supports gmc v7, v8, v9 and v10.
Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 +++
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 59 ++++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 33 +++++++++++++
drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 34 ++++++++++++++
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 61 +++++++++++++++++++++++++
5 files changed, 193 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index c91dd602d5f1..d3c27a3c43f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs {
/* flush the vm tlb via mmio */
void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type);
+ /* flush the vm tlb via pasid */
+ int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
+ uint32_t flush_type, bool all_hub);
/* flush the vm tlb via ring */
uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
uint64_t pd_addr);
@@ -216,6 +219,9 @@ struct amdgpu_gmc {
};
#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
+#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \
+ ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
+ ((adev), (pasid), (type), (allhub)))
#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
#define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 5ad89bb6f3ba..09408b8b390f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -30,6 +30,8 @@
#include "hdp/hdp_5_0_0_sh_mask.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "mmhub/mmhub_2_0_0_sh_mask.h"
+#include "athub/athub_2_0_0_sh_mask.h"
+#include "athub/athub_2_0_0_offset.h"
#include "dcn/dcn_2_0_0_offset.h"
#include "dcn/dcn_2_0_0_sh_mask.h"
#include "oss/osssys_5_0_0_offset.h"
@@ -37,6 +39,7 @@
#include "navi10_enum.h"
#include "soc15.h"
+#include "soc15d.h"
#include "soc15_common.h"
#include "nbio_v2_3.h"
@@ -234,6 +237,19 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
(!amdgpu_sriov_vf(adev)));
}
+static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
+ struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid)
+{
+ uint32_t value;
+
+ value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ + vmid);
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
/*
* GART
* VMID 0 is the physical GPU addresses as used by the kernel.
@@ -380,6 +396,48 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
}
+/**
+ * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flushed
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ int vmid, i;
+ uint16_t queried_pasid;
+ bool ret;
+ struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+
+ if (amdgpu_emu_mode == 0 && ring->sched.ready)
+ return kiq->pmf->kiq_invalidate_tlbs(ring,
+ pasid, flush_type, all_hub);
+
+ for (vmid = 1; vmid < 16; vmid++) {
+
+ ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+ &queried_pasid);
+ if (ret && queried_pasid == pasid) {
+ if (all_hub) {
+ for (i = 0; i < adev->num_vmhubs; i++)
+ gmc_v10_0_flush_gpu_tlb(adev, vmid,
+ i, 0);
+ } else {
+ gmc_v10_0_flush_gpu_tlb(adev, vmid,
+ AMDGPU_GFXHUB_0, 0);
+ }
+ break;
+ }
+ }
+
+ return 0;
+}
+
static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
@@ -531,6 +589,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
.flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
+ .flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
.map_mtype = gmc_v10_0_map_mtype,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index f08e5330642d..19d5b133e1d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -418,6 +418,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
return 0;
}
+/**
+ * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flushed
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ int vmid;
+ unsigned int tmp;
+
+ if (adev->in_gpu_reset)
+ return -EIO;
+
+ for (vmid = 1; vmid < 16; vmid++) {
+
+ tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+ if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
+ (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
+ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+ RREG32(mmVM_INVALIDATE_RESPONSE);
+ break;
+ }
+ }
+
+ return 0;
+}
+
/*
* GART
* VMID 0 is the physical GPU addresses as used by the kernel.
@@ -1333,6 +1365,7 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = {
static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
.flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb,
+ .flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid,
.emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb,
.emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping,
.set_prt = gmc_v7_0_set_prt,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 6d96d40fbcb8..27d83204fa2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
return 0;
}
+/**
+ * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flushed
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ int vmid;
+ unsigned int tmp;
+
+ if (adev->in_gpu_reset)
+ return -EIO;
+
+ for (vmid = 1; vmid < 16; vmid++) {
+
+ tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+ if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
+ (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
+ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+ RREG32(mmVM_INVALIDATE_RESPONSE);
+ break;
+ }
+ }
+
+ return 0;
+
+}
+
/*
* GART
* VMID 0 is the physical GPU addresses as used by the kernel.
@@ -1700,6 +1733,7 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
.flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb,
+ .flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid,
.emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb,
.emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping,
.set_prt = gmc_v8_0_set_prt,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index b83c8d745f42..95cce54999b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -38,10 +38,12 @@
#include "dce/dce_12_0_sh_mask.h"
#include "vega10_enum.h"
#include "mmhub/mmhub_1_0_offset.h"
+#include "athub/athub_1_0_sh_mask.h"
#include "athub/athub_1_0_offset.h"
#include "oss/osssys_4_0_offset.h"
#include "soc15.h"
+#include "soc15d.h"
#include "soc15_common.h"
#include "umc/umc_6_0_sh_mask.h"
@@ -441,6 +443,18 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
adev->pdev->device == 0x15d8)));
}
+static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid)
+{
+ uint32_t value;
+
+ value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ + vmid);
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
/*
* GART
* VMID 0 is the physical GPU addresses as used by the kernel.
@@ -539,6 +553,52 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
DRM_ERROR("Timeout waiting for VM flush ACK!\n");
}
+/**
+ * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flushed
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ int vmid, i;
+ uint16_t queried_pasid;
+ bool ret;
+ struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+
+ if (adev->in_gpu_reset)
+ return -EIO;
+
+ if (ring->sched.ready)
+ return kiq->pmf->kiq_invalidate_tlbs(ring,
+ pasid, flush_type, all_hub);
+
+ for (vmid = 1; vmid < 16; vmid++) {
+
+ ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+ &queried_pasid);
+ if (ret && queried_pasid == pasid) {
+ if (all_hub) {
+ for (i = 0; i < adev->num_vmhubs; i++)
+ gmc_v9_0_flush_gpu_tlb(adev, vmid,
+ i, 0);
+ } else {
+ gmc_v9_0_flush_gpu_tlb(adev, vmid,
+ AMDGPU_GFXHUB_0, 0);
+ }
+ break;
+ }
+ }
+
+ return 0;
+
+}
+
static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
@@ -700,6 +760,7 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
+ .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
.map_mtype = gmc_v9_0_map_mtype,
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 6/7] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd
2020-01-11 18:39 [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock Alex Sierra
` (3 preceding siblings ...)
2020-01-11 18:39 ` [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid Alex Sierra
@ 2020-01-11 18:39 ` Alex Sierra
2020-01-13 12:16 ` Christian König
2020-01-13 16:58 ` Felix Kuehling
2020-01-11 18:39 ` [PATCH 7/7] drm/amdgpu: flush TLB functions removal from kfd2kgd interface Alex Sierra
2020-01-13 12:10 ` [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock Christian König
6 siblings, 2 replies; 22+ messages in thread
From: Alex Sierra @ 2020-01-11 18:39 UTC (permalink / raw)
To: amd-gfx; +Cc: Alex Sierra
[Why]
The TLB flush methods of the kfd2kgd interface are deprecated.
They are now implemented in the amdgpu_amdkfd API.
[How]
TLB flush functions are now implemented in amdgpu_amdkfd.
Change-Id: Ic51cccdfe6e71288d78da772b6e1b6ced72f8ef7
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 32 ++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 ++
drivers/gpu/drm/amd/amdkfd/kfd_process.c | 8 ++++--
3 files changed, 39 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 88e10b956413..8609287620ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -628,6 +628,38 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
return false;
}
+int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	int hub;
+
+	/* Only the AI family flushes every VM hub; others flush GFXHUB_0. */
+	if (adev->family != AMDGPU_FAMILY_AI) {
+		amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
+		return 0;
+	}
+	for (hub = 0; hub < adev->num_vmhubs; hub++)
+		amdgpu_gmc_flush_gpu_tlb(adev, vmid, hub, 0);
+
+	return 0;
+}
+
+int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	const bool all_hub = adev->family == AMDGPU_FAMILY_AI;
+	uint32_t flush_type;
+
+	/* Vega20 with XGMI physical nodes present uses flush type 2. */
+	if (adev->asic_type == CHIP_VEGA20 &&
+	    adev->gmc.xgmi.num_physical_nodes)
+		flush_type = 2;
+	else
+		flush_type = 0;
+
+	return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
+}
+
bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 069d5d230810..47b0f2957d1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -136,6 +136,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t *ib_cmd, uint32_t ib_len);
void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd);
+int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid);
+int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid);
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 536a153ac9a4..25b90f70aecd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -32,6 +32,7 @@
#include <linux/mman.h>
#include <linux/file.h>
#include "amdgpu_amdkfd.h"
+#include "amdgpu.h"
struct mm_struct;
@@ -1152,16 +1153,17 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
void kfd_flush_tlb(struct kfd_process_device *pdd)
{
struct kfd_dev *dev = pdd->dev;
- const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
/* Nothing to flush until a VMID is assigned, which
* only happens when the first queue is created.
*/
if (pdd->qpd.vmid)
- f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
+ amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
+ pdd->qpd.vmid);
} else {
- f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
+ amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
+ pdd->process->pasid);
}
}
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 7/7] drm/amdgpu: flush TLB functions removal from kfd2kgd interface
2020-01-11 18:39 [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock Alex Sierra
` (4 preceding siblings ...)
2020-01-11 18:39 ` [PATCH 6/7] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd Alex Sierra
@ 2020-01-11 18:39 ` Alex Sierra
2020-01-13 12:17 ` Christian König
2020-01-13 12:10 ` [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock Christian König
6 siblings, 1 reply; 22+ messages in thread
From: Alex Sierra @ 2020-01-11 18:39 UTC (permalink / raw)
To: amd-gfx; +Cc: Alex Sierra
[Why]
The kfd2kgd interface will be deprecated. This removal only covers the TLB
invalidation functions for now; they have been replaced by the amdgpu_amdkfd API.
[How]
The TLB invalidate functions are removed from the different amdkfd_gfx_v*
versions.
Change-Id: Ic2c7d4a0d19fe1e884dee1ff10a520d31252afee
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
.../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 2 -
.../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 67 -------------
.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 41 --------
.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 41 --------
.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 96 -------------------
.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 2 -
.../gpu/drm/amd/include/kgd_kfd_interface.h | 2 -
7 files changed, 251 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index f9011a07cb90..562e7a7f51a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -317,7 +317,5 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.get_tile_config = kgd_gfx_v9_get_tile_config,
.set_vm_context_page_table_base = kgd_set_vm_context_page_table_base,
- .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
- .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid,
.get_hive_id = amdgpu_amdkfd_get_hive_id,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 61cd707158e4..6132b4874498 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -686,71 +686,6 @@ static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}
-static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
-{
- signed long r;
- uint32_t seq;
- struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-
- spin_lock(&adev->gfx.kiq.ring_lock);
- amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
- amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
- amdgpu_ring_write(ring,
- PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
- PACKET3_INVALIDATE_TLBS_PASID(pasid));
- amdgpu_fence_emit_polling(ring, &seq);
- amdgpu_ring_commit(ring);
- spin_unlock(&adev->gfx.kiq.ring_lock);
-
- r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
- if (r < 1) {
- DRM_ERROR("wait for kiq fence error: %ld.\n", r);
- return -ETIME;
- }
-
- return 0;
-}
-
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
- int vmid;
- uint16_t queried_pasid;
- bool ret;
- struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-
- if (amdgpu_emu_mode == 0 && ring->sched.ready)
- return invalidate_tlbs_with_kiq(adev, pasid);
-
- for (vmid = 0; vmid < 16; vmid++) {
- if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
- continue;
-
- ret = get_atc_vmid_pasid_mapping_info(kgd, vmid,
- &queried_pasid);
- if (ret && queried_pasid == pasid) {
- amdgpu_gmc_flush_gpu_tlb(adev, vmid,
- AMDGPU_GFXHUB_0, 0);
- break;
- }
- }
-
- return 0;
-}
-
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
- if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
- pr_err("non kfd vmid %d\n", vmid);
- return 0;
- }
-
- amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
- return 0;
-}
-
static int kgd_address_watch_disable(struct kgd_dev *kgd)
{
return 0;
@@ -832,7 +767,5 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
get_atc_vmid_pasid_mapping_info,
.get_tile_config = amdgpu_amdkfd_get_tile_config,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
- .invalidate_tlbs = invalidate_tlbs,
- .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.get_hive_id = amdgpu_amdkfd_get_hive_id,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 6e6f0a99ec06..8f052e98a3c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -696,45 +696,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
lower_32_bits(page_table_base));
}
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
- int vmid;
- unsigned int tmp;
-
- if (adev->in_gpu_reset)
- return -EIO;
-
- for (vmid = 0; vmid < 16; vmid++) {
- if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
- continue;
-
- tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
- if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
- (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
- WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
- RREG32(mmVM_INVALIDATE_RESPONSE);
- break;
- }
- }
-
- return 0;
-}
-
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
- if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
- pr_err("non kfd vmid\n");
- return 0;
- }
-
- WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
- RREG32(mmVM_INVALIDATE_RESPONSE);
- return 0;
-}
-
/**
* read_vmid_from_vmfault_reg - read vmid from register
*
@@ -771,7 +732,5 @@ const struct kfd2kgd_calls gfx_v7_kfd2kgd = {
.set_scratch_backing_va = set_scratch_backing_va,
.get_tile_config = get_tile_config,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
- .invalidate_tlbs = invalidate_tlbs,
- .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index bfbddedb2380..19a10db93d68 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -657,45 +657,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
lower_32_bits(page_table_base));
}
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
- int vmid;
- unsigned int tmp;
-
- if (adev->in_gpu_reset)
- return -EIO;
-
- for (vmid = 0; vmid < 16; vmid++) {
- if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
- continue;
-
- tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
- if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
- (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
- WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
- RREG32(mmVM_INVALIDATE_RESPONSE);
- break;
- }
- }
-
- return 0;
-}
-
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
- if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
- pr_err("non kfd vmid %d\n", vmid);
- return -EINVAL;
- }
-
- WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
- RREG32(mmVM_INVALIDATE_RESPONSE);
- return 0;
-}
-
const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
@@ -717,6 +678,4 @@ const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
.set_scratch_backing_va = set_scratch_backing_va,
.get_tile_config = get_tile_config,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
- .invalidate_tlbs = invalidate_tlbs,
- .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index e7861f0ef415..932ae85d97e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -617,100 +617,6 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}
-static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid,
- uint32_t flush_type)
-{
- signed long r;
- uint32_t seq;
- struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-
- spin_lock(&adev->gfx.kiq.ring_lock);
- amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
- amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
- amdgpu_ring_write(ring,
- PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
- PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
- PACKET3_INVALIDATE_TLBS_PASID(pasid) |
- PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
- amdgpu_fence_emit_polling(ring, &seq);
- amdgpu_ring_commit(ring);
- spin_unlock(&adev->gfx.kiq.ring_lock);
-
- r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
- if (r < 1) {
- DRM_ERROR("wait for kiq fence error: %ld.\n", r);
- return -ETIME;
- }
-
- return 0;
-}
-
-int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
- int vmid, i;
- uint16_t queried_pasid;
- bool ret;
- struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
- uint32_t flush_type = 0;
-
- if (adev->in_gpu_reset)
- return -EIO;
- if (adev->gmc.xgmi.num_physical_nodes &&
- adev->asic_type == CHIP_VEGA20)
- flush_type = 2;
-
- if (ring->sched.ready)
- return invalidate_tlbs_with_kiq(adev, pasid, flush_type);
-
- for (vmid = 0; vmid < 16; vmid++) {
- if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
- continue;
-
- ret = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(kgd, vmid,
- &queried_pasid);
- if (ret && queried_pasid == pasid) {
- for (i = 0; i < adev->num_vmhubs; i++)
- amdgpu_gmc_flush_gpu_tlb(adev, vmid,
- i, flush_type);
- break;
- }
- }
-
- return 0;
-}
-
-int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
- int i;
-
- if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
- pr_err("non kfd vmid %d\n", vmid);
- return 0;
- }
-
- /* Use legacy mode tlb invalidation.
- *
- * Currently on Raven the code below is broken for anything but
- * legacy mode due to a MMHUB power gating problem. A workaround
- * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
- * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
- * bit.
- *
- * TODO 1: agree on the right set of invalidation registers for
- * KFD use. Use the last one for now. Invalidate both GC and
- * MMHUB.
- *
- * TODO 2: support range-based invalidation, requires kfg2kgd
- * interface change
- */
- for (i = 0; i < adev->num_vmhubs; i++)
- amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
-
- return 0;
-}
-
int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd)
{
return 0;
@@ -793,7 +699,5 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.get_tile_config = kgd_gfx_v9_get_tile_config,
.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
- .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
- .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid,
.get_hive_id = amdgpu_amdkfd_get_hive_id,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
index 02b1426d17d1..dfafa28b7559 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -57,7 +57,5 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,
bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
uint8_t vmid, uint16_t *p_pasid);
-int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
-int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd,
struct tile_config *config);
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 2cd217e60125..a01ef836ad58 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -307,8 +307,6 @@ struct kfd2kgd_calls {
void (*set_vm_context_page_table_base)(struct kgd_dev *kgd,
uint32_t vmid, uint64_t page_table_base);
- int (*invalidate_tlbs)(struct kgd_dev *kgd, uint16_t pasid);
- int (*invalidate_tlbs_vmid)(struct kgd_dev *kgd, uint16_t vmid);
uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd);
uint64_t (*get_hive_id)(struct kgd_dev *kgd);
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock
2020-01-11 18:39 [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock Alex Sierra
` (5 preceding siblings ...)
2020-01-11 18:39 ` [PATCH 7/7] drm/amdgpu: flush TLB functions removal from kfd2kgd interface Alex Sierra
@ 2020-01-13 12:10 ` Christian König
6 siblings, 0 replies; 22+ messages in thread
From: Christian König @ 2020-01-13 12:10 UTC (permalink / raw)
To: Alex Sierra, amd-gfx
Am 11.01.20 um 19:39 schrieb Alex Sierra:
> [Why]
> Avoid reclaim filesystem while eviction lock is held called from
> MMU notifier.
>
> [How]
> Setting PF_MEMALLOC_NOFS flags while eviction mutex is locked.
> Using memalloc_nofs_save / memalloc_nofs_restore API.
>
> Change-Id: I5531c9337836e7d4a430df3f16dcc82888e8018c
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 40 +++++++++++++++++++++-----
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 6 +++-
> 2 files changed, 38 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index b999b67ff57a..d6aba4f9df74 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -82,6 +82,32 @@ struct amdgpu_prt_cb {
> struct dma_fence_cb cb;
> };
>
> +/**
> + * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
> + * happens while holding this lock anywhere to prevent deadlocks when
> + * an MMU notifier runs in reclaim-FS context.
> + */
> +static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
> +{
> + mutex_lock(&vm->eviction_lock);
> + vm->saved_flags = memalloc_nofs_save();
> +}
> +
> +static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
> +{
> + if (mutex_trylock(&vm->eviction_lock)) {
> + vm->saved_flags = memalloc_nofs_save();
> + return 1;
> + }
> + return 0;
> +}
> +
> +static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
> +{
> + memalloc_nofs_restore(vm->saved_flags);
> + mutex_unlock(&vm->eviction_lock);
> +}
> +
> /**
> * amdgpu_vm_level_shift - return the addr shift for each level
> *
> @@ -678,9 +704,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> }
> }
>
> - mutex_lock(&vm->eviction_lock);
> + amdgpu_vm_eviction_lock(vm);
> vm->evicting = false;
> - mutex_unlock(&vm->eviction_lock);
> + amdgpu_vm_eviction_unlock(vm);
>
> return 0;
> }
> @@ -1559,7 +1585,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
> if (!(flags & AMDGPU_PTE_VALID))
> owner = AMDGPU_FENCE_OWNER_KFD;
>
> - mutex_lock(&vm->eviction_lock);
> + amdgpu_vm_eviction_lock(vm);
> if (vm->evicting) {
> r = -EBUSY;
> goto error_unlock;
> @@ -1576,7 +1602,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
> r = vm->update_funcs->commit(¶ms, fence);
>
> error_unlock:
> - mutex_unlock(&vm->eviction_lock);
> + amdgpu_vm_eviction_unlock(vm);
> return r;
> }
>
> @@ -2537,18 +2563,18 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
> return false;
>
> /* Try to block ongoing updates */
> - if (!mutex_trylock(&bo_base->vm->eviction_lock))
> + if (!amdgpu_vm_eviction_trylock(bo_base->vm))
> return false;
>
> /* Don't evict VM page tables while they are updated */
> if (!dma_fence_is_signaled(bo_base->vm->last_direct) ||
> !dma_fence_is_signaled(bo_base->vm->last_delayed)) {
> - mutex_unlock(&bo_base->vm->eviction_lock);
> + amdgpu_vm_eviction_unlock(bo_base->vm);
> return false;
> }
>
> bo_base->vm->evicting = true;
> - mutex_unlock(&bo_base->vm->eviction_lock);
> + amdgpu_vm_eviction_unlock(bo_base->vm);
> return true;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 100547f094ff..c21a36bebc0c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -30,6 +30,7 @@
> #include <drm/gpu_scheduler.h>
> #include <drm/drm_file.h>
> #include <drm/ttm/ttm_bo_driver.h>
> +#include <linux/sched/mm.h>
>
> #include "amdgpu_sync.h"
> #include "amdgpu_ring.h"
> @@ -242,9 +243,12 @@ struct amdgpu_vm {
> /* tree of virtual addresses mapped */
> struct rb_root_cached va;
>
> - /* Lock to prevent eviction while we are updating page tables */
> + /* Lock to prevent eviction while we are updating page tables
> + * use vm_eviction_lock/unlock(vm)
> + */
> struct mutex eviction_lock;
> bool evicting;
> + unsigned int saved_flags;
>
> /* BOs who needs a validation */
> struct list_head evicted;
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 2/7] drm/amdgpu: kiq pm4 function implementation for gfx_v9
2020-01-11 18:39 ` [PATCH 2/7] drm/amdgpu: kiq pm4 function implementation for gfx_v9 Alex Sierra
@ 2020-01-13 12:10 ` Christian König
2020-01-13 16:57 ` Felix Kuehling
1 sibling, 0 replies; 22+ messages in thread
From: Christian König @ 2020-01-13 12:10 UTC (permalink / raw)
To: Alex Sierra, amd-gfx
Am 11.01.20 um 19:39 schrieb Alex Sierra:
> Functions implemented from kiq_pm4_funcs struct members
> for gfx_v9 version.
>
> Change-Id: I8fd3e160c4bd58f19d35d29e39517db967063afe
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 115 ++++++++++++++++++++++++++
> 1 file changed, 115 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index e3d466bd5c4e..ad0179ea2cc5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -739,6 +739,120 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
> static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
> void *inject_if);
>
> +static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
> + uint64_t queue_mask)
> +{
> + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
> + amdgpu_ring_write(kiq_ring,
> + PACKET3_SET_RESOURCES_VMID_MASK(0) |
> + /* vmid_mask:0* queue_type:0 (KIQ) */
> + PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
> + amdgpu_ring_write(kiq_ring,
> + lower_32_bits(queue_mask)); /* queue mask lo */
> + amdgpu_ring_write(kiq_ring,
> + upper_32_bits(queue_mask)); /* queue mask hi */
> + amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
> + amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
> + amdgpu_ring_write(kiq_ring, 0); /* oac mask */
> + amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
> +}
> +
> +static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
> + struct amdgpu_ring *ring)
> +{
> + struct amdgpu_device *adev = kiq_ring->adev;
> + uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
> + uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
> + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
> +
> + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
> + /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
> + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
> + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
> + PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
> + PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
> + PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
> + PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
> + /*queue_type: normal compute queue */
> + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
> + /* alloc format: all_on_one_pipe */
> + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
> + PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
> + /* num_queues: must be 1 */
> + PACKET3_MAP_QUEUES_NUM_QUEUES(1));
> + amdgpu_ring_write(kiq_ring,
> + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
> + amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
> + amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
> + amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
> + amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
> +}
> +
> +static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
> + struct amdgpu_ring *ring,
> + enum amdgpu_unmap_queues_action action,
> + u64 gpu_addr, u64 seq)
> +{
> + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
> +
> + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
> + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
> + PACKET3_UNMAP_QUEUES_ACTION(action) |
> + PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
> + PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
> + PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
> + amdgpu_ring_write(kiq_ring,
> + PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
> +
> + if (action == PREEMPT_QUEUES_NO_UNMAP) {
> + amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
> + amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
> + amdgpu_ring_write(kiq_ring, seq);
> + } else {
> + amdgpu_ring_write(kiq_ring, 0);
> + amdgpu_ring_write(kiq_ring, 0);
> + amdgpu_ring_write(kiq_ring, 0);
> + }
> +}
> +
> +static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
> + struct amdgpu_ring *ring,
> + u64 addr,
> + u64 seq)
> +{
> + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
> +
> + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
> + amdgpu_ring_write(kiq_ring,
> + PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
> + PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
> + PACKET3_QUERY_STATUS_COMMAND(2));
> + /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
> + amdgpu_ring_write(kiq_ring,
> + PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
> + PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
> + amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
> + amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
> + amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
> + amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
> +}
> +
> +static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
> + .kiq_set_resources = gfx_v9_0_kiq_set_resources,
> + .kiq_map_queues = gfx_v9_0_kiq_map_queues,
> + .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
> + .kiq_query_status = gfx_v9_0_kiq_query_status,
> + .set_resources_size = 8,
> + .map_queues_size = 7,
> + .unmap_queues_size = 6,
> + .query_status_size = 7,
> +};
> +
> +static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
> +{
> + adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
> +}
> +
> static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
> {
> switch (adev->asic_type) {
> @@ -4260,6 +4374,7 @@ static int gfx_v9_0_early_init(void *handle)
> else
> adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
> adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> + gfx_v9_0_set_kiq_pm4_funcs(adev);
> gfx_v9_0_set_ring_funcs(adev);
> gfx_v9_0_set_irq_funcs(adev);
> gfx_v9_0_set_gds_init(adev);
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10
2020-01-11 18:39 ` [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra
@ 2020-01-13 12:12 ` Christian König
2020-01-13 16:36 ` Felix Kuehling
1 sibling, 0 replies; 22+ messages in thread
From: Christian König @ 2020-01-13 12:12 UTC (permalink / raw)
To: Alex Sierra, amd-gfx
Am 11.01.20 um 19:39 schrieb Alex Sierra:
> tlbs invalidate pointer function added to kiq_pm4_funcs struct.
> This way, tlb flush can be done through kiq member.
> TLBs invalidatation implemented for gfx9 and gfx10.
>
> Change-Id: I1b77b364f3ae0038ff3e70e869be5f2ef6e6d293
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Please note that I can't judge the correctness of the PM4 packets, but
the interface looks really nice and clean now.
Reviewed-by: Christian König <christian.koenig@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 +++
> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 33 +++++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 32 ++++++++++++++++++++++++
> 3 files changed, 69 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 8e88e0411662..2927837bd401 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -76,11 +76,15 @@ struct kiq_pm4_funcs {
> struct amdgpu_ring *ring,
> u64 addr,
> u64 seq);
> + int (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub);
> /* Packet sizes */
> int set_resources_size;
> int map_queues_size;
> int unmap_queues_size;
> int query_status_size;
> + int invalidate_tlbs_size;
> };
>
> struct amdgpu_kiq {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 379e46c1b7f6..2e82213f57eb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -40,6 +40,7 @@
> #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
>
> #include "soc15.h"
> +#include "soc15d.h"
> #include "soc15_common.h"
> #include "clearstate_gfx10.h"
> #include "v10_structs.h"
> @@ -346,15 +347,47 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
> amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
> }
>
> +static int gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub)
> +{
> + signed long r;
> + uint32_t seq;
> + struct amdgpu_device *adev = kiq_ring->adev;
> + struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> +
> + spin_lock(&adev->gfx.kiq.ring_lock);
> + amdgpu_ring_alloc(kiq_ring, kiq->pmf->invalidate_tlbs_size);
> + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> + amdgpu_ring_write(kiq_ring,
> + PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> + PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> + amdgpu_fence_emit_polling(kiq_ring, &seq);
> + amdgpu_ring_commit(kiq_ring);
> + spin_unlock(&adev->gfx.kiq.ring_lock);
> +
> + r = amdgpu_fence_wait_polling(kiq_ring, seq, adev->usec_timeout);
> + if (r < 1) {
> + DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> + return -ETIME;
> + }
> +
> + return 0;
> +}
> +
> static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
> .kiq_set_resources = gfx10_kiq_set_resources,
> .kiq_map_queues = gfx10_kiq_map_queues,
> .kiq_unmap_queues = gfx10_kiq_unmap_queues,
> .kiq_query_status = gfx10_kiq_query_status,
> + .kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
> .set_resources_size = 8,
> .map_queues_size = 7,
> .unmap_queues_size = 6,
> .query_status_size = 7,
> + .invalidate_tlbs_size = 12,
> };
>
> static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index ad0179ea2cc5..5be6fab55b73 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -837,15 +837,47 @@ static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
> amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
> }
>
> +static int gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub)
> +{
> + signed long r;
> + uint32_t seq;
> + struct amdgpu_device *adev = kiq_ring->adev;
> + struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> +
> + spin_lock(&adev->gfx.kiq.ring_lock);
> + amdgpu_ring_alloc(kiq_ring, kiq->pmf->invalidate_tlbs_size);
> + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> + amdgpu_ring_write(kiq_ring,
> + PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> + PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> + amdgpu_fence_emit_polling(kiq_ring, &seq);
> + amdgpu_ring_commit(kiq_ring);
> + spin_unlock(&adev->gfx.kiq.ring_lock);
> +
> + r = amdgpu_fence_wait_polling(kiq_ring, seq, adev->usec_timeout);
> + if (r < 1) {
> + DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> + return -ETIME;
> + }
> +
> + return 0;
> +}
> +
> static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
> .kiq_set_resources = gfx_v9_0_kiq_set_resources,
> .kiq_map_queues = gfx_v9_0_kiq_map_queues,
> .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
> .kiq_query_status = gfx_v9_0_kiq_query_status,
> + .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
> .set_resources_size = 8,
> .map_queues_size = 7,
> .unmap_queues_size = 6,
> .query_status_size = 7,
> + .invalidate_tlbs_size = 12,
> };
>
> static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 4/7] drm/amdgpu: replace kcq enable function on gfx_v9
2020-01-11 18:39 ` [PATCH 4/7] drm/amdgpu: replace kcq enable function on gfx_v9 Alex Sierra
@ 2020-01-13 12:12 ` Christian König
2020-01-13 16:53 ` Felix Kuehling
1 sibling, 0 replies; 22+ messages in thread
From: Christian König @ 2020-01-13 12:12 UTC (permalink / raw)
To: Alex Sierra, amd-gfx
Am 11.01.20 um 19:39 schrieb Alex Sierra:
> [Why]
> There's a HW-independent function that enables kcq. This function uses
> the kiq_pm4_funcs implementation.
>
> [How]
> Remove the local kcq enable function and replace it with the generic kcq
> enable function under amdgpu_gfx.
>
> Change-Id: I7709bdba93742c234941a5936c82eb67e346077c
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 70 +--------------------------
> 1 file changed, 1 insertion(+), 69 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 5be6fab55b73..7219eacad9ce 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -3252,74 +3252,6 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
> WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
> }
>
> -static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
> -{
> - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
> - uint64_t queue_mask = 0;
> - int r, i;
> -
> - for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
> - if (!test_bit(i, adev->gfx.mec.queue_bitmap))
> - continue;
> -
> - /* This situation may be hit in the future if a new HW
> - * generation exposes more than 64 queues. If so, the
> - * definition of queue_mask needs updating */
> - if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
> - DRM_ERROR("Invalid KCQ enabled: %d\n", i);
> - break;
> - }
> -
> - queue_mask |= (1ull << i);
> - }
> -
> - r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
> - if (r) {
> - DRM_ERROR("Failed to lock KIQ (%d).\n", r);
> - return r;
> - }
> -
> - /* set resources */
> - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
> - amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
> - PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
> - amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
> - amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
> - amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
> - amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
> - amdgpu_ring_write(kiq_ring, 0); /* oac mask */
> - amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
> - for (i = 0; i < adev->gfx.num_compute_rings; i++) {
> - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
> - uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
> - uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
> -
> - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
> - /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
> - amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
> - PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
> - PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
> - PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
> - PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
> - PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
> - PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
> - PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
> - PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
> - PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
> - amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
> - amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
> - amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
> - amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
> - amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
> - }
> -
> - r = amdgpu_ring_test_helper(kiq_ring);
> - if (r)
> - DRM_ERROR("KCQ enable failed\n");
> -
> - return r;
> -}
> -
> static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
> {
> struct amdgpu_device *adev = ring->adev;
> @@ -3726,7 +3658,7 @@ static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
> goto done;
> }
>
> - r = gfx_v9_0_kiq_kcq_enable(adev);
> + r = amdgpu_gfx_enable_kcq(adev);
> done:
> return r;
> }
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid
2020-01-11 18:39 ` [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid Alex Sierra
@ 2020-01-13 12:15 ` Christian König
2020-01-13 16:49 ` Felix Kuehling
1 sibling, 0 replies; 22+ messages in thread
From: Christian König @ 2020-01-13 12:15 UTC (permalink / raw)
To: Alex Sierra, amd-gfx
Am 11.01.20 um 19:39 schrieb Alex Sierra:
> This can be used directly from amdgpu and amdkfd to invalidate
> TLB through pasid.
> It supports gmc v7, v8, v9 and v10.
>
> Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Flushing by pasid is in principle racy, but I don't see a way to avoid that.
The worst thing that could happen is that we flush a VMID while we
wouldn't have to.
Reviewed-by: Christian König <christian.koenig@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 +++
> drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 59 ++++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 33 +++++++++++++
> drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 34 ++++++++++++++
> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 61 +++++++++++++++++++++++++
> 5 files changed, 193 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index c91dd602d5f1..d3c27a3c43f6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs {
> /* flush the vm tlb via mmio */
> void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
> uint32_t vmhub, uint32_t flush_type);
> + /* flush the vm tlb via pasid */
> + int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
> + uint32_t flush_type, bool all_hub);
> /* flush the vm tlb via ring */
> uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
> uint64_t pd_addr);
> @@ -216,6 +219,9 @@ struct amdgpu_gmc {
> };
>
> #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
> +#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \
> + ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
> + ((adev), (pasid), (type), (allhub)))
> #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
> #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
> #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 5ad89bb6f3ba..09408b8b390f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -30,6 +30,8 @@
> #include "hdp/hdp_5_0_0_sh_mask.h"
> #include "gc/gc_10_1_0_sh_mask.h"
> #include "mmhub/mmhub_2_0_0_sh_mask.h"
> +#include "athub/athub_2_0_0_sh_mask.h"
> +#include "athub/athub_2_0_0_offset.h"
> #include "dcn/dcn_2_0_0_offset.h"
> #include "dcn/dcn_2_0_0_sh_mask.h"
> #include "oss/osssys_5_0_0_offset.h"
> @@ -37,6 +39,7 @@
> #include "navi10_enum.h"
>
> #include "soc15.h"
> +#include "soc15d.h"
> #include "soc15_common.h"
>
> #include "nbio_v2_3.h"
> @@ -234,6 +237,19 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
> (!amdgpu_sriov_vf(adev)));
> }
>
> +static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
> + struct amdgpu_device *adev,
> + uint8_t vmid, uint16_t *p_pasid)
> +{
> + uint32_t value;
> +
> + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
> + + vmid);
> + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
> +
> + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
> +}
> +
> /*
> * GART
> * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -380,6 +396,48 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
> DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
> }
>
> +/**
> + * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flushed
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub)
> +{
> + int vmid, i;
> + uint16_t queried_pasid;
> + bool ret;
> + struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> + struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> +
> + if (amdgpu_emu_mode == 0 && ring->sched.ready)
> + return kiq->pmf->kiq_invalidate_tlbs(ring,
> + pasid, flush_type, all_hub);
> +
> + for (vmid = 1; vmid < 16; vmid++) {
> +
> + ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
> + &queried_pasid);
> + if (ret && queried_pasid == pasid) {
> + if (all_hub) {
> + for (i = 0; i < adev->num_vmhubs; i++)
> + gmc_v10_0_flush_gpu_tlb(adev, vmid,
> + i, 0);
> + } else {
> + gmc_v10_0_flush_gpu_tlb(adev, vmid,
> + AMDGPU_GFXHUB_0, 0);
> + }
> + break;
> + }
> + }
> +
> + return 0;
> +}
> +
> static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
> unsigned vmid, uint64_t pd_addr)
> {
> @@ -531,6 +589,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
>
> static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
> .flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
> + .flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
> .emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
> .emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
> .map_mtype = gmc_v10_0_map_mtype,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> index f08e5330642d..19d5b133e1d7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> @@ -418,6 +418,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
> return 0;
> }
>
> +/**
> + * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flushed
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub)
> +{
> + int vmid;
> + unsigned int tmp;
> +
> + if (adev->in_gpu_reset)
> + return -EIO;
> +
> + for (vmid = 1; vmid < 16; vmid++) {
> +
> + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> + RREG32(mmVM_INVALIDATE_RESPONSE);
> + break;
> + }
> + }
> +
> + return 0;
> +}
> +
> /*
> * GART
> * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -1333,6 +1365,7 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = {
>
> static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
> .flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb,
> + .flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid,
> .emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb,
> .emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping,
> .set_prt = gmc_v7_0_set_prt,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> index 6d96d40fbcb8..27d83204fa2b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> @@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
> return 0;
> }
>
> +/**
> + * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flushed
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub)
> +{
> + int vmid;
> + unsigned int tmp;
> +
> + if (adev->in_gpu_reset)
> + return -EIO;
> +
> + for (vmid = 1; vmid < 16; vmid++) {
> +
> + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> + RREG32(mmVM_INVALIDATE_RESPONSE);
> + break;
> + }
> + }
> +
> + return 0;
> +
> +}
> +
> /*
> * GART
> * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -1700,6 +1733,7 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
>
> static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
> .flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb,
> + .flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid,
> .emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb,
> .emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping,
> .set_prt = gmc_v8_0_set_prt,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index b83c8d745f42..95cce54999b7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -38,10 +38,12 @@
> #include "dce/dce_12_0_sh_mask.h"
> #include "vega10_enum.h"
> #include "mmhub/mmhub_1_0_offset.h"
> +#include "athub/athub_1_0_sh_mask.h"
> #include "athub/athub_1_0_offset.h"
> #include "oss/osssys_4_0_offset.h"
>
> #include "soc15.h"
> +#include "soc15d.h"
> #include "soc15_common.h"
> #include "umc/umc_6_0_sh_mask.h"
>
> @@ -441,6 +443,18 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
> adev->pdev->device == 0x15d8)));
> }
>
> +static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
> + uint8_t vmid, uint16_t *p_pasid)
> +{
> + uint32_t value;
> +
> + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
> + + vmid);
> + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
> +
> + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
> +}
> +
> /*
> * GART
> * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -539,6 +553,52 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
> DRM_ERROR("Timeout waiting for VM flush ACK!\n");
> }
>
> +/**
> + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flushed
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub)
> +{
> + int vmid, i;
> + uint16_t queried_pasid;
> + bool ret;
> + struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> + struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> +
> + if (adev->in_gpu_reset)
> + return -EIO;
> +
> + if (ring->sched.ready)
> + return kiq->pmf->kiq_invalidate_tlbs(ring,
> + pasid, flush_type, all_hub);
> +
> + for (vmid = 1; vmid < 16; vmid++) {
> +
> + ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
> + &queried_pasid);
> + if (ret && queried_pasid == pasid) {
> + if (all_hub) {
> + for (i = 0; i < adev->num_vmhubs; i++)
> + gmc_v9_0_flush_gpu_tlb(adev, vmid,
> + i, 0);
> + } else {
> + gmc_v9_0_flush_gpu_tlb(adev, vmid,
> + AMDGPU_GFXHUB_0, 0);
> + }
> + break;
> + }
> + }
> +
> + return 0;
> +
> +}
> +
> static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
> unsigned vmid, uint64_t pd_addr)
> {
> @@ -700,6 +760,7 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
>
> static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
> .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
> + .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
> .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
> .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
> .map_mtype = gmc_v9_0_map_mtype,
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 6/7] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd
2020-01-11 18:39 ` [PATCH 6/7] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd Alex Sierra
@ 2020-01-13 12:16 ` Christian König
2020-01-13 16:58 ` Felix Kuehling
1 sibling, 0 replies; 22+ messages in thread
From: Christian König @ 2020-01-13 12:16 UTC (permalink / raw)
To: Alex Sierra, amd-gfx
Am 11.01.20 um 19:39 schrieb Alex Sierra:
> [Why]
> The TLB flush method using the kfd2kgd interface has been deprecated.
> This implementation is now on the amdgpu_amdkfd API.
>
> [How]
> TLB flush functions now implemented in amdgpu_amdkfd.
>
> Change-Id: Ic51cccdfe6e71288d78da772b6e1b6ced72f8ef7
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 32 ++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 ++
> drivers/gpu/drm/amd/amdkfd/kfd_process.c | 8 ++++--
> 3 files changed, 39 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index 88e10b956413..8609287620ea 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -628,6 +628,38 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
> return false;
> }
>
> +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
> +{
> + struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
> +
> + if (adev->family == AMDGPU_FAMILY_AI) {
> + int i;
> +
> + for (i = 0; i < adev->num_vmhubs; i++)
> + amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
> + } else {
> + amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
> + }
> +
> + return 0;
> +}
> +
> +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid)
> +{
> + struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
> + uint32_t flush_type = 0;
> + bool all_hub = false;
> +
> + if (adev->gmc.xgmi.num_physical_nodes &&
> + adev->asic_type == CHIP_VEGA20)
> + flush_type = 2;
> +
> + if (adev->family == AMDGPU_FAMILY_AI)
> + all_hub = true;
> +
> + return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
> +}
> +
> bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
> {
> struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 069d5d230810..47b0f2957d1f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -136,6 +136,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
> uint32_t *ib_cmd, uint32_t ib_len);
> void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
> bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd);
> +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid);
> +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid);
>
> bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 536a153ac9a4..25b90f70aecd 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -32,6 +32,7 @@
> #include <linux/mman.h>
> #include <linux/file.h>
> #include "amdgpu_amdkfd.h"
> +#include "amdgpu.h"
>
> struct mm_struct;
>
> @@ -1152,16 +1153,17 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
> void kfd_flush_tlb(struct kfd_process_device *pdd)
> {
> struct kfd_dev *dev = pdd->dev;
> - const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
>
> if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
> /* Nothing to flush until a VMID is assigned, which
> * only happens when the first queue is created.
> */
> if (pdd->qpd.vmid)
> - f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
> + amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
> + pdd->qpd.vmid);
> } else {
> - f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
> + amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
> + pdd->process->pasid);
> }
> }
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 7/7] drm/amdgpu: flush TLB functions removal from kfd2kgd interface
2020-01-11 18:39 ` [PATCH 7/7] drm/amdgpu: flush TLB functions removal from kfd2kgd interface Alex Sierra
@ 2020-01-13 12:17 ` Christian König
0 siblings, 0 replies; 22+ messages in thread
From: Christian König @ 2020-01-13 12:17 UTC (permalink / raw)
To: Alex Sierra, amd-gfx
Am 11.01.20 um 19:39 schrieb Alex Sierra:
> [Why]
> kfd2kgd interface will be deprecated. This removal only covers TLB
> invalidation for now. They have been replaced in amdgpu_amdkfd API.
>
> [How]
> TLB invalidate functions removed from the different amdkfd_gfx_v*
> versions.
>
> Change-Id: Ic2c7d4a0d19fe1e884dee1ff10a520d31252afee
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Nice work on the patch set. Acked-by: Christian König
<christian.koenig@amd.com>
> ---
> .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 2 -
> .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 67 -------------
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 41 --------
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 41 --------
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 96 -------------------
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 2 -
> .../gpu/drm/amd/include/kgd_kfd_interface.h | 2 -
> 7 files changed, 251 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> index f9011a07cb90..562e7a7f51a8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> @@ -317,7 +317,5 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
> kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
> .get_tile_config = kgd_gfx_v9_get_tile_config,
> .set_vm_context_page_table_base = kgd_set_vm_context_page_table_base,
> - .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
> - .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid,
> .get_hive_id = amdgpu_amdkfd_get_hive_id,
> };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index 61cd707158e4..6132b4874498 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -686,71 +686,6 @@ static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
> return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
> }
>
> -static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
> -{
> - signed long r;
> - uint32_t seq;
> - struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> -
> - spin_lock(&adev->gfx.kiq.ring_lock);
> - amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
> - amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> - amdgpu_ring_write(ring,
> - PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> - PACKET3_INVALIDATE_TLBS_PASID(pasid));
> - amdgpu_fence_emit_polling(ring, &seq);
> - amdgpu_ring_commit(ring);
> - spin_unlock(&adev->gfx.kiq.ring_lock);
> -
> - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
> - if (r < 1) {
> - DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> - return -ETIME;
> - }
> -
> - return 0;
> -}
> -
> -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
> -{
> - struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> - int vmid;
> - uint16_t queried_pasid;
> - bool ret;
> - struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> -
> - if (amdgpu_emu_mode == 0 && ring->sched.ready)
> - return invalidate_tlbs_with_kiq(adev, pasid);
> -
> - for (vmid = 0; vmid < 16; vmid++) {
> - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
> - continue;
> -
> - ret = get_atc_vmid_pasid_mapping_info(kgd, vmid,
> - &queried_pasid);
> - if (ret && queried_pasid == pasid) {
> - amdgpu_gmc_flush_gpu_tlb(adev, vmid,
> - AMDGPU_GFXHUB_0, 0);
> - break;
> - }
> - }
> -
> - return 0;
> -}
> -
> -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
> -{
> - struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> -
> - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
> - pr_err("non kfd vmid %d\n", vmid);
> - return 0;
> - }
> -
> - amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
> - return 0;
> -}
> -
> static int kgd_address_watch_disable(struct kgd_dev *kgd)
> {
> return 0;
> @@ -832,7 +767,5 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
> get_atc_vmid_pasid_mapping_info,
> .get_tile_config = amdgpu_amdkfd_get_tile_config,
> .set_vm_context_page_table_base = set_vm_context_page_table_base,
> - .invalidate_tlbs = invalidate_tlbs,
> - .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
> .get_hive_id = amdgpu_amdkfd_get_hive_id,
> };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> index 6e6f0a99ec06..8f052e98a3c6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> @@ -696,45 +696,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
> lower_32_bits(page_table_base));
> }
>
> -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
> -{
> - struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> - int vmid;
> - unsigned int tmp;
> -
> - if (adev->in_gpu_reset)
> - return -EIO;
> -
> - for (vmid = 0; vmid < 16; vmid++) {
> - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
> - continue;
> -
> - tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> - if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> - (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> - RREG32(mmVM_INVALIDATE_RESPONSE);
> - break;
> - }
> - }
> -
> - return 0;
> -}
> -
> -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
> -{
> - struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> -
> - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
> - pr_err("non kfd vmid\n");
> - return 0;
> - }
> -
> - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> - RREG32(mmVM_INVALIDATE_RESPONSE);
> - return 0;
> -}
> -
> /**
> * read_vmid_from_vmfault_reg - read vmid from register
> *
> @@ -771,7 +732,5 @@ const struct kfd2kgd_calls gfx_v7_kfd2kgd = {
> .set_scratch_backing_va = set_scratch_backing_va,
> .get_tile_config = get_tile_config,
> .set_vm_context_page_table_base = set_vm_context_page_table_base,
> - .invalidate_tlbs = invalidate_tlbs,
> - .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
> .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
> };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> index bfbddedb2380..19a10db93d68 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> @@ -657,45 +657,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
> lower_32_bits(page_table_base));
> }
>
> -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
> -{
> - struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> - int vmid;
> - unsigned int tmp;
> -
> - if (adev->in_gpu_reset)
> - return -EIO;
> -
> - for (vmid = 0; vmid < 16; vmid++) {
> - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
> - continue;
> -
> - tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> - if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> - (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> - RREG32(mmVM_INVALIDATE_RESPONSE);
> - break;
> - }
> - }
> -
> - return 0;
> -}
> -
> -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
> -{
> - struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> -
> - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
> - pr_err("non kfd vmid %d\n", vmid);
> - return -EINVAL;
> - }
> -
> - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> - RREG32(mmVM_INVALIDATE_RESPONSE);
> - return 0;
> -}
> -
> const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
> .program_sh_mem_settings = kgd_program_sh_mem_settings,
> .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
> @@ -717,6 +678,4 @@ const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
> .set_scratch_backing_va = set_scratch_backing_va,
> .get_tile_config = get_tile_config,
> .set_vm_context_page_table_base = set_vm_context_page_table_base,
> - .invalidate_tlbs = invalidate_tlbs,
> - .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
> };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index e7861f0ef415..932ae85d97e2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -617,100 +617,6 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
> return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
> }
>
> -static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid,
> - uint32_t flush_type)
> -{
> - signed long r;
> - uint32_t seq;
> - struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> -
> - spin_lock(&adev->gfx.kiq.ring_lock);
> - amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
> - amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> - amdgpu_ring_write(ring,
> - PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> - PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
> - PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> - PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> - amdgpu_fence_emit_polling(ring, &seq);
> - amdgpu_ring_commit(ring);
> - spin_unlock(&adev->gfx.kiq.ring_lock);
> -
> - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
> - if (r < 1) {
> - DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> - return -ETIME;
> - }
> -
> - return 0;
> -}
> -
> -int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
> -{
> - struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> - int vmid, i;
> - uint16_t queried_pasid;
> - bool ret;
> - struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> - uint32_t flush_type = 0;
> -
> - if (adev->in_gpu_reset)
> - return -EIO;
> - if (adev->gmc.xgmi.num_physical_nodes &&
> - adev->asic_type == CHIP_VEGA20)
> - flush_type = 2;
> -
> - if (ring->sched.ready)
> - return invalidate_tlbs_with_kiq(adev, pasid, flush_type);
> -
> - for (vmid = 0; vmid < 16; vmid++) {
> - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
> - continue;
> -
> - ret = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(kgd, vmid,
> - &queried_pasid);
> - if (ret && queried_pasid == pasid) {
> - for (i = 0; i < adev->num_vmhubs; i++)
> - amdgpu_gmc_flush_gpu_tlb(adev, vmid,
> - i, flush_type);
> - break;
> - }
> - }
> -
> - return 0;
> -}
> -
> -int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
> -{
> - struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> - int i;
> -
> - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
> - pr_err("non kfd vmid %d\n", vmid);
> - return 0;
> - }
> -
> - /* Use legacy mode tlb invalidation.
> - *
> - * Currently on Raven the code below is broken for anything but
> - * legacy mode due to a MMHUB power gating problem. A workaround
> - * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
> - * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
> - * bit.
> - *
> - * TODO 1: agree on the right set of invalidation registers for
> - * KFD use. Use the last one for now. Invalidate both GC and
> - * MMHUB.
> - *
> - * TODO 2: support range-based invalidation, requires kfg2kgd
> - * interface change
> - */
> - for (i = 0; i < adev->num_vmhubs; i++)
> - amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
> -
> - return 0;
> -}
> -
> int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd)
> {
> return 0;
> @@ -793,7 +699,5 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
> kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
> .get_tile_config = kgd_gfx_v9_get_tile_config,
> .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
> - .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
> - .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid,
> .get_hive_id = amdgpu_amdkfd_get_hive_id,
> };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> index 02b1426d17d1..dfafa28b7559 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> @@ -57,7 +57,5 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,
>
> bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
> uint8_t vmid, uint16_t *p_pasid);
> -int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
> -int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
> int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd,
> struct tile_config *config);
> diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> index 2cd217e60125..a01ef836ad58 100644
> --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> @@ -307,8 +307,6 @@ struct kfd2kgd_calls {
>
> void (*set_vm_context_page_table_base)(struct kgd_dev *kgd,
> uint32_t vmid, uint64_t page_table_base);
> - int (*invalidate_tlbs)(struct kgd_dev *kgd, uint16_t pasid);
> - int (*invalidate_tlbs_vmid)(struct kgd_dev *kgd, uint16_t vmid);
> uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd);
> uint64_t (*get_hive_id)(struct kgd_dev *kgd);
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10
2020-01-11 18:39 ` [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra
2020-01-13 12:12 ` Christian König
@ 2020-01-13 16:36 ` Felix Kuehling
1 sibling, 0 replies; 22+ messages in thread
From: Felix Kuehling @ 2020-01-13 16:36 UTC (permalink / raw)
To: Alex Sierra, amd-gfx
On 2020-01-11 1:39 p.m., Alex Sierra wrote:
> tlbs invalidate pointer function added to kiq_pm4_funcs struct.
> This way, tlb flush can be done through kiq member.
> TLB invalidation implemented for gfx9 and gfx10.
>
> Change-Id: I1b77b364f3ae0038ff3e70e869be5f2ef6e6d293
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 +++
> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 33 +++++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 32 ++++++++++++++++++++++++
> 3 files changed, 69 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 8e88e0411662..2927837bd401 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -76,11 +76,15 @@ struct kiq_pm4_funcs {
> struct amdgpu_ring *ring,
> u64 addr,
> u64 seq);
> + int (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub);
> /* Packet sizes */
> int set_resources_size;
> int map_queues_size;
> int unmap_queues_size;
> int query_status_size;
> + int invalidate_tlbs_size;
> };
>
> struct amdgpu_kiq {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 379e46c1b7f6..2e82213f57eb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -40,6 +40,7 @@
> #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
>
> #include "soc15.h"
> +#include "soc15d.h"
> #include "soc15_common.h"
> #include "clearstate_gfx10.h"
> #include "v10_structs.h"
> @@ -346,15 +347,47 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
> amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
> }
>
> +static int gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub)
> +{
> + signed long r;
> + uint32_t seq;
> + struct amdgpu_device *adev = kiq_ring->adev;
> + struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> +
> + spin_lock(&adev->gfx.kiq.ring_lock);
> + amdgpu_ring_alloc(kiq_ring, kiq->pmf->invalidate_tlbs_size);
> + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> + amdgpu_ring_write(kiq_ring,
> + PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> + PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> + amdgpu_fence_emit_polling(kiq_ring, &seq);
> + amdgpu_ring_commit(kiq_ring);
> + spin_unlock(&adev->gfx.kiq.ring_lock);
The other KIQ functions don't include the emit_polling, commit and
locking. I think the way the KIQ-funcs interface is meant to be used,
all that should be outside the IP-version-specific functions. For
consistency all you should do here is the amdgpu_ring_write calls with
IP-version-specific packets.
Regards,
Felix
> +
> + r = amdgpu_fence_wait_polling(kiq_ring, seq, adev->usec_timeout);
> + if (r < 1) {
> + DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> + return -ETIME;
> + }
> +
> + return 0;
> +}
> +
> static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
> .kiq_set_resources = gfx10_kiq_set_resources,
> .kiq_map_queues = gfx10_kiq_map_queues,
> .kiq_unmap_queues = gfx10_kiq_unmap_queues,
> .kiq_query_status = gfx10_kiq_query_status,
> + .kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
> .set_resources_size = 8,
> .map_queues_size = 7,
> .unmap_queues_size = 6,
> .query_status_size = 7,
> + .invalidate_tlbs_size = 12,
> };
>
> static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index ad0179ea2cc5..5be6fab55b73 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -837,15 +837,47 @@ static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
> amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
> }
>
> +static int gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub)
> +{
> + signed long r;
> + uint32_t seq;
> + struct amdgpu_device *adev = kiq_ring->adev;
> + struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> +
> + spin_lock(&adev->gfx.kiq.ring_lock);
> + amdgpu_ring_alloc(kiq_ring, kiq->pmf->invalidate_tlbs_size);
> + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> + amdgpu_ring_write(kiq_ring,
> + PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> + PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> + amdgpu_fence_emit_polling(kiq_ring, &seq);
> + amdgpu_ring_commit(kiq_ring);
> + spin_unlock(&adev->gfx.kiq.ring_lock);
> +
> + r = amdgpu_fence_wait_polling(kiq_ring, seq, adev->usec_timeout);
> + if (r < 1) {
> + DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> + return -ETIME;
> + }
> +
> + return 0;
> +}
> +
> static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
> .kiq_set_resources = gfx_v9_0_kiq_set_resources,
> .kiq_map_queues = gfx_v9_0_kiq_map_queues,
> .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
> .kiq_query_status = gfx_v9_0_kiq_query_status,
> + .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
> .set_resources_size = 8,
> .map_queues_size = 7,
> .unmap_queues_size = 6,
> .query_status_size = 7,
> + .invalidate_tlbs_size = 12,
> };
>
> static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid
2020-01-11 18:39 ` [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid Alex Sierra
2020-01-13 12:15 ` Christian König
@ 2020-01-13 16:49 ` Felix Kuehling
1 sibling, 0 replies; 22+ messages in thread
From: Felix Kuehling @ 2020-01-13 16:49 UTC (permalink / raw)
To: Alex Sierra, amd-gfx
On 2020-01-11 1:39 p.m., Alex Sierra wrote:
> This can be used directly from amdgpu and amdkfd to invalidate
> TLB through pasid.
> It supports gmc v7, v8, v9 and v10.
>
> Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 +++
> drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 59 ++++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 33 +++++++++++++
> drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 34 ++++++++++++++
> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 61 +++++++++++++++++++++++++
> 5 files changed, 193 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index c91dd602d5f1..d3c27a3c43f6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs {
> /* flush the vm tlb via mmio */
> void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
> uint32_t vmhub, uint32_t flush_type);
> + /* flush the vm tlb via pasid */
> + int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
> + uint32_t flush_type, bool all_hub);
> /* flush the vm tlb via ring */
> uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
> uint64_t pd_addr);
> @@ -216,6 +219,9 @@ struct amdgpu_gmc {
> };
>
> #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
> +#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \
> + ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
> + ((adev), (pasid), (type), (allhub)))
> #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
> #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
> #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 5ad89bb6f3ba..09408b8b390f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -30,6 +30,8 @@
> #include "hdp/hdp_5_0_0_sh_mask.h"
> #include "gc/gc_10_1_0_sh_mask.h"
> #include "mmhub/mmhub_2_0_0_sh_mask.h"
> +#include "athub/athub_2_0_0_sh_mask.h"
> +#include "athub/athub_2_0_0_offset.h"
> #include "dcn/dcn_2_0_0_offset.h"
> #include "dcn/dcn_2_0_0_sh_mask.h"
> #include "oss/osssys_5_0_0_offset.h"
> @@ -37,6 +39,7 @@
> #include "navi10_enum.h"
>
> #include "soc15.h"
> +#include "soc15d.h"
> #include "soc15_common.h"
>
> #include "nbio_v2_3.h"
> @@ -234,6 +237,19 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
> (!amdgpu_sriov_vf(adev)));
> }
>
> +static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
> + struct amdgpu_device *adev,
> + uint8_t vmid, uint16_t *p_pasid)
> +{
> + uint32_t value;
> +
> + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
> + + vmid);
> + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
> +
> + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
> +}
> +
> /*
> * GART
> * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -380,6 +396,48 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
> DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
> }
>
> +/**
> + * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flushed
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub)
> +{
> + int vmid, i;
> + uint16_t queried_pasid;
> + bool ret;
> + struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> + struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> +
> + if (amdgpu_emu_mode == 0 && ring->sched.ready)
> + return kiq->pmf->kiq_invalidate_tlbs(ring,
> + pasid, flush_type, all_hub);
This is where you should do the locking, ring alloc, and commit.
Regards,
Felix
> +
> + for (vmid = 1; vmid < 16; vmid++) {
> +
> + ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
> + &queried_pasid);
> + if (ret && queried_pasid == pasid) {
> + if (all_hub) {
> + for (i = 0; i < adev->num_vmhubs; i++)
> + gmc_v10_0_flush_gpu_tlb(adev, vmid,
> + i, 0);
> + } else {
> + gmc_v10_0_flush_gpu_tlb(adev, vmid,
> + AMDGPU_GFXHUB_0, 0);
> + }
> + break;
> + }
> + }
> +
> + return 0;
> +}
> +
> static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
> unsigned vmid, uint64_t pd_addr)
> {
> @@ -531,6 +589,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
>
> static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
> .flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
> + .flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
> .emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
> .emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
> .map_mtype = gmc_v10_0_map_mtype,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> index f08e5330642d..19d5b133e1d7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> @@ -418,6 +418,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
> return 0;
> }
>
> +/**
> + * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flushed
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub)
> +{
> + int vmid;
> + unsigned int tmp;
> +
> + if (adev->in_gpu_reset)
> + return -EIO;
> +
> + for (vmid = 1; vmid < 16; vmid++) {
> +
> + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> + RREG32(mmVM_INVALIDATE_RESPONSE);
> + break;
> + }
> + }
> +
> + return 0;
> +}
> +
> /*
> * GART
> * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -1333,6 +1365,7 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = {
>
> static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
> .flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb,
> + .flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid,
> .emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb,
> .emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping,
> .set_prt = gmc_v7_0_set_prt,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> index 6d96d40fbcb8..27d83204fa2b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> @@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
> return 0;
> }
>
> +/**
> + * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flushed
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub)
> +{
> + int vmid;
> + unsigned int tmp;
> +
> + if (adev->in_gpu_reset)
> + return -EIO;
> +
> + for (vmid = 1; vmid < 16; vmid++) {
> +
> + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> + RREG32(mmVM_INVALIDATE_RESPONSE);
> + break;
> + }
> + }
> +
> + return 0;
> +
> +}
> +
> /*
> * GART
> * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -1700,6 +1733,7 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
>
> static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
> .flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb,
> + .flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid,
> .emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb,
> .emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping,
> .set_prt = gmc_v8_0_set_prt,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index b83c8d745f42..95cce54999b7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -38,10 +38,12 @@
> #include "dce/dce_12_0_sh_mask.h"
> #include "vega10_enum.h"
> #include "mmhub/mmhub_1_0_offset.h"
> +#include "athub/athub_1_0_sh_mask.h"
> #include "athub/athub_1_0_offset.h"
> #include "oss/osssys_4_0_offset.h"
>
> #include "soc15.h"
> +#include "soc15d.h"
> #include "soc15_common.h"
> #include "umc/umc_6_0_sh_mask.h"
>
> @@ -441,6 +443,18 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
> adev->pdev->device == 0x15d8)));
> }
>
> +static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
> + uint8_t vmid, uint16_t *p_pasid)
> +{
> + uint32_t value;
> +
> + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
> + + vmid);
> + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
> +
> + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
> +}
> +
> /*
> * GART
> * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -539,6 +553,52 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
> DRM_ERROR("Timeout waiting for VM flush ACK!\n");
> }
>
> +/**
> + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flushed
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub)
> +{
> + int vmid, i;
> + uint16_t queried_pasid;
> + bool ret;
> + struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> + struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> +
> + if (adev->in_gpu_reset)
> + return -EIO;
> +
> + if (ring->sched.ready)
> + return kiq->pmf->kiq_invalidate_tlbs(ring,
> + pasid, flush_type, all_hub);
> +
> + for (vmid = 1; vmid < 16; vmid++) {
> +
> + ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
> + &queried_pasid);
> + if (ret && queried_pasid == pasid) {
> + if (all_hub) {
> + for (i = 0; i < adev->num_vmhubs; i++)
> + gmc_v9_0_flush_gpu_tlb(adev, vmid,
> + i, 0);
> + } else {
> + gmc_v9_0_flush_gpu_tlb(adev, vmid,
> + AMDGPU_GFXHUB_0, 0);
> + }
> + break;
> + }
> + }
> +
> + return 0;
> +
> +}
> +
> static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
> unsigned vmid, uint64_t pd_addr)
> {
> @@ -700,6 +760,7 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
>
> static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
> .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
> + .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
> .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
> .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
> .map_mtype = gmc_v9_0_map_mtype,
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 4/7] drm/amdgpu: replace kcq enable function on gfx_v9
2020-01-11 18:39 ` [PATCH 4/7] drm/amdgpu: replace kcq enable function on gfx_v9 Alex Sierra
2020-01-13 12:12 ` Christian König
@ 2020-01-13 16:53 ` Felix Kuehling
1 sibling, 0 replies; 22+ messages in thread
From: Felix Kuehling @ 2020-01-13 16:53 UTC (permalink / raw)
To: Alex Sierra, amd-gfx
Can you include kcq_disable in the patch as well?
Thanks,
Felix
On 2020-01-11 1:39 p.m., Alex Sierra wrote:
> [Why]
> There's a HW-independent function that enables kcq. This function uses
> the kiq_pm4_funcs implementation.
>
> [How]
> Local kcq enable function removed and replaced by the generic kcq
> enable function under amdgpu_gfx.
>
> Change-Id: I7709bdba93742c234941a5936c82eb67e346077c
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 70 +--------------------------
> 1 file changed, 1 insertion(+), 69 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 5be6fab55b73..7219eacad9ce 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -3252,74 +3252,6 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
> WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
> }
>
> -static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
> -{
> - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
> - uint64_t queue_mask = 0;
> - int r, i;
> -
> - for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
> - if (!test_bit(i, adev->gfx.mec.queue_bitmap))
> - continue;
> -
> - /* This situation may be hit in the future if a new HW
> - * generation exposes more than 64 queues. If so, the
> - * definition of queue_mask needs updating */
> - if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
> - DRM_ERROR("Invalid KCQ enabled: %d\n", i);
> - break;
> - }
> -
> - queue_mask |= (1ull << i);
> - }
> -
> - r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
> - if (r) {
> - DRM_ERROR("Failed to lock KIQ (%d).\n", r);
> - return r;
> - }
> -
> - /* set resources */
> - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
> - amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
> - PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
> - amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
> - amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
> - amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
> - amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
> - amdgpu_ring_write(kiq_ring, 0); /* oac mask */
> - amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
> - for (i = 0; i < adev->gfx.num_compute_rings; i++) {
> - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
> - uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
> - uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
> -
> - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
> - /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
> - amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
> - PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
> - PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
> - PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
> - PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
> - PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
> - PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
> - PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
> - PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
> - PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
> - amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
> - amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
> - amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
> - amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
> - amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
> - }
> -
> - r = amdgpu_ring_test_helper(kiq_ring);
> - if (r)
> - DRM_ERROR("KCQ enable failed\n");
> -
> - return r;
> -}
> -
> static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
> {
> struct amdgpu_device *adev = ring->adev;
> @@ -3726,7 +3658,7 @@ static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
> goto done;
> }
>
> - r = gfx_v9_0_kiq_kcq_enable(adev);
> + r = amdgpu_gfx_enable_kcq(adev);
> done:
> return r;
> }
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 2/7] drm/amdgpu: kiq pm4 function implementation for gfx_v9
2020-01-11 18:39 ` [PATCH 2/7] drm/amdgpu: kiq pm4 function implementation for gfx_v9 Alex Sierra
2020-01-13 12:10 ` Christian König
@ 2020-01-13 16:57 ` Felix Kuehling
1 sibling, 0 replies; 22+ messages in thread
From: Felix Kuehling @ 2020-01-13 16:57 UTC (permalink / raw)
To: Alex Sierra, amd-gfx
On 2020-01-11 1:39 p.m., Alex Sierra wrote:
> Functions implemented from kiq_pm4_funcs struct members
> for gfx_v9 version.
>
> Change-Id: I8fd3e160c4bd58f19d35d29e39517db967063afe
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 115 ++++++++++++++++++++++++++
> 1 file changed, 115 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index e3d466bd5c4e..ad0179ea2cc5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -739,6 +739,120 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
> static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
> void *inject_if);
>
> +static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
> + uint64_t queue_mask)
> +{
> + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
> + amdgpu_ring_write(kiq_ring,
> + PACKET3_SET_RESOURCES_VMID_MASK(0) |
> + /* vmid_mask:0* queue_type:0 (KIQ) */
> + PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
> + amdgpu_ring_write(kiq_ring,
> + lower_32_bits(queue_mask)); /* queue mask lo */
> + amdgpu_ring_write(kiq_ring,
> + upper_32_bits(queue_mask)); /* queue mask hi */
> + amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
> + amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
> + amdgpu_ring_write(kiq_ring, 0); /* oac mask */
> + amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
> +}
> +
> +static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
> + struct amdgpu_ring *ring)
> +{
> + struct amdgpu_device *adev = kiq_ring->adev;
> + uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
> + uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
> + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
> +
> + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
> + /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
> + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
> + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
> + PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
> + PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
> + PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
> + PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
> + /*queue_type: normal compute queue */
> + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
> + /* alloc format: all_on_one_pipe */
> + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
> + PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
> + /* num_queues: must be 1 */
> + PACKET3_MAP_QUEUES_NUM_QUEUES(1));
> + amdgpu_ring_write(kiq_ring,
> + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
> + amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
> + amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
> + amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
> + amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
> +}
> +
> +static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
> + struct amdgpu_ring *ring,
> + enum amdgpu_unmap_queues_action action,
> + u64 gpu_addr, u64 seq)
> +{
> + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
> +
> + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
> + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
> + PACKET3_UNMAP_QUEUES_ACTION(action) |
> + PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
> + PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
> + PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
> + amdgpu_ring_write(kiq_ring,
> + PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
> +
> + if (action == PREEMPT_QUEUES_NO_UNMAP) {
> + amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
> + amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
> + amdgpu_ring_write(kiq_ring, seq);
> + } else {
> + amdgpu_ring_write(kiq_ring, 0);
> + amdgpu_ring_write(kiq_ring, 0);
> + amdgpu_ring_write(kiq_ring, 0);
> + }
> +}
> +
> +static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
> + struct amdgpu_ring *ring,
> + u64 addr,
> + u64 seq)
> +{
> + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
> +
> + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
> + amdgpu_ring_write(kiq_ring,
> + PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
> + PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
> + PACKET3_QUERY_STATUS_COMMAND(2));
> + /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
> + amdgpu_ring_write(kiq_ring,
> + PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
> + PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
> + amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
> + amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
> + amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
> + amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
> +}
> +
> +static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
> + .kiq_set_resources = gfx_v9_0_kiq_set_resources,
> + .kiq_map_queues = gfx_v9_0_kiq_map_queues,
> + .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
> + .kiq_query_status = gfx_v9_0_kiq_query_status,
> + .set_resources_size = 8,
> + .map_queues_size = 7,
> + .unmap_queues_size = 6,
> + .query_status_size = 7,
> +};
> +
> +static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
> +{
> + adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
> +}
> +
> static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
> {
> switch (adev->asic_type) {
> @@ -4260,6 +4374,7 @@ static int gfx_v9_0_early_init(void *handle)
> else
> adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
> adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> + gfx_v9_0_set_kiq_pm4_funcs(adev);
> gfx_v9_0_set_ring_funcs(adev);
> gfx_v9_0_set_irq_funcs(adev);
> gfx_v9_0_set_gds_init(adev);
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 6/7] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd
2020-01-11 18:39 ` [PATCH 6/7] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd Alex Sierra
2020-01-13 12:16 ` Christian König
@ 2020-01-13 16:58 ` Felix Kuehling
1 sibling, 0 replies; 22+ messages in thread
From: Felix Kuehling @ 2020-01-13 16:58 UTC (permalink / raw)
To: Alex Sierra, amd-gfx
On 2020-01-11 1:39 p.m., Alex Sierra wrote:
> [Why]
> The TLB flush method using the kfd2kgd interface has been deprecated.
> This implementation is now on the amdgpu_amdkfd API.
>
> [How]
> TLB flush functions now implemented in amdgpu_amdkfd.
>
> Change-Id: Ic51cccdfe6e71288d78da772b6e1b6ced72f8ef7
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 32 ++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 ++
> drivers/gpu/drm/amd/amdkfd/kfd_process.c | 8 ++++--
> 3 files changed, 39 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index 88e10b956413..8609287620ea 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -628,6 +628,38 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
> return false;
> }
>
> +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
> +{
> + struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
> +
> + if (adev->family == AMDGPU_FAMILY_AI) {
> + int i;
> +
> + for (i = 0; i < adev->num_vmhubs; i++)
> + amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
> + } else {
> + amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
> + }
> +
> + return 0;
> +}
> +
> +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid)
> +{
> + struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
> + uint32_t flush_type = 0;
> + bool all_hub = false;
> +
> + if (adev->gmc.xgmi.num_physical_nodes &&
> + adev->asic_type == CHIP_VEGA20)
> + flush_type = 2;
> +
> + if (adev->family == AMDGPU_FAMILY_AI)
> + all_hub = true;
> +
> + return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
> +}
> +
> bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
> {
> struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 069d5d230810..47b0f2957d1f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -136,6 +136,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
> uint32_t *ib_cmd, uint32_t ib_len);
> void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
> bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd);
> +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid);
> +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid);
>
> bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 536a153ac9a4..25b90f70aecd 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -32,6 +32,7 @@
> #include <linux/mman.h>
> #include <linux/file.h>
> #include "amdgpu_amdkfd.h"
> +#include "amdgpu.h"
>
> struct mm_struct;
>
> @@ -1152,16 +1153,17 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
> void kfd_flush_tlb(struct kfd_process_device *pdd)
> {
> struct kfd_dev *dev = pdd->dev;
> - const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
>
> if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
> /* Nothing to flush until a VMID is assigned, which
> * only happens when the first queue is created.
> */
> if (pdd->qpd.vmid)
> - f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
> + amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
> + pdd->qpd.vmid);
> } else {
> - f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
> + amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
> + pdd->process->pasid);
> }
> }
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10
2020-01-13 20:26 [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra
2020-01-13 22:16 ` Felix Kuehling
@ 2020-01-14 0:45 ` Felix Kuehling
1 sibling, 0 replies; 22+ messages in thread
From: Felix Kuehling @ 2020-01-14 0:45 UTC (permalink / raw)
To: Alex Sierra, amd-gfx
On 2020-01-13 3:26 p.m., Alex Sierra wrote:
> tlbs invalidate pointer function added to kiq_pm4_funcs struct.
> This way, tlb flush can be done through kiq member.
> TLBs invalidation implemented for gfx9 and gfx10.
>
> Change-Id: I1b77b364f3ae0038ff3e70e869be5f2ef6e6d293
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 ++++
> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 15 +++++++++++++++
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 14 ++++++++++++++
> 3 files changed, 33 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 8e88e0411662..af4bd279f42f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -76,11 +76,15 @@ struct kiq_pm4_funcs {
> struct amdgpu_ring *ring,
> u64 addr,
> u64 seq);
> + void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub);
> /* Packet sizes */
> int set_resources_size;
> int map_queues_size;
> int unmap_queues_size;
> int query_status_size;
> + int invalidate_tlbs_size;
> };
>
> struct amdgpu_kiq {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 379e46c1b7f6..d72b60f997c8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -40,6 +40,7 @@
> #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
>
> #include "soc15.h"
> +#include "soc15d.h"
> #include "soc15_common.h"
> #include "clearstate_gfx10.h"
> #include "v10_structs.h"
> @@ -346,15 +347,29 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
> amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
> }
>
> +static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub)
> +{
> + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> + amdgpu_ring_write(kiq_ring,
> + PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> + PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> +}
> +
> static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
> .kiq_set_resources = gfx10_kiq_set_resources,
> .kiq_map_queues = gfx10_kiq_map_queues,
> .kiq_unmap_queues = gfx10_kiq_unmap_queues,
> .kiq_query_status = gfx10_kiq_query_status,
> + .kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
> .set_resources_size = 8,
> .map_queues_size = 7,
> .unmap_queues_size = 6,
> .query_status_size = 7,
> + .invalidate_tlbs_size = 12,
This looks like it was copied from the function that emits both flush
and fence. Now that the function only emits the flush, this number
should be smaller. Only 2 dwords. And it seems like 12 was
over-estimated, because the fence is only 8 dwords.
Regards,
Felix
> };
>
> static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index ad0179ea2cc5..b8759386dcbb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -837,15 +837,29 @@ static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
> amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
> }
>
> +static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub)
> +{
> + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> + amdgpu_ring_write(kiq_ring,
> + PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> + PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> +}
> +
> static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
> .kiq_set_resources = gfx_v9_0_kiq_set_resources,
> .kiq_map_queues = gfx_v9_0_kiq_map_queues,
> .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
> .kiq_query_status = gfx_v9_0_kiq_query_status,
> + .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
> .set_resources_size = 8,
> .map_queues_size = 7,
> .unmap_queues_size = 6,
> .query_status_size = 7,
> + .invalidate_tlbs_size = 12,
> };
>
> static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10
2020-01-13 20:26 [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra
@ 2020-01-13 22:16 ` Felix Kuehling
2020-01-14 0:45 ` Felix Kuehling
1 sibling, 0 replies; 22+ messages in thread
From: Felix Kuehling @ 2020-01-13 22:16 UTC (permalink / raw)
To: Alex Sierra, amd-gfx
The series is
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
On 2020-01-13 3:26 p.m., Alex Sierra wrote:
> tlbs invalidate pointer function added to kiq_pm4_funcs struct.
> This way, tlb flush can be done through kiq member.
> TLBs invalidation implemented for gfx9 and gfx10.
>
> Change-Id: I1b77b364f3ae0038ff3e70e869be5f2ef6e6d293
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 ++++
> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 15 +++++++++++++++
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 14 ++++++++++++++
> 3 files changed, 33 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 8e88e0411662..af4bd279f42f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -76,11 +76,15 @@ struct kiq_pm4_funcs {
> struct amdgpu_ring *ring,
> u64 addr,
> u64 seq);
> + void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub);
> /* Packet sizes */
> int set_resources_size;
> int map_queues_size;
> int unmap_queues_size;
> int query_status_size;
> + int invalidate_tlbs_size;
> };
>
> struct amdgpu_kiq {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 379e46c1b7f6..d72b60f997c8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -40,6 +40,7 @@
> #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
>
> #include "soc15.h"
> +#include "soc15d.h"
> #include "soc15_common.h"
> #include "clearstate_gfx10.h"
> #include "v10_structs.h"
> @@ -346,15 +347,29 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
> amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
> }
>
> +static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub)
> +{
> + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> + amdgpu_ring_write(kiq_ring,
> + PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> + PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> +}
> +
> static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
> .kiq_set_resources = gfx10_kiq_set_resources,
> .kiq_map_queues = gfx10_kiq_map_queues,
> .kiq_unmap_queues = gfx10_kiq_unmap_queues,
> .kiq_query_status = gfx10_kiq_query_status,
> + .kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
> .set_resources_size = 8,
> .map_queues_size = 7,
> .unmap_queues_size = 6,
> .query_status_size = 7,
> + .invalidate_tlbs_size = 12,
> };
>
> static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index ad0179ea2cc5..b8759386dcbb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -837,15 +837,29 @@ static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
> amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
> }
>
> +static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub)
> +{
> + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> + amdgpu_ring_write(kiq_ring,
> + PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> + PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> +}
> +
> static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
> .kiq_set_resources = gfx_v9_0_kiq_set_resources,
> .kiq_map_queues = gfx_v9_0_kiq_map_queues,
> .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
> .kiq_query_status = gfx_v9_0_kiq_query_status,
> + .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
> .set_resources_size = 8,
> .map_queues_size = 7,
> .unmap_queues_size = 6,
> .query_status_size = 7,
> + .invalidate_tlbs_size = 12,
> };
>
> static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10
@ 2020-01-13 20:26 Alex Sierra
2020-01-13 22:16 ` Felix Kuehling
2020-01-14 0:45 ` Felix Kuehling
0 siblings, 2 replies; 22+ messages in thread
From: Alex Sierra @ 2020-01-13 20:26 UTC (permalink / raw)
To: amd-gfx; +Cc: Alex Sierra
tlbs invalidate pointer function added to kiq_pm4_funcs struct.
This way, tlb flush can be done through kiq member.
TLBs invalidation implemented for gfx9 and gfx10.
Change-Id: I1b77b364f3ae0038ff3e70e869be5f2ef6e6d293
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 ++++
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 15 +++++++++++++++
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 14 ++++++++++++++
3 files changed, 33 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 8e88e0411662..af4bd279f42f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -76,11 +76,15 @@ struct kiq_pm4_funcs {
struct amdgpu_ring *ring,
u64 addr,
u64 seq);
+ void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub);
/* Packet sizes */
int set_resources_size;
int map_queues_size;
int unmap_queues_size;
int query_status_size;
+ int invalidate_tlbs_size;
};
struct amdgpu_kiq {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 379e46c1b7f6..d72b60f997c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -40,6 +40,7 @@
#include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
#include "soc15.h"
+#include "soc15d.h"
#include "soc15_common.h"
#include "clearstate_gfx10.h"
#include "v10_structs.h"
@@ -346,15 +347,29 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}
+static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
.kiq_set_resources = gfx10_kiq_set_resources,
.kiq_map_queues = gfx10_kiq_map_queues,
.kiq_unmap_queues = gfx10_kiq_unmap_queues,
.kiq_query_status = gfx10_kiq_query_status,
+ .kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
.set_resources_size = 8,
.map_queues_size = 7,
.unmap_queues_size = 6,
.query_status_size = 7,
+ .invalidate_tlbs_size = 12,
};
static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index ad0179ea2cc5..b8759386dcbb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -837,15 +837,29 @@ static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}
+static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
.kiq_set_resources = gfx_v9_0_kiq_set_resources,
.kiq_map_queues = gfx_v9_0_kiq_map_queues,
.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
.kiq_query_status = gfx_v9_0_kiq_query_status,
+ .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
.set_resources_size = 8,
.map_queues_size = 7,
.unmap_queues_size = 6,
.query_status_size = 7,
+ .invalidate_tlbs_size = 12,
};
static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 22+ messages in thread
end of thread, other threads:[~2020-01-14 0:45 UTC | newest]
Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-11 18:39 [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock Alex Sierra
2020-01-11 18:39 ` [PATCH 2/7] drm/amdgpu: kiq pm4 function implementation for gfx_v9 Alex Sierra
2020-01-13 12:10 ` Christian König
2020-01-13 16:57 ` Felix Kuehling
2020-01-11 18:39 ` [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra
2020-01-13 12:12 ` Christian König
2020-01-13 16:36 ` Felix Kuehling
2020-01-11 18:39 ` [PATCH 4/7] drm/amdgpu: replace kcq enable function on gfx_v9 Alex Sierra
2020-01-13 12:12 ` Christian König
2020-01-13 16:53 ` Felix Kuehling
2020-01-11 18:39 ` [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid Alex Sierra
2020-01-13 12:15 ` Christian König
2020-01-13 16:49 ` Felix Kuehling
2020-01-11 18:39 ` [PATCH 6/7] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd Alex Sierra
2020-01-13 12:16 ` Christian König
2020-01-13 16:58 ` Felix Kuehling
2020-01-11 18:39 ` [PATCH 7/7] drm/amdgpu: flush TLB functions removal from kfd2kgd interface Alex Sierra
2020-01-13 12:17 ` Christian König
2020-01-13 12:10 ` [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock Christian König
2020-01-13 20:26 [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra
2020-01-13 22:16 ` Felix Kuehling
2020-01-14 0:45 ` Felix Kuehling
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.