* [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 @ 2020-01-13 20:26 Alex Sierra 2020-01-13 20:26 ` [PATCH 4/7] drm/amdgpu: replace kcq enable/disable functions on gfx_v9 Alex Sierra ` (3 more replies) 0 siblings, 4 replies; 12+ messages in thread From: Alex Sierra @ 2020-01-13 20:26 UTC (permalink / raw) To: amd-gfx; +Cc: Alex Sierra A TLB invalidate function pointer is added to the kiq_pm4_funcs struct. This way, a TLB flush can be done through the kiq member. TLB invalidation is implemented for gfx9 and gfx10. Change-Id: I1b77b364f3ae0038ff3e70e869be5f2ef6e6d293 Signed-off-by: Alex Sierra <alex.sierra@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 ++++ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 15 +++++++++++++++ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 14 ++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 8e88e0411662..af4bd279f42f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -76,11 +76,15 @@ struct kiq_pm4_funcs { struct amdgpu_ring *ring, u64 addr, u64 seq); + void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub); /* Packet sizes */ int set_resources_size; int map_queues_size; int unmap_queues_size; int query_status_size; + int invalidate_tlbs_size; }; struct amdgpu_kiq { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 379e46c1b7f6..d72b60f997c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -40,6 +40,7 @@ #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h" #include "soc15.h" +#include "soc15d.h" #include "soc15_common.h" #include "clearstate_gfx10.h" #include "v10_structs.h" @@ -346,15 +347,29 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring, amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); } +static void 
gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(kiq_ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | + PACKET3_INVALIDATE_TLBS_PASID(pasid) | + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); +} + static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = { .kiq_set_resources = gfx10_kiq_set_resources, .kiq_map_queues = gfx10_kiq_map_queues, .kiq_unmap_queues = gfx10_kiq_unmap_queues, .kiq_query_status = gfx10_kiq_query_status, + .kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs, .set_resources_size = 8, .map_queues_size = 7, .unmap_queues_size = 6, .query_status_size = 7, + .invalidate_tlbs_size = 12, }; static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index ad0179ea2cc5..b8759386dcbb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -837,15 +837,29 @@ static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring, amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); } +static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(kiq_ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | + PACKET3_INVALIDATE_TLBS_PASID(pasid) | + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); +} + static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = { .kiq_set_resources = gfx_v9_0_kiq_set_resources, .kiq_map_queues = gfx_v9_0_kiq_map_queues, .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues, .kiq_query_status = gfx_v9_0_kiq_query_status, + .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs, .set_resources_size = 8, .map_queues_size = 7, 
.unmap_queues_size = 6, .query_status_size = 7, + .invalidate_tlbs_size = 12, }; static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 4/7] drm/amdgpu: replace kcq enable/disable functions on gfx_v9 2020-01-13 20:26 [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra @ 2020-01-13 20:26 ` Alex Sierra 2020-01-13 20:26 ` [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid Alex Sierra ` (2 subsequent siblings) 3 siblings, 0 replies; 12+ messages in thread From: Alex Sierra @ 2020-01-13 20:26 UTC (permalink / raw) To: amd-gfx; +Cc: Alex Sierra [Why] There are HW-independent functions that enable and disable kcq. These functions use the kiq_pm4_funcs implementation. [How] The local kcq enable and disable functions are removed and replaced by the generic kcq enable and disable functions under amdgpu_gfx. Change-Id: I7709bdba93742c234941a5936c82eb67e346077c Signed-off-by: Alex Sierra <alex.sierra@amd.com> Acked-by: Christian König <christian.koenig@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 102 +------------------------- 1 file changed, 2 insertions(+), 100 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index b8759386dcbb..44cdb6fc92ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3234,74 +3234,6 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); } -static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) -{ - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; - uint64_t queue_mask = 0; - int r, i; - - for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { - if (!test_bit(i, adev->gfx.mec.queue_bitmap)) - continue; - - /* This situation may be hit in the future if a new HW - * generation exposes more than 64 queues. 
If so, the - * definition of queue_mask needs updating */ - if (WARN_ON(i >= (sizeof(queue_mask)*8))) { - DRM_ERROR("Invalid KCQ enabled: %d\n", i); - break; - } - - queue_mask |= (1ull << i); - } - - r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8); - if (r) { - DRM_ERROR("Failed to lock KIQ (%d).\n", r); - return r; - } - - /* set resources */ - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); - amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | - PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ - amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ - amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ - amdgpu_ring_write(kiq_ring, 0); /* oac mask */ - amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); - uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); - /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ - amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ - PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ - PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ - PACKET3_MAP_QUEUES_QUEUE(ring->queue) | - PACKET3_MAP_QUEUES_PIPE(ring->pipe) | - PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 
0 : 1)) | - PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ - PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ - PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ - PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ - amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); - amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); - amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); - } - - r = amdgpu_ring_test_helper(kiq_ring); - if (r) - DRM_ERROR("KCQ enable failed\n"); - - return r; -} - static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -3708,7 +3640,7 @@ static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) goto done; } - r = gfx_v9_0_kiq_kcq_enable(adev); + r = amdgpu_gfx_enable_kcq(adev); done: return r; } @@ -3812,36 +3744,6 @@ static int gfx_v9_0_hw_init(void *handle) return r; } -static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev) -{ - int r, i; - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; - - r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings); - if (r) - DRM_ERROR("Failed to lock KIQ (%d).\n", r); - - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); - amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ - PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ - PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | - PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | - PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); - amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); - amdgpu_ring_write(kiq_ring, 0); - amdgpu_ring_write(kiq_ring, 0); - amdgpu_ring_write(kiq_ring, 0); - } - r = amdgpu_ring_test_helper(kiq_ring); - if 
(r) - DRM_ERROR("KCQ disable failed\n"); - - return r; -} - static int gfx_v9_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -3853,7 +3755,7 @@ static int gfx_v9_0_hw_fini(void *handle) /* DF freeze and kcq disable will fail */ if (!amdgpu_ras_intr_triggered()) /* disable KCQ to avoid CPC touch memory not valid anymore */ - gfx_v9_0_kcq_disable(adev); + amdgpu_gfx_disable_kcq(adev); if (amdgpu_sriov_vf(adev)) { gfx_v9_0_cp_gfx_enable(adev, false); -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid 2020-01-13 20:26 [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra 2020-01-13 20:26 ` [PATCH 4/7] drm/amdgpu: replace kcq enable/disable functions on gfx_v9 Alex Sierra @ 2020-01-13 20:26 ` Alex Sierra 2020-01-14 0:34 ` Felix Kuehling 2020-01-13 22:16 ` [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Felix Kuehling 2020-01-14 0:45 ` Felix Kuehling 3 siblings, 1 reply; 12+ messages in thread From: Alex Sierra @ 2020-01-13 20:26 UTC (permalink / raw) To: amd-gfx; +Cc: Alex Sierra This can be used directly from amdgpu and amdkfd to invalidate TLB through pasid. It supports gmc v7, v8, v9 and v10. Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490 Signed-off-by: Alex Sierra <alex.sierra@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 ++ drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 74 ++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 33 +++++++++++ drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 34 +++++++++++ drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 76 +++++++++++++++++++++++++ 5 files changed, 223 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index c91dd602d5f1..d3c27a3c43f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs { /* flush the vm tlb via mmio */ void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid, uint32_t vmhub, uint32_t flush_type); + /* flush the vm tlb via pasid */ + int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid, + uint32_t flush_type, bool all_hub); /* flush the vm tlb via ring */ uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); @@ -216,6 +219,9 @@ struct amdgpu_gmc { }; #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type))) 
+#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \ + ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \ + ((adev), (pasid), (type), (allhub))) #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags)) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 5ad89bb6f3ba..8afd05834714 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -30,6 +30,8 @@ #include "hdp/hdp_5_0_0_sh_mask.h" #include "gc/gc_10_1_0_sh_mask.h" #include "mmhub/mmhub_2_0_0_sh_mask.h" +#include "athub/athub_2_0_0_sh_mask.h" +#include "athub/athub_2_0_0_offset.h" #include "dcn/dcn_2_0_0_offset.h" #include "dcn/dcn_2_0_0_sh_mask.h" #include "oss/osssys_5_0_0_offset.h" @@ -37,6 +39,7 @@ #include "navi10_enum.h" #include "soc15.h" +#include "soc15d.h" #include "soc15_common.h" #include "nbio_v2_3.h" @@ -234,6 +237,19 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev, (!amdgpu_sriov_vf(adev))); } +static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info( + struct amdgpu_device *adev, + uint8_t vmid, uint16_t *p_pasid) +{ + uint32_t value; + + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + + vmid); + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; + + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. 
@@ -380,6 +396,63 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r); } +/** + * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid + * + * @adev: amdgpu_device pointer + * @pasid: pasid to be flush + * + * Flush the TLB for the requested pasid. + */ +static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + int vmid, i; + signed long r; + uint32_t seq; + uint16_t queried_pasid; + bool ret; + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + + if (amdgpu_emu_mode == 0 && ring->sched.ready) { + spin_lock(&adev->gfx.kiq.ring_lock); + amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size); + kiq->pmf->kiq_invalidate_tlbs(ring, + pasid, flush_type, all_hub); + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock(&adev->gfx.kiq.ring_lock); + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + if (r < 1) { + DRM_ERROR("wait for kiq fence error: %ld.\n", r); + return -ETIME; + } + + return 0; + } + + for (vmid = 1; vmid < 16; vmid++) { + + ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid, + &queried_pasid); + if (ret && queried_pasid == pasid) { + if (all_hub) { + for (i = 0; i < adev->num_vmhubs; i++) + gmc_v10_0_flush_gpu_tlb(adev, vmid, + i, 0); + } else { + gmc_v10_0_flush_gpu_tlb(adev, vmid, + AMDGPU_GFXHUB_0, 0); + } + break; + } + } + + return 0; +} + static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { @@ -531,6 +604,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev, static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = { .flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb, + .flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping, .map_mtype = 
gmc_v10_0_map_mtype, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index f08e5330642d..19d5b133e1d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -418,6 +418,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) return 0; } +/** + * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid + * + * @adev: amdgpu_device pointer + * @pasid: pasid to be flush + * + * Flush the TLB for the requested pasid. + */ +static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + int vmid; + unsigned int tmp; + + if (adev->in_gpu_reset) + return -EIO; + + for (vmid = 1; vmid < 16; vmid++) { + + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + break; + } + } + + return 0; +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. @@ -1333,6 +1365,7 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = { static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = { .flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb, + .flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping, .set_prt = gmc_v7_0_set_prt, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 6d96d40fbcb8..27d83204fa2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) return 0; } +/** + * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid + * + * @adev: amdgpu_device pointer + * @pasid: pasid to be flush + * + * Flush the TLB for the requested pasid. 
+ */ +static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + int vmid; + unsigned int tmp; + + if (adev->in_gpu_reset) + return -EIO; + + for (vmid = 1; vmid < 16; vmid++) { + + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + break; + } + } + + return 0; + +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. @@ -1700,6 +1733,7 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = { static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = { .flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb, + .flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping, .set_prt = gmc_v8_0_set_prt, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index b83c8d745f42..40a496804356 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -38,10 +38,12 @@ #include "dce/dce_12_0_sh_mask.h" #include "vega10_enum.h" #include "mmhub/mmhub_1_0_offset.h" +#include "athub/athub_1_0_sh_mask.h" #include "athub/athub_1_0_offset.h" #include "oss/osssys_4_0_offset.h" #include "soc15.h" +#include "soc15d.h" #include "soc15_common.h" #include "umc/umc_6_0_sh_mask.h" @@ -441,6 +443,18 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev, adev->pdev->device == 0x15d8))); } +static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, + uint8_t vmid, uint16_t *p_pasid) +{ + uint32_t value; + + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + + vmid); + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; + + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); +} + /* * GART * VMID 
0 is the physical GPU addresses as used by the kernel. @@ -539,6 +553,67 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, DRM_ERROR("Timeout waiting for VM flush ACK!\n"); } +/** + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid + * + * @adev: amdgpu_device pointer + * @pasid: pasid to be flush + * + * Flush the TLB for the requested pasid. + */ +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + int vmid, i; + signed long r; + uint32_t seq; + uint16_t queried_pasid; + bool ret; + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + + if (adev->in_gpu_reset) + return -EIO; + + if (ring->sched.ready) { + spin_lock(&adev->gfx.kiq.ring_lock); + amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size); + kiq->pmf->kiq_invalidate_tlbs(ring, + pasid, flush_type, all_hub); + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock(&adev->gfx.kiq.ring_lock); + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + if (r < 1) { + DRM_ERROR("wait for kiq fence error: %ld.\n", r); + return -ETIME; + } + + return 0; + } + + for (vmid = 1; vmid < 16; vmid++) { + + ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid, + &queried_pasid); + if (ret && queried_pasid == pasid) { + if (all_hub) { + for (i = 0; i < adev->num_vmhubs; i++) + gmc_v9_0_flush_gpu_tlb(adev, vmid, + i, 0); + } else { + gmc_v9_0_flush_gpu_tlb(adev, vmid, + AMDGPU_GFXHUB_0, 0); + } + break; + } + } + + return 0; + +} + static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { @@ -700,6 +775,7 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev, static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb, + .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb, .emit_pasid_mapping 
= gmc_v9_0_emit_pasid_mapping, .map_mtype = gmc_v9_0_map_mtype, -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid 2020-01-13 20:26 ` [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid Alex Sierra @ 2020-01-14 0:34 ` Felix Kuehling 2020-01-14 0:48 ` Sierra Guiza, Alejandro (Alex) 0 siblings, 1 reply; 12+ messages in thread From: Felix Kuehling @ 2020-01-14 0:34 UTC (permalink / raw) To: Alex Sierra, amd-gfx Sorry, I already said, Reviewed-by, but realized there was one more problem. If you haven't submitted yet, please fix that first. Otherwise, please make it a follow-up patch. See inline ... On 2020-01-13 3:26 p.m., Alex Sierra wrote: > This can be used directly from amdgpu and amdkfd to invalidate > TLB through pasid. > It supports gmc v7, v8, v9 and v10. > > Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490 > Signed-off-by: Alex Sierra <alex.sierra@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 ++ > drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 74 ++++++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 33 +++++++++++ > drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 34 +++++++++++ > drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 76 +++++++++++++++++++++++++ > 5 files changed, 223 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > index c91dd602d5f1..d3c27a3c43f6 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > @@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs { > /* flush the vm tlb via mmio */ > void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid, > uint32_t vmhub, uint32_t flush_type); > + /* flush the vm tlb via pasid */ > + int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid, > + uint32_t flush_type, bool all_hub); > /* flush the vm tlb via ring */ > uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, > uint64_t pd_addr); > @@ -216,6 +219,9 @@ struct amdgpu_gmc { > }; > > #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, 
type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type))) > +#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \ > + ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \ > + ((adev), (pasid), (type), (allhub))) > #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) > #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) > #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags)) > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > index 5ad89bb6f3ba..8afd05834714 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > @@ -30,6 +30,8 @@ > #include "hdp/hdp_5_0_0_sh_mask.h" > #include "gc/gc_10_1_0_sh_mask.h" > #include "mmhub/mmhub_2_0_0_sh_mask.h" > +#include "athub/athub_2_0_0_sh_mask.h" > +#include "athub/athub_2_0_0_offset.h" > #include "dcn/dcn_2_0_0_offset.h" > #include "dcn/dcn_2_0_0_sh_mask.h" > #include "oss/osssys_5_0_0_offset.h" > @@ -37,6 +39,7 @@ > #include "navi10_enum.h" > > #include "soc15.h" > +#include "soc15d.h" > #include "soc15_common.h" > > #include "nbio_v2_3.h" > @@ -234,6 +237,19 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev, > (!amdgpu_sriov_vf(adev))); > } > > +static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info( > + struct amdgpu_device *adev, > + uint8_t vmid, uint16_t *p_pasid) > +{ > + uint32_t value; > + > + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) > + + vmid); > + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; > + > + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); > +} > + > /* > * GART > * VMID 0 is the physical GPU addresses as used by the kernel. 
> @@ -380,6 +396,63 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, > DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r); > } > > +/** > + * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid > + * > + * @adev: amdgpu_device pointer > + * @pasid: pasid to be flush > + * > + * Flush the TLB for the requested pasid. > + */ > +static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, > + uint16_t pasid, uint32_t flush_type, > + bool all_hub) > +{ > + int vmid, i; > + signed long r; > + uint32_t seq; > + uint16_t queried_pasid; > + bool ret; > + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; > + struct amdgpu_kiq *kiq = &adev->gfx.kiq; > + > + if (amdgpu_emu_mode == 0 && ring->sched.ready) { > + spin_lock(&adev->gfx.kiq.ring_lock); > + amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size); You need to allocate more space here for amdgpu_fence_emit_polling. Looks like gfx_v10_0_ring_emit_fence needs 8 dwords. > + kiq->pmf->kiq_invalidate_tlbs(ring, > + pasid, flush_type, all_hub); > + amdgpu_fence_emit_polling(ring, &seq); > + amdgpu_ring_commit(ring); > + spin_unlock(&adev->gfx.kiq.ring_lock); > + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); > + if (r < 1) { > + DRM_ERROR("wait for kiq fence error: %ld.\n", r); > + return -ETIME; > + } > + > + return 0; > + } > + > + for (vmid = 1; vmid < 16; vmid++) { > + > + ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid, > + &queried_pasid); > + if (ret && queried_pasid == pasid) { > + if (all_hub) { > + for (i = 0; i < adev->num_vmhubs; i++) > + gmc_v10_0_flush_gpu_tlb(adev, vmid, > + i, 0); > + } else { > + gmc_v10_0_flush_gpu_tlb(adev, vmid, > + AMDGPU_GFXHUB_0, 0); > + } > + break; > + } > + } > + > + return 0; > +} > + > static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, > unsigned vmid, uint64_t pd_addr) > { > @@ -531,6 +604,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev, > > static const struct 
amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = { > .flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb, > + .flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid, > .emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb, > .emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping, > .map_mtype = gmc_v10_0_map_mtype, > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c > index f08e5330642d..19d5b133e1d7 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c > @@ -418,6 +418,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) > return 0; > } > > +/** > + * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid > + * > + * @adev: amdgpu_device pointer > + * @pasid: pasid to be flush > + * > + * Flush the TLB for the requested pasid. > + */ > +static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, > + uint16_t pasid, uint32_t flush_type, > + bool all_hub) > +{ > + int vmid; > + unsigned int tmp; > + > + if (adev->in_gpu_reset) > + return -EIO; > + > + for (vmid = 1; vmid < 16; vmid++) { > + > + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); > + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && > + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { > + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); > + RREG32(mmVM_INVALIDATE_RESPONSE); > + break; > + } > + } > + > + return 0; > +} > + > /* > * GART > * VMID 0 is the physical GPU addresses as used by the kernel. 
> @@ -1333,6 +1365,7 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = { > > static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = { > .flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb, > + .flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid, > .emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb, > .emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping, > .set_prt = gmc_v7_0_set_prt, > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > index 6d96d40fbcb8..27d83204fa2b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > @@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) > return 0; > } > > +/** > + * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid > + * > + * @adev: amdgpu_device pointer > + * @pasid: pasid to be flush > + * > + * Flush the TLB for the requested pasid. > + */ > +static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, > + uint16_t pasid, uint32_t flush_type, > + bool all_hub) > +{ > + int vmid; > + unsigned int tmp; > + > + if (adev->in_gpu_reset) > + return -EIO; > + > + for (vmid = 1; vmid < 16; vmid++) { > + > + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); > + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && > + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { > + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); > + RREG32(mmVM_INVALIDATE_RESPONSE); > + break; > + } > + } > + > + return 0; > + > +} > + > /* > * GART > * VMID 0 is the physical GPU addresses as used by the kernel. 
> @@ -1700,6 +1733,7 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = { > > static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = { > .flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb, > + .flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid, > .emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb, > .emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping, > .set_prt = gmc_v8_0_set_prt, > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > index b83c8d745f42..40a496804356 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > @@ -38,10 +38,12 @@ > #include "dce/dce_12_0_sh_mask.h" > #include "vega10_enum.h" > #include "mmhub/mmhub_1_0_offset.h" > +#include "athub/athub_1_0_sh_mask.h" > #include "athub/athub_1_0_offset.h" > #include "oss/osssys_4_0_offset.h" > > #include "soc15.h" > +#include "soc15d.h" > #include "soc15_common.h" > #include "umc/umc_6_0_sh_mask.h" > > @@ -441,6 +443,18 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev, > adev->pdev->device == 0x15d8))); > } > > +static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, > + uint8_t vmid, uint16_t *p_pasid) > +{ > + uint32_t value; > + > + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) > + + vmid); > + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; > + > + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); > +} > + > /* > * GART > * VMID 0 is the physical GPU addresses as used by the kernel. > @@ -539,6 +553,67 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, > DRM_ERROR("Timeout waiting for VM flush ACK!\n"); > } > > +/** > + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid > + * > + * @adev: amdgpu_device pointer > + * @pasid: pasid to be flush > + * > + * Flush the TLB for the requested pasid. 
> + */ > +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, > + uint16_t pasid, uint32_t flush_type, > + bool all_hub) > +{ > + int vmid, i; > + signed long r; > + uint32_t seq; > + uint16_t queried_pasid; > + bool ret; > + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; > + struct amdgpu_kiq *kiq = &adev->gfx.kiq; > + > + if (adev->in_gpu_reset) > + return -EIO; > + > + if (ring->sched.ready) { > + spin_lock(&adev->gfx.kiq.ring_lock); > + amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size); Same as above. Regards, Felix > + kiq->pmf->kiq_invalidate_tlbs(ring, > + pasid, flush_type, all_hub); > + amdgpu_fence_emit_polling(ring, &seq); > + amdgpu_ring_commit(ring); > + spin_unlock(&adev->gfx.kiq.ring_lock); > + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); > + if (r < 1) { > + DRM_ERROR("wait for kiq fence error: %ld.\n", r); > + return -ETIME; > + } > + > + return 0; > + } > + > + for (vmid = 1; vmid < 16; vmid++) { > + > + ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid, > + &queried_pasid); > + if (ret && queried_pasid == pasid) { > + if (all_hub) { > + for (i = 0; i < adev->num_vmhubs; i++) > + gmc_v9_0_flush_gpu_tlb(adev, vmid, > + i, 0); > + } else { > + gmc_v9_0_flush_gpu_tlb(adev, vmid, > + AMDGPU_GFXHUB_0, 0); > + } > + break; > + } > + } > + > + return 0; > + > +} > + > static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, > unsigned vmid, uint64_t pd_addr) > { > @@ -700,6 +775,7 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev, > > static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { > .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb, > + .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid, > .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb, > .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping, > .map_mtype = gmc_v9_0_map_mtype, _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ 
permalink raw reply [flat|nested] 12+ messages in thread
* RE: [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid 2020-01-14 0:34 ` Felix Kuehling @ 2020-01-14 0:48 ` Sierra Guiza, Alejandro (Alex) 2020-01-14 0:55 ` Felix Kuehling 0 siblings, 1 reply; 12+ messages in thread From: Sierra Guiza, Alejandro (Alex) @ 2020-01-14 0:48 UTC (permalink / raw) To: Kuehling, Felix, amd-gfx [AMD Official Use Only - Internal Distribution Only] I just pushed the series, but I'll go ahead and create a new patch for this. Is .invalidate_tlbs_size measured in dwords? It is currently 12; should I drop it to 8 then? -----Original Message----- From: Kuehling, Felix <Felix.Kuehling@amd.com> Sent: Monday, January 13, 2020 6:34 PM To: Sierra Guiza, Alejandro (Alex) <Alex.Sierra@amd.com>; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid Sorry, I already said, Reviewed-by, but realized there was one more problem. If you haven't submitted yet, please fix that first. Otherwise, please make it a follow-up patch. See inline ... On 2020-01-13 3:26 p.m., Alex Sierra wrote: > This can be used directly from amdgpu and amdkfd to invalidate TLB > through pasid. > It supports gmc v7, v8, v9 and v10. 
> > Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490 > Signed-off-by: Alex Sierra <alex.sierra@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 ++ > drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 74 ++++++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 33 +++++++++++ > drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 34 +++++++++++ > drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 76 +++++++++++++++++++++++++ > 5 files changed, 223 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > index c91dd602d5f1..d3c27a3c43f6 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > @@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs { > /* flush the vm tlb via mmio */ > void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid, > uint32_t vmhub, uint32_t flush_type); > + /* flush the vm tlb via pasid */ > + int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid, > + uint32_t flush_type, bool all_hub); > /* flush the vm tlb via ring */ > uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, > uint64_t pd_addr); > @@ -216,6 +219,9 @@ struct amdgpu_gmc { > }; > > #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) > ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), > (type))) > +#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \ > + ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \ > + ((adev), (pasid), (type), (allhub))) > #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) > #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) > #define amdgpu_gmc_map_mtype(adev, flags) > (adev)->gmc.gmc_funcs->map_mtype((adev),(flags)) > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > index 5ad89bb6f3ba..8afd05834714 100644 > --- 
a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > @@ -30,6 +30,8 @@ > #include "hdp/hdp_5_0_0_sh_mask.h" > #include "gc/gc_10_1_0_sh_mask.h" > #include "mmhub/mmhub_2_0_0_sh_mask.h" > +#include "athub/athub_2_0_0_sh_mask.h" > +#include "athub/athub_2_0_0_offset.h" > #include "dcn/dcn_2_0_0_offset.h" > #include "dcn/dcn_2_0_0_sh_mask.h" > #include "oss/osssys_5_0_0_offset.h" > @@ -37,6 +39,7 @@ > #include "navi10_enum.h" > > #include "soc15.h" > +#include "soc15d.h" > #include "soc15_common.h" > > #include "nbio_v2_3.h" > @@ -234,6 +237,19 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev, > (!amdgpu_sriov_vf(adev))); > } > > +static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info( > + struct amdgpu_device *adev, > + uint8_t vmid, uint16_t *p_pasid) { > + uint32_t value; > + > + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) > + + vmid); > + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; > + > + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); > +} > + > /* > * GART > * VMID 0 is the physical GPU addresses as used by the kernel. > @@ -380,6 +396,63 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, > DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r); > } > > +/** > + * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid > + * > + * @adev: amdgpu_device pointer > + * @pasid: pasid to be flush > + * > + * Flush the TLB for the requested pasid. 
> + */ > +static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, > + uint16_t pasid, uint32_t flush_type, > + bool all_hub) > +{ > + int vmid, i; > + signed long r; > + uint32_t seq; > + uint16_t queried_pasid; > + bool ret; > + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; > + struct amdgpu_kiq *kiq = &adev->gfx.kiq; > + > + if (amdgpu_emu_mode == 0 && ring->sched.ready) { > + spin_lock(&adev->gfx.kiq.ring_lock); > + amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size); You need to allocate more space here for amdgpu_fence_emit_polling. Looks like gfx_v10_0_ring_emit_fence needs 8 dwords. > + kiq->pmf->kiq_invalidate_tlbs(ring, > + pasid, flush_type, all_hub); > + amdgpu_fence_emit_polling(ring, &seq); > + amdgpu_ring_commit(ring); > + spin_unlock(&adev->gfx.kiq.ring_lock); > + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); > + if (r < 1) { > + DRM_ERROR("wait for kiq fence error: %ld.\n", r); > + return -ETIME; > + } > + > + return 0; > + } > + > + for (vmid = 1; vmid < 16; vmid++) { > + > + ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid, > + &queried_pasid); > + if (ret && queried_pasid == pasid) { > + if (all_hub) { > + for (i = 0; i < adev->num_vmhubs; i++) > + gmc_v10_0_flush_gpu_tlb(adev, vmid, > + i, 0); > + } else { > + gmc_v10_0_flush_gpu_tlb(adev, vmid, > + AMDGPU_GFXHUB_0, 0); > + } > + break; > + } > + } > + > + return 0; > +} > + > static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, > unsigned vmid, uint64_t pd_addr) > { > @@ -531,6 +604,7 @@ static void gmc_v10_0_get_vm_pte(struct > amdgpu_device *adev, > > static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = { > .flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb, > + .flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid, > .emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb, > .emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping, > .map_mtype = gmc_v10_0_map_mtype, > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c > 
b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c > index f08e5330642d..19d5b133e1d7 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c > @@ -418,6 +418,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) > return 0; > } > > +/** > + * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid > + * > + * @adev: amdgpu_device pointer > + * @pasid: pasid to be flush > + * > + * Flush the TLB for the requested pasid. > + */ > +static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, > + uint16_t pasid, uint32_t flush_type, > + bool all_hub) > +{ > + int vmid; > + unsigned int tmp; > + > + if (adev->in_gpu_reset) > + return -EIO; > + > + for (vmid = 1; vmid < 16; vmid++) { > + > + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); > + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && > + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { > + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); > + RREG32(mmVM_INVALIDATE_RESPONSE); > + break; > + } > + } > + > + return 0; > +} > + > /* > * GART > * VMID 0 is the physical GPU addresses as used by the kernel. 
> @@ -1333,6 +1365,7 @@ static const struct amd_ip_funcs > gmc_v7_0_ip_funcs = { > > static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = { > .flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb, > + .flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid, > .emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb, > .emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping, > .set_prt = gmc_v7_0_set_prt, > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > index 6d96d40fbcb8..27d83204fa2b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > @@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) > return 0; > } > > +/** > + * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid > + * > + * @adev: amdgpu_device pointer > + * @pasid: pasid to be flush > + * > + * Flush the TLB for the requested pasid. > + */ > +static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, > + uint16_t pasid, uint32_t flush_type, > + bool all_hub) > +{ > + int vmid; > + unsigned int tmp; > + > + if (adev->in_gpu_reset) > + return -EIO; > + > + for (vmid = 1; vmid < 16; vmid++) { > + > + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); > + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && > + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { > + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); > + RREG32(mmVM_INVALIDATE_RESPONSE); > + break; > + } > + } > + > + return 0; > + > +} > + > /* > * GART > * VMID 0 is the physical GPU addresses as used by the kernel. 
> @@ -1700,6 +1733,7 @@ static const struct amd_ip_funcs > gmc_v8_0_ip_funcs = { > > static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = { > .flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb, > + .flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid, > .emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb, > .emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping, > .set_prt = gmc_v8_0_set_prt, > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > index b83c8d745f42..40a496804356 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > @@ -38,10 +38,12 @@ > #include "dce/dce_12_0_sh_mask.h" > #include "vega10_enum.h" > #include "mmhub/mmhub_1_0_offset.h" > +#include "athub/athub_1_0_sh_mask.h" > #include "athub/athub_1_0_offset.h" > #include "oss/osssys_4_0_offset.h" > > #include "soc15.h" > +#include "soc15d.h" > #include "soc15_common.h" > #include "umc/umc_6_0_sh_mask.h" > > @@ -441,6 +443,18 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev, > adev->pdev->device == 0x15d8))); > } > > +static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, > + uint8_t vmid, uint16_t *p_pasid) { > + uint32_t value; > + > + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) > + + vmid); > + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; > + > + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); > +} > + > /* > * GART > * VMID 0 is the physical GPU addresses as used by the kernel. > @@ -539,6 +553,67 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, > DRM_ERROR("Timeout waiting for VM flush ACK!\n"); > } > > +/** > + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid > + * > + * @adev: amdgpu_device pointer > + * @pasid: pasid to be flush > + * > + * Flush the TLB for the requested pasid. 
> + */ > +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, > + uint16_t pasid, uint32_t flush_type, > + bool all_hub) > +{ > + int vmid, i; > + signed long r; > + uint32_t seq; > + uint16_t queried_pasid; > + bool ret; > + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; > + struct amdgpu_kiq *kiq = &adev->gfx.kiq; > + > + if (adev->in_gpu_reset) > + return -EIO; > + > + if (ring->sched.ready) { > + spin_lock(&adev->gfx.kiq.ring_lock); > + amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size); Same as above. Regards, Felix > + kiq->pmf->kiq_invalidate_tlbs(ring, > + pasid, flush_type, all_hub); > + amdgpu_fence_emit_polling(ring, &seq); > + amdgpu_ring_commit(ring); > + spin_unlock(&adev->gfx.kiq.ring_lock); > + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); > + if (r < 1) { > + DRM_ERROR("wait for kiq fence error: %ld.\n", r); > + return -ETIME; > + } > + > + return 0; > + } > + > + for (vmid = 1; vmid < 16; vmid++) { > + > + ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid, > + &queried_pasid); > + if (ret && queried_pasid == pasid) { > + if (all_hub) { > + for (i = 0; i < adev->num_vmhubs; i++) > + gmc_v9_0_flush_gpu_tlb(adev, vmid, > + i, 0); > + } else { > + gmc_v9_0_flush_gpu_tlb(adev, vmid, > + AMDGPU_GFXHUB_0, 0); > + } > + break; > + } > + } > + > + return 0; > + > +} > + > static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, > unsigned vmid, uint64_t pd_addr) > { > @@ -700,6 +775,7 @@ static void gmc_v9_0_get_vm_pte(struct > amdgpu_device *adev, > > static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { > .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb, > + .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid, > .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb, > .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping, > .map_mtype = gmc_v9_0_map_mtype, _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx 
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid 2020-01-14 0:48 ` Sierra Guiza, Alejandro (Alex) @ 2020-01-14 0:55 ` Felix Kuehling 2020-01-14 3:21 ` Sierra Guiza, Alejandro (Alex) 0 siblings, 1 reply; 12+ messages in thread From: Felix Kuehling @ 2020-01-14 0:55 UTC (permalink / raw) To: Sierra Guiza, Alejandro (Alex), amd-gfx I noticed that the invalidate_tlbs_size in patch 3 was also wrong. That should only be 2 dwords, not 12. The code here should do amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8); I think 12 was too much in the original code. Flush + fence should only be 10 dwords, unless I missed something or counted wrong. Regards, Felix On 2020-01-13 7:48 p.m., Sierra Guiza, Alejandro (Alex) wrote: > [AMD Official Use Only - Internal Distribution Only] > > I just pushed the series, but I'll go ahead and create a new patch for this. > The .invalidate_tlbs_size, is it based on dword size? Currently is 12, should I need to drop it to 8 then? > > -----Original Message----- > From: Kuehling, Felix <Felix.Kuehling@amd.com> > Sent: Monday, January 13, 2020 6:34 PM > To: Sierra Guiza, Alejandro (Alex) <Alex.Sierra@amd.com>; amd-gfx@lists.freedesktop.org > Subject: Re: [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid > > Sorry, I already said, Reviewed-by, but realized there was one more problem. If you haven't submitted yet, please fix that first. Otherwise, please make it a follow-up patch. See inline ... > > On 2020-01-13 3:26 p.m., Alex Sierra wrote: >> This can be used directly from amdgpu and amdkfd to invalidate TLB >> through pasid. >> It supports gmc v7, v8, v9 and v10. 
>> >> Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490 >> Signed-off-by: Alex Sierra <alex.sierra@amd.com> >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 ++ >> drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 74 ++++++++++++++++++++++++ >> drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 33 +++++++++++ >> drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 34 +++++++++++ >> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 76 +++++++++++++++++++++++++ >> 5 files changed, 223 insertions(+) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h >> index c91dd602d5f1..d3c27a3c43f6 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h >> @@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs { >> /* flush the vm tlb via mmio */ >> void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid, >> uint32_t vmhub, uint32_t flush_type); >> + /* flush the vm tlb via pasid */ >> + int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid, >> + uint32_t flush_type, bool all_hub); >> /* flush the vm tlb via ring */ >> uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, >> uint64_t pd_addr); >> @@ -216,6 +219,9 @@ struct amdgpu_gmc { >> }; >> >> #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) >> ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), >> (type))) >> +#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \ >> + ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \ >> + ((adev), (pasid), (type), (allhub))) >> #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) >> #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) >> #define amdgpu_gmc_map_mtype(adev, flags) >> (adev)->gmc.gmc_funcs->map_mtype((adev),(flags)) >> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c >> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c >> index 
5ad89bb6f3ba..8afd05834714 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c >> @@ -30,6 +30,8 @@ >> #include "hdp/hdp_5_0_0_sh_mask.h" >> #include "gc/gc_10_1_0_sh_mask.h" >> #include "mmhub/mmhub_2_0_0_sh_mask.h" >> +#include "athub/athub_2_0_0_sh_mask.h" >> +#include "athub/athub_2_0_0_offset.h" >> #include "dcn/dcn_2_0_0_offset.h" >> #include "dcn/dcn_2_0_0_sh_mask.h" >> #include "oss/osssys_5_0_0_offset.h" >> @@ -37,6 +39,7 @@ >> #include "navi10_enum.h" >> >> #include "soc15.h" >> +#include "soc15d.h" >> #include "soc15_common.h" >> >> #include "nbio_v2_3.h" >> @@ -234,6 +237,19 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev, >> (!amdgpu_sriov_vf(adev))); >> } >> >> +static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info( >> + struct amdgpu_device *adev, >> + uint8_t vmid, uint16_t *p_pasid) { >> + uint32_t value; >> + >> + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) >> + + vmid); >> + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; >> + >> + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); >> +} >> + >> /* >> * GART >> * VMID 0 is the physical GPU addresses as used by the kernel. >> @@ -380,6 +396,63 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, >> DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r); >> } >> >> +/** >> + * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid >> + * >> + * @adev: amdgpu_device pointer >> + * @pasid: pasid to be flush >> + * >> + * Flush the TLB for the requested pasid. 
>> + */ >> +static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, >> + uint16_t pasid, uint32_t flush_type, >> + bool all_hub) >> +{ >> + int vmid, i; >> + signed long r; >> + uint32_t seq; >> + uint16_t queried_pasid; >> + bool ret; >> + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; >> + struct amdgpu_kiq *kiq = &adev->gfx.kiq; >> + >> + if (amdgpu_emu_mode == 0 && ring->sched.ready) { >> + spin_lock(&adev->gfx.kiq.ring_lock); >> + amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size); > You need to allocate more space here for amdgpu_fence_emit_polling. > Looks like gfx_v10_0_ring_emit_fence needs 8 dwords. > > >> + kiq->pmf->kiq_invalidate_tlbs(ring, >> + pasid, flush_type, all_hub); >> + amdgpu_fence_emit_polling(ring, &seq); >> + amdgpu_ring_commit(ring); >> + spin_unlock(&adev->gfx.kiq.ring_lock); >> + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); >> + if (r < 1) { >> + DRM_ERROR("wait for kiq fence error: %ld.\n", r); >> + return -ETIME; >> + } >> + >> + return 0; >> + } >> + >> + for (vmid = 1; vmid < 16; vmid++) { >> + >> + ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid, >> + &queried_pasid); >> + if (ret && queried_pasid == pasid) { >> + if (all_hub) { >> + for (i = 0; i < adev->num_vmhubs; i++) >> + gmc_v10_0_flush_gpu_tlb(adev, vmid, >> + i, 0); >> + } else { >> + gmc_v10_0_flush_gpu_tlb(adev, vmid, >> + AMDGPU_GFXHUB_0, 0); >> + } >> + break; >> + } >> + } >> + >> + return 0; >> +} >> + >> static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, >> unsigned vmid, uint64_t pd_addr) >> { >> @@ -531,6 +604,7 @@ static void gmc_v10_0_get_vm_pte(struct >> amdgpu_device *adev, >> >> static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = { >> .flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb, >> + .flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid, >> .emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb, >> .emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping, >> .map_mtype = gmc_v10_0_map_mtype, >> diff 
--git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c >> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c >> index f08e5330642d..19d5b133e1d7 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c >> @@ -418,6 +418,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) >> return 0; >> } >> >> +/** >> + * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid >> + * >> + * @adev: amdgpu_device pointer >> + * @pasid: pasid to be flush >> + * >> + * Flush the TLB for the requested pasid. >> + */ >> +static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, >> + uint16_t pasid, uint32_t flush_type, >> + bool all_hub) >> +{ >> + int vmid; >> + unsigned int tmp; >> + >> + if (adev->in_gpu_reset) >> + return -EIO; >> + >> + for (vmid = 1; vmid < 16; vmid++) { >> + >> + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); >> + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && >> + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { >> + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); >> + RREG32(mmVM_INVALIDATE_RESPONSE); >> + break; >> + } >> + } >> + >> + return 0; >> +} >> + >> /* >> * GART >> * VMID 0 is the physical GPU addresses as used by the kernel. 
>> @@ -1333,6 +1365,7 @@ static const struct amd_ip_funcs >> gmc_v7_0_ip_funcs = { >> >> static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = { >> .flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb, >> + .flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid, >> .emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb, >> .emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping, >> .set_prt = gmc_v7_0_set_prt, >> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c >> b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c >> index 6d96d40fbcb8..27d83204fa2b 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c >> @@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) >> return 0; >> } >> >> +/** >> + * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid >> + * >> + * @adev: amdgpu_device pointer >> + * @pasid: pasid to be flush >> + * >> + * Flush the TLB for the requested pasid. >> + */ >> +static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, >> + uint16_t pasid, uint32_t flush_type, >> + bool all_hub) >> +{ >> + int vmid; >> + unsigned int tmp; >> + >> + if (adev->in_gpu_reset) >> + return -EIO; >> + >> + for (vmid = 1; vmid < 16; vmid++) { >> + >> + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); >> + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && >> + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { >> + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); >> + RREG32(mmVM_INVALIDATE_RESPONSE); >> + break; >> + } >> + } >> + >> + return 0; >> + >> +} >> + >> /* >> * GART >> * VMID 0 is the physical GPU addresses as used by the kernel. 
>> @@ -1700,6 +1733,7 @@ static const struct amd_ip_funcs >> gmc_v8_0_ip_funcs = { >> >> static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = { >> .flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb, >> + .flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid, >> .emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb, >> .emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping, >> .set_prt = gmc_v8_0_set_prt, >> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >> index b83c8d745f42..40a496804356 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >> @@ -38,10 +38,12 @@ >> #include "dce/dce_12_0_sh_mask.h" >> #include "vega10_enum.h" >> #include "mmhub/mmhub_1_0_offset.h" >> +#include "athub/athub_1_0_sh_mask.h" >> #include "athub/athub_1_0_offset.h" >> #include "oss/osssys_4_0_offset.h" >> >> #include "soc15.h" >> +#include "soc15d.h" >> #include "soc15_common.h" >> #include "umc/umc_6_0_sh_mask.h" >> >> @@ -441,6 +443,18 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev, >> adev->pdev->device == 0x15d8))); >> } >> >> +static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, >> + uint8_t vmid, uint16_t *p_pasid) { >> + uint32_t value; >> + >> + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) >> + + vmid); >> + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; >> + >> + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); >> +} >> + >> /* >> * GART >> * VMID 0 is the physical GPU addresses as used by the kernel. >> @@ -539,6 +553,67 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, >> DRM_ERROR("Timeout waiting for VM flush ACK!\n"); >> } >> >> +/** >> + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid >> + * >> + * @adev: amdgpu_device pointer >> + * @pasid: pasid to be flush >> + * >> + * Flush the TLB for the requested pasid. 
>> + */ >> +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, >> + uint16_t pasid, uint32_t flush_type, >> + bool all_hub) >> +{ >> + int vmid, i; >> + signed long r; >> + uint32_t seq; >> + uint16_t queried_pasid; >> + bool ret; >> + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; >> + struct amdgpu_kiq *kiq = &adev->gfx.kiq; >> + >> + if (adev->in_gpu_reset) >> + return -EIO; >> + >> + if (ring->sched.ready) { >> + spin_lock(&adev->gfx.kiq.ring_lock); >> + amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size); > Same as above. > > Regards, > Felix > >> + kiq->pmf->kiq_invalidate_tlbs(ring, >> + pasid, flush_type, all_hub); >> + amdgpu_fence_emit_polling(ring, &seq); >> + amdgpu_ring_commit(ring); >> + spin_unlock(&adev->gfx.kiq.ring_lock); >> + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); >> + if (r < 1) { >> + DRM_ERROR("wait for kiq fence error: %ld.\n", r); >> + return -ETIME; >> + } >> + >> + return 0; >> + } >> + >> + for (vmid = 1; vmid < 16; vmid++) { >> + >> + ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid, >> + &queried_pasid); >> + if (ret && queried_pasid == pasid) { >> + if (all_hub) { >> + for (i = 0; i < adev->num_vmhubs; i++) >> + gmc_v9_0_flush_gpu_tlb(adev, vmid, >> + i, 0); >> + } else { >> + gmc_v9_0_flush_gpu_tlb(adev, vmid, >> + AMDGPU_GFXHUB_0, 0); >> + } >> + break; >> + } >> + } >> + >> + return 0; >> + >> +} >> + >> static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, >> unsigned vmid, uint64_t pd_addr) >> { >> @@ -700,6 +775,7 @@ static void gmc_v9_0_get_vm_pte(struct >> amdgpu_device *adev, >> >> static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { >> .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb, >> + .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid, >> .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb, >> .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping, >> .map_mtype = gmc_v9_0_map_mtype, _______________________________________________ amd-gfx mailing list 
amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 12+ messages in thread
* RE: [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid 2020-01-14 0:55 ` Felix Kuehling @ 2020-01-14 3:21 ` Sierra Guiza, Alejandro (Alex) 0 siblings, 0 replies; 12+ messages in thread From: Sierra Guiza, Alejandro (Alex) @ 2020-01-14 3:21 UTC (permalink / raw) To: Kuehling, Felix, amd-gfx [AMD Official Use Only - Internal Distribution Only] Yes, both gfx_v10_0_ring_emit_fence and gfx_v9_0_ring_emit_fence have 8 ring writes, plus 2 for the flush. Regards, Alejandro S. -----Original Message----- From: Kuehling, Felix <Felix.Kuehling@amd.com> Sent: Monday, January 13, 2020 6:55 PM To: Sierra Guiza, Alejandro (Alex) <Alex.Sierra@amd.com>; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid I noticed that the invalidate_tlbs_size in patch 3 was also wrong. That should only be 2 dwords, not 12. The code here should do amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8); I think 12 was too much in the original code. Flush + fence should only be 10 dwords, unless I misses something or counted wrong. Regards, Felix On 2020-01-13 7:48 p.m., Sierra Guiza, Alejandro (Alex) wrote: > [AMD Official Use Only - Internal Distribution Only] > > I just pushed the series, but I'll go ahead and create a new patch for this. > The .invalidate_tlbs_size, is it based on dword size? Currently is 12, should I need to drop it to 8 then? > > -----Original Message----- > From: Kuehling, Felix <Felix.Kuehling@amd.com> > Sent: Monday, January 13, 2020 6:34 PM > To: Sierra Guiza, Alejandro (Alex) <Alex.Sierra@amd.com>; > amd-gfx@lists.freedesktop.org > Subject: Re: [PATCH 5/7] drm/amdgpu: export function to flush TLB via > pasid > > Sorry, I already said, Reviewed-by, but realized there was one more problem. If you haven't submitted yet, please fix that first. Otherwise, please make it a follow-up patch. See inline ... 
> > On 2020-01-13 3:26 p.m., Alex Sierra wrote: >> This can be used directly from amdgpu and amdkfd to invalidate TLB >> through pasid. >> It supports gmc v7, v8, v9 and v10. >> >> Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490 >> Signed-off-by: Alex Sierra <alex.sierra@amd.com> >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 ++ >> drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 74 ++++++++++++++++++++++++ >> drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 33 +++++++++++ >> drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 34 +++++++++++ >> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 76 +++++++++++++++++++++++++ >> 5 files changed, 223 insertions(+) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h >> index c91dd602d5f1..d3c27a3c43f6 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h >> @@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs { >> /* flush the vm tlb via mmio */ >> void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid, >> uint32_t vmhub, uint32_t flush_type); >> + /* flush the vm tlb via pasid */ >> + int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid, >> + uint32_t flush_type, bool all_hub); >> /* flush the vm tlb via ring */ >> uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, >> uint64_t pd_addr); >> @@ -216,6 +219,9 @@ struct amdgpu_gmc { >> }; >> >> #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) >> ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), >> (type))) >> +#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \ >> + ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \ >> + ((adev), (pasid), (type), (allhub))) >> #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) >> #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) >> #define 
amdgpu_gmc_map_mtype(adev, flags) >> (adev)->gmc.gmc_funcs->map_mtype((adev),(flags)) >> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c >> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c >> index 5ad89bb6f3ba..8afd05834714 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c >> @@ -30,6 +30,8 @@ >> #include "hdp/hdp_5_0_0_sh_mask.h" >> #include "gc/gc_10_1_0_sh_mask.h" >> #include "mmhub/mmhub_2_0_0_sh_mask.h" >> +#include "athub/athub_2_0_0_sh_mask.h" >> +#include "athub/athub_2_0_0_offset.h" >> #include "dcn/dcn_2_0_0_offset.h" >> #include "dcn/dcn_2_0_0_sh_mask.h" >> #include "oss/osssys_5_0_0_offset.h" >> @@ -37,6 +39,7 @@ >> #include "navi10_enum.h" >> >> #include "soc15.h" >> +#include "soc15d.h" >> #include "soc15_common.h" >> >> #include "nbio_v2_3.h" >> @@ -234,6 +237,19 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev, >> (!amdgpu_sriov_vf(adev))); >> } >> >> +static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info( >> + struct amdgpu_device *adev, >> + uint8_t vmid, uint16_t *p_pasid) { >> + uint32_t value; >> + >> + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) >> + + vmid); >> + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; >> + >> + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); >> +} >> + >> /* >> * GART >> * VMID 0 is the physical GPU addresses as used by the kernel. >> @@ -380,6 +396,63 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, >> DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r); >> } >> >> +/** >> + * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid >> + * >> + * @adev: amdgpu_device pointer >> + * @pasid: pasid to be flush >> + * >> + * Flush the TLB for the requested pasid. 
>> + */ >> +static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, >> + uint16_t pasid, uint32_t flush_type, >> + bool all_hub) >> +{ >> + int vmid, i; >> + signed long r; >> + uint32_t seq; >> + uint16_t queried_pasid; >> + bool ret; >> + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; >> + struct amdgpu_kiq *kiq = &adev->gfx.kiq; >> + >> + if (amdgpu_emu_mode == 0 && ring->sched.ready) { >> + spin_lock(&adev->gfx.kiq.ring_lock); >> + amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size); > You need to allocate more space here for amdgpu_fence_emit_polling. > Looks like gfx_v10_0_ring_emit_fence needs 8 dwords. > > >> + kiq->pmf->kiq_invalidate_tlbs(ring, >> + pasid, flush_type, all_hub); >> + amdgpu_fence_emit_polling(ring, &seq); >> + amdgpu_ring_commit(ring); >> + spin_unlock(&adev->gfx.kiq.ring_lock); >> + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); >> + if (r < 1) { >> + DRM_ERROR("wait for kiq fence error: %ld.\n", r); >> + return -ETIME; >> + } >> + >> + return 0; >> + } >> + >> + for (vmid = 1; vmid < 16; vmid++) { >> + >> + ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid, >> + &queried_pasid); >> + if (ret && queried_pasid == pasid) { >> + if (all_hub) { >> + for (i = 0; i < adev->num_vmhubs; i++) >> + gmc_v10_0_flush_gpu_tlb(adev, vmid, >> + i, 0); >> + } else { >> + gmc_v10_0_flush_gpu_tlb(adev, vmid, >> + AMDGPU_GFXHUB_0, 0); >> + } >> + break; >> + } >> + } >> + >> + return 0; >> +} >> + >> static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, >> unsigned vmid, uint64_t pd_addr) >> { >> @@ -531,6 +604,7 @@ static void gmc_v10_0_get_vm_pte(struct >> amdgpu_device *adev, >> >> static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = { >> .flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb, >> + .flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid, >> .emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb, >> .emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping, >> .map_mtype = gmc_v10_0_map_mtype, diff 
--git >> a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c >> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c >> index f08e5330642d..19d5b133e1d7 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c >> @@ -418,6 +418,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) >> return 0; >> } >> >> +/** >> + * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid >> + * >> + * @adev: amdgpu_device pointer >> + * @pasid: pasid to be flush >> + * >> + * Flush the TLB for the requested pasid. >> + */ >> +static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, >> + uint16_t pasid, uint32_t flush_type, >> + bool all_hub) >> +{ >> + int vmid; >> + unsigned int tmp; >> + >> + if (adev->in_gpu_reset) >> + return -EIO; >> + >> + for (vmid = 1; vmid < 16; vmid++) { >> + >> + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); >> + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && >> + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { >> + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); >> + RREG32(mmVM_INVALIDATE_RESPONSE); >> + break; >> + } >> + } >> + >> + return 0; >> +} >> + >> /* >> * GART >> * VMID 0 is the physical GPU addresses as used by the kernel. 
>> @@ -1333,6 +1365,7 @@ static const struct amd_ip_funcs >> gmc_v7_0_ip_funcs = { >> >> static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = { >> .flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb, >> + .flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid, >> .emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb, >> .emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping, >> .set_prt = gmc_v7_0_set_prt, >> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c >> b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c >> index 6d96d40fbcb8..27d83204fa2b 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c >> @@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) >> return 0; >> } >> >> +/** >> + * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid >> + * >> + * @adev: amdgpu_device pointer >> + * @pasid: pasid to be flush >> + * >> + * Flush the TLB for the requested pasid. >> + */ >> +static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, >> + uint16_t pasid, uint32_t flush_type, >> + bool all_hub) >> +{ >> + int vmid; >> + unsigned int tmp; >> + >> + if (adev->in_gpu_reset) >> + return -EIO; >> + >> + for (vmid = 1; vmid < 16; vmid++) { >> + >> + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); >> + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && >> + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { >> + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); >> + RREG32(mmVM_INVALIDATE_RESPONSE); >> + break; >> + } >> + } >> + >> + return 0; >> + >> +} >> + >> /* >> * GART >> * VMID 0 is the physical GPU addresses as used by the kernel. 
>> @@ -1700,6 +1733,7 @@ static const struct amd_ip_funcs >> gmc_v8_0_ip_funcs = { >> >> static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = { >> .flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb, >> + .flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid, >> .emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb, >> .emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping, >> .set_prt = gmc_v8_0_set_prt, >> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >> index b83c8d745f42..40a496804356 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >> @@ -38,10 +38,12 @@ >> #include "dce/dce_12_0_sh_mask.h" >> #include "vega10_enum.h" >> #include "mmhub/mmhub_1_0_offset.h" >> +#include "athub/athub_1_0_sh_mask.h" >> #include "athub/athub_1_0_offset.h" >> #include "oss/osssys_4_0_offset.h" >> >> #include "soc15.h" >> +#include "soc15d.h" >> #include "soc15_common.h" >> #include "umc/umc_6_0_sh_mask.h" >> >> @@ -441,6 +443,18 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev, >> adev->pdev->device == 0x15d8))); >> } >> >> +static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, >> + uint8_t vmid, uint16_t *p_pasid) { >> + uint32_t value; >> + >> + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) >> + + vmid); >> + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; >> + >> + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); >> +} >> + >> /* >> * GART >> * VMID 0 is the physical GPU addresses as used by the kernel. >> @@ -539,6 +553,67 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, >> DRM_ERROR("Timeout waiting for VM flush ACK!\n"); >> } >> >> +/** >> + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid >> + * >> + * @adev: amdgpu_device pointer >> + * @pasid: pasid to be flush >> + * >> + * Flush the TLB for the requested pasid. 
>> + */ >> +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, >> + uint16_t pasid, uint32_t flush_type, >> + bool all_hub) >> +{ >> + int vmid, i; >> + signed long r; >> + uint32_t seq; >> + uint16_t queried_pasid; >> + bool ret; >> + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; >> + struct amdgpu_kiq *kiq = &adev->gfx.kiq; >> + >> + if (adev->in_gpu_reset) >> + return -EIO; >> + >> + if (ring->sched.ready) { >> + spin_lock(&adev->gfx.kiq.ring_lock); >> + amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size); > Same as above. > > Regards, > Felix > >> + kiq->pmf->kiq_invalidate_tlbs(ring, >> + pasid, flush_type, all_hub); >> + amdgpu_fence_emit_polling(ring, &seq); >> + amdgpu_ring_commit(ring); >> + spin_unlock(&adev->gfx.kiq.ring_lock); >> + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); >> + if (r < 1) { >> + DRM_ERROR("wait for kiq fence error: %ld.\n", r); >> + return -ETIME; >> + } >> + >> + return 0; >> + } >> + >> + for (vmid = 1; vmid < 16; vmid++) { >> + >> + ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid, >> + &queried_pasid); >> + if (ret && queried_pasid == pasid) { >> + if (all_hub) { >> + for (i = 0; i < adev->num_vmhubs; i++) >> + gmc_v9_0_flush_gpu_tlb(adev, vmid, >> + i, 0); >> + } else { >> + gmc_v9_0_flush_gpu_tlb(adev, vmid, >> + AMDGPU_GFXHUB_0, 0); >> + } >> + break; >> + } >> + } >> + >> + return 0; >> + >> +} >> + >> static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, >> unsigned vmid, uint64_t pd_addr) >> { >> @@ -700,6 +775,7 @@ static void gmc_v9_0_get_vm_pte(struct >> amdgpu_device *adev, >> >> static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { >> .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb, >> + .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid, >> .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb, >> .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping, >> .map_mtype = gmc_v9_0_map_mtype, _______________________________________________ amd-gfx mailing list 
amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 2020-01-13 20:26 [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra 2020-01-13 20:26 ` [PATCH 4/7] drm/amdgpu: replace kcq enable/disable functions on gfx_v9 Alex Sierra 2020-01-13 20:26 ` [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid Alex Sierra @ 2020-01-13 22:16 ` Felix Kuehling 2020-01-14 0:45 ` Felix Kuehling 3 siblings, 0 replies; 12+ messages in thread From: Felix Kuehling @ 2020-01-13 22:16 UTC (permalink / raw) To: Alex Sierra, amd-gfx The series is Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> On 2020-01-13 3:26 p.m., Alex Sierra wrote: > tlbs invalidate pointer function added to kiq_pm4_funcs struct. > This way, tlb flush can be done through kiq member. > TLBs invalidatation implemented for gfx9 and gfx10. > > Change-Id: I1b77b364f3ae0038ff3e70e869be5f2ef6e6d293 > Signed-off-by: Alex Sierra <alex.sierra@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 ++++ > drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 15 +++++++++++++++ > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 14 ++++++++++++++ > 3 files changed, 33 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > index 8e88e0411662..af4bd279f42f 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > @@ -76,11 +76,15 @@ struct kiq_pm4_funcs { > struct amdgpu_ring *ring, > u64 addr, > u64 seq); > + void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring, > + uint16_t pasid, uint32_t flush_type, > + bool all_hub); > /* Packet sizes */ > int set_resources_size; > int map_queues_size; > int unmap_queues_size; > int query_status_size; > + int invalidate_tlbs_size; > }; > > struct amdgpu_kiq { > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > index 379e46c1b7f6..d72b60f997c8 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > +++ 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > @@ -40,6 +40,7 @@ > #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h" > > #include "soc15.h" > +#include "soc15d.h" > #include "soc15_common.h" > #include "clearstate_gfx10.h" > #include "v10_structs.h" > @@ -346,15 +347,29 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring, > amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); > } > > +static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, > + uint16_t pasid, uint32_t flush_type, > + bool all_hub) > +{ > + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); > + amdgpu_ring_write(kiq_ring, > + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | > + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | > + PACKET3_INVALIDATE_TLBS_PASID(pasid) | > + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); > +} > + > static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = { > .kiq_set_resources = gfx10_kiq_set_resources, > .kiq_map_queues = gfx10_kiq_map_queues, > .kiq_unmap_queues = gfx10_kiq_unmap_queues, > .kiq_query_status = gfx10_kiq_query_status, > + .kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs, > .set_resources_size = 8, > .map_queues_size = 7, > .unmap_queues_size = 6, > .query_status_size = 7, > + .invalidate_tlbs_size = 12, > }; > > static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > index ad0179ea2cc5..b8759386dcbb 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > @@ -837,15 +837,29 @@ static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring, > amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); > } > > +static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, > + uint16_t pasid, uint32_t flush_type, > + bool all_hub) > +{ > + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); > + amdgpu_ring_write(kiq_ring, > + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | > + 
PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | > + PACKET3_INVALIDATE_TLBS_PASID(pasid) | > + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); > +} > + > static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = { > .kiq_set_resources = gfx_v9_0_kiq_set_resources, > .kiq_map_queues = gfx_v9_0_kiq_map_queues, > .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues, > .kiq_query_status = gfx_v9_0_kiq_query_status, > + .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs, > .set_resources_size = 8, > .map_queues_size = 7, > .unmap_queues_size = 6, > .query_status_size = 7, > + .invalidate_tlbs_size = 12, > }; > > static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 2020-01-13 20:26 [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra ` (2 preceding siblings ...) 2020-01-13 22:16 ` [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Felix Kuehling @ 2020-01-14 0:45 ` Felix Kuehling 3 siblings, 0 replies; 12+ messages in thread From: Felix Kuehling @ 2020-01-14 0:45 UTC (permalink / raw) To: Alex Sierra, amd-gfx On 2020-01-13 3:26 p.m., Alex Sierra wrote: > tlbs invalidate pointer function added to kiq_pm4_funcs struct. > This way, tlb flush can be done through kiq member. > TLBs invalidatation implemented for gfx9 and gfx10. > > Change-Id: I1b77b364f3ae0038ff3e70e869be5f2ef6e6d293 > Signed-off-by: Alex Sierra <alex.sierra@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 ++++ > drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 15 +++++++++++++++ > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 14 ++++++++++++++ > 3 files changed, 33 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > index 8e88e0411662..af4bd279f42f 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > @@ -76,11 +76,15 @@ struct kiq_pm4_funcs { > struct amdgpu_ring *ring, > u64 addr, > u64 seq); > + void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring, > + uint16_t pasid, uint32_t flush_type, > + bool all_hub); > /* Packet sizes */ > int set_resources_size; > int map_queues_size; > int unmap_queues_size; > int query_status_size; > + int invalidate_tlbs_size; > }; > > struct amdgpu_kiq { > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > index 379e46c1b7f6..d72b60f997c8 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > @@ -40,6 +40,7 @@ > #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h" > > #include "soc15.h" > +#include "soc15d.h" > #include 
"soc15_common.h" > #include "clearstate_gfx10.h" > #include "v10_structs.h" > @@ -346,15 +347,29 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring, > amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); > } > > +static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, > + uint16_t pasid, uint32_t flush_type, > + bool all_hub) > +{ > + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); > + amdgpu_ring_write(kiq_ring, > + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | > + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | > + PACKET3_INVALIDATE_TLBS_PASID(pasid) | > + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); > +} > + > static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = { > .kiq_set_resources = gfx10_kiq_set_resources, > .kiq_map_queues = gfx10_kiq_map_queues, > .kiq_unmap_queues = gfx10_kiq_unmap_queues, > .kiq_query_status = gfx10_kiq_query_status, > + .kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs, > .set_resources_size = 8, > .map_queues_size = 7, > .unmap_queues_size = 6, > .query_status_size = 7, > + .invalidate_tlbs_size = 12, This looks like it was copied from the function that emits both flush and fence. Now that the function only emits the flush, this number should be smaller. Only 2 dwords. And it seems like 12 was over-estimated, because the fence is only 8 dwords. 
Regards, Felix > }; > > static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > index ad0179ea2cc5..b8759386dcbb 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > @@ -837,15 +837,29 @@ static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring, > amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); > } > > +static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, > + uint16_t pasid, uint32_t flush_type, > + bool all_hub) > +{ > + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); > + amdgpu_ring_write(kiq_ring, > + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | > + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | > + PACKET3_INVALIDATE_TLBS_PASID(pasid) | > + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); > +} > + > static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = { > .kiq_set_resources = gfx_v9_0_kiq_set_resources, > .kiq_map_queues = gfx_v9_0_kiq_map_queues, > .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues, > .kiq_query_status = gfx_v9_0_kiq_query_status, > + .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs, > .set_resources_size = 8, > .map_queues_size = 7, > .unmap_queues_size = 6, > .query_status_size = 7, > + .invalidate_tlbs_size = 12, > }; > > static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock @ 2020-01-11 18:39 Alex Sierra 2020-01-11 18:39 ` [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra 0 siblings, 1 reply; 12+ messages in thread From: Alex Sierra @ 2020-01-11 18:39 UTC (permalink / raw) To: amd-gfx; +Cc: Alex Sierra [Why] Avoid reclaim filesystem while eviction lock is held called from MMU notifier. [How] Setting PF_MEMALLOC_NOFS flags while eviction mutex is locked. Using memalloc_nofs_save / memalloc_nofs_restore API. Change-Id: I5531c9337836e7d4a430df3f16dcc82888e8018c Signed-off-by: Alex Sierra <alex.sierra@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 40 +++++++++++++++++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 6 +++- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b999b67ff57a..d6aba4f9df74 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -82,6 +82,32 @@ struct amdgpu_prt_cb { struct dma_fence_cb cb; }; +/** + * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS + * happens while holding this lock anywhere to prevent deadlocks when + * an MMU notifier runs in reclaim-FS context. 
+ */ +static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm) +{ + mutex_lock(&vm->eviction_lock); + vm->saved_flags = memalloc_nofs_save(); +} + +static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm) +{ + if (mutex_trylock(&vm->eviction_lock)) { + vm->saved_flags = memalloc_nofs_save(); + return 1; + } + return 0; +} + +static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm) +{ + memalloc_nofs_restore(vm->saved_flags); + mutex_unlock(&vm->eviction_lock); +} + /** * amdgpu_vm_level_shift - return the addr shift for each level * @@ -678,9 +704,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, } } - mutex_lock(&vm->eviction_lock); + amdgpu_vm_eviction_lock(vm); vm->evicting = false; - mutex_unlock(&vm->eviction_lock); + amdgpu_vm_eviction_unlock(vm); return 0; } @@ -1559,7 +1585,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (!(flags & AMDGPU_PTE_VALID)) owner = AMDGPU_FENCE_OWNER_KFD; - mutex_lock(&vm->eviction_lock); + amdgpu_vm_eviction_lock(vm); if (vm->evicting) { r = -EBUSY; goto error_unlock; @@ -1576,7 +1602,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, r = vm->update_funcs->commit(&params, fence); error_unlock: - mutex_unlock(&vm->eviction_lock); + amdgpu_vm_eviction_unlock(vm); return r; } @@ -2537,18 +2563,18 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo) return false; /* Try to block ongoing updates */ - if (!mutex_trylock(&bo_base->vm->eviction_lock)) + if (!amdgpu_vm_eviction_trylock(bo_base->vm)) return false; /* Don't evict VM page tables while they are updated */ if (!dma_fence_is_signaled(bo_base->vm->last_direct) || !dma_fence_is_signaled(bo_base->vm->last_delayed)) { - mutex_unlock(&bo_base->vm->eviction_lock); + amdgpu_vm_eviction_unlock(bo_base->vm); return false; } bo_base->vm->evicting = true; - mutex_unlock(&bo_base->vm->eviction_lock); + amdgpu_vm_eviction_unlock(bo_base->vm); return true; } diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 100547f094ff..c21a36bebc0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -30,6 +30,7 @@ #include <drm/gpu_scheduler.h> #include <drm/drm_file.h> #include <drm/ttm/ttm_bo_driver.h> +#include <linux/sched/mm.h> #include "amdgpu_sync.h" #include "amdgpu_ring.h" @@ -242,9 +243,12 @@ struct amdgpu_vm { /* tree of virtual addresses mapped */ struct rb_root_cached va; - /* Lock to prevent eviction while we are updating page tables */ + /* Lock to prevent eviction while we are updating page tables + * use vm_eviction_lock/unlock(vm) + */ struct mutex eviction_lock; bool evicting; + unsigned int saved_flags; /* BOs who needs a validation */ struct list_head evicted; -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 2020-01-11 18:39 [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock Alex Sierra @ 2020-01-11 18:39 ` Alex Sierra 2020-01-13 12:12 ` Christian König 2020-01-13 16:36 ` Felix Kuehling 0 siblings, 2 replies; 12+ messages in thread From: Alex Sierra @ 2020-01-11 18:39 UTC (permalink / raw) To: amd-gfx; +Cc: Alex Sierra tlbs invalidate pointer function added to kiq_pm4_funcs struct. This way, tlb flush can be done through kiq member. TLBs invalidatation implemented for gfx9 and gfx10. Change-Id: I1b77b364f3ae0038ff3e70e869be5f2ef6e6d293 Signed-off-by: Alex Sierra <alex.sierra@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 +++ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 33 +++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 32 ++++++++++++++++++++++++ 3 files changed, 69 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 8e88e0411662..2927837bd401 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -76,11 +76,15 @@ struct kiq_pm4_funcs { struct amdgpu_ring *ring, u64 addr, u64 seq); + int (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub); /* Packet sizes */ int set_resources_size; int map_queues_size; int unmap_queues_size; int query_status_size; + int invalidate_tlbs_size; }; struct amdgpu_kiq { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 379e46c1b7f6..2e82213f57eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -40,6 +40,7 @@ #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h" #include "soc15.h" +#include "soc15d.h" #include "soc15_common.h" #include "clearstate_gfx10.h" #include "v10_structs.h" @@ -346,15 +347,47 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring, amdgpu_ring_write(kiq_ring, 
upper_32_bits(seq)); } +static int gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + signed long r; + uint32_t seq; + struct amdgpu_device *adev = kiq_ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + + spin_lock(&adev->gfx.kiq.ring_lock); + amdgpu_ring_alloc(kiq_ring, kiq->pmf->invalidate_tlbs_size); + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(kiq_ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | + PACKET3_INVALIDATE_TLBS_PASID(pasid) | + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); + amdgpu_fence_emit_polling(kiq_ring, &seq); + amdgpu_ring_commit(kiq_ring); + spin_unlock(&adev->gfx.kiq.ring_lock); + + r = amdgpu_fence_wait_polling(kiq_ring, seq, adev->usec_timeout); + if (r < 1) { + DRM_ERROR("wait for kiq fence error: %ld.\n", r); + return -ETIME; + } + + return 0; +} + static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = { .kiq_set_resources = gfx10_kiq_set_resources, .kiq_map_queues = gfx10_kiq_map_queues, .kiq_unmap_queues = gfx10_kiq_unmap_queues, .kiq_query_status = gfx10_kiq_query_status, + .kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs, .set_resources_size = 8, .map_queues_size = 7, .unmap_queues_size = 6, .query_status_size = 7, + .invalidate_tlbs_size = 12, }; static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index ad0179ea2cc5..5be6fab55b73 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -837,15 +837,47 @@ static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring, amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); } +static int gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + signed long r; + uint32_t seq; + struct amdgpu_device *adev = kiq_ring->adev; + 
struct amdgpu_kiq *kiq = &adev->gfx.kiq; + + spin_lock(&adev->gfx.kiq.ring_lock); + amdgpu_ring_alloc(kiq_ring, kiq->pmf->invalidate_tlbs_size); + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(kiq_ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | + PACKET3_INVALIDATE_TLBS_PASID(pasid) | + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); + amdgpu_fence_emit_polling(kiq_ring, &seq); + amdgpu_ring_commit(kiq_ring); + spin_unlock(&adev->gfx.kiq.ring_lock); + + r = amdgpu_fence_wait_polling(kiq_ring, seq, adev->usec_timeout); + if (r < 1) { + DRM_ERROR("wait for kiq fence error: %ld.\n", r); + return -ETIME; + } + + return 0; +} + static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = { .kiq_set_resources = gfx_v9_0_kiq_set_resources, .kiq_map_queues = gfx_v9_0_kiq_map_queues, .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues, .kiq_query_status = gfx_v9_0_kiq_query_status, + .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs, .set_resources_size = 8, .map_queues_size = 7, .unmap_queues_size = 6, .query_status_size = 7, + .invalidate_tlbs_size = 12, }; static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 2020-01-11 18:39 ` [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra @ 2020-01-13 12:12 ` Christian König 2020-01-13 16:36 ` Felix Kuehling 1 sibling, 0 replies; 12+ messages in thread From: Christian König @ 2020-01-13 12:12 UTC (permalink / raw) To: Alex Sierra, amd-gfx Am 11.01.20 um 19:39 schrieb Alex Sierra: > tlbs invalidate pointer function added to kiq_pm4_funcs struct. > This way, tlb flush can be done through kiq member. > TLBs invalidatation implemented for gfx9 and gfx10. > > Change-Id: I1b77b364f3ae0038ff3e70e869be5f2ef6e6d293 > Signed-off-by: Alex Sierra <alex.sierra@amd.com> Please note that I can't judge the correctness of the PM4 packets, but the interface looks really nice and clean now. Reviewed-by: Christian König <christian.koenig@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 +++ > drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 33 +++++++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 32 ++++++++++++++++++++++++ > 3 files changed, 69 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > index 8e88e0411662..2927837bd401 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > @@ -76,11 +76,15 @@ struct kiq_pm4_funcs { > struct amdgpu_ring *ring, > u64 addr, > u64 seq); > + int (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring, > + uint16_t pasid, uint32_t flush_type, > + bool all_hub); > /* Packet sizes */ > int set_resources_size; > int map_queues_size; > int unmap_queues_size; > int query_status_size; > + int invalidate_tlbs_size; > }; > > struct amdgpu_kiq { > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > index 379e46c1b7f6..2e82213f57eb 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > @@ -40,6 +40,7 @@ > #include 
"ivsrcid/gfx/irqsrcs_gfx_10_1.h" > > #include "soc15.h" > +#include "soc15d.h" > #include "soc15_common.h" > #include "clearstate_gfx10.h" > #include "v10_structs.h" > @@ -346,15 +347,47 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring, > amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); > } > > +static int gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, > + uint16_t pasid, uint32_t flush_type, > + bool all_hub) > +{ > + signed long r; > + uint32_t seq; > + struct amdgpu_device *adev = kiq_ring->adev; > + struct amdgpu_kiq *kiq = &adev->gfx.kiq; > + > + spin_lock(&adev->gfx.kiq.ring_lock); > + amdgpu_ring_alloc(kiq_ring, kiq->pmf->invalidate_tlbs_size); > + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); > + amdgpu_ring_write(kiq_ring, > + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | > + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | > + PACKET3_INVALIDATE_TLBS_PASID(pasid) | > + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); > + amdgpu_fence_emit_polling(kiq_ring, &seq); > + amdgpu_ring_commit(kiq_ring); > + spin_unlock(&adev->gfx.kiq.ring_lock); > + > + r = amdgpu_fence_wait_polling(kiq_ring, seq, adev->usec_timeout); > + if (r < 1) { > + DRM_ERROR("wait for kiq fence error: %ld.\n", r); > + return -ETIME; > + } > + > + return 0; > +} > + > static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = { > .kiq_set_resources = gfx10_kiq_set_resources, > .kiq_map_queues = gfx10_kiq_map_queues, > .kiq_unmap_queues = gfx10_kiq_unmap_queues, > .kiq_query_status = gfx10_kiq_query_status, > + .kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs, > .set_resources_size = 8, > .map_queues_size = 7, > .unmap_queues_size = 6, > .query_status_size = 7, > + .invalidate_tlbs_size = 12, > }; > > static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > index ad0179ea2cc5..5be6fab55b73 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > +++ 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > @@ -837,15 +837,47 @@ static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring, > amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); > } > > +static int gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, > + uint16_t pasid, uint32_t flush_type, > + bool all_hub) > +{ > + signed long r; > + uint32_t seq; > + struct amdgpu_device *adev = kiq_ring->adev; > + struct amdgpu_kiq *kiq = &adev->gfx.kiq; > + > + spin_lock(&adev->gfx.kiq.ring_lock); > + amdgpu_ring_alloc(kiq_ring, kiq->pmf->invalidate_tlbs_size); > + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); > + amdgpu_ring_write(kiq_ring, > + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | > + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | > + PACKET3_INVALIDATE_TLBS_PASID(pasid) | > + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); > + amdgpu_fence_emit_polling(kiq_ring, &seq); > + amdgpu_ring_commit(kiq_ring); > + spin_unlock(&adev->gfx.kiq.ring_lock); > + > + r = amdgpu_fence_wait_polling(kiq_ring, seq, adev->usec_timeout); > + if (r < 1) { > + DRM_ERROR("wait for kiq fence error: %ld.\n", r); > + return -ETIME; > + } > + > + return 0; > +} > + > static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = { > .kiq_set_resources = gfx_v9_0_kiq_set_resources, > .kiq_map_queues = gfx_v9_0_kiq_map_queues, > .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues, > .kiq_query_status = gfx_v9_0_kiq_query_status, > + .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs, > .set_resources_size = 8, > .map_queues_size = 7, > .unmap_queues_size = 6, > .query_status_size = 7, > + .invalidate_tlbs_size = 12, > }; > > static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 2020-01-11 18:39 ` [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra 2020-01-13 12:12 ` Christian König @ 2020-01-13 16:36 ` Felix Kuehling 1 sibling, 0 replies; 12+ messages in thread From: Felix Kuehling @ 2020-01-13 16:36 UTC (permalink / raw) To: Alex Sierra, amd-gfx On 2020-01-11 1:39 p.m., Alex Sierra wrote: > tlbs invalidate pointer function added to kiq_pm4_funcs struct. > This way, tlb flush can be done through kiq member. > TLBs invalidatation implemented for gfx9 and gfx10. > > Change-Id: I1b77b364f3ae0038ff3e70e869be5f2ef6e6d293 > Signed-off-by: Alex Sierra <alex.sierra@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 +++ > drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 33 +++++++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 32 ++++++++++++++++++++++++ > 3 files changed, 69 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > index 8e88e0411662..2927837bd401 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > @@ -76,11 +76,15 @@ struct kiq_pm4_funcs { > struct amdgpu_ring *ring, > u64 addr, > u64 seq); > + int (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring, > + uint16_t pasid, uint32_t flush_type, > + bool all_hub); > /* Packet sizes */ > int set_resources_size; > int map_queues_size; > int unmap_queues_size; > int query_status_size; > + int invalidate_tlbs_size; > }; > > struct amdgpu_kiq { > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > index 379e46c1b7f6..2e82213f57eb 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > @@ -40,6 +40,7 @@ > #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h" > > #include "soc15.h" > +#include "soc15d.h" > #include "soc15_common.h" > #include "clearstate_gfx10.h" > #include "v10_structs.h" > @@ 
-346,15 +347,47 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring, > amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); > } > > +static int gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, > + uint16_t pasid, uint32_t flush_type, > + bool all_hub) > +{ > + signed long r; > + uint32_t seq; > + struct amdgpu_device *adev = kiq_ring->adev; > + struct amdgpu_kiq *kiq = &adev->gfx.kiq; > + > + spin_lock(&adev->gfx.kiq.ring_lock); > + amdgpu_ring_alloc(kiq_ring, kiq->pmf->invalidate_tlbs_size); > + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); > + amdgpu_ring_write(kiq_ring, > + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | > + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | > + PACKET3_INVALIDATE_TLBS_PASID(pasid) | > + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); > + amdgpu_fence_emit_polling(kiq_ring, &seq); > + amdgpu_ring_commit(kiq_ring); > + spin_unlock(&adev->gfx.kiq.ring_lock); The other KIQ functions don't include the emit_polling, commit and locking. I think the way the KIQ-funcs interface is meant to be used, all that should be outside the IP-version-specific functions. For consistency all you should do here is the amdgpu_ring_write calls with IP-version-specific packets. 
Regards, Felix > + > + r = amdgpu_fence_wait_polling(kiq_ring, seq, adev->usec_timeout); > + if (r < 1) { > + DRM_ERROR("wait for kiq fence error: %ld.\n", r); > + return -ETIME; > + } > + > + return 0; > +} > + > static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = { > .kiq_set_resources = gfx10_kiq_set_resources, > .kiq_map_queues = gfx10_kiq_map_queues, > .kiq_unmap_queues = gfx10_kiq_unmap_queues, > .kiq_query_status = gfx10_kiq_query_status, > + .kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs, > .set_resources_size = 8, > .map_queues_size = 7, > .unmap_queues_size = 6, > .query_status_size = 7, > + .invalidate_tlbs_size = 12, > }; > > static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > index ad0179ea2cc5..5be6fab55b73 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > @@ -837,15 +837,47 @@ static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring, > amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); > } > > +static int gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, > + uint16_t pasid, uint32_t flush_type, > + bool all_hub) > +{ > + signed long r; > + uint32_t seq; > + struct amdgpu_device *adev = kiq_ring->adev; > + struct amdgpu_kiq *kiq = &adev->gfx.kiq; > + > + spin_lock(&adev->gfx.kiq.ring_lock); > + amdgpu_ring_alloc(kiq_ring, kiq->pmf->invalidate_tlbs_size); > + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); > + amdgpu_ring_write(kiq_ring, > + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | > + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | > + PACKET3_INVALIDATE_TLBS_PASID(pasid) | > + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); > + amdgpu_fence_emit_polling(kiq_ring, &seq); > + amdgpu_ring_commit(kiq_ring); > + spin_unlock(&adev->gfx.kiq.ring_lock); > + > + r = amdgpu_fence_wait_polling(kiq_ring, seq, adev->usec_timeout); > + if (r < 1) { > + DRM_ERROR("wait 
for kiq fence error: %ld.\n", r); > + return -ETIME; > + } > + > + return 0; > +} > + > static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = { > .kiq_set_resources = gfx_v9_0_kiq_set_resources, > .kiq_map_queues = gfx_v9_0_kiq_map_queues, > .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues, > .kiq_query_status = gfx_v9_0_kiq_query_status, > + .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs, > .set_resources_size = 8, > .map_queues_size = 7, > .unmap_queues_size = 6, > .query_status_size = 7, > + .invalidate_tlbs_size = 12, > }; > > static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2020-01-14 3:21 UTC | newest] Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2020-01-13 20:26 [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra 2020-01-13 20:26 ` [PATCH 4/7] drm/amdgpu: replace kcq enable/disable functions on gfx_v9 Alex Sierra 2020-01-13 20:26 ` [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid Alex Sierra 2020-01-14 0:34 ` Felix Kuehling 2020-01-14 0:48 ` Sierra Guiza, Alejandro (Alex) 2020-01-14 0:55 ` Felix Kuehling 2020-01-14 3:21 ` Sierra Guiza, Alejandro (Alex) 2020-01-13 22:16 ` [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Felix Kuehling 2020-01-14 0:45 ` Felix Kuehling -- strict thread matches above, loose matches on Subject: below -- 2020-01-11 18:39 [PATCH 1/7] drm/amdgpu: Avoid reclaim fs while eviction lock Alex Sierra 2020-01-11 18:39 ` [PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 Alex Sierra 2020-01-13 12:12 ` Christian König 2020-01-13 16:36 ` Felix Kuehling
This is an external index of several public inboxes; see the mirroring instructions for how to clone and mirror all data and code used by this external index.