* [PATCH 1/2] drm/amdgpu/vcn: fix race condition issue for vcn start @ 2020-02-10 18:06 James Zhu 2020-02-10 18:06 ` [PATCH 2/2] drm/amdgpu/vcn: fix race condition issue for dpg unpause mode switch James Zhu 2020-02-12 14:27 ` [PATCH 1/2] drm/amdgpu/vcn: fix race condition issue for vcn start Zhu, James 0 siblings, 2 replies; 5+ messages in thread From: James Zhu @ 2020-02-10 18:06 UTC (permalink / raw) To: amd-gfx; +Cc: jamesz Fix race condition issue when multiple vcn starts are called. Signed-off-by: James Zhu <James.Zhu@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index f96464e..aa7663f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -63,6 +63,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) int i, r; INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler); + mutex_init(&adev->vcn.vcn_pg_lock); switch (adev->asic_type) { case CHIP_RAVEN: @@ -210,6 +211,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) } release_firmware(adev->vcn.fw); + mutex_destroy(&adev->vcn.vcn_pg_lock); return 0; } @@ -321,6 +323,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work); + mutex_lock(&adev->vcn.vcn_pg_lock); if (set_clocks) { amdgpu_gfx_off_ctrl(adev, false); amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, @@ -345,6 +348,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) adev->vcn.pause_dpg_mode(adev, ring->me, &new_state); } + mutex_unlock(&adev->vcn.vcn_pg_lock); } void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 6fe0573..2ae110d 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -200,6 +200,7 @@ struct amdgpu_vcn { struct drm_gpu_scheduler *vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES]; uint32_t num_vcn_enc_sched; uint32_t num_vcn_dec_sched; + struct mutex vcn_pg_lock; unsigned harvest_config; int (*pause_dpg_mode)(struct amdgpu_device *adev, -- 2.7.4 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 2/2] drm/amdgpu/vcn: fix race condition issue for dpg unpause mode switch 2020-02-10 18:06 [PATCH 1/2] drm/amdgpu/vcn: fix race condition issue for vcn start James Zhu @ 2020-02-10 18:06 ` James Zhu 2020-02-12 14:27 ` [PATCH 1/2] drm/amdgpu/vcn: fix race condition issue for vcn start Zhu, James 1 sibling, 0 replies; 5+ messages in thread From: James Zhu @ 2020-02-10 18:06 UTC (permalink / raw) To: amd-gfx; +Cc: jamesz We cannot rely only on the enc fence to decide when to switch to dpg unpause mode, since an enc thread may not schedule a fence in time when multiple threads are running. Signed-off-by: James Zhu <James.Zhu@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 12 +++++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 1 + 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index aa7663f..28ef11c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -64,6 +64,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler); mutex_init(&adev->vcn.vcn_pg_lock); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) + atomic_set(&adev->vcn.inst[i].enc_submission_cnt, 0); switch (adev->asic_type) { case CHIP_RAVEN: @@ -338,14 +340,15 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) for (i = 0; i < adev->vcn.num_enc_rings; ++i) { fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]); } - if (fences) + if (fences || atomic_read(&adev->vcn.inst[ring->me].enc_submission_cnt)) new_state.fw_based = VCN_DPG_STATE__PAUSE; else new_state.fw_based = VCN_DPG_STATE__UNPAUSE; - if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) + if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) { + atomic_inc(&adev->vcn.inst[ring->me].enc_submission_cnt); new_state.fw_based = VCN_DPG_STATE__PAUSE; - + } adev->vcn.pause_dpg_mode(adev, ring->me, 
&new_state); } mutex_unlock(&adev->vcn.vcn_pg_lock); @@ -354,6 +357,9 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) { schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT); + if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC && + atomic_dec_return(&ring->adev->vcn.inst[ring->me].enc_submission_cnt) < 0) + atomic_set(&ring->adev->vcn.inst[ring->me].enc_submission_cnt, 0); } int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 2ae110d..4ca76c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -183,6 +183,7 @@ struct amdgpu_vcn_inst { void *dpg_sram_cpu_addr; uint64_t dpg_sram_gpu_addr; uint32_t *dpg_sram_curr_addr; + atomic_t enc_submission_cnt; }; struct amdgpu_vcn { -- 2.7.4 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH 1/2] drm/amdgpu/vcn: fix race condition issue for vcn start 2020-02-10 18:06 [PATCH 1/2] drm/amdgpu/vcn: fix race condition issue for vcn start James Zhu 2020-02-10 18:06 ` [PATCH 2/2] drm/amdgpu/vcn: fix race condition issue for dpg unpause mode switch James Zhu @ 2020-02-12 14:27 ` Zhu, James 2020-02-12 15:11 ` Liu, Leo 1 sibling, 1 reply; 5+ messages in thread From: Zhu, James @ 2020-02-12 14:27 UTC (permalink / raw) To: amd-gfx [-- Attachment #1.1: Type: text/plain, Size: 2622 bytes --] [AMD Official Use Only - Internal Distribution Only] ping ________________________________ From: Zhu, James <James.Zhu@amd.com> Sent: Monday, February 10, 2020 1:06 PM To: amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org> Cc: Zhu, James <James.Zhu@amd.com> Subject: [PATCH 1/2] drm/amdgpu/vcn: fix race condition issue for vcn start Fix race condition issue when multiple vcn starts are called. Signed-off-by: James Zhu <James.Zhu@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index f96464e..aa7663f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -63,6 +63,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) int i, r; INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler); + mutex_init(&adev->vcn.vcn_pg_lock); switch (adev->asic_type) { case CHIP_RAVEN: @@ -210,6 +211,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) } release_firmware(adev->vcn.fw); + mutex_destroy(&adev->vcn.vcn_pg_lock); return 0; } @@ -321,6 +323,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work); + mutex_lock(&adev->vcn.vcn_pg_lock); if (set_clocks) { amdgpu_gfx_off_ctrl(adev, false); 
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, @@ -345,6 +348,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) adev->vcn.pause_dpg_mode(adev, ring->me, &new_state); } + mutex_unlock(&adev->vcn.vcn_pg_lock); } void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 6fe0573..2ae110d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -200,6 +200,7 @@ struct amdgpu_vcn { struct drm_gpu_scheduler *vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES]; uint32_t num_vcn_enc_sched; uint32_t num_vcn_dec_sched; + struct mutex vcn_pg_lock; unsigned harvest_config; int (*pause_dpg_mode)(struct amdgpu_device *adev, -- 2.7.4 [-- Attachment #1.2: Type: text/html, Size: 5239 bytes --] [-- Attachment #2: Type: text/plain, Size: 154 bytes --] _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 5+ messages in thread
* RE: [PATCH 1/2] drm/amdgpu/vcn: fix race condition issue for vcn start 2020-02-12 14:27 ` [PATCH 1/2] drm/amdgpu/vcn: fix race condition issue for vcn start Zhu, James @ 2020-02-12 15:11 ` Liu, Leo 2020-02-12 16:13 ` Zhu, James 0 siblings, 1 reply; 5+ messages in thread From: Liu, Leo @ 2020-02-12 15:11 UTC (permalink / raw) To: Zhu, James, amd-gfx [-- Attachment #1.1: Type: text/plain, Size: 3260 bytes --] With your patches, I am still seeing the hang with multiple processes of decode, encode, and transcode. I think we need to find the root cause of that and give a comprehensive fix from the driver side, the firmware side, or both. Regards, Leo From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Zhu, James Sent: Wednesday, February 12, 2020 9:28 AM To: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH 1/2] drm/amdgpu/vcn: fix race condition issue for vcn start [AMD Official Use Only - Internal Distribution Only] ping ________________________________ From: Zhu, James <James.Zhu@amd.com<mailto:James.Zhu@amd.com>> Sent: Monday, February 10, 2020 1:06 PM To: amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>> Cc: Zhu, James <James.Zhu@amd.com<mailto:James.Zhu@amd.com>> Subject: [PATCH 1/2] drm/amdgpu/vcn: fix race condition issue for vcn start Fix race condition issue when multiple vcn starts are called. 
Signed-off-by: James Zhu <James.Zhu@amd.com<mailto:James.Zhu@amd.com>> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index f96464e..aa7663f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -63,6 +63,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) int i, r; INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler); + mutex_init(&adev->vcn.vcn_pg_lock); switch (adev->asic_type) { case CHIP_RAVEN: @@ -210,6 +211,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) } release_firmware(adev->vcn.fw); + mutex_destroy(&adev->vcn.vcn_pg_lock); return 0; } @@ -321,6 +323,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work); + mutex_lock(&adev->vcn.vcn_pg_lock); if (set_clocks) { amdgpu_gfx_off_ctrl(adev, false); amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, @@ -345,6 +348,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) adev->vcn.pause_dpg_mode(adev, ring->me, &new_state); } + mutex_unlock(&adev->vcn.vcn_pg_lock); } void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 6fe0573..2ae110d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -200,6 +200,7 @@ struct amdgpu_vcn { struct drm_gpu_scheduler *vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES]; uint32_t num_vcn_enc_sched; uint32_t num_vcn_dec_sched; + struct mutex vcn_pg_lock; unsigned harvest_config; int (*pause_dpg_mode)(struct amdgpu_device *adev, -- 2.7.4 [-- Attachment #1.2: Type: text/html, Size: 8309 bytes --] [-- Attachment #2: Type: text/plain, Size: 154 bytes --] 
_______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH 1/2] drm/amdgpu/vcn: fix race condition issue for vcn start 2020-02-12 15:11 ` Liu, Leo @ 2020-02-12 16:13 ` Zhu, James 0 siblings, 0 replies; 5+ messages in thread From: Zhu, James @ 2020-02-12 16:13 UTC (permalink / raw) To: Liu, Leo, amd-gfx [-- Attachment #1.1: Type: text/plain, Size: 3826 bytes --] Timeout issues are complicated. These patches can fix the driver-side issue. The Arcturus SPG timeout issue can be fixed with these patches. Other types of timeout issues are still under investigation. Thanks & Best Regards! James Zhu ________________________________ From: Liu, Leo <Leo.Liu@amd.com> Sent: Wednesday, February 12, 2020 10:11 AM To: Zhu, James <James.Zhu@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org> Subject: RE: [PATCH 1/2] drm/amdgpu/vcn: fix race condition issue for vcn start With your patches, still seeing the hung with multiple processes of decode, encode, and transcode. I think we need find the root cause of that and give a comprehensive fix either from driver side or firmware side or both. Regards, Leo From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Zhu, James Sent: Wednesday, February 12, 2020 9:28 AM To: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH 1/2] drm/amdgpu/vcn: fix race condition issue for vcn start [AMD Official Use Only - Internal Distribution Only] ping ________________________________ From: Zhu, James <James.Zhu@amd.com<mailto:James.Zhu@amd.com>> Sent: Monday, February 10, 2020 1:06 PM To: amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>> Cc: Zhu, James <James.Zhu@amd.com<mailto:James.Zhu@amd.com>> Subject: [PATCH 1/2] drm/amdgpu/vcn: fix race condition issue for vcn start Fix race condition issue when multiple vcn starts are called. 
Signed-off-by: James Zhu <James.Zhu@amd.com<mailto:James.Zhu@amd.com>> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index f96464e..aa7663f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -63,6 +63,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) int i, r; INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler); + mutex_init(&adev->vcn.vcn_pg_lock); switch (adev->asic_type) { case CHIP_RAVEN: @@ -210,6 +211,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) } release_firmware(adev->vcn.fw); + mutex_destroy(&adev->vcn.vcn_pg_lock); return 0; } @@ -321,6 +323,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work); + mutex_lock(&adev->vcn.vcn_pg_lock); if (set_clocks) { amdgpu_gfx_off_ctrl(adev, false); amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, @@ -345,6 +348,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) adev->vcn.pause_dpg_mode(adev, ring->me, &new_state); } + mutex_unlock(&adev->vcn.vcn_pg_lock); } void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 6fe0573..2ae110d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -200,6 +200,7 @@ struct amdgpu_vcn { struct drm_gpu_scheduler *vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES]; uint32_t num_vcn_enc_sched; uint32_t num_vcn_dec_sched; + struct mutex vcn_pg_lock; unsigned harvest_config; int (*pause_dpg_mode)(struct amdgpu_device *adev, -- 2.7.4 [-- Attachment #1.2: Type: text/html, Size: 8665 bytes --] [-- Attachment #2: Type: text/plain, Size: 154 bytes --] 
_______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2020-02-12 16:13 UTC | newest] Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2020-02-10 18:06 [PATCH 1/2] drm/amdgpu/vcn: fix race condition issue for vcn start James Zhu 2020-02-10 18:06 ` [PATCH 2/2] drm/amdgpu/vcn: fix race condition issue for dpg unpause mode switch James Zhu 2020-02-12 14:27 ` [PATCH 1/2] drm/amdgpu/vcn: fix race condition issue for vcn start Zhu, James 2020-02-12 15:11 ` Liu, Leo 2020-02-12 16:13 ` Zhu, James
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).