* [PATCH] drm/amdgpu: remove ctx->lock
@ 2022-02-11 8:24 Ken Xue
2022-02-11 8:25 ` Christian König
2022-02-11 18:13 ` Andrey Grodzovsky
0 siblings, 2 replies; 4+ messages in thread
From: Ken Xue @ 2022-02-11 8:24 UTC (permalink / raw)
To: amd-gfx, Christian.Koenig, Andrey.Grodzovsky; +Cc: Ken Xue
KMD reports a warning on holding a lock from drm_syncobj_find_fence,
when running amdgpu_test case “syncobj timeline test”.
ctx->lock was designed to prevent concurrent "amdgpu_ctx_wait_prev_fence"
calls and to avoid a dead reservation lock from GPU reset. Since no reservation
lock is held in the latest GPU reset code any more, ctx->lock can simply be
removed, and concurrent "amdgpu_ctx_wait_prev_fence" calls can instead be
prevented by the PD root BO reservation lock.
call stacks:
=================
//hold lock
amdgpu_cs_ioctl->amdgpu_cs_parser_init->mutex_lock(&parser->ctx->lock);
…
//report warning
amdgpu_cs_dependencies->amdgpu_cs_process_syncobj_timeline_in_dep \
->amdgpu_syncobj_lookup_and_add_to_sync -> drm_syncobj_find_fence \
-> lockdep_assert_none_held_once
…
amdgpu_cs_ioctl->amdgpu_cs_parser_fini->mutex_unlock(&parser->ctx->lock);
Signed-off-by: Ken Xue <Ken.Xue@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 16 +++++++++++-----
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 --
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 1 -
3 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 1cd677bd5d7e..b32a7cfbe1e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -127,8 +127,6 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
goto free_chunk;
}
- mutex_lock(&p->ctx->lock);
-
/* skip guilty context job */
if (atomic_read(&p->ctx->guilty) == 1) {
ret = -ECANCELED;
@@ -571,6 +569,16 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
goto out;
}
+ /* Move fence waiting after getting reservation lock of
+ * PD root. Then there is no need on a ctx mutex lock.
+ */
+ r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entity);
+ if (unlikely(r != 0)) {
+ if (r != -ERESTARTSYS)
+ DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
+ goto error_validate;
+ }
+
amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
&p->bytes_moved_vis_threshold);
p->bytes_moved = 0;
@@ -682,7 +690,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
dma_fence_put(parser->fence);
if (parser->ctx) {
- mutex_unlock(&parser->ctx->lock);
amdgpu_ctx_put(parser->ctx);
}
if (parser->bo_list)
@@ -926,7 +933,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
if (parser->job->uf_addr && ring->funcs->no_user_fence)
return -EINVAL;
- return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
+ return 0;
}
static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
@@ -1321,7 +1328,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
goto out;
r = amdgpu_cs_submit(&parser, cs);
-
out:
amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 2c929fa40379..1c72f6095f08 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -230,7 +230,6 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
kref_init(&ctx->refcount);
spin_lock_init(&ctx->ring_lock);
- mutex_init(&ctx->lock);
ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
ctx->reset_counter_query = ctx->reset_counter;
@@ -352,7 +351,6 @@ static void amdgpu_ctx_fini(struct kref *ref)
}
}
amdgpu_ctx_set_stable_pstate(ctx, AMDGPU_CTX_STABLE_PSTATE_NONE);
- mutex_destroy(&ctx->lock);
kfree(ctx);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index 142f2f87d44c..d0cbfcea90f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -49,7 +49,6 @@ struct amdgpu_ctx {
bool preamble_presented;
int32_t init_priority;
int32_t override_priority;
- struct mutex lock;
atomic_t guilty;
unsigned long ras_counter_ce;
unsigned long ras_counter_ue;
--
2.17.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] drm/amdgpu: remove ctx->lock
2022-02-11 8:24 [PATCH] drm/amdgpu: remove ctx->lock Ken Xue
@ 2022-02-11 8:25 ` Christian König
2022-02-11 18:13 ` Andrey Grodzovsky
1 sibling, 0 replies; 4+ messages in thread
From: Christian König @ 2022-02-11 8:25 UTC (permalink / raw)
To: Ken Xue, amd-gfx, Andrey.Grodzovsky
Am 11.02.22 um 09:24 schrieb Ken Xue:
> KMD reports a warning on holding a lock from drm_syncobj_find_fence,
> when running amdgpu_test case “syncobj timeline test”.
>
> ctx->lock was designed to prevent concurrent "amdgpu_ctx_wait_prev_fence"
> calls and avoid dead reservation lock from GPU reset. since no reservation
> lock is held in lates GPU reset any more, ctx->lock can be simply removed
> and concurrent "amdgpu_ctx_wait_prev_fence" call also can be prevented by
> PD root bo reservation lock.
>
> call stacks:
> =================
> //hold lock
> amdgpu_cs_ioctl->amdgpu_cs_parser_init->mutex_lock(&parser->ctx->lock);
> …
> //report warning
> amdgpu_cs_dependencies->amdgpu_cs_process_syncobj_timeline_in_dep \
> ->amdgpu_syncobj_lookup_and_add_to_sync -> drm_syncobj_find_fence \
> -> lockdep_assert_none_held_once
> …
> amdgpu_cs_ioctl->amdgpu_cs_parser_fini->mutex_unlock(&parser->ctx->lock);
>
> Signed-off-by: Ken Xue <Ken.Xue@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 16 +++++++++++-----
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 --
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 1 -
> 3 files changed, 11 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 1cd677bd5d7e..b32a7cfbe1e2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -127,8 +127,6 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
> goto free_chunk;
> }
>
> - mutex_lock(&p->ctx->lock);
> -
> /* skip guilty context job */
> if (atomic_read(&p->ctx->guilty) == 1) {
> ret = -ECANCELED;
> @@ -571,6 +569,16 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
> goto out;
> }
>
> + /* Move fence waiting after getting reservation lock of
> + * PD root. Then there is no need on a ctx mutex lock.
> + */
> + r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entity);
> + if (unlikely(r != 0)) {
> + if (r != -ERESTARTSYS)
> + DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
> + goto error_validate;
> + }
> +
> amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
> &p->bytes_moved_vis_threshold);
> p->bytes_moved = 0;
> @@ -682,7 +690,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
> dma_fence_put(parser->fence);
>
> if (parser->ctx) {
> - mutex_unlock(&parser->ctx->lock);
> amdgpu_ctx_put(parser->ctx);
> }
> if (parser->bo_list)
> @@ -926,7 +933,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
> if (parser->job->uf_addr && ring->funcs->no_user_fence)
> return -EINVAL;
>
> - return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
> + return 0;
> }
>
> static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
> @@ -1321,7 +1328,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> goto out;
>
> r = amdgpu_cs_submit(&parser, cs);
> -
> out:
> amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index 2c929fa40379..1c72f6095f08 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -230,7 +230,6 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>
> kref_init(&ctx->refcount);
> spin_lock_init(&ctx->ring_lock);
> - mutex_init(&ctx->lock);
>
> ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
> ctx->reset_counter_query = ctx->reset_counter;
> @@ -352,7 +351,6 @@ static void amdgpu_ctx_fini(struct kref *ref)
> }
> }
> amdgpu_ctx_set_stable_pstate(ctx, AMDGPU_CTX_STABLE_PSTATE_NONE);
> - mutex_destroy(&ctx->lock);
> kfree(ctx);
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> index 142f2f87d44c..d0cbfcea90f7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> @@ -49,7 +49,6 @@ struct amdgpu_ctx {
> bool preamble_presented;
> int32_t init_priority;
> int32_t override_priority;
> - struct mutex lock;
> atomic_t guilty;
> unsigned long ras_counter_ce;
> unsigned long ras_counter_ue;
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] drm/amdgpu: remove ctx->lock
2022-02-11 8:24 [PATCH] drm/amdgpu: remove ctx->lock Ken Xue
2022-02-11 8:25 ` Christian König
@ 2022-02-11 18:13 ` Andrey Grodzovsky
2022-02-13 1:48 ` Xue, Ken
1 sibling, 1 reply; 4+ messages in thread
From: Andrey Grodzovsky @ 2022-02-11 18:13 UTC (permalink / raw)
To: Ken Xue, amd-gfx, Christian.Koenig
On 2022-02-11 03:24, Ken Xue wrote:
> KMD reports a warning on holding a lock from drm_syncobj_find_fence,
> when running amdgpu_test case “syncobj timeline test”.
>
> ctx->lock was designed to prevent concurrent "amdgpu_ctx_wait_prev_fence"
> calls and avoid dead reservation lock from GPU reset.
Just to help me understand - what do you mean by the dead reservation
lock above ? Can you point me to earlier code version where this did
take place ?
since no reservation
> lock is held in lates GPU reset any more,
Same here - can you point me to where it was held before and
is no longer being held now ?
Andrey
ctx->lock can be simply removed
> and concurrent "amdgpu_ctx_wait_prev_fence" call also can be prevented by
> PD root bo reservation lock.
>
> call stacks:
> =================
> //hold lock
> amdgpu_cs_ioctl->amdgpu_cs_parser_init->mutex_lock(&parser->ctx->lock);
> …
> //report warning
> amdgpu_cs_dependencies->amdgpu_cs_process_syncobj_timeline_in_dep \
> ->amdgpu_syncobj_lookup_and_add_to_sync -> drm_syncobj_find_fence \
> -> lockdep_assert_none_held_once
> …
> amdgpu_cs_ioctl->amdgpu_cs_parser_fini->mutex_unlock(&parser->ctx->lock);
>
> Signed-off-by: Ken Xue <Ken.Xue@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 16 +++++++++++-----
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 --
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 1 -
> 3 files changed, 11 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 1cd677bd5d7e..b32a7cfbe1e2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -127,8 +127,6 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
> goto free_chunk;
> }
>
> - mutex_lock(&p->ctx->lock);
> -
> /* skip guilty context job */
> if (atomic_read(&p->ctx->guilty) == 1) {
> ret = -ECANCELED;
> @@ -571,6 +569,16 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
> goto out;
> }
>
> + /* Move fence waiting after getting reservation lock of
> + * PD root. Then there is no need on a ctx mutex lock.
> + */
> + r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entity);
> + if (unlikely(r != 0)) {
> + if (r != -ERESTARTSYS)
> + DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
> + goto error_validate;
> + }
> +
> amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
> &p->bytes_moved_vis_threshold);
> p->bytes_moved = 0;
> @@ -682,7 +690,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
> dma_fence_put(parser->fence);
>
> if (parser->ctx) {
> - mutex_unlock(&parser->ctx->lock);
> amdgpu_ctx_put(parser->ctx);
> }
> if (parser->bo_list)
> @@ -926,7 +933,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
> if (parser->job->uf_addr && ring->funcs->no_user_fence)
> return -EINVAL;
>
> - return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
> + return 0;
> }
>
> static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
> @@ -1321,7 +1328,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> goto out;
>
> r = amdgpu_cs_submit(&parser, cs);
> -
> out:
> amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index 2c929fa40379..1c72f6095f08 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -230,7 +230,6 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>
> kref_init(&ctx->refcount);
> spin_lock_init(&ctx->ring_lock);
> - mutex_init(&ctx->lock);
>
> ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
> ctx->reset_counter_query = ctx->reset_counter;
> @@ -352,7 +351,6 @@ static void amdgpu_ctx_fini(struct kref *ref)
> }
> }
> amdgpu_ctx_set_stable_pstate(ctx, AMDGPU_CTX_STABLE_PSTATE_NONE);
> - mutex_destroy(&ctx->lock);
> kfree(ctx);
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> index 142f2f87d44c..d0cbfcea90f7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> @@ -49,7 +49,6 @@ struct amdgpu_ctx {
> bool preamble_presented;
> int32_t init_priority;
> int32_t override_priority;
> - struct mutex lock;
> atomic_t guilty;
> unsigned long ras_counter_ce;
> unsigned long ras_counter_ue;
^ permalink raw reply [flat|nested] 4+ messages in thread
* RE: [PATCH] drm/amdgpu: remove ctx->lock
2022-02-11 18:13 ` Andrey Grodzovsky
@ 2022-02-13 1:48 ` Xue, Ken
0 siblings, 0 replies; 4+ messages in thread
From: Xue, Ken @ 2022-02-13 1:48 UTC (permalink / raw)
To: Grodzovsky, Andrey, amd-gfx, Koenig, Christian
[AMD Official Use Only]
> > KMD reports a warning on holding a lock from drm_syncobj_find_fence,
> > when running amdgpu_test case “syncobj timeline test”.
> >
> > ctx->lock was designed to prevent concurrent
> "amdgpu_ctx_wait_prev_fence"
> > calls and avoid dead reservation lock from GPU reset.
>
> Just to help me understand - what do you mean by the dead reservation lock
> above ? Can you point me to earlier code version where this did take place ?
>
> since no reservation
> > lock is held in lates GPU reset any more,
>
> Same here - can you point me when was it was held before and not being
> held now ?
>
> Andrey
[Ken] I think that was a long time ago. I just got that information from https://yhbt.net/lore/all/715810ad-26cc-506d-624d-8e5024968ea0@gmail.com/T/#t.
Let me know if I have misunderstood anything.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2022-02-13 1:48 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-11 8:24 [PATCH] drm/amdgpu: remove ctx->lock Ken Xue
2022-02-11 8:25 ` Christian König
2022-02-11 18:13 ` Andrey Grodzovsky
2022-02-13 1:48 ` Xue, Ken
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.