All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Christian König" <christian.koenig@amd.com>
To: Mikhail Gavrilov <mikhail.v.gavrilov@gmail.com>
Cc: "Deucher, Alexander" <alexander.deucher@amd.com>,
	dri-devel <dri-devel@lists.freedesktop.org>,
	amd-gfx list <amd-gfx@lists.freedesktop.org>
Subject: Re: [6.1][regression] after commit dd80d9c8eecac8c516da5b240d01a35660ba6cb6 some games (Cyberpunk 2077, Forza Horizon 4/5) hang at start
Date: Wed, 26 Oct 2022 09:29:43 +0200	[thread overview]
Message-ID: <a537212d-4b42-4ba4-7707-1e397234c8b7@amd.com> (raw)
In-Reply-To: <CABXGCsNvFvJz4=N=JKYSGVcd=dKfQ3Nv_zOssMb0Z6oK79xZ7g@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 587 bytes --]

Attached is the original test patch rebased on current amd-staging-drm-next.

Can you test if this is enough to make sure that the games start without 
crashing by fetching the userptrs?

Thanks in advance,
Christian.

Am 21.10.22 um 14:36 schrieb Mikhail Gavrilov:
> On Fri, Oct 21, 2022 at 1:33 PM Christian König
> <christian.koenig@amd.com> wrote:
>> Hi,
>>
>> yes Bas already reported this issue, but I couldn't reproduce it. Need
>> to come up with a patch to narrow this down further.
>>
>> Can I send you something to test?
> I would appreciate to test any patches and ideas.
>

[-- Attachment #2: 0001-drm-amdgpu-partial-revert-remove-ctx-lock-v2.patch --]
[-- Type: text/x-patch, Size: 4010 bytes --]

From 852c78656f083394296b3d3b96db33608ce0f272 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Wed, 26 Oct 2022 09:26:01 +0200
Subject: [PATCH] drm/amdgpu: partial revert "remove ctx->lock" v2""
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 6c052af778a61977c271632044c754dbbca4f892.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 26 +++++++++++++++++--------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |  1 +
 3 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 1bbd39b3b0fc..0b331e8bfba6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -57,6 +57,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
 	if (!p->ctx)
 		return -EINVAL;
 
+	mutex_lock(&p->ctx->lock);
+
 	if (atomic_read(&p->ctx->guilty)) {
 		amdgpu_ctx_put(p->ctx);
 		return -ECANCELED;
@@ -578,6 +580,9 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
 	unsigned int ce_preempt = 0, de_preempt = 0;
 	int i, r;
 
+	/* TODO: Investigate why we still need the context lock */
+	mutex_unlock(&p->ctx->lock);
+
 	for (i = 0; i < p->nchunks; ++i) {
 		struct amdgpu_cs_chunk *chunk;
 
@@ -587,38 +592,41 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
 		case AMDGPU_CHUNK_ID_IB:
 			r = amdgpu_cs_p2_ib(p, chunk, &ce_preempt, &de_preempt);
 			if (r)
-				return r;
+				goto out;
 			break;
 		case AMDGPU_CHUNK_ID_DEPENDENCIES:
 		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
 			r = amdgpu_cs_p2_dependencies(p, chunk);
 			if (r)
-				return r;
+				goto out;
 			break;
 		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
 			r = amdgpu_cs_p2_syncobj_in(p, chunk);
 			if (r)
-				return r;
+				goto out;
 			break;
 		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
 			r = amdgpu_cs_p2_syncobj_out(p, chunk);
 			if (r)
-				return r;
+				goto out;
 			break;
 		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
 			r = amdgpu_cs_p2_syncobj_timeline_wait(p, chunk);
 			if (r)
-				return r;
+				goto out;
 			break;
 		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
 			r = amdgpu_cs_p2_syncobj_timeline_signal(p, chunk);
 			if (r)
-				return r;
+				goto out;
 			break;
 		}
 	}
 
-	return 0;
+	r = 0;
+out:
+	mutex_lock(&p->ctx->lock);
+	return r;
 }
 
 /* Convert microseconds to bytes. */
@@ -1335,8 +1343,10 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
 
 	dma_fence_put(parser->fence);
 
-	if (parser->ctx)
+	if (parser->ctx) {
+		mutex_unlock(&parser->ctx->lock);
 		amdgpu_ctx_put(parser->ctx);
+	}
 	if (parser->bo_list)
 		amdgpu_bo_list_put(parser->bo_list);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 331aa191910c..3a23fa45bfed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -315,6 +315,7 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
 	kref_init(&ctx->refcount);
 	ctx->mgr = mgr;
 	spin_lock_init(&ctx->ring_lock);
+	mutex_init(&ctx->lock);
 
 	ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
 	ctx->reset_counter_query = ctx->reset_counter;
@@ -409,6 +410,7 @@ static void amdgpu_ctx_fini(struct kref *ref)
 		drm_dev_exit(idx);
 	}
 
+	mutex_destroy(&ctx->lock);
 	kfree(ctx);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index 0fa0e56daf67..cc7c8afff414 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -53,6 +53,7 @@ struct amdgpu_ctx {
 	bool				preamble_presented;
 	int32_t				init_priority;
 	int32_t				override_priority;
+	struct mutex			lock;
 	atomic_t			guilty;
 	unsigned long			ras_counter_ce;
 	unsigned long			ras_counter_ue;
-- 
2.25.1


WARNING: multiple messages have this Message-ID (diff)
From: "Christian König" <christian.koenig@amd.com>
To: Mikhail Gavrilov <mikhail.v.gavrilov@gmail.com>
Cc: "Deucher, Alexander" <alexander.deucher@amd.com>,
	dri-devel <dri-devel@lists.freedesktop.org>,
	amd-gfx list <amd-gfx@lists.freedesktop.org>,
	Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Subject: Re: [6.1][regression] after commit dd80d9c8eecac8c516da5b240d01a35660ba6cb6 some games (Cyberpunk 2077, Forza Horizon 4/5) hang at start
Date: Wed, 26 Oct 2022 09:29:43 +0200	[thread overview]
Message-ID: <a537212d-4b42-4ba4-7707-1e397234c8b7@amd.com> (raw)
In-Reply-To: <CABXGCsNvFvJz4=N=JKYSGVcd=dKfQ3Nv_zOssMb0Z6oK79xZ7g@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 587 bytes --]

Attached is the original test patch rebased on current amd-staging-drm-next.

Can you test if this is enough to make sure that the games start without 
crashing by fetching the userptrs?

Thanks in advance,
Christian.

Am 21.10.22 um 14:36 schrieb Mikhail Gavrilov:
> On Fri, Oct 21, 2022 at 1:33 PM Christian König
> <christian.koenig@amd.com> wrote:
>> Hi,
>>
>> yes Bas already reported this issue, but I couldn't reproduce it. Need
>> to come up with a patch to narrow this down further.
>>
>> Can I send you something to test?
> I would appreciate to test any patches and ideas.
>

[-- Attachment #2: 0001-drm-amdgpu-partial-revert-remove-ctx-lock-v2.patch --]
[-- Type: text/x-patch, Size: 4010 bytes --]

From 852c78656f083394296b3d3b96db33608ce0f272 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Wed, 26 Oct 2022 09:26:01 +0200
Subject: [PATCH] drm/amdgpu: partial revert "remove ctx->lock" v2""
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 6c052af778a61977c271632044c754dbbca4f892.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 26 +++++++++++++++++--------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |  1 +
 3 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 1bbd39b3b0fc..0b331e8bfba6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -57,6 +57,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
 	if (!p->ctx)
 		return -EINVAL;
 
+	mutex_lock(&p->ctx->lock);
+
 	if (atomic_read(&p->ctx->guilty)) {
 		amdgpu_ctx_put(p->ctx);
 		return -ECANCELED;
@@ -578,6 +580,9 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
 	unsigned int ce_preempt = 0, de_preempt = 0;
 	int i, r;
 
+	/* TODO: Investigate why we still need the context lock */
+	mutex_unlock(&p->ctx->lock);
+
 	for (i = 0; i < p->nchunks; ++i) {
 		struct amdgpu_cs_chunk *chunk;
 
@@ -587,38 +592,41 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
 		case AMDGPU_CHUNK_ID_IB:
 			r = amdgpu_cs_p2_ib(p, chunk, &ce_preempt, &de_preempt);
 			if (r)
-				return r;
+				goto out;
 			break;
 		case AMDGPU_CHUNK_ID_DEPENDENCIES:
 		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
 			r = amdgpu_cs_p2_dependencies(p, chunk);
 			if (r)
-				return r;
+				goto out;
 			break;
 		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
 			r = amdgpu_cs_p2_syncobj_in(p, chunk);
 			if (r)
-				return r;
+				goto out;
 			break;
 		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
 			r = amdgpu_cs_p2_syncobj_out(p, chunk);
 			if (r)
-				return r;
+				goto out;
 			break;
 		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
 			r = amdgpu_cs_p2_syncobj_timeline_wait(p, chunk);
 			if (r)
-				return r;
+				goto out;
 			break;
 		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
 			r = amdgpu_cs_p2_syncobj_timeline_signal(p, chunk);
 			if (r)
-				return r;
+				goto out;
 			break;
 		}
 	}
 
-	return 0;
+	r = 0;
+out:
+	mutex_lock(&p->ctx->lock);
+	return r;
 }
 
 /* Convert microseconds to bytes. */
@@ -1335,8 +1343,10 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
 
 	dma_fence_put(parser->fence);
 
-	if (parser->ctx)
+	if (parser->ctx) {
+		mutex_unlock(&parser->ctx->lock);
 		amdgpu_ctx_put(parser->ctx);
+	}
 	if (parser->bo_list)
 		amdgpu_bo_list_put(parser->bo_list);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 331aa191910c..3a23fa45bfed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -315,6 +315,7 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
 	kref_init(&ctx->refcount);
 	ctx->mgr = mgr;
 	spin_lock_init(&ctx->ring_lock);
+	mutex_init(&ctx->lock);
 
 	ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
 	ctx->reset_counter_query = ctx->reset_counter;
@@ -409,6 +410,7 @@ static void amdgpu_ctx_fini(struct kref *ref)
 		drm_dev_exit(idx);
 	}
 
+	mutex_destroy(&ctx->lock);
 	kfree(ctx);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index 0fa0e56daf67..cc7c8afff414 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -53,6 +53,7 @@ struct amdgpu_ctx {
 	bool				preamble_presented;
 	int32_t				init_priority;
 	int32_t				override_priority;
+	struct mutex			lock;
 	atomic_t			guilty;
 	unsigned long			ras_counter_ce;
 	unsigned long			ras_counter_ue;
-- 
2.25.1


  reply	other threads:[~2022-10-26  7:29 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-21  8:08 [6.1][regression] after commit dd80d9c8eecac8c516da5b240d01a35660ba6cb6 some games (Cyberpunk 2077, Forza Horizon 4/5) hang at start Mikhail Gavrilov
2022-10-21  8:08 ` Mikhail Gavrilov
2022-10-21  8:32 ` Christian König
2022-10-21  8:32   ` Christian König
2022-10-21 12:36   ` Mikhail Gavrilov
2022-10-21 12:36     ` Mikhail Gavrilov
2022-10-26  7:29     ` Christian König [this message]
2022-10-26  7:29       ` Christian König
2022-10-30 22:05       ` Mikhail Gavrilov
2022-10-30 22:05         ` Mikhail Gavrilov
2022-11-01 17:52         ` Christian König
2022-11-01 17:52           ` Christian König
2022-11-02 13:36           ` Mikhail Gavrilov
2022-11-02 13:36             ` Mikhail Gavrilov
2022-11-02 13:43             ` Christian König
2022-11-02 13:43               ` Christian König
2022-11-14  9:43               ` Thorsten Leemhuis
2022-11-14 13:22               ` Christian König
2022-11-14 13:22                 ` Christian König
2022-11-20 17:25                 ` [6.1][regression] after commit dd80d9c8eecac8c516da5b240d01a35660ba6cb6 some games (Cyberpunk 2077, Forza Horizon 4/5) hang at start #forregzbot Thorsten Leemhuis
2022-11-27 10:56                   ` Thorsten Leemhuis
2022-11-21 23:42                 ` [6.1][regression] after commit dd80d9c8eecac8c516da5b240d01a35660ba6cb6 some games (Cyberpunk 2077, Forza Horizon 4/5) hang at start Mikhail Gavrilov
2022-11-21 23:42                   ` Mikhail Gavrilov
2022-11-22  7:16                   ` Christian König
2022-11-22  7:16                     ` Christian König
2022-11-28 23:16                     ` Mikhail Gavrilov
2022-11-28 23:16                       ` Mikhail Gavrilov
2022-10-23 13:20 ` [6.1][regression] after commit dd80d9c8eecac8c516da5b240d01a35660ba6cb6 some games (Cyberpunk 2077, Forza Horizon 4/5) hang at start #forregzbot Thorsten Leemhuis
2022-10-23 13:20   ` Thorsten Leemhuis

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=a537212d-4b42-4ba4-7707-1e397234c8b7@amd.com \
    --to=christian.koenig@amd.com \
    --cc=alexander.deucher@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=mikhail.v.gavrilov@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.