All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
To: <dri-devel@lists.freedesktop.org>, <amd-gfx@lists.freedesktop.org>
Cc: Monk.Liu@amd.com, horace.chen@amd.com, christian.koenig@amd.com
Subject: [RFC v2 2/8] drm/amdgpu: Move scheduler init to after XGMI is ready
Date: Wed, 22 Dec 2021 17:05:00 -0500	[thread overview]
Message-ID: <20211222220506.789133-3-andrey.grodzovsky@amd.com> (raw)
In-Reply-To: <20211222220506.789133-1-andrey.grodzovsky@amd.com>

Before we initialize the schedulers we must know which reset
domain we are in - for a single device there is a single
domain per device and so a single wq per device. For XGMI
the reset domain spans the entire XGMI hive and so the
reset wq is per hive.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 45 ++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 34 ++--------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  2 +
 3 files changed, 51 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 0f3e6c078f88..7c063fd37389 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2284,6 +2284,47 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
 	return r;
 }
 
+static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
+{
+	long timeout;
+	int r, i;
+
+	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+		struct amdgpu_ring *ring = adev->rings[i];
+
+		/* No need to setup the GPU scheduler for rings that don't need it */
+		if (!ring || ring->no_scheduler)
+			continue;
+
+		switch (ring->funcs->type) {
+		case AMDGPU_RING_TYPE_GFX:
+			timeout = adev->gfx_timeout;
+			break;
+		case AMDGPU_RING_TYPE_COMPUTE:
+			timeout = adev->compute_timeout;
+			break;
+		case AMDGPU_RING_TYPE_SDMA:
+			timeout = adev->sdma_timeout;
+			break;
+		default:
+			timeout = adev->video_timeout;
+			break;
+		}
+
+		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
+				   ring->num_hw_submission, amdgpu_job_hang_limit,
+				   timeout, adev->reset_domain.wq, ring->sched_score, ring->name);
+		if (r) {
+			DRM_ERROR("Failed to create scheduler on ring %s.\n",
+				  ring->name);
+			return r;
+		}
+	}
+
+	return 0;
+}
+
+
 /**
  * amdgpu_device_ip_init - run init for hardware IPs
  *
@@ -2412,6 +2453,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 		}
 	}
 
+	r = amdgpu_device_init_schedulers(adev);
+	if (r)
+		goto init_failed;
+
 	/* Don't init kfd if whole hive need to be reset during init */
 	if (!adev->gmc.xgmi.pending_reset)
 		amdgpu_amdkfd_device_init(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 3b7e86ea7167..5527c68c51de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -456,8 +456,6 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
 				  atomic_t *sched_score)
 {
 	struct amdgpu_device *adev = ring->adev;
-	long timeout;
-	int r;
 
 	if (!adev)
 		return -EINVAL;
@@ -477,36 +475,12 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
 	spin_lock_init(&ring->fence_drv.lock);
 	ring->fence_drv.fences = kcalloc(num_hw_submission * 2, sizeof(void *),
 					 GFP_KERNEL);
-	if (!ring->fence_drv.fences)
-		return -ENOMEM;
 
-	/* No need to setup the GPU scheduler for rings that don't need it */
-	if (ring->no_scheduler)
-		return 0;
+	ring->num_hw_submission = num_hw_submission;
+	ring->sched_score = sched_score;
 
-	switch (ring->funcs->type) {
-	case AMDGPU_RING_TYPE_GFX:
-		timeout = adev->gfx_timeout;
-		break;
-	case AMDGPU_RING_TYPE_COMPUTE:
-		timeout = adev->compute_timeout;
-		break;
-	case AMDGPU_RING_TYPE_SDMA:
-		timeout = adev->sdma_timeout;
-		break;
-	default:
-		timeout = adev->video_timeout;
-		break;
-	}
-
-	r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
-			   num_hw_submission, amdgpu_job_hang_limit,
-			   timeout, NULL, sched_score, ring->name);
-	if (r) {
-		DRM_ERROR("Failed to create scheduler on ring %s.\n",
-			  ring->name);
-		return r;
-	}
+	if (!ring->fence_drv.fences)
+		return -ENOMEM;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 4d380e79752c..a4b8279e3011 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -253,6 +253,8 @@ struct amdgpu_ring {
 	bool			has_compute_vm_bug;
 	bool			no_scheduler;
 	int			hw_prio;
+	unsigned 		num_hw_submission;
+	atomic_t		*sched_score;
 };
 
 #define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
-- 
2.25.1


WARNING: multiple messages have this Message-ID (diff)
From: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
To: <dri-devel@lists.freedesktop.org>, <amd-gfx@lists.freedesktop.org>
Cc: Monk.Liu@amd.com, Andrey Grodzovsky <andrey.grodzovsky@amd.com>,
	horace.chen@amd.com, christian.koenig@amd.com, daniel@ffwll.ch
Subject: [RFC v2 2/8] drm/amdgpu: Move scheduler init to after XGMI is ready
Date: Wed, 22 Dec 2021 17:05:00 -0500	[thread overview]
Message-ID: <20211222220506.789133-3-andrey.grodzovsky@amd.com> (raw)
In-Reply-To: <20211222220506.789133-1-andrey.grodzovsky@amd.com>

Before we initialize the schedulers we must know which reset
domain we are in - for a single device there is a single
domain per device and so a single wq per device. For XGMI
the reset domain spans the entire XGMI hive and so the
reset wq is per hive.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 45 ++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 34 ++--------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  2 +
 3 files changed, 51 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 0f3e6c078f88..7c063fd37389 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2284,6 +2284,47 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
 	return r;
 }
 
+static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
+{
+	long timeout;
+	int r, i;
+
+	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+		struct amdgpu_ring *ring = adev->rings[i];
+
+		/* No need to setup the GPU scheduler for rings that don't need it */
+		if (!ring || ring->no_scheduler)
+			continue;
+
+		switch (ring->funcs->type) {
+		case AMDGPU_RING_TYPE_GFX:
+			timeout = adev->gfx_timeout;
+			break;
+		case AMDGPU_RING_TYPE_COMPUTE:
+			timeout = adev->compute_timeout;
+			break;
+		case AMDGPU_RING_TYPE_SDMA:
+			timeout = adev->sdma_timeout;
+			break;
+		default:
+			timeout = adev->video_timeout;
+			break;
+		}
+
+		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
+				   ring->num_hw_submission, amdgpu_job_hang_limit,
+				   timeout, adev->reset_domain.wq, ring->sched_score, ring->name);
+		if (r) {
+			DRM_ERROR("Failed to create scheduler on ring %s.\n",
+				  ring->name);
+			return r;
+		}
+	}
+
+	return 0;
+}
+
+
 /**
  * amdgpu_device_ip_init - run init for hardware IPs
  *
@@ -2412,6 +2453,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 		}
 	}
 
+	r = amdgpu_device_init_schedulers(adev);
+	if (r)
+		goto init_failed;
+
 	/* Don't init kfd if whole hive need to be reset during init */
 	if (!adev->gmc.xgmi.pending_reset)
 		amdgpu_amdkfd_device_init(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 3b7e86ea7167..5527c68c51de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -456,8 +456,6 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
 				  atomic_t *sched_score)
 {
 	struct amdgpu_device *adev = ring->adev;
-	long timeout;
-	int r;
 
 	if (!adev)
 		return -EINVAL;
@@ -477,36 +475,12 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
 	spin_lock_init(&ring->fence_drv.lock);
 	ring->fence_drv.fences = kcalloc(num_hw_submission * 2, sizeof(void *),
 					 GFP_KERNEL);
-	if (!ring->fence_drv.fences)
-		return -ENOMEM;
 
-	/* No need to setup the GPU scheduler for rings that don't need it */
-	if (ring->no_scheduler)
-		return 0;
+	ring->num_hw_submission = num_hw_submission;
+	ring->sched_score = sched_score;
 
-	switch (ring->funcs->type) {
-	case AMDGPU_RING_TYPE_GFX:
-		timeout = adev->gfx_timeout;
-		break;
-	case AMDGPU_RING_TYPE_COMPUTE:
-		timeout = adev->compute_timeout;
-		break;
-	case AMDGPU_RING_TYPE_SDMA:
-		timeout = adev->sdma_timeout;
-		break;
-	default:
-		timeout = adev->video_timeout;
-		break;
-	}
-
-	r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
-			   num_hw_submission, amdgpu_job_hang_limit,
-			   timeout, NULL, sched_score, ring->name);
-	if (r) {
-		DRM_ERROR("Failed to create scheduler on ring %s.\n",
-			  ring->name);
-		return r;
-	}
+	if (!ring->fence_drv.fences)
+		return -ENOMEM;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 4d380e79752c..a4b8279e3011 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -253,6 +253,8 @@ struct amdgpu_ring {
 	bool			has_compute_vm_bug;
 	bool			no_scheduler;
 	int			hw_prio;
+	unsigned 		num_hw_submission;
+	atomic_t		*sched_score;
 };
 
 #define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
-- 
2.25.1


  parent reply	other threads:[~2021-12-22 22:06 UTC|newest]

Thread overview: 103+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-12-22 22:04 [RFC v2 0/8] Define and use reset domain for GPU recovery in amdgpu Andrey Grodzovsky
2021-12-22 22:04 ` Andrey Grodzovsky
2021-12-22 22:04 ` [RFC v2 1/8] drm/amdgpu: Introduce reset domain Andrey Grodzovsky
2021-12-22 22:04   ` Andrey Grodzovsky
2021-12-22 22:05 ` Andrey Grodzovsky [this message]
2021-12-22 22:05   ` [RFC v2 2/8] drm/amdgpu: Move scheduler init to after XGMI is ready Andrey Grodzovsky
2021-12-23  8:39   ` Christian König
2021-12-23  8:39     ` Christian König
2021-12-22 22:05 ` [RFC v2 3/8] drm/amdgpu: Fix crash on modprobe Andrey Grodzovsky
2021-12-22 22:05   ` Andrey Grodzovsky
2021-12-23  8:40   ` Christian König
2021-12-23  8:40     ` Christian König
2021-12-22 22:05 ` [RFC v2 4/8] drm/amdgpu: Serialize non TDR gpu recovery with TDRs Andrey Grodzovsky
2021-12-22 22:05   ` Andrey Grodzovsky
2021-12-23  8:41   ` Christian König
2021-12-23  8:41     ` Christian König
2022-01-05  9:54   ` Lazar, Lijo
2022-01-05  9:54     ` Lazar, Lijo
2022-01-05 12:31     ` Christian König
2022-01-05 12:31       ` Christian König
2022-01-05 13:11       ` Lazar, Lijo
2022-01-05 13:11         ` Lazar, Lijo
2022-01-05 13:15         ` Christian König
2022-01-05 13:15           ` Christian König
2022-01-05 13:26           ` Lazar, Lijo
2022-01-05 13:26             ` Lazar, Lijo
2022-01-05 13:41             ` Christian König
2022-01-05 13:41               ` Christian König
2022-01-05 18:11       ` Andrey Grodzovsky
2022-01-05 18:11         ` Andrey Grodzovsky
2022-01-17 19:14         ` Andrey Grodzovsky
2022-01-17 19:17           ` Christian König
2022-01-17 19:21             ` Andrey Grodzovsky
2022-01-26 15:52               ` Andrey Grodzovsky
2022-01-28 16:57                 ` Grodzovsky, Andrey
2022-02-07  2:41                   ` JingWen Chen
2022-02-07  3:08                     ` Grodzovsky, Andrey
2021-12-22 22:13 ` [RFC v2 5/8] drm/amd/virt: For SRIOV send GPU reset directly to TDR queue Andrey Grodzovsky
2021-12-22 22:13   ` Andrey Grodzovsky
2021-12-22 22:13   ` [RFC v2 6/8] drm/amdgpu: Drop hive->in_reset Andrey Grodzovsky
2021-12-22 22:13     ` Andrey Grodzovsky
2021-12-22 22:13   ` [RFC v2 7/8] drm/amdgpu: Drop concurrent GPU reset protection for device Andrey Grodzovsky
2021-12-22 22:13     ` Andrey Grodzovsky
2021-12-22 22:14   ` [RFC v2 8/8] drm/amd/virt: Drop concurrent GPU reset protection for SRIOV Andrey Grodzovsky
2021-12-22 22:14     ` Andrey Grodzovsky
2021-12-23  8:42     ` Christian König
2021-12-23  8:42       ` Christian König
2021-12-23 10:14       ` Liu, Monk
2021-12-23 10:14         ` Liu, Monk
2021-12-24  8:58         ` Deng, Emily
2021-12-24  8:58           ` Deng, Emily
2021-12-24  9:57           ` JingWen Chen
2021-12-24  9:57             ` JingWen Chen
2021-12-30 18:45             ` Andrey Grodzovsky
2021-12-30 18:45               ` Andrey Grodzovsky
2022-01-03 10:17               ` Christian König
2022-01-03 10:17                 ` Christian König
2022-01-04  9:07                 ` JingWen Chen
2022-01-04  9:07                   ` JingWen Chen
2022-01-04 10:18                   ` Christian König
2022-01-04 10:18                     ` Christian König
2022-01-04 10:49                     ` Liu, Monk
2022-01-04 10:49                       ` Liu, Monk
2022-01-04 11:36                       ` Christian König
2022-01-04 11:36                         ` Christian König
2022-01-04 16:56                         ` Andrey Grodzovsky
2022-01-04 16:56                           ` Andrey Grodzovsky
2022-01-05  7:34                           ` JingWen Chen
2022-01-05  7:34                             ` JingWen Chen
2022-01-05  7:59                             ` Christian König
2022-01-05  7:59                               ` Christian König
2022-01-05 18:24                               ` Andrey Grodzovsky
2022-01-05 18:24                                 ` Andrey Grodzovsky
2022-01-06  4:59                                 ` JingWen Chen
2022-01-06  4:59                                   ` JingWen Chen
2022-01-06  5:18                                   ` JingWen Chen
2022-01-06  5:18                                     ` JingWen Chen
2022-01-06  9:13                                     ` Christian König
2022-01-06  9:13                                       ` Christian König
2022-01-06 19:13                                     ` Andrey Grodzovsky
2022-01-06 19:13                                       ` Andrey Grodzovsky
2022-01-07  3:57                                       ` JingWen Chen
2022-01-07  3:57                                         ` JingWen Chen
2022-01-07  5:46                                         ` JingWen Chen
2022-01-07  5:46                                           ` JingWen Chen
2022-01-07 16:02                                           ` Andrey Grodzovsky
2022-01-07 16:02                                             ` Andrey Grodzovsky
2022-01-12  6:28                                             ` JingWen Chen
2022-01-12  6:28                                               ` JingWen Chen
2022-01-04 17:13                         ` Liu, Shaoyun
2022-01-04 17:13                           ` Liu, Shaoyun
2022-01-04 20:54                           ` Andrey Grodzovsky
2022-01-04 20:54                             ` Andrey Grodzovsky
2022-01-05  0:01                             ` Liu, Shaoyun
2022-01-05  0:01                               ` Liu, Shaoyun
2022-01-05  7:25                         ` JingWen Chen
2022-01-05  7:25                           ` JingWen Chen
2021-12-30 18:39           ` Andrey Grodzovsky
2021-12-30 18:39             ` Andrey Grodzovsky
2021-12-23 18:07     ` Liu, Shaoyun
2021-12-23 18:07       ` Liu, Shaoyun
2021-12-23 18:29   ` [RFC v3 5/8] drm/amd/virt: For SRIOV send GPU reset directly to TDR queue Andrey Grodzovsky
2021-12-23 18:29     ` Andrey Grodzovsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211222220506.789133-3-andrey.grodzovsky@amd.com \
    --to=andrey.grodzovsky@amd.com \
    --cc=Monk.Liu@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=christian.koenig@amd.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=horace.chen@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.