All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdkfd: correct sdma queue number in kfd device init
@ 2021-12-17 14:31 Guchun Chen
  2021-12-17 15:05 ` Sider, Graham
  2021-12-17 15:17 ` Kim, Jonathan
  0 siblings, 2 replies; 5+ messages in thread
From: Guchun Chen @ 2021-12-17 14:31 UTC (permalink / raw)
  To: amd-gfx, alexander.deucher, Graham.Sider, Felix.Kuehling, jonathan.kim
  Cc: Guchun Chen

sdma queue number is not correct like on vega20, this patch
promises the setting keeps the same after code refactor.
Additionally, improve code to use switch case to list IP
version to complete kfd device_info structure filling.
This keeps consistency with the IP parse code in amdgpu_discovery.c.

Fixes: a9e2c4dc6cc4("drm/amdkfd: add kfd_device_info_init function")
Signed-off-by: Guchun Chen <guchun.chen@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 74 ++++++++++++++++++++++---
 1 file changed, 65 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index facc28f58c1f..e50bf992f298 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -59,11 +59,72 @@ static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
 
 static int kfd_resume(struct kfd_dev *kfd);
 
+static void kfd_device_info_set_sdma_queue_num(struct kfd_dev *kfd)
+{
+	uint32_t sdma_version = kfd->adev->ip_versions[SDMA0_HWIP][0];
+
+	switch (sdma_version) {
+		case IP_VERSION(4, 0, 0):/* VEGA10 */
+		case IP_VERSION(4, 0, 1):/* VEGA12 */
+		case IP_VERSION(4, 1, 0):/* RAVEN */
+		case IP_VERSION(4, 1, 1):/* RAVEN */
+		case IP_VERSION(4, 1, 2):/* RENIOR */
+		case IP_VERSION(5, 2, 1):/* VANGOGH */
+		case IP_VERSION(5, 2, 3):/* YELLOW_CARP */
+			kfd->device_info.num_sdma_queues_per_engine = 2;
+			break;
+		case IP_VERSION(4, 2, 0):/* VEGA20 */
+		case IP_VERSION(4, 2, 2):/* ARCTUTUS */
+		case IP_VERSION(4, 4, 0):/* ALDEBARAN */
+		case IP_VERSION(5, 0, 0):/* NAVI10 */
+		case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
+		case IP_VERSION(5, 0, 2):/* NAVI14 */
+		case IP_VERSION(5, 0, 5):/* NAVI12 */
+		case IP_VERSION(5, 2, 0):/* SIENNA_CICHLID */
+		case IP_VERSION(5, 2, 2):/* NAVY_FLOUDER */
+		case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */
+			kfd->device_info.num_sdma_queues_per_engine = 8;
+			break;
+		default:
+			dev_err(kfd_device,
+				"Failed to find sdma ip blocks(SDMA_HWIP:0x%x) in %s\n",
+                                sdma_version, __func__);
+	}
+}
+
+static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
+{
+	uint32_t gc_version = KFD_GC_VERSION(kfd);
+
+	switch (gc_version) {
+	case IP_VERSION(9, 0, 1): /* VEGA10 */
+	case IP_VERSION(9, 2, 1): /* VEGA12 */
+	case IP_VERSION(9, 3, 0): /* RENOIR */
+	case IP_VERSION(9, 4, 0): /* VEGA20 */
+	case IP_VERSION(9, 4, 1): /* ARCTURUS */
+	case IP_VERSION(9, 4, 2): /* ALDEBARAN */
+	case IP_VERSION(10, 3, 1): /* VANGOGH */
+	case IP_VERSION(10, 3, 3): /* YELLOW_CARP */
+	case IP_VERSION(10, 1, 3): /* CYAN_SKILLFISH */
+	case IP_VERSION(10, 1, 10): /* NAVI10 */
+	case IP_VERSION(10, 1, 2): /* NAVI12 */
+	case IP_VERSION(10, 1, 1): /* NAVI14 */
+	case IP_VERSION(10, 3, 0): /* SIENNA_CICHLID */
+	case IP_VERSION(10, 3, 2): /* NAVY_FLOUNDER */
+	case IP_VERSION(10, 3, 4): /* DIMGREY_CAVEFISH */
+	case IP_VERSION(10, 3, 5): /* BEIGE_GOBY */
+		kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
+		break;
+	default:
+		dev_err(kfd_device, "Failed to find gc ip blocks(GC_HWIP:0x%x) in %s\n",
+				gc_version, __func__);
+	}
+}
+
 static void kfd_device_info_init(struct kfd_dev *kfd,
 				 bool vf, uint32_t gfx_target_version)
 {
 	uint32_t gc_version = KFD_GC_VERSION(kfd);
-	uint32_t sdma_version = kfd->adev->ip_versions[SDMA0_HWIP][0];
 	uint32_t asic_type = kfd->adev->asic_type;
 
 	kfd->device_info.max_pasid_bits = 16;
@@ -75,16 +136,11 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
 	if (KFD_IS_SOC15(kfd)) {
 		kfd->device_info.doorbell_size = 8;
 		kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t);
-		kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
 		kfd->device_info.supports_cwsr = true;
 
-		if ((sdma_version >= IP_VERSION(4, 0, 0)  &&
-		     sdma_version <= IP_VERSION(4, 2, 0)) ||
-		     sdma_version == IP_VERSION(5, 2, 1)  ||
-		     sdma_version == IP_VERSION(5, 2, 3))
-			kfd->device_info.num_sdma_queues_per_engine = 2;
-		else
-			kfd->device_info.num_sdma_queues_per_engine = 8;
+		kfd_device_info_set_sdma_queue_num(kfd);
+
+		kfd_device_info_set_event_interrupt_class(kfd);
 
 		/* Raven */
 		if (gc_version == IP_VERSION(9, 1, 0) ||
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* RE: [PATCH] drm/amdkfd: correct sdma queue number in kfd device init
  2021-12-17 14:31 [PATCH] drm/amdkfd: correct sdma queue number in kfd device init Guchun Chen
@ 2021-12-17 15:05 ` Sider, Graham
  2021-12-17 15:34   ` Kim, Jonathan
  2021-12-17 15:17 ` Kim, Jonathan
  1 sibling, 1 reply; 5+ messages in thread
From: Sider, Graham @ 2021-12-17 15:05 UTC (permalink / raw)
  To: Chen, Guchun, amd-gfx, Deucher, Alexander, Kuehling, Felix, Kim,
	Jonathan

[Public]

> -----Original Message-----
> From: Chen, Guchun <Guchun.Chen@amd.com>
> Sent: Friday, December 17, 2021 9:31 AM
> To: amd-gfx@lists.freedesktop.org; Deucher, Alexander
> <Alexander.Deucher@amd.com>; Sider, Graham
> <Graham.Sider@amd.com>; Kuehling, Felix <Felix.Kuehling@amd.com>;
> Kim, Jonathan <Jonathan.Kim@amd.com>
> Cc: Chen, Guchun <Guchun.Chen@amd.com>
> Subject: [PATCH] drm/amdkfd: correct sdma queue number in kfd device init
> 
> sdma queue number is not correct like on vega20, this patch promises the
> setting keeps the same after code refactor.
> Additionally, improve code to use switch case to list IP version to complete
> kfd device_info structure filling.
> This keeps consistency with the IP parse code in amdgpu_discovery.c.
> 
> Fixes: a9e2c4dc6cc4("drm/amdkfd: add kfd_device_info_init function")
> Signed-off-by: Guchun Chen <guchun.chen@amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 74
> ++++++++++++++++++++++---
>  1 file changed, 65 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index facc28f58c1f..e50bf992f298 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -59,11 +59,72 @@ static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
> 
>  static int kfd_resume(struct kfd_dev *kfd);
> 
> +static void kfd_device_info_set_sdma_queue_num(struct kfd_dev *kfd) {
> +	uint32_t sdma_version = kfd->adev->ip_versions[SDMA0_HWIP][0];
> +
> +	switch (sdma_version) {
> +		case IP_VERSION(4, 0, 0):/* VEGA10 */
> +		case IP_VERSION(4, 0, 1):/* VEGA12 */
> +		case IP_VERSION(4, 1, 0):/* RAVEN */
> +		case IP_VERSION(4, 1, 1):/* RAVEN */
> +		case IP_VERSION(4, 1, 2):/* RENIOR */
> +		case IP_VERSION(5, 2, 1):/* VANGOGH */
> +		case IP_VERSION(5, 2, 3):/* YELLOW_CARP */
> +			kfd->device_info.num_sdma_queues_per_engine =
> 2;
> +			break;
> +		case IP_VERSION(4, 2, 0):/* VEGA20 */

Thanks for spotting this Guchun. My previous patch should have used a "<" instead of a "<=" on IP_VERSION(4, 2, 0).

> +		case IP_VERSION(4, 2, 2):/* ARCTUTUS */
> +		case IP_VERSION(4, 4, 0):/* ALDEBARAN */
> +		case IP_VERSION(5, 0, 0):/* NAVI10 */
> +		case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
> +		case IP_VERSION(5, 0, 2):/* NAVI14 */
> +		case IP_VERSION(5, 0, 5):/* NAVI12 */
> +		case IP_VERSION(5, 2, 0):/* SIENNA_CICHLID */
> +		case IP_VERSION(5, 2, 2):/* NAVY_FLOUDER */
> +		case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */
> +			kfd->device_info.num_sdma_queues_per_engine =
> 8;
> +			break;
> +		default:
> +			dev_err(kfd_device,
> +				"Failed to find sdma ip
> blocks(SDMA_HWIP:0x%x) in %s\n",
> +                                sdma_version, __func__);
> +	}
> +}
> +
> +static void kfd_device_info_set_event_interrupt_class(struct kfd_dev
> +*kfd) {
> +	uint32_t gc_version = KFD_GC_VERSION(kfd);
> +
> +	switch (gc_version) {
> +	case IP_VERSION(9, 0, 1): /* VEGA10 */
> +	case IP_VERSION(9, 2, 1): /* VEGA12 */
> +	case IP_VERSION(9, 3, 0): /* RENOIR */
> +	case IP_VERSION(9, 4, 0): /* VEGA20 */
> +	case IP_VERSION(9, 4, 1): /* ARCTURUS */
> +	case IP_VERSION(9, 4, 2): /* ALDEBARAN */
> +	case IP_VERSION(10, 3, 1): /* VANGOGH */
> +	case IP_VERSION(10, 3, 3): /* YELLOW_CARP */
> +	case IP_VERSION(10, 1, 3): /* CYAN_SKILLFISH */
> +	case IP_VERSION(10, 1, 10): /* NAVI10 */
> +	case IP_VERSION(10, 1, 2): /* NAVI12 */
> +	case IP_VERSION(10, 1, 1): /* NAVI14 */
> +	case IP_VERSION(10, 3, 0): /* SIENNA_CICHLID */
> +	case IP_VERSION(10, 3, 2): /* NAVY_FLOUNDER */
> +	case IP_VERSION(10, 3, 4): /* DIMGREY_CAVEFISH */
> +	case IP_VERSION(10, 3, 5): /* BEIGE_GOBY */
> +		kfd->device_info.event_interrupt_class =
> &event_interrupt_class_v9;
> +		break;
> +	default:
> +		dev_err(kfd_device, "Failed to find gc ip
> blocks(GC_HWIP:0x%x) in %s\n",
> +				gc_version, __func__);
> +	}
> +}

I understand the appeal of moving to a switch for the SDMA queue num above since its setting isn't very linear w.r.t. the SDMA versioning. That said I don't know if I understand moving to a switch for the event interrupt class here. To clarify, originally when I set all SOC15 to event_interrupt_class_v9 it was to follow what was in the device_info structs in drm-staging-next at that time--that said if the plan is in a following patch to change this such that gfx10 are set to to event_interrupt_class_v10, what's the reasoning not to format it along the lines of:

if (gc_version >= IP_VERSION(10, 1, 1)
	kfd->device_info.event_interrupt_class = &event_interrupt_class_v10;
else
	kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;

(Assuming this is done in the SOC15 case, and of course cases would need to be added here eventually for e.g. event_interrupt_class_v11, but not necessarily for every asic).

Best,
Graham

> +
>  static void kfd_device_info_init(struct kfd_dev *kfd,
>  				 bool vf, uint32_t gfx_target_version)  {
>  	uint32_t gc_version = KFD_GC_VERSION(kfd);
> -	uint32_t sdma_version = kfd->adev->ip_versions[SDMA0_HWIP][0];
>  	uint32_t asic_type = kfd->adev->asic_type;
> 
>  	kfd->device_info.max_pasid_bits = 16;
> @@ -75,16 +136,11 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
>  	if (KFD_IS_SOC15(kfd)) {
>  		kfd->device_info.doorbell_size = 8;
>  		kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t);
> -		kfd->device_info.event_interrupt_class =
> &event_interrupt_class_v9;
>  		kfd->device_info.supports_cwsr = true;
> 
> -		if ((sdma_version >= IP_VERSION(4, 0, 0)  &&
> -		     sdma_version <= IP_VERSION(4, 2, 0)) ||
> -		     sdma_version == IP_VERSION(5, 2, 1)  ||
> -		     sdma_version == IP_VERSION(5, 2, 3))
> -			kfd->device_info.num_sdma_queues_per_engine =
> 2;
> -		else
> -			kfd->device_info.num_sdma_queues_per_engine =
> 8;
> +		kfd_device_info_set_sdma_queue_num(kfd);
> +
> +		kfd_device_info_set_event_interrupt_class(kfd);
> 
>  		/* Raven */
>  		if (gc_version == IP_VERSION(9, 1, 0) ||
> --
> 2.17.1

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [PATCH] drm/amdkfd: correct sdma queue number in kfd device init
  2021-12-17 14:31 [PATCH] drm/amdkfd: correct sdma queue number in kfd device init Guchun Chen
  2021-12-17 15:05 ` Sider, Graham
@ 2021-12-17 15:17 ` Kim, Jonathan
  1 sibling, 0 replies; 5+ messages in thread
From: Kim, Jonathan @ 2021-12-17 15:17 UTC (permalink / raw)
  To: Chen, Guchun, amd-gfx, Deucher, Alexander, Sider, Graham,
	Kuehling, Felix

[AMD Official Use Only]

Are safeguards required for KFD interrupt initialization to fail gracefully in the event of a non-assignment?
Same would apply when KGD forwards interrupts to the KFD (although the KFD device reference might not exist at this point if the above comment is handled well so a check may not apply in this case).

Also should the dev_errs mention what it's failing to do rather than just reporting that it could not find the HW IP block?
In the case of non-assignment of sdma queues per engine, it still seems like the KFD could move forward but the user wouldn't know what the context of the dev_err was.

Thanks,

Jon

> -----Original Message-----
> From: Chen, Guchun <Guchun.Chen@amd.com>
> Sent: December 17, 2021 9:31 AM
> To: amd-gfx@lists.freedesktop.org; Deucher, Alexander
> <Alexander.Deucher@amd.com>; Sider, Graham
> <Graham.Sider@amd.com>; Kuehling, Felix <Felix.Kuehling@amd.com>;
> Kim, Jonathan <Jonathan.Kim@amd.com>
> Cc: Chen, Guchun <Guchun.Chen@amd.com>
> Subject: [PATCH] drm/amdkfd: correct sdma queue number in kfd device
> init
>
> sdma queue number is not correct like on vega20, this patch promises the
> setting keeps the same after code refactor.
> Additionally, improve code to use switch case to list IP version to complete
> kfd device_info structure filling.
> This keeps consistency with the IP parse code in amdgpu_discovery.c.
>
> Fixes: a9e2c4dc6cc4("drm/amdkfd: add kfd_device_info_init function")
> Signed-off-by: Guchun Chen <guchun.chen@amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 74
> ++++++++++++++++++++++---
>  1 file changed, 65 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index facc28f58c1f..e50bf992f298 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -59,11 +59,72 @@ static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
>
>  static int kfd_resume(struct kfd_dev *kfd);
>
> +static void kfd_device_info_set_sdma_queue_num(struct kfd_dev *kfd) {
> +     uint32_t sdma_version = kfd->adev-
> >ip_versions[SDMA0_HWIP][0];
> +
> +     switch (sdma_version) {
> +             case IP_VERSION(4, 0, 0):/* VEGA10 */
> +             case IP_VERSION(4, 0, 1):/* VEGA12 */
> +             case IP_VERSION(4, 1, 0):/* RAVEN */
> +             case IP_VERSION(4, 1, 1):/* RAVEN */
> +             case IP_VERSION(4, 1, 2):/* RENIOR */
> +             case IP_VERSION(5, 2, 1):/* VANGOGH */
> +             case IP_VERSION(5, 2, 3):/* YELLOW_CARP */
> +                     kfd->device_info.num_sdma_queues_per_engine =
> 2;
> +                     break;
> +             case IP_VERSION(4, 2, 0):/* VEGA20 */
> +             case IP_VERSION(4, 2, 2):/* ARCTUTUS */
> +             case IP_VERSION(4, 4, 0):/* ALDEBARAN */
> +             case IP_VERSION(5, 0, 0):/* NAVI10 */
> +             case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
> +             case IP_VERSION(5, 0, 2):/* NAVI14 */
> +             case IP_VERSION(5, 0, 5):/* NAVI12 */
> +             case IP_VERSION(5, 2, 0):/* SIENNA_CICHLID */
> +             case IP_VERSION(5, 2, 2):/* NAVY_FLOUDER */
> +             case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */
> +                     kfd->device_info.num_sdma_queues_per_engine =
> 8;
> +                     break;
> +             default:
> +                     dev_err(kfd_device,
> +                             "Failed to find sdma ip
> blocks(SDMA_HWIP:0x%x) in %s\n",
> +                                sdma_version, __func__);
> +     }
> +}
> +
> +static void kfd_device_info_set_event_interrupt_class(struct kfd_dev
> +*kfd) {
> +     uint32_t gc_version = KFD_GC_VERSION(kfd);
> +
> +     switch (gc_version) {
> +     case IP_VERSION(9, 0, 1): /* VEGA10 */
> +     case IP_VERSION(9, 2, 1): /* VEGA12 */
> +     case IP_VERSION(9, 3, 0): /* RENOIR */
> +     case IP_VERSION(9, 4, 0): /* VEGA20 */
> +     case IP_VERSION(9, 4, 1): /* ARCTURUS */
> +     case IP_VERSION(9, 4, 2): /* ALDEBARAN */
> +     case IP_VERSION(10, 3, 1): /* VANGOGH */
> +     case IP_VERSION(10, 3, 3): /* YELLOW_CARP */
> +     case IP_VERSION(10, 1, 3): /* CYAN_SKILLFISH */
> +     case IP_VERSION(10, 1, 10): /* NAVI10 */
> +     case IP_VERSION(10, 1, 2): /* NAVI12 */
> +     case IP_VERSION(10, 1, 1): /* NAVI14 */
> +     case IP_VERSION(10, 3, 0): /* SIENNA_CICHLID */
> +     case IP_VERSION(10, 3, 2): /* NAVY_FLOUNDER */
> +     case IP_VERSION(10, 3, 4): /* DIMGREY_CAVEFISH */
> +     case IP_VERSION(10, 3, 5): /* BEIGE_GOBY */
> +             kfd->device_info.event_interrupt_class =
> &event_interrupt_class_v9;
> +             break;
> +     default:
> +             dev_err(kfd_device, "Failed to find gc ip
> blocks(GC_HWIP:0x%x) in %s\n",
> +                             gc_version, __func__);
> +     }
> +}
> +
>  static void kfd_device_info_init(struct kfd_dev *kfd,
>                                bool vf, uint32_t gfx_target_version)  {
>       uint32_t gc_version = KFD_GC_VERSION(kfd);
> -     uint32_t sdma_version = kfd->adev-
> >ip_versions[SDMA0_HWIP][0];
>       uint32_t asic_type = kfd->adev->asic_type;
>
>       kfd->device_info.max_pasid_bits = 16;
> @@ -75,16 +136,11 @@ static void kfd_device_info_init(struct kfd_dev
> *kfd,
>       if (KFD_IS_SOC15(kfd)) {
>               kfd->device_info.doorbell_size = 8;
>               kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t);
> -             kfd->device_info.event_interrupt_class =
> &event_interrupt_class_v9;
>               kfd->device_info.supports_cwsr = true;
>
> -             if ((sdma_version >= IP_VERSION(4, 0, 0)  &&
> -                  sdma_version <= IP_VERSION(4, 2, 0)) ||
> -                  sdma_version == IP_VERSION(5, 2, 1)  ||
> -                  sdma_version == IP_VERSION(5, 2, 3))
> -                     kfd->device_info.num_sdma_queues_per_engine =
> 2;
> -             else
> -                     kfd->device_info.num_sdma_queues_per_engine =
> 8;
> +             kfd_device_info_set_sdma_queue_num(kfd);
> +
> +             kfd_device_info_set_event_interrupt_class(kfd);
>
>               /* Raven */
>               if (gc_version == IP_VERSION(9, 1, 0) ||
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [PATCH] drm/amdkfd: correct sdma queue number in kfd device init
  2021-12-17 15:05 ` Sider, Graham
@ 2021-12-17 15:34   ` Kim, Jonathan
  2021-12-18  2:44     ` Chen, Guchun
  0 siblings, 1 reply; 5+ messages in thread
From: Kim, Jonathan @ 2021-12-17 15:34 UTC (permalink / raw)
  To: Sider, Graham, Chen, Guchun, amd-gfx, Deucher, Alexander,
	Kuehling, Felix



> -----Original Message-----
> From: Sider, Graham <Graham.Sider@amd.com>
> Sent: December 17, 2021 10:06 AM
> To: Chen, Guchun <Guchun.Chen@amd.com>; amd-
> gfx@lists.freedesktop.org; Deucher, Alexander
> <Alexander.Deucher@amd.com>; Kuehling, Felix
> <Felix.Kuehling@amd.com>; Kim, Jonathan <Jonathan.Kim@amd.com>
> Subject: RE: [PATCH] drm/amdkfd: correct sdma queue number in kfd
> device init
> 
> [Public]
> 
> > -----Original Message-----
> > From: Chen, Guchun <Guchun.Chen@amd.com>
> > Sent: Friday, December 17, 2021 9:31 AM
> > To: amd-gfx@lists.freedesktop.org; Deucher, Alexander
> > <Alexander.Deucher@amd.com>; Sider, Graham
> <Graham.Sider@amd.com>;
> > Kuehling, Felix <Felix.Kuehling@amd.com>; Kim, Jonathan
> > <Jonathan.Kim@amd.com>
> > Cc: Chen, Guchun <Guchun.Chen@amd.com>
> > Subject: [PATCH] drm/amdkfd: correct sdma queue number in kfd device
> > init
> >
> > sdma queue number is not correct like on vega20, this patch promises
> > the setting keeps the same after code refactor.
> > Additionally, improve code to use switch case to list IP version to
> > complete kfd device_info structure filling.
> > This keeps consistency with the IP parse code in amdgpu_discovery.c.
> >
> > Fixes: a9e2c4dc6cc4("drm/amdkfd: add kfd_device_info_init function")
> > Signed-off-by: Guchun Chen <guchun.chen@amd.com>
> > ---
> >  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 74
> > ++++++++++++++++++++++---
> >  1 file changed, 65 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > index facc28f58c1f..e50bf992f298 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > @@ -59,11 +59,72 @@ static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
> >
> >  static int kfd_resume(struct kfd_dev *kfd);
> >
> > +static void kfd_device_info_set_sdma_queue_num(struct kfd_dev *kfd)
> {
> > +	uint32_t sdma_version = kfd->adev-
> >ip_versions[SDMA0_HWIP][0];
> > +
> > +	switch (sdma_version) {
> > +		case IP_VERSION(4, 0, 0):/* VEGA10 */
> > +		case IP_VERSION(4, 0, 1):/* VEGA12 */
> > +		case IP_VERSION(4, 1, 0):/* RAVEN */
> > +		case IP_VERSION(4, 1, 1):/* RAVEN */
> > +		case IP_VERSION(4, 1, 2):/* RENIOR */
> > +		case IP_VERSION(5, 2, 1):/* VANGOGH */
> > +		case IP_VERSION(5, 2, 3):/* YELLOW_CARP */
> > +			kfd->device_info.num_sdma_queues_per_engine =
> > 2;
> > +			break;
> > +		case IP_VERSION(4, 2, 0):/* VEGA20 */
> 
> Thanks for spotting this Guchun. My previous patch should have used a "<"
> instead of a "<=" on IP_VERSION(4, 2, 0).
> 
> > +		case IP_VERSION(4, 2, 2):/* ARCTUTUS */
> > +		case IP_VERSION(4, 4, 0):/* ALDEBARAN */
> > +		case IP_VERSION(5, 0, 0):/* NAVI10 */
> > +		case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
> > +		case IP_VERSION(5, 0, 2):/* NAVI14 */
> > +		case IP_VERSION(5, 0, 5):/* NAVI12 */
> > +		case IP_VERSION(5, 2, 0):/* SIENNA_CICHLID */
> > +		case IP_VERSION(5, 2, 2):/* NAVY_FLOUDER */
> > +		case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */
> > +			kfd->device_info.num_sdma_queues_per_engine =
> > 8;
> > +			break;
> > +		default:
> > +			dev_err(kfd_device,
> > +				"Failed to find sdma ip
> > blocks(SDMA_HWIP:0x%x) in %s\n",
> > +                                sdma_version, __func__);
> > +	}
> > +}
> > +
> > +static void kfd_device_info_set_event_interrupt_class(struct kfd_dev
> > +*kfd) {
> > +	uint32_t gc_version = KFD_GC_VERSION(kfd);
> > +
> > +	switch (gc_version) {
> > +	case IP_VERSION(9, 0, 1): /* VEGA10 */
> > +	case IP_VERSION(9, 2, 1): /* VEGA12 */
> > +	case IP_VERSION(9, 3, 0): /* RENOIR */
> > +	case IP_VERSION(9, 4, 0): /* VEGA20 */
> > +	case IP_VERSION(9, 4, 1): /* ARCTURUS */
> > +	case IP_VERSION(9, 4, 2): /* ALDEBARAN */
> > +	case IP_VERSION(10, 3, 1): /* VANGOGH */
> > +	case IP_VERSION(10, 3, 3): /* YELLOW_CARP */
> > +	case IP_VERSION(10, 1, 3): /* CYAN_SKILLFISH */
> > +	case IP_VERSION(10, 1, 10): /* NAVI10 */
> > +	case IP_VERSION(10, 1, 2): /* NAVI12 */
> > +	case IP_VERSION(10, 1, 1): /* NAVI14 */
> > +	case IP_VERSION(10, 3, 0): /* SIENNA_CICHLID */
> > +	case IP_VERSION(10, 3, 2): /* NAVY_FLOUNDER */
> > +	case IP_VERSION(10, 3, 4): /* DIMGREY_CAVEFISH */
> > +	case IP_VERSION(10, 3, 5): /* BEIGE_GOBY */
> > +		kfd->device_info.event_interrupt_class =
> > &event_interrupt_class_v9;
> > +		break;
> > +	default:
> > +		dev_err(kfd_device, "Failed to find gc ip
> > blocks(GC_HWIP:0x%x) in %s\n",
> > +				gc_version, __func__);
> > +	}
> > +}
> 
> I understand the appeal of moving to a switch for the SDMA queue num
> above since its setting isn't very linear w.r.t. the SDMA versioning. That said
> I don't know if I understand moving to a switch for the event interrupt class
> here. To clarify, originally when I set all SOC15 to event_interrupt_class_v9
> it was to follow what was in the device_info structs in drm-staging-next at
> that time--that said if the plan is in a following patch to change this such
> that gfx10 are set to to event_interrupt_class_v10, what's the reasoning
> not to format it along the lines of:
> 
> if (gc_version >= IP_VERSION(10, 1, 1)
> 	kfd->device_info.event_interrupt_class =
> &event_interrupt_class_v10; else
> 	kfd->device_info.event_interrupt_class =
> &event_interrupt_class_v9;
> 
> (Assuming this is done in the SOC15 case, and of course cases would need
> to be added here eventually for e.g. event_interrupt_class_v11, but not
> necessarily for every asic).

Explicit hard checks with a non-assignment on default might have advantages by not allowing the KFD to proceed to load for unregistered devices or force developers to assign the correct interrupt class without making assumptions.
But that means more maintenance and additional handling on non-assignment cases to fail gracefully.

Thanks,

Jon 

> 
> Best,
> Graham
> 
> > +
> >  static void kfd_device_info_init(struct kfd_dev *kfd,
> >  				 bool vf, uint32_t gfx_target_version)  {
> >  	uint32_t gc_version = KFD_GC_VERSION(kfd);
> > -	uint32_t sdma_version = kfd->adev-
> >ip_versions[SDMA0_HWIP][0];
> >  	uint32_t asic_type = kfd->adev->asic_type;
> >
> >  	kfd->device_info.max_pasid_bits = 16; @@ -75,16 +136,11 @@
> static
> > void kfd_device_info_init(struct kfd_dev *kfd,
> >  	if (KFD_IS_SOC15(kfd)) {
> >  		kfd->device_info.doorbell_size = 8;
> >  		kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t);
> > -		kfd->device_info.event_interrupt_class =
> > &event_interrupt_class_v9;
> >  		kfd->device_info.supports_cwsr = true;
> >
> > -		if ((sdma_version >= IP_VERSION(4, 0, 0)  &&
> > -		     sdma_version <= IP_VERSION(4, 2, 0)) ||
> > -		     sdma_version == IP_VERSION(5, 2, 1)  ||
> > -		     sdma_version == IP_VERSION(5, 2, 3))
> > -			kfd->device_info.num_sdma_queues_per_engine =
> > 2;
> > -		else
> > -			kfd->device_info.num_sdma_queues_per_engine =
> > 8;
> > +		kfd_device_info_set_sdma_queue_num(kfd);
> > +
> > +		kfd_device_info_set_event_interrupt_class(kfd);
> >
> >  		/* Raven */
> >  		if (gc_version == IP_VERSION(9, 1, 0) ||
> > --
> > 2.17.1

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [PATCH] drm/amdkfd: correct sdma queue number in kfd device init
  2021-12-17 15:34   ` Kim, Jonathan
@ 2021-12-18  2:44     ` Chen, Guchun
  0 siblings, 0 replies; 5+ messages in thread
From: Chen, Guchun @ 2021-12-18  2:44 UTC (permalink / raw)
  To: Kim, Jonathan, Sider, Graham, amd-gfx, Deucher, Alexander,
	Kuehling, Felix

[Public]

Hi Graham,

My general thought is, from what I observed, IP version does not change in a linear variation manner, so moving to switch case may be easier for user to decode this. Also, I want to get the code aligned with the IP parse code in amdgpu_discovery.c.

Please correct me if I am wrong.

Regards,
Guchun

-----Original Message-----
From: Kim, Jonathan <Jonathan.Kim@amd.com> 
Sent: Friday, December 17, 2021 11:35 PM
To: Sider, Graham <Graham.Sider@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; amd-gfx@lists.freedesktop.org; Deucher, Alexander <Alexander.Deucher@amd.com>; Kuehling, Felix <Felix.Kuehling@amd.com>
Subject: RE: [PATCH] drm/amdkfd: correct sdma queue number in kfd device init



> -----Original Message-----
> From: Sider, Graham <Graham.Sider@amd.com>
> Sent: December 17, 2021 10:06 AM
> To: Chen, Guchun <Guchun.Chen@amd.com>; amd- 
> gfx@lists.freedesktop.org; Deucher, Alexander 
> <Alexander.Deucher@amd.com>; Kuehling, Felix <Felix.Kuehling@amd.com>; 
> Kim, Jonathan <Jonathan.Kim@amd.com>
> Subject: RE: [PATCH] drm/amdkfd: correct sdma queue number in kfd 
> device init
> 
> [Public]
> 
> > -----Original Message-----
> > From: Chen, Guchun <Guchun.Chen@amd.com>
> > Sent: Friday, December 17, 2021 9:31 AM
> > To: amd-gfx@lists.freedesktop.org; Deucher, Alexander 
> > <Alexander.Deucher@amd.com>; Sider, Graham
> <Graham.Sider@amd.com>;
> > Kuehling, Felix <Felix.Kuehling@amd.com>; Kim, Jonathan 
> > <Jonathan.Kim@amd.com>
> > Cc: Chen, Guchun <Guchun.Chen@amd.com>
> > Subject: [PATCH] drm/amdkfd: correct sdma queue number in kfd device 
> > init
> >
> > sdma queue number is not correct like on vega20, this patch promises 
> > the setting keeps the same after code refactor.
> > Additionally, improve code to use switch case to list IP version to 
> > complete kfd device_info structure filling.
> > This keeps consistency with the IP parse code in amdgpu_discovery.c.
> >
> > Fixes: a9e2c4dc6cc4("drm/amdkfd: add kfd_device_info_init function")
> > Signed-off-by: Guchun Chen <guchun.chen@amd.com>
> > ---
> >  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 74
> > ++++++++++++++++++++++---
> >  1 file changed, 65 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > index facc28f58c1f..e50bf992f298 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > @@ -59,11 +59,72 @@ static void kfd_gtt_sa_fini(struct kfd_dev 
> > *kfd);
> >
> >  static int kfd_resume(struct kfd_dev *kfd);
> >
> > +static void kfd_device_info_set_sdma_queue_num(struct kfd_dev *kfd)
> {
> > +	uint32_t sdma_version = kfd->adev-
> >ip_versions[SDMA0_HWIP][0];
> > +
> > +	switch (sdma_version) {
> > +		case IP_VERSION(4, 0, 0):/* VEGA10 */
> > +		case IP_VERSION(4, 0, 1):/* VEGA12 */
> > +		case IP_VERSION(4, 1, 0):/* RAVEN */
> > +		case IP_VERSION(4, 1, 1):/* RAVEN */
> > +		case IP_VERSION(4, 1, 2):/* RENIOR */
> > +		case IP_VERSION(5, 2, 1):/* VANGOGH */
> > +		case IP_VERSION(5, 2, 3):/* YELLOW_CARP */
> > +			kfd->device_info.num_sdma_queues_per_engine =
> > 2;
> > +			break;
> > +		case IP_VERSION(4, 2, 0):/* VEGA20 */
> 
> Thanks for spotting this Guchun. My previous patch should have used a "<"
> instead of a "<=" on IP_VERSION(4, 2, 0).
> 
> > +		case IP_VERSION(4, 2, 2):/* ARCTUTUS */
> > +		case IP_VERSION(4, 4, 0):/* ALDEBARAN */
> > +		case IP_VERSION(5, 0, 0):/* NAVI10 */
> > +		case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
> > +		case IP_VERSION(5, 0, 2):/* NAVI14 */
> > +		case IP_VERSION(5, 0, 5):/* NAVI12 */
> > +		case IP_VERSION(5, 2, 0):/* SIENNA_CICHLID */
> > +		case IP_VERSION(5, 2, 2):/* NAVY_FLOUDER */
> > +		case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */
> > +			kfd->device_info.num_sdma_queues_per_engine =
> > 8;
> > +			break;
> > +		default:
> > +			dev_err(kfd_device,
> > +				"Failed to find sdma ip
> > blocks(SDMA_HWIP:0x%x) in %s\n",
> > +                                sdma_version, __func__);
> > +	}
> > +}
> > +
> > +static void kfd_device_info_set_event_interrupt_class(struct 
> > +kfd_dev
> > +*kfd) {
> > +	uint32_t gc_version = KFD_GC_VERSION(kfd);
> > +
> > +	switch (gc_version) {
> > +	case IP_VERSION(9, 0, 1): /* VEGA10 */
> > +	case IP_VERSION(9, 2, 1): /* VEGA12 */
> > +	case IP_VERSION(9, 3, 0): /* RENOIR */
> > +	case IP_VERSION(9, 4, 0): /* VEGA20 */
> > +	case IP_VERSION(9, 4, 1): /* ARCTURUS */
> > +	case IP_VERSION(9, 4, 2): /* ALDEBARAN */
> > +	case IP_VERSION(10, 3, 1): /* VANGOGH */
> > +	case IP_VERSION(10, 3, 3): /* YELLOW_CARP */
> > +	case IP_VERSION(10, 1, 3): /* CYAN_SKILLFISH */
> > +	case IP_VERSION(10, 1, 10): /* NAVI10 */
> > +	case IP_VERSION(10, 1, 2): /* NAVI12 */
> > +	case IP_VERSION(10, 1, 1): /* NAVI14 */
> > +	case IP_VERSION(10, 3, 0): /* SIENNA_CICHLID */
> > +	case IP_VERSION(10, 3, 2): /* NAVY_FLOUNDER */
> > +	case IP_VERSION(10, 3, 4): /* DIMGREY_CAVEFISH */
> > +	case IP_VERSION(10, 3, 5): /* BEIGE_GOBY */
> > +		kfd->device_info.event_interrupt_class =
> > &event_interrupt_class_v9;
> > +		break;
> > +	default:
> > +		dev_err(kfd_device, "Failed to find gc ip
> > blocks(GC_HWIP:0x%x) in %s\n",
> > +				gc_version, __func__);
> > +	}
> > +}
> 
> I understand the appeal of moving to a switch for the SDMA queue num 
> above since its setting isn't very linear w.r.t. the SDMA versioning. 
> That said I don't know if I understand moving to a switch for the 
> event interrupt class here. To clarify, originally when I set all 
> SOC15 to event_interrupt_class_v9 it was to follow what was in the 
> device_info structs in drm-staging-next at that time--that said if the 
> plan is in a following patch to change this such that gfx10 are set to 
> to event_interrupt_class_v10, what's the reasoning not to format it along the lines of:
> 
> if (gc_version >= IP_VERSION(10, 1, 1)
> 	kfd->device_info.event_interrupt_class = &event_interrupt_class_v10; 
> else
> 	kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
> 
> (Assuming this is done in the SOC15 case, and of course cases would 
> need to be added here eventually for e.g. event_interrupt_class_v11, 
> but not necessarily for every asic).

Explicit hard checks with a non-assignment on default might have advantages by not allowing the KFD to proceed to load for unregistered devices or force developers to assign the correct interrupt class without making assumptions.
But that means more maintenance and additional handling on non-assignment cases to fail gracefully.

Thanks,

Jon 

> 
> Best,
> Graham
> 
> > +
> >  static void kfd_device_info_init(struct kfd_dev *kfd,
> >  				 bool vf, uint32_t gfx_target_version)  {
> >  	uint32_t gc_version = KFD_GC_VERSION(kfd);
> > -	uint32_t sdma_version = kfd->adev-
> >ip_versions[SDMA0_HWIP][0];
> >  	uint32_t asic_type = kfd->adev->asic_type;
> >
> >  	kfd->device_info.max_pasid_bits = 16; @@ -75,16 +136,11 @@
> static
> > void kfd_device_info_init(struct kfd_dev *kfd,
> >  	if (KFD_IS_SOC15(kfd)) {
> >  		kfd->device_info.doorbell_size = 8;
> >  		kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t);
> > -		kfd->device_info.event_interrupt_class =
> > &event_interrupt_class_v9;
> >  		kfd->device_info.supports_cwsr = true;
> >
> > -		if ((sdma_version >= IP_VERSION(4, 0, 0)  &&
> > -		     sdma_version <= IP_VERSION(4, 2, 0)) ||
> > -		     sdma_version == IP_VERSION(5, 2, 1)  ||
> > -		     sdma_version == IP_VERSION(5, 2, 3))
> > -			kfd->device_info.num_sdma_queues_per_engine =
> > 2;
> > -		else
> > -			kfd->device_info.num_sdma_queues_per_engine =
> > 8;
> > +		kfd_device_info_set_sdma_queue_num(kfd);
> > +
> > +		kfd_device_info_set_event_interrupt_class(kfd);
> >
> >  		/* Raven */
> >  		if (gc_version == IP_VERSION(9, 1, 0) ||
> > --
> > 2.17.1

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2021-12-18  2:45 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-17 14:31 [PATCH] drm/amdkfd: correct sdma queue number in kfd device init Guchun Chen
2021-12-17 15:05 ` Sider, Graham
2021-12-17 15:34   ` Kim, Jonathan
2021-12-18  2:44     ` Chen, Guchun
2021-12-17 15:17 ` Kim, Jonathan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.