* [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQs we want
@ 2020-07-27  8:21 Monk Liu
  2020-07-27  9:26 ` Christian König
From: Monk Liu @ 2020-07-27  8:21 UTC
  To: amd-gfx; +Cc: Monk Liu

what:
KCQs cost many clock cycles during world switch, which significantly
impacts multi-VF performance

how:
introduce a parameter to control the number of KCQs, to avoid a
performance drop when no KCQs are needed

notes:
this parameter only affects gfx 8/9/10
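
usage:
for example, loading the module with num_kcq=2 (or passing
amdgpu.num_kcq=2 on the kernel command line) limits the driver to two
kernel compute queues; values outside the 0-8 range fall back to the
default of 8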

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  4 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    | 27 +++++++++++++-------------
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c     | 30 +++++++++++++++--------------
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c      | 29 ++++++++++++++--------------
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c      | 31 +++++++++++++++---------------
 7 files changed, 69 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e97c088..71a3d6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -201,6 +201,7 @@ extern int amdgpu_si_support;
 #ifdef CONFIG_DRM_AMDGPU_CIK
 extern int amdgpu_cik_support;
 #endif
+extern int amdgpu_num_kcq_user_set;
 
 #define AMDGPU_VM_MAX_NUM_CTX			4096
 #define AMDGPU_SG_THRESHOLD			(256*1024*1024)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 62ecac9..61c7583 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1199,6 +1199,9 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
 
 	amdgpu_gmc_tmz_set(adev);
 
+	if (amdgpu_num_kcq_user_set > 8 || amdgpu_num_kcq_user_set < 0)
+		amdgpu_num_kcq_user_set = 8;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6291f5f..03a94e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -150,6 +150,7 @@ int amdgpu_noretry;
 int amdgpu_force_asic_type = -1;
 int amdgpu_tmz = 0;
 int amdgpu_reset_method = -1; /* auto */
+int amdgpu_num_kcq_user_set = 8;
 
 struct amdgpu_mgpu_info mgpu_info = {
 	.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
@@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
 MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
 module_param_named(reset_method, amdgpu_reset_method, int, 0444);
 
+MODULE_PARM_DESC(num_kcq, "number of KCQs the user wants to set up (values greater than 8 or less than 0 fall back to 8, only affects gfx 8+)");
+module_param_named(num_kcq, amdgpu_num_kcq_user_set, int, 0444);
+
 static const struct pci_device_id pciidlist[] = {
 #ifdef  CONFIG_DRM_AMDGPU_SI
 	{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 8eff017..0b59049 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -202,7 +202,7 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
 
 void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 {
-	int i, queue, pipe, mec;
+	int i, queue, pipe, mec, j = 0;
 	bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
 
 	/* policy for amdgpu compute queue ownership */
@@ -219,23 +219,24 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 
 		if (multipipe_policy) {
 			/* policy: amdgpu owns the first two queues of the first MEC */
-			if (mec == 0 && queue < 2)
-				set_bit(i, adev->gfx.mec.queue_bitmap);
+			if (mec == 0 && queue < 2) {
+				if (j++ < adev->gfx.num_compute_rings)
+					set_bit(i, adev->gfx.mec.queue_bitmap);
+				else
+					break;
+			}
 		} else {
 			/* policy: amdgpu owns all queues in the first pipe */
-			if (mec == 0 && pipe == 0)
-				set_bit(i, adev->gfx.mec.queue_bitmap);
+			if (mec == 0 && pipe == 0) {
+				if (j++ < adev->gfx.num_compute_rings)
+					set_bit(i, adev->gfx.mec.queue_bitmap);
+				else
+					break;
+			}
 		}
 	}
 
-	/* update the number of active compute rings */
-	adev->gfx.num_compute_rings =
-		bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
-
-	/* If you hit this case and edited the policy, you probably just
-	 * need to increase AMDGPU_MAX_COMPUTE_RINGS */
-	if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
-		adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+	dev_info(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
 }
 
 void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index db9f1e8..2ad8393 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4022,21 +4022,23 @@ static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
 	amdgpu_gfx_compute_queue_acquire(adev);
 	mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;
 
-	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_GTT,
-				      &adev->gfx.mec.hpd_eop_obj,
-				      &adev->gfx.mec.hpd_eop_gpu_addr,
-				      (void **)&hpd);
-	if (r) {
-		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
-		gfx_v10_0_mec_fini(adev);
-		return r;
-	}
+	if (mec_hpd_size) {
+		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+									  AMDGPU_GEM_DOMAIN_GTT,
+									  &adev->gfx.mec.hpd_eop_obj,
+									  &adev->gfx.mec.hpd_eop_gpu_addr,
+									  (void **)&hpd);
+		if (r) {
+			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+			gfx_v10_0_mec_fini(adev);
+			return r;
+		}
 
-	memset(hpd, 0, mec_hpd_size);
+		memset(hpd, 0, mec_hpd_size);
 
-	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
-	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+	}
 
 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
 		mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
@@ -7159,7 +7161,7 @@ static int gfx_v10_0_early_init(void *handle)
 		break;
 	}
 
-	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+	adev->gfx.num_compute_rings = amdgpu_num_kcq_user_set;
 
 	gfx_v10_0_set_kiq_pm4_funcs(adev);
 	gfx_v10_0_set_ring_funcs(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 8d72089..6d95b4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1343,21 +1343,22 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
 	amdgpu_gfx_compute_queue_acquire(adev);
 
 	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
+	if (mec_hpd_size) {
+		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+									  AMDGPU_GEM_DOMAIN_VRAM,
+									  &adev->gfx.mec.hpd_eop_obj,
+									  &adev->gfx.mec.hpd_eop_gpu_addr,
+									  (void **)&hpd);
+		if (r) {
+			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+			return r;
+		}
 
-	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_VRAM,
-				      &adev->gfx.mec.hpd_eop_obj,
-				      &adev->gfx.mec.hpd_eop_gpu_addr,
-				      (void **)&hpd);
-	if (r) {
-		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
-		return r;
-	}
-
-	memset(hpd, 0, mec_hpd_size);
+		memset(hpd, 0, mec_hpd_size);
 
-	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
-	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+	}
 
 	return 0;
 }
@@ -5294,7 +5295,7 @@ static int gfx_v8_0_early_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
-	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+	adev->gfx.num_compute_rings = amdgpu_num_kcq_user_set;
 	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
 	gfx_v8_0_set_ring_funcs(adev);
 	gfx_v8_0_set_irq_funcs(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index e4e751f..43bcfe3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1938,22 +1938,23 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
 	/* take ownership of the relevant compute queues */
 	amdgpu_gfx_compute_queue_acquire(adev);
 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
+	if (mec_hpd_size) {
+		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+									  AMDGPU_GEM_DOMAIN_VRAM,
+									  &adev->gfx.mec.hpd_eop_obj,
+									  &adev->gfx.mec.hpd_eop_gpu_addr,
+									  (void **)&hpd);
+		if (r) {
+			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+			gfx_v9_0_mec_fini(adev);
+			return r;
+		}
 
-	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_VRAM,
-				      &adev->gfx.mec.hpd_eop_obj,
-				      &adev->gfx.mec.hpd_eop_gpu_addr,
-				      (void **)&hpd);
-	if (r) {
-		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
-		gfx_v9_0_mec_fini(adev);
-		return r;
-	}
-
-	memset(hpd, 0, mec_hpd_size);
+		memset(hpd, 0, mec_hpd_size);
 
-	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
-	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+	}
 
 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
 
@@ -4625,7 +4626,7 @@ static int gfx_v9_0_early_init(void *handle)
 		adev->gfx.num_gfx_rings = 0;
 	else
 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
-	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+	adev->gfx.num_compute_rings = amdgpu_num_kcq_user_set;
 	gfx_v9_0_set_kiq_pm4_funcs(adev);
 	gfx_v9_0_set_ring_funcs(adev);
 	gfx_v9_0_set_irq_funcs(adev);
-- 
2.7.4

* Re: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQs we want
  2020-07-27  8:21 [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQs we want Monk Liu
@ 2020-07-27  9:26 ` Christian König
  2020-07-27 16:02   ` Felix Kuehling
From: Christian König @ 2020-07-27  9:26 UTC
  To: Monk Liu, amd-gfx

On 27.07.20 at 10:21, Monk Liu wrote:
> what:
> KCQs cost many clock cycles during world switch, which significantly
> impacts multi-VF performance
>
> how:
> introduce a parameter to control the number of KCQs, to avoid a
> performance drop when no KCQs are needed
>
> notes:
> this parameter only affects gfx 8/9/10

Sounds like a good idea to me, but that needs a different name. Outside 
AMD most people don't know what a KCQ is.

Just use compute queue or something similar as the name for this.

Another comment below.

>
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 +++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  4 ++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    | 27 +++++++++++++-------------
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c     | 30 +++++++++++++++--------------
>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c      | 29 ++++++++++++++--------------
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c      | 31 +++++++++++++++---------------
>   7 files changed, 69 insertions(+), 56 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index e97c088..71a3d6a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -201,6 +201,7 @@ extern int amdgpu_si_support;
>   #ifdef CONFIG_DRM_AMDGPU_CIK
>   extern int amdgpu_cik_support;
>   #endif
> +extern int amdgpu_num_kcq_user_set;
>   
>   #define AMDGPU_VM_MAX_NUM_CTX			4096
>   #define AMDGPU_SG_THRESHOLD			(256*1024*1024)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 62ecac9..61c7583 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -1199,6 +1199,9 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
>   
>   	amdgpu_gmc_tmz_set(adev);
>   
> +	if (amdgpu_num_kcq_user_set > 8 || amdgpu_num_kcq_user_set < 0)
> +		amdgpu_num_kcq_user_set = 8;

This needs a warning or error message if we overwrite invalid
user-provided parameters.
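
Something like this should work (just a sketch; the exact message
wording is up to you):

	if (amdgpu_num_kcq_user_set > 8 || amdgpu_num_kcq_user_set < 0) {
		/* warn instead of silently overriding the user's setting */
		dev_warn(adev->dev,
			 "num_kcq is out of range (0 - 8), falling back to 8\n");
		amdgpu_num_kcq_user_set = 8;
	}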

Christian.

* Re: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQs we want
  2020-07-27  9:26 ` Christian König
@ 2020-07-27 16:02   ` Felix Kuehling
  2020-07-27 19:34     ` Christian König
From: Felix Kuehling @ 2020-07-27 16:02 UTC
  To: christian.koenig, Monk Liu, amd-gfx

On 2020-07-27 at 5:26 a.m., Christian König wrote:
> On 27.07.20 at 10:21, Monk Liu wrote:
>> what:
>> KCQs cost many clock cycles during world switch, which significantly
>> impacts multi-VF performance
>>
>> how:
>> introduce a parameter to control the number of KCQs, to avoid a
>> performance drop when no KCQs are needed
>>
>> notes:
>> this parameter only affects gfx 8/9/10
>
> Sounds like a good idea to me, but that needs a different name.
> Outside AMD most people don't know what a KCQ is.
>
> Just use compute queue or something similar as the name for this.

Just "compute queue" would be confusing for ROCm users. Maybe "legacy
compute queues"?

Regards,
  Felix


* Re: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQs we want
  2020-07-27 16:02   ` Felix Kuehling
@ 2020-07-27 19:34     ` Christian König
From: Christian König @ 2020-07-27 19:34 UTC
  To: Felix Kuehling, Monk Liu, amd-gfx

On 27.07.20 at 18:02, Felix Kuehling wrote:
> On 2020-07-27 at 5:26 a.m., Christian König wrote:
>> On 27.07.20 at 10:21, Monk Liu wrote:
>>> what:
>>> KCQs cost many clock cycles during world switch, which significantly
>>> impacts multi-VF performance
>>>
>>> how:
>>> introduce a parameter to control the number of KCQs, to avoid a
>>> performance drop when no KCQs are needed
>>>
>>> notes:
>>> this parameter only affects gfx 8/9/10
>> Sounds like a good idea to me, but that needs a different name.
>> Outside AMD most people don't know what a KCQ is.
>>
>> Just use compute queue or something similar as the name for this.
> Just "compute queue" would be confusing for ROCm users. Maybe "legacy
> compute queues"?

"kernel compute queues" is just fine, we just shouldn't shorten it.

And we should especially drop the "_user_set" postfix.
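
For example (just a sketch, keeping the same default as this patch):

	int amdgpu_num_kcq = 8;

	MODULE_PARM_DESC(num_kcq, "number of kernel compute queues to set up (0 to 8, values outside that range fall back to 8, only affects gfx 8+)");
	module_param_named(num_kcq, amdgpu_num_kcq, int, 0444);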

Regards,
Christian.
