* [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want
@ 2020-07-27 8:21 Monk Liu
2020-07-27 9:26 ` Christian König
0 siblings, 1 reply; 4+ messages in thread
From: Monk Liu @ 2020-07-27 8:21 UTC (permalink / raw)
To: amd-gfx; +Cc: Monk Liu
what:
KCQ costs many clocks during world switch, which heavily impacts
multi-VF performance

how:
introduce a parameter to control the number of KCQs, to avoid the
performance drop when no KCQ is needed

notes:
this parameter only affects gfx 8/9/10
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++++
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 27 +++++++++++++-------------
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30 +++++++++++++++--------------
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 29 ++++++++++++++--------------
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 31 +++++++++++++++---------------
7 files changed, 69 insertions(+), 56 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e97c088..71a3d6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -201,6 +201,7 @@ extern int amdgpu_si_support;
#ifdef CONFIG_DRM_AMDGPU_CIK
extern int amdgpu_cik_support;
#endif
+extern int amdgpu_num_kcq_user_set;
#define AMDGPU_VM_MAX_NUM_CTX 4096
#define AMDGPU_SG_THRESHOLD (256*1024*1024)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 62ecac9..61c7583 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1199,6 +1199,9 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
amdgpu_gmc_tmz_set(adev);
+ if (amdgpu_num_kcq_user_set > 8 || amdgpu_num_kcq_user_set < 0)
+ amdgpu_num_kcq_user_set = 8;
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6291f5f..03a94e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -150,6 +150,7 @@ int amdgpu_noretry;
int amdgpu_force_asic_type = -1;
int amdgpu_tmz = 0;
int amdgpu_reset_method = -1; /* auto */
+int amdgpu_num_kcq_user_set = 8;
struct amdgpu_mgpu_info mgpu_info = {
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
@@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
module_param_named(reset_method, amdgpu_reset_method, int, 0444);
+MODULE_PARM_DESC(num_kcq, "number of KCQ user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)");
+module_param_named(num_kcq, amdgpu_num_kcq_user_set, int, 0444);
+
static const struct pci_device_id pciidlist[] = {
#ifdef CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 8eff017..0b59049 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -202,7 +202,7 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
{
- int i, queue, pipe, mec;
+ int i, queue, pipe, mec, j = 0;
bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
/* policy for amdgpu compute queue ownership */
@@ -219,23 +219,24 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
if (multipipe_policy) {
/* policy: amdgpu owns the first two queues of the first MEC */
- if (mec == 0 && queue < 2)
- set_bit(i, adev->gfx.mec.queue_bitmap);
+ if (mec == 0 && queue < 2) {
+ if (j++ < adev->gfx.num_compute_rings)
+ set_bit(i, adev->gfx.mec.queue_bitmap);
+ else
+ break;
+ }
} else {
/* policy: amdgpu owns all queues in the first pipe */
- if (mec == 0 && pipe == 0)
- set_bit(i, adev->gfx.mec.queue_bitmap);
+ if (mec == 0 && pipe == 0) {
+ if (j++ < adev->gfx.num_compute_rings)
+ set_bit(i, adev->gfx.mec.queue_bitmap);
+ else
+ break;
+ }
}
}
- /* update the number of active compute rings */
- adev->gfx.num_compute_rings =
- bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
-
- /* If you hit this case and edited the policy, you probably just
- * need to increase AMDGPU_MAX_COMPUTE_RINGS */
- if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
- adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+ dev_info(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
}
void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index db9f1e8..2ad8393 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4022,21 +4022,23 @@ static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
amdgpu_gfx_compute_queue_acquire(adev);
mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;
- r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT,
- &adev->gfx.mec.hpd_eop_obj,
- &adev->gfx.mec.hpd_eop_gpu_addr,
- (void **)&hpd);
- if (r) {
- dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
- gfx_v10_0_mec_fini(adev);
- return r;
- }
+ if (mec_hpd_size) {
+ r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.hpd_eop_obj,
+ &adev->gfx.mec.hpd_eop_gpu_addr,
+ (void **)&hpd);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+ gfx_v10_0_mec_fini(adev);
+ return r;
+ }
- memset(hpd, 0, mec_hpd_size);
+ memset(hpd, 0, mec_hpd_size);
- amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
- amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ }
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
@@ -7159,7 +7161,7 @@ static int gfx_v10_0_early_init(void *handle)
break;
}
- adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+ adev->gfx.num_compute_rings = amdgpu_num_kcq_user_set;
gfx_v10_0_set_kiq_pm4_funcs(adev);
gfx_v10_0_set_ring_funcs(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 8d72089..6d95b4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1343,21 +1343,22 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
amdgpu_gfx_compute_queue_acquire(adev);
mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
+ if (mec_hpd_size) {
+ r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.mec.hpd_eop_obj,
+ &adev->gfx.mec.hpd_eop_gpu_addr,
+ (void **)&hpd);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+ return r;
+ }
- r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->gfx.mec.hpd_eop_obj,
- &adev->gfx.mec.hpd_eop_gpu_addr,
- (void **)&hpd);
- if (r) {
- dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
- return r;
- }
-
- memset(hpd, 0, mec_hpd_size);
+ memset(hpd, 0, mec_hpd_size);
- amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
- amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ }
return 0;
}
@@ -5294,7 +5295,7 @@ static int gfx_v8_0_early_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
- adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+ adev->gfx.num_compute_rings = amdgpu_num_kcq_user_set;
adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
gfx_v8_0_set_ring_funcs(adev);
gfx_v8_0_set_irq_funcs(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index e4e751f..43bcfe3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1938,22 +1938,23 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
/* take ownership of the relevant compute queues */
amdgpu_gfx_compute_queue_acquire(adev);
mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
+ if (mec_hpd_size) {
+ r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.mec.hpd_eop_obj,
+ &adev->gfx.mec.hpd_eop_gpu_addr,
+ (void **)&hpd);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+ gfx_v9_0_mec_fini(adev);
+ return r;
+ }
- r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->gfx.mec.hpd_eop_obj,
- &adev->gfx.mec.hpd_eop_gpu_addr,
- (void **)&hpd);
- if (r) {
- dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
- gfx_v9_0_mec_fini(adev);
- return r;
- }
-
- memset(hpd, 0, mec_hpd_size);
+ memset(hpd, 0, mec_hpd_size);
- amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
- amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ }
mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
@@ -4625,7 +4626,7 @@ static int gfx_v9_0_early_init(void *handle)
adev->gfx.num_gfx_rings = 0;
else
adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
- adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+ adev->gfx.num_compute_rings = amdgpu_num_kcq_user_set;
gfx_v9_0_set_kiq_pm4_funcs(adev);
gfx_v9_0_set_ring_funcs(adev);
gfx_v9_0_set_irq_funcs(adev);
--
2.7.4
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want
2020-07-27 8:21 [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want Monk Liu
@ 2020-07-27 9:26 ` Christian König
2020-07-27 16:02 ` Felix Kuehling
0 siblings, 1 reply; 4+ messages in thread
From: Christian König @ 2020-07-27 9:26 UTC (permalink / raw)
To: Monk Liu, amd-gfx
Am 27.07.20 um 10:21 schrieb Monk Liu:
> what:
> KCQ cost many clocks during world switch which impacts a lot to multi-VF
> performance
>
> how:
> introduce a paramter to control the number of KCQ to avoid performance
> drop if there is no KQC needed
>
> notes:
> this paramter only affects gfx 8/9/10
Sounds like a good idea to me, but that needs a different name. Outside
AMD most people don't know what a KCQ is.
Just use compute queue or similar as name for this.
Another comment below.
>
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 27 +++++++++++++-------------
> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30 +++++++++++++++--------------
> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 29 ++++++++++++++--------------
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 31 +++++++++++++++---------------
> 7 files changed, 69 insertions(+), 56 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index e97c088..71a3d6a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -201,6 +201,7 @@ extern int amdgpu_si_support;
> #ifdef CONFIG_DRM_AMDGPU_CIK
> extern int amdgpu_cik_support;
> #endif
> +extern int amdgpu_num_kcq_user_set;
>
> #define AMDGPU_VM_MAX_NUM_CTX 4096
> #define AMDGPU_SG_THRESHOLD (256*1024*1024)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 62ecac9..61c7583 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -1199,6 +1199,9 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
>
> amdgpu_gmc_tmz_set(adev);
>
> + if (amdgpu_num_kcq_user_set > 8 || amdgpu_num_kcq_user_set < 0)
> + amdgpu_num_kcq_user_set = 8;
This needs a warning or error message if we overwrite invalid user
provided parameters.
Christian.
> +
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 6291f5f..03a94e9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -150,6 +150,7 @@ int amdgpu_noretry;
> int amdgpu_force_asic_type = -1;
> int amdgpu_tmz = 0;
> int amdgpu_reset_method = -1; /* auto */
> +int amdgpu_num_kcq_user_set = 8;
>
> struct amdgpu_mgpu_info mgpu_info = {
> .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
> @@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
> MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
> module_param_named(reset_method, amdgpu_reset_method, int, 0444);
>
> +MODULE_PARM_DESC(num_kcq, "number of KCQ user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)");
> +module_param_named(num_kcq, amdgpu_num_kcq_user_set, int, 0444);
> +
> static const struct pci_device_id pciidlist[] = {
> #ifdef CONFIG_DRM_AMDGPU_SI
> {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 8eff017..0b59049 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -202,7 +202,7 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
>
> void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
> {
> - int i, queue, pipe, mec;
> + int i, queue, pipe, mec, j = 0;
> bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
>
> /* policy for amdgpu compute queue ownership */
> @@ -219,23 +219,24 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
>
> if (multipipe_policy) {
> /* policy: amdgpu owns the first two queues of the first MEC */
> - if (mec == 0 && queue < 2)
> - set_bit(i, adev->gfx.mec.queue_bitmap);
> + if (mec == 0 && queue < 2) {
> + if (j++ < adev->gfx.num_compute_rings)
> + set_bit(i, adev->gfx.mec.queue_bitmap);
> + else
> + break;
> + }
> } else {
> /* policy: amdgpu owns all queues in the first pipe */
> - if (mec == 0 && pipe == 0)
> - set_bit(i, adev->gfx.mec.queue_bitmap);
> + if (mec == 0 && pipe == 0) {
> + if (j++ < adev->gfx.num_compute_rings)
> + set_bit(i, adev->gfx.mec.queue_bitmap);
> + else
> + break;
> + }
> }
> }
>
> - /* update the number of active compute rings */
> - adev->gfx.num_compute_rings =
> - bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
> -
> - /* If you hit this case and edited the policy, you probably just
> - * need to increase AMDGPU_MAX_COMPUTE_RINGS */
> - if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
> - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> + dev_info(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
> }
>
> void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index db9f1e8..2ad8393 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -4022,21 +4022,23 @@ static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
> amdgpu_gfx_compute_queue_acquire(adev);
> mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;
>
> - r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> - AMDGPU_GEM_DOMAIN_GTT,
> - &adev->gfx.mec.hpd_eop_obj,
> - &adev->gfx.mec.hpd_eop_gpu_addr,
> - (void **)&hpd);
> - if (r) {
> - dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> - gfx_v10_0_mec_fini(adev);
> - return r;
> - }
> + if (mec_hpd_size) {
> + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_GTT,
> + &adev->gfx.mec.hpd_eop_obj,
> + &adev->gfx.mec.hpd_eop_gpu_addr,
> + (void **)&hpd);
> + if (r) {
> + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> + gfx_v10_0_mec_fini(adev);
> + return r;
> + }
>
> - memset(hpd, 0, mec_hpd_size);
> + memset(hpd, 0, mec_hpd_size);
>
> - amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> + }
>
> if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
> mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
> @@ -7159,7 +7161,7 @@ static int gfx_v10_0_early_init(void *handle)
> break;
> }
>
> - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> + adev->gfx.num_compute_rings = amdgpu_num_kcq_user_set;
>
> gfx_v10_0_set_kiq_pm4_funcs(adev);
> gfx_v10_0_set_ring_funcs(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 8d72089..6d95b4b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -1343,21 +1343,22 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
> amdgpu_gfx_compute_queue_acquire(adev);
>
> mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
> + if (mec_hpd_size) {
> + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + &adev->gfx.mec.hpd_eop_obj,
> + &adev->gfx.mec.hpd_eop_gpu_addr,
> + (void **)&hpd);
> + if (r) {
> + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> + return r;
> + }
>
> - r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> - AMDGPU_GEM_DOMAIN_VRAM,
> - &adev->gfx.mec.hpd_eop_obj,
> - &adev->gfx.mec.hpd_eop_gpu_addr,
> - (void **)&hpd);
> - if (r) {
> - dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> - return r;
> - }
> -
> - memset(hpd, 0, mec_hpd_size);
> + memset(hpd, 0, mec_hpd_size);
>
> - amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> + }
>
> return 0;
> }
> @@ -5294,7 +5295,7 @@ static int gfx_v8_0_early_init(void *handle)
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>
> adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
> - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> + adev->gfx.num_compute_rings = amdgpu_num_kcq_user_set;
> adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
> gfx_v8_0_set_ring_funcs(adev);
> gfx_v8_0_set_irq_funcs(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index e4e751f..43bcfe3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -1938,22 +1938,23 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
> /* take ownership of the relevant compute queues */
> amdgpu_gfx_compute_queue_acquire(adev);
> mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
> + if (mec_hpd_size) {
> + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + &adev->gfx.mec.hpd_eop_obj,
> + &adev->gfx.mec.hpd_eop_gpu_addr,
> + (void **)&hpd);
> + if (r) {
> + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> + gfx_v9_0_mec_fini(adev);
> + return r;
> + }
>
> - r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> - AMDGPU_GEM_DOMAIN_VRAM,
> - &adev->gfx.mec.hpd_eop_obj,
> - &adev->gfx.mec.hpd_eop_gpu_addr,
> - (void **)&hpd);
> - if (r) {
> - dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> - gfx_v9_0_mec_fini(adev);
> - return r;
> - }
> -
> - memset(hpd, 0, mec_hpd_size);
> + memset(hpd, 0, mec_hpd_size);
>
> - amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> + }
>
> mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
>
> @@ -4625,7 +4626,7 @@ static int gfx_v9_0_early_init(void *handle)
> adev->gfx.num_gfx_rings = 0;
> else
> adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
> - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> + adev->gfx.num_compute_rings = amdgpu_num_kcq_user_set;
> gfx_v9_0_set_kiq_pm4_funcs(adev);
> gfx_v9_0_set_ring_funcs(adev);
> gfx_v9_0_set_irq_funcs(adev);
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want
2020-07-27 9:26 ` Christian König
@ 2020-07-27 16:02 ` Felix Kuehling
2020-07-27 19:34 ` Christian König
0 siblings, 1 reply; 4+ messages in thread
From: Felix Kuehling @ 2020-07-27 16:02 UTC (permalink / raw)
To: christian.koenig, Monk Liu, amd-gfx
Am 2020-07-27 um 5:26 a.m. schrieb Christian König:
> Am 27.07.20 um 10:21 schrieb Monk Liu:
>> what:
>> KCQ cost many clocks during world switch which impacts a lot to multi-VF
>> performance
>>
>> how:
>> introduce a paramter to control the number of KCQ to avoid performance
>> drop if there is no KQC needed
>>
>> notes:
>> this paramter only affects gfx 8/9/10
>
> Sounds like a good idea to me, but that needs a different name.
> Outside AMD most people don't know what a KCQ is.
>
> Just use compute queue or similar as name for this.
Just "compute queue" would be confusing for ROCm users. Maybe "legacy
compute queues"?
Regards,
Felix
>
> Another comment below.
>
>>
>> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++
>> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++++
>> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 27
>> +++++++++++++-------------
>> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30
>> +++++++++++++++--------------
>> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 29
>> ++++++++++++++--------------
>> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 31
>> +++++++++++++++---------------
>> 7 files changed, 69 insertions(+), 56 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index e97c088..71a3d6a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -201,6 +201,7 @@ extern int amdgpu_si_support;
>> #ifdef CONFIG_DRM_AMDGPU_CIK
>> extern int amdgpu_cik_support;
>> #endif
>> +extern int amdgpu_num_kcq_user_set;
>> #define AMDGPU_VM_MAX_NUM_CTX 4096
>> #define AMDGPU_SG_THRESHOLD (256*1024*1024)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index 62ecac9..61c7583 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -1199,6 +1199,9 @@ static int amdgpu_device_check_arguments(struct
>> amdgpu_device *adev)
>> amdgpu_gmc_tmz_set(adev);
>> + if (amdgpu_num_kcq_user_set > 8 || amdgpu_num_kcq_user_set < 0)
>> + amdgpu_num_kcq_user_set = 8;
>
> This needs a warning or error message if we overwrite invalid user
> provided parameters.
>
> Christian.
>
>> +
>> return 0;
>> }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> index 6291f5f..03a94e9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> @@ -150,6 +150,7 @@ int amdgpu_noretry;
>> int amdgpu_force_asic_type = -1;
>> int amdgpu_tmz = 0;
>> int amdgpu_reset_method = -1; /* auto */
>> +int amdgpu_num_kcq_user_set = 8;
>> struct amdgpu_mgpu_info mgpu_info = {
>> .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
>> @@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
>> MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto
>> (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
>> module_param_named(reset_method, amdgpu_reset_method, int, 0444);
>> +MODULE_PARM_DESC(num_kcq, "number of KCQ user want to setup (8 if
>> set to greater than 8 or less than 0, only affect gfx 8+)");
>> +module_param_named(num_kcq, amdgpu_num_kcq_user_set, int, 0444);
>> +
>> static const struct pci_device_id pciidlist[] = {
>> #ifdef CONFIG_DRM_AMDGPU_SI
>> {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> index 8eff017..0b59049 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> @@ -202,7 +202,7 @@ bool
>> amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
>> void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
>> {
>> - int i, queue, pipe, mec;
>> + int i, queue, pipe, mec, j = 0;
>> bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
>> /* policy for amdgpu compute queue ownership */
>> @@ -219,23 +219,24 @@ void amdgpu_gfx_compute_queue_acquire(struct
>> amdgpu_device *adev)
>> if (multipipe_policy) {
>> /* policy: amdgpu owns the first two queues of the
>> first MEC */
>> - if (mec == 0 && queue < 2)
>> - set_bit(i, adev->gfx.mec.queue_bitmap);
>> + if (mec == 0 && queue < 2) {
>> + if (j++ < adev->gfx.num_compute_rings)
>> + set_bit(i, adev->gfx.mec.queue_bitmap);
>> + else
>> + break;
>> + }
>> } else {
>> /* policy: amdgpu owns all queues in the first pipe */
>> - if (mec == 0 && pipe == 0)
>> - set_bit(i, adev->gfx.mec.queue_bitmap);
>> + if (mec == 0 && pipe == 0) {
>> + if (j++ < adev->gfx.num_compute_rings)
>> + set_bit(i, adev->gfx.mec.queue_bitmap);
>> + else
>> + break;
>> + }
>> }
>> }
>> - /* update the number of active compute rings */
>> - adev->gfx.num_compute_rings =
>> - bitmap_weight(adev->gfx.mec.queue_bitmap,
>> AMDGPU_MAX_COMPUTE_QUEUES);
>> -
>> - /* If you hit this case and edited the policy, you probably just
>> - * need to increase AMDGPU_MAX_COMPUTE_RINGS */
>> - if (WARN_ON(adev->gfx.num_compute_rings >
>> AMDGPU_MAX_COMPUTE_RINGS))
>> - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
>> + dev_info(adev->dev, "mec queue bitmap weight=%d\n",
>> bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
>> }
>> void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> index db9f1e8..2ad8393 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> @@ -4022,21 +4022,23 @@ static int gfx_v10_0_mec_init(struct
>> amdgpu_device *adev)
>> amdgpu_gfx_compute_queue_acquire(adev);
>> mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;
>> - r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>> - AMDGPU_GEM_DOMAIN_GTT,
>> - &adev->gfx.mec.hpd_eop_obj,
>> - &adev->gfx.mec.hpd_eop_gpu_addr,
>> - (void **)&hpd);
>> - if (r) {
>> - dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>> - gfx_v10_0_mec_fini(adev);
>> - return r;
>> - }
>> + if (mec_hpd_size) {
>> + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>> + AMDGPU_GEM_DOMAIN_GTT,
>> + &adev->gfx.mec.hpd_eop_obj,
>> + &adev->gfx.mec.hpd_eop_gpu_addr,
>> + (void **)&hpd);
>> + if (r) {
>> + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>> + gfx_v10_0_mec_fini(adev);
>> + return r;
>> + }
>> - memset(hpd, 0, mec_hpd_size);
>> + memset(hpd, 0, mec_hpd_size);
>> - amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>> - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>> + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>> + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>> + }
>> if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
>> mec_hdr = (const struct gfx_firmware_header_v1_0
>> *)adev->gfx.mec_fw->data;
>> @@ -7159,7 +7161,7 @@ static int gfx_v10_0_early_init(void *handle)
>> break;
>> }
>> - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
>> + adev->gfx.num_compute_rings = amdgpu_num_kcq_user_set;
>> gfx_v10_0_set_kiq_pm4_funcs(adev);
>> gfx_v10_0_set_ring_funcs(adev);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> index 8d72089..6d95b4b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> @@ -1343,21 +1343,22 @@ static int gfx_v8_0_mec_init(struct
>> amdgpu_device *adev)
>> amdgpu_gfx_compute_queue_acquire(adev);
>> mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
>> + if (mec_hpd_size) {
>> + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>> + AMDGPU_GEM_DOMAIN_VRAM,
>> + &adev->gfx.mec.hpd_eop_obj,
>> + &adev->gfx.mec.hpd_eop_gpu_addr,
>> + (void **)&hpd);
>> + if (r) {
>> + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>> + return r;
>> + }
>> - r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>> - AMDGPU_GEM_DOMAIN_VRAM,
>> - &adev->gfx.mec.hpd_eop_obj,
>> - &adev->gfx.mec.hpd_eop_gpu_addr,
>> - (void **)&hpd);
>> - if (r) {
>> - dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>> - return r;
>> - }
>> -
>> - memset(hpd, 0, mec_hpd_size);
>> + memset(hpd, 0, mec_hpd_size);
>> - amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>> - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>> + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>> + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>> + }
>> return 0;
>> }
>> @@ -5294,7 +5295,7 @@ static int gfx_v8_0_early_init(void *handle)
>> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>> adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
>> - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
>> + adev->gfx.num_compute_rings = amdgpu_num_kcq_user_set;
>> adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
>> gfx_v8_0_set_ring_funcs(adev);
>> gfx_v8_0_set_irq_funcs(adev);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> index e4e751f..43bcfe3 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> @@ -1938,22 +1938,23 @@ static int gfx_v9_0_mec_init(struct
>> amdgpu_device *adev)
>> /* take ownership of the relevant compute queues */
>> amdgpu_gfx_compute_queue_acquire(adev);
>> mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
>> + if (mec_hpd_size) {
>> + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>> + AMDGPU_GEM_DOMAIN_VRAM,
>> + &adev->gfx.mec.hpd_eop_obj,
>> + &adev->gfx.mec.hpd_eop_gpu_addr,
>> + (void **)&hpd);
>> + if (r) {
>> + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>> + gfx_v9_0_mec_fini(adev);
>> + return r;
>> + }
>> - r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>> - AMDGPU_GEM_DOMAIN_VRAM,
>> - &adev->gfx.mec.hpd_eop_obj,
>> - &adev->gfx.mec.hpd_eop_gpu_addr,
>> - (void **)&hpd);
>> - if (r) {
>> - dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>> - gfx_v9_0_mec_fini(adev);
>> - return r;
>> - }
>> -
>> - memset(hpd, 0, mec_hpd_size);
>> + memset(hpd, 0, mec_hpd_size);
>> - amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>> - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>> + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>> + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>> + }
>> mec_hdr = (const struct gfx_firmware_header_v1_0
>> *)adev->gfx.mec_fw->data;
>> @@ -4625,7 +4626,7 @@ static int gfx_v9_0_early_init(void *handle)
>> adev->gfx.num_gfx_rings = 0;
>> else
>> adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
>> - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
>> + adev->gfx.num_compute_rings = amdgpu_num_kcq_user_set;
>> gfx_v9_0_set_kiq_pm4_funcs(adev);
>> gfx_v9_0_set_ring_funcs(adev);
>> gfx_v9_0_set_irq_funcs(adev);
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=02%7C01%7Cfelix.kuehling%40amd.com%7Ce56893660d9d41b9389f08d8320f21b0%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637314387926556351&sdata=jqGPZmx8HRKt4V1uZlGDIQWW5vKckF%2B%2Fc%2FX6%2F7joznQ%3D&reserved=0
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want
2020-07-27 16:02 ` Felix Kuehling
@ 2020-07-27 19:34 ` Christian König
0 siblings, 0 replies; 4+ messages in thread
From: Christian König @ 2020-07-27 19:34 UTC (permalink / raw)
To: Felix Kuehling, Monk Liu, amd-gfx
Am 27.07.20 um 18:02 schrieb Felix Kuehling:
> Am 2020-07-27 um 5:26 a.m. schrieb Christian König:
>> Am 27.07.20 um 10:21 schrieb Monk Liu:
>>> what:
>>> KCQs cost many clocks during world switch, which heavily impacts
>>> multi-VF performance
>>>
>>> how:
>>> introduce a parameter to control the number of KCQs, to avoid a performance
>>> drop if no KCQs are needed
>>>
>>> notes:
>>> this parameter only affects gfx 8/9/10
>> Sounds like a good idea to me, but that needs a different name.
>> Outside AMD most people don't know what a KCQ is.
>>
>> Just use compute queue or similar as name for this.
> Just "compute queue" would be confusing for ROCm users. Maybe "legacy
> compute queues"?
"kernel compute queues" is just fine, we just shouldn't shorten it.
And we should especially drop the "_user_set" postfix.
Regards,
Christian.
>
> Regards,
> Felix
>
>
>> Another comment below.
>>
>>> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++++
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 27
>>> +++++++++++++-------------
>>> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30
>>> +++++++++++++++--------------
>>> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 29
>>> ++++++++++++++--------------
>>> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 31
>>> +++++++++++++++---------------
>>> 7 files changed, 69 insertions(+), 56 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index e97c088..71a3d6a 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -201,6 +201,7 @@ extern int amdgpu_si_support;
>>> #ifdef CONFIG_DRM_AMDGPU_CIK
>>> extern int amdgpu_cik_support;
>>> #endif
>>> +extern int amdgpu_num_kcq_user_set;
>>> #define AMDGPU_VM_MAX_NUM_CTX 4096
>>> #define AMDGPU_SG_THRESHOLD (256*1024*1024)
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index 62ecac9..61c7583 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -1199,6 +1199,9 @@ static int amdgpu_device_check_arguments(struct
>>> amdgpu_device *adev)
>>> amdgpu_gmc_tmz_set(adev);
>>> + if (amdgpu_num_kcq_user_set > 8 || amdgpu_num_kcq_user_set < 0)
>>> + amdgpu_num_kcq_user_set = 8;
>> This needs a warning or error message if we overwrite invalid user
>> provided parameters.
>>
>> Christian.
>>
>>> +
>>> return 0;
>>> }
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> index 6291f5f..03a94e9 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> @@ -150,6 +150,7 @@ int amdgpu_noretry;
>>> int amdgpu_force_asic_type = -1;
>>> int amdgpu_tmz = 0;
>>> int amdgpu_reset_method = -1; /* auto */
>>> +int amdgpu_num_kcq_user_set = 8;
>>> struct amdgpu_mgpu_info mgpu_info = {
>>> .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
>>> @@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
>>> MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto
>>> (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
>>> module_param_named(reset_method, amdgpu_reset_method, int, 0444);
>>> +MODULE_PARM_DESC(num_kcq, "number of KCQ user want to setup (8 if
>>> set to greater than 8 or less than 0, only affect gfx 8+)");
>>> +module_param_named(num_kcq, amdgpu_num_kcq_user_set, int, 0444);
>>> +
>>> static const struct pci_device_id pciidlist[] = {
>>> #ifdef CONFIG_DRM_AMDGPU_SI
>>> {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>> index 8eff017..0b59049 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>> @@ -202,7 +202,7 @@ bool
>>> amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
>>> void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
>>> {
>>> - int i, queue, pipe, mec;
>>> + int i, queue, pipe, mec, j = 0;
>>> bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
>>> /* policy for amdgpu compute queue ownership */
>>> @@ -219,23 +219,24 @@ void amdgpu_gfx_compute_queue_acquire(struct
>>> amdgpu_device *adev)
>>> if (multipipe_policy) {
>>> /* policy: amdgpu owns the first two queues of the
>>> first MEC */
>>> - if (mec == 0 && queue < 2)
>>> - set_bit(i, adev->gfx.mec.queue_bitmap);
>>> + if (mec == 0 && queue < 2) {
>>> + if (j++ < adev->gfx.num_compute_rings)
>>> + set_bit(i, adev->gfx.mec.queue_bitmap);
>>> + else
>>> + break;
>>> + }
>>> } else {
>>> /* policy: amdgpu owns all queues in the first pipe */
>>> - if (mec == 0 && pipe == 0)
>>> - set_bit(i, adev->gfx.mec.queue_bitmap);
>>> + if (mec == 0 && pipe == 0) {
>>> + if (j++ < adev->gfx.num_compute_rings)
>>> + set_bit(i, adev->gfx.mec.queue_bitmap);
>>> + else
>>> + break;
>>> + }
>>> }
>>> }
>>> - /* update the number of active compute rings */
>>> - adev->gfx.num_compute_rings =
>>> - bitmap_weight(adev->gfx.mec.queue_bitmap,
>>> AMDGPU_MAX_COMPUTE_QUEUES);
>>> -
>>> - /* If you hit this case and edited the policy, you probably just
>>> - * need to increase AMDGPU_MAX_COMPUTE_RINGS */
>>> - if (WARN_ON(adev->gfx.num_compute_rings >
>>> AMDGPU_MAX_COMPUTE_RINGS))
>>> - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
>>> + dev_info(adev->dev, "mec queue bitmap weight=%d\n",
>>> bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
>>> }
>>> void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>> index db9f1e8..2ad8393 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>> @@ -4022,21 +4022,23 @@ static int gfx_v10_0_mec_init(struct
>>> amdgpu_device *adev)
>>> amdgpu_gfx_compute_queue_acquire(adev);
>>> mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;
>>> - r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>>> - AMDGPU_GEM_DOMAIN_GTT,
>>> - &adev->gfx.mec.hpd_eop_obj,
>>> - &adev->gfx.mec.hpd_eop_gpu_addr,
>>> - (void **)&hpd);
>>> - if (r) {
>>> - dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>>> - gfx_v10_0_mec_fini(adev);
>>> - return r;
>>> - }
>>> + if (mec_hpd_size) {
>>> + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>>> + AMDGPU_GEM_DOMAIN_GTT,
>>> + &adev->gfx.mec.hpd_eop_obj,
>>> + &adev->gfx.mec.hpd_eop_gpu_addr,
>>> + (void **)&hpd);
>>> + if (r) {
>>> + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>>> + gfx_v10_0_mec_fini(adev);
>>> + return r;
>>> + }
>>> - memset(hpd, 0, mec_hpd_size);
>>> + memset(hpd, 0, mec_hpd_size);
>>> - amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>>> - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>>> + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>>> + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>>> + }
>>> if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
>>> mec_hdr = (const struct gfx_firmware_header_v1_0
>>> *)adev->gfx.mec_fw->data;
>>> @@ -7159,7 +7161,7 @@ static int gfx_v10_0_early_init(void *handle)
>>> break;
>>> }
>>> - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
>>> + adev->gfx.num_compute_rings = amdgpu_num_kcq_user_set;
>>> gfx_v10_0_set_kiq_pm4_funcs(adev);
>>> gfx_v10_0_set_ring_funcs(adev);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>>> index 8d72089..6d95b4b 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>>> @@ -1343,21 +1343,22 @@ static int gfx_v8_0_mec_init(struct
>>> amdgpu_device *adev)
>>> amdgpu_gfx_compute_queue_acquire(adev);
>>> mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
>>> + if (mec_hpd_size) {
>>> + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>>> + AMDGPU_GEM_DOMAIN_VRAM,
>>> + &adev->gfx.mec.hpd_eop_obj,
>>> + &adev->gfx.mec.hpd_eop_gpu_addr,
>>> + (void **)&hpd);
>>> + if (r) {
>>> + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>>> + return r;
>>> + }
>>> - r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>>> - AMDGPU_GEM_DOMAIN_VRAM,
>>> - &adev->gfx.mec.hpd_eop_obj,
>>> - &adev->gfx.mec.hpd_eop_gpu_addr,
>>> - (void **)&hpd);
>>> - if (r) {
>>> - dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>>> - return r;
>>> - }
>>> -
>>> - memset(hpd, 0, mec_hpd_size);
>>> + memset(hpd, 0, mec_hpd_size);
>>> - amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>>> - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>>> + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>>> + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>>> + }
>>> return 0;
>>> }
>>> @@ -5294,7 +5295,7 @@ static int gfx_v8_0_early_init(void *handle)
>>> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>>> adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
>>> - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
>>> + adev->gfx.num_compute_rings = amdgpu_num_kcq_user_set;
>>> adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
>>> gfx_v8_0_set_ring_funcs(adev);
>>> gfx_v8_0_set_irq_funcs(adev);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>> index e4e751f..43bcfe3 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>> @@ -1938,22 +1938,23 @@ static int gfx_v9_0_mec_init(struct
>>> amdgpu_device *adev)
>>> /* take ownership of the relevant compute queues */
>>> amdgpu_gfx_compute_queue_acquire(adev);
>>> mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
>>> + if (mec_hpd_size) {
>>> + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>>> + AMDGPU_GEM_DOMAIN_VRAM,
>>> + &adev->gfx.mec.hpd_eop_obj,
>>> + &adev->gfx.mec.hpd_eop_gpu_addr,
>>> + (void **)&hpd);
>>> + if (r) {
>>> + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>>> + gfx_v9_0_mec_fini(adev);
>>> + return r;
>>> + }
>>> - r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>>> - AMDGPU_GEM_DOMAIN_VRAM,
>>> - &adev->gfx.mec.hpd_eop_obj,
>>> - &adev->gfx.mec.hpd_eop_gpu_addr,
>>> - (void **)&hpd);
>>> - if (r) {
>>> - dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>>> - gfx_v9_0_mec_fini(adev);
>>> - return r;
>>> - }
>>> -
>>> - memset(hpd, 0, mec_hpd_size);
>>> + memset(hpd, 0, mec_hpd_size);
>>> - amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>>> - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>>> + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>>> + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>>> + }
>>> mec_hdr = (const struct gfx_firmware_header_v1_0
>>> *)adev->gfx.mec_fw->data;
>>> @@ -4625,7 +4626,7 @@ static int gfx_v9_0_early_init(void *handle)
>>> adev->gfx.num_gfx_rings = 0;
>>> else
>>> adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
>>> - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
>>> + adev->gfx.num_compute_rings = amdgpu_num_kcq_user_set;
>>> gfx_v9_0_set_kiq_pm4_funcs(adev);
>>> gfx_v9_0_set_ring_funcs(adev);
>>> gfx_v9_0_set_irq_funcs(adev);
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=02%7C01%7Cfelix.kuehling%40amd.com%7Ce56893660d9d41b9389f08d8320f21b0%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637314387926556351&sdata=jqGPZmx8HRKt4V1uZlGDIQWW5vKckF%2B%2Fc%2FX6%2F7joznQ%3D&reserved=0
>>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2020-07-27 19:34 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-27 8:21 [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want Monk Liu
2020-07-27 9:26 ` Christian König
2020-07-27 16:02 ` Felix Kuehling
2020-07-27 19:34 ` Christian König
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.