* [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v4)
@ 2020-07-31 3:42 Monk Liu
2020-07-31 4:53 ` Felix Kuehling
0 siblings, 1 reply; 5+ messages in thread
From: Monk Liu @ 2020-07-31 3:42 UTC (permalink / raw)
To: amd-gfx; +Cc: Monk Liu
what:
the MQD's save and restore of KCQ (kernel compute queue)
cost lots of clocks during world switch which impacts a lot
to multi-VF performance
how:
introduce a paramter to control the number of KCQ to avoid
performance drop if there is no kernel compute queue needed
notes:
this paramter only affects gfx 8/9/10
v2:
refine namings
v3:
choose queues for each ring to that try best to cross pipes evenly.
v4:
fix indentation
some cleanupsin the gfx_compute_queue_acquire() function
TODO:
in the future we will let hypervisor driver to set this paramter
automatically thus no need for user to configure it through
modprobe in virtual machine
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 +++
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 52 +++++++++++++-----------------
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30 +++++++++--------
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 29 +++++++++--------
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 31 +++++++++---------
7 files changed, 80 insertions(+), 72 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e97c088..de11136 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -201,6 +201,7 @@ extern int amdgpu_si_support;
#ifdef CONFIG_DRM_AMDGPU_CIK
extern int amdgpu_cik_support;
#endif
+extern int amdgpu_num_kcq;
#define AMDGPU_VM_MAX_NUM_CTX 4096
#define AMDGPU_SG_THRESHOLD (256*1024*1024)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 62ecac9..cf445bab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1199,6 +1199,11 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
amdgpu_gmc_tmz_set(adev);
+ if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
+ amdgpu_num_kcq = 8;
+ dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid paramter provided by user\n");
+ }
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6291f5f..b545c40 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -150,6 +150,7 @@ int amdgpu_noretry;
int amdgpu_force_asic_type = -1;
int amdgpu_tmz = 0;
int amdgpu_reset_method = -1; /* auto */
+int amdgpu_num_kcq = -1;
struct amdgpu_mgpu_info mgpu_info = {
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
@@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
module_param_named(reset_method, amdgpu_reset_method, int, 0444);
+MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)");
+module_param_named(num_kcq, amdgpu_num_kcq, int, 0444);
+
static const struct pci_device_id pciidlist[] = {
#ifdef CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 8eff017..b43df8e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -202,40 +202,34 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
{
- int i, queue, pipe, mec;
+ int i, queue, pipe;
bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
+ int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe,
+ adev->gfx.num_compute_rings);
+
+ if (multipipe_policy) {
+ /* policy: make queues evenly cross all pipes on MEC1 only */
+ for (i = 0; i < max_queues_per_mec; i++) {
+ pipe = i % adev->gfx.mec.num_pipe_per_mec;
+ queue = (i / adev->gfx.mec.num_pipe_per_mec) %
+ adev->gfx.mec.num_queue_per_pipe;
+
+ set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
+ adev->gfx.mec.queue_bitmap);
+ }
+ } else {
+ /* policy: amdgpu owns all queues in the given pipe */
+ for (i = 0; i < max_queues_per_mec; ++i) {
+ queue = i % adev->gfx.mec.num_queue_per_pipe;
+ pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+ % adev->gfx.mec.num_pipe_per_mec;
- /* policy for amdgpu compute queue ownership */
- for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
- queue = i % adev->gfx.mec.num_queue_per_pipe;
- pipe = (i / adev->gfx.mec.num_queue_per_pipe)
- % adev->gfx.mec.num_pipe_per_mec;
- mec = (i / adev->gfx.mec.num_queue_per_pipe)
- / adev->gfx.mec.num_pipe_per_mec;
-
- /* we've run out of HW */
- if (mec >= adev->gfx.mec.num_mec)
- break;
-
- if (multipipe_policy) {
- /* policy: amdgpu owns the first two queues of the first MEC */
- if (mec == 0 && queue < 2)
- set_bit(i, adev->gfx.mec.queue_bitmap);
- } else {
- /* policy: amdgpu owns all queues in the first pipe */
- if (mec == 0 && pipe == 0)
- set_bit(i, adev->gfx.mec.queue_bitmap);
+ set_bit(i, adev->gfx.mec.queue_bitmap);
}
}
- /* update the number of active compute rings */
- adev->gfx.num_compute_rings =
- bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
-
- /* If you hit this case and edited the policy, you probably just
- * need to increase AMDGPU_MAX_COMPUTE_RINGS */
- if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
- adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+ dev_info(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
}
void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index db9f1e8..3a93b3c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4022,21 +4022,23 @@ static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
amdgpu_gfx_compute_queue_acquire(adev);
mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;
- r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT,
- &adev->gfx.mec.hpd_eop_obj,
- &adev->gfx.mec.hpd_eop_gpu_addr,
- (void **)&hpd);
- if (r) {
- dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
- gfx_v10_0_mec_fini(adev);
- return r;
- }
+ if (mec_hpd_size) {
+ r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.hpd_eop_obj,
+ &adev->gfx.mec.hpd_eop_gpu_addr,
+ (void **)&hpd);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+ gfx_v10_0_mec_fini(adev);
+ return r;
+ }
- memset(hpd, 0, mec_hpd_size);
+ memset(hpd, 0, mec_hpd_size);
- amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
- amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ }
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
@@ -7159,7 +7161,7 @@ static int gfx_v10_0_early_init(void *handle)
break;
}
- adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+ adev->gfx.num_compute_rings = amdgpu_num_kcq;
gfx_v10_0_set_kiq_pm4_funcs(adev);
gfx_v10_0_set_ring_funcs(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 8d72089..eb4b812 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1343,21 +1343,22 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
amdgpu_gfx_compute_queue_acquire(adev);
mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
+ if (mec_hpd_size) {
+ r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.mec.hpd_eop_obj,
+ &adev->gfx.mec.hpd_eop_gpu_addr,
+ (void **)&hpd);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+ return r;
+ }
- r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->gfx.mec.hpd_eop_obj,
- &adev->gfx.mec.hpd_eop_gpu_addr,
- (void **)&hpd);
- if (r) {
- dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
- return r;
- }
-
- memset(hpd, 0, mec_hpd_size);
+ memset(hpd, 0, mec_hpd_size);
- amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
- amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ }
return 0;
}
@@ -5294,7 +5295,7 @@ static int gfx_v8_0_early_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
- adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+ adev->gfx.num_compute_rings = amdgpu_num_kcq;
adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
gfx_v8_0_set_ring_funcs(adev);
gfx_v8_0_set_irq_funcs(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index e4e751f..43ad044 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1938,22 +1938,23 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
/* take ownership of the relevant compute queues */
amdgpu_gfx_compute_queue_acquire(adev);
mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
+ if (mec_hpd_size) {
+ r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.mec.hpd_eop_obj,
+ &adev->gfx.mec.hpd_eop_gpu_addr,
+ (void **)&hpd);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+ gfx_v9_0_mec_fini(adev);
+ return r;
+ }
- r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->gfx.mec.hpd_eop_obj,
- &adev->gfx.mec.hpd_eop_gpu_addr,
- (void **)&hpd);
- if (r) {
- dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
- gfx_v9_0_mec_fini(adev);
- return r;
- }
-
- memset(hpd, 0, mec_hpd_size);
+ memset(hpd, 0, mec_hpd_size);
- amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
- amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ }
mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
@@ -4625,7 +4626,7 @@ static int gfx_v9_0_early_init(void *handle)
adev->gfx.num_gfx_rings = 0;
else
adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
- adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+ adev->gfx.num_compute_rings = amdgpu_num_kcq;
gfx_v9_0_set_kiq_pm4_funcs(adev);
gfx_v9_0_set_ring_funcs(adev);
gfx_v9_0_set_irq_funcs(adev);
--
2.7.4
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v4)
2020-07-31 3:42 [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v4) Monk Liu
@ 2020-07-31 4:53 ` Felix Kuehling
2020-07-31 7:43 ` Liu, Monk
0 siblings, 1 reply; 5+ messages in thread
From: Felix Kuehling @ 2020-07-31 4:53 UTC (permalink / raw)
To: Monk Liu, amd-gfx
Am 2020-07-30 um 11:42 p.m. schrieb Monk Liu:
> what:
> the MQD's save and restore of KCQ (kernel compute queue)
> cost lots of clocks during world switch which impacts a lot
> to multi-VF performance
>
> how:
> introduce a paramter to control the number of KCQ to avoid
> performance drop if there is no kernel compute queue needed
>
> notes:
> this paramter only affects gfx 8/9/10
>
> v2:
> refine namings
>
> v3:
> choose queues for each ring to that try best to cross pipes evenly.
>
> v4:
> fix indentation
> some cleanupsin the gfx_compute_queue_acquire() function
>
> TODO:
> in the future we will let hypervisor driver to set this paramter
> automatically thus no need for user to configure it through
> modprobe in virtual machine
>
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 +++
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 52 +++++++++++++-----------------
> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30 +++++++++--------
> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 29 +++++++++--------
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 31 +++++++++---------
> 7 files changed, 80 insertions(+), 72 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index e97c088..de11136 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -201,6 +201,7 @@ extern int amdgpu_si_support;
> #ifdef CONFIG_DRM_AMDGPU_CIK
> extern int amdgpu_cik_support;
> #endif
> +extern int amdgpu_num_kcq;
>
> #define AMDGPU_VM_MAX_NUM_CTX 4096
> #define AMDGPU_SG_THRESHOLD (256*1024*1024)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 62ecac9..cf445bab 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -1199,6 +1199,11 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
>
> amdgpu_gmc_tmz_set(adev);
>
> + if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
> + amdgpu_num_kcq = 8;
> + dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid paramter provided by user\n");
> + }
> +
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 6291f5f..b545c40 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -150,6 +150,7 @@ int amdgpu_noretry;
> int amdgpu_force_asic_type = -1;
> int amdgpu_tmz = 0;
> int amdgpu_reset_method = -1; /* auto */
> +int amdgpu_num_kcq = -1;
>
> struct amdgpu_mgpu_info mgpu_info = {
> .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
> @@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
> MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
> module_param_named(reset_method, amdgpu_reset_method, int, 0444);
>
> +MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)");
> +module_param_named(num_kcq, amdgpu_num_kcq, int, 0444);
> +
> static const struct pci_device_id pciidlist[] = {
> #ifdef CONFIG_DRM_AMDGPU_SI
> {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 8eff017..b43df8e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -202,40 +202,34 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
>
> void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
> {
> - int i, queue, pipe, mec;
> + int i, queue, pipe;
> bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
> + int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
> + adev->gfx.mec.num_queue_per_pipe,
> + adev->gfx.num_compute_rings);
> +
> + if (multipipe_policy) {
> + /* policy: make queues evenly cross all pipes on MEC1 only */
> + for (i = 0; i < max_queues_per_mec; i++) {
> + pipe = i % adev->gfx.mec.num_pipe_per_mec;
> + queue = (i / adev->gfx.mec.num_pipe_per_mec) %
> + adev->gfx.mec.num_queue_per_pipe;
> +
> + set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
> + adev->gfx.mec.queue_bitmap);
Indentation looks slightly off here.
> + }
> + } else {
> + /* policy: amdgpu owns all queues in the given pipe */
> + for (i = 0; i < max_queues_per_mec; ++i) {
> + queue = i % adev->gfx.mec.num_queue_per_pipe;
> + pipe = (i / adev->gfx.mec.num_queue_per_pipe)
> + % adev->gfx.mec.num_pipe_per_mec;
>
> - /* policy for amdgpu compute queue ownership */
> - for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
> - queue = i % adev->gfx.mec.num_queue_per_pipe;
> - pipe = (i / adev->gfx.mec.num_queue_per_pipe)
> - % adev->gfx.mec.num_pipe_per_mec;
> - mec = (i / adev->gfx.mec.num_queue_per_pipe)
> - / adev->gfx.mec.num_pipe_per_mec;
> -
> - /* we've run out of HW */
> - if (mec >= adev->gfx.mec.num_mec)
> - break;
> -
> - if (multipipe_policy) {
> - /* policy: amdgpu owns the first two queues of the first MEC */
> - if (mec == 0 && queue < 2)
> - set_bit(i, adev->gfx.mec.queue_bitmap);
> - } else {
> - /* policy: amdgpu owns all queues in the first pipe */
> - if (mec == 0 && pipe == 0)
> - set_bit(i, adev->gfx.mec.queue_bitmap);
> + set_bit(i, adev->gfx.mec.queue_bitmap);
If you're not even using queue and pipe here, you don't need to
calculate them above.
> }
> }
>
> - /* update the number of active compute rings */
> - adev->gfx.num_compute_rings =
> - bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
> -
> - /* If you hit this case and edited the policy, you probably just
> - * need to increase AMDGPU_MAX_COMPUTE_RINGS */
> - if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
> - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> + dev_info(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
Should this be a debug message? I don't see this being very useful to a
normal user.
> }
>
> void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index db9f1e8..3a93b3c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -4022,21 +4022,23 @@ static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
> amdgpu_gfx_compute_queue_acquire(adev);
> mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;
>
> - r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> - AMDGPU_GEM_DOMAIN_GTT,
> - &adev->gfx.mec.hpd_eop_obj,
> - &adev->gfx.mec.hpd_eop_gpu_addr,
> - (void **)&hpd);
> - if (r) {
> - dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> - gfx_v10_0_mec_fini(adev);
> - return r;
> - }
> + if (mec_hpd_size) {
> + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_GTT,
> + &adev->gfx.mec.hpd_eop_obj,
> + &adev->gfx.mec.hpd_eop_gpu_addr,
> + (void **)&hpd);
Indentation is still wrong here.
> + if (r) {
> + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> + gfx_v10_0_mec_fini(adev);
> + return r;
> + }
>
> - memset(hpd, 0, mec_hpd_size);
> + memset(hpd, 0, mec_hpd_size);
>
> - amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> + }
>
> if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
> mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
> @@ -7159,7 +7161,7 @@ static int gfx_v10_0_early_init(void *handle)
> break;
> }
>
> - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> + adev->gfx.num_compute_rings = amdgpu_num_kcq;
>
> gfx_v10_0_set_kiq_pm4_funcs(adev);
> gfx_v10_0_set_ring_funcs(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 8d72089..eb4b812 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -1343,21 +1343,22 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
> amdgpu_gfx_compute_queue_acquire(adev);
>
> mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
> + if (mec_hpd_size) {
> + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + &adev->gfx.mec.hpd_eop_obj,
> + &adev->gfx.mec.hpd_eop_gpu_addr,
> + (void **)&hpd);
Wrong indentation.
> + if (r) {
> + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> + return r;
> + }
>
> - r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> - AMDGPU_GEM_DOMAIN_VRAM,
> - &adev->gfx.mec.hpd_eop_obj,
> - &adev->gfx.mec.hpd_eop_gpu_addr,
> - (void **)&hpd);
> - if (r) {
> - dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> - return r;
> - }
> -
> - memset(hpd, 0, mec_hpd_size);
> + memset(hpd, 0, mec_hpd_size);
>
> - amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> + }
>
> return 0;
> }
> @@ -5294,7 +5295,7 @@ static int gfx_v8_0_early_init(void *handle)
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>
> adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
> - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> + adev->gfx.num_compute_rings = amdgpu_num_kcq;
> adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
> gfx_v8_0_set_ring_funcs(adev);
> gfx_v8_0_set_irq_funcs(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index e4e751f..43ad044 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -1938,22 +1938,23 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
> /* take ownership of the relevant compute queues */
> amdgpu_gfx_compute_queue_acquire(adev);
> mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
> + if (mec_hpd_size) {
> + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + &adev->gfx.mec.hpd_eop_obj,
> + &adev->gfx.mec.hpd_eop_gpu_addr,
> + (void **)&hpd);
Wrong indentation.
Regards,
Felix
> + if (r) {
> + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> + gfx_v9_0_mec_fini(adev);
> + return r;
> + }
>
> - r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> - AMDGPU_GEM_DOMAIN_VRAM,
> - &adev->gfx.mec.hpd_eop_obj,
> - &adev->gfx.mec.hpd_eop_gpu_addr,
> - (void **)&hpd);
> - if (r) {
> - dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> - gfx_v9_0_mec_fini(adev);
> - return r;
> - }
> -
> - memset(hpd, 0, mec_hpd_size);
> + memset(hpd, 0, mec_hpd_size);
>
> - amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> + }
>
> mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
>
> @@ -4625,7 +4626,7 @@ static int gfx_v9_0_early_init(void *handle)
> adev->gfx.num_gfx_rings = 0;
> else
> adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
> - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> + adev->gfx.num_compute_rings = amdgpu_num_kcq;
> gfx_v9_0_set_kiq_pm4_funcs(adev);
> gfx_v9_0_set_ring_funcs(adev);
> gfx_v9_0_set_irq_funcs(adev);
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v4)
2020-07-31 4:53 ` Felix Kuehling
@ 2020-07-31 7:43 ` Liu, Monk
2020-07-31 7:49 ` Liu, Monk
0 siblings, 1 reply; 5+ messages in thread
From: Liu, Monk @ 2020-07-31 7:43 UTC (permalink / raw)
To: Kuehling, Felix, amd-gfx
[AMD Official Use Only - Internal Distribution Only]
Felix
I cannot let the indentation look perfect when it goes out from the email , don't know why
It looks well in my VIM although
_____________________________________
Monk Liu|GPU Virtualization Team |AMD
-----Original Message-----
From: Kuehling, Felix <Felix.Kuehling@amd.com>
Sent: Friday, July 31, 2020 12:54 PM
To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v4)
Am 2020-07-30 um 11:42 p.m. schrieb Monk Liu:
> what:
> the MQD's save and restore of KCQ (kernel compute queue) cost lots of
> clocks during world switch which impacts a lot to multi-VF performance
>
> how:
> introduce a paramter to control the number of KCQ to avoid performance
> drop if there is no kernel compute queue needed
>
> notes:
> this paramter only affects gfx 8/9/10
>
> v2:
> refine namings
>
> v3:
> choose queues for each ring to that try best to cross pipes evenly.
>
> v4:
> fix indentation
> some cleanupsin the gfx_compute_queue_acquire() function
>
> TODO:
> in the future we will let hypervisor driver to set this paramter
> automatically thus no need for user to configure it through modprobe
> in virtual machine
>
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 +++
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 52 +++++++++++++-----------------
> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30 +++++++++--------
> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 29 +++++++++--------
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 31 +++++++++---------
> 7 files changed, 80 insertions(+), 72 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index e97c088..de11136 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -201,6 +201,7 @@ extern int amdgpu_si_support; #ifdef
> CONFIG_DRM_AMDGPU_CIK extern int amdgpu_cik_support; #endif
> +extern int amdgpu_num_kcq;
>
> #define AMDGPU_VM_MAX_NUM_CTX4096
> #define AMDGPU_SG_THRESHOLD(256*1024*1024)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 62ecac9..cf445bab 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -1199,6 +1199,11 @@ static int amdgpu_device_check_arguments(struct
> amdgpu_device *adev)
>
> amdgpu_gmc_tmz_set(adev);
>
> +if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
> +amdgpu_num_kcq = 8;
> +dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid paramter provided by user\n");
> +}
> +
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 6291f5f..b545c40 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -150,6 +150,7 @@ int amdgpu_noretry; int amdgpu_force_asic_type =
> -1; int amdgpu_tmz = 0; int amdgpu_reset_method = -1; /* auto */
> +int amdgpu_num_kcq = -1;
>
> struct amdgpu_mgpu_info mgpu_info = {
> .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
> @@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
> MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default),
> 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
> module_param_named(reset_method, amdgpu_reset_method, int, 0444);
>
> +MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want
> +to setup (8 if set to greater than 8 or less than 0, only affect gfx
> +8+)"); module_param_named(num_kcq, amdgpu_num_kcq, int, 0444);
> +
> static const struct pci_device_id pciidlist[] = { #ifdef
> CONFIG_DRM_AMDGPU_SI
> {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, diff
> --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 8eff017..b43df8e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -202,40 +202,34 @@ bool
> amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
>
> void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) {
> -int i, queue, pipe, mec;
> +int i, queue, pipe;
> bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
> +int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
> + adev->gfx.mec.num_queue_per_pipe,
> + adev->gfx.num_compute_rings);
> +
> +if (multipipe_policy) {
> +/* policy: make queues evenly cross all pipes on MEC1 only */
> +for (i = 0; i < max_queues_per_mec; i++) {
> +pipe = i % adev->gfx.mec.num_pipe_per_mec;
> +queue = (i / adev->gfx.mec.num_pipe_per_mec) %
> +adev->gfx.mec.num_queue_per_pipe;
> +
> +set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
> +adev->gfx.mec.queue_bitmap);
Indentation looks slightly off here.
> +}
> +} else {
> +/* policy: amdgpu owns all queues in the given pipe */
> +for (i = 0; i < max_queues_per_mec; ++i) {
> +queue = i % adev->gfx.mec.num_queue_per_pipe;
> +pipe = (i / adev->gfx.mec.num_queue_per_pipe)
> +% adev->gfx.mec.num_pipe_per_mec;
>
> -/* policy for amdgpu compute queue ownership */
> -for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
> -queue = i % adev->gfx.mec.num_queue_per_pipe;
> -pipe = (i / adev->gfx.mec.num_queue_per_pipe)
> -% adev->gfx.mec.num_pipe_per_mec;
> -mec = (i / adev->gfx.mec.num_queue_per_pipe)
> -/ adev->gfx.mec.num_pipe_per_mec;
> -
> -/* we've run out of HW */
> -if (mec >= adev->gfx.mec.num_mec)
> -break;
> -
> -if (multipipe_policy) {
> -/* policy: amdgpu owns the first two queues of the first MEC */
> -if (mec == 0 && queue < 2)
> -set_bit(i, adev->gfx.mec.queue_bitmap);
> -} else {
> -/* policy: amdgpu owns all queues in the first pipe */
> -if (mec == 0 && pipe == 0)
> -set_bit(i, adev->gfx.mec.queue_bitmap);
> +set_bit(i, adev->gfx.mec.queue_bitmap);
If you're not even using queue and pipe here, you don't need to calculate them above.
> }
> }
>
> -/* update the number of active compute rings */
> -adev->gfx.num_compute_rings =
> -bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
> -
> -/* If you hit this case and edited the policy, you probably just
> - * need to increase AMDGPU_MAX_COMPUTE_RINGS */
> -if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
> -adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> +dev_info(adev->dev, "mec queue bitmap weight=%d\n",
> +bitmap_weight(adev->gfx.mec.queue_bitmap,
> +AMDGPU_MAX_COMPUTE_QUEUES));
Should this be a debug message? I don't see this being very useful to a normal user.
> }
>
> void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index db9f1e8..3a93b3c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -4022,21 +4022,23 @@ static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
> amdgpu_gfx_compute_queue_acquire(adev);
> mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;
>
> -r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> - AMDGPU_GEM_DOMAIN_GTT,
> - &adev->gfx.mec.hpd_eop_obj,
> - &adev->gfx.mec.hpd_eop_gpu_addr,
> - (void **)&hpd);
> -if (r) {
> -dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> -gfx_v10_0_mec_fini(adev);
> -return r;
> -}
> +if (mec_hpd_size) {
> +r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_GTT,
> + &adev->gfx.mec.hpd_eop_obj,
> + &adev->gfx.mec.hpd_eop_gpu_addr,
> + (void **)&hpd);
Indentation is still wrong here.
> +if (r) {
> +dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> +gfx_v10_0_mec_fini(adev);
> +return r;
> +}
>
> -memset(hpd, 0, mec_hpd_size);
> +memset(hpd, 0, mec_hpd_size);
>
> -amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> -amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> +amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +}
>
> if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
> mec_hdr = (const struct gfx_firmware_header_v1_0
> *)adev->gfx.mec_fw->data; @@ -7159,7 +7161,7 @@ static int gfx_v10_0_early_init(void *handle)
> break;
> }
>
> -adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> +adev->gfx.num_compute_rings = amdgpu_num_kcq;
>
> gfx_v10_0_set_kiq_pm4_funcs(adev);
> gfx_v10_0_set_ring_funcs(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 8d72089..eb4b812 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -1343,21 +1343,22 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
> amdgpu_gfx_compute_queue_acquire(adev);
>
> mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
> +if (mec_hpd_size) {
> +r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + &adev->gfx.mec.hpd_eop_obj,
> + &adev->gfx.mec.hpd_eop_gpu_addr,
> + (void **)&hpd);
Wrong indentation.
> +if (r) {
> +dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> +return r;
> +}
>
> -r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> - AMDGPU_GEM_DOMAIN_VRAM,
> - &adev->gfx.mec.hpd_eop_obj,
> - &adev->gfx.mec.hpd_eop_gpu_addr,
> - (void **)&hpd);
> -if (r) {
> -dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> -return r;
> -}
> -
> -memset(hpd, 0, mec_hpd_size);
> +memset(hpd, 0, mec_hpd_size);
>
> -amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> -amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> +amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +}
>
> return 0;
> }
> @@ -5294,7 +5295,7 @@ static int gfx_v8_0_early_init(void *handle)
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>
> adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
> -adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> +adev->gfx.num_compute_rings = amdgpu_num_kcq;
> adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
> gfx_v8_0_set_ring_funcs(adev);
> gfx_v8_0_set_irq_funcs(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index e4e751f..43ad044 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -1938,22 +1938,23 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
> /* take ownership of the relevant compute queues */
> amdgpu_gfx_compute_queue_acquire(adev);
> mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
> +if (mec_hpd_size) {
> +r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + &adev->gfx.mec.hpd_eop_obj,
> + &adev->gfx.mec.hpd_eop_gpu_addr,
> + (void **)&hpd);
Wrong indentation.
Regards,
Felix
> +if (r) {
> +dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> +gfx_v9_0_mec_fini(adev);
> +return r;
> +}
>
> -r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> - AMDGPU_GEM_DOMAIN_VRAM,
> - &adev->gfx.mec.hpd_eop_obj,
> - &adev->gfx.mec.hpd_eop_gpu_addr,
> - (void **)&hpd);
> -if (r) {
> -dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> -gfx_v9_0_mec_fini(adev);
> -return r;
> -}
> -
> -memset(hpd, 0, mec_hpd_size);
> +memset(hpd, 0, mec_hpd_size);
>
> -amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> -amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> +amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +}
>
> mec_hdr = (const struct gfx_firmware_header_v1_0
> *)adev->gfx.mec_fw->data;
>
> @@ -4625,7 +4626,7 @@ static int gfx_v9_0_early_init(void *handle)
> adev->gfx.num_gfx_rings = 0;
> else
> adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
> -adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> +adev->gfx.num_compute_rings = amdgpu_num_kcq;
> gfx_v9_0_set_kiq_pm4_funcs(adev);
> gfx_v9_0_set_ring_funcs(adev);
> gfx_v9_0_set_irq_funcs(adev);
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v4)
2020-07-31 7:43 ` Liu, Monk
@ 2020-07-31 7:49 ` Liu, Monk
0 siblings, 0 replies; 5+ messages in thread
From: Liu, Monk @ 2020-07-31 7:49 UTC (permalink / raw)
To: Liu, Monk, Kuehling, Felix, amd-gfx
[AMD Official Use Only - Internal Distribution Only]
Please check V5 and see if it looks better
I use automatic indentation on those lines
_____________________________________
Monk Liu|GPU Virtualization Team |AMD
-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Liu, Monk
Sent: Friday, July 31, 2020 3:43 PM
To: Kuehling, Felix <Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v4)
[AMD Official Use Only - Internal Distribution Only]
[AMD Official Use Only - Internal Distribution Only]
Felix
I cannot let the indentation look perfect when it goes out from the email , don't know why
It looks well in my VIM although
_____________________________________
Monk Liu|GPU Virtualization Team |AMD
-----Original Message-----
From: Kuehling, Felix <Felix.Kuehling@amd.com>
Sent: Friday, July 31, 2020 12:54 PM
To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v4)
Am 2020-07-30 um 11:42 p.m. schrieb Monk Liu:
> what:
> the MQD's save and restore of KCQ (kernel compute queue) cost lots of
> clocks during world switch which impacts a lot to multi-VF performance
>
> how:
> introduce a paramter to control the number of KCQ to avoid performance
> drop if there is no kernel compute queue needed
>
> notes:
> this paramter only affects gfx 8/9/10
>
> v2:
> refine namings
>
> v3:
> choose queues for each ring to that try best to cross pipes evenly.
>
> v4:
> fix indentation
> some cleanupsin the gfx_compute_queue_acquire() function
>
> TODO:
> in the future we will let hypervisor driver to set this paramter
> automatically thus no need for user to configure it through modprobe
> in virtual machine
>
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 +++
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 52 +++++++++++++-----------------
> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30 +++++++++--------
> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 29 +++++++++--------
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 31 +++++++++---------
> 7 files changed, 80 insertions(+), 72 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index e97c088..de11136 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -201,6 +201,7 @@ extern int amdgpu_si_support; #ifdef
> CONFIG_DRM_AMDGPU_CIK extern int amdgpu_cik_support; #endif
> +extern int amdgpu_num_kcq;
>
> #define AMDGPU_VM_MAX_NUM_CTX4096
> #define AMDGPU_SG_THRESHOLD(256*1024*1024)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 62ecac9..cf445bab 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -1199,6 +1199,11 @@ static int amdgpu_device_check_arguments(struct
> amdgpu_device *adev)
>
> amdgpu_gmc_tmz_set(adev);
>
> +if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) { amdgpu_num_kcq = 8;
> +dev_warn(adev->dev, "set kernel compute queue number to 8 due to
> +invalid paramter provided by user\n"); }
> +
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 6291f5f..b545c40 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -150,6 +150,7 @@ int amdgpu_noretry; int amdgpu_force_asic_type =
> -1; int amdgpu_tmz = 0; int amdgpu_reset_method = -1; /* auto */
> +int amdgpu_num_kcq = -1;
>
> struct amdgpu_mgpu_info mgpu_info = { .mutex =
> __MUTEX_INITIALIZER(mgpu_info.mutex),
> @@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
> MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default),
> 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
> module_param_named(reset_method, amdgpu_reset_method, int, 0444);
>
> +MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want
> +to setup (8 if set to greater than 8 or less than 0, only affect gfx
> +8+)"); module_param_named(num_kcq, amdgpu_num_kcq, int, 0444);
> +
> static const struct pci_device_id pciidlist[] = { #ifdef
> CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0,
> CHIP_TAHITI}, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 8eff017..b43df8e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -202,40 +202,34 @@ bool
> amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
>
> void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) {
> -int i, queue, pipe, mec;
> +int i, queue, pipe;
> bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
> +int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
> + adev->gfx.mec.num_queue_per_pipe,
> + adev->gfx.num_compute_rings);
> +
> +if (multipipe_policy) {
> +/* policy: make queues evenly cross all pipes on MEC1 only */ for (i
> += 0; i < max_queues_per_mec; i++) { pipe = i %
> +adev->gfx.mec.num_pipe_per_mec; queue = (i /
> +adev->gfx.mec.num_pipe_per_mec) %
> +adev->gfx.mec.num_queue_per_pipe;
> +
> +set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
> +adev->gfx.mec.queue_bitmap);
Indentation looks slightly off here.
> +}
> +} else {
> +/* policy: amdgpu owns all queues in the given pipe */ for (i = 0; i
> +< max_queues_per_mec; ++i) { queue = i %
> +adev->gfx.mec.num_queue_per_pipe;
> +pipe = (i / adev->gfx.mec.num_queue_per_pipe) %
> +adev->gfx.mec.num_pipe_per_mec;
>
> -/* policy for amdgpu compute queue ownership */ -for (i = 0; i <
> AMDGPU_MAX_COMPUTE_QUEUES; ++i) { -queue = i %
> adev->gfx.mec.num_queue_per_pipe; -pipe = (i /
> adev->gfx.mec.num_queue_per_pipe) -% adev->gfx.mec.num_pipe_per_mec;
> -mec = (i / adev->gfx.mec.num_queue_per_pipe) -/
> adev->gfx.mec.num_pipe_per_mec;
> -
> -/* we've run out of HW */
> -if (mec >= adev->gfx.mec.num_mec)
> -break;
> -
> -if (multipipe_policy) {
> -/* policy: amdgpu owns the first two queues of the first MEC */ -if
> (mec == 0 && queue < 2) -set_bit(i, adev->gfx.mec.queue_bitmap); -}
> else {
> -/* policy: amdgpu owns all queues in the first pipe */ -if (mec == 0
> && pipe == 0) -set_bit(i, adev->gfx.mec.queue_bitmap);
> +set_bit(i, adev->gfx.mec.queue_bitmap);
If you're not even using queue and pipe here, you don't need to calculate them above.
> }
> }
>
> -/* update the number of active compute rings */
> -adev->gfx.num_compute_rings =
> -bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
> -
> -/* If you hit this case and edited the policy, you probably just
> - * need to increase AMDGPU_MAX_COMPUTE_RINGS */ -if
> (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
> -adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> +dev_info(adev->dev, "mec queue bitmap weight=%d\n",
> +bitmap_weight(adev->gfx.mec.queue_bitmap,
> +AMDGPU_MAX_COMPUTE_QUEUES));
Should this be a debug message? I don't see this being very useful to a normal user.
> }
>
> void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index db9f1e8..3a93b3c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -4022,21 +4022,23 @@ static int gfx_v10_0_mec_init(struct
> amdgpu_device *adev) amdgpu_gfx_compute_queue_acquire(adev);
> mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;
>
> -r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> - AMDGPU_GEM_DOMAIN_GTT,
> - &adev->gfx.mec.hpd_eop_obj,
> - &adev->gfx.mec.hpd_eop_gpu_addr,
> - (void **)&hpd);
> -if (r) {
> -dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> -gfx_v10_0_mec_fini(adev); -return r; -}
> +if (mec_hpd_size) {
> +r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_GTT,
> + &adev->gfx.mec.hpd_eop_obj,
> + &adev->gfx.mec.hpd_eop_gpu_addr,
> + (void **)&hpd);
Indentation is still wrong here.
> +if (r) {
> +dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> +gfx_v10_0_mec_fini(adev); return r; }
>
> -memset(hpd, 0, mec_hpd_size);
> +memset(hpd, 0, mec_hpd_size);
>
> -amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> -amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> +amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +}
>
> if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { mec_hdr =
> (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; @@
> -7159,7 +7161,7 @@ static int gfx_v10_0_early_init(void *handle)
> break; }
>
> -adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> +adev->gfx.num_compute_rings = amdgpu_num_kcq;
>
> gfx_v10_0_set_kiq_pm4_funcs(adev);
> gfx_v10_0_set_ring_funcs(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 8d72089..eb4b812 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -1343,21 +1343,22 @@ static int gfx_v8_0_mec_init(struct
> amdgpu_device *adev) amdgpu_gfx_compute_queue_acquire(adev);
>
> mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
> +if (mec_hpd_size) {
> +r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + &adev->gfx.mec.hpd_eop_obj,
> + &adev->gfx.mec.hpd_eop_gpu_addr,
> + (void **)&hpd);
Wrong indentation.
> +if (r) {
> +dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); return r;
> +}
>
> -r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> - AMDGPU_GEM_DOMAIN_VRAM,
> - &adev->gfx.mec.hpd_eop_obj,
> - &adev->gfx.mec.hpd_eop_gpu_addr,
> - (void **)&hpd);
> -if (r) {
> -dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); -return r;
> -}
> -
> -memset(hpd, 0, mec_hpd_size);
> +memset(hpd, 0, mec_hpd_size);
>
> -amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> -amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> +amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +}
>
> return 0;
> }
> @@ -5294,7 +5295,7 @@ static int gfx_v8_0_early_init(void *handle)
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>
> adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
> -adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> +adev->gfx.num_compute_rings = amdgpu_num_kcq;
> adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
> gfx_v8_0_set_ring_funcs(adev); gfx_v8_0_set_irq_funcs(adev); diff
> --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index e4e751f..43ad044 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -1938,22 +1938,23 @@ static int gfx_v9_0_mec_init(struct
> amdgpu_device *adev)
> /* take ownership of the relevant compute queues */
> amdgpu_gfx_compute_queue_acquire(adev);
> mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
> +if (mec_hpd_size) {
> +r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + &adev->gfx.mec.hpd_eop_obj,
> + &adev->gfx.mec.hpd_eop_gpu_addr,
> + (void **)&hpd);
Wrong indentation.
Regards,
Felix
> +if (r) {
> +dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> +gfx_v9_0_mec_fini(adev); return r; }
>
> -r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> - AMDGPU_GEM_DOMAIN_VRAM,
> - &adev->gfx.mec.hpd_eop_obj,
> - &adev->gfx.mec.hpd_eop_gpu_addr,
> - (void **)&hpd);
> -if (r) {
> -dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> -gfx_v9_0_mec_fini(adev); -return r; -}
> -
> -memset(hpd, 0, mec_hpd_size);
> +memset(hpd, 0, mec_hpd_size);
>
> -amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> -amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> +amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +}
>
> mec_hdr = (const struct gfx_firmware_header_v1_0
> *)adev->gfx.mec_fw->data;
>
> @@ -4625,7 +4626,7 @@ static int gfx_v9_0_early_init(void *handle)
> adev->gfx.num_gfx_rings = 0; else adev->gfx.num_gfx_rings =
> GFX9_NUM_GFX_RINGS;
> -adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> +adev->gfx.num_compute_rings = amdgpu_num_kcq;
> gfx_v9_0_set_kiq_pm4_funcs(adev);
> gfx_v9_0_set_ring_funcs(adev);
> gfx_v9_0_set_irq_funcs(adev);
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=02%7C01%7Cmonk.liu%40amd.com%7C87a7039bd88d490c0a8f08d835255d02%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637317781877751181&sdata=9grNTpMPabbzCZdPsW70uBDiOH8fSEW%2F7aawtoZF9J4%3D&reserved=0
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v4)
@ 2020-07-31 2:12 Monk Liu
0 siblings, 0 replies; 5+ messages in thread
From: Monk Liu @ 2020-07-31 2:12 UTC (permalink / raw)
To: amd-gfx; +Cc: Monk Liu
what:
the MQD's save and restore of KCQ (kernel compute queue)
cost lots of clocks during world switch which impacts a lot
to multi-VF performance
how:
introduce a paramter to control the number of KCQ to avoid
performance drop if there is no kernel compute queue needed
notes:
this paramter only affects gfx 8/9/10
v2:
refine namings
v3:
choose queues for each ring to that try best to cross pipes evenly.
v4:
fix indentation
some cleanupsin the gfx_compute_queue_acquire() function
TODO:
in the future we will let hypervisor driver to set this paramter
automatically thus no need for user to configure it through
modprobe in virtual machine
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 +++
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 52 +++++++++++++-----------------
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30 +++++++++--------
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 29 +++++++++--------
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 31 +++++++++---------
7 files changed, 80 insertions(+), 72 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e97c088..de11136 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -201,6 +201,7 @@ extern int amdgpu_si_support;
#ifdef CONFIG_DRM_AMDGPU_CIK
extern int amdgpu_cik_support;
#endif
+extern int amdgpu_num_kcq;
#define AMDGPU_VM_MAX_NUM_CTX 4096
#define AMDGPU_SG_THRESHOLD (256*1024*1024)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 62ecac9..cf445bab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1199,6 +1199,11 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
amdgpu_gmc_tmz_set(adev);
+ if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
+ amdgpu_num_kcq = 8;
+ dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid paramter provided by user\n");
+ }
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6291f5f..b545c40 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -150,6 +150,7 @@ int amdgpu_noretry;
int amdgpu_force_asic_type = -1;
int amdgpu_tmz = 0;
int amdgpu_reset_method = -1; /* auto */
+int amdgpu_num_kcq = -1;
struct amdgpu_mgpu_info mgpu_info = {
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
@@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
module_param_named(reset_method, amdgpu_reset_method, int, 0444);
+MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)");
+module_param_named(num_kcq, amdgpu_num_kcq, int, 0444);
+
static const struct pci_device_id pciidlist[] = {
#ifdef CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 8eff017..24b3461 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -202,40 +202,34 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
{
- int i, queue, pipe, mec;
+ int i, queue, pipe;
bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
+ int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe,
+ adev->gfx.num_compute_rings);
+
+ if (multipipe_policy) {
+ /* policy: make queues evenly cross all pipes on MEC1 only */
+ for (i = 0; i < max_queues_per_mec; i++) {
+ pipe = i % adev->gfx.mec.num_pipe_per_mec;
+ queue = (i / adev->gfx.mec.num_pipe_per_mec) %
+ adev->gfx.mec.num_queue_per_pipe;
+
+ set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
+ adev->gfx.mec.queue_bitmap);
+ }
+ } else {
+ /* policy: amdgpu owns all queues in the given pipe */
+ for (i = 0; i < max_queues_per_mec; ++i) {
+ queue = i % adev->gfx.mec.num_queue_per_pipe;
+ pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+ % adev->gfx.mec.num_pipe_per_mec;
- /* policy for amdgpu compute queue ownership */
- for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
- queue = i % adev->gfx.mec.num_queue_per_pipe;
- pipe = (i / adev->gfx.mec.num_queue_per_pipe)
- % adev->gfx.mec.num_pipe_per_mec;
- mec = (i / adev->gfx.mec.num_queue_per_pipe)
- / adev->gfx.mec.num_pipe_per_mec;
-
- /* we've run out of HW */
- if (mec >= adev->gfx.mec.num_mec)
- break;
-
- if (multipipe_policy) {
- /* policy: amdgpu owns the first two queues of the first MEC */
- if (mec == 0 && queue < 2)
- set_bit(i, adev->gfx.mec.queue_bitmap);
- } else {
- /* policy: amdgpu owns all queues in the first pipe */
- if (mec == 0 && pipe == 0)
- set_bit(i, adev->gfx.mec.queue_bitmap);
+ set_bit(i, adev->gfx.mec.queue_bitmap);
}
}
- /* update the number of active compute rings */
- adev->gfx.num_compute_rings =
- bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
-
- /* If you hit this case and edited the policy, you probably just
- * need to increase AMDGPU_MAX_COMPUTE_RINGS */
- if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
- adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+ dev_info(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
}
void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index db9f1e8..3a93b3c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4022,21 +4022,23 @@ static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
amdgpu_gfx_compute_queue_acquire(adev);
mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;
- r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT,
- &adev->gfx.mec.hpd_eop_obj,
- &adev->gfx.mec.hpd_eop_gpu_addr,
- (void **)&hpd);
- if (r) {
- dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
- gfx_v10_0_mec_fini(adev);
- return r;
- }
+ if (mec_hpd_size) {
+ r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.hpd_eop_obj,
+ &adev->gfx.mec.hpd_eop_gpu_addr,
+ (void **)&hpd);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+ gfx_v10_0_mec_fini(adev);
+ return r;
+ }
- memset(hpd, 0, mec_hpd_size);
+ memset(hpd, 0, mec_hpd_size);
- amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
- amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ }
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
@@ -7159,7 +7161,7 @@ static int gfx_v10_0_early_init(void *handle)
break;
}
- adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+ adev->gfx.num_compute_rings = amdgpu_num_kcq;
gfx_v10_0_set_kiq_pm4_funcs(adev);
gfx_v10_0_set_ring_funcs(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 8d72089..eb4b812 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1343,21 +1343,22 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
amdgpu_gfx_compute_queue_acquire(adev);
mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
+ if (mec_hpd_size) {
+ r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.mec.hpd_eop_obj,
+ &adev->gfx.mec.hpd_eop_gpu_addr,
+ (void **)&hpd);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+ return r;
+ }
- r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->gfx.mec.hpd_eop_obj,
- &adev->gfx.mec.hpd_eop_gpu_addr,
- (void **)&hpd);
- if (r) {
- dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
- return r;
- }
-
- memset(hpd, 0, mec_hpd_size);
+ memset(hpd, 0, mec_hpd_size);
- amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
- amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ }
return 0;
}
@@ -5294,7 +5295,7 @@ static int gfx_v8_0_early_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
- adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+ adev->gfx.num_compute_rings = amdgpu_num_kcq;
adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
gfx_v8_0_set_ring_funcs(adev);
gfx_v8_0_set_irq_funcs(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index e4e751f..43ad044 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1938,22 +1938,23 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
/* take ownership of the relevant compute queues */
amdgpu_gfx_compute_queue_acquire(adev);
mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
+ if (mec_hpd_size) {
+ r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.mec.hpd_eop_obj,
+ &adev->gfx.mec.hpd_eop_gpu_addr,
+ (void **)&hpd);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+ gfx_v9_0_mec_fini(adev);
+ return r;
+ }
- r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->gfx.mec.hpd_eop_obj,
- &adev->gfx.mec.hpd_eop_gpu_addr,
- (void **)&hpd);
- if (r) {
- dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
- gfx_v9_0_mec_fini(adev);
- return r;
- }
-
- memset(hpd, 0, mec_hpd_size);
+ memset(hpd, 0, mec_hpd_size);
- amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
- amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ }
mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
@@ -4625,7 +4626,7 @@ static int gfx_v9_0_early_init(void *handle)
adev->gfx.num_gfx_rings = 0;
else
adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
- adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+ adev->gfx.num_compute_rings = amdgpu_num_kcq;
gfx_v9_0_set_kiq_pm4_funcs(adev);
gfx_v9_0_set_ring_funcs(adev);
gfx_v9_0_set_irq_funcs(adev);
--
2.7.4
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2020-07-31 7:49 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-31 3:42 [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v4) Monk Liu
2020-07-31 4:53 ` Felix Kuehling
2020-07-31 7:43 ` Liu, Monk
2020-07-31 7:49 ` Liu, Monk
-- strict thread matches above, loose matches on Subject: below --
2020-07-31 2:12 Monk Liu
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.