* [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v3)
@ 2020-07-28  9:00 Monk Liu
  2020-07-28 14:38 ` Felix Kuehling
  2020-07-30  8:15 ` Christian König
  0 siblings, 2 replies; 6+ messages in thread
From: Monk Liu @ 2020-07-28  9:00 UTC (permalink / raw)
  To: amd-gfx; +Cc: Monk Liu

what:
the MQD save and restore of KCQs (kernel compute queues)
costs a lot of clocks during world switch, which significantly
hurts multi-VF performance

how:
introduce a parameter to control the number of KCQs, so that the
performance drop can be avoided when no kernel compute queue is
actually needed

notes:
this parameter only affects gfx 8/9/10

v2:
refine namings

v3:
choose queues for each ring so that they are spread across the
pipes as evenly as possible.

TODO:
in the future the hypervisor driver will set this parameter
automatically, so there will be no need for the user to configure
it through modprobe in the virtual machine
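
usage (illustrative example only, based on the module_param_named()
below): loading the driver with e.g. "modprobe amdgpu num_kcq=2", or
passing "amdgpu.num_kcq=2" on the kernel command line, limits the
driver to two kernel compute queues; values above 8 or below 0 fall
back to the default of 8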

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  5 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  4 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    | 58 +++++++++++++++---------------
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c     | 30 ++++++++--------
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c      | 29 +++++++--------
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c      | 31 ++++++++--------
 7 files changed, 87 insertions(+), 71 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e97c088..de11136 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -201,6 +201,7 @@ extern int amdgpu_si_support;
 #ifdef CONFIG_DRM_AMDGPU_CIK
 extern int amdgpu_cik_support;
 #endif
+extern int amdgpu_num_kcq;
 
 #define AMDGPU_VM_MAX_NUM_CTX			4096
 #define AMDGPU_SG_THRESHOLD			(256*1024*1024)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 62ecac9..cf445bab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1199,6 +1199,11 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
 
 	amdgpu_gmc_tmz_set(adev);
 
+	if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
+		amdgpu_num_kcq = 8;
+		dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid paramter provided by user\n");
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6291f5f..b545c40 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -150,6 +150,7 @@ int amdgpu_noretry;
 int amdgpu_force_asic_type = -1;
 int amdgpu_tmz = 0;
 int amdgpu_reset_method = -1; /* auto */
+int amdgpu_num_kcq = -1;
 
 struct amdgpu_mgpu_info mgpu_info = {
 	.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
@@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
 MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
 module_param_named(reset_method, amdgpu_reset_method, int, 0444);
 
+MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)");
+module_param_named(num_kcq, amdgpu_num_kcq, int, 0444);
+
 static const struct pci_device_id pciidlist[] = {
 #ifdef  CONFIG_DRM_AMDGPU_SI
 	{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 8eff017..f83a9a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -202,40 +202,42 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
 
 void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 {
-	int i, queue, pipe, mec;
+	int i, queue, pipe;
 	bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
+	int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
+								 adev->gfx.mec.num_queue_per_pipe,
+								 adev->gfx.num_compute_rings);
+
+	if (multipipe_policy) {
+		/* policy: make queues evenly cross all pipes on MEC1 only */
+		for (i = 0; i < max_queues_per_mec; i++) {
+			pipe = i % adev->gfx.mec.num_pipe_per_mec;
+			queue = (i / adev->gfx.mec.num_pipe_per_mec) %
+				adev->gfx.mec.num_queue_per_pipe;
+
+			set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
+					adev->gfx.mec.queue_bitmap);
+		}
+	} else {
+		int mec;
 
-	/* policy for amdgpu compute queue ownership */
-	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
-		queue = i % adev->gfx.mec.num_queue_per_pipe;
-		pipe = (i / adev->gfx.mec.num_queue_per_pipe)
-			% adev->gfx.mec.num_pipe_per_mec;
-		mec = (i / adev->gfx.mec.num_queue_per_pipe)
-			/ adev->gfx.mec.num_pipe_per_mec;
-
-		/* we've run out of HW */
-		if (mec >= adev->gfx.mec.num_mec)
-			break;
+		/* policy: amdgpu owns all queues in the given pipe */
+		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
+			queue = i % adev->gfx.mec.num_queue_per_pipe;
+			pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+				% adev->gfx.mec.num_pipe_per_mec;
+			mec = (i / adev->gfx.mec.num_queue_per_pipe)
+				/ adev->gfx.mec.num_pipe_per_mec;
 
-		if (multipipe_policy) {
-			/* policy: amdgpu owns the first two queues of the first MEC */
-			if (mec == 0 && queue < 2)
-				set_bit(i, adev->gfx.mec.queue_bitmap);
-		} else {
-			/* policy: amdgpu owns all queues in the first pipe */
-			if (mec == 0 && pipe == 0)
-				set_bit(i, adev->gfx.mec.queue_bitmap);
+			/* we've run out of HW */
+			if (mec >= adev->gfx.mec.num_mec)
+				break;
+
+			set_bit(i, adev->gfx.mec.queue_bitmap);
 		}
 	}
 
-	/* update the number of active compute rings */
-	adev->gfx.num_compute_rings =
-		bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
-
-	/* If you hit this case and edited the policy, you probably just
-	 * need to increase AMDGPU_MAX_COMPUTE_RINGS */
-	if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
-		adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+	dev_info(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
 }
 
 void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index db9f1e8..3a93b3c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4022,21 +4022,23 @@ static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
 	amdgpu_gfx_compute_queue_acquire(adev);
 	mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;
 
-	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_GTT,
-				      &adev->gfx.mec.hpd_eop_obj,
-				      &adev->gfx.mec.hpd_eop_gpu_addr,
-				      (void **)&hpd);
-	if (r) {
-		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
-		gfx_v10_0_mec_fini(adev);
-		return r;
-	}
+	if (mec_hpd_size) {
+		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+									  AMDGPU_GEM_DOMAIN_GTT,
+									  &adev->gfx.mec.hpd_eop_obj,
+									  &adev->gfx.mec.hpd_eop_gpu_addr,
+									  (void **)&hpd);
+		if (r) {
+			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+			gfx_v10_0_mec_fini(adev);
+			return r;
+		}
 
-	memset(hpd, 0, mec_hpd_size);
+		memset(hpd, 0, mec_hpd_size);
 
-	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
-	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+	}
 
 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
 		mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
@@ -7159,7 +7161,7 @@ static int gfx_v10_0_early_init(void *handle)
 		break;
 	}
 
-	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+	adev->gfx.num_compute_rings = amdgpu_num_kcq;
 
 	gfx_v10_0_set_kiq_pm4_funcs(adev);
 	gfx_v10_0_set_ring_funcs(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 8d72089..eb4b812 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1343,21 +1343,22 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
 	amdgpu_gfx_compute_queue_acquire(adev);
 
 	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
+	if (mec_hpd_size) {
+		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+									  AMDGPU_GEM_DOMAIN_VRAM,
+									  &adev->gfx.mec.hpd_eop_obj,
+									  &adev->gfx.mec.hpd_eop_gpu_addr,
+									  (void **)&hpd);
+		if (r) {
+			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+			return r;
+		}
 
-	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_VRAM,
-				      &adev->gfx.mec.hpd_eop_obj,
-				      &adev->gfx.mec.hpd_eop_gpu_addr,
-				      (void **)&hpd);
-	if (r) {
-		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
-		return r;
-	}
-
-	memset(hpd, 0, mec_hpd_size);
+		memset(hpd, 0, mec_hpd_size);
 
-	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
-	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+	}
 
 	return 0;
 }
@@ -5294,7 +5295,7 @@ static int gfx_v8_0_early_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
-	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+	adev->gfx.num_compute_rings = amdgpu_num_kcq;
 	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
 	gfx_v8_0_set_ring_funcs(adev);
 	gfx_v8_0_set_irq_funcs(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index e4e751f..43ad044 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1938,22 +1938,23 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
 	/* take ownership of the relevant compute queues */
 	amdgpu_gfx_compute_queue_acquire(adev);
 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
+	if (mec_hpd_size) {
+		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+									  AMDGPU_GEM_DOMAIN_VRAM,
+									  &adev->gfx.mec.hpd_eop_obj,
+									  &adev->gfx.mec.hpd_eop_gpu_addr,
+									  (void **)&hpd);
+		if (r) {
+			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+			gfx_v9_0_mec_fini(adev);
+			return r;
+		}
 
-	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_VRAM,
-				      &adev->gfx.mec.hpd_eop_obj,
-				      &adev->gfx.mec.hpd_eop_gpu_addr,
-				      (void **)&hpd);
-	if (r) {
-		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
-		gfx_v9_0_mec_fini(adev);
-		return r;
-	}
-
-	memset(hpd, 0, mec_hpd_size);
+		memset(hpd, 0, mec_hpd_size);
 
-	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
-	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+	}
 
 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
 
@@ -4625,7 +4626,7 @@ static int gfx_v9_0_early_init(void *handle)
 		adev->gfx.num_gfx_rings = 0;
 	else
 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
-	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+	adev->gfx.num_compute_rings = amdgpu_num_kcq;
 	gfx_v9_0_set_kiq_pm4_funcs(adev);
 	gfx_v9_0_set_ring_funcs(adev);
 	gfx_v9_0_set_irq_funcs(adev);
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


* Re: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v3)
  2020-07-28  9:00 [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v3) Monk Liu
@ 2020-07-28 14:38 ` Felix Kuehling
  2020-07-31  2:11   ` Liu, Monk
  2020-07-30  8:15 ` Christian König
  1 sibling, 1 reply; 6+ messages in thread
From: Felix Kuehling @ 2020-07-28 14:38 UTC (permalink / raw)
  To: Monk Liu, amd-gfx

On 2020-07-28 at 5:00 a.m., Monk Liu wrote:
> what:
> the MQD's save and restore of KCQ (kernel compute queue)
> cost lots of clocks during world switch which impacts a lot
> to multi-VF performance
>
> how:
> introduce a paramter to control the number of KCQ to avoid
> performance drop if there is no kernel compute queue needed
>
> notes:
> this paramter only affects gfx 8/9/10
>
> v2:
> refine namings
>
> v3:
> choose queues for each ring to that try best to cross pipes evenly.

Thanks. Some more suggestions for simplifications inline.


>
> TODO:
> in the future we will let hypervisor driver to set this paramter
> automatically thus no need for user to configure it through
> modprobe in virtual machine
>
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  5 +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  4 +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    | 58 +++++++++++++++---------------
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c     | 30 ++++++++--------
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c      | 29 +++++++--------
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c      | 31 ++++++++--------
>  7 files changed, 87 insertions(+), 71 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index e97c088..de11136 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -201,6 +201,7 @@ extern int amdgpu_si_support;
>  #ifdef CONFIG_DRM_AMDGPU_CIK
>  extern int amdgpu_cik_support;
>  #endif
> +extern int amdgpu_num_kcq;
>  
>  #define AMDGPU_VM_MAX_NUM_CTX			4096
>  #define AMDGPU_SG_THRESHOLD			(256*1024*1024)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 62ecac9..cf445bab 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -1199,6 +1199,11 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
>  
>  	amdgpu_gmc_tmz_set(adev);
>  
> +	if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
> +		amdgpu_num_kcq = 8;
> +		dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid paramter provided by user\n");
> +	}
> +
>  	return 0;
>  }
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 6291f5f..b545c40 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -150,6 +150,7 @@ int amdgpu_noretry;
>  int amdgpu_force_asic_type = -1;
>  int amdgpu_tmz = 0;
>  int amdgpu_reset_method = -1; /* auto */
> +int amdgpu_num_kcq = -1;
>  
>  struct amdgpu_mgpu_info mgpu_info = {
>  	.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
> @@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
>  MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
>  module_param_named(reset_method, amdgpu_reset_method, int, 0444);
>  
> +MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)");
> +module_param_named(num_kcq, amdgpu_num_kcq, int, 0444);
> +
>  static const struct pci_device_id pciidlist[] = {
>  #ifdef  CONFIG_DRM_AMDGPU_SI
>  	{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 8eff017..f83a9a7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -202,40 +202,42 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
>  
>  void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
>  {
> -	int i, queue, pipe, mec;
> +	int i, queue, pipe;
>  	bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
> +	int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
> +								 adev->gfx.mec.num_queue_per_pipe,
> +								 adev->gfx.num_compute_rings);

Indentation looks wrong. Did you use the wrong TAB size?


> +
> +	if (multipipe_policy) {
> +		/* policy: make queues evenly cross all pipes on MEC1 only */
> +		for (i = 0; i < max_queues_per_mec; i++) {
> +			pipe = i % adev->gfx.mec.num_pipe_per_mec;
> +			queue = (i / adev->gfx.mec.num_pipe_per_mec) %
> +				adev->gfx.mec.num_queue_per_pipe;
> +
> +			set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
> +					adev->gfx.mec.queue_bitmap);
> +		}
> +	} else {
> +		int mec;
>  
> -	/* policy for amdgpu compute queue ownership */
> -	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
> -		queue = i % adev->gfx.mec.num_queue_per_pipe;
> -		pipe = (i / adev->gfx.mec.num_queue_per_pipe)
> -			% adev->gfx.mec.num_pipe_per_mec;
> -		mec = (i / adev->gfx.mec.num_queue_per_pipe)
> -			/ adev->gfx.mec.num_pipe_per_mec;
> -
> -		/* we've run out of HW */
> -		if (mec >= adev->gfx.mec.num_mec)
> -			break;
> +		/* policy: amdgpu owns all queues in the given pipe */
> +		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {

You could also use i < max_queues_per_mec here.


> +			queue = i % adev->gfx.mec.num_queue_per_pipe;
> +			pipe = (i / adev->gfx.mec.num_queue_per_pipe)
> +				% adev->gfx.mec.num_pipe_per_mec;
> +			mec = (i / adev->gfx.mec.num_queue_per_pipe)
> +				/ adev->gfx.mec.num_pipe_per_mec;

Then mec will always be 0 and you can eliminate that variable.


>  
> -		if (multipipe_policy) {
> -			/* policy: amdgpu owns the first two queues of the first MEC */
> -			if (mec == 0 && queue < 2)
> -				set_bit(i, adev->gfx.mec.queue_bitmap);
> -		} else {
> -			/* policy: amdgpu owns all queues in the first pipe */
> -			if (mec == 0 && pipe == 0)
> -				set_bit(i, adev->gfx.mec.queue_bitmap);
> +			/* we've run out of HW */
> +			if (mec >= adev->gfx.mec.num_mec)
> +				break;

And you won't need this if (...) break; any more.

It'll make the two policy cases look more similar, so it's easier to see
how they are actually different.
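
For illustration only (a rough sketch, not the actual v4 change, and
assuming the v3 names such as max_queues_per_mec are kept), the whole
function could then boil down to something like:

/* sketch only: v3 code with the suggested simplifications applied */
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
{
	int i, queue, pipe;
	bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
	int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
				     adev->gfx.mec.num_queue_per_pipe,
				     adev->gfx.num_compute_rings);

	if (multipipe_policy) {
		/* spread the queues evenly across all pipes of MEC1 */
		for (i = 0; i < max_queues_per_mec; i++) {
			pipe = i % adev->gfx.mec.num_pipe_per_mec;
			queue = (i / adev->gfx.mec.num_pipe_per_mec) %
				adev->gfx.mec.num_queue_per_pipe;

			set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
				adev->gfx.mec.queue_bitmap);
		}
	} else {
		/* take the first max_queues_per_mec queues of MEC1, pipe 0 first */
		for (i = 0; i < max_queues_per_mec; ++i)
			set_bit(i, adev->gfx.mec.queue_bitmap);
	}

	dev_info(adev->dev, "mec queue bitmap weight=%d\n",
		 bitmap_weight(adev->gfx.mec.queue_bitmap,
			       AMDGPU_MAX_COMPUTE_QUEUES));
}

Both branches then iterate over the same bound and only differ in how
the bit index is computed.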

Regards,
  Felix


> +
> +			set_bit(i, adev->gfx.mec.queue_bitmap);
>  		}
>  	}
>  
> -	/* update the number of active compute rings */
> -	adev->gfx.num_compute_rings =
> -		bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
> -
> -	/* If you hit this case and edited the policy, you probably just
> -	 * need to increase AMDGPU_MAX_COMPUTE_RINGS */
> -	if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
> -		adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> +	dev_info(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
>  }
>  
>  void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index db9f1e8..3a93b3c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -4022,21 +4022,23 @@ static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
>  	amdgpu_gfx_compute_queue_acquire(adev);
>  	mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;
>  
> -	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> -				      AMDGPU_GEM_DOMAIN_GTT,
> -				      &adev->gfx.mec.hpd_eop_obj,
> -				      &adev->gfx.mec.hpd_eop_gpu_addr,
> -				      (void **)&hpd);
> -	if (r) {
> -		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> -		gfx_v10_0_mec_fini(adev);
> -		return r;
> -	}
> +	if (mec_hpd_size) {
> +		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> +									  AMDGPU_GEM_DOMAIN_GTT,
> +									  &adev->gfx.mec.hpd_eop_obj,
> +									  &adev->gfx.mec.hpd_eop_gpu_addr,
> +									  (void **)&hpd);
> +		if (r) {
> +			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> +			gfx_v10_0_mec_fini(adev);
> +			return r;
> +		}
>  
> -	memset(hpd, 0, mec_hpd_size);
> +		memset(hpd, 0, mec_hpd_size);
>  
> -	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> -	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> +		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +	}
>  
>  	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
>  		mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
> @@ -7159,7 +7161,7 @@ static int gfx_v10_0_early_init(void *handle)
>  		break;
>  	}
>  
> -	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> +	adev->gfx.num_compute_rings = amdgpu_num_kcq;
>  
>  	gfx_v10_0_set_kiq_pm4_funcs(adev);
>  	gfx_v10_0_set_ring_funcs(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 8d72089..eb4b812 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -1343,21 +1343,22 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
>  	amdgpu_gfx_compute_queue_acquire(adev);
>  
>  	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
> +	if (mec_hpd_size) {
> +		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> +									  AMDGPU_GEM_DOMAIN_VRAM,
> +									  &adev->gfx.mec.hpd_eop_obj,
> +									  &adev->gfx.mec.hpd_eop_gpu_addr,
> +									  (void **)&hpd);
> +		if (r) {
> +			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> +			return r;
> +		}
>  
> -	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> -				      AMDGPU_GEM_DOMAIN_VRAM,
> -				      &adev->gfx.mec.hpd_eop_obj,
> -				      &adev->gfx.mec.hpd_eop_gpu_addr,
> -				      (void **)&hpd);
> -	if (r) {
> -		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> -		return r;
> -	}
> -
> -	memset(hpd, 0, mec_hpd_size);
> +		memset(hpd, 0, mec_hpd_size);
>  
> -	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> -	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> +		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +	}
>  
>  	return 0;
>  }
> @@ -5294,7 +5295,7 @@ static int gfx_v8_0_early_init(void *handle)
>  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>  
>  	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
> -	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> +	adev->gfx.num_compute_rings = amdgpu_num_kcq;
>  	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
>  	gfx_v8_0_set_ring_funcs(adev);
>  	gfx_v8_0_set_irq_funcs(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index e4e751f..43ad044 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -1938,22 +1938,23 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
>  	/* take ownership of the relevant compute queues */
>  	amdgpu_gfx_compute_queue_acquire(adev);
>  	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
> +	if (mec_hpd_size) {
> +		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> +									  AMDGPU_GEM_DOMAIN_VRAM,
> +									  &adev->gfx.mec.hpd_eop_obj,
> +									  &adev->gfx.mec.hpd_eop_gpu_addr,
> +									  (void **)&hpd);
> +		if (r) {
> +			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> +			gfx_v9_0_mec_fini(adev);
> +			return r;
> +		}
>  
> -	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> -				      AMDGPU_GEM_DOMAIN_VRAM,
> -				      &adev->gfx.mec.hpd_eop_obj,
> -				      &adev->gfx.mec.hpd_eop_gpu_addr,
> -				      (void **)&hpd);
> -	if (r) {
> -		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> -		gfx_v9_0_mec_fini(adev);
> -		return r;
> -	}
> -
> -	memset(hpd, 0, mec_hpd_size);
> +		memset(hpd, 0, mec_hpd_size);
>  
> -	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> -	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> +		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +	}
>  
>  	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
>  
> @@ -4625,7 +4626,7 @@ static int gfx_v9_0_early_init(void *handle)
>  		adev->gfx.num_gfx_rings = 0;
>  	else
>  		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
> -	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> +	adev->gfx.num_compute_rings = amdgpu_num_kcq;
>  	gfx_v9_0_set_kiq_pm4_funcs(adev);
>  	gfx_v9_0_set_ring_funcs(adev);
>  	gfx_v9_0_set_irq_funcs(adev);
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


* Re: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v3)
  2020-07-28  9:00 [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v3) Monk Liu
  2020-07-28 14:38 ` Felix Kuehling
@ 2020-07-30  8:15 ` Christian König
  1 sibling, 0 replies; 6+ messages in thread
From: Christian König @ 2020-07-30  8:15 UTC (permalink / raw)
  To: Monk Liu, amd-gfx

On 28.07.20 at 11:00, Monk Liu wrote:
> what:
> the MQD's save and restore of KCQ (kernel compute queue)
> cost lots of clocks during world switch which impacts a lot
> to multi-VF performance
>
> how:
> introduce a paramter to control the number of KCQ to avoid
> performance drop if there is no kernel compute queue needed
>
> notes:
> this paramter only affects gfx 8/9/10
>
> v2:
> refine namings
>
> v3:
> choose queues for each ring to that try best to cross pipes evenly.
>
> TODO:
> in the future we will let hypervisor driver to set this paramter
> automatically thus no need for user to configure it through
> modprobe in virtual machine
>
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>

I would still call it num_kernel_compute_queues instead of num_kcq, but 
that is just a nit pick.

Acked-by: Christian König <christian.koenig@amd.com> for the patch, but 
I would ask Felix for his rb. He knows all this MES, pipe and queue 
stuff much better than I do.

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  5 +++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  4 +++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    | 58 +++++++++++++++---------------
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c     | 30 ++++++++--------
>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c      | 29 +++++++--------
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c      | 31 ++++++++--------
>   7 files changed, 87 insertions(+), 71 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index e97c088..de11136 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -201,6 +201,7 @@ extern int amdgpu_si_support;
>   #ifdef CONFIG_DRM_AMDGPU_CIK
>   extern int amdgpu_cik_support;
>   #endif
> +extern int amdgpu_num_kcq;
>   
>   #define AMDGPU_VM_MAX_NUM_CTX			4096
>   #define AMDGPU_SG_THRESHOLD			(256*1024*1024)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 62ecac9..cf445bab 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -1199,6 +1199,11 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
>   
>   	amdgpu_gmc_tmz_set(adev);
>   
> +	if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
> +		amdgpu_num_kcq = 8;
> +		dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid paramter provided by user\n");
> +	}
> +
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 6291f5f..b545c40 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -150,6 +150,7 @@ int amdgpu_noretry;
>   int amdgpu_force_asic_type = -1;
>   int amdgpu_tmz = 0;
>   int amdgpu_reset_method = -1; /* auto */
> +int amdgpu_num_kcq = -1;
>   
>   struct amdgpu_mgpu_info mgpu_info = {
>   	.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
> @@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
>   MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
>   module_param_named(reset_method, amdgpu_reset_method, int, 0444);
>   
> +MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)");
> +module_param_named(num_kcq, amdgpu_num_kcq, int, 0444);
> +
>   static const struct pci_device_id pciidlist[] = {
>   #ifdef  CONFIG_DRM_AMDGPU_SI
>   	{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 8eff017..f83a9a7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -202,40 +202,42 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
>   
>   void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
>   {
> -	int i, queue, pipe, mec;
> +	int i, queue, pipe;
>   	bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
> +	int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
> +								 adev->gfx.mec.num_queue_per_pipe,
> +								 adev->gfx.num_compute_rings);
> +
> +	if (multipipe_policy) {
> +		/* policy: make queues evenly cross all pipes on MEC1 only */
> +		for (i = 0; i < max_queues_per_mec; i++) {
> +			pipe = i % adev->gfx.mec.num_pipe_per_mec;
> +			queue = (i / adev->gfx.mec.num_pipe_per_mec) %
> +				adev->gfx.mec.num_queue_per_pipe;
> +
> +			set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
> +					adev->gfx.mec.queue_bitmap);
> +		}
> +	} else {
> +		int mec;
>   
> -	/* policy for amdgpu compute queue ownership */
> -	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
> -		queue = i % adev->gfx.mec.num_queue_per_pipe;
> -		pipe = (i / adev->gfx.mec.num_queue_per_pipe)
> -			% adev->gfx.mec.num_pipe_per_mec;
> -		mec = (i / adev->gfx.mec.num_queue_per_pipe)
> -			/ adev->gfx.mec.num_pipe_per_mec;
> -
> -		/* we've run out of HW */
> -		if (mec >= adev->gfx.mec.num_mec)
> -			break;
> +		/* policy: amdgpu owns all queues in the given pipe */
> +		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
> +			queue = i % adev->gfx.mec.num_queue_per_pipe;
> +			pipe = (i / adev->gfx.mec.num_queue_per_pipe)
> +				% adev->gfx.mec.num_pipe_per_mec;
> +			mec = (i / adev->gfx.mec.num_queue_per_pipe)
> +				/ adev->gfx.mec.num_pipe_per_mec;
>   
> -		if (multipipe_policy) {
> -			/* policy: amdgpu owns the first two queues of the first MEC */
> -			if (mec == 0 && queue < 2)
> -				set_bit(i, adev->gfx.mec.queue_bitmap);
> -		} else {
> -			/* policy: amdgpu owns all queues in the first pipe */
> -			if (mec == 0 && pipe == 0)
> -				set_bit(i, adev->gfx.mec.queue_bitmap);
> +			/* we've run out of HW */
> +			if (mec >= adev->gfx.mec.num_mec)
> +				break;
> +
> +			set_bit(i, adev->gfx.mec.queue_bitmap);
>   		}
>   	}
>   
> -	/* update the number of active compute rings */
> -	adev->gfx.num_compute_rings =
> -		bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
> -
> -	/* If you hit this case and edited the policy, you probably just
> -	 * need to increase AMDGPU_MAX_COMPUTE_RINGS */
> -	if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
> -		adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> +	dev_info(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
>   }
>   
>   void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index db9f1e8..3a93b3c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -4022,21 +4022,23 @@ static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
>   	amdgpu_gfx_compute_queue_acquire(adev);
>   	mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;
>   
> -	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> -				      AMDGPU_GEM_DOMAIN_GTT,
> -				      &adev->gfx.mec.hpd_eop_obj,
> -				      &adev->gfx.mec.hpd_eop_gpu_addr,
> -				      (void **)&hpd);
> -	if (r) {
> -		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> -		gfx_v10_0_mec_fini(adev);
> -		return r;
> -	}
> +	if (mec_hpd_size) {
> +		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> +									  AMDGPU_GEM_DOMAIN_GTT,
> +									  &adev->gfx.mec.hpd_eop_obj,
> +									  &adev->gfx.mec.hpd_eop_gpu_addr,
> +									  (void **)&hpd);
> +		if (r) {
> +			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> +			gfx_v10_0_mec_fini(adev);
> +			return r;
> +		}
>   
> -	memset(hpd, 0, mec_hpd_size);
> +		memset(hpd, 0, mec_hpd_size);
>   
> -	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> -	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> +		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +	}
>   
>   	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
>   		mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
> @@ -7159,7 +7161,7 @@ static int gfx_v10_0_early_init(void *handle)
>   		break;
>   	}
>   
> -	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> +	adev->gfx.num_compute_rings = amdgpu_num_kcq;
>   
>   	gfx_v10_0_set_kiq_pm4_funcs(adev);
>   	gfx_v10_0_set_ring_funcs(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 8d72089..eb4b812 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -1343,21 +1343,22 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
>   	amdgpu_gfx_compute_queue_acquire(adev);
>   
>   	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
> +	if (mec_hpd_size) {
> +		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> +									  AMDGPU_GEM_DOMAIN_VRAM,
> +									  &adev->gfx.mec.hpd_eop_obj,
> +									  &adev->gfx.mec.hpd_eop_gpu_addr,
> +									  (void **)&hpd);
> +		if (r) {
> +			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> +			return r;
> +		}
>   
> -	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> -				      AMDGPU_GEM_DOMAIN_VRAM,
> -				      &adev->gfx.mec.hpd_eop_obj,
> -				      &adev->gfx.mec.hpd_eop_gpu_addr,
> -				      (void **)&hpd);
> -	if (r) {
> -		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> -		return r;
> -	}
> -
> -	memset(hpd, 0, mec_hpd_size);
> +		memset(hpd, 0, mec_hpd_size);
>   
> -	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> -	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> +		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +	}
>   
>   	return 0;
>   }
> @@ -5294,7 +5295,7 @@ static int gfx_v8_0_early_init(void *handle)
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>   
>   	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
> -	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> +	adev->gfx.num_compute_rings = amdgpu_num_kcq;
>   	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
>   	gfx_v8_0_set_ring_funcs(adev);
>   	gfx_v8_0_set_irq_funcs(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index e4e751f..43ad044 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -1938,22 +1938,23 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
>   	/* take ownership of the relevant compute queues */
>   	amdgpu_gfx_compute_queue_acquire(adev);
>   	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
> +	if (mec_hpd_size) {
> +		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> +									  AMDGPU_GEM_DOMAIN_VRAM,
> +									  &adev->gfx.mec.hpd_eop_obj,
> +									  &adev->gfx.mec.hpd_eop_gpu_addr,
> +									  (void **)&hpd);
> +		if (r) {
> +			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> +			gfx_v9_0_mec_fini(adev);
> +			return r;
> +		}
>   
> -	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> -				      AMDGPU_GEM_DOMAIN_VRAM,
> -				      &adev->gfx.mec.hpd_eop_obj,
> -				      &adev->gfx.mec.hpd_eop_gpu_addr,
> -				      (void **)&hpd);
> -	if (r) {
> -		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> -		gfx_v9_0_mec_fini(adev);
> -		return r;
> -	}
> -
> -	memset(hpd, 0, mec_hpd_size);
> +		memset(hpd, 0, mec_hpd_size);
>   
> -	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> -	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> +		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +	}
>   
>   	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
>   
> @@ -4625,7 +4626,7 @@ static int gfx_v9_0_early_init(void *handle)
>   		adev->gfx.num_gfx_rings = 0;
>   	else
>   		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
> -	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> +	adev->gfx.num_compute_rings = amdgpu_num_kcq;
>   	gfx_v9_0_set_kiq_pm4_funcs(adev);
>   	gfx_v9_0_set_ring_funcs(adev);
>   	gfx_v9_0_set_irq_funcs(adev);

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


* RE: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v3)
  2020-07-28 14:38 ` Felix Kuehling
@ 2020-07-31  2:11   ` Liu, Monk
  2020-07-31  2:19     ` Felix Kuehling
  0 siblings, 1 reply; 6+ messages in thread
From: Liu, Monk @ 2020-07-31  2:11 UTC (permalink / raw)
  To: Kuehling, Felix, amd-gfx

[AMD Official Use Only - Internal Distribution Only]

>>> Indentation looks wrong. Did you use the wrong TAB size?

My TAB size is 4 spaces, and I don't know why it looks strange, but I can replace the TABs with spaces anyway

Will send a v4 patch addressing your other suggestions soon

_____________________________________
Monk Liu|GPU Virtualization Team |AMD


-----Original Message-----
From: Kuehling, Felix <Felix.Kuehling@amd.com>
Sent: Tuesday, July 28, 2020 10:38 PM
To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v3)

On 2020-07-28 at 5:00 a.m., Monk Liu wrote:
> what:
> the MQD's save and restore of KCQ (kernel compute queue) cost lots of
> clocks during world switch which impacts a lot to multi-VF performance
>
> how:
> introduce a paramter to control the number of KCQ to avoid performance
> drop if there is no kernel compute queue needed
>
> notes:
> this paramter only affects gfx 8/9/10
>
> v2:
> refine namings
>
> v3:
> choose queues for each ring to that try best to cross pipes evenly.

Thanks. Some more suggestions for simplifications inline.


>
> TODO:
> in the future we will let hypervisor driver to set this paramter
> automatically thus no need for user to configure it through modprobe
> in virtual machine
>
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  5 +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  4 +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    | 58 +++++++++++++++---------------
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c     | 30 ++++++++--------
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c      | 29 +++++++--------
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c      | 31 ++++++++--------
>  7 files changed, 87 insertions(+), 71 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index e97c088..de11136 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -201,6 +201,7 @@ extern int amdgpu_si_support;  #ifdef
> CONFIG_DRM_AMDGPU_CIK  extern int amdgpu_cik_support;  #endif
> +extern int amdgpu_num_kcq;
>
>  #define AMDGPU_VM_MAX_NUM_CTX4096
>  #define AMDGPU_SG_THRESHOLD(256*1024*1024)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 62ecac9..cf445bab 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -1199,6 +1199,11 @@ static int amdgpu_device_check_arguments(struct
> amdgpu_device *adev)
>
>  amdgpu_gmc_tmz_set(adev);
>
> +if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
> +amdgpu_num_kcq = 8;
> +dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid paramter provided by user\n");
> +}
> +
>  return 0;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 6291f5f..b545c40 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -150,6 +150,7 @@ int amdgpu_noretry;  int amdgpu_force_asic_type =
> -1;  int amdgpu_tmz = 0;  int amdgpu_reset_method = -1; /* auto */
> +int amdgpu_num_kcq = -1;
>
>  struct amdgpu_mgpu_info mgpu_info = {
>  .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
> @@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
> MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default),
> 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
> module_param_named(reset_method, amdgpu_reset_method, int, 0444);
>
> +MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want
> +to setup (8 if set to greater than 8 or less than 0, only affect gfx
> +8+)"); module_param_named(num_kcq, amdgpu_num_kcq, int, 0444);
> +
>  static const struct pci_device_id pciidlist[] = {  #ifdef
> CONFIG_DRM_AMDGPU_SI
>  {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, diff
> --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 8eff017..f83a9a7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -202,40 +202,42 @@ bool
> amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
>
>  void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)  {
> -int i, queue, pipe, mec;
> +int i, queue, pipe;
>  bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
> +int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
> + adev->gfx.mec.num_queue_per_pipe,
> + adev->gfx.num_compute_rings);

Indentation looks wrong. Did you use the wrong TAB size?


> +
> +if (multipipe_policy) {
> +/* policy: make queues evenly cross all pipes on MEC1 only */
> +for (i = 0; i < max_queues_per_mec; i++) {
> +pipe = i % adev->gfx.mec.num_pipe_per_mec;
> +queue = (i / adev->gfx.mec.num_pipe_per_mec) %
> +adev->gfx.mec.num_queue_per_pipe;
> +
> +set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
> +adev->gfx.mec.queue_bitmap);
> +}
> +} else {
> +int mec;
>
> -/* policy for amdgpu compute queue ownership */
> -for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
> -queue = i % adev->gfx.mec.num_queue_per_pipe;
> -pipe = (i / adev->gfx.mec.num_queue_per_pipe)
> -% adev->gfx.mec.num_pipe_per_mec;
> -mec = (i / adev->gfx.mec.num_queue_per_pipe)
> -/ adev->gfx.mec.num_pipe_per_mec;
> -
> -/* we've run out of HW */
> -if (mec >= adev->gfx.mec.num_mec)
> -break;
> +/* policy: amdgpu owns all queues in the given pipe */
> +for (i = 0; i < adev->gfx.num_compute_rings; ++i) {

You could also use i < max_queues_per_mec here.


> +queue = i % adev->gfx.mec.num_queue_per_pipe;
> +pipe = (i / adev->gfx.mec.num_queue_per_pipe)
> +% adev->gfx.mec.num_pipe_per_mec;
> +mec = (i / adev->gfx.mec.num_queue_per_pipe)
> +/ adev->gfx.mec.num_pipe_per_mec;

Then mec will always be 0 and you can eliminate that variable.


>
> -if (multipipe_policy) {
> -/* policy: amdgpu owns the first two queues of the first MEC */
> -if (mec == 0 && queue < 2)
> -set_bit(i, adev->gfx.mec.queue_bitmap);
> -} else {
> -/* policy: amdgpu owns all queues in the first pipe */
> -if (mec == 0 && pipe == 0)
> -set_bit(i, adev->gfx.mec.queue_bitmap);
> +/* we've run out of HW */
> +if (mec >= adev->gfx.mec.num_mec)
> +break;

And you won't need this if (...) break; any more.

It'll make the two policy cases look more similar, so it's easier to see how they are actually different.

Regards,
  Felix


> +
> +set_bit(i, adev->gfx.mec.queue_bitmap);
>  }
>  }
>
> -/* update the number of active compute rings */
> -adev->gfx.num_compute_rings =
> -bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
> -
> -/* If you hit this case and edited the policy, you probably just
> - * need to increase AMDGPU_MAX_COMPUTE_RINGS */
> -if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
> -adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> +dev_info(adev->dev, "mec queue bitmap weight=%d\n",
> +bitmap_weight(adev->gfx.mec.queue_bitmap,
> +AMDGPU_MAX_COMPUTE_QUEUES));
>  }
>
>  void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index db9f1e8..3a93b3c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -4022,21 +4022,23 @@ static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
>  amdgpu_gfx_compute_queue_acquire(adev);
>  mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;
>
> -r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> -      AMDGPU_GEM_DOMAIN_GTT,
> -      &adev->gfx.mec.hpd_eop_obj,
> -      &adev->gfx.mec.hpd_eop_gpu_addr,
> -      (void **)&hpd);
> -if (r) {
> -dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> -gfx_v10_0_mec_fini(adev);
> -return r;
> -}
> +if (mec_hpd_size) {
> +r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> +  AMDGPU_GEM_DOMAIN_GTT,
> +  &adev->gfx.mec.hpd_eop_obj,
> +  &adev->gfx.mec.hpd_eop_gpu_addr,
> +  (void **)&hpd);
> +if (r) {
> +dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> +gfx_v10_0_mec_fini(adev);
> +return r;
> +}
>
> -memset(hpd, 0, mec_hpd_size);
> +memset(hpd, 0, mec_hpd_size);
>
> -amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> -amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> +amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +}
>
>  if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
>  mec_hdr = (const struct gfx_firmware_header_v1_0
> *)adev->gfx.mec_fw->data; @@ -7159,7 +7161,7 @@ static int gfx_v10_0_early_init(void *handle)
>  break;
>  }
>
> -adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> +adev->gfx.num_compute_rings = amdgpu_num_kcq;
>
>  gfx_v10_0_set_kiq_pm4_funcs(adev);
>  gfx_v10_0_set_ring_funcs(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 8d72089..eb4b812 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -1343,21 +1343,22 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
>  amdgpu_gfx_compute_queue_acquire(adev);
>
>  mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
> +if (mec_hpd_size) {
> +r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> +  AMDGPU_GEM_DOMAIN_VRAM,
> +  &adev->gfx.mec.hpd_eop_obj,
> +  &adev->gfx.mec.hpd_eop_gpu_addr,
> +  (void **)&hpd);
> +if (r) {
> +dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> +return r;
> +}
>
> -r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> -      AMDGPU_GEM_DOMAIN_VRAM,
> -      &adev->gfx.mec.hpd_eop_obj,
> -      &adev->gfx.mec.hpd_eop_gpu_addr,
> -      (void **)&hpd);
> -if (r) {
> -dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> -return r;
> -}
> -
> -memset(hpd, 0, mec_hpd_size);
> +memset(hpd, 0, mec_hpd_size);
>
> -amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> -amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> +amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +}
>
>  return 0;
>  }
> @@ -5294,7 +5295,7 @@ static int gfx_v8_0_early_init(void *handle)
>  struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>
>  adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
> -adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> +adev->gfx.num_compute_rings = amdgpu_num_kcq;
>  adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
>  gfx_v8_0_set_ring_funcs(adev);
>  gfx_v8_0_set_irq_funcs(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index e4e751f..43ad044 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -1938,22 +1938,23 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
>  /* take ownership of the relevant compute queues */
>  amdgpu_gfx_compute_queue_acquire(adev);
>  mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
> +if (mec_hpd_size) {
> +r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> +  AMDGPU_GEM_DOMAIN_VRAM,
> +  &adev->gfx.mec.hpd_eop_obj,
> +  &adev->gfx.mec.hpd_eop_gpu_addr,
> +  (void **)&hpd);
> +if (r) {
> +dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> +gfx_v9_0_mec_fini(adev);
> +return r;
> +}
>
> -r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> -      AMDGPU_GEM_DOMAIN_VRAM,
> -      &adev->gfx.mec.hpd_eop_obj,
> -      &adev->gfx.mec.hpd_eop_gpu_addr,
> -      (void **)&hpd);
> -if (r) {
> -dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> -gfx_v9_0_mec_fini(adev);
> -return r;
> -}
> -
> -memset(hpd, 0, mec_hpd_size);
> +memset(hpd, 0, mec_hpd_size);
>
> -amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> -amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> +amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> +}
>
>  mec_hdr = (const struct gfx_firmware_header_v1_0
> *)adev->gfx.mec_fw->data;
>
> @@ -4625,7 +4626,7 @@ static int gfx_v9_0_early_init(void *handle)
>  adev->gfx.num_gfx_rings = 0;
>  else
>  adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
> -adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> +adev->gfx.num_compute_rings = amdgpu_num_kcq;
>  gfx_v9_0_set_kiq_pm4_funcs(adev);
>  gfx_v9_0_set_ring_funcs(adev);
>  gfx_v9_0_set_irq_funcs(adev);
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


* Re: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v3)
  2020-07-31  2:11   ` Liu, Monk
@ 2020-07-31  2:19     ` Felix Kuehling
  2020-07-31  3:48       ` Liu, Monk
  0 siblings, 1 reply; 6+ messages in thread
From: Felix Kuehling @ 2020-07-31  2:19 UTC (permalink / raw)
  To: Liu, Monk, amd-gfx


On 2020-07-30 at 10:11 p.m., Liu, Monk wrote:
> [AMD Official Use Only - Internal Distribution Only]
>
>>>> Indentation looks wrong. Did you use the wrong TAB size?
> My TAB size is 4 space, and I don't know why it looks strange , but I can use space to replace the TABs anyway

The Linux kernel uses TABs for indentation, and the TAB width is 8. Do not
replace TABs with spaces for indentation; that would violate the kernel
coding style guide. Please change the settings of your editor.

See Documentation/process/coding-style.rst.
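
For example (the whitespace here is only illustrative), with 8-column
TABs the continuation lines in question would normally line up like
this, using TABs for the leading indent and spaces only to align under
the opening parenthesis:

	/* example alignment only */
	int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
				     adev->gfx.mec.num_queue_per_pipe,
				     adev->gfx.num_compute_rings);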

Regards,
  Felix


>
> Will send v4 patch against other suggestions from your side soon
>
> _____________________________________
> Monk Liu|GPU Virtualization Team |AMD
>
>
> -----Original Message-----
> From: Kuehling, Felix <Felix.Kuehling@amd.com>
> Sent: Tuesday, July 28, 2020 10:38 PM
> To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v3)
>
> On 2020-07-28 at 5:00 a.m., Monk Liu wrote:
>> what:
>> the MQD's save and restore of KCQ (kernel compute queue) cost lots of
>> clocks during world switch which impacts a lot to multi-VF performance
>>
>> how:
>> introduce a paramter to control the number of KCQ to avoid performance
>> drop if there is no kernel compute queue needed
>>
>> notes:
>> this paramter only affects gfx 8/9/10
>>
>> v2:
>> refine namings
>>
>> v3:
>> choose queues for each ring to that try best to cross pipes evenly.
> Thanks. Some more suggestions for simplifications inline.
>
>
>> TODO:
>> in the future we will let hypervisor driver to set this paramter
>> automatically thus no need for user to configure it through modprobe
>> in virtual machine
>>
>> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  5 +++
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  4 +++
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    | 58 +++++++++++++++---------------
>>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c     | 30 ++++++++--------
>>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c      | 29 +++++++--------
>>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c      | 31 ++++++++--------
>>  7 files changed, 87 insertions(+), 71 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index e97c088..de11136 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -201,6 +201,7 @@ extern int amdgpu_si_support;  #ifdef
>> CONFIG_DRM_AMDGPU_CIK  extern int amdgpu_cik_support;  #endif
>> +extern int amdgpu_num_kcq;
>>
>>  #define AMDGPU_VM_MAX_NUM_CTX4096
>>  #define AMDGPU_SG_THRESHOLD(256*1024*1024)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index 62ecac9..cf445bab 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -1199,6 +1199,11 @@ static int amdgpu_device_check_arguments(struct
>> amdgpu_device *adev)
>>
>>  amdgpu_gmc_tmz_set(adev);
>>
>> +if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
>> +amdgpu_num_kcq = 8;
>> +dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid paramter provided by user\n");
>> +}
>> +
>>  return 0;
>>  }
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> index 6291f5f..b545c40 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> @@ -150,6 +150,7 @@ int amdgpu_noretry;  int amdgpu_force_asic_type =
>> -1;  int amdgpu_tmz = 0;  int amdgpu_reset_method = -1; /* auto */
>> +int amdgpu_num_kcq = -1;
>>
>>  struct amdgpu_mgpu_info mgpu_info = {
>>  .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
>> @@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
>> MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default),
>> 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
>> module_param_named(reset_method, amdgpu_reset_method, int, 0444);
>>
>> +MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want
>> +to setup (8 if set to greater than 8 or less than 0, only affect gfx
>> +8+)"); module_param_named(num_kcq, amdgpu_num_kcq, int, 0444);
>> +
>>  static const struct pci_device_id pciidlist[] = {  #ifdef
>> CONFIG_DRM_AMDGPU_SI
>>  {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, diff
>> --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> index 8eff017..f83a9a7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> @@ -202,40 +202,42 @@ bool
>> amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
>>
>>  void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)  {
>> -int i, queue, pipe, mec;
>> +int i, queue, pipe;
>>  bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
>> +int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
>> + adev->gfx.mec.num_queue_per_pipe,
>> + adev->gfx.num_compute_rings);
> Indentation looks wrong. Did you use the wrong TAB size?
>
>
>> +
>> +if (multipipe_policy) {
>> +/* policy: make queues evenly cross all pipes on MEC1 only */
>> +for (i = 0; i < max_queues_per_mec; i++) {
>> +pipe = i % adev->gfx.mec.num_pipe_per_mec;
>> +queue = (i / adev->gfx.mec.num_pipe_per_mec) %
>> +adev->gfx.mec.num_queue_per_pipe;
>> +
>> +set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
>> +adev->gfx.mec.queue_bitmap);
>> +}
>> +} else {
>> +int mec;
>>
>> -/* policy for amdgpu compute queue ownership */
>> -for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
>> -queue = i % adev->gfx.mec.num_queue_per_pipe;
>> -pipe = (i / adev->gfx.mec.num_queue_per_pipe)
>> -% adev->gfx.mec.num_pipe_per_mec;
>> -mec = (i / adev->gfx.mec.num_queue_per_pipe)
>> -/ adev->gfx.mec.num_pipe_per_mec;
>> -
>> -/* we've run out of HW */
>> -if (mec >= adev->gfx.mec.num_mec)
>> -break;
>> +/* policy: amdgpu owns all queues in the given pipe */
>> +for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
> You could also use i < max_queues_per_mec here.
>
>
>> +queue = i % adev->gfx.mec.num_queue_per_pipe;
>> +pipe = (i / adev->gfx.mec.num_queue_per_pipe)
>> +% adev->gfx.mec.num_pipe_per_mec;
>> +mec = (i / adev->gfx.mec.num_queue_per_pipe)
>> +/ adev->gfx.mec.num_pipe_per_mec;
> Then mec will always be 0 and you can eliminate that variable.
>
>
>> -if (multipipe_policy) {
>> -/* policy: amdgpu owns the first two queues of the first MEC */
>> -if (mec == 0 && queue < 2)
>> -set_bit(i, adev->gfx.mec.queue_bitmap);
>> -} else {
>> -/* policy: amdgpu owns all queues in the first pipe */
>> -if (mec == 0 && pipe == 0)
>> -set_bit(i, adev->gfx.mec.queue_bitmap);
>> +/* we've run out of HW */
>> +if (mec >= adev->gfx.mec.num_mec)
>> +break;
> And you won't need this if (...) break; any more.
>
> It'll make the two policy cases look more similar, so it's easier to see how they are actually different.
>
> Regards,
>   Felix
>
>
>> +
>> +set_bit(i, adev->gfx.mec.queue_bitmap);
>>  }
>>  }
>>
>> -/* update the number of active compute rings */
>> -adev->gfx.num_compute_rings =
>> -bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
>> -
>> -/* If you hit this case and edited the policy, you probably just
>> - * need to increase AMDGPU_MAX_COMPUTE_RINGS */
>> -if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
>> -adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
>> +dev_info(adev->dev, "mec queue bitmap weight=%d\n",
>> +bitmap_weight(adev->gfx.mec.queue_bitmap,
>> +AMDGPU_MAX_COMPUTE_QUEUES));
>>  }
>>
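
Taken together, the three suggestions above would let the non-multipipe branch collapse into a single loop. A minimal sketch of what that could look like (illustration only, not the posted patch; max_queues_per_mec is the local variable introduced at the top of the function in this version):

	} else {
		/* policy: amdgpu owns all queues in the given pipe.
		 * max_queues_per_mec is already clamped to the queues of
		 * MEC1, so 'mec' is always 0 here and the "run out of HW"
		 * break is not needed; queues are taken from pipe 0 first,
		 * then pipe 1, and so on.
		 */
		for (i = 0; i < max_queues_per_mec; ++i)
			set_bit(i, adev->gfx.mec.queue_bitmap);
	}
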
>>  void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> index db9f1e8..3a93b3c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> @@ -4022,21 +4022,23 @@ static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
>>  amdgpu_gfx_compute_queue_acquire(adev);
>>  mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;
>>
>> -r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>> -      AMDGPU_GEM_DOMAIN_GTT,
>> -      &adev->gfx.mec.hpd_eop_obj,
>> -      &adev->gfx.mec.hpd_eop_gpu_addr,
>> -      (void **)&hpd);
>> -if (r) {
>> -dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>> -gfx_v10_0_mec_fini(adev);
>> -return r;
>> -}
>> +if (mec_hpd_size) {
>> +r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>> +  AMDGPU_GEM_DOMAIN_GTT,
>> +  &adev->gfx.mec.hpd_eop_obj,
>> +  &adev->gfx.mec.hpd_eop_gpu_addr,
>> +  (void **)&hpd);
>> +if (r) {
>> +dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>> +gfx_v10_0_mec_fini(adev);
>> +return r;
>> +}
>>
>> -memset(hpd, 0, mec_hpd_size);
>> +memset(hpd, 0, mec_hpd_size);
>>
>> -amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>> -amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>> +amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>> +amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>> +}
>>
>>  if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
>>  mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
>> @@ -7159,7 +7161,7 @@ static int gfx_v10_0_early_init(void *handle)
>>  break;
>>  }
>>
>> -adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
>> +adev->gfx.num_compute_rings = amdgpu_num_kcq;
>>
>>  gfx_v10_0_set_kiq_pm4_funcs(adev);
>>  gfx_v10_0_set_ring_funcs(adev);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> index 8d72089..eb4b812 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> @@ -1343,21 +1343,22 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
>>  amdgpu_gfx_compute_queue_acquire(adev);
>>
>>  mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
>> +if (mec_hpd_size) {
>> +r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>> +  AMDGPU_GEM_DOMAIN_VRAM,
>> +  &adev->gfx.mec.hpd_eop_obj,
>> +  &adev->gfx.mec.hpd_eop_gpu_addr,
>> +  (void **)&hpd);
>> +if (r) {
>> +dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>> +return r;
>> +}
>>
>> -r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>> -      AMDGPU_GEM_DOMAIN_VRAM,
>> -      &adev->gfx.mec.hpd_eop_obj,
>> -      &adev->gfx.mec.hpd_eop_gpu_addr,
>> -      (void **)&hpd);
>> -if (r) {
>> -dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>> -return r;
>> -}
>> -
>> -memset(hpd, 0, mec_hpd_size);
>> +memset(hpd, 0, mec_hpd_size);
>>
>> -amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>> -amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>> +amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>> +amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>> +}
>>
>>  return 0;
>>  }
>> @@ -5294,7 +5295,7 @@ static int gfx_v8_0_early_init(void *handle)
>>  struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>>
>>  adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
>> -adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
>> +adev->gfx.num_compute_rings = amdgpu_num_kcq;
>>  adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
>>  gfx_v8_0_set_ring_funcs(adev);
>>  gfx_v8_0_set_irq_funcs(adev);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> index e4e751f..43ad044 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> @@ -1938,22 +1938,23 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
>>  /* take ownership of the relevant compute queues */
>>  amdgpu_gfx_compute_queue_acquire(adev);
>>  mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
>> +if (mec_hpd_size) {
>> +r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>> +  AMDGPU_GEM_DOMAIN_VRAM,
>> +  &adev->gfx.mec.hpd_eop_obj,
>> +  &adev->gfx.mec.hpd_eop_gpu_addr,
>> +  (void **)&hpd);
>> +if (r) {
>> +dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>> +gfx_v9_0_mec_fini(adev);
>> +return r;
>> +}
>>
>> -r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>> -      AMDGPU_GEM_DOMAIN_VRAM,
>> -      &adev->gfx.mec.hpd_eop_obj,
>> -      &adev->gfx.mec.hpd_eop_gpu_addr,
>> -      (void **)&hpd);
>> -if (r) {
>> -dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>> -gfx_v9_0_mec_fini(adev);
>> -return r;
>> -}
>> -
>> -memset(hpd, 0, mec_hpd_size);
>> +memset(hpd, 0, mec_hpd_size);
>>
>> -amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>> -amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>> +amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>> +amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>> +}
>>
>>  mec_hdr = (const struct gfx_firmware_header_v1_0
>> *)adev->gfx.mec_fw->data;
>>
>> @@ -4625,7 +4626,7 @@ static int gfx_v9_0_early_init(void *handle)
>>  adev->gfx.num_gfx_rings = 0;
>>  else
>>  adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
>> -adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
>> +adev->gfx.num_compute_rings = amdgpu_num_kcq;
>>  gfx_v9_0_set_kiq_pm4_funcs(adev);
>>  gfx_v9_0_set_ring_funcs(adev);
>>  gfx_v9_0_set_irq_funcs(adev);
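
As a usage note (my own example, not part of the patch): with the new parameter in place, the number of kernel compute queues can be chosen at module load time, e.g.

	# hypothetical invocations; -1 (the default) and any value outside
	# 0..8 fall back to 8 per the check in amdgpu_device_check_arguments()
	modprobe amdgpu num_kcq=0    # no KCQs, the multi-VF case this patch targets
	modprobe amdgpu num_kcq=2    # limit the driver to two KCQs

or equivalently amdgpu.num_kcq=... on the kernel command line.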
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v3)
  2020-07-31  2:19     ` Felix Kuehling
@ 2020-07-31  3:48       ` Liu, Monk
  0 siblings, 0 replies; 6+ messages in thread
From: Liu, Monk @ 2020-07-31  3:48 UTC (permalink / raw)
  To: Kuehling, Felix, amd-gfx

[AMD Official Use Only - Internal Distribution Only]

My latest patch was already written against a visual 8-spaces-per-TAB setting, but the TABs were *not* replaced with eight real spaces.

_____________________________________
Monk Liu|GPU Virtualization Team |AMD


-----Original Message-----
From: Kuehling, Felix <Felix.Kuehling@amd.com>
Sent: Friday, July 31, 2020 10:20 AM
To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v3)


Am 2020-07-30 um 10:11 p.m. schrieb Liu, Monk:
> [AMD Official Use Only - Internal Distribution Only]
>
>>>> Indentation looks wrong. Did you use the wrong TAB size?
> My TAB size is 4 spaces, and I don't know why it looks strange, but I
> can use spaces to replace the TABs anyway.

The Linux kernel uses TABs for indentation, and the TAB width is 8. Do not replace TABs with spaces for indentation; that would violate the kernel coding-style guide. Please change the settings of your editor.

See Documentation/process/coding-style.rst.

Regards,
  Felix
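
As an illustration of the point above (my own example, not taken from the patch): with TAB width 8, a kernel-style continuation line uses one TAB for the indentation level and is then padded so the arguments line up under the opening parenthesis, which is also what checkpatch.pl's alignment check looks for:

	/* one leading TAB; the continuation lines below are padded with
	 * TABs plus a few spaces so they align under the '(' at TAB width 8
	 */
	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);

Viewed with a 4-column TAB the same lines no longer line up, which is probably what made the indentation in the posted patch look wrong.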


>
> I will send a v4 patch addressing the other suggestions from your side soon
>
> _____________________________________
> Monk Liu|GPU Virtualization Team |AMD
>
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2020-07-31  3:48 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-28  9:00 [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v3) Monk Liu
2020-07-28 14:38 ` Felix Kuehling
2020-07-31  2:11   ` Liu, Monk
2020-07-31  2:19     ` Felix Kuehling
2020-07-31  3:48       ` Liu, Monk
2020-07-30  8:15 ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.