Monk or perhaps Felix,

Do you by any chance know why the CS ioctl returns -EINVAL for all
compute submissions if num_kcq <= 4, and what could cause that? If not,
is there any way to disable mid-IB preemption for compute?

Thanks,
Marek

On Fri, Jul 31, 2020 at 9:53 AM Felix Kuehling wrote:
> On 2020-07-31 3:51 a.m., Monk Liu wrote:
> > what:
> > saving and restoring the MQDs of KCQs (kernel compute queues)
> > costs a lot of clocks during world switch, which significantly
> > hurts multi-VF performance
> >
> > how:
> > introduce a parameter to control the number of KCQs, to avoid
> > the performance drop when no kernel compute queues are needed
> >
> > notes:
> > this parameter only affects gfx 8/9/10
> >
> > v2:
> > refine namings
> >
> > v3:
> > choose the queues for each ring so that they cross the pipes as
> > evenly as possible
> >
> > v4:
> > fix indentation
> > some cleanups in gfx_compute_queue_acquire()
> >
> > v5:
> > further fixes on indentation
> > more cleanups in gfx_compute_queue_acquire()
> >
> > TODO:
> > in the future the hypervisor driver will set this parameter
> > automatically, so the user will not need to configure it through
> > modprobe in the virtual machine
> >
> > Signed-off-by: Monk Liu
>
> This patch is Reviewed-by: Felix Kuehling
>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  5 +++
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  4 +++
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    | 49 ++++++++++++------------------
> >  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c     | 30 +++++++++---------
> >  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c      | 29 +++++++++---------
> >  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c      | 31 ++++++++++---------
> >  7 files changed, 76 insertions(+), 73 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > index e97c088..de11136 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > @@ -201,6 +201,7 @@ extern int amdgpu_si_support;
> >  #ifdef CONFIG_DRM_AMDGPU_CIK
> >  extern int amdgpu_cik_support;
> >  #endif
> > +extern int amdgpu_num_kcq;
> >
> >  #define AMDGPU_VM_MAX_NUM_CTX        4096
> >  #define AMDGPU_SG_THRESHOLD          (256*1024*1024)
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > index 62ecac9..cf445bab 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > @@ -1199,6 +1199,11 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
> >
> >          amdgpu_gmc_tmz_set(adev);
> >
> > +        if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
> > +                amdgpu_num_kcq = 8;
> > +                dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n");
> > +        }
> > +
> >          return 0;
> >  }
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > index 6291f5f..b545c40 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > @@ -150,6 +150,7 @@ int amdgpu_noretry;
> >  int amdgpu_force_asic_type = -1;
> >  int amdgpu_tmz = 0;
> >  int amdgpu_reset_method = -1; /* auto */
> > +int amdgpu_num_kcq = -1;
> >
> >  struct amdgpu_mgpu_info mgpu_info = {
> >          .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
> > @@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
> >  MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
> >  module_param_named(reset_method, amdgpu_reset_method, int, 0444);
> >
> > +MODULE_PARM_DESC(num_kcq, "number of kernel compute queues to set up (forced to 8 if set to greater than 8 or less than 0; only affects gfx 8+)");
> > +module_param_named(num_kcq, amdgpu_num_kcq, int, 0444);
> > +
> >  static const struct pci_device_id pciidlist[] = {
> >  #ifdef CONFIG_DRM_AMDGPU_SI
> >          {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
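A side note on the amdgpu_device_check_arguments() hunk above: the
module default of -1 also falls outside [0, 8], so it takes the same
path and ends up as 8 queues. A minimal stand-alone sketch of that
clamp in plain user-space C, not driver code; clamp_num_kcq is a
hypothetical helper for illustration only:

    #include <stdio.h>

    /* Same bounds check as the patch: anything outside [0, 8],
     * including the default -1, becomes 8.
     */
    static int clamp_num_kcq(int num_kcq)
    {
            if (num_kcq > 8 || num_kcq < 0)
                    return 8;
            return num_kcq;
    }

    int main(void)
    {
            printf("%d %d %d %d\n",
                   clamp_num_kcq(-1), /* 8: module default */
                   clamp_num_kcq(0),  /* 0: no KCQs        */
                   clamp_num_kcq(4),  /* 4: taken as-is    */
                   clamp_num_kcq(9)); /* 8: clamped        */
            return 0;
    }

In practice the value would be chosen at module load time, e.g.
amdgpu.num_kcq=2 on the kernel command line or num_kcq=2 as a modprobe
option.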
baco)"); > > module_param_named(reset_method, amdgpu_reset_method, int, 0444); > > > > +MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to > setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)"); > > +module_param_named(num_kcq, amdgpu_num_kcq, int, 0444); > > + > > static const struct pci_device_id pciidlist[] = { > > #ifdef CONFIG_DRM_AMDGPU_SI > > {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > > index 8eff017..0cd9de6 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > > @@ -202,40 +202,29 @@ bool > amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, > > > > void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) > > { > > - int i, queue, pipe, mec; > > + int i, queue, pipe; > > bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev); > > - > > - /* policy for amdgpu compute queue ownership */ > > - for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { > > - queue = i % adev->gfx.mec.num_queue_per_pipe; > > - pipe = (i / adev->gfx.mec.num_queue_per_pipe) > > - % adev->gfx.mec.num_pipe_per_mec; > > - mec = (i / adev->gfx.mec.num_queue_per_pipe) > > - / adev->gfx.mec.num_pipe_per_mec; > > - > > - /* we've run out of HW */ > > - if (mec >= adev->gfx.mec.num_mec) > > - break; > > - > > - if (multipipe_policy) { > > - /* policy: amdgpu owns the first two queues of the > first MEC */ > > - if (mec == 0 && queue < 2) > > - set_bit(i, adev->gfx.mec.queue_bitmap); > > - } else { > > - /* policy: amdgpu owns all queues in the first > pipe */ > > - if (mec == 0 && pipe == 0) > > - set_bit(i, adev->gfx.mec.queue_bitmap); > > + int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec * > > + adev->gfx.mec.num_queue_per_pipe, > > + adev->gfx.num_compute_rings); > > + > > + if (multipipe_policy) { > > + /* policy: make queues evenly cross all pipes on MEC1 only > */ > > + for (i = 0; i < max_queues_per_mec; i++) { > > + pipe = i % adev->gfx.mec.num_pipe_per_mec; > > + queue = (i / adev->gfx.mec.num_pipe_per_mec) % > > + adev->gfx.mec.num_queue_per_pipe; > > + > > + set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + > queue, > > + adev->gfx.mec.queue_bitmap); > > } > > + } else { > > + /* policy: amdgpu owns all queues in the given pipe */ > > + for (i = 0; i < max_queues_per_mec; ++i) > > + set_bit(i, adev->gfx.mec.queue_bitmap); > > } > > > > - /* update the number of active compute rings */ > > - adev->gfx.num_compute_rings = > > - bitmap_weight(adev->gfx.mec.queue_bitmap, > AMDGPU_MAX_COMPUTE_QUEUES); > > - > > - /* If you hit this case and edited the policy, you probably just > > - * need to increase AMDGPU_MAX_COMPUTE_RINGS */ > > - if (WARN_ON(adev->gfx.num_compute_rings > > AMDGPU_MAX_COMPUTE_RINGS)) > > - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; > > + dev_dbg(adev->dev, "mec queue bitmap weight=%d\n", > bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)); > > } > > > > void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > > index f571e25..4172bc8 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > > @@ -4022,21 +4022,23 @@ static int gfx_v10_0_mec_init(struct > amdgpu_device *adev) > > amdgpu_gfx_compute_queue_acquire(adev); > > mec_hpd_size = 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > index f571e25..4172bc8 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > @@ -4022,21 +4022,23 @@ static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
> >          amdgpu_gfx_compute_queue_acquire(adev);
> >          mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;
> >
> > -        r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> > -                                      AMDGPU_GEM_DOMAIN_GTT,
> > -                                      &adev->gfx.mec.hpd_eop_obj,
> > -                                      &adev->gfx.mec.hpd_eop_gpu_addr,
> > -                                      (void **)&hpd);
> > -        if (r) {
> > -                dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> > -                gfx_v10_0_mec_fini(adev);
> > -                return r;
> > -        }
> > +        if (mec_hpd_size) {
> > +                r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> > +                                              AMDGPU_GEM_DOMAIN_GTT,
> > +                                              &adev->gfx.mec.hpd_eop_obj,
> > +                                              &adev->gfx.mec.hpd_eop_gpu_addr,
> > +                                              (void **)&hpd);
> > +                if (r) {
> > +                        dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> > +                        gfx_v10_0_mec_fini(adev);
> > +                        return r;
> > +                }
> >
> > -        memset(hpd, 0, mec_hpd_size);
> > +                memset(hpd, 0, mec_hpd_size);
> >
> > -        amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> > -        amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> > +                amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> > +                amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> > +        }
> >
> >          if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
> >                  mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
> > @@ -7162,7 +7164,7 @@ static int gfx_v10_0_early_init(void *handle)
> >                  break;
> >          }
> >
> > -        adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> > +        adev->gfx.num_compute_rings = amdgpu_num_kcq;
> >
> >          gfx_v10_0_set_kiq_pm4_funcs(adev);
> >          gfx_v10_0_set_ring_funcs(adev);
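The gfx 8 and 9 hunks below add the same if (mec_hpd_size) guard: with
num_kcq=0 there are no compute rings, so the per-queue EOP backing
store would be zero-sized and its creation can simply be skipped. A
tiny stand-alone illustration of the size computation; 4096 is an
assumed placeholder for the per-ASIC GFX*_MEC_HPD_SIZE constants:

    #include <stdio.h>

    #define MEC_HPD_SIZE 4096 /* assumed per-queue EOP buffer size */

    int main(void)
    {
            int kcq_counts[] = { 0, 2, 8 };
            unsigned int i;

            for (i = 0; i < sizeof(kcq_counts) / sizeof(kcq_counts[0]); i++) {
                    int mec_hpd_size = kcq_counts[i] * MEC_HPD_SIZE;

                    if (mec_hpd_size)
                            printf("num_kcq=%d: create %d-byte EOP bo\n",
                                   kcq_counts[i], mec_hpd_size);
                    else
                            printf("num_kcq=%d: skip EOP bo creation\n",
                                   kcq_counts[i]);
            }
            return 0;
    }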
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> > index 8d72089..7df567a 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> > @@ -1343,21 +1343,22 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
> >          amdgpu_gfx_compute_queue_acquire(adev);
> >
> >          mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
> > +        if (mec_hpd_size) {
> > +                r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> > +                                              AMDGPU_GEM_DOMAIN_VRAM,
> > +                                              &adev->gfx.mec.hpd_eop_obj,
> > +                                              &adev->gfx.mec.hpd_eop_gpu_addr,
> > +                                              (void **)&hpd);
> > +                if (r) {
> > +                        dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> > +                        return r;
> > +                }
> >
> > -        r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> > -                                      AMDGPU_GEM_DOMAIN_VRAM,
> > -                                      &adev->gfx.mec.hpd_eop_obj,
> > -                                      &adev->gfx.mec.hpd_eop_gpu_addr,
> > -                                      (void **)&hpd);
> > -        if (r) {
> > -                dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> > -                return r;
> > -        }
> > -
> > -        memset(hpd, 0, mec_hpd_size);
> > +                memset(hpd, 0, mec_hpd_size);
> >
> > -        amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> > -        amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> > +                amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> > +                amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> > +        }
> >
> >          return 0;
> >  }
> > @@ -5294,7 +5295,7 @@ static int gfx_v8_0_early_init(void *handle)
> >          struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> >
> >          adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
> > -        adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> > +        adev->gfx.num_compute_rings = amdgpu_num_kcq;
> >          adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
> >          gfx_v8_0_set_ring_funcs(adev);
> >          gfx_v8_0_set_irq_funcs(adev);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > index e4e751f..ef07e59 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > @@ -1938,22 +1938,23 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
> >          /* take ownership of the relevant compute queues */
> >          amdgpu_gfx_compute_queue_acquire(adev);
> >          mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
> > +        if (mec_hpd_size) {
> > +                r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> > +                                              AMDGPU_GEM_DOMAIN_VRAM,
> > +                                              &adev->gfx.mec.hpd_eop_obj,
> > +                                              &adev->gfx.mec.hpd_eop_gpu_addr,
> > +                                              (void **)&hpd);
> > +                if (r) {
> > +                        dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> > +                        gfx_v9_0_mec_fini(adev);
> > +                        return r;
> > +                }
> >
> > -        r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> > -                                      AMDGPU_GEM_DOMAIN_VRAM,
> > -                                      &adev->gfx.mec.hpd_eop_obj,
> > -                                      &adev->gfx.mec.hpd_eop_gpu_addr,
> > -                                      (void **)&hpd);
> > -        if (r) {
> > -                dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> > -                gfx_v9_0_mec_fini(adev);
> > -                return r;
> > -        }
> > -
> > -        memset(hpd, 0, mec_hpd_size);
> > +                memset(hpd, 0, mec_hpd_size);
> >
> > -        amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> > -        amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> > +                amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> > +                amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> > +        }
> >
> >          mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
> >
> > @@ -4625,7 +4626,7 @@ static int gfx_v9_0_early_init(void *handle)
> >                  adev->gfx.num_gfx_rings = 0;
> >          else
> >                  adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
> > -        adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> > +        adev->gfx.num_compute_rings = amdgpu_num_kcq;
> >          gfx_v9_0_set_kiq_pm4_funcs(adev);
> >          gfx_v9_0_set_ring_funcs(adev);
> >          gfx_v9_0_set_irq_funcs(adev);
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx