All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdkfd: CWSR with software scheduler
@ 2021-08-09 20:40 Mukul Joshi
  2021-08-11  1:15 ` Kasiviswanathan, Harish
  0 siblings, 1 reply; 4+ messages in thread
From: Mukul Joshi @ 2021-08-09 20:40 UTC (permalink / raw)
  To: amd-gfx; +Cc: Felix.Kuehling, jay.cornwall, Mukul Joshi

This patch adds support to program trap handler settings
when loading driver with software scheduler (sched_policy=2).

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Suggested-by: Jay Cornwall <Jay.Cornwall@amd.com>
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c    | 31 +++++++++++++++++
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c  | 31 +++++++++++++++++
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 33 ++++++++++++++++++-
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 20 +++++++++--
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |  3 ++
 5 files changed, 115 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 491acdf92f73..960acf68150a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -560,6 +560,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
 	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
 		type = RESET_WAVES;
 		break;
+	case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
+		type = SAVE_WAVES;
+		break;
 	default:
 		type = DRAIN_PIPE;
 		break;
@@ -754,6 +757,33 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
 	adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
 }
 
+static void program_trap_handler_settings(struct kgd_dev *kgd,
+		uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	lock_srbm(kgd, 0, 0, 0, vmid);
+
+	/*
+	 * Program TBA registers
+	 */
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
+			lower_32_bits(tba_addr >> 8));
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
+			upper_32_bits(tba_addr >> 8) |
+			(1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT));
+
+	/*
+	 * Program TMA registers
+	 */
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
+			lower_32_bits(tma_addr >> 8));
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
+			upper_32_bits(tma_addr >> 8));
+
+	unlock_srbm(kgd);
+}
+
 const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
 	.program_sh_mem_settings = kgd_program_sh_mem_settings,
 	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
@@ -774,4 +804,5 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
 	.get_atc_vmid_pasid_mapping_info =
 			get_atc_vmid_pasid_mapping_info,
 	.set_vm_context_page_table_base = set_vm_context_page_table_base,
+	.program_trap_handler_settings = program_trap_handler_settings,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
index 1f5620cc3570..dac0d751d5af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
@@ -537,6 +537,9 @@ static int hqd_destroy_v10_3(struct kgd_dev *kgd, void *mqd,
 	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
 		type = RESET_WAVES;
 		break;
+	case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
+		type = SAVE_WAVES;
+		break;
 	default:
 		type = DRAIN_PIPE;
 		break;
@@ -658,6 +661,33 @@ static void set_vm_context_page_table_base_v10_3(struct kgd_dev *kgd, uint32_t v
 	adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
 }
 
+static void program_trap_handler_settings_v10_3(struct kgd_dev *kgd,
+			uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	lock_srbm(kgd, 0, 0, 0, vmid);
+
+	/*
+	 * Program TBA registers
+	 */
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
+			lower_32_bits(tba_addr >> 8));
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
+			upper_32_bits(tba_addr >> 8) |
+			(1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT));
+
+	/*
+	 * Program TMA registers
+	 */
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
+			lower_32_bits(tma_addr >> 8));
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
+			 upper_32_bits(tma_addr >> 8));
+
+	unlock_srbm(kgd);
+}
+
 #if 0
 uint32_t enable_debug_trap_v10_3(struct kgd_dev *kgd,
 				uint32_t trap_debug_wave_launch_mode,
@@ -820,6 +850,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
 	.address_watch_get_offset = address_watch_get_offset_v10_3,
 	.get_atc_vmid_pasid_mapping_info = NULL,
 	.set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
+	.program_trap_handler_settings = program_trap_handler_settings_v10_3,
 #if 0
 	.enable_debug_trap = enable_debug_trap_v10_3,
 	.disable_debug_trap = disable_debug_trap_v10_3,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index ed3014fbb563..154244916727 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -42,7 +42,8 @@
 enum hqd_dequeue_request_type {
 	NO_ACTION = 0,
 	DRAIN_PIPE,
-	RESET_WAVES
+	RESET_WAVES,
+	SAVE_WAVES
 };
 
 static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
@@ -566,6 +567,9 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
 	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
 		type = RESET_WAVES;
 		break;
+	case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
+		type = SAVE_WAVES;
+		break;
 	default:
 		type = DRAIN_PIPE;
 		break;
@@ -878,6 +882,32 @@ void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid,
 				adev->gfx.cu_info.max_waves_per_simd;
 }
 
+static void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd,
+                        uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	lock_srbm(kgd, 0, 0, 0, vmid);
+
+	/*
+	 * Program TBA registers
+	 */
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
+                        lower_32_bits(tba_addr >> 8));
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
+                        upper_32_bits(tba_addr >> 8));
+
+	/*
+	 * Program TMA registers
+	 */
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
+			lower_32_bits(tma_addr >> 8));
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
+			upper_32_bits(tma_addr >> 8));
+
+	unlock_srbm(kgd);
+}
+
 const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
 	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
 	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
@@ -899,4 +929,5 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
 			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
 	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
 	.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
+	.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
 };
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index a972ef5eae68..6fd6b2248992 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -211,6 +211,14 @@ static void deallocate_doorbell(struct qcm_process_device *qpd,
 	WARN_ON(!old);
 }
 
+static void program_trap_handler_settings(struct device_queue_manager *dqm,
+				struct qcm_process_device *qpd)
+{
+	return dqm->dev->kfd2kgd->program_trap_handler_settings(
+						dqm->dev->kgd, qpd->vmid,
+						qpd->tba_addr, qpd->tma_addr);
+}
+
 static int allocate_vmid(struct device_queue_manager *dqm,
 			struct qcm_process_device *qpd,
 			struct queue *q)
@@ -241,6 +249,10 @@ static int allocate_vmid(struct device_queue_manager *dqm,
 
 	program_sh_mem_settings(dqm, qpd);
 
+	if (dqm->dev->device_info->asic_family >= CHIP_VEGA10 &&
+	    dqm->dev->cwsr_enabled)
+		program_trap_handler_settings(dqm, qpd);
+
 	/* qpd->page_table_base is set earlier when register_process()
 	 * is called, i.e. when the first queue is created.
 	 */
@@ -582,7 +594,9 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
 		}
 
 		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
-				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
+				(dqm->dev->cwsr_enabled?
+				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
+				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
 				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
 		if (retval) {
 			pr_err("destroy mqd failed\n");
@@ -675,7 +689,9 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
 			continue;
 
 		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
-				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
+				(dqm->dev->cwsr_enabled?
+				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
+				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
 				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
 		if (retval && !ret)
 			/* Return the first error, but keep going to
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 95c656d205ed..c84bd7b2cf59 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -44,6 +44,7 @@ struct kgd_mem;
 enum kfd_preempt_type {
 	KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN = 0,
 	KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
+	KFD_PREEMPT_TYPE_WAVEFRONT_SAVE
 };
 
 struct kfd_vm_fault_info {
@@ -298,6 +299,8 @@ struct kfd2kgd_calls {
 
 	void (*get_cu_occupancy)(struct kgd_dev *kgd, int pasid, int *wave_cnt,
 			int *max_waves_per_cu);
+	void (*program_trap_handler_settings)(struct kgd_dev *kgd,
+			uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr);
 };
 
 #endif	/* KGD_KFD_INTERFACE_H_INCLUDED */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* RE: [PATCH] drm/amdkfd: CWSR with software scheduler
  2021-08-09 20:40 [PATCH] drm/amdkfd: CWSR with software scheduler Mukul Joshi
@ 2021-08-11  1:15 ` Kasiviswanathan, Harish
  2021-08-11 14:56   ` Joshi, Mukul
  0 siblings, 1 reply; 4+ messages in thread
From: Kasiviswanathan, Harish @ 2021-08-11  1:15 UTC (permalink / raw)
  To: Joshi, Mukul, amd-gfx; +Cc: Kuehling, Felix, Cornwall, Jay, Joshi, Mukul

[AMD Official Use Only]

Just few comments inline. With that acknowledged Reviewed-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>

-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Mukul Joshi
Sent: Monday, August 9, 2021 4:41 PM
To: amd-gfx@lists.freedesktop.org
Cc: Kuehling, Felix <Felix.Kuehling@amd.com>; Cornwall, Jay <Jay.Cornwall@amd.com>; Joshi, Mukul <Mukul.Joshi@amd.com>
Subject: [PATCH] drm/amdkfd: CWSR with software scheduler

This patch adds support to program trap handler settings
when loading driver with software scheduler (sched_policy=2).

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Suggested-by: Jay Cornwall <Jay.Cornwall@amd.com>
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c    | 31 +++++++++++++++++
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c  | 31 +++++++++++++++++
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 33 ++++++++++++++++++-
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 20 +++++++++--
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |  3 ++
 5 files changed, 115 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 491acdf92f73..960acf68150a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -560,6 +560,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
 	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
 		type = RESET_WAVES;
 		break;
+	case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
+		type = SAVE_WAVES;
+		break;
 	default:
 		type = DRAIN_PIPE;
 		break;
@@ -754,6 +757,33 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
 	adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
 }
 
+static void program_trap_handler_settings(struct kgd_dev *kgd,
+		uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	lock_srbm(kgd, 0, 0, 0, vmid);
+
+	/*
+	 * Program TBA registers
+	 */
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
+			lower_32_bits(tba_addr >> 8));
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
+			upper_32_bits(tba_addr >> 8) |
+			(1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT));
+
+	/*
+	 * Program TMA registers
+	 */
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
+			lower_32_bits(tma_addr >> 8));
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
+			upper_32_bits(tma_addr >> 8));
+
+	unlock_srbm(kgd);
+}
+
 const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
 	.program_sh_mem_settings = kgd_program_sh_mem_settings,
 	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
@@ -774,4 +804,5 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
 	.get_atc_vmid_pasid_mapping_info =
 			get_atc_vmid_pasid_mapping_info,
 	.set_vm_context_page_table_base = set_vm_context_page_table_base,
+	.program_trap_handler_settings = program_trap_handler_settings,

[HK]: Naming  not consistent. program_trap_handler_settings, program_trap_handler_settings_v10_3 and kgd_gfx_v9_program_trap_handler_settings

 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
index 1f5620cc3570..dac0d751d5af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
@@ -537,6 +537,9 @@ static int hqd_destroy_v10_3(struct kgd_dev *kgd, void *mqd,
 	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
 		type = RESET_WAVES;
 		break;
+	case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
+		type = SAVE_WAVES;
+		break;
 	default:
 		type = DRAIN_PIPE;
 		break;
@@ -658,6 +661,33 @@ static void set_vm_context_page_table_base_v10_3(struct kgd_dev *kgd, uint32_t v
 	adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
 }
 
+static void program_trap_handler_settings_v10_3(struct kgd_dev *kgd,
+			uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	lock_srbm(kgd, 0, 0, 0, vmid);
+
+	/*
+	 * Program TBA registers
+	 */
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
+			lower_32_bits(tba_addr >> 8));
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
+			upper_32_bits(tba_addr >> 8) |
+			(1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT));
+
+	/*
+	 * Program TMA registers
+	 */
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
+			lower_32_bits(tma_addr >> 8));
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
+			 upper_32_bits(tma_addr >> 8));
+
+	unlock_srbm(kgd);
+}
+
 #if 0
 uint32_t enable_debug_trap_v10_3(struct kgd_dev *kgd,
 				uint32_t trap_debug_wave_launch_mode,
@@ -820,6 +850,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
 	.address_watch_get_offset = address_watch_get_offset_v10_3,
 	.get_atc_vmid_pasid_mapping_info = NULL,
 	.set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
+	.program_trap_handler_settings = program_trap_handler_settings_v10_3,
 #if 0
 	.enable_debug_trap = enable_debug_trap_v10_3,
 	.disable_debug_trap = disable_debug_trap_v10_3,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index ed3014fbb563..154244916727 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -42,7 +42,8 @@
 enum hqd_dequeue_request_type {
 	NO_ACTION = 0,
 	DRAIN_PIPE,
-	RESET_WAVES
+	RESET_WAVES,
+	SAVE_WAVES
 };
 
 static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
@@ -566,6 +567,9 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
 	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
 		type = RESET_WAVES;
 		break;
+	case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
+		type = SAVE_WAVES;
+		break;
 	default:
 		type = DRAIN_PIPE;
 		break;
@@ -878,6 +882,32 @@ void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid,
 				adev->gfx.cu_info.max_waves_per_simd;
 }
 
+static void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd,
+                        uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	lock_srbm(kgd, 0, 0, 0, vmid);
+
+	/*
+	 * Program TBA registers
+	 */
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
+                        lower_32_bits(tba_addr >> 8));
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
+                        upper_32_bits(tba_addr >> 8));
+
+	/*
+	 * Program TMA registers
+	 */
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
+			lower_32_bits(tma_addr >> 8));
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
+			upper_32_bits(tma_addr >> 8));
+
+	unlock_srbm(kgd);
+}
+
 const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
 	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
 	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
@@ -899,4 +929,5 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
 			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
 	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
 	.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
+	.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
 };
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index a972ef5eae68..6fd6b2248992 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -211,6 +211,14 @@ static void deallocate_doorbell(struct qcm_process_device *qpd,
 	WARN_ON(!old);
 }
 
+static void program_trap_handler_settings(struct device_queue_manager *dqm,
+				struct qcm_process_device *qpd)
+{
+	return dqm->dev->kfd2kgd->program_trap_handler_settings(
+						dqm->dev->kgd, qpd->vmid,
+						qpd->tba_addr, qpd->tma_addr);

[HK]: Since, this is not defined for all GFX generations, safer to check if function pointer is not NULL. I see it is called only VEGA10+

+}
+
 static int allocate_vmid(struct device_queue_manager *dqm,
 			struct qcm_process_device *qpd,
 			struct queue *q)
@@ -241,6 +249,10 @@ static int allocate_vmid(struct device_queue_manager *dqm,
 
 	program_sh_mem_settings(dqm, qpd);
 
+	if (dqm->dev->device_info->asic_family >= CHIP_VEGA10 &&
+	    dqm->dev->cwsr_enabled)
+		program_trap_handler_settings(dqm, qpd);
+
 	/* qpd->page_table_base is set earlier when register_process()
 	 * is called, i.e. when the first queue is created.
 	 */
@@ -582,7 +594,9 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
 		}
 
 		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
-				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
+				(dqm->dev->cwsr_enabled?
+				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
+				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
 				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
 		if (retval) {
 			pr_err("destroy mqd failed\n");
@@ -675,7 +689,9 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
 			continue;
 
 		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
-				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
+				(dqm->dev->cwsr_enabled?
+				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
+				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
 				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
 		if (retval && !ret)
 			/* Return the first error, but keep going to
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 95c656d205ed..c84bd7b2cf59 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -44,6 +44,7 @@ struct kgd_mem;
 enum kfd_preempt_type {
 	KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN = 0,
 	KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
+	KFD_PREEMPT_TYPE_WAVEFRONT_SAVE
 };
 
 struct kfd_vm_fault_info {
@@ -298,6 +299,8 @@ struct kfd2kgd_calls {
 
 	void (*get_cu_occupancy)(struct kgd_dev *kgd, int pasid, int *wave_cnt,
 			int *max_waves_per_cu);
+	void (*program_trap_handler_settings)(struct kgd_dev *kgd,
+			uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr);
 };
 
 #endif	/* KGD_KFD_INTERFACE_H_INCLUDED */
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* RE: [PATCH] drm/amdkfd: CWSR with software scheduler
  2021-08-11  1:15 ` Kasiviswanathan, Harish
@ 2021-08-11 14:56   ` Joshi, Mukul
  2021-08-11 16:32     ` Kasiviswanathan, Harish
  0 siblings, 1 reply; 4+ messages in thread
From: Joshi, Mukul @ 2021-08-11 14:56 UTC (permalink / raw)
  To: Kasiviswanathan, Harish, amd-gfx; +Cc: Kuehling, Felix, Cornwall, Jay

[AMD Official Use Only]



> -----Original Message-----
> From: Kasiviswanathan, Harish <Harish.Kasiviswanathan@amd.com>
> Sent: Tuesday, August 10, 2021 9:15 PM
> To: Joshi, Mukul <Mukul.Joshi@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Kuehling, Felix <Felix.Kuehling@amd.com>; Cornwall, Jay
> <Jay.Cornwall@amd.com>; Joshi, Mukul <Mukul.Joshi@amd.com>
> Subject: RE: [PATCH] drm/amdkfd: CWSR with software scheduler
> 
> [AMD Official Use Only]
> 
> Just few comments inline. With that acknowledged Reviewed-by: Harish
> Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
> 
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Mukul
> Joshi
> Sent: Monday, August 9, 2021 4:41 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kuehling, Felix <Felix.Kuehling@amd.com>; Cornwall, Jay
> <Jay.Cornwall@amd.com>; Joshi, Mukul <Mukul.Joshi@amd.com>
> Subject: [PATCH] drm/amdkfd: CWSR with software scheduler
> 
> This patch adds support to program trap handler settings when loading driver
> with software scheduler (sched_policy=2).
> 
> Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
> Suggested-by: Jay Cornwall <Jay.Cornwall@amd.com>
> ---
>  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c    | 31 +++++++++++++++++
>  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c  | 31 +++++++++++++++++
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 33
> ++++++++++++++++++-  .../drm/amd/amdkfd/kfd_device_queue_manager.c |
> 20 +++++++++--
>  .../gpu/drm/amd/include/kgd_kfd_interface.h   |  3 ++
>  5 files changed, 115 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index 491acdf92f73..960acf68150a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -560,6 +560,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void
> *mqd,
>  	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
>  		type = RESET_WAVES;
>  		break;
> +	case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
> +		type = SAVE_WAVES;
> +		break;
>  	default:
>  		type = DRAIN_PIPE;
>  		break;
> @@ -754,6 +757,33 @@ static void set_vm_context_page_table_base(struct
> kgd_dev *kgd, uint32_t vmid,
>  	adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
> }
> 
> +static void program_trap_handler_settings(struct kgd_dev *kgd,
> +		uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) {
> +	struct amdgpu_device *adev = get_amdgpu_device(kgd);
> +
> +	lock_srbm(kgd, 0, 0, 0, vmid);
> +
> +	/*
> +	 * Program TBA registers
> +	 */
> +	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
> +			lower_32_bits(tba_addr >> 8));
> +	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
> +			upper_32_bits(tba_addr >> 8) |
> +			(1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT));
> +
> +	/*
> +	 * Program TMA registers
> +	 */
> +	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
> +			lower_32_bits(tma_addr >> 8));
> +	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
> +			upper_32_bits(tma_addr >> 8));
> +
> +	unlock_srbm(kgd);
> +}
> +
>  const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
>  	.program_sh_mem_settings = kgd_program_sh_mem_settings,
>  	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, @@ -774,4
> +804,5 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
>  	.get_atc_vmid_pasid_mapping_info =
>  			get_atc_vmid_pasid_mapping_info,
>  	.set_vm_context_page_table_base =
> set_vm_context_page_table_base,
> +	.program_trap_handler_settings = program_trap_handler_settings,
> 
> [HK]: Naming  not consistent. program_trap_handler_settings,
> program_trap_handler_settings_v10_3 and
> kgd_gfx_v9_program_trap_handler_settings
> 

I am following the naming convention for the other functions defined in these files.
That's the reason the functions names are different. I don't mind naming them all the same.

>  };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> index 1f5620cc3570..dac0d751d5af 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> @@ -537,6 +537,9 @@ static int hqd_destroy_v10_3(struct kgd_dev *kgd, void
> *mqd,
>  	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
>  		type = RESET_WAVES;
>  		break;
> +	case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
> +		type = SAVE_WAVES;
> +		break;
>  	default:
>  		type = DRAIN_PIPE;
>  		break;
> @@ -658,6 +661,33 @@ static void
> set_vm_context_page_table_base_v10_3(struct kgd_dev *kgd, uint32_t v
>  	adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
> }
> 
> +static void program_trap_handler_settings_v10_3(struct kgd_dev *kgd,
> +			uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) {
> +	struct amdgpu_device *adev = get_amdgpu_device(kgd);
> +
> +	lock_srbm(kgd, 0, 0, 0, vmid);
> +
> +	/*
> +	 * Program TBA registers
> +	 */
> +	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
> +			lower_32_bits(tba_addr >> 8));
> +	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
> +			upper_32_bits(tba_addr >> 8) |
> +			(1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT));
> +
> +	/*
> +	 * Program TMA registers
> +	 */
> +	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
> +			lower_32_bits(tma_addr >> 8));
> +	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
> +			 upper_32_bits(tma_addr >> 8));
> +
> +	unlock_srbm(kgd);
> +}
> +
>  #if 0
>  uint32_t enable_debug_trap_v10_3(struct kgd_dev *kgd,
>  				uint32_t trap_debug_wave_launch_mode, @@
> -820,6 +850,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
>  	.address_watch_get_offset = address_watch_get_offset_v10_3,
>  	.get_atc_vmid_pasid_mapping_info = NULL,
>  	.set_vm_context_page_table_base =
> set_vm_context_page_table_base_v10_3,
> +	.program_trap_handler_settings =
> program_trap_handler_settings_v10_3,
>  #if 0
>  	.enable_debug_trap = enable_debug_trap_v10_3,
>  	.disable_debug_trap = disable_debug_trap_v10_3, diff --git
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index ed3014fbb563..154244916727 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -42,7 +42,8 @@
>  enum hqd_dequeue_request_type {
>  	NO_ACTION = 0,
>  	DRAIN_PIPE,
> -	RESET_WAVES
> +	RESET_WAVES,
> +	SAVE_WAVES
>  };
> 
>  static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
> @@ -566,6 +567,9 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void
> *mqd,
>  	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
>  		type = RESET_WAVES;
>  		break;
> +	case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
> +		type = SAVE_WAVES;
> +		break;
>  	default:
>  		type = DRAIN_PIPE;
>  		break;
> @@ -878,6 +882,32 @@ void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev
> *kgd, int pasid,
>  				adev->gfx.cu_info.max_waves_per_simd;
>  }
> 
> +static void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd,
> +                        uint32_t vmid, uint64_t tba_addr, uint64_t
> +tma_addr) {
> +	struct amdgpu_device *adev = get_amdgpu_device(kgd);
> +
> +	lock_srbm(kgd, 0, 0, 0, vmid);
> +
> +	/*
> +	 * Program TBA registers
> +	 */
> +	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
> +                        lower_32_bits(tba_addr >> 8));
> +	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
> +                        upper_32_bits(tba_addr >> 8));
> +
> +	/*
> +	 * Program TMA registers
> +	 */
> +	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
> +			lower_32_bits(tma_addr >> 8));
> +	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
> +			upper_32_bits(tma_addr >> 8));
> +
> +	unlock_srbm(kgd);
> +}
> +
>  const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
>  	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
>  	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
> @@ -899,4 +929,5 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
>  			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
>  	.set_vm_context_page_table_base =
> kgd_gfx_v9_set_vm_context_page_table_base,
>  	.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
> +	.program_trap_handler_settings =
> +kgd_gfx_v9_program_trap_handler_settings,
>  };
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index a972ef5eae68..6fd6b2248992 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -211,6 +211,14 @@ static void deallocate_doorbell(struct
> qcm_process_device *qpd,
>  	WARN_ON(!old);
>  }
> 
> +static void program_trap_handler_settings(struct device_queue_manager
> *dqm,
> +				struct qcm_process_device *qpd)
> +{
> +	return dqm->dev->kfd2kgd->program_trap_handler_settings(
> +						dqm->dev->kgd, qpd->vmid,
> +						qpd->tba_addr, qpd-
> >tma_addr);
> 
> [HK]: Since, this is not defined for all GFX generations, safer to check if function
> pointer is not NULL. I see it is called only VEGA10+
> 
Sure I can add that. However, I am making sure that the function is defined for all ASICs after VEGA10+.

Regards,
Mukul

> +}
> +
>  static int allocate_vmid(struct device_queue_manager *dqm,
>  			struct qcm_process_device *qpd,
>  			struct queue *q)
> @@ -241,6 +249,10 @@ static int allocate_vmid(struct device_queue_manager
> *dqm,
> 
>  	program_sh_mem_settings(dqm, qpd);
> 
> +	if (dqm->dev->device_info->asic_family >= CHIP_VEGA10 &&
> +	    dqm->dev->cwsr_enabled)
> +		program_trap_handler_settings(dqm, qpd);
> +
>  	/* qpd->page_table_base is set earlier when register_process()
>  	 * is called, i.e. when the first queue is created.
>  	 */
> @@ -582,7 +594,9 @@ static int update_queue(struct device_queue_manager
> *dqm, struct queue *q)
>  		}
> 
>  		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
> -				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
> +				(dqm->dev->cwsr_enabled?
> +				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
> +				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
>  				KFD_UNMAP_LATENCY_MS, q->pipe, q-
> >queue);
>  		if (retval) {
>  			pr_err("destroy mqd failed\n");
> @@ -675,7 +689,9 @@ static int evict_process_queues_nocpsch(struct
> device_queue_manager *dqm,
>  			continue;
> 
>  		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
> -				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
> +				(dqm->dev->cwsr_enabled?
> +				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
> +				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
>  				KFD_UNMAP_LATENCY_MS, q->pipe, q-
> >queue);
>  		if (retval && !ret)
>  			/* Return the first error, but keep going to diff --git
> a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> index 95c656d205ed..c84bd7b2cf59 100644
> --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> @@ -44,6 +44,7 @@ struct kgd_mem;
>  enum kfd_preempt_type {
>  	KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN = 0,
>  	KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
> +	KFD_PREEMPT_TYPE_WAVEFRONT_SAVE
>  };
> 
>  struct kfd_vm_fault_info {
> @@ -298,6 +299,8 @@ struct kfd2kgd_calls {
> 
>  	void (*get_cu_occupancy)(struct kgd_dev *kgd, int pasid, int
> *wave_cnt,
>  			int *max_waves_per_cu);
> +	void (*program_trap_handler_settings)(struct kgd_dev *kgd,
> +			uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr);
>  };
> 
>  #endif	/* KGD_KFD_INTERFACE_H_INCLUDED */
> --
> 2.17.1

^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH] drm/amdkfd: CWSR with software scheduler
  2021-08-11 14:56   ` Joshi, Mukul
@ 2021-08-11 16:32     ` Kasiviswanathan, Harish
  0 siblings, 0 replies; 4+ messages in thread
From: Kasiviswanathan, Harish @ 2021-08-11 16:32 UTC (permalink / raw)
  To: Joshi, Mukul, amd-gfx; +Cc: Kuehling, Felix, Cornwall, Jay

[AMD Official Use Only]



-----Original Message-----
From: Joshi, Mukul <Mukul.Joshi@amd.com> 
Sent: Wednesday, August 11, 2021 10:57 AM
To: Kasiviswanathan, Harish <Harish.Kasiviswanathan@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Kuehling, Felix <Felix.Kuehling@amd.com>; Cornwall, Jay <Jay.Cornwall@amd.com>
Subject: RE: [PATCH] drm/amdkfd: CWSR with software scheduler

[AMD Official Use Only]



> -----Original Message-----
> From: Kasiviswanathan, Harish <Harish.Kasiviswanathan@amd.com>
> Sent: Tuesday, August 10, 2021 9:15 PM
> To: Joshi, Mukul <Mukul.Joshi@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Kuehling, Felix <Felix.Kuehling@amd.com>; Cornwall, Jay 
> <Jay.Cornwall@amd.com>; Joshi, Mukul <Mukul.Joshi@amd.com>
> Subject: RE: [PATCH] drm/amdkfd: CWSR with software scheduler
> 
> [AMD Official Use Only]
> 
> Just few comments inline. With that acknowledged Reviewed-by: Harish 
> Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
> 
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of 
> Mukul Joshi
> Sent: Monday, August 9, 2021 4:41 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kuehling, Felix <Felix.Kuehling@amd.com>; Cornwall, Jay 
> <Jay.Cornwall@amd.com>; Joshi, Mukul <Mukul.Joshi@amd.com>
> Subject: [PATCH] drm/amdkfd: CWSR with software scheduler
> 
> This patch adds support to program trap handler settings when loading 
> driver with software scheduler (sched_policy=2).
> 
> Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
> Suggested-by: Jay Cornwall <Jay.Cornwall@amd.com>
> ---
>  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c    | 31 +++++++++++++++++
>  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c  | 31 +++++++++++++++++ 
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 33
> ++++++++++++++++++-  .../drm/amd/amdkfd/kfd_device_queue_manager.c |
> 20 +++++++++--
>  .../gpu/drm/amd/include/kgd_kfd_interface.h   |  3 ++
>  5 files changed, 115 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index 491acdf92f73..960acf68150a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -560,6 +560,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, 
> void *mqd,
>  	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
>  		type = RESET_WAVES;
>  		break;
> +	case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
> +		type = SAVE_WAVES;
> +		break;
>  	default:
>  		type = DRAIN_PIPE;
>  		break;
> @@ -754,6 +757,33 @@ static void set_vm_context_page_table_base(struct
> kgd_dev *kgd, uint32_t vmid,
>  	adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); }
> 
> +static void program_trap_handler_settings(struct kgd_dev *kgd,
> +		uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) {
> +	struct amdgpu_device *adev = get_amdgpu_device(kgd);
> +
> +	lock_srbm(kgd, 0, 0, 0, vmid);
> +
> +	/*
> +	 * Program TBA registers
> +	 */
> +	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
> +			lower_32_bits(tba_addr >> 8));
> +	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
> +			upper_32_bits(tba_addr >> 8) |
> +			(1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT));
> +
> +	/*
> +	 * Program TMA registers
> +	 */
> +	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
> +			lower_32_bits(tma_addr >> 8));
> +	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
> +			upper_32_bits(tma_addr >> 8));
> +
> +	unlock_srbm(kgd);
> +}
> +
>  const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
>  	.program_sh_mem_settings = kgd_program_sh_mem_settings,
>  	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, @@ -774,4
> +804,5 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
>  	.get_atc_vmid_pasid_mapping_info =
>  			get_atc_vmid_pasid_mapping_info,
>  	.set_vm_context_page_table_base =
> set_vm_context_page_table_base,
> +	.program_trap_handler_settings = program_trap_handler_settings,
> 
> [HK]: Naming  not consistent. program_trap_handler_settings,
> program_trap_handler_settings_v10_3 and 
> kgd_gfx_v9_program_trap_handler_settings
> 

I am following the naming convention for the other functions defined in these files.
That's the reason the functions names are different. I don't mind naming them all the same.

[HK]: Thanks for clarifying. I am fine with as it is.

>  };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> index 1f5620cc3570..dac0d751d5af 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> @@ -537,6 +537,9 @@ static int hqd_destroy_v10_3(struct kgd_dev *kgd, 
> void *mqd,
>  	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
>  		type = RESET_WAVES;
>  		break;
> +	case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
> +		type = SAVE_WAVES;
> +		break;
>  	default:
>  		type = DRAIN_PIPE;
>  		break;
> @@ -658,6 +661,33 @@ static void
> set_vm_context_page_table_base_v10_3(struct kgd_dev *kgd, uint32_t v
>  	adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); }
> 
> +static void program_trap_handler_settings_v10_3(struct kgd_dev *kgd,
> +			uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) {
> +	struct amdgpu_device *adev = get_amdgpu_device(kgd);
> +
> +	lock_srbm(kgd, 0, 0, 0, vmid);
> +
> +	/*
> +	 * Program TBA registers
> +	 */
> +	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
> +			lower_32_bits(tba_addr >> 8));
> +	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
> +			upper_32_bits(tba_addr >> 8) |
> +			(1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT));
> +
> +	/*
> +	 * Program TMA registers
> +	 */
> +	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
> +			lower_32_bits(tma_addr >> 8));
> +	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
> +			 upper_32_bits(tma_addr >> 8));
> +
> +	unlock_srbm(kgd);
> +}
> +
>  #if 0
>  uint32_t enable_debug_trap_v10_3(struct kgd_dev *kgd,
>  				uint32_t trap_debug_wave_launch_mode, @@
> -820,6 +850,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
>  	.address_watch_get_offset = address_watch_get_offset_v10_3,
>  	.get_atc_vmid_pasid_mapping_info = NULL,
>  	.set_vm_context_page_table_base =
> set_vm_context_page_table_base_v10_3,
> +	.program_trap_handler_settings =
> program_trap_handler_settings_v10_3,
>  #if 0
>  	.enable_debug_trap = enable_debug_trap_v10_3,
>  	.disable_debug_trap = disable_debug_trap_v10_3, diff --git 
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index ed3014fbb563..154244916727 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -42,7 +42,8 @@
>  enum hqd_dequeue_request_type {
>  	NO_ACTION = 0,
>  	DRAIN_PIPE,
> -	RESET_WAVES
> +	RESET_WAVES,
> +	SAVE_WAVES
>  };
> 
>  static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev 
> *kgd) @@ -566,6 +567,9 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev 
> *kgd, void *mqd,
>  	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
>  		type = RESET_WAVES;
>  		break;
> +	case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
> +		type = SAVE_WAVES;
> +		break;
>  	default:
>  		type = DRAIN_PIPE;
>  		break;
> @@ -878,6 +882,32 @@ void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev 
> *kgd, int pasid,
>  				adev->gfx.cu_info.max_waves_per_simd;
>  }
> 
> +static void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd,
> +                        uint32_t vmid, uint64_t tba_addr, uint64_t
> +tma_addr) {
> +	struct amdgpu_device *adev = get_amdgpu_device(kgd);
> +
> +	lock_srbm(kgd, 0, 0, 0, vmid);
> +
> +	/*
> +	 * Program TBA registers
> +	 */
> +	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
> +                        lower_32_bits(tba_addr >> 8));
> +	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
> +                        upper_32_bits(tba_addr >> 8));
> +
> +	/*
> +	 * Program TMA registers
> +	 */
> +	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
> +			lower_32_bits(tma_addr >> 8));
> +	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
> +			upper_32_bits(tma_addr >> 8));
> +
> +	unlock_srbm(kgd);
> +}
> +
>  const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
>  	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
>  	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
> @@ -899,4 +929,5 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
>  			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
>  	.set_vm_context_page_table_base =
> kgd_gfx_v9_set_vm_context_page_table_base,
>  	.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
> +	.program_trap_handler_settings =
> +kgd_gfx_v9_program_trap_handler_settings,
>  };
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index a972ef5eae68..6fd6b2248992 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -211,6 +211,14 @@ static void deallocate_doorbell(struct 
> qcm_process_device *qpd,
>  	WARN_ON(!old);
>  }
> 
> +static void program_trap_handler_settings(struct device_queue_manager
> *dqm,
> +				struct qcm_process_device *qpd)
> +{
> +	return dqm->dev->kfd2kgd->program_trap_handler_settings(
> +						dqm->dev->kgd, qpd->vmid,
> +						qpd->tba_addr, qpd-
> >tma_addr);
> 
> [HK]: Since, this is not defined for all GFX generations, safer to 
> check if function pointer is not NULL. I see it is called only VEGA10+
> 
Sure I can add that. However, I am making sure that the function is defined for all ASICs after VEGA10+.

Regards,
Mukul

> +}
> +
>  static int allocate_vmid(struct device_queue_manager *dqm,
>  			struct qcm_process_device *qpd,
>  			struct queue *q)
> @@ -241,6 +249,10 @@ static int allocate_vmid(struct 
> device_queue_manager *dqm,
> 
>  	program_sh_mem_settings(dqm, qpd);
> 
> +	if (dqm->dev->device_info->asic_family >= CHIP_VEGA10 &&
> +	    dqm->dev->cwsr_enabled)
> +		program_trap_handler_settings(dqm, qpd);
> +
>  	/* qpd->page_table_base is set earlier when register_process()
>  	 * is called, i.e. when the first queue is created.
>  	 */
> @@ -582,7 +594,9 @@ static int update_queue(struct 
> device_queue_manager *dqm, struct queue *q)
>  		}
> 
>  		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
> -				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
> +				(dqm->dev->cwsr_enabled?
> +				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
> +				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
>  				KFD_UNMAP_LATENCY_MS, q->pipe, q-
> >queue);
>  		if (retval) {
>  			pr_err("destroy mqd failed\n");
> @@ -675,7 +689,9 @@ static int evict_process_queues_nocpsch(struct
> device_queue_manager *dqm,
>  			continue;
> 
>  		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
> -				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
> +				(dqm->dev->cwsr_enabled?
> +				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
> +				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
>  				KFD_UNMAP_LATENCY_MS, q->pipe, q-
> >queue);
>  		if (retval && !ret)
>  			/* Return the first error, but keep going to diff --git 
> a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> index 95c656d205ed..c84bd7b2cf59 100644
> --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> @@ -44,6 +44,7 @@ struct kgd_mem;
>  enum kfd_preempt_type {
>  	KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN = 0,
>  	KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
> +	KFD_PREEMPT_TYPE_WAVEFRONT_SAVE
>  };
> 
>  struct kfd_vm_fault_info {
> @@ -298,6 +299,8 @@ struct kfd2kgd_calls {
> 
>  	void (*get_cu_occupancy)(struct kgd_dev *kgd, int pasid, int 
> *wave_cnt,
>  			int *max_waves_per_cu);
> +	void (*program_trap_handler_settings)(struct kgd_dev *kgd,
> +			uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr);
>  };
> 
>  #endif	/* KGD_KFD_INTERFACE_H_INCLUDED */
> --
> 2.17.1

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-08-11 16:32 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-09 20:40 [PATCH] drm/amdkfd: CWSR with software scheduler Mukul Joshi
2021-08-11  1:15 ` Kasiviswanathan, Harish
2021-08-11 14:56   ` Joshi, Mukul
2021-08-11 16:32     ` Kasiviswanathan, Harish

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.