All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2/3] drm/amd/amdgpu: Define and implement a function that collects number of waves that are in flight.
@ 2020-09-17 17:10 Ramesh Errabolu
  0 siblings, 0 replies; 3+ messages in thread
From: Ramesh Errabolu @ 2020-09-17 17:10 UTC (permalink / raw)
  To: amd-gfx; +Cc: Ramesh Errabolu

[Why]
Allow user to know how many compute units (CU) are in use at any given
moment.

[How]
Read the SQ registers that give the number of waves in flight for
each queue. Use this information to determine the number of CUs
in use.

Signed-off-by: Ramesh Errabolu <Ramesh.Errabolu@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 206 ++++++++++++++++++
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |  11 +
 2 files changed, 217 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index e6aede725197..2f8c8140734e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -38,7 +38,9 @@
 #include "soc15d.h"
 #include "mmhub_v1_0.h"
 #include "gfxhub_v1_0.h"
+#include "gfx_v9_0.h"
 
+struct kfd_dev;
 
 enum hqd_dequeue_request_type {
 	NO_ACTION = 0,
@@ -706,6 +708,209 @@ void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd,
 	gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
 }
 
+static void lock_spi_csq_mutexes(struct amdgpu_device *adev)
+{
+	mutex_lock(&adev->srbm_mutex); /* taken first, released last */
+	mutex_lock(&adev->grbm_idx_mutex); /* taken second, released first */
+
+}
+
+static void unlock_spi_csq_mutexes(struct amdgpu_device *adev)
+{
+	mutex_unlock(&adev->grbm_idx_mutex); /* reverse of acquisition order */
+	mutex_unlock(&adev->srbm_mutex);
+}
+
+/**
+ * get_wave_count() - Read device registers to get number of waves in flight
+ * for a particular queue. The method also returns the VMID associated with
+ * the queue.
+ *
+ * @adev: Handle of device whose registers are to be read
+ * @queue_idx: Index of queue in the queue-map bit-field
+ * @wave_cnt: Output parameter updated with number of waves in flight
+ * @vmid: Output parameter updated with VMID of queue whose wave count
+ * is being collected; written only when the wave count is non-zero
+ *
+ * Neither output is written for queue slots 0 and 1 of a pipe, so callers
+ * must initialize both before the call.
+ */
+static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
+			   int *wave_cnt, int *vmid)
+{
+	int pipe_idx;
+	int queue_slot;
+	unsigned int reg_val;
+
+	/*
+	 * By policy queues at slots 0 and 1 are reserved for non-compute
+	 * queues i.e. those managed for graphic functions.
+	 */
+	if ((queue_idx % adev->gfx.mec.num_queue_per_pipe) < 2)
+		return;
+
+	/*
+	 * Queue belongs to a compute workload. Determine the PIPE index
+	 * associated with the queue and program GRBM accordingly:
+	 * MEID = 1, PIPEID = pipe_idx, QUEUEID = queue_slot, VMID = 0
+	 */
+	pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe;
+	queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe;
+	soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0);
+
+	/*
+	 * Read from register number of waves in flight. If non-zero get the
+	 * VMID associated with queue
+	 */
+	reg_val = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
+			 queue_slot);
+	*wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
+	if (*wave_cnt != 0)
+		*vmid = (RREG32_SOC15(GC, 0, mmCP_HQD_VMID) &
+			 CP_HQD_VMID__VMID_MASK) >> CP_HQD_VMID__VMID__SHIFT;
+}
+
+/**
+ * kgd_gfx_v9_get_cu_occupancy() - Reads relevant registers associated with
+ * each shader engine and aggregates the number of waves that are in flight for
+ * the process whose pasid is provided as a parameter. The process could have
+ * ZERO or more queues running and submitting waves to compute units.
+ *
+ * @note: It's possible that the device has too many queues (oversubscription)
+ * in which case a VMID could be remapped to a different PASID. This could lead
+ * to an inaccurate wave count. Following is a high-level sequence:
+ *    Time T1: vmid = getVmid(); vmid is associated with Pasid P1
+ *    Time T2: pasid = getPasid(vmid); vmid is associated with Pasid P2
+ * In the sequence above wave count obtained from time T1 will be incorrectly
+ * lost or added to total wave count.
+ *
+ * @kgd: Handle of device from which to get number of waves in flight
+ *
+ * @pasid: Identifies the process for which this query call is invoked
+ *
+ * @pasid_wave_cnt: Output parameter updated with number of waves in flight that
+ * belong to process with given pasid
+ *
+ * The registers that provide the waves in flight are:
+ *
+ *  SPI_CSQ_WF_ACTIVE_STATUS - bit-map of queues per pipe. At any moment there
+ *  can be a max of 32 queues that could submit wave fronts to be run by compute
+ *  units. The bit is ON if a queue is slotted, OFF if there is no queue. The
+ *  process could have ZERO or more queues slotted and submitting waves to be
+ *  run on compute units. Even when there is a queue it is possible there could
+ *  be zero wave fronts, this can happen when queue is waiting on top-of-pipe
+ *  events - e.g. waitRegMem command
+ *
+ *  For each bit that is ON from above:
+ *
+ *    Read (SPI_CSQ_WF_ACTIVE_COUNT_0 + queue_idx) register. It provides the
+ *    number of waves that are in flight for the queue at specified index. The
+ *    index ranges from 0 to 7.
+ *
+ *    If non-zero waves are in flight, read CP_HQD_VMID register to obtain VMID
+ *    of the wave(s).
+ *
+ *    Determine if VMID from above step maps to pasid provided as parameter. If
+ *    it matches aggregate the wave count. That the VMID will not match pasid is
+ *    a normal condition i.e. a device is expected to support multiple queues
+ *    from multiple processes.
+ *
+ *  @note: Reading the registers mentioned above requires programming GRBM
+ *  appropriately.
+ */
+static void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid,
+					int *pasid_wave_cnt)
+{
+	int qidx;
+	int vmid;
+	int se_idx;
+	int sh_idx;
+	int se_cnt;
+	int sh_cnt;
+	int wave_cnt;
+	int queue_map;
+	int pasid_tmp;
+	int vmid_wave_cnt = 0;
+	struct amdgpu_device *adev;
+	DECLARE_BITMAP(cp_queue_bitmap, KGD_MAX_QUEUES);
+
+	/*
+	 * Acquire SRBM/GRBM locks before programming them to read registers.
+	 * By policy compute workloads are submitted on MEC1 (Micro-Engine)
+	 * Program GRBM to allow reading registers from MEC1. This constitutes
+	 * base programming. Programming of bit-fields for PIPE, QUEUE, etc will
+	 * be done later
+	 */
+	adev = get_amdgpu_device(kgd);
+	lock_spi_csq_mutexes(adev);
+	soc15_grbm_select(adev, 1, 0, 0, 0);
+
+	/*
+	 * Iterate through the shader engines and arrays of the device
+	 * to read registers that provide number of waves in flight
+	 */
+	bitmap_complement(cp_queue_bitmap, adev->gfx.mec.queue_bitmap,
+			  KGD_MAX_QUEUES);
+	sh_cnt = adev->gfx.config.max_sh_per_se;
+	se_cnt = adev->gfx.config.max_shader_engines;
+	for (se_idx = 0; se_idx < se_cnt; se_idx++) {
+		for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) {
+
+			/*
+			 * Program GRBM to read queue map register that is
+			 * associated with specified shader engine and array
+			 */
+			gfx_v9_0_select_se_sh(adev, se_idx, sh_idx, 0xffffffff);
+			queue_map = RREG32(SOC15_REG_OFFSET(GC, 0,
+					   mmSPI_CSQ_WF_ACTIVE_STATUS));
+
+			/*
+			 * Assumption: queue map encodes four pipes per
+			 * micro-engine, eight queues per pipe. True for GFX9,
+			 * must be verified for newer device families. Test
+			 * bits with an unsigned mask: 1 << 31 overflows int.
+			 */
+			for (qidx = 0; qidx < 32; qidx++) {
+				if (!test_bit(qidx, cp_queue_bitmap))
+					continue;
+				if (!(queue_map & (1U << qidx)))
+					continue;
+
+				/*
+				 * For the specified queue index read number of
+				 * waves in flight and the VMID of the waves
+				 */
+				vmid = 0xFF;
+				wave_cnt = 0;
+				get_wave_count(adev, qidx, &wave_cnt, &vmid);
+
+				/*
+				 * Get PASID that is associated with VMID and
+				 * update the waves-in-flight aggregate
+				 */
+				if (wave_cnt != 0) {
+					pasid_tmp =
+					  RREG32(SOC15_REG_OFFSET(OSSSYS, 0,
+						 mmIH_VMID_0_LUT) + vmid);
+					if (pasid_tmp == pasid)
+						vmid_wave_cnt += wave_cnt;
+				}
+			}
+		}
+	}
+
+	/*
+	 * Reset GRBM to default state before releasing locks
+	 * associated with GRBM/SRBM resources
+	 */
+	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+	soc15_grbm_select(adev, 0, 0, 0, 0);
+	unlock_spi_csq_mutexes(adev);
+
+	/* Update the output parameter and return */
+	*pasid_wave_cnt = vmid_wave_cnt;
+}
+
 const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
 	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
 	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
@@ -726,4 +931,5 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
 	.get_atc_vmid_pasid_mapping_info =
 			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
 	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
+	.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
 };
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index fc592f60e6a0..19e885cd0853 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -212,6 +212,15 @@ struct tile_config {
  * IH ring entry. This function allows the KFD ISR to get the VMID
  * from the fault status register as early as possible.
  *
+ * @get_cu_occupancy: Function pointer that returns to caller the number
+ * of wave fronts that are in flight for all of the queues of a process
+ * as identified by its pasid. It is important to note that the value
+ * returned by this function is a snapshot of current moment and cannot
+ * guarantee any minimum for the number of waves in-flight. This function
+ * is defined for devices that belong to GFX9 and later GFX families. Care
+ * must be taken in calling this function as it is not defined for devices
+ * that belong to GFX8 and earlier GFX families.
+ *
  * This structure contains function pointers to services that the kgd driver
  * provides to amdkfd driver.
  *
@@ -286,6 +295,8 @@ struct kfd2kgd_calls {
 	void (*set_vm_context_page_table_base)(struct kgd_dev *kgd,
 			uint32_t vmid, uint64_t page_table_base);
 	uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd);
+
+	void (*get_cu_occupancy)(struct kgd_dev *kgd, int pasid, int *wave_cnt);
 };
 
 #endif	/* KGD_KFD_INTERFACE_H_INCLUDED */
-- 
2.27.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* RE: [PATCH 2/3] drm/amd/amdgpu: Define and implement a function that collects number of waves that are in flight.
  2020-09-25 22:03 Ramesh Errabolu
@ 2020-09-28 10:48 ` Russell, Kent
  0 siblings, 0 replies; 3+ messages in thread
From: Russell, Kent @ 2020-09-28 10:48 UTC (permalink / raw)
  To: Errabolu, Ramesh, amd-gfx; +Cc: Errabolu, Ramesh

[AMD Public Use]

Some minor typos

> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Ramesh Errabolu
> Sent: Friday, September 25, 2020 6:03 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Errabolu, Ramesh <Ramesh.Errabolu@amd.com>
> Subject: [PATCH 2/3] drm/amd/amdgpu: Define and implement a function that collects
> number of waves that are in flight.
> 
> [Why]
> Allow user to know how many compute units (CU) are in use at any given
> moment.
> 
> [How]
> Read registers of SQ that give number of waves that are in flight
> of various queues. Use this information to determine number of CU's
> in use.
> 
> Signed-off-by: Ramesh Errabolu <Ramesh.Errabolu@amd.com>
> ---
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 176 +++++++++++++++++-
>  .../gpu/drm/amd/include/kgd_kfd_interface.h   |  12 ++
>  2 files changed, 187 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index e6aede725197..87d4c8855805 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -38,7 +38,7 @@
>  #include "soc15d.h"
>  #include "mmhub_v1_0.h"
>  #include "gfxhub_v1_0.h"
> -
> +#include "gfx_v9_0.h"
> 
>  enum hqd_dequeue_request_type {
>  	NO_ACTION = 0,
> @@ -706,6 +706,179 @@ void kgd_gfx_v9_set_vm_context_page_table_base(struct
> kgd_dev *kgd,
>  	gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
>  }
> 
> +static void lock_spi_csq_mutexes(struct amdgpu_device *adev)
> +{
> +	mutex_lock(&adev->srbm_mutex);
> +	mutex_lock(&adev->grbm_idx_mutex);
> +
> +}
> +
> +static void unlock_spi_csq_mutexes(struct amdgpu_device *adev)
> +{
> +	mutex_unlock(&adev->grbm_idx_mutex);
> +	mutex_unlock(&adev->srbm_mutex);
> +}
> +
> +/**
> + * @get_wave_count: Read device registers to get number of waves in flight for
> + * a particulare queue. The method also returns the VMID associated with the

particular

> + * queue.
> + *
> + * @adev: Handle of device whose registers are to be read
> + * @queue_idx: Index of queue in the queue-map bit-field
> + * @wave_cnt: Output parameter updated with number of waves in flight
> + * @vmid: Output parameter updated with VMID of queue whose wave count
> + * is being collected
> + */
> +static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
> +		int *wave_cnt, int *vmid)
> +{
> +	int pipe_idx;
> +	int queue_slot;
> +	unsigned int reg_val;
> +
> +	/*
> +	 * Program GRBM with appropriate MEID, PIPEID, QUEUEID and VMID
> +	 * parameters to read out waves in flight. Get VMID if there are
> +	 * non-zero waves in flight.
> +	 */
> +	*vmid = 0xFF;
> +	*wave_cnt = 0;
> +	pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe;
> +	queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe;
> +	soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0);
> +	reg_val = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
> +			 queue_slot);
> +	*wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
> +	if (*wave_cnt != 0)
> +		*vmid = (RREG32_SOC15(GC, 0, mmCP_HQD_VMID) &
> +			 CP_HQD_VMID__VMID_MASK) >> CP_HQD_VMID__VMID__SHIFT;
> +}
> +
> +/**
> + * @kgd_gfx_v9_get_cu_occupancy: Reads relevant registers associated with each
> + * shader engine and aggregates the number of waves that are in fight for the
in flight

> + * process whose pasid is provided as a parameter. The process could have ZERO
> + * or more queues running and submitting waves to compute units.
> + *
> + * @kgd: Handle of device from which to get number of waves in flight
> + * @pasid: Identifies the process for which this query call is invoked
> + * @wave_cnt: Output parameter updated with number of waves in flight that
> + * belong to process with given pasid
> + * @max_waves_per_cu: Output parameter updated with maximum number of waves
> + * possible per Compute Unit
> + *
> + * @note: It's possible that the device has too many queues (oversubscription)
> + * in which case a VMID could be remapped to a different PASID. This could lead
> + * to in accurate wave count. Following is a high-level sequence:
to an inaccurate

> + *    Time T1: vmid = getVmid(); vmid is associated with Pasid P1
> + *    Time T2: passId = getPasId(vmid); vmid is associated with Pasid P2
> + * In the sequence above wave count obtained from time T1 will be incorrectly
> + * lost or added to total wave count.
> + *
> + * The registers that provide the waves in flight are:
> + *
> + *  SPI_CSQ_WF_ACTIVE_STATUS - bit-map of queues per pipe. The bit is ON if a
> + *  queue is slotted, OFF if there is no queue. A process could have ZERO or
> + *  more queues slotted and submitting waves to be run on compute units. Even
> + *  when there is a queue it is possible there could be zero wave fronts, this
> + *  can happen when queue is waiting on top-of-pipe events - e.g. waitRegMem
> + *  command
> + *
> + *  For each bit that is ON from above:
> + *
> + *    Read (SPI_CSQ_WF_ACTIVE_COUNT_0 + queue_idx) register. It provides the
> + *    number of waves that are in flight for the queue at specified index. The
> + *    index ranges from 0 to 7.
> + *
> + *    If non-zero waves are in fligth, read CP_HQD_VMID register to obtain VMID
flight

> + *    of the wave(s).
> + *
> + *    Determine if VMID from above step maps to pasid provided as parameter. If
> + *    it matches agrregate the wave count. That the VMID will not match pasid is
> + *    a normal condition i.e. a device is expected to support multiple queues
> + *    from multiple proceses.
> + *
> + *  Reading registers referenced above involves programming GRBM appropriately
> + */
> +static void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid,
> +		int *pasid_wave_cnt, int *max_waves_per_cu)
> +{
> +	int qidx;
> +	int vmid;
> +	int se_idx;
> +	int sh_idx;
> +	int se_cnt;
> +	int sh_cnt;
> +	int wave_cnt;
> +	int queue_map;
> +	int pasid_tmp;
> +	int max_queue_cnt;
> +	int vmid_wave_cnt = 0;
> +	struct amdgpu_device *adev;
> +	DECLARE_BITMAP(cp_queue_bitmap, KGD_MAX_QUEUES);
> +
> +	adev = get_amdgpu_device(kgd);
> +	lock_spi_csq_mutexes(adev);
> +	soc15_grbm_select(adev, 1, 0, 0, 0);
> +
> +	/*
> +	 * Iterate through the shader engines and arrays of the device
> +	 * to get number of waves in flight
> +	 */
> +	bitmap_complement(cp_queue_bitmap, adev->gfx.mec.queue_bitmap,
> +			  KGD_MAX_QUEUES);
> +	max_queue_cnt = adev->gfx.mec.num_pipe_per_mec *
> +			adev->gfx.mec.num_queue_per_pipe;
> +	sh_cnt = adev->gfx.config.max_sh_per_se;
> +	se_cnt = adev->gfx.config.max_shader_engines;
> +	for (se_idx = 0; se_idx < se_cnt; se_idx++) {
> +		for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) {
> +
> +			gfx_v9_0_select_se_sh(adev, se_idx, sh_idx, 0xffffffff);
> +			queue_map = RREG32(SOC15_REG_OFFSET(GC, 0,
> +					   mmSPI_CSQ_WF_ACTIVE_STATUS));
> +
> +			/*
> +			 * Assumption: queue map encodes following schema: four
> +			 * pipes per each micro-engine, with each pipe mapping
> +			 * eight queues. This schema is true for GFX9 devices
> +			 * and must be verified for newer device families
> +			 */
> +			for (qidx = 0; qidx < max_queue_cnt; qidx++) {
> +
> +				/* Skip qeueus that are not associated with
> +				 * compute functions
> +				 */
> +				if (!test_bit(qidx, cp_queue_bitmap))
> +					continue;
> +
> +				if (!(queue_map & (1 << qidx)))
> +					continue;
> +
> +				/* Get number of waves in flight and aggregate them */
> +				get_wave_count(adev, qidx, &wave_cnt, &vmid);
> +				if (wave_cnt != 0) {
> +					pasid_tmp =
> +					  RREG32(SOC15_REG_OFFSET(OSSSYS, 0,
> +						 mmIH_VMID_0_LUT) + vmid);
> +					if (pasid_tmp == pasid)
> +						vmid_wave_cnt += wave_cnt;
> +				}
> +			}
> +		}
> +	}
> +
> +	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
> +	soc15_grbm_select(adev, 0, 0, 0, 0);
> +	unlock_spi_csq_mutexes(adev);
> +
> +	/* Update the output parameters and return */
> +	*pasid_wave_cnt = vmid_wave_cnt;
> +	*max_waves_per_cu = adev->gfx.cu_info.simd_per_cu *
> +				adev->gfx.cu_info.max_waves_per_simd;
> +}
> +
>  const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
>  	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
>  	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
> @@ -726,4 +899,5 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
>  	.get_atc_vmid_pasid_mapping_info =
>  			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
>  	.set_vm_context_page_table_base =
> kgd_gfx_v9_set_vm_context_page_table_base,
> +	.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
>  };
> diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> index fc592f60e6a0..e37b4b9f626d 100644
> --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> @@ -212,6 +212,15 @@ struct tile_config {
>   * IH ring entry. This function allows the KFD ISR to get the VMID
>   * from the fault status register as early as possible.
>   *
> + * @get_cu_occupancy: Function pointer that returns to caller the number
> + * of wave fronts that are in flight for all of the queues of a process
> + * as identified by its pasid. It is important to note that the value
> + * returned by this function is a snapshot of current moment and cannot
> + * guarantee any minimum for the number of waves in-flight. This function
> + * is defined for devices that belong to GFX9 and later GFX families. Care
> + * must be taken in calling this function as it is not defined for devices
> + * that belong to GFX8 and below GFX families.
> + *
>   * This structure contains function pointers to services that the kgd driver
>   * provides to amdkfd driver.
>   *
> @@ -286,6 +295,9 @@ struct kfd2kgd_calls {
>  	void (*set_vm_context_page_table_base)(struct kgd_dev *kgd,
>  			uint32_t vmid, uint64_t page_table_base);
>  	uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd);
> +
> +	void (*get_cu_occupancy)(struct kgd_dev *kgd, int pasid, int *wave_cnt,
> +			int *max_waves_per_cu);
>  };
> 
>  #endif	/* KGD_KFD_INTERFACE_H_INCLUDED */
> --
> 2.27.0
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.or
> g%2Fmailman%2Flistinfo%2Famd-
> gfx&amp;data=02%7C01%7Ckent.russell%40amd.com%7C36b3f4cda50a47ed3b5608d8619e
> d37a%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637366682081748390&amp;
> sdata=ZXD4eVB8TWow6sHB6Mf2OUV%2BSsb4EP%2BB5VMokKEp7W0%3D&amp;reserved=
> 0
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 2/3] drm/amd/amdgpu: Define and implement a function that collects number of waves that are in flight.
@ 2020-09-25 22:03 Ramesh Errabolu
  2020-09-28 10:48 ` Russell, Kent
  0 siblings, 1 reply; 3+ messages in thread
From: Ramesh Errabolu @ 2020-09-25 22:03 UTC (permalink / raw)
  To: amd-gfx; +Cc: Ramesh Errabolu

[Why]
Allow user to know how many compute units (CU) are in use at any given
moment.

[How]
Read the SQ registers that give the number of waves in flight for
each queue. Use this information to determine the number of CUs
in use.

Signed-off-by: Ramesh Errabolu <Ramesh.Errabolu@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 176 +++++++++++++++++-
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |  12 ++
 2 files changed, 187 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index e6aede725197..87d4c8855805 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -38,7 +38,7 @@
 #include "soc15d.h"
 #include "mmhub_v1_0.h"
 #include "gfxhub_v1_0.h"
-
+#include "gfx_v9_0.h"
 
 enum hqd_dequeue_request_type {
 	NO_ACTION = 0,
@@ -706,6 +706,179 @@ void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd,
 	gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
 }
 
+static void lock_spi_csq_mutexes(struct amdgpu_device *adev)
+{
+	mutex_lock(&adev->srbm_mutex); /* taken first, released last */
+	mutex_lock(&adev->grbm_idx_mutex); /* taken second, released first */
+
+}
+
+static void unlock_spi_csq_mutexes(struct amdgpu_device *adev)
+{
+	mutex_unlock(&adev->grbm_idx_mutex); /* reverse of acquisition order */
+	mutex_unlock(&adev->srbm_mutex);
+}
+
+/**
+ * get_wave_count() - Read device registers to get number of waves in flight
+ * for a particular queue. The method also returns the VMID associated with
+ * the queue.
+ *
+ * @adev: Handle of device whose registers are to be read
+ * @queue_idx: Index of queue in the queue-map bit-field
+ * @wave_cnt: Output parameter updated with number of waves in flight
+ * @vmid: Output parameter updated with VMID of queue whose wave count
+ * is being collected; left at 0xFF when no waves are in flight
+ */
+static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
+		int *wave_cnt, int *vmid)
+{
+	int pipe_idx;
+	int queue_slot;
+	unsigned int reg_val;
+
+	/*
+	 * Program GRBM with appropriate MEID, PIPEID, QUEUEID and VMID
+	 * parameters to read out waves in flight. Get VMID if there are
+	 * non-zero waves in flight.
+	 */
+	*vmid = 0xFF;
+	*wave_cnt = 0;
+	pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe;
+	queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe;
+	soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0);
+	reg_val = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
+			 queue_slot);
+	*wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
+	if (*wave_cnt != 0)
+		*vmid = (RREG32_SOC15(GC, 0, mmCP_HQD_VMID) &
+			 CP_HQD_VMID__VMID_MASK) >> CP_HQD_VMID__VMID__SHIFT;
+}
+
+/**
+ * kgd_gfx_v9_get_cu_occupancy() - Reads relevant registers associated with
+ * each shader engine and aggregates the number of waves that are in flight for
+ * the process whose pasid is provided as a parameter. The process could have
+ * ZERO or more queues running and submitting waves to compute units.
+ *
+ * @kgd: Handle of device from which to get number of waves in flight
+ * @pasid: Identifies the process for which this query call is invoked
+ * @pasid_wave_cnt: Output parameter updated with number of waves in flight that
+ * belong to process with given pasid
+ * @max_waves_per_cu: Output parameter updated with maximum number of waves
+ * possible per Compute Unit
+ *
+ * @note: It's possible that the device has too many queues (oversubscription)
+ * in which case a VMID could be remapped to a different PASID. This could lead
+ * to an inaccurate wave count. Following is a high-level sequence:
+ *    Time T1: vmid = getVmid(); vmid is associated with Pasid P1
+ *    Time T2: pasid = getPasid(vmid); vmid is associated with Pasid P2
+ * In the sequence above wave count obtained from time T1 will be incorrectly
+ * lost or added to total wave count.
+ *
+ * The registers that provide the waves in flight are:
+ *
+ *  SPI_CSQ_WF_ACTIVE_STATUS - bit-map of queues per pipe. The bit is ON if a
+ *  queue is slotted, OFF if there is no queue. A process could have ZERO or
+ *  more queues slotted and submitting waves to be run on compute units. Even
+ *  when there is a queue it is possible there could be zero wave fronts, this
+ *  can happen when queue is waiting on top-of-pipe events - e.g. waitRegMem
+ *  command
+ *
+ *  For each bit that is ON from above:
+ *
+ *    Read (SPI_CSQ_WF_ACTIVE_COUNT_0 + queue_idx) register. It provides the
+ *    number of waves that are in flight for the queue at specified index. The
+ *    index ranges from 0 to 7.
+ *
+ *    If non-zero waves are in flight, read CP_HQD_VMID register to obtain VMID
+ *    of the wave(s).
+ *
+ *    Determine if VMID from above step maps to pasid provided as parameter. If
+ *    it matches aggregate the wave count. That the VMID will not match pasid is
+ *    a normal condition i.e. a device is expected to support multiple queues
+ *    from multiple processes.
+ *
+ *  Reading registers referenced above involves programming GRBM appropriately
+ */
+static void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid,
+		int *pasid_wave_cnt, int *max_waves_per_cu)
+{
+	int qidx;
+	int vmid;
+	int se_idx;
+	int sh_idx;
+	int se_cnt;
+	int sh_cnt;
+	int wave_cnt;
+	int queue_map;
+	int pasid_tmp;
+	int max_queue_cnt;
+	int vmid_wave_cnt = 0;
+	struct amdgpu_device *adev;
+	DECLARE_BITMAP(cp_queue_bitmap, KGD_MAX_QUEUES);
+
+	adev = get_amdgpu_device(kgd);
+	lock_spi_csq_mutexes(adev);
+	soc15_grbm_select(adev, 1, 0, 0, 0);
+
+	/*
+	 * Iterate through the shader engines and arrays of the device
+	 * to get number of waves in flight
+	 */
+	bitmap_complement(cp_queue_bitmap, adev->gfx.mec.queue_bitmap,
+			  KGD_MAX_QUEUES);
+	max_queue_cnt = adev->gfx.mec.num_pipe_per_mec *
+			adev->gfx.mec.num_queue_per_pipe;
+	sh_cnt = adev->gfx.config.max_sh_per_se;
+	se_cnt = adev->gfx.config.max_shader_engines;
+	for (se_idx = 0; se_idx < se_cnt; se_idx++) {
+		for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) {
+
+			gfx_v9_0_select_se_sh(adev, se_idx, sh_idx, 0xffffffff);
+			queue_map = RREG32(SOC15_REG_OFFSET(GC, 0,
+					   mmSPI_CSQ_WF_ACTIVE_STATUS));
+
+			/*
+			 * Assumption: queue map encodes four pipes per
+			 * micro-engine, eight queues per pipe. True for GFX9,
+			 * must be verified for newer device families. Test
+			 * bits with an unsigned mask: 1 << 31 overflows int.
+			 */
+			for (qidx = 0; qidx < max_queue_cnt; qidx++) {
+
+				/* Skip queues that are not associated with
+				 * compute functions
+				 */
+				if (!test_bit(qidx, cp_queue_bitmap))
+					continue;
+
+				if (!(queue_map & (1U << qidx)))
+					continue;
+
+				/* Get number of waves in flight and aggregate them */
+				get_wave_count(adev, qidx, &wave_cnt, &vmid);
+				if (wave_cnt != 0) {
+					pasid_tmp =
+					  RREG32(SOC15_REG_OFFSET(OSSSYS, 0,
+						 mmIH_VMID_0_LUT) + vmid);
+					if (pasid_tmp == pasid)
+						vmid_wave_cnt += wave_cnt;
+				}
+			}
+		}
+	}
+
+	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+	soc15_grbm_select(adev, 0, 0, 0, 0);
+	unlock_spi_csq_mutexes(adev);
+
+	/* Update the output parameters and return */
+	*pasid_wave_cnt = vmid_wave_cnt;
+	*max_waves_per_cu = adev->gfx.cu_info.simd_per_cu *
+				adev->gfx.cu_info.max_waves_per_simd;
+}
+
 const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
 	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
 	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
@@ -726,4 +899,5 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
 	.get_atc_vmid_pasid_mapping_info =
 			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
 	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
+	.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
 };
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index fc592f60e6a0..e37b4b9f626d 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -212,6 +212,15 @@ struct tile_config {
  * IH ring entry. This function allows the KFD ISR to get the VMID
  * from the fault status register as early as possible.
  *
+ * @get_cu_occupancy: Function pointer that returns to caller the number
+ * of wave fronts that are in flight for all of the queues of a process
+ * as identified by its pasid. It is important to note that the value
+ * returned by this function is a snapshot of current moment and cannot
+ * guarantee any minimum for the number of waves in-flight. This function
+ * is defined for devices that belong to GFX9 and later GFX families. Care
+ * must be taken in calling this function as it is not defined for devices
+ * that belong to GFX8 and earlier GFX families.
+ *
  * This structure contains function pointers to services that the kgd driver
  * provides to amdkfd driver.
  *
@@ -286,6 +295,9 @@ struct kfd2kgd_calls {
 	void (*set_vm_context_page_table_base)(struct kgd_dev *kgd,
 			uint32_t vmid, uint64_t page_table_base);
 	uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd);
+
+	void (*get_cu_occupancy)(struct kgd_dev *kgd, int pasid, int *wave_cnt,
+			int *max_waves_per_cu);
 };
 
 #endif	/* KGD_KFD_INTERFACE_H_INCLUDED */
-- 
2.27.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-09-28 10:48 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-17 17:10 [PATCH 2/3] drm/amd/amdgpu: Define and implement a function that collects number of waves that are in flight Ramesh Errabolu
2020-09-25 22:03 Ramesh Errabolu
2020-09-28 10:48 ` Russell, Kent

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.