* [PATCH 1/8] drm/amdkfd: Add gws number to kfd topology node properties
From: Zeng, Oak @ 2019-05-10 16:01 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Kuehling, Felix, Zeng, Oak, Keely, Sean

Add an amdgpu_amdkfd interface to get num_gws and add num_gws
to /sys/class/kfd/kfd/topology/nodes/x/properties. Only report
num_gws if the MEC FW supports GWS barriers. Currently this is
determined by a module parameter, which will be replaced with a
MEC FW version check when the firmware is ready.

Change-Id: Ie0d00fb20a37ef2856860dbecbe1ad0ca1ef09f7
Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  7 +++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    | 10 ++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h      |  5 +++++
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c  |  5 +++++
 drivers/gpu/drm/amd/amdkfd/kfd_topology.h  |  1 +
 6 files changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 98326e3b..8151221 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -544,6 +544,13 @@ uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd)
 	return adev->rmmio_remap.bus_addr;
 }
 
+uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	return adev->gds.gws.total_size;
+}
+
 int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
 				uint32_t vmid, uint64_t gpu_addr,
 				uint32_t *ib_cmd, uint32_t ib_len)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index f57f297..5700643 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -169,6 +169,7 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
 uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
 uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
 uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd);
+uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd);
 uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src);
 
 #define read_user_wptr(mmptr, wptr, dst)				\
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index a334d3b..3a03c2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -666,6 +666,16 @@ MODULE_PARM_DESC(noretry,
 int halt_if_hws_hang;
 module_param(halt_if_hws_hang, int, 0644);
 MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
+
+/**
+ * DOC: hws_gws_support(bool)
+ * Whether HWS supports GWS barriers. Default value: false (not supported).
+ * This will be replaced with a MEC firmware version check once the
+ * firmware is ready.
+ */
+bool hws_gws_support;
+module_param(hws_gws_support, bool, 0444);
+MODULE_PARM_DESC(hws_gws_support, "MEC FW supports GWS barriers (false = not supported (default), true = supported)");
 #endif
 
 /**
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 8f02d78..338fb07 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -160,6 +160,11 @@ extern int noretry;
  */
 extern int halt_if_hws_hang;
 
+/*
+ * Whether MEC FW supports GWS barriers
+ */
+extern bool hws_gws_support;
+
 enum cache_policy {
 	cache_policy_coherent,
 	cache_policy_noncoherent
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 2c06d6c..128c72c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -454,6 +454,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
 			dev->node_props.lds_size_in_kb);
 	sysfs_show_32bit_prop(buffer, "gds_size_in_kb",
 			dev->node_props.gds_size_in_kb);
+	sysfs_show_32bit_prop(buffer, "num_gws",
+			dev->node_props.num_gws);
 	sysfs_show_32bit_prop(buffer, "wave_front_size",
 			dev->node_props.wave_front_size);
 	sysfs_show_32bit_prop(buffer, "array_count",
@@ -1290,6 +1292,9 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 	dev->node_props.num_sdma_engines = gpu->device_info->num_sdma_engines;
 	dev->node_props.num_sdma_xgmi_engines =
 				gpu->device_info->num_xgmi_sdma_engines;
+	dev->node_props.num_gws = (hws_gws_support &&
+		dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
+		amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0;
 
 	kfd_fill_mem_clk_max_info(dev);
 	kfd_fill_iolink_non_crat_info(dev);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 949e885..276354a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -65,6 +65,7 @@ struct kfd_node_properties {
 	uint32_t max_waves_per_simd;
 	uint32_t lds_size_in_kb;
 	uint32_t gds_size_in_kb;
+	uint32_t num_gws;
 	uint32_t wave_front_size;
 	uint32_t array_count;
 	uint32_t simd_arrays_per_engine;
-- 
2.7.4
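
For illustration, user space could pick up the new property with something
like the following sketch (not part of the patch; the node index and the
simple "name value" line format are assumptions about the sysfs layout):

	#include <stdio.h>
	#include <string.h>

	/* Sketch: scan one topology node's properties file for num_gws.
	 * Real code would iterate over all nodes under
	 * /sys/class/kfd/kfd/topology/nodes/.
	 */
	int read_num_gws(void)
	{
		FILE *f = fopen("/sys/class/kfd/kfd/topology/nodes/0/properties", "r");
		char name[64];
		unsigned long long val;

		if (!f)
			return -1;
		while (fscanf(f, "%63s %llu", name, &val) == 2) {
			if (!strcmp(name, "num_gws")) {
				fclose(f);
				return (int)val;
			}
		}
		fclose(f);
		return 0;	/* property absent or zero: no GWS support */
	}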


* [PATCH 2/8] drm/amdgpu: Remove GWS barriers pre-reservation for gfx
From: Zeng, Oak @ 2019-05-10 16:01 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Kuehling, Felix, Zeng, Oak, Keely, Sean

User space allocated GWS barriers are used. The kernel
pre-allocated GWS barriers are unused.

Change-Id: I7aac259d1f6b7064d02aff231279ff605c29ea34
Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h     | 3 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c     | 6 ------
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c       | 1 -
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c       | 1 -
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c       | 1 -
 6 files changed, 2 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 5c79da8..273c16b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -82,7 +82,7 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
 
 	kref_init(&list->refcount);
 	list->gds_obj = adev->gds.gds_gfx_bo;
-	list->gws_obj = adev->gds.gws_gfx_bo;
+	list->gws_obj = NULL;
 	list->oa_obj = adev->gds.oa_gfx_bo;
 
 	array = amdgpu_bo_list_array_entry(list, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
index f89f573..7e5b4ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
@@ -39,13 +39,12 @@ struct amdgpu_gds {
 	struct amdgpu_gds_asic_info	oa;
 	uint32_t			gds_compute_max_wave_id;
 
-	/* At present, GDS, GWS and OA resources for gfx (graphics)
+	/* At present, GDS and OA resources for gfx (graphics)
 	 * is always pre-allocated and available for graphics operation.
 	 * Such resource is shared between all gfx clients.
 	 * TODO: move this operation to user space
 	 * */
 	struct amdgpu_bo*		gds_gfx_bo;
-	struct amdgpu_bo*		gws_gfx_bo;
 	struct amdgpu_bo*		oa_gfx_bo;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 38ce11e..b0e51e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1791,12 +1791,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 		return r;
 	}
 
-	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
-				    1, AMDGPU_GEM_DOMAIN_GWS,
-				    &adev->gds.gws_gfx_bo, NULL, NULL);
-	if (r)
-		return r;
-
 	r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
 			   adev->gds.oa.total_size);
 	if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index a59e0fd..4d73456 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -4497,7 +4497,6 @@ static int gfx_v7_0_sw_fini(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
-	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
 	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
 
 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 02955e6..e255754 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -2061,7 +2061,6 @@ static int gfx_v8_0_sw_fini(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
-	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
 	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
 
 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 7d7d287..41d1fc3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1785,7 +1785,6 @@ static int gfx_v9_0_sw_fini(void *handle)
 	}
 
 	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
-	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
 	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
 
 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
-- 
2.7.4


* [PATCH 3/8] drm/amdkfd: Add interface to alloc gws from amdgpu
From: Zeng, Oak @ 2019-05-10 16:01 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Kuehling, Felix, Zeng, Oak, Keely, Sean

Add amdgpu_amdkfd interfaces to allocate and free GWS
from amdgpu.

Change-Id: I4eb418356e5a6051aa09c5e2c4a454263631d6ab
Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 34 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  2 ++
 2 files changed, 36 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 8151221..6bc7854 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -339,6 +339,40 @@ void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
 	amdgpu_bo_unref(&(bo));
 }
 
+int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size,
+				void **mem_obj)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	struct amdgpu_bo *bo = NULL;
+	struct amdgpu_bo_param bp;
+	int r;
+
+	memset(&bp, 0, sizeof(bp));
+	bp.size = size;
+	bp.byte_align = 1;
+	bp.domain = AMDGPU_GEM_DOMAIN_GWS;
+	bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+	bp.type = ttm_bo_type_device;
+	bp.resv = NULL;
+
+	r = amdgpu_bo_create(adev, &bp, &bo);
+	if (r) {
+		dev_err(adev->dev,
+			"failed to allocate gws BO for amdkfd (%d)\n", r);
+		return r;
+	}
+
+	*mem_obj = bo;
+	return 0;
+}
+
+void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj)
+{
+	struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj;
+
+	amdgpu_bo_unref(&bo);
+}
+
 uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
 				      enum kgd_engine_type type)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 5700643..c00c974 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -153,6 +153,8 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 				void **mem_obj, uint64_t *gpu_addr,
 				void **cpu_ptr, bool mqd_gfx9);
 void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
+int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, void **mem_obj);
+void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj);
 uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
 				      enum kgd_engine_type type);
 void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
-- 
2.7.4


* [PATCH 4/8] drm/amdkfd: Allocate gws on device initialization
From: Zeng, Oak @ 2019-05-10 16:01 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Kuehling, Felix, Zeng, Oak, Keely, Sean

On device initialization, KFD allocates all (64) GWS entries,
which are shared by all KFD processes.

Change-Id: I1f1274b8d4f6a8ad08785e2791a9a79def75e913
Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 14 +++++++++++++-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h   |  3 +++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index a53dda9..b08dc26 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -552,6 +552,13 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 	} else
 		kfd->max_proc_per_quantum = hws_max_conc_proc;
 
+	/* Allocate global GWS that is shared by all KFD processes */
+	if (hws_gws_support && amdgpu_amdkfd_alloc_gws(kfd->kgd,
+			amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws)) {
+		dev_err(kfd_device, "Could not allocate %d gws\n",
+			amdgpu_amdkfd_get_num_gws(kfd->kgd));
+		goto out;
+	}
 	/* calculate max size of mqds needed for queues */
 	size = max_num_of_queues_per_device *
 			kfd->device_info->mqd_size_aligned;
@@ -575,7 +582,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 			&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
 			false)) {
 		dev_err(kfd_device, "Could not allocate %d bytes\n", size);
-		goto out;
+		goto alloc_gtt_mem_failure;
 	}
 
 	dev_info(kfd_device, "Allocated %d bytes on gart\n", size);
@@ -645,6 +652,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 	kfd_gtt_sa_fini(kfd);
 kfd_gtt_sa_init_error:
 	amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
+alloc_gtt_mem_failure:
+	if (hws_gws_support)
+		amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
 	dev_err(kfd_device,
 		"device %x:%x NOT added due to errors\n",
 		kfd->pdev->vendor, kfd->pdev->device);
@@ -662,6 +672,8 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
 		kfd_doorbell_fini(kfd);
 		kfd_gtt_sa_fini(kfd);
 		amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
+		if (hws_gws_support)
+			amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
 	}
 
 	kfree(kfd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 338fb07..5969e37 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -288,6 +288,9 @@ struct kfd_dev {
 
 	/* Compute Profile ref. count */
 	atomic_t compute_profile;
+
+	/* Global GWS resource shared between processes */
+	void *gws;
 };
 
 enum kfd_mempool {
-- 
2.7.4


* [PATCH 5/8] drm/amdkfd: Add function to set queue gws
From: Zeng, Oak @ 2019-05-10 16:01 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Kuehling, Felix, Zeng, Oak, Keely, Sean

Add functions in the process queue manager to
set/get a queue's GWS. Also set the process's
number of GWS used. Currently only one queue per
process can use GWS, and it uses all of them.

Change-Id: I03e480c8692db3eabfc3a188cce8904d5d962ab7
Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h              |  7 ++++
 .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 49 ++++++++++++++++++++++
 2 files changed, 56 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 5969e37..09a8d0d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -454,6 +454,9 @@ struct queue_properties {
  *
  * @device: The kfd device that created this queue.
  *
+ * @gws: Points to the gws kgd_mem if this is a gws control queue; NULL
+ * otherwise.
+ *
  * This structure represents user mode compute queues.
  * It contains all the necessary data to handle such queues.
  *
@@ -475,6 +478,7 @@ struct queue {
 
 	struct kfd_process	*process;
 	struct kfd_dev		*device;
+	void *gws;
 };
 
 /*
@@ -868,6 +872,9 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
 			struct queue_properties *p);
 int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid,
 			struct queue_properties *p);
+int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
+			void *gws);
+void *pqm_get_gws(struct process_queue_manager *pqm, unsigned int qid);
 struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
 						unsigned int qid);
 int pqm_get_wave_state(struct process_queue_manager *pqm,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index e652e25..1e9ed59 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -26,6 +26,7 @@
 #include "kfd_device_queue_manager.h"
 #include "kfd_priv.h"
 #include "kfd_kernel_queue.h"
+#include "amdgpu_amdkfd.h"
 
 static inline struct process_queue_node *get_queue_by_qid(
 			struct process_queue_manager *pqm, unsigned int qid)
@@ -400,6 +401,54 @@ int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid,
 	return 0;
 }
 
+int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
+			void *gws)
+{
+	struct kfd_dev *dev = NULL;
+	struct process_queue_node *pqn, *iter;
+	struct kfd_process_device *pdd;
+
+	pqn = get_queue_by_qid(pqm, qid);
+	if (!pqn) {
+		pr_err("Queue id does not match any known queue\n");
+		return -EINVAL;
+	}
+
+	if (pqn->q)
+		dev = pqn->q->device;
+	if (WARN_ON(!dev))
+		return -ENODEV;
+
+	pdd = kfd_get_process_device_data(dev, pqm->process);
+	if (!pdd) {
+		pr_err("Process device data doesn't exist\n");
+		return -EINVAL;
+	}
+
+	/* Only allow one queue per process to have GWS assigned */
+	if (gws)
+		list_for_each_entry(iter, &pqm->queues, process_queue_list)
+			if (iter->q && iter->q->gws)
+				return -EINVAL;
+
+	pqn->q->gws = gws;
+	pdd->qpd.num_gws = gws ? amdgpu_amdkfd_get_num_gws(dev->kgd) : 0;
+	return 0;
+}
+
+void *pqm_get_gws(struct process_queue_manager *pqm, unsigned int qid)
+{
+	struct process_queue_node *pqn;
+
+	pqn = get_queue_by_qid(pqm, qid);
+	if (!pqn) {
+		pr_debug("No queue %d exists for get gws operation\n", qid);
+		return NULL;
+	}
+
+	return pqn->q->gws;
+}
+
 struct kernel_queue *pqm_get_kernel_queue(
 					struct process_queue_manager *pqm,
 					unsigned int qid)
-- 
2.7.4


* [PATCH 6/8] drm/amdkfd: Add function to add/remove gws to kfd process
From: Zeng, Oak @ 2019-05-10 16:01 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Kuehling, Felix, Zeng, Oak, Keely, Sean

The GWS BO is shared between all KFD processes. Add functions to add
the GWS BO to a KFD process's BO list so GWS can be evicted from and
restored for the process.

Change-Id: I75d74cfdadb7075ff8b2b68634e205deb73dc1ea
Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h       |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 96 ++++++++++++++++++++++++
 2 files changed, 98 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index c00c974..f968bf1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -155,6 +155,8 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
 int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, void **mem_obj);
 void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj);
+int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem);
+int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem);
 uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
 				      enum kgd_engine_type type);
 void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index e1cae4a..322c1db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -457,6 +457,17 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
 	mutex_unlock(&process_info->lock);
 }
 
+static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
+		struct amdkfd_process_info *process_info)
+{
+	struct ttm_validate_buffer *bo_list_entry;
+
+	bo_list_entry = &mem->validate_list;
+	mutex_lock(&process_info->lock);
+	list_del(&bo_list_entry->head);
+	mutex_unlock(&process_info->lock);
+}
+
 /* Initializes user pages. It registers the MMU notifier and validates
  * the userptr BO in the GTT domain.
  *
@@ -1197,6 +1208,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 	return 0;
 
 allocate_init_user_pages_failed:
+	remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
 	amdgpu_bo_unref(&bo);
 	/* Don't unreserve system mem limit twice */
 	goto err_reserve_limit;
@@ -2104,3 +2116,87 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 	kfree(pd_bo_list);
 	return ret;
 }
+
+int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem)
+{
+	struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
+	struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws;
+	int ret;
+
+	if (!info || !gws)
+		return -EINVAL;
+
+	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
+	if (!*mem)
+		return -ENOMEM;
+
+	mutex_init(&(*mem)->lock);
+	(*mem)->bo = amdgpu_bo_ref(gws_bo);
+	(*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
+	(*mem)->process_info = process_info;
+	add_kgd_mem_to_kfd_bo_list(*mem, process_info, false);
+	amdgpu_sync_create(&(*mem)->sync);
+
+
+	/* Validate gws bo the first time it is added to process */
+	mutex_lock(&(*mem)->process_info->lock);
+	ret = amdgpu_bo_reserve(gws_bo, false);
+	if (unlikely(ret)) {
+		pr_err("Reserve gws bo failed %d\n", ret);
+		goto bo_reservation_failure;
+	}
+
+	ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, false);
+	if (ret) {
+		pr_err("GWS BO validate failed %d\n", ret);
+		goto bo_validation_failure;
+	}
+	/* GWS resource is shared b/t amdgpu and amdkfd
+	 * Add process eviction fence to bo so they can
+	 * evict each other.
+	 */
+	amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true);
+	amdgpu_bo_unreserve(gws_bo);
+	mutex_unlock(&(*mem)->process_info->lock);
+
+	return ret;
+
+bo_validation_failure:
+	amdgpu_bo_unreserve(gws_bo);
+bo_reservation_failure:
+	mutex_unlock(&(*mem)->process_info->lock);
+	amdgpu_sync_free(&(*mem)->sync);
+	remove_kgd_mem_from_kfd_bo_list(*mem, process_info);
+	amdgpu_bo_unref(&gws_bo);
+	mutex_destroy(&(*mem)->lock);
+	kfree(*mem);
+	return ret;
+}
+
+int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
+{
+	int ret;
+	struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
+	struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
+	struct amdgpu_bo *gws_bo = kgd_mem->bo;
+
+	/* Remove BO from process's validate list so restore worker won't touch
+	 * it anymore
+	 */
+	remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info);
+
+	ret = amdgpu_bo_reserve(gws_bo, false);
+	if (unlikely(ret)) {
+		pr_err("Reserve gws bo failed %d\n", ret);
+		//TODO add BO back to validate_list?
+		return ret;
+	}
+	amdgpu_amdkfd_remove_eviction_fence(gws_bo,
+			process_info->eviction_fence);
+	amdgpu_bo_unreserve(gws_bo);
+	amdgpu_sync_free(&kgd_mem->sync);
+	amdgpu_bo_unref(&gws_bo);
+	mutex_destroy(&kgd_mem->lock);
+	kfree(mem);
+	return 0;
+}
-- 
2.7.4


* [PATCH 7/8] drm/amdkfd: New IOCTL to allocate queue GWS
From: Zeng, Oak @ 2019-05-10 16:01 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Kuehling, Felix, Zeng, Oak, Keely, Sean

Add a new kfd ioctl to allocate queue GWS. Queue
GWS is released on queue destroy.

Change-Id: I60153c26a577992ad873e4292e759e5c3d5bbd15
Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 45 ++++++++++++++++++++++++++++++++
 include/uapi/linux/kfd_ioctl.h           | 19 +++++++++++++-
 2 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 38ae53f..17dd970 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -338,6 +338,11 @@ static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
 
 	mutex_lock(&p->mutex);
 
+	if (pqm_get_gws(&p->pqm, args->queue_id)) {
+		amdgpu_amdkfd_remove_gws_from_process(p->kgd_process_info,
+				pqm_get_gws(&p->pqm, args->queue_id));
+		pqm_set_gws(&p->pqm, args->queue_id, NULL);
+	}
 	retval = pqm_destroy_queue(&p->pqm, args->queue_id);
 
 	mutex_unlock(&p->mutex);
@@ -1559,6 +1564,44 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
 	return err;
 }
 
+static int kfd_ioctl_alloc_queue_gws(struct file *filep,
+		struct kfd_process *p, void *data)
+{
+	int retval;
+	struct kfd_ioctl_alloc_queue_gws_args *args = data;
+	struct kfd_dev *dev;
+	struct kgd_mem *mem;
+
+	if (args->num_gws == 0)
+		return -EINVAL;
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (!dev) {
+		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
+		return -EINVAL;
+	}
+
+	if (!hws_gws_support ||
+		dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
+		return -EINVAL;
+
+	retval = amdgpu_amdkfd_add_gws_to_process(p->kgd_process_info,
+			dev->gws, &mem);
+	if (unlikely(retval))
+		return retval;
+
+	mutex_lock(&p->mutex);
+	retval = pqm_set_gws(&p->pqm, args->queue_id, mem);
+	mutex_unlock(&p->mutex);
+
+	if (unlikely(retval))
+		amdgpu_amdkfd_remove_gws_from_process(p->kgd_process_info, mem);
+
+	/* The gws_array parameter is reserved for future extension*/
+	args->gws_array[0] = 0;
+	return retval;
+}
+
 static int kfd_ioctl_get_dmabuf_info(struct file *filep,
 		struct kfd_process *p, void *data)
 {
@@ -1761,6 +1804,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
 				kfd_ioctl_import_dmabuf, 0),
 
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
+			kfd_ioctl_alloc_queue_gws, 0),
 };
 
 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 20917c5..1964ab2 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -410,6 +410,20 @@ struct kfd_ioctl_unmap_memory_from_gpu_args {
 	__u32 n_success;		/* to/from KFD */
 };
 
+/* Allocate GWS for specific queue
+ *
+ * @gpu_id:      device identifier
+ * @queue_id:    queue's id that GWS is allocated for
+ * @num_gws:     how many GWS to allocate
+ * @gws_array:   used to return the allocated gws
+ */
+struct kfd_ioctl_alloc_queue_gws_args {
+	__u32 gpu_id;		/* to KFD */
+	__u32 queue_id;		/* to KFD */
+	__u32 num_gws;		/* to KFD */
+	__u32 *gws_array;	/* from KFD */
+};
+
 struct kfd_ioctl_get_dmabuf_info_args {
 	__u64 size;		/* from KFD */
 	__u64 metadata_ptr;	/* to KFD */
@@ -529,7 +543,10 @@ enum kfd_mmio_remap {
 #define AMDKFD_IOC_IMPORT_DMABUF		\
 		AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args)
 
+#define AMDKFD_IOC_ALLOC_QUEUE_GWS		\
+		AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args)
+
 #define AMDKFD_COMMAND_START		0x01
-#define AMDKFD_COMMAND_END		0x1E
+#define AMDKFD_COMMAND_END		0x1F
 
 #endif
-- 
2.7.4
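
For illustration, a user-space caller of the new ioctl could look roughly
like the following sketch (not part of the patch; assumes kfd_fd, gpu_id
and queue_id were obtained from /dev/kfd and a prior queue-create call):

	#include <sys/ioctl.h>
	#include <linux/kfd_ioctl.h>

	/* Sketch: ask for all GWS entries on an existing queue. */
	int alloc_queue_gws(int kfd_fd, __u32 gpu_id, __u32 queue_id,
			    __u32 num_gws)
	{
		__u32 first_gws = 0;
		struct kfd_ioctl_alloc_queue_gws_args args = {
			.gpu_id = gpu_id,
			.queue_id = queue_id,
			.num_gws = num_gws,
			.gws_array = &first_gws, /* reserved for extension */
		};

		if (ioctl(kfd_fd, AMDKFD_IOC_ALLOC_QUEUE_GWS, &args))
			return -1;
		return (int)first_gws;	/* currently always 0 */
	}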


* [PATCH 8/8] drm/amdkfd: PM4 packets change to support GWS
From: Zeng, Oak @ 2019-05-10 16:01 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Kuehling, Felix, Zeng, Oak, Keely, Sean

Add a field in the map_queues packet to indicate whether
this is a GWS control queue. Only one queue per process
can be the GWS control queue. Change the num_gws field in
the map_process packet to 7 bits.

Change-Id: I0db91d99c6962d14f9202b2eb950f8e7e497b79e
Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c | 1 +
 drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h  | 7 ++++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
index 3dd731c..07f02f8e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
@@ -159,6 +159,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
 
 	packet->bitfields2.engine_sel =
 		engine_sel__mes_map_queues__compute_vi;
+	packet->bitfields2.gws_control_queue = q->gws ? 1 : 0;
 	packet->bitfields2.queue_type =
 		queue_type__mes_map_queues__normal_compute_vi;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
index 0661339..49ab66b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
@@ -176,8 +176,7 @@ struct pm4_mes_map_process {
 
 	union {
 		struct {
-			uint32_t num_gws:6;
-			uint32_t reserved7:1;
+			uint32_t num_gws:7;
 			uint32_t sdma_enable:1;
 			uint32_t num_oac:4;
 			uint32_t reserved8:4;
@@ -272,7 +271,9 @@ struct pm4_mes_map_queues {
 		struct {
 			uint32_t reserved1:4;
 			enum mes_map_queues_queue_sel_enum queue_sel:2;
-			uint32_t reserved2:15;
+			uint32_t reserved5:6;
+			uint32_t gws_control_queue:1;
+			uint32_t reserved2:8;
 			enum mes_map_queues_queue_type_enum queue_type:3;
 			uint32_t reserved3:2;
 			enum mes_map_queues_engine_sel_enum engine_sel:3;
-- 
2.7.4


* Re: [PATCH 2/8] drm/amdgpu: Remove GWS barriers pre-reservation for gfx
From: Christian König @ 2019-05-10 17:41 UTC (permalink / raw)
  To: Zeng, Oak, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Kuehling, Felix, Keely, Sean

On 10.05.19 at 18:01, Zeng, Oak wrote:
> User space allocated GWS barriers are used. The kernel
> pre-allocated GWS barriers are unused.

I have a patch which goes a step further: it also removes the GDS and OA
stuff and cleans up the internal variables.

Give me a moment to dig that up,
Christian.


* Re: [PATCH 6/8] drm/amdkfd: Add function to add/remove gws to kfd process
From: Kuehling, Felix @ 2019-05-10 20:17 UTC (permalink / raw)
  To: Zeng, Oak, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Keely, Sean

The subject for this one should start with drm/amdgpu: because it's a 
change in amdgpu, not KFD.

See two more comments inline.

Regards,
   Felix

On 2019-05-10 12:01 p.m., Zeng, Oak wrote:
> The GWS BO is shared between all KFD processes. Add functions to add
> the GWS BO to a KFD process's BO list so GWS can be evicted from and
> restored for the process.
>
> Change-Id: I75d74cfdadb7075ff8b2b68634e205deb73dc1ea
> Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h       |  2 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 96 ++++++++++++++++++++++++
>   2 files changed, 98 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index c00c974..f968bf1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -155,6 +155,8 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
>   void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
>   int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, void **mem_obj);
>   void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj);
> +int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem);
> +int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem);
>   uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
>                                        enum kgd_engine_type type);
>   void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index e1cae4a..322c1db 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -457,6 +457,17 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
>          mutex_unlock(&process_info->lock);
>   }
>
> +static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
> +               struct amdkfd_process_info *process_info)
> +{
> +       struct ttm_validate_buffer *bo_list_entry;
> +
> +       bo_list_entry = &mem->validate_list;
> +       mutex_lock(&process_info->lock);
> +       list_del(&bo_list_entry->head);
> +       mutex_unlock(&process_info->lock);
> +}
> +
>   /* Initializes user pages. It registers the MMU notifier and validates
>    * the userptr BO in the GTT domain.
>    *
> @@ -1197,6 +1208,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>          return 0;
>
>   allocate_init_user_pages_failed:
> +       remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);

I think you meant to replace some code a few lines up:

             if (user_addr) {
                     ret = init_user_pages(*mem, current->mm, user_addr);
                     if (ret) {
    -                        mutex_lock(&avm->process_info->lock);
    -                        list_del(&(*mem)->validate_list.head);
    -                        mutex_unlock(&avm->process_info->lock);
                             goto allocate_init_user_pages_failed;
                     }
             }

But you're not removing that original code in this patch so you'd end up 
removing it from the list twice.


>          amdgpu_bo_unref(&bo);
>          /* Don't unreserve system mem limit twice */
>          goto err_reserve_limit;
> @@ -2104,3 +2116,87 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
>          kfree(pd_bo_list);
>          return ret;
>   }
> +
> +int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem)
> +{
> +       struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
> +       struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws;
> +       int ret;
> +
> +       if (!info || !gws)
> +               return -EINVAL;
> +
> +       *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
> +       if (!*mem)
> +               return -ENOMEM;
> +
> +       mutex_init(&(*mem)->lock);
> +       (*mem)->bo = amdgpu_bo_ref(gws_bo);
> +       (*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
> +       (*mem)->process_info = process_info;
> +       add_kgd_mem_to_kfd_bo_list(*mem, process_info, false);
> +       amdgpu_sync_create(&(*mem)->sync);
> +
> +
> +       /* Validate gws bo the first time it is added to process */
> +       mutex_lock(&(*mem)->process_info->lock);
> +       ret = amdgpu_bo_reserve(gws_bo, false);
> +       if (unlikely(ret)) {
> +               pr_err("Reserve gws bo failed %d\n", ret);
> +               goto bo_reservation_failure;
> +       }
> +
> +       ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, false);
> +       if (ret) {
> +               pr_err("GWS BO validate failed %d\n", ret);
> +               goto bo_validation_failure;
> +       }
> +       /* GWS resource is shared b/t amdgpu and amdkfd
> +        * Add process eviction fence to bo so they can
> +        * evict each other.
> +        */
> +       amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true);
> +       amdgpu_bo_unreserve(gws_bo);
> +       mutex_unlock(&(*mem)->process_info->lock);
> +
> +       return ret;
> +
> +bo_validation_failure:
> +       amdgpu_bo_unreserve(gws_bo);
> +bo_reservation_failure:
> +       mutex_unlock(&(*mem)->process_info->lock);
> +       amdgpu_sync_free(&(*mem)->sync);
> +       remove_kgd_mem_from_kfd_bo_list(*mem, process_info);
> +       amdgpu_bo_unref(&gws_bo);
> +       mutex_destroy(&(*mem)->lock);
> +       kfree(*mem);

Set *mem = NULL after kfree to avoid returning a dangling pointer.
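
I.e., the end of the error path would become:

	mutex_destroy(&(*mem)->lock);
	kfree(*mem);
	*mem = NULL;	/* don't hand a dangling pointer back to the caller */
	return ret;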



* Re: [PATCH 7/8] drm/amdkfd: New IOCTL to allocate queue GWS
From: Kuehling, Felix @ 2019-05-10 20:28 UTC (permalink / raw)
  To: Zeng, Oak, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Keely, Sean

On 2019-05-10 12:01 p.m., Zeng, Oak wrote:
> Add a new kfd ioctl to allocate queue GWS. Queue
> GWS is released on queue destroy.
>
> Change-Id: I60153c26a577992ad873e4292e759e5c3d5bbd15
> Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 45 ++++++++++++++++++++++++++++++++
>   include/uapi/linux/kfd_ioctl.h           | 19 +++++++++++++-
>   2 files changed, 63 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 38ae53f..17dd970 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -338,6 +338,11 @@ static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
>   
>   	mutex_lock(&p->mutex);
>   
> +	if (pqm_get_gws(&p->pqm, args->queue_id)) {
> +		amdgpu_amdkfd_remove_gws_from_process(p->kgd_process_info,
> +				pqm_get_gws(&p->pqm, args->queue_id));
> +		pqm_set_gws(&p->pqm, args->queue_id, NULL);
> +	}

It would be more robust if this was done inside pqm_destroy_queue. That 
way you'd handle other potential code paths that destroy queues (not 
sure there are any) and you wouldn't need pqm_get_gws exported from PQM.
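
Something like the following sketch (untested; field names taken from the
earlier patches in this series):

	/* In pqm_destroy_queue(), before the queue is torn down: */
	if (pqn->q && pqn->q->gws) {
		amdgpu_amdkfd_remove_gws_from_process(
				pqm->process->kgd_process_info,
				pqn->q->gws);
		pqm_set_gws(pqm, qid, NULL);
	}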


>   	retval = pqm_destroy_queue(&p->pqm, args->queue_id);
>   
>   	mutex_unlock(&p->mutex);
> @@ -1559,6 +1564,44 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
>   	return err;
>   }
>   
> +static int kfd_ioctl_alloc_queue_gws(struct file *filep,
> +		struct kfd_process *p, void *data)
> +{
> +	int retval;
> +	struct kfd_ioctl_alloc_queue_gws_args *args = data;
> +	struct kfd_dev *dev;
> +	struct kgd_mem *mem;
> +
> +	if (args->num_gws == 0)
> +		return -EINVAL;
> +
> +	dev = kfd_device_by_id(args->gpu_id);
> +	if (!dev) {
> +		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
> +		return -EINVAL;
> +	}
> +
> +	if (!hws_gws_support ||
> +		dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
> +		return -EINVAL;
> +
> +	retval = amdgpu_amdkfd_add_gws_to_process(p->kgd_process_info,
> +			dev->gws, &mem);
> +	if (unlikely(retval))
> +		return retval;
> +
> +	mutex_lock(&p->mutex);
> +	retval = pqm_set_gws(&p->pqm, args->queue_id, mem);
> +	mutex_unlock(&p->mutex);
> +
> +	if (unlikely(retval))
> +		amdgpu_amdkfd_remove_gws_from_process(p->kgd_process_info, mem);
> +
> +	/* The gws_array parameter is reserved for future extension*/
> +	args->gws_array[0] = 0;
> +	return retval;
> +}
> +
>   static int kfd_ioctl_get_dmabuf_info(struct file *filep,
>   		struct kfd_process *p, void *data)
>   {
> @@ -1761,6 +1804,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
>   	AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
>   				kfd_ioctl_import_dmabuf, 0),
>   
> +	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
> +			kfd_ioctl_alloc_queue_gws, 0),
>   };
>   
>   #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
> diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
> index 20917c5..1964ab2 100644
> --- a/include/uapi/linux/kfd_ioctl.h
> +++ b/include/uapi/linux/kfd_ioctl.h
> @@ -410,6 +410,20 @@ struct kfd_ioctl_unmap_memory_from_gpu_args {
>   	__u32 n_success;		/* to/from KFD */
>   };
>   
> +/* Allocate GWS for specific queue
> + *
> + * @gpu_id:      device identifier
> + * @queue_id:    queue's id that GWS is allocated for
> + * @num_gws:     how many GWS to allocate
> + * @gws_array:   used to return the allocated gws
> + */
> +struct kfd_ioctl_alloc_queue_gws_args {
> +	__u32 gpu_id;		/* to KFD */
> +	__u32 queue_id;		/* to KFD */
> +	__u32 num_gws;		/* to KFD */
> +	__u32 *gws_array;	/* from KFD */

Don't use pointers in ioctl structures. Use uint64_t. Accessing user 
mode pointers requires copy_to/from_user or similar.

Also prefer to move 64-bit elements to the first element to ensure 
proper alignment, and pad the structure to 64-bit for ABI compatibility.

I'm not sure what your plan is for that gws_array. If it's a pointer to 
a user mode array, then that array needs to be allocated by user mode. And
user mode should probably pass down the size of the array it allocated 
in another parameter.

That said, I think what we want is not an array, but just the index of 
the first GWS entry that was allocated for the queue, which is currently 
always 0. So I'm not sure why you're calling this an "array".
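
One possible layout along those lines (just a sketch, not a final ABI):

	struct kfd_ioctl_alloc_queue_gws_args {
		__u64 gws_array;	/* to KFD: user pointer cast to __u64 */
		__u32 gpu_id;		/* to KFD */
		__u32 queue_id;		/* to KFD */
		__u32 num_gws;		/* to KFD */
		__u32 first_gws;	/* from KFD: first allocated GWS index */
	};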


> +};
> +
>   struct kfd_ioctl_get_dmabuf_info_args {
>   	__u64 size;		/* from KFD */
>   	__u64 metadata_ptr;	/* to KFD */
> @@ -529,7 +543,10 @@ enum kfd_mmio_remap {
>   #define AMDKFD_IOC_IMPORT_DMABUF		\
>   		AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args)
>   
> +#define AMDKFD_IOC_ALLOC_QUEUE_GWS		\
> +		AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args)
> +

This will force us to move some ioctl numbers in amd-kfd-staging and the 
DKMS branch, which will break the ABI of our ROCm releases. Not sure 
there is a good way to avoid that other than leaving a hole in the 
upstream ioctl space. I got push-back on that kind of thing when I 
originally upstreamed KFD. So this is just an FYI.

Regards,
   Felix


* RE: [PATCH 7/8] drm/amdkfd: New IOCTL to allocate queue GWS
From: Zeng, Oak @ 2019-05-13 16:03 UTC (permalink / raw)
  To: Kuehling, Felix, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Russell, Kent
  Cc: Keely, Sean

Hi Felix,

See comments inline [Oak]

Hi Kent, there is one FYI embedded, so be careful when you merge this change back to kfd-staging branch.

Regards,
Oak

-----Original Message-----
From: Kuehling, Felix <Felix.Kuehling@amd.com> 
Sent: Friday, May 10, 2019 4:28 PM
To: Zeng, Oak <Oak.Zeng@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Keely, Sean <Sean.Keely@amd.com>
Subject: Re: [PATCH 7/8] drm/amdkfd: New IOCTL to allocate queue GWS

On 2019-05-10 12:01 p.m., Zeng, Oak wrote:
> Add a new kfd ioctl to allocate queue GWS. Queue GWS is released on 
> queue destroy.
>
> Change-Id: I60153c26a577992ad873e4292e759e5c3d5bbd15
> Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 45 ++++++++++++++++++++++++++++++++
>   include/uapi/linux/kfd_ioctl.h           | 19 +++++++++++++-
>   2 files changed, 63 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 38ae53f..17dd970 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -338,6 +338,11 @@ static int kfd_ioctl_destroy_queue(struct file 
> *filp, struct kfd_process *p,
>   
>   	mutex_lock(&p->mutex);
>   
> +	if (pqm_get_gws(&p->pqm, args->queue_id)) {
> +		amdgpu_amdkfd_remove_gws_from_process(p->kgd_process_info,
> +				pqm_get_gws(&p->pqm, args->queue_id));
> +		pqm_set_gws(&p->pqm, args->queue_id, NULL);
> +	}

It would be more robust if this was done inside pqm_destroy_queue. That way you'd handle other potential code paths that destroy queues (not sure there are any) and you wouldn't need pqm_get_gws exported from PQM.

[Oak] Good idea. Will do it. Even though currently there is no other code path destroying a queue using gws, your way is safer for future code changes. 


>   	retval = pqm_destroy_queue(&p->pqm, args->queue_id);
>   
>   	mutex_unlock(&p->mutex);
> @@ -1559,6 +1564,44 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
>   	return err;
>   }
>   
> +static int kfd_ioctl_alloc_queue_gws(struct file *filep,
> +		struct kfd_process *p, void *data)
> +{
> +	int retval;
> +	struct kfd_ioctl_alloc_queue_gws_args *args = data;
> +	struct kfd_dev *dev;
> +	struct kgd_mem *mem;
> +
> +	if (args->num_gws == 0)
> +		return -EINVAL;
> +
> +	dev = kfd_device_by_id(args->gpu_id);
> +	if (!dev) {
> +		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
> +		return -EINVAL;
> +	}
> +
> +	if (!hws_gws_support ||
> +		dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
> +		return -EINVAL;
> +
> +	retval = amdgpu_amdkfd_add_gws_to_process(p->kgd_process_info,
> +			dev->gws, &mem);
> +	if (unlikely(retval))
> +		return retval;
> +
> +	mutex_lock(&p->mutex);
> +	retval = pqm_set_gws(&p->pqm, args->queue_id, mem);
> +	mutex_unlock(&p->mutex);
> +
> +	if (unlikely(retval))
> +		amdgpu_amdkfd_remove_gws_from_process(p->kgd_process_info, mem);
> +
> +	/* The gws_array parameter is reserved for future extension*/
> +	args->gws_array[0] = 0;
> +	return retval;
> +}
> +
>   static int kfd_ioctl_get_dmabuf_info(struct file *filep,
>   		struct kfd_process *p, void *data)
>   {
> @@ -1761,6 +1804,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
>   	AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
>   				kfd_ioctl_import_dmabuf, 0),
>   
> +	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
> +			kfd_ioctl_alloc_queue_gws, 0),
>   };
>   
>   #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
> diff --git a/include/uapi/linux/kfd_ioctl.h 
> b/include/uapi/linux/kfd_ioctl.h index 20917c5..1964ab2 100644
> --- a/include/uapi/linux/kfd_ioctl.h
> +++ b/include/uapi/linux/kfd_ioctl.h
> @@ -410,6 +410,20 @@ struct kfd_ioctl_unmap_memory_from_gpu_args {
>   	__u32 n_success;		/* to/from KFD */
>   };
>   
> +/* Allocate GWS for specific queue
> + *
> + * @gpu_id:      device identifier
> + * @queue_id:    queue's id that GWS is allocated for
> + * @num_gws:     how many GWS to allocate
> + * @gws_array:   used to return the allocated gws
> + */
> +struct kfd_ioctl_alloc_queue_gws_args {
> +	__u32 gpu_id;		/* to KFD */
> +	__u32 queue_id;		/* to KFD */
> +	__u32 num_gws;		/* to KFD */
> +	__u32 *gws_array;	/* from KFD */

Don't use pointers in ioctl structures. Use uint64_t. Accessing user mode pointers requires copy_to/from_user or similar.

Also prefer to move 64-bit elements to the first element to ensure proper alignment, and pad the structure to 64-bit for ABI compatibility.
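
For illustration, a layout along the lines described above might look like this (a sketch only; gws_array_ptr and pad are placeholder names, not the final UAPI):

struct kfd_ioctl_alloc_queue_gws_args {
	__u64 gws_array_ptr;	/* to KFD: user buffer address, cast from a pointer */
	__u32 gpu_id;		/* to KFD */
	__u32 queue_id;		/* to KFD */
	__u32 num_gws;		/* to KFD */
	__u32 pad;		/* pad the struct to a 64-bit multiple */
};

The kernel side would then write results back through the usual helpers, for example:

	__u32 first_gws = 0;

	if (copy_to_user(u64_to_user_ptr(args->gws_array_ptr),
			 &first_gws, sizeof(first_gws)))
		return -EFAULT;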

I'm not sure what your plan is for that gws_array. If it's a pointer to a user mode array, then that array needs to be allocated by user mode. And user mode should probably pass down the size of the array it allocated in another parameter.

That said, I think what we want is not an array, but just the index of the first GWS entry that was allocated for the queue, which is currently always 0. So I'm not sure why you're calling this an "array".

[Oak] In the current design, one queue always gets all 64 GWS, so returning the index of the first GWS (0) is not a problem. In the future, could a queue be allocated only a few non-contiguous GWS entries, for example GWS 3 and GWS 56? If so, we would have to copy an array of GWS entries back to user space.

> +};
> +
>   struct kfd_ioctl_get_dmabuf_info_args {
>   	__u64 size;		/* from KFD */
>   	__u64 metadata_ptr;	/* to KFD */
> @@ -529,7 +543,10 @@ enum kfd_mmio_remap {
>   #define AMDKFD_IOC_IMPORT_DMABUF		\
>   		AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args)
>   
> +#define AMDKFD_IOC_ALLOC_QUEUE_GWS		\
> +		AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args)
> +

This will force us to move some ioctl numbers in amd-kfd-staging and the DKMS branch, which will break the ABI of our ROCm releases. Not sure there is a good way to avoid that other than leaving a hole in the upstream ioctl space. I got push-back on that kind of thing when I originally upstreamed KFD. So this is just an FYI.
[Oak] Yes, when we merge these changes back to the amd-kfd-staging branch, we will have to change the ioctl number for GWS allocation to 0x22, to accommodate the extra kfd ioctls on the kfd-staging branch. @Russell, Kent FYI

Regards,
   Felix

>   #define AMDKFD_COMMAND_START		0x01
> -#define AMDKFD_COMMAND_END		0x1E
> +#define AMDKFD_COMMAND_END		0x1F
>   
>   #endif

* Re: [PATCH 7/8] drm/amdkfd: New IOCTL to allocate queue GWS
       [not found]             ` <BL0PR12MB258003620CDCE779957C845B800F0-b4cIHhjg/p/XzH18dTCKOgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2019-05-13 18:12               ` Kuehling, Felix
       [not found]                 ` <83257525-4d46-acbd-5853-eeb6913fa398-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Kuehling, Felix @ 2019-05-13 18:12 UTC (permalink / raw)
  To: Zeng, Oak, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Russell, Kent
  Cc: Keely, Sean

On 2019-05-13 12:03 p.m., Zeng, Oak wrote:
> Hi Felix,
>
> See comments inline [Oak]
>
> Hi Kent, there is one FYI embedded, so be careful when you merge this change back to the kfd-staging branch.
>
> Regards,
> Oak
>
> -----Original Message-----
> From: Kuehling, Felix <Felix.Kuehling@amd.com>
> Sent: Friday, May 10, 2019 4:28 PM
> To: Zeng, Oak <Oak.Zeng@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Keely, Sean <Sean.Keely@amd.com>
> Subject: Re: [PATCH 7/8] drm/amdkfd: New IOCTL to allocate queue GWS
>
> On 2019-05-10 12:01 p.m., Zeng, Oak wrote:
>> Add a new kfd ioctl to allocate queue GWS. Queue GWS is released on
>> queue destroy.
>>
>> Change-Id: I60153c26a577992ad873e4292e759e5c3d5bbd15
>> Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
>> ---
>>    drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 45 ++++++++++++++++++++++++++++++++
>>    include/uapi/linux/kfd_ioctl.h           | 19 +++++++++++++-
>>    2 files changed, 63 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> index 38ae53f..17dd970 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> @@ -338,6 +338,11 @@ static int kfd_ioctl_destroy_queue(struct file
>> *filp, struct kfd_process *p,
>>    
>>    	mutex_lock(&p->mutex);
>>    
>> +	if (pqm_get_gws(&p->pqm, args->queue_id)) {
>> +		amdgpu_amdkfd_remove_gws_from_process(p->kgd_process_info,
>> +				pqm_get_gws(&p->pqm, args->queue_id));
>> +		pqm_set_gws(&p->pqm, args->queue_id, NULL);
>> +	}
> It would be more robust if this was done inside pqm_destroy_queue. That way you'd handle other potential code paths that destroy queues (not sure there are any) and you wouldn't need pqm_get_gws exported from PQM.
>
> [Oak] Good idea. Will do it. Even though there is currently no other code path that destroys a queue using GWS, your way is safer for future code changes.
>
>
>>    	retval = pqm_destroy_queue(&p->pqm, args->queue_id);
>>    
>>    	mutex_unlock(&p->mutex);
>> @@ -1559,6 +1564,44 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
>>    	return err;
>>    }
>>    
>> +static int kfd_ioctl_alloc_queue_gws(struct file *filep,
>> +		struct kfd_process *p, void *data)
>> +{
>> +	int retval;
>> +	struct kfd_ioctl_alloc_queue_gws_args *args = data;
>> +	struct kfd_dev *dev = NULL;
>> +	struct kgd_mem *mem;
>> +
>> +	if (args->num_gws == 0)
>> +		return -EINVAL;
>> +
>> +	dev = kfd_device_by_id(args->gpu_id);
>> +	if (!dev) {
>> +		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
>> +		return -EINVAL;
>> +	}
>> +
>> +	if (!hws_gws_support ||
>> +		dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
>> +		return -EINVAL;
>> +
>> +	retval = amdgpu_amdkfd_add_gws_to_process(p->kgd_process_info,
>> +			dev->gws, &mem);
>> +	if (unlikely(retval))
>> +		return retval;
>> +
>> +	mutex_lock(&p->mutex);
>> +	retval = pqm_set_gws(&p->pqm, args->queue_id, mem);
>> +	mutex_unlock(&p->mutex);
>> +
>> +	if (unlikely(retval))
>> +		amdgpu_amdkfd_remove_gws_from_process(p->kgd_process_info, mem);
>> +
>> +	/* The gws_array parameter is reserved for future extension */
>> +	args->gws_array[0] = 0;
>> +	return retval;
>> +}
>> +
>>    static int kfd_ioctl_get_dmabuf_info(struct file *filep,
>>    		struct kfd_process *p, void *data)
>>    {
>> @@ -1761,6 +1804,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
>>    	AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
>>    				kfd_ioctl_import_dmabuf, 0),
>>    
>> +	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
>> +			kfd_ioctl_alloc_queue_gws, 0),
>>    };
>>    
>>    #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
>> diff --git a/include/uapi/linux/kfd_ioctl.h
>> b/include/uapi/linux/kfd_ioctl.h index 20917c5..1964ab2 100644
>> --- a/include/uapi/linux/kfd_ioctl.h
>> +++ b/include/uapi/linux/kfd_ioctl.h
>> @@ -410,6 +410,20 @@ struct kfd_ioctl_unmap_memory_from_gpu_args {
>>    	__u32 n_success;		/* to/from KFD */
>>    };
>>    
>> +/* Allocate GWS for specific queue
>> + *
>> + * @gpu_id:      device identifier
>> + * @queue_id:    queue's id that GWS is allocated for
>> + * @num_gws:     how many GWS to allocate
>> + * @gws_array:   used to return the allocated gws
>> + */
>> +struct kfd_ioctl_alloc_queue_gws_args {
>> +	__u32 gpu_id;		/* to KFD */
>> +	__u32 queue_id;		/* to KFD */
>> +	__u32 num_gws;		/* to KFD */
>> +	__u32 *gws_array;	/* from KFD */
> Don't use pointers in ioctl structures. Use uint64_t. Accessing user mode pointers requires copy_to/from_user or similar.
>
> Also prefer to move 64-bit elements to the first element to ensure proper alignment, and pad the structure to 64-bit for ABI compatibility.
>
> I'm not sure what your plan is for that gws_array. If it's a pointer to a user mode array, then that array needs to be allocated by user mode. And user mode should probably pass down the size of the array it allocated in another parameter.
>
> That said, I think what we want is not an array, but just the index of the first GWS entry that was allocated for the queue, which is currently always 0. So I'm not sure why you're calling this an "array".
>
> [Oak] In the current design, one queue always gets all 64 GWS, so returning the index of the first GWS (0) is not a problem. In the future, could a queue be allocated only a few non-contiguous GWS entries, for example GWS 3 and GWS 56? If so, we would have to copy an array of GWS entries back to user space.

Current HW only supports contiguous allocations of GWS. I don't foresee 
that changing. I think we can acknowledge that in the API.
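
If the API acknowledged contiguous-only allocation, the ioctl arguments could drop the user pointer entirely and just return the first entry. A sketch under that assumption (first_gws is a placeholder name):

struct kfd_ioctl_alloc_queue_gws_args {
	__u32 gpu_id;		/* to KFD */
	__u32 queue_id;		/* to KFD */
	__u32 num_gws;		/* to KFD */
	__u32 first_gws;	/* from KFD: index of the first allocated GWS entry */
};

This keeps the struct naturally 64-bit sized and avoids the copy_to_user handling discussed above.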


>
>> +};
>> +
>>    struct kfd_ioctl_get_dmabuf_info_args {
>>    	__u64 size;		/* from KFD */
>>    	__u64 metadata_ptr;	/* to KFD */
>> @@ -529,7 +543,10 @@ enum kfd_mmio_remap {
>>    #define AMDKFD_IOC_IMPORT_DMABUF		\
>>    		AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args)
>>    
>> +#define AMDKFD_IOC_ALLOC_QUEUE_GWS		\
>> +		AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args)
>> +
> This will force us to move some ioctl numbers in amd-kfd-staging and the DKMS branch, which will break the ABI of our ROCm releases. Not sure there is a good way to avoid that other than leaving a hole in the upstream ioctl space. I got push-back on that kind of thing when I originally upstreamed KFD. So this is just an FYI.
> [Oak] Yes, when we merge these changes back to the amd-kfd-staging branch, we will have to change the ioctl number for GWS allocation to 0x22, to accommodate the extra kfd ioctls on the kfd-staging branch. @Russell, Kent FYI

No, the other way around. With APIs that are upstream we should have 
amd-kfd-staging match the upstream ABI. That means we have to move the 
other non-upstream ioctl numbers.

Regards,
   Felix


>
> Regards,
>     Felix
>
>>    #define AMDKFD_COMMAND_START		0x01
>> -#define AMDKFD_COMMAND_END		0x1E
>> +#define AMDKFD_COMMAND_END		0x1F
>>    
>>    #endif

* Re: [PATCH 7/8] drm/amdkfd: New IOCTL to allocate queue GWS
       [not found]                 ` <83257525-4d46-acbd-5853-eeb6913fa398-5C7GfCeVMHo@public.gmane.org>
@ 2019-05-13 19:14                   ` Christian König
  0 siblings, 0 replies; 15+ messages in thread
From: Christian König @ 2019-05-13 19:14 UTC (permalink / raw)
  To: Kuehling, Felix, Zeng, Oak,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Russell, Kent
  Cc: Keely, Sean

On 2019-05-13 8:12 p.m., Kuehling, Felix wrote:
> On 2019-05-13 12:03 p.m., Zeng, Oak wrote:
>> Hi Felix,
>>
>> See comments inline [Oak]
>>
>> Hi Kent, there is one FYI embedded, so be careful when you merge this change back to the kfd-staging branch.
>>
>> Regards,
>> Oak
>>
>> -----Original Message-----
>> From: Kuehling, Felix <Felix.Kuehling@amd.com>
>> Sent: Friday, May 10, 2019 4:28 PM
>> To: Zeng, Oak <Oak.Zeng@amd.com>; amd-gfx@lists.freedesktop.org
>> Cc: Keely, Sean <Sean.Keely@amd.com>
>> Subject: Re: [PATCH 7/8] drm/amdkfd: New IOCTL to allocate queue GWS
>>
>> On 2019-05-10 12:01 p.m., Zeng, Oak wrote:
>>> Add a new kfd ioctl to allocate queue GWS. Queue GWS is released on
>>> queue destroy.
>>>
>>> Change-Id: I60153c26a577992ad873e4292e759e5c3d5bbd15
>>> Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
>>> ---
>>>     drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 45 ++++++++++++++++++++++++++++++++
>>>     include/uapi/linux/kfd_ioctl.h           | 19 +++++++++++++-
>>>     2 files changed, 63 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> index 38ae53f..17dd970 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> @@ -338,6 +338,11 @@ static int kfd_ioctl_destroy_queue(struct file
>>> *filp, struct kfd_process *p,
>>>     
>>>     	mutex_lock(&p->mutex);
>>>     
>>> +	if (pqm_get_gws(&p->pqm, args->queue_id)) {
>>> +		amdgpu_amdkfd_remove_gws_from_process(p->kgd_process_info,
>>> +				pqm_get_gws(&p->pqm, args->queue_id));
>>> +		pqm_set_gws(&p->pqm, args->queue_id, NULL);
>>> +	}
>> It would be more robust if this was done inside pqm_destroy_queue. That way you'd handle other potential code paths that destroy queues (not sure there are any) and you wouldn't need pqm_get_gws exported from PQM.
>>
>> [Oak] Good idea. Will do it. Even though there is currently no other code path that destroys a queue using GWS, your way is safer for future code changes.
>>
>>
>>>     	retval = pqm_destroy_queue(&p->pqm, args->queue_id);
>>>     
>>>     	mutex_unlock(&p->mutex);
>>> @@ -1559,6 +1564,44 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
>>>     	return err;
>>>     }
>>>     
>>> +static int kfd_ioctl_alloc_queue_gws(struct file *filep,
>>> +		struct kfd_process *p, void *data)
>>> +{
>>> +	int retval;
>>> +	struct kfd_ioctl_alloc_queue_gws_args *args = data;
>>> +	struct kfd_dev *dev = NULL;
>>> +	struct kgd_mem *mem;
>>> +
>>> +	if (args->num_gws == 0)
>>> +		return -EINVAL;
>>> +
>>> +	dev = kfd_device_by_id(args->gpu_id);
>>> +	if (!dev) {
>>> +		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
>>> +		return -EINVAL;
>>> +	}
>>> +
>>> +	if (!hws_gws_support ||
>>> +		dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
>>> +		return -EINVAL;
>>> +
>>> +	retval = amdgpu_amdkfd_add_gws_to_process(p->kgd_process_info,
>>> +			dev->gws, &mem);
>>> +	if (unlikely(retval))
>>> +		return retval;
>>> +
>>> +	mutex_lock(&p->mutex);
>>> +	retval = pqm_set_gws(&p->pqm, args->queue_id, mem);
>>> +	mutex_unlock(&p->mutex);
>>> +
>>> +	if (unlikely(retval))
>>> +		amdgpu_amdkfd_remove_gws_from_process(p->kgd_process_info, mem);
>>> +
>>> +	/* The gws_array parameter is reserved for future extension */
>>> +	args->gws_array[0] = 0;
>>> +	return retval;
>>> +}
>>> +
>>>     static int kfd_ioctl_get_dmabuf_info(struct file *filep,
>>>     		struct kfd_process *p, void *data)
>>>     {
>>> @@ -1761,6 +1804,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
>>>     	AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
>>>     				kfd_ioctl_import_dmabuf, 0),
>>>     
>>> +	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
>>> +			kfd_ioctl_alloc_queue_gws, 0),
>>>     };
>>>     
>>>     #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
>>> diff --git a/include/uapi/linux/kfd_ioctl.h
>>> b/include/uapi/linux/kfd_ioctl.h index 20917c5..1964ab2 100644
>>> --- a/include/uapi/linux/kfd_ioctl.h
>>> +++ b/include/uapi/linux/kfd_ioctl.h
>>> @@ -410,6 +410,20 @@ struct kfd_ioctl_unmap_memory_from_gpu_args {
>>>     	__u32 n_success;		/* to/from KFD */
>>>     };
>>>     
>>> +/* Allocate GWS for specific queue
>>> + *
>>> + * @gpu_id:      device identifier
>>> + * @queue_id:    queue's id that GWS is allocated for
>>> + * @num_gws:     how many GWS to allocate
>>> + * @gws_array:   used to return the allocated gws
>>> + */
>>> +struct kfd_ioctl_alloc_queue_gws_args {
>>> +	__u32 gpu_id;		/* to KFD */
>>> +	__u32 queue_id;		/* to KFD */
>>> +	__u32 num_gws;		/* to KFD */
>>> +	__u32 *gws_array;	/* from KFD */
>> Don't use pointers in ioctl structures. Use uint64_t. Accessing user mode pointers requires copy_to/from_user or similar.
>>
>> Also prefer to move 64-bit elements to the first element to ensure proper alignment, and pad the structure to 64-bit for ABI compatibility.
>>
>> I'm not sure what your plan is for that gws_array. If it's a pointer to a user mode array, then that array needs to be allocated by user mode. And user mode should probably pass down the size of the array it allocated in another parameter.
>>
>> That said, I think what we want is not an array, but just the index of the first GWS entry that was allocated for the queue, which is currently always 0. So I'm not sure why you're calling this an "array".
>>
>> [Oak] In the current design, one queue always gets all 64 GWS, so returning the index of the first GWS (0) is not a problem. In the future, could a queue be allocated only a few non-contiguous GWS entries, for example GWS 3 and GWS 56? If so, we would have to copy an array of GWS entries back to user space.
> Current HW only supports contiguous allocations of GWS. I don't foresee
> that changing. I think we can acknowledge that in the API.

You actually only need to know how many GWS blocks user space wants to 
use; the hardware always remaps them starting from 0.

And yes, this remapping is always contiguous,
Christian.

>
>
>>> +};
>>> +
>>>     struct kfd_ioctl_get_dmabuf_info_args {
>>>     	__u64 size;		/* from KFD */
>>>     	__u64 metadata_ptr;	/* to KFD */
>>> @@ -529,7 +543,10 @@ enum kfd_mmio_remap {
>>>     #define AMDKFD_IOC_IMPORT_DMABUF		\
>>>     		AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args)
>>>     
>>> +#define AMDKFD_IOC_ALLOC_QUEUE_GWS		\
>>> +		AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args)
>>> +
>> This will force us to move some ioctl numbers in amd-kfd-staging and the DKMS branch, which will break the ABI of our ROCm releases. Not sure there is a good way to avoid that other than leaving a hole in the upstream ioctl space. I got push-back on that kind of thing when I originally upstreamed KFD. So this is just an FYI.
>> [Oak] Yes, when we merge these changes back to the amd-kfd-staging branch, we will have to change the ioctl number for GWS allocation to 0x22, to accommodate the extra kfd ioctls on the kfd-staging branch. @Russell, Kent FYI
> No, the other way around. With APIs that are upstream we should have
> amd-kfd-staging match the upstream ABI. That means we have to move the
> other non-upstream ioctl numbers.
>
> Regards,
>     Felix
>
>
>> Regards,
>>      Felix
>>
>>>     #define AMDKFD_COMMAND_START		0x01
>>> -#define AMDKFD_COMMAND_END		0x1E
>>> +#define AMDKFD_COMMAND_END		0x1F
>>>     
>>>     #endif


* [PATCH 1/8] drm/amdkfd: Add gws number to kfd topology node properties
@ 2019-05-23 22:41 Zeng, Oak
  0 siblings, 0 replies; 15+ messages in thread
From: Zeng, Oak @ 2019-05-23 22:41 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Kuehling, Felix, Zeng, Oak, Keely, Sean, Koenig, Christian

Add an amdgpu_amdkfd interface to get num_gws and add num_gws
to /sys/class/kfd/kfd/topology/nodes/x/properties. Only report
num_gws if the MEC FW supports GWS barriers. Currently this is
determined by a module parameter, which will be replaced with
a MEC FW version check once the firmware is ready.

Change-Id: Ie0d00fb20a37ef2856860dbecbe1ad0ca1ef09f7
Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  7 +++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    | 10 ++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h      |  5 +++++
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c  |  5 +++++
 drivers/gpu/drm/amd/amdkfd/kfd_topology.h  |  1 +
 6 files changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 98326e3b..a4780d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -544,6 +544,13 @@ uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd)
 	return adev->rmmio_remap.bus_addr;
 }
 
+uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	return adev->gds.gws_size;
+}
+
 int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
 				uint32_t vmid, uint64_t gpu_addr,
 				uint32_t *ib_cmd, uint32_t ib_len)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index f57f297..5700643 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -169,6 +169,7 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
 uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
 uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
 uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd);
+uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd);
 uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src);
 
 #define read_user_wptr(mmptr, wptr, dst)				\
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index a334d3b..3a03c2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -666,6 +666,16 @@ MODULE_PARM_DESC(noretry,
 int halt_if_hws_hang;
 module_param(halt_if_hws_hang, int, 0644);
 MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
+
+/**
+ * DOC: hws_gws_support(bool)
+ * Whether HWS supports GWS barriers. Default value: false (not supported).
+ * This will be replaced with a MEC firmware version check once the
+ * firmware is ready.
+ */
+bool hws_gws_support;
+module_param(hws_gws_support, bool, 0444);
+MODULE_PARM_DESC(hws_gws_support, "MEC FW supports GWS barriers (false = not supported (default), true = supported)");
 #endif
 
 /**
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 8f02d78..338fb07 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -160,6 +160,11 @@ extern int noretry;
  */
 extern int halt_if_hws_hang;
 
+/*
+ * Whether MEC FW supports GWS barriers
+ */
+extern bool hws_gws_support;
+
 enum cache_policy {
 	cache_policy_coherent,
 	cache_policy_noncoherent
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 2c06d6c..128c72c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -454,6 +454,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
 			dev->node_props.lds_size_in_kb);
 	sysfs_show_32bit_prop(buffer, "gds_size_in_kb",
 			dev->node_props.gds_size_in_kb);
+	sysfs_show_32bit_prop(buffer, "num_gws",
+			dev->node_props.num_gws);
 	sysfs_show_32bit_prop(buffer, "wave_front_size",
 			dev->node_props.wave_front_size);
 	sysfs_show_32bit_prop(buffer, "array_count",
@@ -1290,6 +1292,9 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 	dev->node_props.num_sdma_engines = gpu->device_info->num_sdma_engines;
 	dev->node_props.num_sdma_xgmi_engines =
 				gpu->device_info->num_xgmi_sdma_engines;
+	dev->node_props.num_gws = (hws_gws_support &&
+		dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
+		amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0;
 
 	kfd_fill_mem_clk_max_info(dev);
 	kfd_fill_iolink_non_crat_info(dev);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 949e885..276354a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -65,6 +65,7 @@ struct kfd_node_properties {
 	uint32_t max_waves_per_simd;
 	uint32_t lds_size_in_kb;
 	uint32_t gds_size_in_kb;
+	uint32_t num_gws;
 	uint32_t wave_front_size;
 	uint32_t array_count;
 	uint32_t simd_arrays_per_engine;
-- 
2.7.4
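
Once merged, user space can read the new property directly from sysfs. The snippet below is a minimal sketch; node index 0 is an assumption for illustration, and the properties file is parsed as the "name value" lines produced by sysfs_show_32bit_prop():

#include <stdio.h>
#include <string.h>

int main(void)
{
	char name[64];
	unsigned long long value;
	/* Node 0 is assumed; real code should iterate over all nodes */
	FILE *f = fopen("/sys/class/kfd/kfd/topology/nodes/0/properties", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* Each property is reported as a "name value" line */
	while (fscanf(f, "%63s %llu", name, &value) == 2) {
		if (strcmp(name, "num_gws") == 0)
			printf("num_gws: %llu\n", value);
	}
	fclose(f);
	return 0;
}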


end of thread, other threads:[~2019-05-23 22:41 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-10 16:01 [PATCH 1/8] drm/amdkfd: Add gws number to kfd topology node properties Zeng, Oak
     [not found] ` <1557504063-1559-1-git-send-email-Oak.Zeng-5C7GfCeVMHo@public.gmane.org>
2019-05-10 16:01   ` [PATCH 2/8] drm/amdgpu: Remove GWS barriers pre-reservation for gfx Zeng, Oak
     [not found]     ` <1557504063-1559-2-git-send-email-Oak.Zeng-5C7GfCeVMHo@public.gmane.org>
2019-05-10 17:41       ` Christian König
2019-05-10 16:01   ` [PATCH 3/8] drm/amdkfd: Add interface to alloc gws from amdgpu Zeng, Oak
2019-05-10 16:01   ` [PATCH 4/8] drm/amdkfd: Allocate gws on device initialization Zeng, Oak
2019-05-10 16:01   ` [PATCH 5/8] drm/amdkfd: Add function to set queue gws Zeng, Oak
2019-05-10 16:01   ` [PATCH 6/8] drm/amdkfd: Add function to add/remove gws to kfd process Zeng, Oak
     [not found]     ` <1557504063-1559-6-git-send-email-Oak.Zeng-5C7GfCeVMHo@public.gmane.org>
2019-05-10 20:17       ` Kuehling, Felix
2019-05-10 16:01   ` [PATCH 7/8] drm/amdkfd: New IOCTL to allocate queue GWS Zeng, Oak
     [not found]     ` <1557504063-1559-7-git-send-email-Oak.Zeng-5C7GfCeVMHo@public.gmane.org>
2019-05-10 20:28       ` Kuehling, Felix
     [not found]         ` <c76522c2-115e-a6b6-f136-44fa2a45be2b-5C7GfCeVMHo@public.gmane.org>
2019-05-13 16:03           ` Zeng, Oak
     [not found]             ` <BL0PR12MB258003620CDCE779957C845B800F0-b4cIHhjg/p/XzH18dTCKOgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2019-05-13 18:12               ` Kuehling, Felix
     [not found]                 ` <83257525-4d46-acbd-5853-eeb6913fa398-5C7GfCeVMHo@public.gmane.org>
2019-05-13 19:14                   ` Christian König
2019-05-10 16:01   ` [PATCH 8/8] drm/amdkfd: PM4 packets change to support GWS Zeng, Oak
2019-05-23 22:41 [PATCH 1/8] drm/amdkfd: Add gws number to kfd topology node properties Zeng, Oak
