All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] More KFD features to make the runtime happy
@ 2017-08-12  4:47 Felix Kuehling
       [not found] ` <1502513265-14281-1-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Felix Kuehling @ 2017-08-12  4:47 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	oded.gabbay-Re5JQEeQqe8AvxtiuMwx3w
  Cc: Felix Kuehling

This patch series adds two more features that are needed to get the current
HSA runtime to initialize successfully. With this I am able to run OCL tests
from the current (upcoming) ROCm release on CZ.

Moses Reuben (2):
  drm/amdgpu: Adding new kgd/kfd interface functions to support scratch
    memory
  drm/amdkfd: Adding new IOCTL for scratch memory

Yong Zhao (2):
  drm/amdgpu: Add kgd kfd interface get_tile_config()
  drm/amdkfd: Implement image tiling mode support

 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c  | 61 ++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c  | 61 ++++++++++++++-
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c           | 87 ++++++++++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_device.c            |  3 +
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  |  3 +
 .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c  |  2 +
 .../drm/amd/amdkfd/kfd_device_queue_manager_vi.c   |  2 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h              |  1 +
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h    | 16 ++++
 include/uapi/linux/kfd_ioctl.h                     | 54 +++++++++++++-
 10 files changed, 287 insertions(+), 3 deletions(-)

-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 1/4] drm/amdgpu: Adding new kgd/kfd interface functions to support scratch memory
       [not found] ` <1502513265-14281-1-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
@ 2017-08-12  4:47   ` Felix Kuehling
       [not found]     ` <1502513265-14281-2-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
  2017-08-12  4:47   ` [PATCH 2/4] drm/amdkfd: Adding new IOCTL for " Felix Kuehling
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 28+ messages in thread
From: Felix Kuehling @ 2017-08-12  4:47 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	oded.gabbay-Re5JQEeQqe8AvxtiuMwx3w
  Cc: Felix Kuehling, Moses Reuben

From: Moses Reuben <moses.reuben@amd.com>

Signed-off-by: Moses Reuben <moses.reuben@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 34 +++++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 35 ++++++++++++++++++++++-
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h   |  4 +++
 3 files changed, 71 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 994d262..11d515a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -135,6 +135,10 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
 static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
 
 static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
+static int alloc_memory_of_scratch(struct kgd_dev *kgd,
+					 uint64_t va, uint32_t vmid);
+static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
+		uint8_t element_size, uint8_t index_stride, uint8_t mtype);
 
 static const struct kfd2kgd_calls kfd2kgd = {
 	.init_gtt_mem_allocation = alloc_gtt_mem,
@@ -159,7 +163,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
 	.get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
 	.write_vmid_invalidate_request = write_vmid_invalidate_request,
-	.get_fw_version = get_fw_version
+	.get_fw_version = get_fw_version,
+	.alloc_memory_of_scratch = alloc_memory_of_scratch,
+	.write_config_static_mem = write_config_static_mem
 };
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
@@ -652,6 +658,32 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
 	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
 }
 
+static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
+		uint8_t element_size, uint8_t index_stride, uint8_t mtype)
+{
+	uint32_t reg;
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+	reg = swizzle_enable << SH_STATIC_MEM_CONFIG__SWIZZLE_ENABLE__SHIFT |
+		element_size << SH_STATIC_MEM_CONFIG__ELEMENT_SIZE__SHIFT |
+		index_stride << SH_STATIC_MEM_CONFIG__INDEX_STRIDE__SHIFT |
+		mtype << SH_STATIC_MEM_CONFIG__PRIVATE_MTYPE__SHIFT;
+
+	WREG32(mmSH_STATIC_MEM_CONFIG, reg);
+	return 0;
+}
+static int alloc_memory_of_scratch(struct kgd_dev *kgd,
+				 uint64_t va, uint32_t vmid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+	lock_srbm(kgd, 0, 0, 0, vmid);
+	WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
+	unlock_srbm(kgd);
+
+	return 0;
+}
+
 static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 29a6f5d..ef677e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -94,6 +94,10 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
 		uint8_t vmid);
 static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
 static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
+static int alloc_memory_of_scratch(struct kgd_dev *kgd,
+				 uint64_t va, uint32_t vmid);
+static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
+		uint8_t element_size, uint8_t index_stride, uint8_t mtype);
 
 static const struct kfd2kgd_calls kfd2kgd = {
 	.init_gtt_mem_allocation = alloc_gtt_mem,
@@ -120,12 +124,15 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.get_atc_vmid_pasid_mapping_valid =
 			get_atc_vmid_pasid_mapping_valid,
 	.write_vmid_invalidate_request = write_vmid_invalidate_request,
-	.get_fw_version = get_fw_version
+	.get_fw_version = get_fw_version,
+	.alloc_memory_of_scratch = alloc_memory_of_scratch,
+	.write_config_static_mem = write_config_static_mem
 };
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
 {
 	return (struct kfd2kgd_calls *)&kfd2kgd;
+	return (struct kfd2kgd_calls *)&kfd2kgd;
 }
 
 static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
@@ -573,6 +580,32 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
 	return 0;
 }
 
+static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
+		uint8_t element_size, uint8_t index_stride, uint8_t mtype)
+{
+	uint32_t reg;
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+	reg = swizzle_enable << SH_STATIC_MEM_CONFIG__SWIZZLE_ENABLE__SHIFT |
+		element_size << SH_STATIC_MEM_CONFIG__ELEMENT_SIZE__SHIFT |
+		index_stride << SH_STATIC_MEM_CONFIG__INDEX_STRIDE__SHIFT |
+		mtype << SH_STATIC_MEM_CONFIG__PRIVATE_MTYPE__SHIFT;
+
+	WREG32(mmSH_STATIC_MEM_CONFIG, reg);
+	return 0;
+}
+static int alloc_memory_of_scratch(struct kgd_dev *kgd,
+				 uint64_t va, uint32_t vmid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+	lock_srbm(kgd, 0, 0, 0, vmid);
+	WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
+	unlock_srbm(kgd);
+
+	return 0;
+}
+
 static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index ffafda0..1dd90b2 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -199,6 +199,10 @@ struct kfd2kgd_calls {
 
 	uint16_t (*get_fw_version)(struct kgd_dev *kgd,
 				enum kgd_engine_type type);
+	int (*alloc_memory_of_scratch)(struct kgd_dev *kgd,
+			uint64_t va, uint32_t vmid);
+	int (*write_config_static_mem)(struct kgd_dev *kgd, bool swizzle_enable,
+		uint8_t element_size, uint8_t index_stride, uint8_t mtype);
 };
 
 /**
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 2/4] drm/amdkfd: Adding new IOCTL for scratch memory
       [not found] ` <1502513265-14281-1-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
  2017-08-12  4:47   ` [PATCH 1/4] drm/amdgpu: Adding new kgd/kfd interface functions to support scratch memory Felix Kuehling
@ 2017-08-12  4:47   ` Felix Kuehling
       [not found]     ` <1502513265-14281-3-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
  2017-08-12  4:47   ` [PATCH 3/4] drm/amdgpu: Add kgd kfd interface get_tile_config() Felix Kuehling
  2017-08-12  4:47   ` [PATCH 4/4] drm/amdkfd: Implement image tiling mode support Felix Kuehling
  3 siblings, 1 reply; 28+ messages in thread
From: Felix Kuehling @ 2017-08-12  4:47 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	oded.gabbay-Re5JQEeQqe8AvxtiuMwx3w
  Cc: Ben Goz, Felix Kuehling, Moses Reuben

From: Moses Reuben <moses.reuben@amd.com>

Signed-off-by: Moses Reuben <moses.reuben@amd.com>
Signed-off-by: Ben Goz <ben.goz@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c           | 44 ++++++++++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_device.c            |  3 ++
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  |  3 ++
 .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c  |  2 +
 .../drm/amd/amdkfd/kfd_device_queue_manager_vi.c   |  2 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h              |  1 +
 include/uapi/linux/kfd_ioctl.h                     | 19 +++++++++-
 7 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 7d78119..6be1bba 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -848,6 +848,47 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
 
 	return err;
 }
+static int kfd_ioctl_alloc_scratch_memory(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_alloc_memory_of_scratch_args *args = data;
+	struct kfd_process_device *pdd;
+	struct kfd_dev *dev;
+	long err;
+
+	if (args->size == 0)
+		return -EINVAL;
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (!dev)
+		return -EINVAL;
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd)) {
+		err = PTR_ERR(pdd);
+		goto bind_process_to_device_fail;
+	}
+
+	pdd->qpd.sh_hidden_private_base = args->va_addr;
+
+	mutex_unlock(&p->mutex);
+
+	if (sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0) {
+		err = dev->kfd2kgd->alloc_memory_of_scratch(
+			dev->kgd, args->va_addr, pdd->qpd.vmid);
+		if (err)
+			goto alloc_memory_of_scratch_failed;
+	}
+
+	return 0;
+
+bind_process_to_device_fail:
+	mutex_unlock(&p->mutex);
+alloc_memory_of_scratch_failed:
+	return -EFAULT;
+}
 
 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
 	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
@@ -902,6 +943,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 
 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
 			kfd_ioctl_dbg_wave_control, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_SCRATCH,
+			kfd_ioctl_alloc_scratch_memory, 0),
 };
 
 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index e790e7f..33c43b7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -304,6 +304,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 
 	kfd->dbgmgr = NULL;
 
+	/* Initialize scratch memory access */
+	kfd->kfd2kgd->write_config_static_mem(kfd->kgd, true, 1, 3, 0);
+
 	kfd->init_complete = true;
 	dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor,
 		 kfd->pdev->device);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 3891fe5..7bd8c39f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -268,6 +268,9 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
 	pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
 			q->pipe, q->queue);
 
+	dqm->dev->kfd2kgd->alloc_memory_of_scratch(
+			dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid);
+
 	retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties,
 			       q->process->mm);
 	if (retval)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index fadc56a..72c3cba 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -24,6 +24,7 @@
 #include "kfd_device_queue_manager.h"
 #include "cik_regs.h"
 #include "oss/oss_2_4_sh_mask.h"
+#include "gca/gfx_7_2_sh_mask.h"
 
 static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
 				   struct qcm_process_device *qpd,
@@ -123,6 +124,7 @@ static int register_process_cik(struct device_queue_manager *dqm,
 	} else {
 		temp = get_sh_mem_bases_nybble_64(pdd);
 		qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
+		qpd->sh_mem_config |= 1  << SH_MEM_CONFIG__PRIVATE_ATC__SHIFT;
 	}
 
 	pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
index 15e81ae..40e9ddd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
@@ -135,6 +135,8 @@ static int register_process_vi(struct device_queue_manager *dqm,
 		qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
 		qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA64 <<
 			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT;
+		qpd->sh_mem_config |= 1  <<
+			SH_MEM_CONFIG__PRIVATE_ATC__SHIFT;
 	}
 
 	pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 30ce92c..b397ec7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -432,6 +432,7 @@ struct qcm_process_device {
 	uint32_t gds_size;
 	uint32_t num_gws;
 	uint32_t num_oac;
+	uint32_t sh_hidden_private_base;
 };
 
 /* Data that is per-process-per device. */
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index d683342..3dd86e1 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -232,6 +232,13 @@ struct kfd_ioctl_wait_events_args {
 	uint32_t wait_result;		/* from KFD */
 };
 
+struct kfd_ioctl_alloc_memory_of_scratch_args {
+	uint64_t va_addr;	/* to KFD */
+	uint64_t size;		/* to KFD */
+	uint32_t gpu_id;	/* to KFD */
+	uint32_t pad;
+};
+
 #define AMDKFD_IOCTL_BASE 'K'
 #define AMDKFD_IO(nr)			_IO(AMDKFD_IOCTL_BASE, nr)
 #define AMDKFD_IOR(nr, type)		_IOR(AMDKFD_IOCTL_BASE, nr, type)
@@ -286,7 +293,17 @@ struct kfd_ioctl_wait_events_args {
 #define AMDKFD_IOC_DBG_WAVE_CONTROL		\
 		AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args)
 
+/* TODO:
+ * - AMDKFD_IOC_ALLOC_MEMORY_OF_GPU
+ * - AMDKFD_IOC_FREE_MEMORY_OF_GPU
+ * - AMDKFD_IOC_MAP_MEMORY_TO_GPU
+ * - AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU
+ */
+
+#define AMDKFD_IOC_ALLOC_MEMORY_OF_SCRATCH	\
+		AMDKFD_IOWR(0x15, struct kfd_ioctl_alloc_memory_of_scratch_args)
+
 #define AMDKFD_COMMAND_START		0x01
-#define AMDKFD_COMMAND_END		0x11
+#define AMDKFD_COMMAND_END		0x16
 
 #endif
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 3/4] drm/amdgpu: Add kgd kfd interface get_tile_config()
       [not found] ` <1502513265-14281-1-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
  2017-08-12  4:47   ` [PATCH 1/4] drm/amdgpu: Adding new kgd/kfd interface functions to support scratch memory Felix Kuehling
  2017-08-12  4:47   ` [PATCH 2/4] drm/amdkfd: Adding new IOCTL for " Felix Kuehling
@ 2017-08-12  4:47   ` Felix Kuehling
       [not found]     ` <1502513265-14281-4-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
  2017-08-12  4:47   ` [PATCH 4/4] drm/amdkfd: Implement image tiling mode support Felix Kuehling
  3 siblings, 1 reply; 28+ messages in thread
From: Felix Kuehling @ 2017-08-12  4:47 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	oded.gabbay-Re5JQEeQqe8AvxtiuMwx3w
  Cc: Yong Zhao, Felix Kuehling

From: Yong Zhao <yong.zhao@amd.com>

Signed-off-by: Yong Zhao <yong.zhao@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 29 ++++++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 28 +++++++++++++++++++++-
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h   | 12 ++++++++++
 3 files changed, 67 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 11d515a..445a686 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -140,6 +140,32 @@ static int alloc_memory_of_scratch(struct kgd_dev *kgd,
 static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
 		uint8_t element_size, uint8_t index_stride, uint8_t mtype);
 
+/* Because of REG_GET_FIELD() being used, we put this function in the
+ * asic specific file.
+ */
+static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
+		struct tile_config *config)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	config->gb_addr_config = adev->gfx.config.gb_addr_config;
+	config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
+				MC_ARB_RAMCFG, NOOFBANK);
+	config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
+				MC_ARB_RAMCFG, NOOFRANKS);
+
+	config->tile_config_ptr = adev->gfx.config.tile_mode_array;
+	config->num_tile_configs =
+			ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+	config->macro_tile_config_ptr =
+			adev->gfx.config.macrotile_mode_array;
+	config->num_macro_tile_configs =
+			ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
+
+
+	return 0;
+}
+
 static const struct kfd2kgd_calls kfd2kgd = {
 	.init_gtt_mem_allocation = alloc_gtt_mem,
 	.free_gtt_mem = free_gtt_mem,
@@ -165,7 +191,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.write_vmid_invalidate_request = write_vmid_invalidate_request,
 	.get_fw_version = get_fw_version,
 	.alloc_memory_of_scratch = alloc_memory_of_scratch,
-	.write_config_static_mem = write_config_static_mem
+	.write_config_static_mem = write_config_static_mem,
+	.get_tile_config = amdgpu_amdkfd_get_tile_config,
 };
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index ef677e5..2d6ba09 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -99,6 +99,31 @@ static int alloc_memory_of_scratch(struct kgd_dev *kgd,
 static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
 		uint8_t element_size, uint8_t index_stride, uint8_t mtype);
 
+/* Because of REG_GET_FIELD() being used, we put this function in the
+ * asic specific file.
+ */
+static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
+		struct tile_config *config)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	config->gb_addr_config = adev->gfx.config.gb_addr_config;
+	config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
+				MC_ARB_RAMCFG, NOOFBANK);
+	config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
+				MC_ARB_RAMCFG, NOOFRANKS);
+
+	config->tile_config_ptr = adev->gfx.config.tile_mode_array;
+	config->num_tile_configs =
+			ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+	config->macro_tile_config_ptr =
+			adev->gfx.config.macrotile_mode_array;
+	config->num_macro_tile_configs =
+			ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
+
+	return 0;
+}
+
 static const struct kfd2kgd_calls kfd2kgd = {
 	.init_gtt_mem_allocation = alloc_gtt_mem,
 	.free_gtt_mem = free_gtt_mem,
@@ -126,7 +151,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.write_vmid_invalidate_request = write_vmid_invalidate_request,
 	.get_fw_version = get_fw_version,
 	.alloc_memory_of_scratch = alloc_memory_of_scratch,
-	.write_config_static_mem = write_config_static_mem
+	.write_config_static_mem = write_config_static_mem,
+	.get_tile_config = amdgpu_amdkfd_get_tile_config,
 };
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 1dd90b2..22774ac 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -87,6 +87,17 @@ struct kgd2kfd_shared_resources {
 	size_t doorbell_start_offset;
 };
 
+struct tile_config {
+	uint32_t *tile_config_ptr;
+	uint32_t *macro_tile_config_ptr;
+	uint32_t num_tile_configs;
+	uint32_t num_macro_tile_configs;
+
+	uint32_t gb_addr_config;
+	uint32_t num_banks;
+	uint32_t num_ranks;
+};
+
 /**
  * struct kfd2kgd_calls
  *
@@ -203,6 +214,7 @@ struct kfd2kgd_calls {
 			uint64_t va, uint32_t vmid);
 	int (*write_config_static_mem)(struct kgd_dev *kgd, bool swizzle_enable,
 		uint8_t element_size, uint8_t index_stride, uint8_t mtype);
+	int (*get_tile_config)(struct kgd_dev *kgd, struct tile_config *config);
 };
 
 /**
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 4/4] drm/amdkfd: Implement image tiling mode support
       [not found] ` <1502513265-14281-1-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
                     ` (2 preceding siblings ...)
  2017-08-12  4:47   ` [PATCH 3/4] drm/amdgpu: Add kgd kfd interface get_tile_config() Felix Kuehling
@ 2017-08-12  4:47   ` Felix Kuehling
       [not found]     ` <1502513265-14281-5-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
  3 siblings, 1 reply; 28+ messages in thread
From: Felix Kuehling @ 2017-08-12  4:47 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	oded.gabbay-Re5JQEeQqe8AvxtiuMwx3w
  Cc: Yong Zhao, Felix Kuehling

From: Yong Zhao <yong.zhao@amd.com>

Signed-off-by: Yong Zhao <yong.zhao@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 43 ++++++++++++++++++++++++++++++++
 include/uapi/linux/kfd_ioctl.h           | 37 ++++++++++++++++++++++++++-
 2 files changed, 79 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 6be1bba..68ec830 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -890,6 +890,46 @@ static int kfd_ioctl_alloc_scratch_memory(struct file *filep,
 	return -EFAULT;
 }
 
+static int kfd_ioctl_get_tile_config(struct file *filep,
+		struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_get_tile_config_args *args = data;
+	struct kfd_dev *dev;
+	struct tile_config config;
+	int err = 0;
+
+	dev = kfd_device_by_id(args->gpu_id);
+
+	dev->kfd2kgd->get_tile_config(dev->kgd, &config);
+
+	args->gb_addr_config = config.gb_addr_config;
+	args->num_banks = config.num_banks;
+	args->num_ranks = config.num_ranks;
+
+	if (args->num_tile_configs > config.num_tile_configs)
+		args->num_tile_configs = config.num_tile_configs;
+	err = copy_to_user((void __user *)args->tile_config_ptr,
+			config.tile_config_ptr,
+			args->num_tile_configs * sizeof(uint32_t));
+	if (err) {
+		args->num_tile_configs = 0;
+		return -EFAULT;
+	}
+
+	if (args->num_macro_tile_configs > config.num_macro_tile_configs)
+		args->num_macro_tile_configs =
+				config.num_macro_tile_configs;
+	err = copy_to_user((void __user *)args->macro_tile_config_ptr,
+			config.macro_tile_config_ptr,
+			args->num_macro_tile_configs * sizeof(uint32_t));
+	if (err) {
+		args->num_macro_tile_configs = 0;
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
 	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
 			    .cmd_drv = 0, .name = #ioctl}
@@ -946,6 +986,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 
 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_SCRATCH,
 			kfd_ioctl_alloc_scratch_memory, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
+			kfd_ioctl_get_tile_config, 0)
 };
 
 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 3dd86e1..10dcdc6 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -239,6 +239,29 @@ struct kfd_ioctl_alloc_memory_of_scratch_args {
 	uint32_t pad;
 };
 
+struct kfd_ioctl_get_tile_config_args {
+	/* to KFD: pointer to tile array */
+	uint64_t tile_config_ptr;
+	/* to KFD: pointer to macro tile array */
+	uint64_t macro_tile_config_ptr;
+	/* to KFD: array size allocated by user mode
+	 * from KFD: array size filled by kernel
+	 */
+	uint32_t num_tile_configs;
+	/* to KFD: array size allocated by user mode
+	 * from KFD: array size filled by kernel
+	 */
+	uint32_t num_macro_tile_configs;
+
+	uint32_t gpu_id;		/* to KFD */
+	uint32_t gb_addr_config;	/* from KFD */
+	uint32_t num_banks;		/* from KFD */
+	uint32_t num_ranks;		/* from KFD */
+	/* struct size can be extended later if needed
+	 * without breaking ABI compatibility
+	 */
+};
+
 #define AMDKFD_IOCTL_BASE 'K'
 #define AMDKFD_IO(nr)			_IO(AMDKFD_IOCTL_BASE, nr)
 #define AMDKFD_IOR(nr, type)		_IOR(AMDKFD_IOCTL_BASE, nr, type)
@@ -303,7 +326,19 @@ struct kfd_ioctl_alloc_memory_of_scratch_args {
 #define AMDKFD_IOC_ALLOC_MEMORY_OF_SCRATCH	\
 		AMDKFD_IOWR(0x15, struct kfd_ioctl_alloc_memory_of_scratch_args)
 
+/* TODO:
+ * - AMDKFD_IOC_SET_CU_MASK
+ * - AMDKFD_IOC_SET_PROCESS_DGPU_APERTURE
+ * - AMDKFD_IOC_SET_TRAP_HANDLER
+ * - AMDKFD_IOC_GET_PROCESS_APERTURES_NEW
+ * - AMDKFD_IOC_GET_DMABUF_INFO
+ * - AMDKFD_IOC_IMPORT_DMABUF
+ */
+
+#define AMDKFD_IOC_GET_TILE_CONFIG                                      \
+		AMDKFD_IOWR(0x1C, struct kfd_ioctl_get_tile_config_args)
+
 #define AMDKFD_COMMAND_START		0x01
-#define AMDKFD_COMMAND_END		0x16
+#define AMDKFD_COMMAND_END		0x1D
 
 #endif
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] drm/amdgpu: Adding new kgd/kfd interface functions to support scratch memory
       [not found]     ` <1502513265-14281-2-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
@ 2017-08-13  8:56       ` Oded Gabbay
       [not found]         ` <CAFCwf13YDgfX_3EFhQ8sE-8D7ApyDd9zwtqazW6Ups2dPReMqw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Oded Gabbay @ 2017-08-13  8:56 UTC (permalink / raw)
  To: Felix Kuehling; +Cc: amd-gfx list

On Sat, Aug 12, 2017 at 7:47 AM, Felix Kuehling <Felix.Kuehling@amd.com> wrote:
> From: Moses Reuben <moses.reuben@amd.com>
>
> Signed-off-by: Moses Reuben <moses.reuben@amd.com>
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 34 +++++++++++++++++++++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 35 ++++++++++++++++++++++-
>  drivers/gpu/drm/amd/include/kgd_kfd_interface.h   |  4 +++
>  3 files changed, 71 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> index 994d262..11d515a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> @@ -135,6 +135,10 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
>  static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
>
>  static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
> +static int alloc_memory_of_scratch(struct kgd_dev *kgd,
> +                                        uint64_t va, uint32_t vmid);
> +static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
> +               uint8_t element_size, uint8_t index_stride, uint8_t mtype);
>
>  static const struct kfd2kgd_calls kfd2kgd = {
>         .init_gtt_mem_allocation = alloc_gtt_mem,
> @@ -159,7 +163,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
>         .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
>         .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
>         .write_vmid_invalidate_request = write_vmid_invalidate_request,
> -       .get_fw_version = get_fw_version
> +       .get_fw_version = get_fw_version,
> +       .alloc_memory_of_scratch = alloc_memory_of_scratch,
> +       .write_config_static_mem = write_config_static_mem
>  };
>
>  struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
> @@ -652,6 +658,32 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
>         WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
>  }
>
> +static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
> +               uint8_t element_size, uint8_t index_stride, uint8_t mtype)
> +{
> +       uint32_t reg;
> +       struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> +
> +       reg = swizzle_enable << SH_STATIC_MEM_CONFIG__SWIZZLE_ENABLE__SHIFT |
> +               element_size << SH_STATIC_MEM_CONFIG__ELEMENT_SIZE__SHIFT |
> +               index_stride << SH_STATIC_MEM_CONFIG__INDEX_STRIDE__SHIFT |
> +               mtype << SH_STATIC_MEM_CONFIG__PRIVATE_MTYPE__SHIFT;
> +
> +       WREG32(mmSH_STATIC_MEM_CONFIG, reg);

Don't you need to select and lock srbm before you write to this register ?

> +       return 0;
> +}
> +static int alloc_memory_of_scratch(struct kgd_dev *kgd,
> +                                uint64_t va, uint32_t vmid)
> +{
> +       struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> +
> +       lock_srbm(kgd, 0, 0, 0, vmid);
> +       WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
> +       unlock_srbm(kgd);
> +
> +       return 0;
> +}
> +
>  static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
>  {
>         struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> index 29a6f5d..ef677e5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> @@ -94,6 +94,10 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
>                 uint8_t vmid);
>  static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
>  static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
> +static int alloc_memory_of_scratch(struct kgd_dev *kgd,
> +                                uint64_t va, uint32_t vmid);
> +static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
> +               uint8_t element_size, uint8_t index_stride, uint8_t mtype);
>
>  static const struct kfd2kgd_calls kfd2kgd = {
>         .init_gtt_mem_allocation = alloc_gtt_mem,
> @@ -120,12 +124,15 @@ static const struct kfd2kgd_calls kfd2kgd = {
>         .get_atc_vmid_pasid_mapping_valid =
>                         get_atc_vmid_pasid_mapping_valid,
>         .write_vmid_invalidate_request = write_vmid_invalidate_request,
> -       .get_fw_version = get_fw_version
> +       .get_fw_version = get_fw_version,
> +       .alloc_memory_of_scratch = alloc_memory_of_scratch,
> +       .write_config_static_mem = write_config_static_mem
>  };
>
>  struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
>  {
>         return (struct kfd2kgd_calls *)&kfd2kgd;
> +       return (struct kfd2kgd_calls *)&kfd2kgd;
>  }
>
>  static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
> @@ -573,6 +580,32 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
>         return 0;
>  }
>
> +static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
> +               uint8_t element_size, uint8_t index_stride, uint8_t mtype)
> +{
> +       uint32_t reg;
> +       struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> +
> +       reg = swizzle_enable << SH_STATIC_MEM_CONFIG__SWIZZLE_ENABLE__SHIFT |
> +               element_size << SH_STATIC_MEM_CONFIG__ELEMENT_SIZE__SHIFT |
> +               index_stride << SH_STATIC_MEM_CONFIG__INDEX_STRIDE__SHIFT |
> +               mtype << SH_STATIC_MEM_CONFIG__PRIVATE_MTYPE__SHIFT;
> +
> +       WREG32(mmSH_STATIC_MEM_CONFIG, reg);

Same comment as above.

> +       return 0;
> +}
> +static int alloc_memory_of_scratch(struct kgd_dev *kgd,
> +                                uint64_t va, uint32_t vmid)
> +{
> +       struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> +
> +       lock_srbm(kgd, 0, 0, 0, vmid);
> +       WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
> +       unlock_srbm(kgd);
> +
> +       return 0;
> +}
> +
>  static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
>  {
>         struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> index ffafda0..1dd90b2 100644
> --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> @@ -199,6 +199,10 @@ struct kfd2kgd_calls {
>
>         uint16_t (*get_fw_version)(struct kgd_dev *kgd,
>                                 enum kgd_engine_type type);
> +       int (*alloc_memory_of_scratch)(struct kgd_dev *kgd,
> +                       uint64_t va, uint32_t vmid);
> +       int (*write_config_static_mem)(struct kgd_dev *kgd, bool swizzle_enable,
> +               uint8_t element_size, uint8_t index_stride, uint8_t mtype);
>  };
>
>  /**
> --
> 2.7.4
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 2/4] drm/amdkfd: Adding new IOCTL for scratch memory
       [not found]     ` <1502513265-14281-3-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
@ 2017-08-13  9:04       ` Oded Gabbay
       [not found]         ` <CAFCwf11z1Uo9jd82XAGrapXA44KpY+KrV69nbXLkoX6v10_gig-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Oded Gabbay @ 2017-08-13  9:04 UTC (permalink / raw)
  To: Felix Kuehling; +Cc: Ben Goz, Moses Reuben, amd-gfx list

On Sat, Aug 12, 2017 at 7:47 AM, Felix Kuehling <Felix.Kuehling@amd.com> wrote:
> From: Moses Reuben <moses.reuben@amd.com>
>
> Signed-off-by: Moses Reuben <moses.reuben@amd.com>
> Signed-off-by: Ben Goz <ben.goz@amd.com>
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c           | 44 ++++++++++++++++++++++
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c            |  3 ++
>  .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  |  3 ++
>  .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c  |  2 +
>  .../drm/amd/amdkfd/kfd_device_queue_manager_vi.c   |  2 +
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h              |  1 +
>  include/uapi/linux/kfd_ioctl.h                     | 19 +++++++++-
>  7 files changed, 73 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 7d78119..6be1bba 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -848,6 +848,47 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
>
>         return err;
>  }
> +static int kfd_ioctl_alloc_scratch_memory(struct file *filep,
> +                                       struct kfd_process *p, void *data)
> +{
> +       struct kfd_ioctl_alloc_memory_of_scratch_args *args = data;
> +       struct kfd_process_device *pdd;
> +       struct kfd_dev *dev;
> +       long err;
> +
> +       if (args->size == 0)
> +               return -EINVAL;
> +
> +       dev = kfd_device_by_id(args->gpu_id);
> +       if (!dev)
> +               return -EINVAL;
> +
> +       mutex_lock(&p->mutex);
> +
> +       pdd = kfd_bind_process_to_device(dev, p);
> +       if (IS_ERR(pdd)) {
> +               err = PTR_ERR(pdd);
> +               goto bind_process_to_device_fail;
> +       }
> +
> +       pdd->qpd.sh_hidden_private_base = args->va_addr;
> +
> +       mutex_unlock(&p->mutex);
> +
> +       if (sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0) {
> +               err = dev->kfd2kgd->alloc_memory_of_scratch(
> +                       dev->kgd, args->va_addr, pdd->qpd.vmid);
> +               if (err)
> +                       goto alloc_memory_of_scratch_failed;
> +       }
> +
> +       return 0;
> +
> +bind_process_to_device_fail:
> +       mutex_unlock(&p->mutex);
> +alloc_memory_of_scratch_failed:
> +       return -EFAULT;
Should be return err to reflect actual error code;

> +}
>
>  #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
>         [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
> @@ -902,6 +943,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
>
>         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
>                         kfd_ioctl_dbg_wave_control, 0),
> +
> +       AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_SCRATCH,
> +                       kfd_ioctl_alloc_scratch_memory, 0),
>  };
>
>  #define AMDKFD_CORE_IOCTL_COUNT        ARRAY_SIZE(amdkfd_ioctls)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index e790e7f..33c43b7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -304,6 +304,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
>
>         kfd->dbgmgr = NULL;
>
> +       /* Initialize scratch memory access */
> +       kfd->kfd2kgd->write_config_static_mem(kfd->kgd, true, 1, 3, 0);
> +
>         kfd->init_complete = true;
>         dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor,
>                  kfd->pdev->device);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 3891fe5..7bd8c39f 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -268,6 +268,9 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
>         pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
>                         q->pipe, q->queue);
>
> +       dqm->dev->kfd2kgd->alloc_memory_of_scratch(
> +                       dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid);
> +
>         retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties,
>                                q->process->mm);
>         if (retval)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
> index fadc56a..72c3cba 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
> @@ -24,6 +24,7 @@
>  #include "kfd_device_queue_manager.h"
>  #include "cik_regs.h"
>  #include "oss/oss_2_4_sh_mask.h"
> +#include "gca/gfx_7_2_sh_mask.h"
>
>  static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
>                                    struct qcm_process_device *qpd,
> @@ -123,6 +124,7 @@ static int register_process_cik(struct device_queue_manager *dqm,
>         } else {
>                 temp = get_sh_mem_bases_nybble_64(pdd);
>                 qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
> +               qpd->sh_mem_config |= 1  << SH_MEM_CONFIG__PRIVATE_ATC__SHIFT;
>         }
>
>         pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
> index 15e81ae..40e9ddd 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
> @@ -135,6 +135,8 @@ static int register_process_vi(struct device_queue_manager *dqm,
>                 qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
>                 qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA64 <<
>                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT;
> +               qpd->sh_mem_config |= 1  <<
> +                       SH_MEM_CONFIG__PRIVATE_ATC__SHIFT;
>         }
>
>         pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 30ce92c..b397ec7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -432,6 +432,7 @@ struct qcm_process_device {
>         uint32_t gds_size;
>         uint32_t num_gws;
>         uint32_t num_oac;
> +       uint32_t sh_hidden_private_base;
>  };
>
>  /* Data that is per-process-per device. */
> diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
> index d683342..3dd86e1 100644
> --- a/include/uapi/linux/kfd_ioctl.h
> +++ b/include/uapi/linux/kfd_ioctl.h
> @@ -232,6 +232,13 @@ struct kfd_ioctl_wait_events_args {
>         uint32_t wait_result;           /* from KFD */
>  };
>
> +struct kfd_ioctl_alloc_memory_of_scratch_args {
> +       uint64_t va_addr;       /* to KFD */
> +       uint64_t size;          /* to KFD */
> +       uint32_t gpu_id;        /* to KFD */
> +       uint32_t pad;
> +};
> +
>  #define AMDKFD_IOCTL_BASE 'K'
>  #define AMDKFD_IO(nr)                  _IO(AMDKFD_IOCTL_BASE, nr)
>  #define AMDKFD_IOR(nr, type)           _IOR(AMDKFD_IOCTL_BASE, nr, type)
> @@ -286,7 +293,17 @@ struct kfd_ioctl_wait_events_args {
>  #define AMDKFD_IOC_DBG_WAVE_CONTROL            \
>                 AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args)
>
> +/* TODO:
> + * - AMDKFD_IOC_ALLOC_MEMORY_OF_GPU
> + * - AMDKFD_IOC_FREE_MEMORY_OF_GPU
> + * - AMDKFD_IOC_MAP_MEMORY_TO_GPU
> + * - AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU
> + */
> +
> +#define AMDKFD_IOC_ALLOC_MEMORY_OF_SCRATCH     \
> +               AMDKFD_IOWR(0x15, struct kfd_ioctl_alloc_memory_of_scratch_args)
> +
>  #define AMDKFD_COMMAND_START           0x01
> -#define AMDKFD_COMMAND_END             0x11
> +#define AMDKFD_COMMAND_END             0x16

You create a hole here, between 0x11 to 0x16. This would make the
sanity check in kfd_ioctl() to be useless.

Also, why not do a generic IOCTL for allocating GPU memory, and pass
parameter if its scratch or something else, similar to amdgpu's
"AMDGPU_GEM_DOMAIN_*" defines ?

>
>  #endif
> --
> 2.7.4
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 3/4] drm/amdgpu: Add kgd kfd interface get_tile_config()
       [not found]     ` <1502513265-14281-4-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
@ 2017-08-13  9:05       ` Oded Gabbay
  0 siblings, 0 replies; 28+ messages in thread
From: Oded Gabbay @ 2017-08-13  9:05 UTC (permalink / raw)
  To: Felix Kuehling; +Cc: Yong Zhao, amd-gfx list

On Sat, Aug 12, 2017 at 7:47 AM, Felix Kuehling <Felix.Kuehling@amd.com> wrote:
> From: Yong Zhao <yong.zhao@amd.com>
>
> Signed-off-by: Yong Zhao <yong.zhao@amd.com>
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 29 ++++++++++++++++++++++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 28 +++++++++++++++++++++-
>  drivers/gpu/drm/amd/include/kgd_kfd_interface.h   | 12 ++++++++++
>  3 files changed, 67 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> index 11d515a..445a686 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> @@ -140,6 +140,32 @@ static int alloc_memory_of_scratch(struct kgd_dev *kgd,
>  static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
>                 uint8_t element_size, uint8_t index_stride, uint8_t mtype);
>
> +/* Because of REG_GET_FIELD() being used, we put this function in the
> + * asic specific file.
> + */
> +static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
> +               struct tile_config *config)
> +{
> +       struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
> +
> +       config->gb_addr_config = adev->gfx.config.gb_addr_config;
> +       config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
> +                               MC_ARB_RAMCFG, NOOFBANK);
> +       config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
> +                               MC_ARB_RAMCFG, NOOFRANKS);
> +
> +       config->tile_config_ptr = adev->gfx.config.tile_mode_array;
> +       config->num_tile_configs =
> +                       ARRAY_SIZE(adev->gfx.config.tile_mode_array);
> +       config->macro_tile_config_ptr =
> +                       adev->gfx.config.macrotile_mode_array;
> +       config->num_macro_tile_configs =
> +                       ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
> +
> +
> +       return 0;
> +}
> +
>  static const struct kfd2kgd_calls kfd2kgd = {
>         .init_gtt_mem_allocation = alloc_gtt_mem,
>         .free_gtt_mem = free_gtt_mem,
> @@ -165,7 +191,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
>         .write_vmid_invalidate_request = write_vmid_invalidate_request,
>         .get_fw_version = get_fw_version,
>         .alloc_memory_of_scratch = alloc_memory_of_scratch,
> -       .write_config_static_mem = write_config_static_mem
> +       .write_config_static_mem = write_config_static_mem,
> +       .get_tile_config = amdgpu_amdkfd_get_tile_config,
>  };
>
>  struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> index ef677e5..2d6ba09 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> @@ -99,6 +99,31 @@ static int alloc_memory_of_scratch(struct kgd_dev *kgd,
>  static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
>                 uint8_t element_size, uint8_t index_stride, uint8_t mtype);
>
> +/* Because of REG_GET_FIELD() being used, we put this function in the
> + * asic specific file.
> + */
> +static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
> +               struct tile_config *config)
> +{
> +       struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
> +
> +       config->gb_addr_config = adev->gfx.config.gb_addr_config;
> +       config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
> +                               MC_ARB_RAMCFG, NOOFBANK);
> +       config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
> +                               MC_ARB_RAMCFG, NOOFRANKS);
> +
> +       config->tile_config_ptr = adev->gfx.config.tile_mode_array;
> +       config->num_tile_configs =
> +                       ARRAY_SIZE(adev->gfx.config.tile_mode_array);
> +       config->macro_tile_config_ptr =
> +                       adev->gfx.config.macrotile_mode_array;
> +       config->num_macro_tile_configs =
> +                       ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
> +
> +       return 0;
> +}
> +
>  static const struct kfd2kgd_calls kfd2kgd = {
>         .init_gtt_mem_allocation = alloc_gtt_mem,
>         .free_gtt_mem = free_gtt_mem,
> @@ -126,7 +151,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
>         .write_vmid_invalidate_request = write_vmid_invalidate_request,
>         .get_fw_version = get_fw_version,
>         .alloc_memory_of_scratch = alloc_memory_of_scratch,
> -       .write_config_static_mem = write_config_static_mem
> +       .write_config_static_mem = write_config_static_mem,
> +       .get_tile_config = amdgpu_amdkfd_get_tile_config,
>  };
>
>  struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
> diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> index 1dd90b2..22774ac 100644
> --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> @@ -87,6 +87,17 @@ struct kgd2kfd_shared_resources {
>         size_t doorbell_start_offset;
>  };
>
> +struct tile_config {
> +       uint32_t *tile_config_ptr;
> +       uint32_t *macro_tile_config_ptr;
> +       uint32_t num_tile_configs;
> +       uint32_t num_macro_tile_configs;
> +
> +       uint32_t gb_addr_config;
> +       uint32_t num_banks;
> +       uint32_t num_ranks;
> +};
> +
>  /**
>   * struct kfd2kgd_calls
>   *
> @@ -203,6 +214,7 @@ struct kfd2kgd_calls {
>                         uint64_t va, uint32_t vmid);
>         int (*write_config_static_mem)(struct kgd_dev *kgd, bool swizzle_enable,
>                 uint8_t element_size, uint8_t index_stride, uint8_t mtype);
> +       int (*get_tile_config)(struct kgd_dev *kgd, struct tile_config *config);
>  };
>
>  /**
> --
> 2.7.4
>


This patch is:
Acked-by: Oded Gabbay <oded.gabbay@gmail.com>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 4/4] drm/amdkfd: Implement image tiling mode support
       [not found]     ` <1502513265-14281-5-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
@ 2017-08-13  9:08       ` Oded Gabbay
       [not found]         ` <CAFCwf10iQhAvxZdwCYBK6f0fBKdpgD=f0W886awm2fJVCB=hLQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Oded Gabbay @ 2017-08-13  9:08 UTC (permalink / raw)
  To: Felix Kuehling; +Cc: Yong Zhao, amd-gfx list

On Sat, Aug 12, 2017 at 7:47 AM, Felix Kuehling <Felix.Kuehling@amd.com> wrote:
> From: Yong Zhao <yong.zhao@amd.com>
>
> Signed-off-by: Yong Zhao <yong.zhao@amd.com>
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 43 ++++++++++++++++++++++++++++++++
>  include/uapi/linux/kfd_ioctl.h           | 37 ++++++++++++++++++++++++++-
>  2 files changed, 79 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 6be1bba..68ec830 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -890,6 +890,46 @@ static int kfd_ioctl_alloc_scratch_memory(struct file *filep,
>         return -EFAULT;
>  }
>
> +static int kfd_ioctl_get_tile_config(struct file *filep,
> +               struct kfd_process *p, void *data)
> +{
> +       struct kfd_ioctl_get_tile_config_args *args = data;
> +       struct kfd_dev *dev;
> +       struct tile_config config;
> +       int err = 0;
> +
> +       dev = kfd_device_by_id(args->gpu_id);
> +
> +       dev->kfd2kgd->get_tile_config(dev->kgd, &config);
> +
> +       args->gb_addr_config = config.gb_addr_config;
> +       args->num_banks = config.num_banks;
> +       args->num_ranks = config.num_ranks;
> +
> +       if (args->num_tile_configs > config.num_tile_configs)
> +               args->num_tile_configs = config.num_tile_configs;
> +       err = copy_to_user((void __user *)args->tile_config_ptr,
> +                       config.tile_config_ptr,
> +                       args->num_tile_configs * sizeof(uint32_t));
> +       if (err) {
> +               args->num_tile_configs = 0;
> +               return -EFAULT;
> +       }
> +
> +       if (args->num_macro_tile_configs > config.num_macro_tile_configs)
> +               args->num_macro_tile_configs =
> +                               config.num_macro_tile_configs;
> +       err = copy_to_user((void __user *)args->macro_tile_config_ptr,
> +                       config.macro_tile_config_ptr,
> +                       args->num_macro_tile_configs * sizeof(uint32_t));
> +       if (err) {
> +               args->num_macro_tile_configs = 0;
> +               return -EFAULT;
> +       }
> +
> +       return 0;
> +}
> +
>  #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
>         [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
>                             .cmd_drv = 0, .name = #ioctl}
> @@ -946,6 +986,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
>
>         AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_SCRATCH,
>                         kfd_ioctl_alloc_scratch_memory, 0),
> +
> +       AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
> +                       kfd_ioctl_get_tile_config, 0)
>  };
>
>  #define AMDKFD_CORE_IOCTL_COUNT        ARRAY_SIZE(amdkfd_ioctls)
> diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
> index 3dd86e1..10dcdc6 100644
> --- a/include/uapi/linux/kfd_ioctl.h
> +++ b/include/uapi/linux/kfd_ioctl.h
> @@ -239,6 +239,29 @@ struct kfd_ioctl_alloc_memory_of_scratch_args {
>         uint32_t pad;
>  };
>
> +struct kfd_ioctl_get_tile_config_args {
> +       /* to KFD: pointer to tile array */
> +       uint64_t tile_config_ptr;
> +       /* to KFD: pointer to macro tile array */
> +       uint64_t macro_tile_config_ptr;
> +       /* to KFD: array size allocated by user mode
> +        * from KFD: array size filled by kernel
> +        */
> +       uint32_t num_tile_configs;
> +       /* to KFD: array size allocated by user mode
> +        * from KFD: array size filled by kernel
> +        */
> +       uint32_t num_macro_tile_configs;
> +
> +       uint32_t gpu_id;                /* to KFD */
> +       uint32_t gb_addr_config;        /* from KFD */
> +       uint32_t num_banks;             /* from KFD */
> +       uint32_t num_ranks;             /* from KFD */
> +       /* struct size can be extended later if needed
> +        * without breaking ABI compatibility
> +        */
> +};
> +
>  #define AMDKFD_IOCTL_BASE 'K'
>  #define AMDKFD_IO(nr)                  _IO(AMDKFD_IOCTL_BASE, nr)
>  #define AMDKFD_IOR(nr, type)           _IOR(AMDKFD_IOCTL_BASE, nr, type)
> @@ -303,7 +326,19 @@ struct kfd_ioctl_alloc_memory_of_scratch_args {
>  #define AMDKFD_IOC_ALLOC_MEMORY_OF_SCRATCH     \
>                 AMDKFD_IOWR(0x15, struct kfd_ioctl_alloc_memory_of_scratch_args)
>
> +/* TODO:
> + * - AMDKFD_IOC_SET_CU_MASK
> + * - AMDKFD_IOC_SET_PROCESS_DGPU_APERTURE
> + * - AMDKFD_IOC_SET_TRAP_HANDLER
> + * - AMDKFD_IOC_GET_PROCESS_APERTURES_NEW
> + * - AMDKFD_IOC_GET_DMABUF_INFO
> + * - AMDKFD_IOC_IMPORT_DMABUF
> + */
> +
> +#define AMDKFD_IOC_GET_TILE_CONFIG                                      \
> +               AMDKFD_IOWR(0x1C, struct kfd_ioctl_get_tile_config_args)
> +
>  #define AMDKFD_COMMAND_START           0x01
> -#define AMDKFD_COMMAND_END             0x16
> +#define AMDKFD_COMMAND_END             0x1D

As in the previous patch, there is a hole here in the IOCTLs
numbering. I suggest that when you upstream new IOCTLs, you will put
them in consecutive numbers per the upstream driver, and then take
that change downstream because you can easily change it in your
driver.

Oded

>
>  #endif
> --
> 2.7.4
>

With the above fixed, this patch is:
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 2/4] drm/amdkfd: Adding new IOCTL for scratch memory
       [not found]         ` <CAFCwf11z1Uo9jd82XAGrapXA44KpY+KrV69nbXLkoX6v10_gig-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2017-08-14 15:10           ` Felix Kuehling
       [not found]             ` <22ab982e-a1b1-5d61-9eaa-f9c149cf3d5e-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Felix Kuehling @ 2017-08-14 15:10 UTC (permalink / raw)
  To: Oded Gabbay; +Cc: amd-gfx list

[dropping Moses and Ben, they no longer work for AMD and the email
addresses bounce]


On 2017-08-13 05:04 AM, Oded Gabbay wrote:
> On Sat, Aug 12, 2017 at 7:47 AM, Felix Kuehling <Felix.Kuehling@amd.com> wrote:
>> From: Moses Reuben <moses.reuben@amd.com>
>>
>> Signed-off-by: Moses Reuben <moses.reuben@amd.com>
>> Signed-off-by: Ben Goz <ben.goz@amd.com>
>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
[snip]
>> @@ -286,7 +293,17 @@ struct kfd_ioctl_wait_events_args {
>>  #define AMDKFD_IOC_DBG_WAVE_CONTROL            \
>>                 AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args)
>>
>> +/* TODO:
>> + * - AMDKFD_IOC_ALLOC_MEMORY_OF_GPU
>> + * - AMDKFD_IOC_FREE_MEMORY_OF_GPU
>> + * - AMDKFD_IOC_MAP_MEMORY_TO_GPU
>> + * - AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU
>> + */
>> +
>> +#define AMDKFD_IOC_ALLOC_MEMORY_OF_SCRATCH     \
>> +               AMDKFD_IOWR(0x15, struct kfd_ioctl_alloc_memory_of_scratch_args)
>> +
>>  #define AMDKFD_COMMAND_START           0x01
>> -#define AMDKFD_COMMAND_END             0x11
>> +#define AMDKFD_COMMAND_END             0x16
> You create a hole here, between 0x11 to 0x16. This would make the
> sanity check in kfd_ioctl() to be useless.

I'm creating the hole to match the ABI with our current ROCm releases.
The TODO comment lists the ioctls that will slot into the hole in future
patches.

kfd_ioctl checks that the function pointer is initialized and fails
otherwise. So I think this hole should work OK and attempts to call
unassigned ioctls should fail as expected:

        func = ioctl->func;

        if (unlikely(!func)) {
                dev_dbg(kfd_device, "no function\n");
                retcode = -EINVAL;
                goto err_i1;
        }


>
> Also, why not do a generic IOCTL for allocating GPU memory, and pass
> parameter if its scratch or something else, similar to amdgpu's
> "AMDGPU_GEM_DOMAIN_*" defines ?

The real memory allocation ioctl has more parameters. Also, I always
thought the name "ALLOC_MEMORY_OF_SCRATCH" was misleading, because this
ioctl doesn't really allocate anything, and there is no corresponding
"free". What it really does, it configures the virtual address of the
scratch backing memory for the process.

Maybe we could change the name of the ioctl (without changing the ABI)
to something like SET_SCRATCH_BACKING_VA. What do you think?

Regards,
  Felix

>
>>  #endif
>> --
>> 2.7.4
>>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 2/4] drm/amdkfd: Adding new IOCTL for scratch memory
       [not found]             ` <22ab982e-a1b1-5d61-9eaa-f9c149cf3d5e-5C7GfCeVMHo@public.gmane.org>
@ 2017-08-14 15:15               ` Christian König
       [not found]                 ` <c310b952-116b-376c-0794-232e69f88592-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Christian König @ 2017-08-14 15:15 UTC (permalink / raw)
  To: Felix Kuehling, Oded Gabbay; +Cc: amd-gfx list

Am 14.08.2017 um 17:10 schrieb Felix Kuehling:
> [dropping Moses and Ben, they no longer work for AMD and the email
> addresses bounce]
>
>
> On 2017-08-13 05:04 AM, Oded Gabbay wrote:
>> On Sat, Aug 12, 2017 at 7:47 AM, Felix Kuehling <Felix.Kuehling@amd.com> wrote:
>>> From: Moses Reuben <moses.reuben@amd.com>
>>>
>>> Signed-off-by: Moses Reuben <moses.reuben@amd.com>
>>> Signed-off-by: Ben Goz <ben.goz@amd.com>
>>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> [snip]
>>> @@ -286,7 +293,17 @@ struct kfd_ioctl_wait_events_args {
>>>   #define AMDKFD_IOC_DBG_WAVE_CONTROL            \
>>>                  AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args)
>>>
>>> +/* TODO:
>>> + * - AMDKFD_IOC_ALLOC_MEMORY_OF_GPU
>>> + * - AMDKFD_IOC_FREE_MEMORY_OF_GPU
>>> + * - AMDKFD_IOC_MAP_MEMORY_TO_GPU
>>> + * - AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU
>>> + */
>>> +
>>> +#define AMDKFD_IOC_ALLOC_MEMORY_OF_SCRATCH     \
>>> +               AMDKFD_IOWR(0x15, struct kfd_ioctl_alloc_memory_of_scratch_args)
>>> +
>>>   #define AMDKFD_COMMAND_START           0x01
>>> -#define AMDKFD_COMMAND_END             0x11
>>> +#define AMDKFD_COMMAND_END             0x16
>> You create a hole here, between 0x11 to 0x16. This would make the
>> sanity check in kfd_ioctl() to be useless.
> I'm creating the hole to match the ABI with our current ROCm releases.
> The TODO comment lists the ioctls that will slot into the hole in future
> patches.

That's usually not such a good idea, cause interface rare upstream as 
they are designed internally.

Probably better to just add them to the end of the list.

>
> kfd_ioctl checks that the function pointer is initialized and fails
> otherwise. So I think this hole should work OK and attempts to call
> unassigned ioctls should fail as expected:
>
>          func = ioctl->func;
>
>          if (unlikely(!func)) {
>                  dev_dbg(kfd_device, "no function\n");
>                  retcode = -EINVAL;
>                  goto err_i1;
>          }
>
>
>> Also, why not do a generic IOCTL for allocating GPU memory, and pass
>> parameter if its scratch or something else, similar to amdgpu's
>> "AMDGPU_GEM_DOMAIN_*" defines ?
> The real memory allocation ioctl has more parameters. Also, I always
> thought the name "ALLOC_MEMORY_OF_SCRATCH" was misleading, because this
> ioctl doesn't really allocate anything, and there is no corresponding
> "free". What it really does, it configures the virtual address of the
> scratch backing memory for the process.
>
> Maybe we could change the name of the ioctl (without changing the ABI)
> to something like SET_SCRATCH_BACKING_VA. What do you think?

That's better, but I think something in the direction of VA address 
config would be better.

BTW: What exactly this this good for?

Regards,
Christian.

>
> Regards,
>    Felix
>
>>>   #endif
>>> --
>>> 2.7.4
>>>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 4/4] drm/amdkfd: Implement image tiling mode support
       [not found]         ` <CAFCwf10iQhAvxZdwCYBK6f0fBKdpgD=f0W886awm2fJVCB=hLQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2017-08-14 15:18           ` Felix Kuehling
       [not found]             ` <fc37f1a8-9c12-f1da-cdab-f03b50643f19-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Felix Kuehling @ 2017-08-14 15:18 UTC (permalink / raw)
  To: Oded Gabbay; +Cc: Yong Zhao, amd-gfx list

On 2017-08-13 05:08 AM, Oded Gabbay wrote:
> As in the previous patch, there is a hole here in the IOCTLs
> numbering. I suggest that when you upstream new IOCTLs, you will put
> them in consecutive numbers per the upstream driver, and then take
> that change downstream because you can easily change it in your
> driver.

We can easily change it downstream, but it makes it harder for people to
test the new upstream kernel with already released ROCm user mode
drivers that are easy to install and without having to wait a few weeks
for the next release.

If testability is not a priority, then I'd rather upstream the new
ioctls in the order in which we made them. The only reason I'm going
out-of-order is to make testing as easy as possible, as early as
possible, with the most recently released user mode stack.

It's your call, though.

Regards,
  Felix

>
> Oded

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 2/4] drm/amdkfd: Adding new IOCTL for scratch memory
       [not found]                 ` <c310b952-116b-376c-0794-232e69f88592-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-08-14 15:31                   ` Felix Kuehling
       [not found]                     ` <33308ac3-bdf2-21ac-2122-b43702fdd95f-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Felix Kuehling @ 2017-08-14 15:31 UTC (permalink / raw)
  To: Christian König, Oded Gabbay; +Cc: amd-gfx list

On 2017-08-14 11:15 AM, Christian König wrote:
> Am 14.08.2017 um 17:10 schrieb Felix Kuehling:
>> [dropping Moses and Ben, they no longer work for AMD and the email
>> addresses bounce]
>>
>>
>> On 2017-08-13 05:04 AM, Oded Gabbay wrote:
>>> On Sat, Aug 12, 2017 at 7:47 AM, Felix Kuehling
>>> <Felix.Kuehling@amd.com> wrote:
>>>> From: Moses Reuben <moses.reuben@amd.com>
>>>>
>>>> Signed-off-by: Moses Reuben <moses.reuben@amd.com>
>>>> Signed-off-by: Ben Goz <ben.goz@amd.com>
>>>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>> [snip]
>>>> @@ -286,7 +293,17 @@ struct kfd_ioctl_wait_events_args {
>>>>   #define AMDKFD_IOC_DBG_WAVE_CONTROL            \
>>>>                  AMDKFD_IOW(0x10, struct
>>>> kfd_ioctl_dbg_wave_control_args)
>>>>
>>>> +/* TODO:
>>>> + * - AMDKFD_IOC_ALLOC_MEMORY_OF_GPU
>>>> + * - AMDKFD_IOC_FREE_MEMORY_OF_GPU
>>>> + * - AMDKFD_IOC_MAP_MEMORY_TO_GPU
>>>> + * - AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU
>>>> + */
>>>> +
>>>> +#define AMDKFD_IOC_ALLOC_MEMORY_OF_SCRATCH     \
>>>> +               AMDKFD_IOWR(0x15, struct
>>>> kfd_ioctl_alloc_memory_of_scratch_args)
>>>> +
>>>>   #define AMDKFD_COMMAND_START           0x01
>>>> -#define AMDKFD_COMMAND_END             0x11
>>>> +#define AMDKFD_COMMAND_END             0x16
>>> You create a hole here, between 0x11 to 0x16. This would make the
>>> sanity check in kfd_ioctl() to be useless.
>> I'm creating the hole to match the ABI with our current ROCm releases.
>> The TODO comment lists the ioctls that will slot into the hole in future
>> patches.
>
> That's usually not such a good idea, cause interface rare upstream as
> they are designed internally.
>
> Probably better to just add them to the end of the list.

Repeating the same argument I made on another email:

Changing the ioctl numbers to something different to our ROCm releases,
means you can't test until we make another release with updated ioctl
numbers.

If testability is not a priority, then I'd rather upstream the new
ioctls in the order in which we made them. The only reason I'm going
out-of-order is to make testing as easy as possible, as early as
possible, with the most recently released user mode stack.

>
>>
>> kfd_ioctl checks that the function pointer is initialized and fails
>> otherwise. So I think this hole should work OK and attempts to call
>> unassigned ioctls should fail as expected:
>>
>>          func = ioctl->func;
>>
>>          if (unlikely(!func)) {
>>                  dev_dbg(kfd_device, "no function\n");
>>                  retcode = -EINVAL;
>>                  goto err_i1;
>>          }
>>
>>
>>> Also, why not do a generic IOCTL for allocating GPU memory, and pass
>>> parameter if its scratch or something else, similar to amdgpu's
>>> "AMDGPU_GEM_DOMAIN_*" defines ?
>> The real memory allocation ioctl has more parameters. Also, I always
>> thought the name "ALLOC_MEMORY_OF_SCRATCH" was misleading, because this
>> ioctl doesn't really allocate anything, and there is no corresponding
>> "free". What it really does, it configures the virtual address of the
>> scratch backing memory for the process.
>>
>> Maybe we could change the name of the ioctl (without changing the ABI)
>> to something like SET_SCRATCH_BACKING_VA. What do you think?
>
> That's better, but I think something in the direction of VA address
> config would be better.
>
> BTW: What exactly this this good for?

Scratch memory is private memory per work-item. It's used when a shader
program has too few registers available. With HSA we use flat scratch
addressing, where shaders can access private memory in a special scratch
aperture using normal memory instructions. Using the same virtual
address, each work item gets its own private piece of memory. The
hardware does the address translation from the VA in the private
aperture to a scratch-backing VA. The application is responsible for
allocating the memory to back that scratch area, and to map it somewhere
in its virtual address space.

This ioctl tells the hardware (or HWS firmware) the VA of the scratch
backing memory.

Regards,
  Felix

>
> Regards,
> Christian.
>
>>
>> Regards,
>>    Felix
>>
>>>>   #endif
>>>> -- 
>>>> 2.7.4
>>>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 4/4] drm/amdkfd: Implement image tiling mode support
       [not found]             ` <fc37f1a8-9c12-f1da-cdab-f03b50643f19-5C7GfCeVMHo@public.gmane.org>
@ 2017-08-14 22:55               ` Felix Kuehling
       [not found]                 ` <2649fca7-9617-e434-a75a-f76d7e07d0f5-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Felix Kuehling @ 2017-08-14 22:55 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Oded Gabbay

OK, I was hoping to keep the ABI unchanged during upstreaming, but I
realize that may not be realistic. And I feel I'm getting valuable
feedback, so I don't want to limit myself with ABI requirements that are
not necessary.

I still don't have commit access to git repos on freedesktop.org. But
I'm now planning to upload my WIP to the ROCm repositories on Github as
upstreaming branches for ROCk (kernel) and ROCt (Thunk). I should have
that in place in the next day or two.

If I need to tweak the ABI in the upstreaming process, all I need to
change in user mode is the Thunk, and I can make it available on my
GitHub branch for people to try, regardless of ROCm release schedules.
All the rest of the user mode stack should work unchanged as long as the
Thunk API doesn't need to be changed.

With that in mind, I'll remove the gaps from the ioctl assignment and
accept that KFD will diverge (for the better) from our internal branch
more or less significantly in the upstreaming process.

What's the plan for upstreaming on your side. Do you want to push things
incrementally? Or do you prefer to wait until everything (or most of it)
is reviewed and tested on an upstream-based branch? If it's the latter,
we could reshuffle the ioctls later to better match the current release
ABI before going upstream.

Regards,
  Felix

On 2017-08-14 11:18 AM, Felix Kuehling wrote:
> On 2017-08-13 05:08 AM, Oded Gabbay wrote:
>> As in the previous patch, there is a hole here in the IOCTLs
>> numbering. I suggest that when you upstream new IOCTLs, you will put
>> them in consecutive numbers per the upstream driver, and then take
>> that change downstream because you can easily change it in your
>> driver.
> We can easily change it downstream, but it makes it harder for people to
> test the new upstream kernel with already released ROCm user mode
> drivers that are easy to install and without having to wait a few weeks
> for the next release.
>
> If testability is not a priority, then I'd rather upstream the new
> ioctls in the order in which we made them. The only reason I'm going
> out-of-order is to make testing as easy as possible, as early as
> possible, with the most recently released user mode stack.
>
> It's your call, though.
>
> Regards,
>   Felix
>
>> Oded
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] drm/amdgpu: Adding new kgd/kfd interface functions to support scratch memory
       [not found]         ` <CAFCwf13YDgfX_3EFhQ8sE-8D7ApyDd9zwtqazW6Ups2dPReMqw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2017-08-15  1:12           ` Felix Kuehling
       [not found]             ` <c42dc57b-999b-250b-8ebe-214502869191-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Felix Kuehling @ 2017-08-15  1:12 UTC (permalink / raw)
  To: Oded Gabbay, Marek Olšák, Deucher, Alexander; +Cc: amd-gfx list

[+Marek, Alex for comment, see below]

On 2017-08-13 04:56 AM, Oded Gabbay wrote:
> On Sat, Aug 12, 2017 at 7:47 AM, Felix Kuehling <Felix.Kuehling@amd.com> wrote:
>> From: Moses Reuben <moses.reuben@amd.com>
>>
>> Signed-off-by: Moses Reuben <moses.reuben@amd.com>
>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 34 +++++++++++++++++++++-
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 35 ++++++++++++++++++++++-
>>  drivers/gpu/drm/amd/include/kgd_kfd_interface.h   |  4 +++
>>  3 files changed, 71 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
>> index 994d262..11d515a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
>> @@ -135,6 +135,10 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
>>  static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
>>
>>  static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
>> +static int alloc_memory_of_scratch(struct kgd_dev *kgd,
>> +                                        uint64_t va, uint32_t vmid);
>> +static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
>> +               uint8_t element_size, uint8_t index_stride, uint8_t mtype);
>>
>>  static const struct kfd2kgd_calls kfd2kgd = {
>>         .init_gtt_mem_allocation = alloc_gtt_mem,
>> @@ -159,7 +163,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
>>         .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
>>         .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
>>         .write_vmid_invalidate_request = write_vmid_invalidate_request,
>> -       .get_fw_version = get_fw_version
>> +       .get_fw_version = get_fw_version,
>> +       .alloc_memory_of_scratch = alloc_memory_of_scratch,
>> +       .write_config_static_mem = write_config_static_mem
>>  };
>>
>>  struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
>> @@ -652,6 +658,32 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
>>         WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
>>  }
>>
>> +static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
>> +               uint8_t element_size, uint8_t index_stride, uint8_t mtype)
>> +{
>> +       uint32_t reg;
>> +       struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
>> +
>> +       reg = swizzle_enable << SH_STATIC_MEM_CONFIG__SWIZZLE_ENABLE__SHIFT |
>> +               element_size << SH_STATIC_MEM_CONFIG__ELEMENT_SIZE__SHIFT |
>> +               index_stride << SH_STATIC_MEM_CONFIG__INDEX_STRIDE__SHIFT |
>> +               mtype << SH_STATIC_MEM_CONFIG__PRIVATE_MTYPE__SHIFT;
>> +
>> +       WREG32(mmSH_STATIC_MEM_CONFIG, reg);
> Don't you need to select and lock srbm before you write to this register ?

No, this register seems to be global. SRBM_GFX_CNTL had no effect on it
when I experimented with it.

Since this is global initialization, I'm wondering if this should be
moved into part of the amdgpu initialization. Having amdkfd call back
into amdgpu like this during its initialization always seemed like a bit
roundabout.

Marek, do you know if this register affects Mesa performance or
behaviour with respect to scratch memory. According to the register spec
it only affects flat scratch memory addressing. Not sure if you're using
that addressing scheme in Mesa.

Alex, I see that SH_STATIC_MEM_CONFIG gets initialized in
gfx_v[78]_0_gpu_init identically to what we do in KFD. You're doing it
per-VMID (under cik_srbm_select). But my experiments show that this
register is global. So I think your initialization already works for KFD
and we can probably just drop this from the KFD initialization. Do you
have access to hardware docs that confirm that it's global?

Regards,
  Felix

>
>> +       return 0;
>> +}
>> +static int alloc_memory_of_scratch(struct kgd_dev *kgd,
>> +                                uint64_t va, uint32_t vmid)
>> +{
>> +       struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
>> +
>> +       lock_srbm(kgd, 0, 0, 0, vmid);
>> +       WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
>> +       unlock_srbm(kgd);
>> +
>> +       return 0;
>> +}
>> +
>>  static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
>>  {
>>         struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
>> index 29a6f5d..ef677e5 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
>> @@ -94,6 +94,10 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
>>                 uint8_t vmid);
>>  static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
>>  static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
>> +static int alloc_memory_of_scratch(struct kgd_dev *kgd,
>> +                                uint64_t va, uint32_t vmid);
>> +static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
>> +               uint8_t element_size, uint8_t index_stride, uint8_t mtype);
>>
>>  static const struct kfd2kgd_calls kfd2kgd = {
>>         .init_gtt_mem_allocation = alloc_gtt_mem,
>> @@ -120,12 +124,15 @@ static const struct kfd2kgd_calls kfd2kgd = {
>>         .get_atc_vmid_pasid_mapping_valid =
>>                         get_atc_vmid_pasid_mapping_valid,
>>         .write_vmid_invalidate_request = write_vmid_invalidate_request,
>> -       .get_fw_version = get_fw_version
>> +       .get_fw_version = get_fw_version,
>> +       .alloc_memory_of_scratch = alloc_memory_of_scratch,
>> +       .write_config_static_mem = write_config_static_mem
>>  };
>>
>>  struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
>>  {
>>         return (struct kfd2kgd_calls *)&kfd2kgd;
>> +       return (struct kfd2kgd_calls *)&kfd2kgd;
>>  }
>>
>>  static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
>> @@ -573,6 +580,32 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
>>         return 0;
>>  }
>>
>> +static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
>> +               uint8_t element_size, uint8_t index_stride, uint8_t mtype)
>> +{
>> +       uint32_t reg;
>> +       struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
>> +
>> +       reg = swizzle_enable << SH_STATIC_MEM_CONFIG__SWIZZLE_ENABLE__SHIFT |
>> +               element_size << SH_STATIC_MEM_CONFIG__ELEMENT_SIZE__SHIFT |
>> +               index_stride << SH_STATIC_MEM_CONFIG__INDEX_STRIDE__SHIFT |
>> +               mtype << SH_STATIC_MEM_CONFIG__PRIVATE_MTYPE__SHIFT;
>> +
>> +       WREG32(mmSH_STATIC_MEM_CONFIG, reg);
> Same comment as above.
>
>> +       return 0;
>> +}
>> +static int alloc_memory_of_scratch(struct kgd_dev *kgd,
>> +                                uint64_t va, uint32_t vmid)
>> +{
>> +       struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
>> +
>> +       lock_srbm(kgd, 0, 0, 0, vmid);
>> +       WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
>> +       unlock_srbm(kgd);
>> +
>> +       return 0;
>> +}
>> +
>>  static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
>>  {
>>         struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
>> diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
>> index ffafda0..1dd90b2 100644
>> --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
>> +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
>> @@ -199,6 +199,10 @@ struct kfd2kgd_calls {
>>
>>         uint16_t (*get_fw_version)(struct kgd_dev *kgd,
>>                                 enum kgd_engine_type type);
>> +       int (*alloc_memory_of_scratch)(struct kgd_dev *kgd,
>> +                       uint64_t va, uint32_t vmid);
>> +       int (*write_config_static_mem)(struct kgd_dev *kgd, bool swizzle_enable,
>> +               uint8_t element_size, uint8_t index_stride, uint8_t mtype);
>>  };
>>
>>  /**
>> --
>> 2.7.4
>>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 4/4] drm/amdkfd: Implement image tiling mode support
       [not found]                 ` <2649fca7-9617-e434-a75a-f76d7e07d0f5-5C7GfCeVMHo@public.gmane.org>
@ 2017-08-15  8:21                   ` Christian König
  2017-08-15 10:20                   ` Oded Gabbay
  1 sibling, 0 replies; 28+ messages in thread
From: Christian König @ 2017-08-15  8:21 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Oded Gabbay

Am 15.08.2017 um 00:55 schrieb Felix Kuehling:
> OK, I was hoping to keep the ABI unchanged during upstreaming, but I
> realize that may not be realistic. And I feel I'm getting valuable
> feedback, so I don't want to limit myself with ABI requirements that are
> not necessary.

Yeah, agree. On the GFX side we keep the numbers and flags which are not 
upstream separated from the ones which are upstream.

I strongly suggest to do the same thing for KFD IOCTLs as well, cause 
when we release non upstream features and later find that we need to 
change the IOCTLs then this would silently break and that's never a good 
thing.

Regards,
Christian.

>
> I still don't have commit access to git repos on freedesktop.org. But
> I'm now planning to upload my WIP to the ROCm repositories on Github as
> upstreaming branches for ROCk (kernel) and ROCt (Thunk). I should have
> that in place in the next day or two.
>
> If I need to tweak the ABI in the upstreaming process, all I need to
> change in user mode is the Thunk, and I can make it available on my
> GitHub branch for people to try, regardless of ROCm release schedules.
> All the rest of the user mode stack should work unchanged as long as the
> Thunk API doesn't need to be changed.
>
> With that in mind, I'll remove the gaps from the ioctl assignment and
> accept that KFD will diverge (for the better) from our internal branch
> more or less significantly in the upstreaming process.
>
> What's the plan for upstreaming on your side. Do you want to push things
> incrementally? Or do you prefer to wait until everything (or most of it)
> is reviewed and tested on an upstream-based branch? If it's the latter,
> we could reshuffle the ioctls later to better match the current release
> ABI before going upstream.
>
> Regards,
>    Felix
>
> On 2017-08-14 11:18 AM, Felix Kuehling wrote:
>> On 2017-08-13 05:08 AM, Oded Gabbay wrote:
>>> As in the previous patch, there is a hole here in the IOCTLs
>>> numbering. I suggest that when you upstream new IOCTLs, you will put
>>> them in consecutive numbers per the upstream driver, and then take
>>> that change downstream because you can easily change it in your
>>> driver.
>> We can easily change it downstream, but it makes it harder for people to
>> test the new upstream kernel with already released ROCm user mode
>> drivers that are easy to install and without having to wait a few weeks
>> for the next release.
>>
>> If testability is not a priority, then I'd rather upstream the new
>> ioctls in the order in which we made them. The only reason I'm going
>> out-of-order is to make testing as easy as possible, as early as
>> possible, with the most recently released user mode stack.
>>
>> It's your call, though.
>>
>> Regards,
>>    Felix
>>
>>> Oded
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 2/4] drm/amdkfd: Adding new IOCTL for scratch memory
       [not found]                     ` <33308ac3-bdf2-21ac-2122-b43702fdd95f-5C7GfCeVMHo@public.gmane.org>
@ 2017-08-15  8:24                       ` Christian König
       [not found]                         ` <b114b316-1142-11b8-6393-5f903bb2d663-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Christian König @ 2017-08-15  8:24 UTC (permalink / raw)
  To: Felix Kuehling, Oded Gabbay; +Cc: amd-gfx list

Am 14.08.2017 um 17:31 schrieb Felix Kuehling:
> [SNIP]
> Repeating the same argument I made on another email:

Commented on that in the other mail, let's keep the discussion on this 
topic there.

>> BTW: What exactly this this good for?
> Scratch memory is private memory per work-item. It's used when a shader
> program has too few registers available. With HSA we use flat scratch
> addressing, where shaders can access private memory in a special scratch
> aperture using normal memory instructions. Using the same virtual
> address, each work item gets its own private piece of memory. The
> hardware does the address translation from the VA in the private
> aperture to a scratch-backing VA. The application is responsible for
> allocating the memory to back that scratch area, and to map it somewhere
> in its virtual address space.
>
> This ioctl tells the hardware (or HWS firmware) the VA of the scratch
> backing memory.

Ok, you've got me lost here. Not that I'm deeply into that stuff, but my 
last status is that those apertures are global and not per/process or VMID.

That would mean that when two processes try to set two different 
addresses we are completely lost here.

Christian.
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] drm/amdgpu: Adding new kgd/kfd interface functions to support scratch memory
       [not found]             ` <c42dc57b-999b-250b-8ebe-214502869191-5C7GfCeVMHo@public.gmane.org>
@ 2017-08-15 10:07               ` Marek Olšák
  2017-08-15 14:49               ` Deucher, Alexander
  1 sibling, 0 replies; 28+ messages in thread
From: Marek Olšák @ 2017-08-15 10:07 UTC (permalink / raw)
  To: Felix Kuehling; +Cc: Oded Gabbay, Deucher, Alexander, amd-gfx list

Mesa doesn't use flat scratch.

Marek

On Tue, Aug 15, 2017 at 3:12 AM, Felix Kuehling <felix.kuehling@amd.com> wrote:
> [+Marek, Alex for comment, see below]
>
> On 2017-08-13 04:56 AM, Oded Gabbay wrote:
>> On Sat, Aug 12, 2017 at 7:47 AM, Felix Kuehling <Felix.Kuehling@amd.com> wrote:
>>> From: Moses Reuben <moses.reuben@amd.com>
>>>
>>> Signed-off-by: Moses Reuben <moses.reuben@amd.com>
>>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>> ---
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 34 +++++++++++++++++++++-
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 35 ++++++++++++++++++++++-
>>>  drivers/gpu/drm/amd/include/kgd_kfd_interface.h   |  4 +++
>>>  3 files changed, 71 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
>>> index 994d262..11d515a 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
>>> @@ -135,6 +135,10 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
>>>  static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
>>>
>>>  static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
>>> +static int alloc_memory_of_scratch(struct kgd_dev *kgd,
>>> +                                        uint64_t va, uint32_t vmid);
>>> +static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
>>> +               uint8_t element_size, uint8_t index_stride, uint8_t mtype);
>>>
>>>  static const struct kfd2kgd_calls kfd2kgd = {
>>>         .init_gtt_mem_allocation = alloc_gtt_mem,
>>> @@ -159,7 +163,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
>>>         .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
>>>         .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
>>>         .write_vmid_invalidate_request = write_vmid_invalidate_request,
>>> -       .get_fw_version = get_fw_version
>>> +       .get_fw_version = get_fw_version,
>>> +       .alloc_memory_of_scratch = alloc_memory_of_scratch,
>>> +       .write_config_static_mem = write_config_static_mem
>>>  };
>>>
>>>  struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
>>> @@ -652,6 +658,32 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
>>>         WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
>>>  }
>>>
>>> +static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
>>> +               uint8_t element_size, uint8_t index_stride, uint8_t mtype)
>>> +{
>>> +       uint32_t reg;
>>> +       struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
>>> +
>>> +       reg = swizzle_enable << SH_STATIC_MEM_CONFIG__SWIZZLE_ENABLE__SHIFT |
>>> +               element_size << SH_STATIC_MEM_CONFIG__ELEMENT_SIZE__SHIFT |
>>> +               index_stride << SH_STATIC_MEM_CONFIG__INDEX_STRIDE__SHIFT |
>>> +               mtype << SH_STATIC_MEM_CONFIG__PRIVATE_MTYPE__SHIFT;
>>> +
>>> +       WREG32(mmSH_STATIC_MEM_CONFIG, reg);
>> Don't you need to select and lock srbm before you write to this register ?
>
> No, this register seems to be global. SRBM_GFX_CNTL had no effect on it
> when I experimented with it.
>
> Since this is global initialization, I'm wondering if this should be
> moved into part of the amdgpu initialization. Having amdkfd call back
> into amdgpu like this during its initialization always seemed like a bit
> roundabout.
>
> Marek, do you know if this register affects Mesa performance or
> behaviour with respect to scratch memory. According to the register spec
> it only affects flat scratch memory addressing. Not sure if you're using
> that addressing scheme in Mesa.
>
> Alex, I see that SH_STATIC_MEM_CONFIG gets initialized in
> gfx_v[78]_0_gpu_init identically to what we do in KFD. You're doing it
> per-VMID (under cik_srbm_select). But my experiments show that this
> register is global. So I think your initialization already works for KFD
> and we can probably just drop this from the KFD initialization. Do you
> have access to hardware docs that confirm that it's global?
>
> Regards,
>   Felix
>
>>
>>> +       return 0;
>>> +}
>>> +static int alloc_memory_of_scratch(struct kgd_dev *kgd,
>>> +                                uint64_t va, uint32_t vmid)
>>> +{
>>> +       struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
>>> +
>>> +       lock_srbm(kgd, 0, 0, 0, vmid);
>>> +       WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
>>> +       unlock_srbm(kgd);
>>> +
>>> +       return 0;
>>> +}
>>> +
>>>  static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
>>>  {
>>>         struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
>>> index 29a6f5d..ef677e5 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
>>> @@ -94,6 +94,10 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
>>>                 uint8_t vmid);
>>>  static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
>>>  static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
>>> +static int alloc_memory_of_scratch(struct kgd_dev *kgd,
>>> +                                uint64_t va, uint32_t vmid);
>>> +static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
>>> +               uint8_t element_size, uint8_t index_stride, uint8_t mtype);
>>>
>>>  static const struct kfd2kgd_calls kfd2kgd = {
>>>         .init_gtt_mem_allocation = alloc_gtt_mem,
>>> @@ -120,12 +124,15 @@ static const struct kfd2kgd_calls kfd2kgd = {
>>>         .get_atc_vmid_pasid_mapping_valid =
>>>                         get_atc_vmid_pasid_mapping_valid,
>>>         .write_vmid_invalidate_request = write_vmid_invalidate_request,
>>> -       .get_fw_version = get_fw_version
>>> +       .get_fw_version = get_fw_version,
>>> +       .alloc_memory_of_scratch = alloc_memory_of_scratch,
>>> +       .write_config_static_mem = write_config_static_mem
>>>  };
>>>
>>>  struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
>>>  {
>>>         return (struct kfd2kgd_calls *)&kfd2kgd;
>>> +       return (struct kfd2kgd_calls *)&kfd2kgd;
>>>  }
>>>
>>>  static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
>>> @@ -573,6 +580,32 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
>>>         return 0;
>>>  }
>>>
>>> +static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
>>> +               uint8_t element_size, uint8_t index_stride, uint8_t mtype)
>>> +{
>>> +       uint32_t reg;
>>> +       struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
>>> +
>>> +       reg = swizzle_enable << SH_STATIC_MEM_CONFIG__SWIZZLE_ENABLE__SHIFT |
>>> +               element_size << SH_STATIC_MEM_CONFIG__ELEMENT_SIZE__SHIFT |
>>> +               index_stride << SH_STATIC_MEM_CONFIG__INDEX_STRIDE__SHIFT |
>>> +               mtype << SH_STATIC_MEM_CONFIG__PRIVATE_MTYPE__SHIFT;
>>> +
>>> +       WREG32(mmSH_STATIC_MEM_CONFIG, reg);
>> Same comment as above.
>>
>>> +       return 0;
>>> +}
>>> +static int alloc_memory_of_scratch(struct kgd_dev *kgd,
>>> +                                uint64_t va, uint32_t vmid)
>>> +{
>>> +       struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
>>> +
>>> +       lock_srbm(kgd, 0, 0, 0, vmid);
>>> +       WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
>>> +       unlock_srbm(kgd);
>>> +
>>> +       return 0;
>>> +}
>>> +
>>>  static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
>>>  {
>>>         struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
>>> diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
>>> index ffafda0..1dd90b2 100644
>>> --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
>>> +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
>>> @@ -199,6 +199,10 @@ struct kfd2kgd_calls {
>>>
>>>         uint16_t (*get_fw_version)(struct kgd_dev *kgd,
>>>                                 enum kgd_engine_type type);
>>> +       int (*alloc_memory_of_scratch)(struct kgd_dev *kgd,
>>> +                       uint64_t va, uint32_t vmid);
>>> +       int (*write_config_static_mem)(struct kgd_dev *kgd, bool swizzle_enable,
>>> +               uint8_t element_size, uint8_t index_stride, uint8_t mtype);
>>>  };
>>>
>>>  /**
>>> --
>>> 2.7.4
>>>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 4/4] drm/amdkfd: Implement image tiling mode support
       [not found]                 ` <2649fca7-9617-e434-a75a-f76d7e07d0f5-5C7GfCeVMHo@public.gmane.org>
  2017-08-15  8:21                   ` Christian König
@ 2017-08-15 10:20                   ` Oded Gabbay
       [not found]                     ` <CAFCwf13bFDhL=_X-zx8AJE+0sD0Aw9z5Rku_7qDgY6VEdtS+qA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  1 sibling, 1 reply; 28+ messages in thread
From: Oded Gabbay @ 2017-08-15 10:20 UTC (permalink / raw)
  To: Felix Kuehling; +Cc: amd-gfx list

On Tue, Aug 15, 2017 at 1:55 AM, Felix Kuehling <felix.kuehling@amd.com> wrote:
> OK, I was hoping to keep the ABI unchanged during upstreaming, but I
> realize that may not be realistic. And I feel I'm getting valuable
> feedback, so I don't want to limit myself with ABI requirements that are
> not necessary.
I think that's the correct way to go forward.

>
> I still don't have commit access to git repos on freedesktop.org. But
> I'm now planning to upload my WIP to the ROCm repositories on Github as
> upstreaming branches for ROCk (kernel) and ROCt (Thunk). I should have
> that in place in the next day or two.
>
> If I need to tweak the ABI in the upstreaming process, all I need to
> change in user mode is the Thunk, and I can make it available on my
> GitHub branch for people to try, regardless of ROCm release schedules.
> All the rest of the user mode stack should work unchanged as long as the
> Thunk API doesn't need to be changed.
>
> With that in mind, I'll remove the gaps from the ioctl assignment and
> accept that KFD will diverge (for the better) from our internal branch
> more or less significantly in the upstreaming process.
>
> What's the plan for upstreaming on your side. Do you want to push things
> incrementally? Or do you prefer to wait until everything (or most of it)
> is reviewed and tested on an upstream-based branch? If it's the latter,
> we could reshuffle the ioctls later to better match the current release
> ABI before going upstream.
>
> Regards,
>   Felix

I prefer to do it incrementally, to avoid very large patch-sets which
usually end up in longer cycles of review-fix, which causes you more
pain because internal development continues during this time and you
need to keep everything synchronized. If you do it in small pieces,
there is more chance it will get to upstream faster and then you can
cross it off your list permanently and no longer worry about it not
being synchronized with internal development.

If you are talking about the current patch-set (you actually sent 2
patch-sets), then once you rebase them on the branches I mentioned,
they are more or less good to go (except from very small fixes we
talked about). If you can do it during this week, I think we can make
it for the 4.14 merge window.

Does that make sense ?

Oded

>
> On 2017-08-14 11:18 AM, Felix Kuehling wrote:
>> On 2017-08-13 05:08 AM, Oded Gabbay wrote:
>>> As in the previous patch, there is a hole here in the IOCTLs
>>> numbering. I suggest that when you upstream new IOCTLs, you will put
>>> them in consecutive numbers per the upstream driver, and then take
>>> that change downstream because you can easily change it in your
>>> driver.
>> We can easily change it downstream, but it makes it harder for people to
>> test the new upstream kernel with already released ROCm user mode
>> drivers that are easy to install and without having to wait a few weeks
>> for the next release.
>>
>> If testability is not a priority, then I'd rather upstream the new
>> ioctls in the order in which we made them. The only reason I'm going
>> out-of-order is to make testing as easy as possible, as early as
>> possible, with the most recently released user mode stack.
>>
>> It's your call, though.
>>
>> Regards,
>>   Felix
>>
>>> Oded
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 2/4] drm/amdkfd: Adding new IOCTL for scratch memory
       [not found]                         ` <b114b316-1142-11b8-6393-5f903bb2d663-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-08-15 14:33                           ` Alex Deucher
  2017-08-15 16:03                           ` Felix Kuehling
  1 sibling, 0 replies; 28+ messages in thread
From: Alex Deucher @ 2017-08-15 14:33 UTC (permalink / raw)
  To: Christian König; +Cc: Oded Gabbay, Felix Kuehling, amd-gfx list

On Tue, Aug 15, 2017 at 4:24 AM, Christian König
<deathsimple@vodafone.de> wrote:
> Am 14.08.2017 um 17:31 schrieb Felix Kuehling:
>>
>> [SNIP]
>> Repeating the same argument I made on another email:
>
>
> Commented on that in the other mail, let's keep the discussion on this topic
> there.
>
>>> BTW: What exactly this this good for?
>>
>> Scratch memory is private memory per work-item. It's used when a shader
>> program has too few registers available. With HSA we use flat scratch
>> addressing, where shaders can access private memory in a special scratch
>> aperture using normal memory instructions. Using the same virtual
>> address, each work item gets its own private piece of memory. The
>> hardware does the address translation from the VA in the private
>> aperture to a scratch-backing VA. The application is responsible for
>> allocating the memory to back that scratch area, and to map it somewhere
>> in its virtual address space.
>>
>> This ioctl tells the hardware (or HWS firmware) the VA of the scratch
>> backing memory.
>
>
> Ok, you've got me lost here. Not that I'm deeply into that stuff, but my
> last status is that those apertures are global and not per/process or VMID.
>
> That would mean that when two processes try to set two different addresses
> we are completely lost here.
>

The scratch and lds apertures are per vmid.  See gfx_v8_0_init_compute_vmid()

Alex
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH 1/4] drm/amdgpu: Adding new kgd/kfd interface functions to support scratch memory
       [not found]             ` <c42dc57b-999b-250b-8ebe-214502869191-5C7GfCeVMHo@public.gmane.org>
  2017-08-15 10:07               ` Marek Olšák
@ 2017-08-15 14:49               ` Deucher, Alexander
  1 sibling, 0 replies; 28+ messages in thread
From: Deucher, Alexander @ 2017-08-15 14:49 UTC (permalink / raw)
  To: Kuehling, Felix, Oded Gabbay, Marek Olšák; +Cc: amd-gfx list

> -----Original Message-----
> From: Kuehling, Felix
> Sent: Monday, August 14, 2017 9:13 PM
> To: Oded Gabbay; Marek Olšák; Deucher, Alexander
> Cc: amd-gfx list
> Subject: Re: [PATCH 1/4] drm/amdgpu: Adding new kgd/kfd interface
> functions to support scratch memory
> 
> [+Marek, Alex for comment, see below]
> 
> On 2017-08-13 04:56 AM, Oded Gabbay wrote:
> > On Sat, Aug 12, 2017 at 7:47 AM, Felix Kuehling <Felix.Kuehling@amd.com>
> wrote:
> >> From: Moses Reuben <moses.reuben@amd.com>
> >>
> >> Signed-off-by: Moses Reuben <moses.reuben@amd.com>
> >> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> >> ---
> >>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 34
> +++++++++++++++++++++-
> >>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 35
> ++++++++++++++++++++++-
> >>  drivers/gpu/drm/amd/include/kgd_kfd_interface.h   |  4 +++
> >>  3 files changed, 71 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> >> index 994d262..11d515a 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> >> @@ -135,6 +135,10 @@ static uint16_t
> get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
> >>  static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t
> vmid);
> >>
> >>  static uint16_t get_fw_version(struct kgd_dev *kgd, enum
> kgd_engine_type type);
> >> +static int alloc_memory_of_scratch(struct kgd_dev *kgd,
> >> +                                        uint64_t va, uint32_t vmid);
> >> +static int write_config_static_mem(struct kgd_dev *kgd, bool
> swizzle_enable,
> >> +               uint8_t element_size, uint8_t index_stride, uint8_t mtype);
> >>
> >>  static const struct kfd2kgd_calls kfd2kgd = {
> >>         .init_gtt_mem_allocation = alloc_gtt_mem,
> >> @@ -159,7 +163,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
> >>         .get_atc_vmid_pasid_mapping_pasid =
> get_atc_vmid_pasid_mapping_pasid,
> >>         .get_atc_vmid_pasid_mapping_valid =
> get_atc_vmid_pasid_mapping_valid,
> >>         .write_vmid_invalidate_request = write_vmid_invalidate_request,
> >> -       .get_fw_version = get_fw_version
> >> +       .get_fw_version = get_fw_version,
> >> +       .alloc_memory_of_scratch = alloc_memory_of_scratch,
> >> +       .write_config_static_mem = write_config_static_mem
> >>  };
> >>
> >>  struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
> >> @@ -652,6 +658,32 @@ static void write_vmid_invalidate_request(struct
> kgd_dev *kgd, uint8_t vmid)
> >>         WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> >>  }
> >>
> >> +static int write_config_static_mem(struct kgd_dev *kgd, bool
> swizzle_enable,
> >> +               uint8_t element_size, uint8_t index_stride, uint8_t mtype)
> >> +{
> >> +       uint32_t reg;
> >> +       struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> >> +
> >> +       reg = swizzle_enable <<
> SH_STATIC_MEM_CONFIG__SWIZZLE_ENABLE__SHIFT |
> >> +               element_size <<
> SH_STATIC_MEM_CONFIG__ELEMENT_SIZE__SHIFT |
> >> +               index_stride <<
> SH_STATIC_MEM_CONFIG__INDEX_STRIDE__SHIFT |
> >> +               mtype << SH_STATIC_MEM_CONFIG__PRIVATE_MTYPE__SHIFT;
> >> +
> >> +       WREG32(mmSH_STATIC_MEM_CONFIG, reg);
> > Don't you need to select and lock srbm before you write to this register ?
> 
> No, this register seems to be global. SRBM_GFX_CNTL had no effect on it
> when I experimented with it.
> 
> Since this is global initialization, I'm wondering if this should be
> moved into part of the amdgpu initialization. Having amdkfd call back
> into amdgpu like this during its initialization always seemed like a bit
> roundabout.
> 
> Marek, do you know if this register affects Mesa performance or
> behaviour with respect to scratch memory. According to the register spec
> it only affects flat scratch memory addressing. Not sure if you're using
> that addressing scheme in Mesa.
> 
> Alex, I see that SH_STATIC_MEM_CONFIG gets initialized in
> gfx_v[78]_0_gpu_init identically to what we do in KFD. You're doing it
> per-VMID (under cik_srbm_select). But my experiments show that this
> register is global. So I think your initialization already works for KFD
> and we can probably just drop this from the KFD initialization. Do you
> have access to hardware docs that confirm that it's global?

We'll have to confirm with the hw teams.  I always thought it was per vmid.

Alex

> 
> Regards,
>   Felix
> 
> >
> >> +       return 0;
> >> +}
> >> +static int alloc_memory_of_scratch(struct kgd_dev *kgd,
> >> +                                uint64_t va, uint32_t vmid)
> >> +{
> >> +       struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> >> +
> >> +       lock_srbm(kgd, 0, 0, 0, vmid);
> >> +       WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
> >> +       unlock_srbm(kgd);
> >> +
> >> +       return 0;
> >> +}
> >> +
> >>  static uint16_t get_fw_version(struct kgd_dev *kgd, enum
> kgd_engine_type type)
> >>  {
> >>         struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> >> index 29a6f5d..ef677e5 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> >> @@ -94,6 +94,10 @@ static uint16_t
> get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
> >>                 uint8_t vmid);
> >>  static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t
> vmid);
> >>  static uint16_t get_fw_version(struct kgd_dev *kgd, enum
> kgd_engine_type type);
> >> +static int alloc_memory_of_scratch(struct kgd_dev *kgd,
> >> +                                uint64_t va, uint32_t vmid);
> >> +static int write_config_static_mem(struct kgd_dev *kgd, bool
> swizzle_enable,
> >> +               uint8_t element_size, uint8_t index_stride, uint8_t mtype);
> >>
> >>  static const struct kfd2kgd_calls kfd2kgd = {
> >>         .init_gtt_mem_allocation = alloc_gtt_mem,
> >> @@ -120,12 +124,15 @@ static const struct kfd2kgd_calls kfd2kgd = {
> >>         .get_atc_vmid_pasid_mapping_valid =
> >>                         get_atc_vmid_pasid_mapping_valid,
> >>         .write_vmid_invalidate_request = write_vmid_invalidate_request,
> >> -       .get_fw_version = get_fw_version
> >> +       .get_fw_version = get_fw_version,
> >> +       .alloc_memory_of_scratch = alloc_memory_of_scratch,
> >> +       .write_config_static_mem = write_config_static_mem
> >>  };
> >>
> >>  struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
> >>  {
> >>         return (struct kfd2kgd_calls *)&kfd2kgd;
> >> +       return (struct kfd2kgd_calls *)&kfd2kgd;
> >>  }
> >>
> >>  static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev
> *kgd)
> >> @@ -573,6 +580,32 @@ static uint32_t
> kgd_address_watch_get_offset(struct kgd_dev *kgd,
> >>         return 0;
> >>  }
> >>
> >> +static int write_config_static_mem(struct kgd_dev *kgd, bool
> swizzle_enable,
> >> +               uint8_t element_size, uint8_t index_stride, uint8_t mtype)
> >> +{
> >> +       uint32_t reg;
> >> +       struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> >> +
> >> +       reg = swizzle_enable <<
> SH_STATIC_MEM_CONFIG__SWIZZLE_ENABLE__SHIFT |
> >> +               element_size <<
> SH_STATIC_MEM_CONFIG__ELEMENT_SIZE__SHIFT |
> >> +               index_stride <<
> SH_STATIC_MEM_CONFIG__INDEX_STRIDE__SHIFT |
> >> +               mtype << SH_STATIC_MEM_CONFIG__PRIVATE_MTYPE__SHIFT;
> >> +
> >> +       WREG32(mmSH_STATIC_MEM_CONFIG, reg);
> > Same comment as above.
> >
> >> +       return 0;
> >> +}
> >> +static int alloc_memory_of_scratch(struct kgd_dev *kgd,
> >> +                                uint64_t va, uint32_t vmid)
> >> +{
> >> +       struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> >> +
> >> +       lock_srbm(kgd, 0, 0, 0, vmid);
> >> +       WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
> >> +       unlock_srbm(kgd);
> >> +
> >> +       return 0;
> >> +}
> >> +
> >>  static uint16_t get_fw_version(struct kgd_dev *kgd, enum
> kgd_engine_type type)
> >>  {
> >>         struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> >> diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> >> index ffafda0..1dd90b2 100644
> >> --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> >> +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> >> @@ -199,6 +199,10 @@ struct kfd2kgd_calls {
> >>
> >>         uint16_t (*get_fw_version)(struct kgd_dev *kgd,
> >>                                 enum kgd_engine_type type);
> >> +       int (*alloc_memory_of_scratch)(struct kgd_dev *kgd,
> >> +                       uint64_t va, uint32_t vmid);
> >> +       int (*write_config_static_mem)(struct kgd_dev *kgd, bool
> swizzle_enable,
> >> +               uint8_t element_size, uint8_t index_stride, uint8_t mtype);
> >>  };
> >>
> >>  /**
> >> --
> >> 2.7.4
> >>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 2/4] drm/amdkfd: Adding new IOCTL for scratch memory
       [not found]                         ` <b114b316-1142-11b8-6393-5f903bb2d663-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-08-15 14:33                           ` Alex Deucher
@ 2017-08-15 16:03                           ` Felix Kuehling
       [not found]                             ` <d9a8220d-f9f1-b16c-da11-3c21a450926d-5C7GfCeVMHo@public.gmane.org>
  1 sibling, 1 reply; 28+ messages in thread
From: Felix Kuehling @ 2017-08-15 16:03 UTC (permalink / raw)
  To: Christian König, Oded Gabbay; +Cc: amd-gfx list

On 2017-08-15 04:24 AM, Christian König wrote:
> Am 14.08.2017 um 17:31 schrieb Felix Kuehling:
>> [SNIP]
>> Repeating the same argument I made on another email:
>
> Commented on that in the other mail, let's keep the discussion on this
> topic there.
>
>>> BTW: What exactly this this good for?
>> Scratch memory is private memory per work-item. It's used when a shader
>> program has too few registers available. With HSA we use flat scratch
>> addressing, where shaders can access private memory in a special scratch
>> aperture using normal memory instructions. Using the same virtual
>> address, each work item gets its own private piece of memory. The
>> hardware does the address translation from the VA in the private
>> aperture to a scratch-backing VA. The application is responsible for
>> allocating the memory to back that scratch area, and to map it somewhere
>> in its virtual address space.
>>
>> This ioctl tells the hardware (or HWS firmware) the VA of the scratch
>> backing memory.
>
> Ok, you've got me lost here. Not that I'm deeply into that stuff, but
> my last status is that those apertures are global and not per/process
> or VMID.

The apertures for private (scratch) and LDS are configured in
SH_MEM_BASES. As far as I know, this is per VMID. At least that's the
way gfx_v8_0_init_compute_vmid initializes it. When we use the HWS we
don't program this directly, but this information is included in the
map_process packet in the runlist and the firmware programs the
registers when it assigns a VMID to a process.

The scratch backing VA is configured in SH_HIDDEN_PRIVATE_BASE_VMID,
which is per VMID. Again, with the HWS, this is programmed by the
firmware and the driver just includes it in the map_process packet.

Regards,
  Felix

>
> That would mean that when two processes try to set two different
> addresses we are completely lost here.
>
> Christian.

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 2/4] drm/amdkfd: Adding new IOCTL for scratch memory
       [not found]                             ` <d9a8220d-f9f1-b16c-da11-3c21a450926d-5C7GfCeVMHo@public.gmane.org>
@ 2017-08-15 18:53                               ` Christian König
  0 siblings, 0 replies; 28+ messages in thread
From: Christian König @ 2017-08-15 18:53 UTC (permalink / raw)
  To: Felix Kuehling, Oded Gabbay; +Cc: amd-gfx list

Am 15.08.2017 um 18:03 schrieb Felix Kuehling:
> On 2017-08-15 04:24 AM, Christian König wrote:
>> Am 14.08.2017 um 17:31 schrieb Felix Kuehling:
>>> [SNIP]
>>> Repeating the same argument I made on another email:
>> Commented on that in the other mail, let's keep the discussion on this
>> topic there.
>>
>>>> BTW: What exactly this this good for?
>>> Scratch memory is private memory per work-item. It's used when a shader
>>> program has too few registers available. With HSA we use flat scratch
>>> addressing, where shaders can access private memory in a special scratch
>>> aperture using normal memory instructions. Using the same virtual
>>> address, each work item gets its own private piece of memory. The
>>> hardware does the address translation from the VA in the private
>>> aperture to a scratch-backing VA. The application is responsible for
>>> allocating the memory to back that scratch area, and to map it somewhere
>>> in its virtual address space.
>>>
>>> This ioctl tells the hardware (or HWS firmware) the VA of the scratch
>>> backing memory.
>> Ok, you've got me lost here. Not that I'm deeply into that stuff, but
>> my last status is that those apertures are global and not per/process
>> or VMID.
> The apertures for private (scratch) and LDS are configured in
> SH_MEM_BASES. As far as I know, this is per VMID. At least that's the
> way gfx_v8_0_init_compute_vmid initializes it. When we use the HWS we
> don't program this directly, but this information is included in the
> map_process packet in the runlist and the firmware programs the
> registers when it assigns a VMID to a process.
>
> The scratch backing VA is configured in SH_HIDDEN_PRIVATE_BASE_VMID,
> which is per VMID. Again, with the HWS, this is programmed by the
> firmware and the driver just includes it in the map_process packet.

Ok, that makes sense to me.

The same question came up in a different context a few month back in a 
thread and I probably just confused this with some other SH_* registers 
which turned out to be global while we always thought it is per VMID.

Christian.

> Regards,
>    Felix
>
>> That would mean that when two processes try to set two different
>> addresses we are completely lost here.
>>
>> Christian.
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 4/4] drm/amdkfd: Implement image tiling mode support
       [not found]                     ` <CAFCwf13bFDhL=_X-zx8AJE+0sD0Aw9z5Rku_7qDgY6VEdtS+qA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2017-08-16  0:35                       ` Felix Kuehling
       [not found]                         ` <fd9a9222-f7fa-8f72-6a23-619a17a872e3-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Felix Kuehling @ 2017-08-16  0:35 UTC (permalink / raw)
  To: Oded Gabbay; +Cc: amd-gfx list

On 2017-08-15 06:20 AM, Oded Gabbay wrote:
> I prefer to do it incrementally, to avoid very large patch-sets which
> usually end up in longer cycles of review-fix, which causes you more
> pain because internal development continues during this time and you
> need to keep everything synchronized. If you do it in small pieces,
> there is more chance it will get to upstream faster and then you can
> cross it off your list permanently and no longer worry about it not
> being synchronized with internal development.
>
> If you are talking about the current patch-set (you actually sent 2
> patch-sets), then once you rebase them on the branches I mentioned,
> they are more or less good to go (except from very small fixes we
> talked about). If you can do it during this week, I think we can make
> it for the 4.14 merge window.
>
> Does that make sense ?

Sounds good. I'm hoping to get a bit more into 4.14. We'll see how it
goes. I've probably been told before but forgot: What's Dave Airlie's
deadline for accepting patches into 4.14?

BTW, thanks for your prompt feedback on these patches. I hope you're
getting paid for this work, cause there is a lot more coming. ;) I
really appreciate it.

Regards,
  Felix

>
> Oded

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 4/4] drm/amdkfd: Implement image tiling mode support
       [not found]                         ` <fd9a9222-f7fa-8f72-6a23-619a17a872e3-5C7GfCeVMHo@public.gmane.org>
@ 2017-08-16  0:40                           ` Michel Dänzer
       [not found]                             ` <8ff456e3-94a8-7547-d01d-6aca5aaface8-otUistvHUpPR7s880joybQ@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Michel Dänzer @ 2017-08-16  0:40 UTC (permalink / raw)
  To: Felix Kuehling, Oded Gabbay; +Cc: amd-gfx list

On 16/08/17 09:35 AM, Felix Kuehling wrote:
> On 2017-08-15 06:20 AM, Oded Gabbay wrote:
>> I prefer to do it incrementally, to avoid very large patch-sets which
>> usually end up in longer cycles of review-fix, which causes you more
>> pain because internal development continues during this time and you
>> need to keep everything synchronized. If you do it in small pieces,
>> there is more chance it will get to upstream faster and then you can
>> cross it off your list permanently and no longer worry about it not
>> being synchronized with internal development.
>>
>> If you are talking about the current patch-set (you actually sent 2
>> patch-sets), then once you rebase them on the branches I mentioned,
>> they are more or less good to go (except from very small fixes we
>> talked about). If you can do it during this week, I think we can make
>> it for the 4.14 merge window.
>>
>> Does that make sense ?
> 
> Sounds good. I'm hoping to get a bit more into 4.14. We'll see how it
> goes. I've probably been told before but forgot: What's Dave Airlie's
> deadline for accepting patches into 4.14?

The deadline is usually around -rc6 of the previous cycle, so for 4.14
it might be the end of this week (keep in mind that Dave's in Australia,
so his work day is over when yours starts).


-- 
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 4/4] drm/amdkfd: Implement image tiling mode support
       [not found]                             ` <8ff456e3-94a8-7547-d01d-6aca5aaface8-otUistvHUpPR7s880joybQ@public.gmane.org>
@ 2017-08-16  9:24                               ` Oded Gabbay
       [not found]                                 ` <CAFCwf11Rjt3smEjF6KAeUWCu=GCsSXAWbd1j4qA2tu+WL0C13g-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Oded Gabbay @ 2017-08-16  9:24 UTC (permalink / raw)
  To: Michel Dänzer; +Cc: Felix Kuehling, amd-gfx list

On Wed, Aug 16, 2017 at 3:40 AM, Michel Dänzer <michel@daenzer.net> wrote:
>
> On 16/08/17 09:35 AM, Felix Kuehling wrote:
> > On 2017-08-15 06:20 AM, Oded Gabbay wrote:
> >> I prefer to do it incrementally, to avoid very large patch-sets which
> >> usually end up in longer cycles of review-fix, which causes you more
> >> pain because internal development continues during this time and you
> >> need to keep everything synchronized. If you do it in small pieces,
> >> there is more chance it will get to upstream faster and then you can
> >> cross it off your list permanently and no longer worry about it not
> >> being synchronized with internal development.
> >>
> >> If you are talking about the current patch-set (you actually sent 2
> >> patch-sets), then once you rebase them on the branches I mentioned,
> >> they are more or less good to go (except from very small fixes we
> >> talked about). If you can do it during this week, I think we can make
> >> it for the 4.14 merge window.
> >>
> >> Does that make sense ?
> >
> > Sounds good. I'm hoping to get a bit more into 4.14. We'll see how it
> > goes. I've probably been told before but forgot: What's Dave Airlie's
> > deadline for accepting patches into 4.14?
>
> The deadline is usually around -rc6 of the previous cycle, so for 4.14
> it might be the end of this week (keep in mind that Dave's in Australia,
> so his work day is over when yours starts).
>
Yeah, Michel is correct. If its 1 or 2 trivial patches I can probably
get it in even during -rc7 or start of merge window, but for this
large patch-set -rc5/6 is the safer way to go.

Oded

>
> --
> Earthling Michel Dänzer               |               http://www.amd.com
> Libre software enthusiast             |             Mesa and X developer
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 4/4] drm/amdkfd: Implement image tiling mode support
       [not found]                                 ` <CAFCwf11Rjt3smEjF6KAeUWCu=GCsSXAWbd1j4qA2tu+WL0C13g-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2017-08-16 21:53                                   ` Felix Kuehling
       [not found]                                     ` <f4c6d3c4-3dd9-3add-5ebc-01659a655e66-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Felix Kuehling @ 2017-08-16 21:53 UTC (permalink / raw)
  To: Oded Gabbay, Michel Dänzer; +Cc: amd-gfx list

On 2017-08-16 05:24 AM, Oded Gabbay wrote:
>> The deadline is usually around -rc6 of the previous cycle, so for 4.14
>> it might be the end of this week (keep in mind that Dave's in Australia,
>> so his work day is over when yours starts).
>>
> Yeah, Michel is correct. If its 1 or 2 trivial patches I can probably
> get it in even during -rc7 or start of merge window, but for this
> large patch-set -rc5/6 is the safer way to go.

OK. Then this is it for 4.14. I'll target the next set for 4.15.

The next set will be a bunch of bug fixes and smaller features that
apply to KV and CZ. I want to get all the small stuff out of the way so
it can be reviewed properly without getting confounded with the big
changes for dGPU support.

After that I'll get to the big pieces of dGPU support for GFX7 and GFX8.

Finally I'll add GFX9 (Vega10) support.

Regards,
  Felix

>
> Oded
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 4/4] drm/amdkfd: Implement image tiling mode support
       [not found]                                     ` <f4c6d3c4-3dd9-3add-5ebc-01659a655e66-5C7GfCeVMHo@public.gmane.org>
@ 2017-08-17  9:18                                       ` Christian König
  0 siblings, 0 replies; 28+ messages in thread
From: Christian König @ 2017-08-17  9:18 UTC (permalink / raw)
  To: Felix Kuehling, Oded Gabbay, Michel Dänzer; +Cc: amd-gfx list

Am 16.08.2017 um 23:53 schrieb Felix Kuehling:
> On 2017-08-16 05:24 AM, Oded Gabbay wrote:
>>> The deadline is usually around -rc6 of the previous cycle, so for 4.14
>>> it might be the end of this week (keep in mind that Dave's in Australia,
>>> so his work day is over when yours starts).
>>>
>> Yeah, Michel is correct. If its 1 or 2 trivial patches I can probably
>> get it in even during -rc7 or start of merge window, but for this
>> large patch-set -rc5/6 is the safer way to go.
> OK. Then this is it for 4.14. I'll target the next set for 4.15.
>
> The next set will be a bunch of bug fixes and smaller features that
> apply to KV and CZ. I want to get all the small stuff out of the way so
> it can be reviewed properly without getting confounded with the big
> changes for dGPU support.
>
> After that I'll get to the big pieces of dGPU support for GFX7 and GFX8.
>
> Finally I'll add GFX9 (Vega10) support.

Sounds like a plan to me.

But as long as they are just bug fixes and maybe minor cleanups feel 
free to send in even more for 4.14.

Regards,
Christian.

>
> Regards,
>    Felix
>
>> Oded
>>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2017-08-17  9:18 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-12  4:47 [PATCH 0/4] More KFD features to make the runtime happy Felix Kuehling
     [not found] ` <1502513265-14281-1-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
2017-08-12  4:47   ` [PATCH 1/4] drm/amdgpu: Adding new kgd/kfd interface functions to support scratch memory Felix Kuehling
     [not found]     ` <1502513265-14281-2-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
2017-08-13  8:56       ` Oded Gabbay
     [not found]         ` <CAFCwf13YDgfX_3EFhQ8sE-8D7ApyDd9zwtqazW6Ups2dPReMqw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-08-15  1:12           ` Felix Kuehling
     [not found]             ` <c42dc57b-999b-250b-8ebe-214502869191-5C7GfCeVMHo@public.gmane.org>
2017-08-15 10:07               ` Marek Olšák
2017-08-15 14:49               ` Deucher, Alexander
2017-08-12  4:47   ` [PATCH 2/4] drm/amdkfd: Adding new IOCTL for " Felix Kuehling
     [not found]     ` <1502513265-14281-3-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
2017-08-13  9:04       ` Oded Gabbay
     [not found]         ` <CAFCwf11z1Uo9jd82XAGrapXA44KpY+KrV69nbXLkoX6v10_gig-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-08-14 15:10           ` Felix Kuehling
     [not found]             ` <22ab982e-a1b1-5d61-9eaa-f9c149cf3d5e-5C7GfCeVMHo@public.gmane.org>
2017-08-14 15:15               ` Christian König
     [not found]                 ` <c310b952-116b-376c-0794-232e69f88592-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-14 15:31                   ` Felix Kuehling
     [not found]                     ` <33308ac3-bdf2-21ac-2122-b43702fdd95f-5C7GfCeVMHo@public.gmane.org>
2017-08-15  8:24                       ` Christian König
     [not found]                         ` <b114b316-1142-11b8-6393-5f903bb2d663-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-15 14:33                           ` Alex Deucher
2017-08-15 16:03                           ` Felix Kuehling
     [not found]                             ` <d9a8220d-f9f1-b16c-da11-3c21a450926d-5C7GfCeVMHo@public.gmane.org>
2017-08-15 18:53                               ` Christian König
2017-08-12  4:47   ` [PATCH 3/4] drm/amdgpu: Add kgd kfd interface get_tile_config() Felix Kuehling
     [not found]     ` <1502513265-14281-4-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
2017-08-13  9:05       ` Oded Gabbay
2017-08-12  4:47   ` [PATCH 4/4] drm/amdkfd: Implement image tiling mode support Felix Kuehling
     [not found]     ` <1502513265-14281-5-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
2017-08-13  9:08       ` Oded Gabbay
     [not found]         ` <CAFCwf10iQhAvxZdwCYBK6f0fBKdpgD=f0W886awm2fJVCB=hLQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-08-14 15:18           ` Felix Kuehling
     [not found]             ` <fc37f1a8-9c12-f1da-cdab-f03b50643f19-5C7GfCeVMHo@public.gmane.org>
2017-08-14 22:55               ` Felix Kuehling
     [not found]                 ` <2649fca7-9617-e434-a75a-f76d7e07d0f5-5C7GfCeVMHo@public.gmane.org>
2017-08-15  8:21                   ` Christian König
2017-08-15 10:20                   ` Oded Gabbay
     [not found]                     ` <CAFCwf13bFDhL=_X-zx8AJE+0sD0Aw9z5Rku_7qDgY6VEdtS+qA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-08-16  0:35                       ` Felix Kuehling
     [not found]                         ` <fd9a9222-f7fa-8f72-6a23-619a17a872e3-5C7GfCeVMHo@public.gmane.org>
2017-08-16  0:40                           ` Michel Dänzer
     [not found]                             ` <8ff456e3-94a8-7547-d01d-6aca5aaface8-otUistvHUpPR7s880joybQ@public.gmane.org>
2017-08-16  9:24                               ` Oded Gabbay
     [not found]                                 ` <CAFCwf11Rjt3smEjF6KAeUWCu=GCsSXAWbd1j4qA2tu+WL0C13g-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-08-16 21:53                                   ` Felix Kuehling
     [not found]                                     ` <f4c6d3c4-3dd9-3add-5ebc-01659a655e66-5C7GfCeVMHo@public.gmane.org>
2017-08-17  9:18                                       ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.