* [RFC 0/7] RFC: Usermode queue for AMDGPU driver
@ 2022-12-23 19:36 Shashank Sharma
  2022-12-23 19:36 ` [RFC 1/7] drm/amdgpu: UAPI for user queue management Shashank Sharma
                   ` (7 more replies)
  0 siblings, 8 replies; 64+ messages in thread
From: Shashank Sharma @ 2022-12-23 19:36 UTC (permalink / raw)
  To: amd-gfx
  Cc: Alex Deucher, Shashank Sharma, Christian Koenig, arvind.yadav,
	arunpravin.paneerselvam

This is an RFC series to implement usermode graphics queues for the AMDGPU
driver (Navi 3X and above). The idea of a usermode graphics queue is to
allow direct workload submission from a userspace graphics process that
has an amdgpu graphics context.

Once we have some initial feedback on the design, we will publish a
follow-up V1 series with a libdrm consumer test.
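
For reference, the intended userspace flow looks roughly like the sketch
below (illustration only; the structure and helpers here are hypothetical
and not part of this series): create a user queue once via the new IOCTL,
then submit by writing packets into the user-mapped ring, updating the
wptr and ringing the doorbell.

#include <stdint.h>
#include <string.h>

/* Sketch only: no wrap or fence handling. */
struct user_ring {
	uint32_t *ring;               /* user mapping of queue_va */
	uint64_t *wptr;               /* user mapping of wptr_va */
	volatile uint64_t *doorbell;  /* user mapping of the doorbell slot */
	uint64_t size_dw;             /* ring size in dwords */
};

static void submit_packet(struct user_ring *q, const uint32_t *pkt, uint64_t ndw)
{
	uint64_t wptr = *q->wptr;

	/* copy the packet into the ring and publish the new wptr */
	memcpy(&q->ring[wptr % q->size_dw], pkt, ndw * sizeof(uint32_t));
	*q->wptr = wptr + ndw;

	/* ring the doorbell so the HW scheduler picks up the new wptr */
	*q->doorbell = *q->wptr;
}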

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>

Alex Deucher (1):
  drm/amdgpu: UAPI for user queue management

Arunpravin Paneer Selvam (1):
  drm/amdgpu: Secure semaphore for usermode queue

Arvind Yadav (1):
  drm/amdgpu: Create MQD for userspace queue

Shashank Sharma (4):
  drm/amdgpu: Add usermode queue for gfx work
  drm/amdgpu: Allocate doorbell slot for user queue
  drm/amdgpu: Create context for usermode queue
  drm/amdgpu: Map userqueue into HW

 drivers/gpu/drm/amd/amdgpu/Makefile           |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  14 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h       |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 486 ++++++++++++++++
 .../amd/amdgpu/amdgpu_userqueue_secure_sem.c  | 245 ++++++++
 .../drm/amd/include/amdgpu_usermode_queue.h   |  68 +++
 .../amd/include/amdgpu_usermode_queue_mqd.h   | 544 ++++++++++++++++++
 include/uapi/drm/amdgpu_drm.h                 |  52 ++
 8 files changed, 1413 insertions(+)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_secure_sem.c
 create mode 100644 drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
 create mode 100644 drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h

-- 
2.34.1



* [RFC 1/7] drm/amdgpu: UAPI for user queue management
  2022-12-23 19:36 [RFC 0/7] RFC: Usermode queue for AMDGPU driver Shashank Sharma
@ 2022-12-23 19:36 ` Shashank Sharma
  2022-12-24 20:20   ` Bas Nieuwenhuizen
                     ` (2 more replies)
  2022-12-23 19:36 ` [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work Shashank Sharma
                   ` (6 subsequent siblings)
  7 siblings, 3 replies; 64+ messages in thread
From: Shashank Sharma @ 2022-12-23 19:36 UTC (permalink / raw)
  To: amd-gfx
  Cc: Alex Deucher, Shashank Sharma, Christian Koenig, arvind.yadav,
	arunpravin.paneerselvam

From: Alex Deucher <alexander.deucher@amd.com>

This patch introduces a new UAPI/IOCTL for usermode graphics
queues. The userspace app will fill the drm_amdgpu_userq structure
and request the graphics driver to add a graphics work queue for
it. The output of this UAPI is a queue id.

This UAPI maps the queue into the GPU, so the graphics app can start
submitting work to the queue as soon as the call returns.
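
As a usage illustration only (not part of this patch; libdrm has no
wrapper for this yet), a userspace caller would fill the queue
descriptor and issue the IOCTL roughly as below, using drmIoctl()
from libdrm. The ctx_id, doorbell_handle and GPU VAs are assumed to
have been set up by the caller beforehand.

#include <stdint.h>
#include <xf86drm.h>
#include <amdgpu_drm.h>

static int create_user_queue(int fd, uint32_t ctx_id, uint32_t doorbell_handle,
			     uint64_t queue_va, uint64_t queue_size,
			     uint64_t rptr_va, uint64_t wptr_va, uint32_t *q_id)
{
	union drm_amdgpu_userq args = {0};
	int r;

	args.in.op = AMDGPU_USERQ_OP_CREATE;
	args.in.ctx_id = ctx_id;
	args.in.mqd.ip_type = AMDGPU_HW_IP_GFX;
	args.in.mqd.doorbell_handle = doorbell_handle;
	args.in.mqd.doorbell_offset = 4;	/* dword offset into the doorbell page */
	args.in.mqd.queue_va = queue_va;
	args.in.mqd.queue_size = queue_size;
	args.in.mqd.rptr_va = rptr_va;
	args.in.mqd.wptr_va = wptr_va;

	r = drmIoctl(fd, DRM_IOCTL_AMDGPU_USERQ, &args);
	if (r)
		return r;

	*q_id = args.out.q_id;
	return 0;
}

Freeing the queue would work the same way, with AMDGPU_USERQ_OP_FREE
in args.in.op and the context id of the queue to be destroyed.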

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
---
 include/uapi/drm/amdgpu_drm.h | 52 +++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 0d93ec132ebb..a3d0dd6f62c5 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -54,6 +54,7 @@ extern "C" {
 #define DRM_AMDGPU_VM			0x13
 #define DRM_AMDGPU_FENCE_TO_HANDLE	0x14
 #define DRM_AMDGPU_SCHED		0x15
+#define DRM_AMDGPU_USERQ		0x16
 
 #define DRM_IOCTL_AMDGPU_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
 #define DRM_IOCTL_AMDGPU_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -71,6 +72,7 @@ extern "C" {
 #define DRM_IOCTL_AMDGPU_VM		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
 #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
 #define DRM_IOCTL_AMDGPU_SCHED		DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
+#define DRM_IOCTL_AMDGPU_USERQ		DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
 
 /**
  * DOC: memory domains
@@ -288,6 +290,56 @@ union drm_amdgpu_ctx {
 	union drm_amdgpu_ctx_out out;
 };
 
+/* user queue IOCTL */
+#define AMDGPU_USERQ_OP_CREATE	1
+#define AMDGPU_USERQ_OP_FREE	2
+
+#define AMDGPU_USERQ_MQD_FLAGS_SECURE	(1 << 0)
+#define AMDGPU_USERQ_MQD_FLAGS_AQL	(1 << 1)
+
+struct drm_amdgpu_userq_mqd {
+	/** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
+	__u32	flags;
+	/** IP type: AMDGPU_HW_IP_* */
+	__u32	ip_type;
+	/** GEM object handle */
+	__u32   doorbell_handle;
+	/** Doorbell offset in dwords */
+	__u32   doorbell_offset;
+	/** GPU virtual address of the queue */
+	__u64   queue_va;
+	/** Size of the queue in bytes */
+	__u64   queue_size;
+	/** GPU virtual address of the rptr */
+	__u64   rptr_va;
+	/** GPU virtual address of the wptr */
+	__u64   wptr_va;
+};
+
+struct drm_amdgpu_userq_in {
+	/** AMDGPU_USERQ_OP_* */
+	__u32	op;
+	/** Flags */
+	__u32	flags;
+	/** Context handle to associate the queue with */
+	__u32	ctx_id;
+	__u32	pad;
+	/** Queue descriptor */
+	struct drm_amdgpu_userq_mqd mqd;
+};
+
+struct drm_amdgpu_userq_out {
+	/** Queue handle */
+	__u32	q_id;
+	/** Flags */
+	__u32	flags;
+};
+
+union drm_amdgpu_userq {
+	struct drm_amdgpu_userq_in in;
+	struct drm_amdgpu_userq_out out;
+};
+
 /* vm ioctl */
 #define AMDGPU_VM_OP_RESERVE_VMID	1
 #define AMDGPU_VM_OP_UNRESERVE_VMID	2
-- 
2.34.1



* [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work
  2022-12-23 19:36 [RFC 0/7] RFC: Usermode queue for AMDGPU driver Shashank Sharma
  2022-12-23 19:36 ` [RFC 1/7] drm/amdgpu: UAPI for user queue management Shashank Sharma
@ 2022-12-23 19:36 ` Shashank Sharma
  2022-12-24 18:19   ` Oded Gabbay
                     ` (3 more replies)
  2022-12-23 19:36 ` [RFC 3/7] drm/amdgpu: Create MQD for userspace queue Shashank Sharma
                   ` (5 subsequent siblings)
  7 siblings, 4 replies; 64+ messages in thread
From: Shashank Sharma @ 2022-12-23 19:36 UTC (permalink / raw)
  To: amd-gfx
  Cc: Alex Deucher, Shashank Sharma, Christian Koenig, arvind.yadav,
	arunpravin.paneerselvam

This patch adds skeleton code for usermode queue creation. It
contains:
- A new structure to keep all the user queue data in one place.
- An IOCTL function to create/free a usermode queue.
- A function to generate a unique index for the queue.
- Global user queue state in struct amdgpu_device.

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/Makefile           |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu.h           |   6 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h       |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 187 ++++++++++++++++++
 .../drm/amd/include/amdgpu_usermode_queue.h   |  50 +++++
 5 files changed, 246 insertions(+)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
 create mode 100644 drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 6ad39cf71bdd..e2a34ee57bfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -209,6 +209,8 @@ amdgpu-y += \
 # add amdkfd interfaces
 amdgpu-y += amdgpu_amdkfd.o
 
+# add usermode queue
+amdgpu-y += amdgpu_userqueue.o
 
 ifneq ($(CONFIG_HSA_AMD),)
 AMDKFD_PATH := ../amdkfd
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 8639a4f9c6e8..4b566fcfca18 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -749,6 +749,11 @@ struct amdgpu_mqd {
 			struct amdgpu_mqd_prop *p);
 };
 
+struct amdgpu_userq_globals {
+	struct ida ida;
+	struct mutex userq_mutex;
+};
+
 #define AMDGPU_RESET_MAGIC_NUM 64
 #define AMDGPU_MAX_DF_PERFMONS 4
 #define AMDGPU_PRODUCT_NAME_LEN 64
@@ -955,6 +960,7 @@ struct amdgpu_device {
 	bool                            enable_mes_kiq;
 	struct amdgpu_mes               mes;
 	struct amdgpu_mqd               mqds[AMDGPU_HW_IP_NUM];
+	struct amdgpu_userq_globals	userq;
 
 	/* df */
 	struct amdgpu_df                df;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index 0fa0e56daf67..f7413859b14f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -57,6 +57,7 @@ struct amdgpu_ctx {
 	unsigned long			ras_counter_ce;
 	unsigned long			ras_counter_ue;
 	uint32_t			stable_pstate;
+	struct amdgpu_usermode_queue	*userq;
 };
 
 struct amdgpu_ctx_mgr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
new file mode 100644
index 000000000000..3b6e8f75495c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_mes.h"
+#include "amdgpu_usermode_queue.h"
+#include "soc15_common.h"
+
+#define CHECK_ACCESS(a) (access_ok((const void __user *)a, sizeof(__u64)))
+
+static int
+amdgpu_userqueue_index(struct amdgpu_device *adev)
+{
+    int index;
+    struct amdgpu_userq_globals *uqg = &adev->userq;
+
+    index = ida_simple_get(&uqg->ida, 2, AMDGPU_MAX_USERQ, GFP_KERNEL);
+    return index;
+}
+
+static void
+amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
+{
+    struct amdgpu_userq_globals *uqg = &adev->userq;
+
+    ida_simple_remove(&uqg->ida, queue->queue_id);
+}
+
+static int
+amdgpu_userqueue_validate_input(struct amdgpu_device *adev, struct drm_amdgpu_userq_mqd *mqd_in)
+{
+    if (mqd_in->queue_va == 0 || mqd_in->doorbell_handle == 0 || mqd_in->doorbell_offset == 0) {
+        DRM_ERROR("Invalid queue object address\n");
+        return -EINVAL;
+    }
+
+    if (mqd_in->queue_size == 0 || mqd_in->rptr_va == 0 || mqd_in->wptr_va == 0) {
+        DRM_ERROR("Invalid queue object value\n");
+        return -EINVAL;
+    }
+
+    if (mqd_in->ip_type < AMDGPU_HW_IP_GFX || mqd_in->ip_type >= AMDGPU_HW_IP_NUM) {
+        DRM_ERROR("Invalid HW IP type 0x%x\n", mqd_in->ip_type);
+        return -EINVAL;
+    }
+
+    if (!CHECK_ACCESS(mqd_in->queue_va) || !CHECK_ACCESS(mqd_in->rptr_va) ||
+        !CHECK_ACCESS(mqd_in->wptr_va)) {
+            DRM_ERROR("Invalid mapping of queue ptrs, access error\n");
+            return -EINVAL;
+    }
+
+    DRM_DEBUG_DRIVER("Input parameters to create queue are valid\n");
+    return 0;
+}
+
+int amdgpu_userqueue_create(struct amdgpu_device *adev, struct drm_file *filp,
+                            union drm_amdgpu_userq *args)
+{
+    int r, pasid;
+    struct amdgpu_usermode_queue *queue;
+    struct amdgpu_fpriv *fpriv = filp->driver_priv;
+    struct amdgpu_vm *vm = &fpriv->vm;
+    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, args->in.ctx_id);
+    struct drm_amdgpu_userq_mqd *mqd_in = &args->in.mqd;
+
+    if (!ctx) {
+        DRM_ERROR("Invalid GPU context\n");
+        return -EINVAL;
+    }
+
+    if (vm->pasid < 0) {
+        DRM_WARN("No PASID info found\n");
+        pasid = 0;
+    } else {
+        pasid = vm->pasid;
+    }
+
+    mutex_lock(&adev->userq.userq_mutex);
+
+    queue = kzalloc(sizeof(struct amdgpu_usermode_queue), GFP_KERNEL);
+    if (!queue) {
+        DRM_ERROR("Failed to allocate memory for queue\n");
+        mutex_unlock(&adev->userq.userq_mutex);
+        return -ENOMEM;
+    }
+
+    r = amdgpu_userqueue_validate_input(adev, mqd_in);
+    if (r < 0) {
+        DRM_ERROR("Invalid input to create queue\n");
+        goto free_queue;
+    }
+
+    queue->vm = vm;
+    queue->pasid = pasid;
+    queue->wptr_gpu_addr = mqd_in->wptr_va;
+    queue->rptr_gpu_addr = mqd_in->rptr_va;
+    queue->queue_size = mqd_in->queue_size;
+    queue->queue_type = mqd_in->ip_type;
+    queue->paging = false;
+    queue->flags = mqd_in->flags;
+    queue->queue_id = amdgpu_userqueue_index(adev);
+
+    ctx->userq = queue;
+    args->out.q_id = queue->queue_id;
+    args->out.flags = 0;
+    mutex_unlock(&adev->userq.userq_mutex);
+    return 0;
+
+free_queue:
+    amdgpu_userqueue_remove_index(adev, queue);
+    mutex_unlock(&adev->userq.userq_mutex);
+    kfree(queue);
+    return r;
+}
+
+void amdgpu_userqueue_destroy(struct amdgpu_device *adev, struct drm_file *filp,
+                              union drm_amdgpu_userq *args)
+{
+    struct amdgpu_fpriv *fpriv = filp->driver_priv;
+    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, args->in.ctx_id);
+    struct amdgpu_usermode_queue *queue = ctx->userq;
+
+    mutex_lock(&adev->userq.userq_mutex);
+    amdgpu_userqueue_remove_index(adev, queue);
+    ctx->userq = NULL;
+    mutex_unlock(&adev->userq.userq_mutex);
+    kfree(queue);
+}
+
+int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
+		       struct drm_file *filp)
+{
+    union drm_amdgpu_userq *args = data;
+    struct amdgpu_device *adev = drm_to_adev(dev);
+    int r = 0;
+
+    switch (args->in.op) {
+    case AMDGPU_USERQ_OP_CREATE:
+        r = amdgpu_userqueue_create(adev, filp, args);
+        if (r)
+            DRM_ERROR("Failed to create usermode queue\n");
+        break;
+
+    case AMDGPU_USERQ_OP_FREE:
+        amdgpu_userqueue_destroy(adev, filp, args);
+        break;
+
+    default:
+        DRM_ERROR("Invalid user queue op specified: %d\n", args->in.op);
+        return -EINVAL;
+    }
+
+    return r;
+}
+
+int amdgpu_userqueue_init(struct amdgpu_device *adev)
+{
+    struct amdgpu_userq_globals *uqg = &adev->userq;
+
+    mutex_init(&uqg->userq_mutex);
+    return 0;
+}
+
+void amdgpu_userqueue_fini(struct amdgpu_device *adev)
+{
+
+}
diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
new file mode 100644
index 000000000000..c1fe39ffaf72
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef AMDGPU_USERMODE_QUEUE_H_
+#define AMDGPU_USERMODE_QUEUE_H_
+
+#define AMDGPU_MAX_USERQ 512
+
+struct amdgpu_usermode_queue {
+	int		queue_id;
+	int		queue_type;
+	int		queue_size;
+	int		paging;
+	int		pasid;
+	int		use_doorbell;
+	int		doorbell_index;
+
+	uint64_t	mqd_gpu_addr;
+	uint64_t	wptr_gpu_addr;
+	uint64_t	rptr_gpu_addr;
+	uint64_t	queue_gpu_addr;
+	uint64_t	flags;
+	void 		*mqd_cpu_ptr;
+
+	struct amdgpu_bo	*mqd_obj;
+	struct amdgpu_vm    	*vm;
+	struct list_head 	list;
+};
+
+#endif
-- 
2.34.1



* [RFC 3/7] drm/amdgpu: Create MQD for userspace queue
  2022-12-23 19:36 [RFC 0/7] RFC: Usermode queue for AMDGPU driver Shashank Sharma
  2022-12-23 19:36 ` [RFC 1/7] drm/amdgpu: UAPI for user queue management Shashank Sharma
  2022-12-23 19:36 ` [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work Shashank Sharma
@ 2022-12-23 19:36 ` Shashank Sharma
  2022-12-29 17:47   ` Alex Deucher
  2022-12-23 19:36 ` [RFC 4/7] drm/amdgpu: Allocate doorbell slot for user queue Shashank Sharma
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 64+ messages in thread
From: Shashank Sharma @ 2022-12-23 19:36 UTC (permalink / raw)
  To: amd-gfx
  Cc: Alex Deucher, Shashank Sharma, Christian Koenig, arvind.yadav,
	arunpravin.paneerselvam

From: Arvind Yadav <arvind.yadav@amd.com>

The MQD (memory queue descriptor) describes the properties of a user
queue to the HW, and allows it to accurately configure the queue while
mapping it into the GPU HW. This patch adds:
- A new header file which contains the MQD definition
- A new function which creates an MQD object and fills it with userqueue
  data

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>

Signed-off-by: Arvind Yadav <arvind.yadav@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 138 +++++
 .../amd/include/amdgpu_usermode_queue_mqd.h   | 544 ++++++++++++++++++
 2 files changed, 682 insertions(+)
 create mode 100644 drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index 3b6e8f75495c..a91cc304cb9e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -25,7 +25,10 @@
 #include "amdgpu_vm.h"
 #include "amdgpu_mes.h"
 #include "amdgpu_usermode_queue.h"
+#include "amdgpu_usermode_queue_mqd.h"
 #include "soc15_common.h"
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
 
 #define CHECK_ACCESS(a) (access_ok((const void __user *)a, sizeof(__u64)))
 
@@ -47,6 +50,134 @@ amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct amdgpu_usermode
     ida_simple_remove(&uqg->ida, queue->queue_id);
 }
 
+static void
+amdgpu_userqueue_setup_mqd(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
+{
+    struct amdgpu_usermode_queue_mqd *mqd = queue->mqd_cpu_ptr;
+    uint64_t hqd_gpu_addr, wb_gpu_addr;
+    uint32_t tmp;
+    uint32_t rb_bufsz;
+
+    /* set up gfx hqd wptr */
+    mqd->cp_gfx_hqd_wptr = 0;
+    mqd->cp_gfx_hqd_wptr_hi = 0;
+
+    /* set the pointer to the MQD */
+    mqd->cp_mqd_base_addr = queue->mqd_gpu_addr & 0xfffffffc;
+    mqd->cp_mqd_base_addr_hi = upper_32_bits(queue->mqd_gpu_addr);
+
+    /* set up mqd control */
+    tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL);
+    tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
+    tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
+    tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
+    mqd->cp_gfx_mqd_control = tmp;
+
+    /* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
+    tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID);
+    tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
+    mqd->cp_gfx_hqd_vmid = 0;
+
+    /* set up default queue priority level
+     * 0x0 = low priority, 0x1 = high priority */
+    tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
+    tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
+    mqd->cp_gfx_hqd_queue_priority = tmp;
+
+    /* set up time quantum */
+    tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
+    tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
+    mqd->cp_gfx_hqd_quantum = tmp;
+
+    /* set up gfx hqd base. this is similar to CP_RB_BASE */
+    hqd_gpu_addr = queue->queue_gpu_addr >> 8;
+    mqd->cp_gfx_hqd_base = hqd_gpu_addr;
+    mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
+
+    /* set up hqd_rptr_addr/_hi, similar to CP_RB_RPTR */
+    wb_gpu_addr = queue->rptr_gpu_addr;
+    mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
+    mqd->cp_gfx_hqd_rptr_addr_hi =
+    upper_32_bits(wb_gpu_addr) & 0xffff;
+
+    /* set up rb_wptr_poll addr */
+    wb_gpu_addr = queue->wptr_gpu_addr;
+    mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+    mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+    /* set up the gfx_hqd_control, similar to CP_RB0_CNTL */
+    rb_bufsz = order_base_2(queue->queue_size / 4) - 1;
+    tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL);
+    tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
+    tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
+#ifdef __BIG_ENDIAN
+    tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
+#endif
+    mqd->cp_gfx_hqd_cntl = tmp;
+
+    /* set up cp_doorbell_control */
+    tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
+    if (queue->use_doorbell) {
+        tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+                    DOORBELL_OFFSET, queue->doorbell_index);
+        tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+                    DOORBELL_EN, 1);
+    } else {
+        tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+                    DOORBELL_EN, 0);
+    }
+    mqd->cp_rb_doorbell_control = tmp;
+
+    /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+    mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
+
+    /* activate the queue */
+    mqd->cp_gfx_hqd_active = 1;
+}
+
+static int
+amdgpu_userqueue_create_mqd(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
+{
+    int r;
+    int size = sizeof(struct amdgpu_usermode_queue_mqd);
+
+    r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
+                                AMDGPU_GEM_DOMAIN_VRAM,
+                                &queue->mqd_obj,
+                                &queue->mqd_gpu_addr,
+                                &queue->mqd_cpu_ptr);
+    if (r) {
+        DRM_ERROR("Failed to allocate bo for userqueue (%d)", r);
+        return r;
+    }
+
+    memset(queue->mqd_cpu_ptr, 0, size);
+    r = amdgpu_bo_reserve(queue->mqd_obj, false);
+    if (unlikely(r != 0)) {
+        DRM_ERROR("Failed to reserve mqd for userqueue (%d)", r);
+        goto free_mqd;
+    }
+
+    /* Fill MQD with userqueue data */
+    amdgpu_userqueue_setup_mqd(adev, queue);
+    amdgpu_bo_unreserve(queue->mqd_obj);
+    return 0;
+
+free_mqd:
+    amdgpu_bo_free_kernel(&queue->mqd_obj,
+			    &queue->mqd_gpu_addr,
+			    &queue->mqd_cpu_ptr);
+    return r;
+}
+
+static void
+amdgpu_userqueue_destroy_mqd(struct amdgpu_usermode_queue *queue)
+{
+    amdgpu_bo_free_kernel(&queue->mqd_obj,
+			    &queue->mqd_gpu_addr,
+			    &queue->mqd_cpu_ptr);
+}
+
 static int
 amdgpu_userqueue_validate_input(struct amdgpu_device *adev, struct drm_amdgpu_userq_mqd *mqd_in)
 {
@@ -120,6 +251,12 @@ int amdgpu_userqueue_create(struct amdgpu_device *adev, struct drm_file *filp,
     queue->flags = mqd_in->flags;
     queue->queue_id = amdgpu_userqueue_index(adev);
 
+    r = amdgpu_userqueue_create_mqd(adev, queue);
+    if (r < 0) {
+        DRM_ERROR("Failed to create mqd for queue\n");
+        goto free_queue;
+    }
+
     ctx->userq = queue;
     args->out.q_id = queue->queue_id;
     args->out.flags = 0;
@@ -141,6 +278,7 @@ void amdgpu_userqueue_destroy(struct amdgpu_device *adev, struct drm_file *filp,
     struct amdgpu_usermode_queue *queue = ctx->userq;
 
     mutex_lock(&adev->userq.userq_mutex);
+    amdgpu_userqueue_destroy_mqd(queue);
     amdgpu_userqueue_remove_index(adev, queue);
     ctx->userq = NULL;
     mutex_unlock(&adev->userq.userq_mutex);
diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h
new file mode 100644
index 000000000000..d0a285708ba5
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h
@@ -0,0 +1,544 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef AMDGPU_USERMODE_QUEUE_MQD_H_
+#define AMDGPU_USERMODE_QUEUE_MQD_H_
+
+/*MQD struct for usermode Queue*/
+struct amdgpu_usermode_queue_mqd
+{
+	uint32_t shadow_base_lo; // offset: 0  (0x0)
+	uint32_t shadow_base_hi; // offset: 1  (0x1)
+	uint32_t gds_bkup_base_lo ; // offset: 2  (0x2)
+	uint32_t gds_bkup_base_hi ; // offset: 3  (0x3)
+	uint32_t fw_work_area_base_lo; // offset: 4  (0x4)
+	uint32_t fw_work_area_base_hi; // offset: 5  (0x5)
+	uint32_t shadow_initialized; // offset: 6  (0x6)
+	uint32_t ib_vmid; // offset: 7  (0x7)
+	uint32_t reserved_8; // offset: 8  (0x8)
+	uint32_t reserved_9; // offset: 9  (0x9)
+	uint32_t reserved_10; // offset: 10  (0xA)
+	uint32_t reserved_11; // offset: 11  (0xB)
+	uint32_t reserved_12; // offset: 12  (0xC)
+	uint32_t reserved_13; // offset: 13  (0xD)
+	uint32_t reserved_14; // offset: 14  (0xE)
+	uint32_t reserved_15; // offset: 15  (0xF)
+	uint32_t reserved_16; // offset: 16  (0x10)
+	uint32_t reserved_17; // offset: 17  (0x11)
+	uint32_t reserved_18; // offset: 18  (0x12)
+	uint32_t reserved_19; // offset: 19  (0x13)
+	uint32_t reserved_20; // offset: 20  (0x14)
+	uint32_t reserved_21; // offset: 21  (0x15)
+	uint32_t reserved_22; // offset: 22  (0x16)
+	uint32_t reserved_23; // offset: 23  (0x17)
+	uint32_t reserved_24; // offset: 24  (0x18)
+	uint32_t reserved_25; // offset: 25  (0x19)
+	uint32_t reserved_26; // offset: 26  (0x1A)
+	uint32_t reserved_27; // offset: 27  (0x1B)
+	uint32_t reserved_28; // offset: 28  (0x1C)
+	uint32_t reserved_29; // offset: 29  (0x1D)
+	uint32_t reserved_30; // offset: 30  (0x1E)
+	uint32_t reserved_31; // offset: 31  (0x1F)
+	uint32_t reserved_32; // offset: 32  (0x20)
+	uint32_t reserved_33; // offset: 33  (0x21)
+	uint32_t reserved_34; // offset: 34  (0x22)
+	uint32_t reserved_35; // offset: 35  (0x23)
+	uint32_t reserved_36; // offset: 36  (0x24)
+	uint32_t reserved_37; // offset: 37  (0x25)
+	uint32_t reserved_38; // offset: 38  (0x26)
+	uint32_t reserved_39; // offset: 39  (0x27)
+	uint32_t reserved_40; // offset: 40  (0x28)
+	uint32_t reserved_41; // offset: 41  (0x29)
+	uint32_t reserved_42; // offset: 42  (0x2A)
+	uint32_t reserved_43; // offset: 43  (0x2B)
+	uint32_t reserved_44; // offset: 44  (0x2C)
+	uint32_t reserved_45; // offset: 45  (0x2D)
+	uint32_t reserved_46; // offset: 46  (0x2E)
+	uint32_t reserved_47; // offset: 47  (0x2F)
+	uint32_t reserved_48; // offset: 48  (0x30)
+	uint32_t reserved_49; // offset: 49  (0x31)
+	uint32_t reserved_50; // offset: 50  (0x32)
+	uint32_t reserved_51; // offset: 51  (0x33)
+	uint32_t reserved_52; // offset: 52  (0x34)
+	uint32_t reserved_53; // offset: 53  (0x35)
+	uint32_t reserved_54; // offset: 54  (0x36)
+	uint32_t reserved_55; // offset: 55  (0x37)
+	uint32_t reserved_56; // offset: 56  (0x38)
+	uint32_t reserved_57; // offset: 57  (0x39)
+	uint32_t reserved_58; // offset: 58  (0x3A)
+	uint32_t reserved_59; // offset: 59  (0x3B)
+	uint32_t reserved_60; // offset: 60  (0x3C)
+	uint32_t reserved_61; // offset: 61  (0x3D)
+	uint32_t reserved_62; // offset: 62  (0x3E)
+	uint32_t reserved_63; // offset: 63  (0x3F)
+	uint32_t reserved_64; // offset: 64  (0x40)
+	uint32_t reserved_65; // offset: 65  (0x41)
+	uint32_t reserved_66; // offset: 66  (0x42)
+	uint32_t reserved_67; // offset: 67  (0x43)
+	uint32_t reserved_68; // offset: 68  (0x44)
+	uint32_t reserved_69; // offset: 69  (0x45)
+	uint32_t reserved_70; // offset: 70  (0x46)
+	uint32_t reserved_71; // offset: 71  (0x47)
+	uint32_t reserved_72; // offset: 72  (0x48)
+	uint32_t reserved_73; // offset: 73  (0x49)
+	uint32_t reserved_74; // offset: 74  (0x4A)
+	uint32_t reserved_75; // offset: 75  (0x4B)
+	uint32_t reserved_76; // offset: 76  (0x4C)
+	uint32_t reserved_77; // offset: 77  (0x4D)
+	uint32_t reserved_78; // offset: 78  (0x4E)
+	uint32_t reserved_79; // offset: 79  (0x4F)
+	uint32_t reserved_80; // offset: 80  (0x50)
+	uint32_t reserved_81; // offset: 81  (0x51)
+	uint32_t reserved_82; // offset: 82  (0x52)
+	uint32_t reserved_83; // offset: 83  (0x53)
+	uint32_t checksum_lo; // offset: 84  (0x54)
+	uint32_t checksum_hi; // offset: 85  (0x55)
+	uint32_t cp_mqd_query_time_lo; // offset: 86  (0x56)
+	uint32_t cp_mqd_query_time_hi; // offset: 87  (0x57)
+	uint32_t reserved_88; // offset: 88  (0x58)
+	uint32_t reserved_89; // offset: 89  (0x59)
+	uint32_t reserved_90; // offset: 90  (0x5A)
+	uint32_t reserved_91; // offset: 91  (0x5B)
+	uint32_t cp_mqd_query_wave_count; // offset: 92  (0x5C)
+	uint32_t cp_mqd_query_gfx_hqd_rptr; // offset: 93  (0x5D)
+	uint32_t cp_mqd_query_gfx_hqd_wptr; // offset: 94  (0x5E)
+	uint32_t cp_mqd_query_gfx_hqd_offset; // offset: 95  (0x5F)
+	uint32_t reserved_96; // offset: 96  (0x60)
+	uint32_t reserved_97; // offset: 97  (0x61)
+	uint32_t reserved_98; // offset: 98  (0x62)
+	uint32_t reserved_99; // offset: 99  (0x63)
+	uint32_t reserved_100; // offset: 100  (0x64)
+	uint32_t reserved_101; // offset: 101  (0x65)
+	uint32_t reserved_102; // offset: 102  (0x66)
+	uint32_t reserved_103; // offset: 103  (0x67)
+	uint32_t task_shader_control_buf_addr_lo; // offset: 104  (0x68)
+	uint32_t task_shader_control_buf_addr_hi; // offset: 105  (0x69)
+	uint32_t task_shader_read_rptr_lo; // offset: 106  (0x6A)
+	uint32_t task_shader_read_rptr_hi; // offset: 107  (0x6B)
+	uint32_t task_shader_num_entries; // offset: 108  (0x6C)
+	uint32_t task_shader_num_entries_bits; // offset: 109  (0x6D)
+	uint32_t task_shader_ring_buffer_addr_lo; // offset: 110  (0x6E)
+	uint32_t task_shader_ring_buffer_addr_hi; // offset: 111  (0x6F)
+	uint32_t reserved_112; // offset: 112  (0x70)
+	uint32_t reserved_113; // offset: 113  (0x71)
+	uint32_t reserved_114; // offset: 114  (0x72)
+	uint32_t reserved_115; // offset: 115  (0x73)
+	uint32_t reserved_116; // offset: 116  (0x74)
+	uint32_t reserved_117; // offset: 117  (0x75)
+	uint32_t reserved_118; // offset: 118  (0x76)
+	uint32_t reserved_119; // offset: 119  (0x77)
+	uint32_t reserved_120; // offset: 120  (0x78)
+	uint32_t reserved_121; // offset: 121  (0x79)
+	uint32_t reserved_122; // offset: 122  (0x7A)
+	uint32_t reserved_123; // offset: 123  (0x7B)
+	uint32_t reserved_124; // offset: 124  (0x7C)
+	uint32_t reserved_125; // offset: 125  (0x7D)
+	uint32_t reserved_126; // offset: 126  (0x7E)
+	uint32_t reserved_127; // offset: 127  (0x7F)
+	uint32_t cp_mqd_base_addr; // offset: 128  (0x80)
+	uint32_t cp_mqd_base_addr_hi; // offset: 129  (0x81)
+	uint32_t cp_gfx_hqd_active; // offset: 130  (0x82)
+	uint32_t cp_gfx_hqd_vmid; // offset: 131  (0x83)
+	uint32_t reserved_131; // offset: 132  (0x84)
+	uint32_t reserved_132; // offset: 133  (0x85)
+	uint32_t cp_gfx_hqd_queue_priority; // offset: 134  (0x86)
+	uint32_t cp_gfx_hqd_quantum; // offset: 135  (0x87)
+	uint32_t cp_gfx_hqd_base; // offset: 136  (0x88)
+	uint32_t cp_gfx_hqd_base_hi; // offset: 137  (0x89)
+	uint32_t cp_gfx_hqd_rptr; // offset: 138  (0x8A)
+	uint32_t cp_gfx_hqd_rptr_addr; // offset: 139  (0x8B)
+	uint32_t cp_gfx_hqd_rptr_addr_hi; // offset: 140  (0x8C)
+	uint32_t cp_rb_wptr_poll_addr_lo; // offset: 141  (0x8D)
+	uint32_t cp_rb_wptr_poll_addr_hi; // offset: 142  (0x8E)
+	uint32_t cp_rb_doorbell_control; // offset: 143  (0x8F)
+	uint32_t cp_gfx_hqd_offset; // offset: 144  (0x90)
+	uint32_t cp_gfx_hqd_cntl; // offset: 145  (0x91)
+	uint32_t reserved_146; // offset: 146  (0x92)
+	uint32_t reserved_147; // offset: 147  (0x93)
+	uint32_t cp_gfx_hqd_csmd_rptr; // offset: 148  (0x94)
+	uint32_t cp_gfx_hqd_wptr; // offset: 149  (0x95)
+	uint32_t cp_gfx_hqd_wptr_hi; // offset: 150  (0x96)
+	uint32_t reserved_151; // offset: 151  (0x97)
+	uint32_t reserved_152; // offset: 152  (0x98)
+	uint32_t reserved_153; // offset: 153  (0x99)
+	uint32_t reserved_154; // offset: 154  (0x9A)
+	uint32_t reserved_155; // offset: 155  (0x9B)
+	uint32_t cp_gfx_hqd_mapped; // offset: 156  (0x9C)
+	uint32_t cp_gfx_hqd_que_mgr_control; // offset: 157  (0x9D)
+	uint32_t reserved_158; // offset: 158  (0x9E)
+	uint32_t reserved_159; // offset: 159  (0x9F)
+	uint32_t cp_gfx_hqd_hq_status0; // offset: 160  (0xA0)
+	uint32_t cp_gfx_hqd_hq_control0; // offset: 161  (0xA1)
+	uint32_t cp_gfx_mqd_control; // offset: 162  (0xA2)
+	uint32_t reserved_163; // offset: 163  (0xA3)
+	uint32_t reserved_164; // offset: 164  (0xA4)
+	uint32_t reserved_165; // offset: 165  (0xA5)
+	uint32_t reserved_166; // offset: 166  (0xA6)
+	uint32_t reserved_167; // offset: 167  (0xA7)
+	uint32_t reserved_168; // offset: 168  (0xA8)
+	uint32_t reserved_169; // offset: 169  (0xA9)
+	uint32_t cp_num_prim_needed_count0_lo; // offset: 170  (0xAA)
+	uint32_t cp_num_prim_needed_count0_hi; // offset: 171  (0xAB)
+	uint32_t cp_num_prim_needed_count1_lo; // offset: 172  (0xAC)
+	uint32_t cp_num_prim_needed_count1_hi; // offset: 173  (0xAD)
+	uint32_t cp_num_prim_needed_count2_lo; // offset: 174  (0xAE)
+	uint32_t cp_num_prim_needed_count2_hi; // offset: 175  (0xAF)
+	uint32_t cp_num_prim_needed_count3_lo; // offset: 176  (0xB0)
+	uint32_t cp_num_prim_needed_count3_hi; // offset: 177  (0xB1)
+	uint32_t cp_num_prim_written_count0_lo; // offset: 178  (0xB2)
+	uint32_t cp_num_prim_written_count0_hi; // offset: 179  (0xB3)
+	uint32_t cp_num_prim_written_count1_lo; // offset: 180  (0xB4)
+	uint32_t cp_num_prim_written_count1_hi; // offset: 181  (0xB5)
+	uint32_t cp_num_prim_written_count2_lo; // offset: 182  (0xB6)
+	uint32_t cp_num_prim_written_count2_hi; // offset: 183  (0xB7)
+	uint32_t cp_num_prim_written_count3_lo; // offset: 184  (0xB8)
+	uint32_t cp_num_prim_written_count3_hi; // offset: 185  (0xB9)
+	uint32_t reserved_186; // offset: 186  (0xBA)
+	uint32_t reserved_187; // offset: 187  (0xBB)
+	uint32_t reserved_188; // offset: 188  (0xBC)
+	uint32_t reserved_189; // offset: 189  (0xBD)
+	uint32_t reserved_190; // offset: 190  (0xBE)
+	uint32_t reserved_191; // offset: 191  (0xBF)
+	uint32_t reserved_192; // offset: 192  (0xC0)
+	uint32_t reserved_193; // offset: 193  (0xC1)
+	uint32_t reserved_194; // offset: 194  (0xC2)
+	uint32_t reserved_195; // offset: 195  (0xC3)
+	uint32_t reserved_196; // offset: 196  (0xC4)
+	uint32_t reserved_197; // offset: 197  (0xC5)
+	uint32_t reserved_198; // offset: 198  (0xC6)
+	uint32_t reserved_199; // offset: 199  (0xC7)
+	uint32_t reserved_200; // offset: 200  (0xC8)
+	uint32_t reserved_201; // offset: 201  (0xC9)
+	uint32_t reserved_202; // offset: 202  (0xCA)
+	uint32_t reserved_203; // offset: 203  (0xCB)
+	uint32_t reserved_204; // offset: 204  (0xCC)
+	uint32_t reserved_205; // offset: 205  (0xCD)
+	uint32_t reserved_206; // offset: 206  (0xCE)
+	uint32_t reserved_207; // offset: 207  (0xCF)
+	uint32_t reserved_208; // offset: 208  (0xD0)
+	uint32_t reserved_209; // offset: 209  (0xD1)
+	uint32_t reserved_210; // offset: 210  (0xD2)
+	uint32_t reserved_211; // offset: 211  (0xD3)
+	uint32_t reserved_212; // offset: 212  (0xD4)
+	uint32_t reserved_213; // offset: 213  (0xD5)
+	uint32_t reserved_214; // offset: 214  (0xD6)
+	uint32_t reserved_215; // offset: 215  (0xD7)
+	uint32_t reserved_216; // offset: 216  (0xD8)
+	uint32_t reserved_217; // offset: 217  (0xD9)
+	uint32_t reserved_218; // offset: 218  (0xDA)
+	uint32_t reserved_219; // offset: 219  (0xDB)
+	uint32_t reserved_220; // offset: 220  (0xDC)
+	uint32_t reserved_221; // offset: 221  (0xDD)
+	uint32_t reserved_222; // offset: 222  (0xDE)
+	uint32_t reserved_223; // offset: 223  (0xDF)
+	uint32_t reserved_224; // offset: 224  (0xE0)
+	uint32_t reserved_225; // offset: 225  (0xE1)
+	uint32_t reserved_226; // offset: 226  (0xE2)
+	uint32_t reserved_227; // offset: 227  (0xE3)
+	uint32_t reserved_228; // offset: 228  (0xE4)
+	uint32_t reserved_229; // offset: 229  (0xE5)
+	uint32_t reserved_230; // offset: 230  (0xE6)
+	uint32_t reserved_231; // offset: 231  (0xE7)
+	uint32_t reserved_232; // offset: 232  (0xE8)
+	uint32_t reserved_233; // offset: 233  (0xE9)
+	uint32_t reserved_234; // offset: 234  (0xEA)
+	uint32_t reserved_235; // offset: 235  (0xEB)
+	uint32_t reserved_236; // offset: 236  (0xEC)
+	uint32_t reserved_237; // offset: 237  (0xED)
+	uint32_t reserved_238; // offset: 238  (0xEE)
+	uint32_t reserved_239; // offset: 239  (0xEF)
+	uint32_t reserved_240; // offset: 240  (0xF0)
+	uint32_t reserved_241; // offset: 241  (0xF1)
+	uint32_t reserved_242; // offset: 242  (0xF2)
+	uint32_t reserved_243; // offset: 243  (0xF3)
+	uint32_t reserved_244; // offset: 244  (0xF4)
+	uint32_t reserved_245; // offset: 245  (0xF5)
+	uint32_t reserved_246; // offset: 246  (0xF6)
+	uint32_t reserved_247; // offset: 247  (0xF7)
+	uint32_t reserved_248; // offset: 248  (0xF8)
+	uint32_t reserved_249; // offset: 249  (0xF9)
+	uint32_t reserved_250; // offset: 250  (0xFA)
+	uint32_t reserved_251; // offset: 251  (0xFB)
+	uint32_t reserved_252; // offset: 252  (0xFC)
+	uint32_t reserved_253; // offset: 253  (0xFD)
+	uint32_t reserved_254; // offset: 254  (0xFE)
+	uint32_t reserved_255; // offset: 255  (0xFF)
+	uint32_t reserved_256; // offset: 256  (0x100)
+	uint32_t reserved_257; // offset: 257  (0x101)
+	uint32_t reserved_258; // offset: 258  (0x102)
+	uint32_t reserved_259; // offset: 259  (0x103)
+	uint32_t reserved_260; // offset: 260  (0x104)
+	uint32_t reserved_261; // offset: 261  (0x105)
+	uint32_t reserved_262; // offset: 262  (0x106)
+	uint32_t reserved_263; // offset: 263  (0x107)
+	uint32_t reserved_264; // offset: 264  (0x108)
+	uint32_t reserved_265; // offset: 265  (0x109)
+	uint32_t reserved_266; // offset: 266  (0x10A)
+	uint32_t reserved_267; // offset: 267  (0x10B)
+	uint32_t reserved_268; // offset: 268  (0x10C)
+	uint32_t reserved_269; // offset: 269  (0x10D)
+	uint32_t reserved_270; // offset: 270  (0x10E)
+	uint32_t reserved_271; // offset: 271  (0x10F)
+	uint32_t reserved_272; // offset: 272  (0x110)
+	uint32_t reserved_273; // offset: 273  (0x111)
+	uint32_t reserved_274; // offset: 274  (0x112)
+	uint32_t reserved_275; // offset: 275  (0x113)
+	uint32_t reserved_276; // offset: 276  (0x114)
+	uint32_t reserved_277; // offset: 277  (0x115)
+	uint32_t reserved_278; // offset: 278  (0x116)
+	uint32_t reserved_279; // offset: 279  (0x117)
+	uint32_t reserved_280; // offset: 280  (0x118)
+	uint32_t reserved_281; // offset: 281  (0x119)
+	uint32_t reserved_282; // offset: 282  (0x11A)
+	uint32_t reserved_283; // offset: 283  (0x11B)
+	uint32_t reserved_284; // offset: 284  (0x11C)
+	uint32_t reserved_285; // offset: 285  (0x11D)
+	uint32_t reserved_286; // offset: 286  (0x11E)
+	uint32_t reserved_287; // offset: 287  (0x11F)
+	uint32_t reserved_288; // offset: 288  (0x120)
+	uint32_t reserved_289; // offset: 289  (0x121)
+	uint32_t reserved_290; // offset: 290  (0x122)
+	uint32_t reserved_291; // offset: 291  (0x123)
+	uint32_t reserved_292; // offset: 292  (0x124)
+	uint32_t reserved_293; // offset: 293  (0x125)
+	uint32_t reserved_294; // offset: 294  (0x126)
+	uint32_t reserved_295; // offset: 295  (0x127)
+	uint32_t reserved_296; // offset: 296  (0x128)
+	uint32_t reserved_297; // offset: 297  (0x129)
+	uint32_t reserved_298; // offset: 298  (0x12A)
+	uint32_t reserved_299; // offset: 299  (0x12B)
+	uint32_t reserved_300; // offset: 300  (0x12C)
+	uint32_t reserved_301; // offset: 301  (0x12D)
+	uint32_t reserved_302; // offset: 302  (0x12E)
+	uint32_t reserved_303; // offset: 303  (0x12F)
+	uint32_t reserved_304; // offset: 304  (0x130)
+	uint32_t reserved_305; // offset: 305  (0x131)
+	uint32_t reserved_306; // offset: 306  (0x132)
+	uint32_t reserved_307; // offset: 307  (0x133)
+	uint32_t reserved_308; // offset: 308  (0x134)
+	uint32_t reserved_309; // offset: 309  (0x135)
+	uint32_t reserved_310; // offset: 310  (0x136)
+	uint32_t reserved_311; // offset: 311  (0x137)
+	uint32_t reserved_312; // offset: 312  (0x138)
+	uint32_t reserved_313; // offset: 313  (0x139)
+	uint32_t reserved_314; // offset: 314  (0x13A)
+	uint32_t reserved_315; // offset: 315  (0x13B)
+	uint32_t reserved_316; // offset: 316  (0x13C)
+	uint32_t reserved_317; // offset: 317  (0x13D)
+	uint32_t reserved_318; // offset: 318  (0x13E)
+	uint32_t reserved_319; // offset: 319  (0x13F)
+	uint32_t reserved_320; // offset: 320  (0x140)
+	uint32_t reserved_321; // offset: 321  (0x141)
+	uint32_t reserved_322; // offset: 322  (0x142)
+	uint32_t reserved_323; // offset: 323  (0x143)
+	uint32_t reserved_324; // offset: 324  (0x144)
+	uint32_t reserved_325; // offset: 325  (0x145)
+	uint32_t reserved_326; // offset: 326  (0x146)
+	uint32_t reserved_327; // offset: 327  (0x147)
+	uint32_t reserved_328; // offset: 328  (0x148)
+	uint32_t reserved_329; // offset: 329  (0x149)
+	uint32_t reserved_330; // offset: 330  (0x14A)
+	uint32_t reserved_331; // offset: 331  (0x14B)
+	uint32_t reserved_332; // offset: 332  (0x14C)
+	uint32_t reserved_333; // offset: 333  (0x14D)
+	uint32_t reserved_334; // offset: 334  (0x14E)
+	uint32_t reserved_335; // offset: 335  (0x14F)
+	uint32_t reserved_336; // offset: 336  (0x150)
+	uint32_t reserved_337; // offset: 337  (0x151)
+	uint32_t reserved_338; // offset: 338  (0x152)
+	uint32_t reserved_339; // offset: 339  (0x153)
+	uint32_t reserved_340; // offset: 340  (0x154)
+	uint32_t reserved_341; // offset: 341  (0x155)
+	uint32_t reserved_342; // offset: 342  (0x156)
+	uint32_t reserved_343; // offset: 343  (0x157)
+	uint32_t reserved_344; // offset: 344  (0x158)
+	uint32_t reserved_345; // offset: 345  (0x159)
+	uint32_t reserved_346; // offset: 346  (0x15A)
+	uint32_t reserved_347; // offset: 347  (0x15B)
+	uint32_t reserved_348; // offset: 348  (0x15C)
+	uint32_t reserved_349; // offset: 349  (0x15D)
+	uint32_t reserved_350; // offset: 350  (0x15E)
+	uint32_t reserved_351; // offset: 351  (0x15F)
+	uint32_t reserved_352; // offset: 352  (0x160)
+	uint32_t reserved_353; // offset: 353  (0x161)
+	uint32_t reserved_354; // offset: 354  (0x162)
+	uint32_t reserved_355; // offset: 355  (0x163)
+	uint32_t reserved_356; // offset: 356  (0x164)
+	uint32_t reserved_357; // offset: 357  (0x165)
+	uint32_t reserved_358; // offset: 358  (0x166)
+	uint32_t reserved_359; // offset: 359  (0x167)
+	uint32_t reserved_360; // offset: 360  (0x168)
+	uint32_t reserved_361; // offset: 361  (0x169)
+	uint32_t reserved_362; // offset: 362  (0x16A)
+	uint32_t reserved_363; // offset: 363  (0x16B)
+	uint32_t reserved_364; // offset: 364  (0x16C)
+	uint32_t reserved_365; // offset: 365  (0x16D)
+	uint32_t reserved_366; // offset: 366  (0x16E)
+	uint32_t reserved_367; // offset: 367  (0x16F)
+	uint32_t reserved_368; // offset: 368  (0x170)
+	uint32_t reserved_369; // offset: 369  (0x171)
+	uint32_t reserved_370; // offset: 370  (0x172)
+	uint32_t reserved_371; // offset: 371  (0x173)
+	uint32_t reserved_372; // offset: 372  (0x174)
+	uint32_t reserved_373; // offset: 373  (0x175)
+	uint32_t reserved_374; // offset: 374  (0x176)
+	uint32_t reserved_375; // offset: 375  (0x177)
+	uint32_t reserved_376; // offset: 376  (0x178)
+	uint32_t reserved_377; // offset: 377  (0x179)
+	uint32_t reserved_378; // offset: 378  (0x17A)
+	uint32_t reserved_379; // offset: 379  (0x17B)
+	uint32_t reserved_380; // offset: 380  (0x17C)
+	uint32_t reserved_381; // offset: 381  (0x17D)
+	uint32_t reserved_382; // offset: 382  (0x17E)
+	uint32_t reserved_383; // offset: 383  (0x17F)
+	uint32_t reserved_384; // offset: 384  (0x180)
+	uint32_t reserved_385; // offset: 385  (0x181)
+	uint32_t reserved_386; // offset: 386  (0x182)
+	uint32_t reserved_387; // offset: 387  (0x183)
+	uint32_t reserved_388; // offset: 388  (0x184)
+	uint32_t reserved_389; // offset: 389  (0x185)
+	uint32_t reserved_390; // offset: 390  (0x186)
+	uint32_t reserved_391; // offset: 391  (0x187)
+	uint32_t reserved_392; // offset: 392  (0x188)
+	uint32_t reserved_393; // offset: 393  (0x189)
+	uint32_t reserved_394; // offset: 394  (0x18A)
+	uint32_t reserved_395; // offset: 395  (0x18B)
+	uint32_t reserved_396; // offset: 396  (0x18C)
+	uint32_t reserved_397; // offset: 397  (0x18D)
+	uint32_t reserved_398; // offset: 398  (0x18E)
+	uint32_t reserved_399; // offset: 399  (0x18F)
+	uint32_t reserved_400; // offset: 400  (0x190)
+	uint32_t reserved_401; // offset: 401  (0x191)
+	uint32_t reserved_402; // offset: 402  (0x192)
+	uint32_t reserved_403; // offset: 403  (0x193)
+	uint32_t reserved_404; // offset: 404  (0x194)
+	uint32_t reserved_405; // offset: 405  (0x195)
+	uint32_t reserved_406; // offset: 406  (0x196)
+	uint32_t reserved_407; // offset: 407  (0x197)
+	uint32_t reserved_408; // offset: 408  (0x198)
+	uint32_t reserved_409; // offset: 409  (0x199)
+	uint32_t reserved_410; // offset: 410  (0x19A)
+	uint32_t reserved_411; // offset: 411  (0x19B)
+	uint32_t reserved_412; // offset: 412  (0x19C)
+	uint32_t reserved_413; // offset: 413  (0x19D)
+	uint32_t reserved_414; // offset: 414  (0x19E)
+	uint32_t reserved_415; // offset: 415  (0x19F)
+	uint32_t reserved_416; // offset: 416  (0x1A0)
+	uint32_t reserved_417; // offset: 417  (0x1A1)
+	uint32_t reserved_418; // offset: 418  (0x1A2)
+	uint32_t reserved_419; // offset: 419  (0x1A3)
+	uint32_t reserved_420; // offset: 420  (0x1A4)
+	uint32_t reserved_421; // offset: 421  (0x1A5)
+	uint32_t reserved_422; // offset: 422  (0x1A6)
+	uint32_t reserved_423; // offset: 423  (0x1A7)
+	uint32_t reserved_424; // offset: 424  (0x1A8)
+	uint32_t reserved_425; // offset: 425  (0x1A9)
+	uint32_t reserved_426; // offset: 426  (0x1AA)
+	uint32_t reserved_427; // offset: 427  (0x1AB)
+	uint32_t reserved_428; // offset: 428  (0x1AC)
+	uint32_t reserved_429; // offset: 429  (0x1AD)
+	uint32_t reserved_430; // offset: 430  (0x1AE)
+	uint32_t reserved_431; // offset: 431  (0x1AF)
+	uint32_t reserved_432; // offset: 432  (0x1B0)
+	uint32_t reserved_433; // offset: 433  (0x1B1)
+	uint32_t reserved_434; // offset: 434  (0x1B2)
+	uint32_t reserved_435; // offset: 435  (0x1B3)
+	uint32_t reserved_436; // offset: 436  (0x1B4)
+	uint32_t reserved_437; // offset: 437  (0x1B5)
+	uint32_t reserved_438; // offset: 438  (0x1B6)
+	uint32_t reserved_439; // offset: 439  (0x1B7)
+	uint32_t reserved_440; // offset: 440  (0x1B8)
+	uint32_t reserved_441; // offset: 441  (0x1B9)
+	uint32_t reserved_442; // offset: 442  (0x1BA)
+	uint32_t reserved_443; // offset: 443  (0x1BB)
+	uint32_t reserved_444; // offset: 444  (0x1BC)
+	uint32_t reserved_445; // offset: 445  (0x1BD)
+	uint32_t reserved_446; // offset: 446  (0x1BE)
+	uint32_t reserved_447; // offset: 447  (0x1BF)
+	uint32_t reserved_448; // offset: 448  (0x1C0)
+	uint32_t reserved_449; // offset: 449  (0x1C1)
+	uint32_t reserved_450; // offset: 450  (0x1C2)
+	uint32_t reserved_451; // offset: 451  (0x1C3)
+	uint32_t reserved_452; // offset: 452  (0x1C4)
+	uint32_t reserved_453; // offset: 453  (0x1C5)
+	uint32_t reserved_454; // offset: 454  (0x1C6)
+	uint32_t reserved_455; // offset: 455  (0x1C7)
+	uint32_t reserved_456; // offset: 456  (0x1C8)
+	uint32_t reserved_457; // offset: 457  (0x1C9)
+	uint32_t reserved_458; // offset: 458  (0x1CA)
+	uint32_t reserved_459; // offset: 459  (0x1CB)
+	uint32_t reserved_460; // offset: 460  (0x1CC)
+	uint32_t reserved_461; // offset: 461  (0x1CD)
+	uint32_t reserved_462; // offset: 462  (0x1CE)
+	uint32_t reserved_463; // offset: 463  (0x1CF)
+	uint32_t reserved_464; // offset: 464  (0x1D0)
+	uint32_t reserved_465; // offset: 465  (0x1D1)
+	uint32_t reserved_466; // offset: 466  (0x1D2)
+	uint32_t reserved_467; // offset: 467  (0x1D3)
+	uint32_t reserved_468; // offset: 468  (0x1D4)
+	uint32_t reserved_469; // offset: 469  (0x1D5)
+	uint32_t reserved_470; // offset: 470  (0x1D6)
+	uint32_t reserved_471; // offset: 471  (0x1D7)
+	uint32_t reserved_472; // offset: 472  (0x1D8)
+	uint32_t reserved_473; // offset: 473  (0x1D9)
+	uint32_t reserved_474; // offset: 474  (0x1DA)
+	uint32_t reserved_475; // offset: 475  (0x1DB)
+	uint32_t reserved_476; // offset: 476  (0x1DC)
+	uint32_t reserved_477; // offset: 477  (0x1DD)
+	uint32_t reserved_478; // offset: 478  (0x1DE)
+	uint32_t reserved_479; // offset: 479  (0x1DF)
+	uint32_t reserved_480; // offset: 480  (0x1E0)
+	uint32_t reserved_481; // offset: 481  (0x1E1)
+	uint32_t reserved_482; // offset: 482  (0x1E2)
+	uint32_t reserved_483; // offset: 483  (0x1E3)
+	uint32_t reserved_484; // offset: 484  (0x1E4)
+	uint32_t reserved_485; // offset: 485  (0x1E5)
+	uint32_t reserved_486; // offset: 486  (0x1E6)
+	uint32_t reserved_487; // offset: 487  (0x1E7)
+	uint32_t reserved_488; // offset: 488  (0x1E8)
+	uint32_t reserved_489; // offset: 489  (0x1E9)
+	uint32_t reserved_490; // offset: 490  (0x1EA)
+	uint32_t reserved_491; // offset: 491  (0x1EB)
+	uint32_t reserved_492; // offset: 492  (0x1EC)
+	uint32_t reserved_493; // offset: 493  (0x1ED)
+	uint32_t reserved_494; // offset: 494  (0x1EE)
+	uint32_t reserved_495; // offset: 495  (0x1EF)
+	uint32_t reserved_496; // offset: 496  (0x1F0)
+	uint32_t reserved_497; // offset: 497  (0x1F1)
+	uint32_t reserved_498; // offset: 498  (0x1F2)
+	uint32_t reserved_499; // offset: 499  (0x1F3)
+	uint32_t reserved_500; // offset: 500  (0x1F4)
+	uint32_t reserved_501; // offset: 501  (0x1F5)
+	uint32_t reserved_502; // offset: 502  (0x1F6)
+	uint32_t reserved_503; // offset: 503  (0x1F7)
+	uint32_t reserved_504; // offset: 504  (0x1F8)
+	uint32_t reserved_505; // offset: 505  (0x1F9)
+	uint32_t reserved_506; // offset: 506  (0x1FA)
+	uint32_t reserved_507; // offset: 507  (0x1FB)
+	uint32_t reserved_508; // offset: 508  (0x1FC)
+	uint32_t reserved_509; // offset: 509  (0x1FD)
+	uint32_t reserved_510; // offset: 510  (0x1FE)
+	uint32_t reserved_511; // offset: 511  (0x1FF)
+};
+
+#endif
\ No newline at end of file
-- 
2.34.1



* [RFC 4/7] drm/amdgpu: Allocate doorbell slot for user queue
  2022-12-23 19:36 [RFC 0/7] RFC: Usermode queue for AMDGPU driver Shashank Sharma
                   ` (2 preceding siblings ...)
  2022-12-23 19:36 ` [RFC 3/7] drm/amdgpu: Create MQD for userspace queue Shashank Sharma
@ 2022-12-23 19:36 ` Shashank Sharma
  2022-12-29 17:50   ` Alex Deucher
  2022-12-23 19:36 ` [RFC 5/7] drm/amdgpu: Create context for usermode queue Shashank Sharma
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 64+ messages in thread
From: Shashank Sharma @ 2022-12-23 19:36 UTC (permalink / raw)
  To: amd-gfx
  Cc: Alex Deucher, Shashank Sharma, Christian Koenig, arvind.yadav,
	arunpravin.paneerselvam

This patch allocates a doorbell slot in the doorbell BAR for the usermode
queue. We are using the unique queue-id to get this slot from MES.
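
For illustration only (not part of this patch), userspace would
eventually ring this doorbell by writing the new wptr at the slot's
dword offset in its CPU mapping of the doorbell BO; a rough sketch,
assuming a 64-bit doorbell write as used for gfx queues:

#include <stdint.h>

static void ring_doorbell(void *db_cpu_map, uint32_t doorbell_offset,
			  uint64_t new_wptr)
{
	/* doorbell_offset is in dwords, as in the UAPI of patch 1 */
	volatile uint64_t *db =
		(volatile uint64_t *)((uint32_t *)db_cpu_map + doorbell_offset);

	*db = new_wptr;
}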

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 28 +++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index a91cc304cb9e..b566ce4cb7f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -50,6 +50,25 @@ amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct amdgpu_usermode
     ida_simple_remove(&uqg->ida, queue->queue_id);
 }
 
+static int
+amdgpu_userqueue_get_doorbell(struct amdgpu_device *adev,
+                    struct amdgpu_usermode_queue *queue)
+{
+    int r;
+    unsigned int doorbell_index;
+
+    r = amdgpu_mes_alloc_process_doorbells(adev, &doorbell_index);
+    if (r < 0) {
+        DRM_ERROR("Failed to allocate doorbell for user queue\n");
+        return r;
+    }
+
+    /* We are using the unique queue-id to get the doorbell here */
+    queue->doorbell_index = amdgpu_mes_get_doorbell_dw_offset_in_bar(adev,
+			    doorbell_index, queue->queue_id);
+    return 0;
+}
+
 static void
 amdgpu_userqueue_setup_mqd(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
 {
@@ -257,12 +276,21 @@ int amdgpu_userqueue_create(struct amdgpu_device *adev, struct drm_file *filp,
         goto free_queue;
     }
 
+    r = amdgpu_userqueue_get_doorbell(adev, queue);
+    if (r) {
+        DRM_ERROR("Failed to create doorbell for queue\n");
+        goto free_mqd;
+    }
+
     ctx->userq = queue;
     args->out.q_id = queue->queue_id;
     args->out.flags = 0;
     mutex_unlock(&adev->userq.userq_mutex);
     return 0;
 
+free_mqd:
+    amdgpu_userqueue_destroy_mqd(queue);
+
 free_queue:
     amdgpu_userqueue_remove_index(adev, queue);
     mutex_unlock(&adev->userq.userq_mutex);
-- 
2.34.1



* [RFC 5/7] drm/amdgpu: Create context for usermode queue
  2022-12-23 19:36 [RFC 0/7] RFC: Usermode queue for AMDGPU driver Shashank Sharma
                   ` (3 preceding siblings ...)
  2022-12-23 19:36 ` [RFC 4/7] drm/amdgpu: Allocate doorbell slot for user queue Shashank Sharma
@ 2022-12-23 19:36 ` Shashank Sharma
  2022-12-29 17:54   ` Alex Deucher
  2022-12-23 19:36 ` [RFC 6/7] drm/amdgpu: Map userqueue into HW Shashank Sharma
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 64+ messages in thread
From: Shashank Sharma @ 2022-12-23 19:36 UTC (permalink / raw)
  To: amd-gfx
  Cc: Alex Deucher, Shashank Sharma, Christian Koenig, arvind.yadav,
	arunpravin.paneerselvam

The FW expects us to allocate at least one page as process
context space, and one for gang context space. This patch adds the
objects for the same.

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>

Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 57 +++++++++++++++++++
 .../drm/amd/include/amdgpu_usermode_queue.h   |  8 +++
 2 files changed, 65 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index b566ce4cb7f0..2a854a5e2f70 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -69,6 +69,56 @@ amdgpu_userqueue_get_doorbell(struct amdgpu_device *adev,
     return 0;
 }
 
+static int
+amdgpu_userqueue_create_context(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
+{
+    int r;
+    struct amdgpu_userq_ctx *pctx = &queue->proc_ctx;
+    struct amdgpu_userq_ctx *gctx = &queue->gang_ctx;
+    /*
+     * The FW expects at least one page of space allocated for
+     * process context related work, and one for gang context.
+     */
+    r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+                                AMDGPU_GEM_DOMAIN_VRAM,
+                                &pctx->obj,
+                                &pctx->gpu_addr,
+                                &pctx->cpu_ptr);
+    if (r) {
+        DRM_ERROR("Failed to allocate proc bo for userqueue (%d)", r);
+        return r;
+    }
+
+    r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+                                AMDGPU_GEM_DOMAIN_VRAM,
+                                &gctx->obj,
+                                &gctx->gpu_addr,
+                                &gctx->cpu_ptr);
+    if (r) {
+        DRM_ERROR("Failed to allocate proc bo for userqueue (%d)", r);
+        amdgpu_bo_free_kernel(&pctx->obj,
+                              &pctx->gpu_addr,
+                              &pctx->cpu_ptr);
+        return r;
+    }
+
+    return 0;
+}
+
+static void
+amdgpu_userqueue_free_context(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
+{
+    struct amdgpu_userq_ctx *pctx = &queue->proc_ctx;
+    struct amdgpu_userq_ctx *gctx = &queue->gang_ctx;
+
+    amdgpu_bo_free_kernel(&pctx->obj,
+                          &pctx->gpu_addr,
+                          &pctx->cpu_ptr);
+    amdgpu_bo_free_kernel(&gctx->obj,
+                          &gctx->gpu_addr,
+                          &gctx->cpu_ptr);
+}
+
 static void
 amdgpu_userqueue_setup_mqd(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
 {
@@ -282,6 +332,12 @@ int amdgpu_userqueue_create(struct amdgpu_device *adev, struct drm_file *filp,
         goto free_mqd;
     }
 
+    r = amdgpu_userqueue_create_context(adev, queue);
+    if (r < 0) {
+        DRM_ERROR("Failed to create context for queue\n");
+        goto free_mqd;
+    }
+
     ctx->userq = queue;
     args->out.q_id = queue->queue_id;
     args->out.flags = 0;
@@ -306,6 +362,7 @@ void amdgpu_userqueue_destroy(struct amdgpu_device *adev, struct drm_file *filp,
     struct amdgpu_usermode_queue *queue = ctx->userq;
 
     mutex_lock(&adev->userq.userq_mutex);
+    amdgpu_userqueue_free_context(adev, queue);
     amdgpu_userqueue_destroy_mqd(queue);
     amdgpu_userqueue_remove_index(adev, queue);
     ctx->userq = NULL;
diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
index c1fe39ffaf72..8bf3c0be6937 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
@@ -26,6 +26,12 @@
 
 #define AMDGPU_MAX_USERQ 512
 
+struct amdgpu_userq_ctx {
+	struct amdgpu_bo *obj;
+	uint64_t gpu_addr;
+	void	*cpu_ptr;
+};
+
 struct amdgpu_usermode_queue {
 	int		queue_id;
 	int		queue_type;
@@ -44,6 +50,8 @@ struct amdgpu_usermode_queue {
 
 	struct amdgpu_bo	*mqd_obj;
 	struct amdgpu_vm    	*vm;
+	struct amdgpu_userq_ctx proc_ctx;
+	struct amdgpu_userq_ctx gang_ctx;
 	struct list_head 	list;
 };
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 64+ messages in thread

* [RFC 6/7] drm/amdgpu: Map userqueue into HW
  2022-12-23 19:36 [RFC 0/7] RFC: Usermode queue for AMDGPU driver Shashank Sharma
                   ` (4 preceding siblings ...)
  2022-12-23 19:36 ` [RFC 5/7] drm/amdgpu: Create context for usermode queue Shashank Sharma
@ 2022-12-23 19:36 ` Shashank Sharma
  2022-12-29 17:51   ` Alex Deucher
  2022-12-23 19:36 ` [RFC 7/7] drm/amdgpu: Secure semaphore for usermode queue Shashank Sharma
  2022-12-29 18:02 ` [RFC 0/7] RFC: Usermode queue for AMDGPU driver Alex Deucher
  7 siblings, 1 reply; 64+ messages in thread
From: Shashank Sharma @ 2022-12-23 19:36 UTC (permalink / raw)
  To: amd-gfx
  Cc: Alex Deucher, Shashank Sharma, Christian Koenig, arvind.yadav,
	arunpravin.paneerselvam

This patch adds functions to map/unmap the usermode queue into/from the
HW, using the prepared MQD and other objects. After this mapping, the
queue will be ready to accept workloads.

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>

Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 71 +++++++++++++++++++
 1 file changed, 71 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index 2a854a5e2f70..b164e24247ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -50,6 +50,67 @@ amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct amdgpu_usermode
     ida_simple_remove(&uqg->ida, queue->queue_id);
 }
 
+static int amdgpu_userqueue_map(struct amdgpu_device *adev,
+                    struct amdgpu_usermode_queue *queue)
+{
+    int r;
+    struct mes_add_queue_input queue_input;
+
+    memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
+
+    queue_input.process_va_start = 0;
+    queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
+    queue_input.process_quantum = 100000; /* 10ms */
+    queue_input.gang_quantum = 10000; /* 1ms */
+    queue_input.paging = false;
+
+    queue_input.gang_context_addr = queue->gang_ctx.gpu_addr;
+    queue_input.process_context_addr = queue->proc_ctx.gpu_addr;
+    queue_input.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
+    queue_input.gang_global_priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
+
+    queue_input.process_id = queue->pasid;
+    queue_input.queue_type = queue->queue_type;
+    queue_input.mqd_addr = queue->mqd_gpu_addr;
+    queue_input.wptr_addr = queue->wptr_gpu_addr;
+    queue_input.queue_size = queue->queue_size >> 2;
+    queue_input.doorbell_offset = queue->doorbell_index;
+    queue_input.page_table_base_addr =  queue->vm->pd_phys_addr;
+
+    amdgpu_mes_lock(&adev->mes);
+    r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
+    amdgpu_mes_unlock(&adev->mes);
+    if (r) {
+        DRM_ERROR("Failed to map queue in HW, err (%d)\n", r);
+        return r;
+    }
+
+    DRM_DEBUG_DRIVER("Queue %d mapped successfully\n", queue->queue_id);
+    return 0;
+}
+
+static void amdgpu_userqueue_unmap(struct amdgpu_device *adev,
+                    struct amdgpu_usermode_queue *queue)
+{
+    int r;
+    struct mes_remove_queue_input queue_input;
+
+    memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
+    queue_input.doorbell_offset = queue->doorbell_index;
+    queue_input.gang_context_addr = queue->gang_ctx.gpu_addr;
+
+    amdgpu_mes_lock(&adev->mes);
+    r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
+    amdgpu_mes_unlock(&adev->mes);
+
+    if (r) {
+        DRM_ERROR("Failed to unmap usermode queue %d\n", queue->queue_id);
+        return;
+    }
+
+    DRM_DEBUG_DRIVER("Usermode queue %d unmapped\n", queue->queue_id);
+}
+
 static int
 amdgpu_userqueue_get_doorbell(struct amdgpu_device *adev,
                     struct amdgpu_usermode_queue *queue)
@@ -338,12 +399,21 @@ int amdgpu_userqueue_create(struct amdgpu_device *adev, struct drm_file *filp,
         goto free_mqd;
     }
 
+    r = amdgpu_userqueue_map(adev, queue);
+    if (r < 0) {
+        DRM_ERROR("Failed to map queue\n");
+        goto free_ctx;
+    }
+
     ctx->userq = queue;
     args->out.q_id = queue->queue_id;
     args->out.flags = 0;
     mutex_unlock(&adev->userq.userq_mutex);
     return 0;
 
+free_ctx:
+    amdgpu_userqueue_free_context(adev, queue);
+
 free_mqd:
     amdgpu_userqueue_destroy_mqd(queue);
 
@@ -362,6 +432,7 @@ void amdgpu_userqueue_destroy(struct amdgpu_device *adev, struct drm_file *filp,
     struct amdgpu_usermode_queue *queue = ctx->userq;
 
     mutex_lock(&adev->userq.userq_mutex);
+    amdgpu_userqueue_unmap(adev, queue);
     amdgpu_userqueue_free_context(adev, queue);
     amdgpu_userqueue_destroy_mqd(queue);
     amdgpu_userqueue_remove_index(adev, queue);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 64+ messages in thread

* [RFC 7/7] drm/amdgpu: Secure semaphore for usermode queue
  2022-12-23 19:36 [RFC 0/7] RFC: Usermode queue for AMDGPU driver Shashank Sharma
                   ` (5 preceding siblings ...)
  2022-12-23 19:36 ` [RFC 6/7] drm/amdgpu: Map userqueue into HW Shashank Sharma
@ 2022-12-23 19:36 ` Shashank Sharma
  2022-12-25 10:07   ` Zhang, Yifan
  2022-12-29 18:02 ` [RFC 0/7] RFC: Usermode queue for AMDGPU driver Alex Deucher
  7 siblings, 1 reply; 64+ messages in thread
From: Shashank Sharma @ 2022-12-23 19:36 UTC (permalink / raw)
  To: amd-gfx
  Cc: Alex Deucher, Shashank Sharma, Christian Koenig, arvind.yadav,
	arunpravin.paneerselvam

From: Arunpravin Paneer Selvam <arunpravin.paneerselvam@amd.com>

This is a WIP patch, which adds a kernel implementation of a secure
semaphore for the usermode queues. The UAPI for the same is yet to
be implemented.

The idea is to create a RO page and map it to each process requesting a
user mode queue, and give each a unique offset in the page, which can be
polled (like wait_mem) for sync.
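
For illustration only (none of these names exist in the series yet, since
the UAPI is still missing), a userspace waiter would poll its 64-bit slot
roughly like this:

    #include <sched.h>
    #include <stdint.h>

    /* Hypothetical sketch: 'sem' points into the read-only page the kernel
     * maps into the process, at this queue's unique offset. Poll until the
     * GPU has written a value >= 'target', much like a wait_mem packet does.
     */
    static void userq_sem_wait(const volatile uint64_t *sem, uint64_t target)
    {
        while (*sem < target)
            sched_yield();
    }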

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Cc: Shashank Sharma <shashank.sharma@amd.com>

Signed-off-by: Arunpravin Paneer Selvam <arunpravin.paneerselvam@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/Makefile           |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu.h           |   8 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c |   7 +-
 .../amd/amdgpu/amdgpu_userqueue_secure_sem.c  | 245 ++++++++++++++++++
 .../drm/amd/include/amdgpu_usermode_queue.h   |  10 +
 .../amd/include/amdgpu_usermode_queue_mqd.h   |   4 +-
 6 files changed, 272 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_secure_sem.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index e2a34ee57bfb..daec7bb9ab3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -211,6 +211,7 @@ amdgpu-y += amdgpu_amdkfd.o
 
 # add usermode queue
 amdgpu-y += amdgpu_userqueue.o
+amdgpu-y += amdgpu_userqueue_secure_sem.o
 
 ifneq ($(CONFIG_HSA_AMD),)
 AMDKFD_PATH := ../amdkfd
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4b566fcfca18..7325c01efc90 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -749,9 +749,17 @@ struct amdgpu_mqd {
 			struct amdgpu_mqd_prop *p);
 };
 
+struct amdgpu_userq_sec_sem {
+	struct amdgpu_bo *sem_obj;
+	u64 gpu_addr;
+	u32 num_sem;
+	unsigned long used[DIV_ROUND_UP(64, BITS_PER_LONG)];
+};
+
 struct amdgpu_userq_globals {
 	struct ida ida;
 	struct mutex userq_mutex;
+	struct amdgpu_userq_sec_sem sem;
 };
 
 #define AMDGPU_RESET_MAGIC_NUM 64
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index b164e24247ca..2af634bbe3dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -261,6 +261,10 @@ amdgpu_userqueue_setup_mqd(struct amdgpu_device *adev, struct amdgpu_usermode_qu
     /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
     mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
 
+    /* Setup semaphore fence address */
+    mqd->fenceaddress_lo = queue->sem_data.sem_gpu_addr & 0xFFFFFFFC;
+    mqd->fenceaddress_lo = upper_32_bits(queue->sem_data.sem_gpu_addr) & 0xFFFF;
+
     /* activate the queue */
     mqd->cp_gfx_hqd_active = 1;
 }
@@ -472,10 +476,11 @@ int amdgpu_userqueue_init(struct amdgpu_device *adev)
     struct amdgpu_userq_globals *uqg = &adev->userq;
 
     mutex_init(&uqg->userq_mutex);
+    amdgpu_userqueue_sec_sem_init(adev);
     return 0;
 }
 
 void amdgpu_userqueue_fini(struct amdgpu_device *adev)
 {
-
+    amdgpu_userqueue_sec_sem_fini(adev);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_secure_sem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_secure_sem.c
new file mode 100644
index 000000000000..6e6a7d62a300
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_secure_sem.c
@@ -0,0 +1,245 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_mes.h"
+#include "amdgpu_usermode_queue.h"
+#include "amdgpu_usermode_queue_mqd.h"
+
+static int amdgpu_userqueue_sem_addr_unmap(struct amdgpu_device *adev,
+					   struct amdgpu_usermode_queue *q)
+{
+    struct amdgpu_userq_sec_sem_data *sem_bo_data = &q->sem_data;
+    struct amdgpu_bo_va *bo_va = sem_bo_data->sem_data_va;
+    struct amdgpu_vm *vm = bo_va->base.vm;
+    struct amdgpu_bo *bo = adev->userq.sem.sem_obj;
+    struct amdgpu_bo_list_entry vm_pd;
+    struct list_head list, duplicates;
+    struct dma_fence *fence = NULL;
+    struct ttm_validate_buffer tv;
+    struct ww_acquire_ctx ticket;
+    long r = 0;
+
+    INIT_LIST_HEAD(&list);
+    INIT_LIST_HEAD(&duplicates);
+
+    tv.bo = &bo->tbo;
+    tv.num_shared = 2;
+    list_add(&tv.head, &list);
+
+    amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
+
+    r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
+    if (r) {
+        DRM_ERROR("leaking bo va because we fail to reserve bo (%ld)\n", r);
+        return r;
+    }
+
+    amdgpu_vm_bo_del(adev, bo_va);
+    if (!amdgpu_vm_ready(vm))
+        goto out_unlock;
+
+    r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, &fence);
+    if (r)
+        goto out_unlock;
+    if (fence) {
+        amdgpu_bo_fence(bo, fence, true);
+        fence = NULL;
+    }
+
+    r = amdgpu_vm_clear_freed(adev, vm, &fence);
+    if (r || !fence)
+        goto out_unlock;
+
+    dma_fence_wait(fence, false);
+    amdgpu_bo_fence(bo, fence, true);
+    dma_fence_put(fence);
+
+out_unlock:
+    if (unlikely(r < 0))
+        DRM_ERROR("failed to clear page tables (%ld)\n", r);
+    ttm_eu_backoff_reservation(&ticket, &list);
+
+    return r;
+}
+
+static u64 amdgpu_sem_bo_vaddr(struct amdgpu_device *adev) {
+       u64 addr = AMDGPU_VA_RESERVED_SIZE;
+
+       /* TODO:Find va address for sem bo mapping */
+       return addr;
+}
+
+static int amdgpu_userqueue_sem_addr_map(struct amdgpu_device *adev,
+                                        struct amdgpu_usermode_queue *q)
+{
+    struct amdgpu_userq_sec_sem_data *sem_bo_data;
+    struct amdgpu_bo *sem_obj = adev->userq.sem.sem_obj;
+    struct ttm_validate_buffer csa_tv;
+    struct amdgpu_bo_list_entry pd;
+    struct ww_acquire_ctx ticket;
+    struct amdgpu_vm *vm = q->vm;
+    struct amdgpu_bo_va *bo_va;
+    struct amdgpu_sync sync;
+    struct list_head list;
+    int r;
+
+    amdgpu_sync_create(&sync);
+    INIT_LIST_HEAD(&list);
+    INIT_LIST_HEAD(&csa_tv.head);
+
+    sem_bo_data = &q->sem_data;
+
+    csa_tv.bo = &sem_obj->tbo;
+    csa_tv.num_shared = 1;
+
+    list_add(&csa_tv.head, &list);
+    amdgpu_vm_get_pd_bo(vm, &list, &pd);
+
+    r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
+    if (r) {
+        DRM_ERROR("failed to reserve userqueue sec sem object BO: err=%d\n", r);
+        return r;
+    }
+
+    bo_va = amdgpu_vm_bo_add(adev, vm, sem_obj);
+    if (!bo_va) {
+        ttm_eu_backoff_reservation(&ticket, &list);
+        DRM_ERROR("failed to create bo_va for userqueue sec sem object BO\n");
+        return -ENOMEM;
+    }
+
+    sem_bo_data->sem_gpu_addr = amdgpu_sem_bo_vaddr(adev);
+    r = amdgpu_vm_bo_map(adev, bo_va, sem_bo_data->sem_gpu_addr, 0,
+                        AMDGPU_MAX_USERQUEUE_SEC_SEM * sizeof(u64),
+                        AMDGPU_PTE_READABLE | AMDGPU_PTE_READABLE);
+
+    if (r) {
+        DRM_ERROR("failed to do bo_map on sec sem object BO, err=%d\n", r);
+        goto error;
+    }
+
+    r = amdgpu_vm_bo_update(adev, bo_va, false);
+    if (r) {
+        DRM_ERROR("failed to do vm_bo_update on sec sem object BO\n");
+        goto error;
+    }
+    amdgpu_sync_fence(&sync, bo_va->last_pt_update);
+
+    r = amdgpu_vm_update_pdes(adev, vm, false);
+    if (r) {
+        DRM_ERROR("failed to update pdes on sec sem object BO\n");
+        goto error;
+    }
+    amdgpu_sync_fence(&sync, vm->last_update);
+
+    amdgpu_sync_wait(&sync, false);
+    ttm_eu_backoff_reservation(&ticket, &list);
+
+    amdgpu_sync_free(&sync);
+    sem_bo_data->sem_data_va = bo_va;
+    return 0;
+
+error:
+    amdgpu_vm_bo_del(adev, bo_va);
+    ttm_eu_backoff_reservation(&ticket, &list);
+    amdgpu_sync_free(&sync);
+    return r;
+}
+
+int amdgpu_userqueue_sec_sem_get(struct amdgpu_device *adev,
+                            struct amdgpu_usermode_queue *q,
+                            u64 *gpu_addr)
+{
+    unsigned long offset = find_first_zero_bit(adev->userq.sem.used, adev->userq.sem.num_sem);
+    u32 sem_offset;
+    int r;
+
+    if (offset < adev->userq.sem.num_sem) {
+        __set_bit(offset, adev->userq.sem.used);
+        sem_offset = offset << 6; /* convert to qw offset */
+    } else {
+        return -EINVAL;
+    }
+
+    r = amdgpu_userqueue_sem_addr_map(adev, q);
+    if (r) {
+        DRM_ERROR("failed to map sec sem object BO");
+        amdgpu_userqueue_sem_addr_unmap(adev, q);
+        return r;
+    }
+
+    *gpu_addr = sem_offset + q->sem_data.sem_gpu_addr;
+
+    return 0;
+}
+
+void amdgpu_userqueue_sec_sem_free(struct amdgpu_device *adev,
+                                struct amdgpu_usermode_queue *q,
+                                u32 sem)
+{
+    int r;
+
+    r = amdgpu_userqueue_sem_addr_unmap(adev, q);
+    if (r)
+        DRM_ERROR("failed to unmap sec sem object BO");
+
+    sem >>= 6;
+    if (sem < adev->userq.sem.num_sem)
+        __clear_bit(sem, adev->userq.sem.used);
+}
+
+int
+amdgpu_userqueue_sec_sem_init(struct amdgpu_device *adev)
+{
+    int r;
+
+    if (adev->userq.sem.sem_obj == NULL) {
+        /*
+        * AMDGPU_MAX_USERQUEUE_SEC_SEM * sizeof(u64) = AMDGPU_MAX_USERQUEUE_SEC_SEM
+        * 64bit slots
+        */
+        r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_USERQUEUE_SEC_SEM * sizeof(u64),
+                                    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->userq.sem.sem_obj,
+                                    &adev->userq.sem.gpu_addr, NULL);
+        if (r) {
+            DRM_ERROR("Create userqueue SEM bo failed, err %d\n", r);
+            return r;
+        }
+
+        adev->userq.sem.num_sem = AMDGPU_MAX_USERQUEUE_SEC_SEM;
+        memset(&adev->userq.sem.used, 0, sizeof(adev->userq.sem.used));
+    }
+
+    return 0;
+}
+
+void
+amdgpu_userqueue_sec_sem_fini(struct amdgpu_device *adev)
+{
+    if (adev->userq.sem.sem_obj) {
+        amdgpu_bo_free_kernel(&adev->userq.sem.sem_obj,
+                &adev->userq.sem.gpu_addr,
+                NULL);
+        adev->userq.sem.sem_obj = NULL;
+    }
+}
\ No newline at end of file
diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
index 8bf3c0be6937..630d9b5d2423 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
@@ -25,6 +25,12 @@
 #define AMDGPU_USERMODE_QUEUE_H_
 
 #define AMDGPU_MAX_USERQ 512
+#define AMDGPU_MAX_USERQUEUE_SEC_SEM 64
+
+struct amdgpu_userq_sec_sem_data {
+	u64 sem_gpu_addr;
+	struct amdgpu_bo_va *sem_data_va;
+};
 
 struct amdgpu_userq_ctx {
 	struct amdgpu_bo *obj;
@@ -52,7 +58,11 @@ struct amdgpu_usermode_queue {
 	struct amdgpu_vm    	*vm;
 	struct amdgpu_userq_ctx proc_ctx;
 	struct amdgpu_userq_ctx gang_ctx;
+
+	struct amdgpu_userq_sec_sem_data sem_data;
 	struct list_head 	list;
 };
 
+int amdgpu_userqueue_sec_sem_init(struct amdgpu_device *adev);
+void amdgpu_userqueue_sec_sem_fini(struct amdgpu_device *adev);
 #endif
diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h
index d0a285708ba5..e0bfb67d91f4 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h
@@ -35,8 +35,8 @@ struct amdgpu_usermode_queue_mqd
 	uint32_t fw_work_area_base_hi; // offset: 5  (0x5)
 	uint32_t shadow_initialized; // offset: 6  (0x6)
 	uint32_t ib_vmid; // offset: 7  (0x7)
-	uint32_t reserved_8; // offset: 8  (0x8)
-	uint32_t reserved_9; // offset: 9  (0x9)
+	uint32_t fenceaddress_lo; // offset: 8  (0x8)
+	uint32_t fenceaddress_high; // offset: 9  (0x9)
 	uint32_t reserved_10; // offset: 10  (0xA)
 	uint32_t reserved_11; // offset: 11  (0xB)
 	uint32_t reserved_12; // offset: 12  (0xC)
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 64+ messages in thread

* Re: [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work
  2022-12-23 19:36 ` [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work Shashank Sharma
@ 2022-12-24 18:19   ` Oded Gabbay
  2022-12-26 10:34     ` Shashank Sharma
  2022-12-25 15:44   ` Christian König
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 64+ messages in thread
From: Oded Gabbay @ 2022-12-24 18:19 UTC (permalink / raw)
  To: Shashank Sharma
  Cc: Alex Deucher, arvind.yadav, Christian Koenig, amd-gfx,
	arunpravin.paneerselvam

On Fri, Dec 23, 2022 at 9:37 PM Shashank Sharma <shashank.sharma@amd.com> wrote:
>
> This patch adds skeleton code for usermode queue creation. It
> typically contains:
> - A new structure to keep all the user queue data in one place.
> - An IOCTL function to create/free a usermode queue.
> - A function to generate unique index for the queue.
> - A global ptr in amdgpu_dev
>
> Cc: Alex Deucher <alexander.deucher@amd.com>
> Cc: Christian Koenig <christian.koenig@amd.com>
> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/Makefile           |   2 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h           |   6 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h       |   1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 187 ++++++++++++++++++
>  .../drm/amd/include/amdgpu_usermode_queue.h   |  50 +++++
>  5 files changed, 246 insertions(+)
>  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>  create mode 100644 drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
> index 6ad39cf71bdd..e2a34ee57bfb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -209,6 +209,8 @@ amdgpu-y += \
>  # add amdkfd interfaces
>  amdgpu-y += amdgpu_amdkfd.o
>
> +# add usermode queue
> +amdgpu-y += amdgpu_userqueue.o
>
>  ifneq ($(CONFIG_HSA_AMD),)
>  AMDKFD_PATH := ../amdkfd
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 8639a4f9c6e8..4b566fcfca18 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -749,6 +749,11 @@ struct amdgpu_mqd {
>                         struct amdgpu_mqd_prop *p);
>  };
>
> +struct amdgpu_userq_globals {
> +       struct ida ida;
> +       struct mutex userq_mutex;
> +};
> +
>  #define AMDGPU_RESET_MAGIC_NUM 64
>  #define AMDGPU_MAX_DF_PERFMONS 4
>  #define AMDGPU_PRODUCT_NAME_LEN 64
> @@ -955,6 +960,7 @@ struct amdgpu_device {
>         bool                            enable_mes_kiq;
>         struct amdgpu_mes               mes;
>         struct amdgpu_mqd               mqds[AMDGPU_HW_IP_NUM];
> +       struct amdgpu_userq_globals     userq;
>
>         /* df */
>         struct amdgpu_df                df;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> index 0fa0e56daf67..f7413859b14f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> @@ -57,6 +57,7 @@ struct amdgpu_ctx {
>         unsigned long                   ras_counter_ce;
>         unsigned long                   ras_counter_ue;
>         uint32_t                        stable_pstate;
> +       struct amdgpu_usermode_queue    *userq;
>  };
>
>  struct amdgpu_ctx_mgr {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> new file mode 100644
> index 000000000000..3b6e8f75495c
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> @@ -0,0 +1,187 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#include "amdgpu.h"
> +#include "amdgpu_vm.h"
> +#include "amdgpu_mes.h"
> +#include "amdgpu_usermode_queue.h"
> +#include "soc15_common.h"
> +
> +#define CHECK_ACCESS(a) (access_ok((const void __user *)a, sizeof(__u64)))
> +
> +static int
> +amdgpu_userqueue_index(struct amdgpu_device *adev)
> +{
> +    int index;
> +    struct amdgpu_userq_globals *uqg = &adev->userq;
> +
> +    index = ida_simple_get(&uqg->ida, 2, AMDGPU_MAX_USERQ, GFP_KERNEL);
> +    return index;
> +}
> +
> +static void
> +amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
> +{
> +    struct amdgpu_userq_globals *uqg = &adev->userq;
> +
> +    ida_simple_remove(&uqg->ida, queue->queue_id);
> +}
> +
> +static int
> +amdgpu_userqueue_validate_input(struct amdgpu_device *adev, struct drm_amdgpu_userq_mqd *mqd_in)
> +{
> +    if (mqd_in->queue_va == 0 || mqd_in->doorbell_handle == 0 || mqd_in->doorbell_offset == 0) {
> +        DRM_ERROR("Invalid queue object address\n");
> +        return -EINVAL;
> +    }
> +
> +    if (mqd_in->queue_size == 0 || mqd_in->rptr_va == 0 || mqd_in->wptr_va == 0) {
> +        DRM_ERROR("Invalid queue object value\n");
> +        return -EINVAL;
> +    }
> +
> +    if (mqd_in->ip_type < AMDGPU_HW_IP_GFX || mqd_in->ip_type >= AMDGPU_HW_IP_NUM) {
> +        DRM_ERROR("Invalid HW IP type 0x%x\n", mqd_in->ip_type);
> +        return -EINVAL;
> +    }
> +
> +    if (!CHECK_ACCESS(mqd_in->queue_va) || !CHECK_ACCESS(mqd_in->rptr_va) ||
> +        !CHECK_ACCESS(mqd_in->wptr_va)) {
> +            DRM_ERROR("Invalid mapping of queue ptrs, access error\n");
> +            return -EINVAL;
> +    }
> +
> +    DRM_DEBUG_DRIVER("Input parameters to create queue are valid\n");
> +    return 0;
> +}
> +
> +int amdgpu_userqueue_create(struct amdgpu_device *adev, struct drm_file *filp,
> +                            union drm_amdgpu_userq *args)
> +{
> +    int r, pasid;
> +    struct amdgpu_usermode_queue *queue;
> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
> +    struct amdgpu_vm *vm = &fpriv->vm;
> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, args->in.ctx_id);
> +    struct drm_amdgpu_userq_mqd *mqd_in = &args->in.mqd;
> +
> +    if (!ctx) {
> +        DRM_ERROR("Invalid GPU context\n");
> +        return -EINVAL;
> +    }
> +
> +    if (vm->pasid < 0) {
> +        DRM_WARN("No PASID info found\n");
> +        pasid = 0;
> +    }
> +
> +    mutex_lock(&adev->userq.userq_mutex);
> +
> +    queue = kzalloc(sizeof(struct amdgpu_usermode_queue), GFP_KERNEL);
> +    if (!queue) {
> +        DRM_ERROR("Failed to allocate memory for queue\n");
> +        mutex_unlock(&adev->userq.userq_mutex);
> +        return -ENOMEM;
> +    }
> +
> +    r = amdgpu_userqueue_validate_input(adev, mqd_in);
> +    if (r < 0) {
> +        DRM_ERROR("Invalid input to create queue\n");
> +        goto free_queue;
> +    }
> +
> +    queue->vm = vm;
> +    queue->pasid = pasid;
> +    queue->wptr_gpu_addr = mqd_in->wptr_va;
> +    queue->rptr_gpu_addr = mqd_in->rptr_va;
> +    queue->queue_size = mqd_in->queue_size;
> +    queue->queue_type = mqd_in->ip_type;
> +    queue->paging = false;
> +    queue->flags = mqd_in->flags;
> +    queue->queue_id = amdgpu_userqueue_index(adev);
> +
> +    ctx->userq = queue;
It looks like you have a single userq per context, and here you simply
override the userq pointer.
Maybe I've missed it, but where do you protect against a user
accidentally creating two user queues? It will cause a memory leak as
you don't release the previous q.
I would imagine you should reject the user from creating another userq
until it frees the current userq.
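
Something along these lines early in amdgpu_userqueue_create() would do it
(untested sketch, field names taken from this patch; the check belongs
under userq_mutex and -EBUSY is only a placeholder):

    if (ctx->userq) {
        DRM_ERROR("Context already owns a usermode queue\n");
        return -EBUSY;
    }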

Oded

> +    args->out.q_id = queue->queue_id;
> +    args->out.flags = 0;
> +    mutex_unlock(&adev->userq.userq_mutex);
> +    return 0;
> +
> +free_queue:
> +    amdgpu_userqueue_remove_index(adev, queue);
> +    mutex_unlock(&adev->userq.userq_mutex);
> +    kfree(queue);
> +    return r;
> +}
> +
> +void amdgpu_userqueue_destroy(struct amdgpu_device *adev, struct drm_file *filp,
> +                              union drm_amdgpu_userq *args)
> +{
> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, args->in.ctx_id);
> +    struct amdgpu_usermode_queue *queue = ctx->userq;
> +
> +    mutex_lock(&adev->userq.userq_mutex);
> +    amdgpu_userqueue_remove_index(adev, queue);
> +    ctx->userq = NULL;
> +    mutex_unlock(&adev->userq.userq_mutex);
> +    kfree(queue);
> +}
> +
> +int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
> +                      struct drm_file *filp)
> +{
> +    union drm_amdgpu_userq *args = data;
> +    struct amdgpu_device *adev = drm_to_adev(dev);
> +    int r = 0;
> +
> +    switch (args->in.op) {
> +    case AMDGPU_USERQ_OP_CREATE:
> +        r = amdgpu_userqueue_create(adev, filp, args);
> +        if (r)
> +            DRM_ERROR("Failed to create usermode queue\n");
> +        break;
> +
> +    case AMDGPU_USERQ_OP_FREE:
> +        amdgpu_userqueue_destroy(adev, filp, args);
> +        break;
> +
> +    default:
> +        DRM_ERROR("Invalid user queue op specified: %d\n", args->in.op);
> +        return -EINVAL;
> +    }
> +
> +    return r;
> +}
> +
> +int amdgpu_userqueue_init(struct amdgpu_device *adev)
> +{
> +    struct amdgpu_userq_globals *uqg = &adev->userq;
> +
> +    mutex_init(&uqg->userq_mutex);
> +    return 0;
> +}
> +
> +void amdgpu_userqueue_fini(struct amdgpu_device *adev)
> +{
> +
> +}
> diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
> new file mode 100644
> index 000000000000..c1fe39ffaf72
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
> @@ -0,0 +1,50 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#ifndef AMDGPU_USERMODE_QUEUE_H_
> +#define AMDGPU_USERMODE_QUEUE_H_
> +
> +#define AMDGPU_MAX_USERQ 512
> +
> +struct amdgpu_usermode_queue {
> +       int             queue_id;
> +       int             queue_type;
> +       int             queue_size;
> +       int             paging;
> +       int             pasid;
> +       int             use_doorbell;
> +       int             doorbell_index;
> +
> +       uint64_t        mqd_gpu_addr;
> +       uint64_t        wptr_gpu_addr;
> +       uint64_t        rptr_gpu_addr;
> +       uint64_t        queue_gpu_addr;
> +       uint64_t        flags;
> +       void            *mqd_cpu_ptr;
> +
> +       struct amdgpu_bo        *mqd_obj;
> +       struct amdgpu_vm        *vm;
> +       struct list_head        list;
> +};
> +
> +#endif
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management
  2022-12-23 19:36 ` [RFC 1/7] drm/amdgpu: UAPI for user queue management Shashank Sharma
@ 2022-12-24 20:20   ` Bas Nieuwenhuizen
  2022-12-27 16:58     ` Alex Deucher
  2023-01-02 13:26   ` Christian König
  2023-01-03 18:29   ` Felix Kuehling
  2 siblings, 1 reply; 64+ messages in thread
From: Bas Nieuwenhuizen @ 2022-12-24 20:20 UTC (permalink / raw)
  To: Shashank Sharma
  Cc: Alex Deucher, arvind.yadav, Christian Koenig, amd-gfx,
	arunpravin.paneerselvam

On Fri, Dec 23, 2022 at 8:37 PM Shashank Sharma <shashank.sharma@amd.com> wrote:
>
> From: Alex Deucher <alexander.deucher@amd.com>
>
> This patch introduces a new UAPI/IOCTL for usermode graphics
> queue. The userspace app will fill this structure and request
> the graphics driver to add a graphics work queue for it. The
> output of this UAPI is a queue id.
>
> This UAPI maps the queue into GPU, so the graphics app can start
> submitting work to the queue as soon as the call returns.
>
> Cc: Alex Deucher <alexander.deucher@amd.com>
> Cc: Christian Koenig <christian.koenig@amd.com>
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
> ---
>  include/uapi/drm/amdgpu_drm.h | 52 +++++++++++++++++++++++++++++++++++
>  1 file changed, 52 insertions(+)
>
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 0d93ec132ebb..a3d0dd6f62c5 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -54,6 +54,7 @@ extern "C" {
>  #define DRM_AMDGPU_VM                  0x13
>  #define DRM_AMDGPU_FENCE_TO_HANDLE     0x14
>  #define DRM_AMDGPU_SCHED               0x15
> +#define DRM_AMDGPU_USERQ               0x16
>
>  #define DRM_IOCTL_AMDGPU_GEM_CREATE    DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>  #define DRM_IOCTL_AMDGPU_GEM_MMAP      DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> @@ -71,6 +72,7 @@ extern "C" {
>  #define DRM_IOCTL_AMDGPU_VM            DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
>  #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
>  #define DRM_IOCTL_AMDGPU_SCHED         DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> +#define DRM_IOCTL_AMDGPU_USERQ         DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
>
>  /**
>   * DOC: memory domains
> @@ -288,6 +290,56 @@ union drm_amdgpu_ctx {
>         union drm_amdgpu_ctx_out out;
>  };
>
> +/* user queue IOCTL */
> +#define AMDGPU_USERQ_OP_CREATE 1
> +#define AMDGPU_USERQ_OP_FREE   2
> +
> +#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)
> +#define AMDGPU_USERQ_MQD_FLAGS_AQL     (1 << 1)

Can we document what AQL means here?


> +
> +struct drm_amdgpu_userq_mqd {
> +       /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
> +       __u32   flags;
> +       /** IP type: AMDGPU_HW_IP_* */
> +       __u32   ip_type;
> +       /** GEM object handle */
> +       __u32   doorbell_handle;
> +       /** Doorbell offset in dwords */
> +       __u32   doorbell_offset;

What are the doorbell handle/offset for? I don't see any of them used
in the rest of the series (we only check the handle isn't 0, which
isn't enough validation for a GEM handle to consider it valid), and
the kernel seems to allocate some kind of doorbell index in patch 4.
Does userspace need to know about that one? (similarly use_doorbell in
that patch seems like it is never explicitly written to)

The other questions I have are about how this interacts with memory
management. Does this have access to all BOs allocated with
AMDGPU_GEM_CREATE_VM_ALWAYS_VALID? What about imported BOs? How does
this interact with VA unmap/map operations? (AFAICT we have no way to
tell if pagetable modifying operations are complete from userspace for
now). What happens if we need to spill BOs from VRAM due to
(cross-process) memory pressure?

> +       /** GPU virtual address of the queue */
> +       __u64   queue_va;
> +       /** Size of the queue in bytes */
> +       __u64   queue_size;
> +       /** GPU virtual address of the rptr */
> +       __u64   rptr_va;
> +       /** GPU virtual address of the wptr */
> +       __u64   wptr_va;
> +};
> +
> +struct drm_amdgpu_userq_in {
> +       /** AMDGPU_USERQ_OP_* */
> +       __u32   op;
> +       /** Flags */
> +       __u32   flags;
> +       /** Context handle to associate the queue with */
> +       __u32   ctx_id;
> +       __u32   pad;
> +       /** Queue descriptor */
> +       struct drm_amdgpu_userq_mqd mqd;
> +};
> +
> +struct drm_amdgpu_userq_out {
> +       /** Queue handle */
> +       __u32   q_id;
> +       /** Flags */
> +       __u32   flags;
> +};
> +
> +union drm_amdgpu_userq {
> +       struct drm_amdgpu_userq_in in;
> +       struct drm_amdgpu_userq_out out;
> +};
> +
>  /* vm ioctl */
>  #define AMDGPU_VM_OP_RESERVE_VMID      1
>  #define AMDGPU_VM_OP_UNRESERVE_VMID    2
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* RE: [RFC 7/7] drm/amdgpu: Secure semaphore for usermode queue
  2022-12-23 19:36 ` [RFC 7/7] drm/amdgpu: Secure semaphore for usermode queue Shashank Sharma
@ 2022-12-25 10:07   ` Zhang, Yifan
  2022-12-27  9:32     ` Arunpravin Paneer Selvam
  0 siblings, 1 reply; 64+ messages in thread
From: Zhang, Yifan @ 2022-12-25 10:07 UTC (permalink / raw)
  To: Sharma, Shashank, amd-gfx
  Cc: Deucher, Alexander, Paneer Selvam, Arunpravin, Koenig, Christian,
	Yadav, Arvind, Sharma, Shashank

[Public]

From: Arunpravin Paneer Selvam <arunpravin.paneerselvam@amd.com>

This is a WIP patch, which adds a kernel implementation of a secure semaphore for the usermode queues. The UAPI for the same is yet to be implemented.

The idea is to create a RO page and map it to each process requesting a user mode queue, and give each a unique offset in the page, which can be polled (like wait_mem) for sync.

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Cc: Shashank Sharma <shashank.sharma@amd.com>

Signed-off-by: Arunpravin Paneer Selvam <arunpravin.paneerselvam@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/Makefile           |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu.h           |   8 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c |   7 +-
 .../amd/amdgpu/amdgpu_userqueue_secure_sem.c  | 245 ++++++++++++++++++
 .../drm/amd/include/amdgpu_usermode_queue.h   |  10 +
 .../amd/include/amdgpu_usermode_queue_mqd.h   |   4 +-
 6 files changed, 272 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_secure_sem.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index e2a34ee57bfb..daec7bb9ab3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -211,6 +211,7 @@ amdgpu-y += amdgpu_amdkfd.o
 
 # add usermode queue
 amdgpu-y += amdgpu_userqueue.o
+amdgpu-y += amdgpu_userqueue_secure_sem.o
 
 ifneq ($(CONFIG_HSA_AMD),)
 AMDKFD_PATH := ../amdkfd
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4b566fcfca18..7325c01efc90 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -749,9 +749,17 @@ struct amdgpu_mqd {
 			struct amdgpu_mqd_prop *p);
 };
 
+struct amdgpu_userq_sec_sem {
+	struct amdgpu_bo *sem_obj;
+	u64 gpu_addr;
+	u32 num_sem;
+	unsigned long used[DIV_ROUND_UP(64, BITS_PER_LONG)]; };
+
 struct amdgpu_userq_globals {
 	struct ida ida;
 	struct mutex userq_mutex;
+	struct amdgpu_userq_sec_sem sem;
 };
 
 #define AMDGPU_RESET_MAGIC_NUM 64
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index b164e24247ca..2af634bbe3dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -261,6 +261,10 @@ amdgpu_userqueue_setup_mqd(struct amdgpu_device *adev, struct amdgpu_usermode_qu
     /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
     mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
 
+    /* Setup semaphore fence address */
+    mqd->fenceaddress_lo = queue->sem_data.sem_gpu_addr & 0xFFFFFFFC;
+    mqd->fenceaddress_lo = upper_32_bits(queue->sem_data.sem_gpu_addr) & 0xFFFF;
+

It should be mqd->fenceaddress_high = upper_32_bits(queue->sem_data.sem_gpu_addr) & 0xFFFF.
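
That is, with the second write going to the high half (untested sketch
reusing the patch's field names; lower_32_bits() is added only for
symmetry):

    /* Setup semaphore fence address: program both halves of the 64-bit address */
    mqd->fenceaddress_lo   = lower_32_bits(queue->sem_data.sem_gpu_addr) & 0xFFFFFFFC;
    mqd->fenceaddress_high = upper_32_bits(queue->sem_data.sem_gpu_addr) & 0xFFFF;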

     /* activate the queue */
     mqd->cp_gfx_hqd_active = 1;
 }
@@ -472,10 +476,11 @@ int amdgpu_userqueue_init(struct amdgpu_device *adev)
     struct amdgpu_userq_globals *uqg = &adev->userq;
 
     mutex_init(&uqg->userq_mutex);
+    amdgpu_userqueue_sec_sem_init(adev);
     return 0;
 }
 
 void amdgpu_userqueue_fini(struct amdgpu_device *adev)  {
-
+    amdgpu_userqueue_sec_sem_fini(adev);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_secure_sem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_secure_sem.c
new file mode 100644
index 000000000000..6e6a7d62a300
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_secure_sem.c
@@ -0,0 +1,245 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person 
+obtaining a
+ * copy of this software and associated documentation files (the 
+"Software"),
+ * to deal in the Software without restriction, including without 
+limitation
+ * the rights to use, copy, modify, merge, publish, distribute, 
+sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom 
+the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
+MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT 
+SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
+DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_mes.h"
+#include "amdgpu_usermode_queue.h"
+#include "amdgpu_usermode_queue_mqd.h"
+
+static int amdgpu_userqueue_sem_addr_unmap(struct amdgpu_device *adev,
+					   struct amdgpu_usermode_queue *q) {
+    struct amdgpu_userq_sec_sem_data *sem_bo_data = &q->sem_data;
+    struct amdgpu_bo_va *bo_va = sem_bo_data->sem_data_va;
+    struct amdgpu_vm *vm = bo_va->base.vm;
+    struct amdgpu_bo *bo = adev->userq.sem.sem_obj;
+    struct amdgpu_bo_list_entry vm_pd;
+    struct list_head list, duplicates;
+    struct dma_fence *fence = NULL;
+    struct ttm_validate_buffer tv;
+    struct ww_acquire_ctx ticket;
+    long r = 0;
+
+    INIT_LIST_HEAD(&list);
+    INIT_LIST_HEAD(&duplicates);
+
+    tv.bo = &bo->tbo;
+    tv.num_shared = 2;
+    list_add(&tv.head, &list);
+
+    amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
+
+    r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
+    if (r) {
+        DRM_ERROR("leaking bo va because we fail to reserve bo (%ld)\n", r);
+        return r;
+    }
+
+    amdgpu_vm_bo_del(adev, bo_va);
+    if (!amdgpu_vm_ready(vm))
+        goto out_unlock;
+
+    r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, &fence);
+    if (r)
+        goto out_unlock;
+    if (fence) {
+        amdgpu_bo_fence(bo, fence, true);
+        fence = NULL;
+    }
+
+    r = amdgpu_vm_clear_freed(adev, vm, &fence);
+    if (r || !fence)
+        goto out_unlock;
+
+    dma_fence_wait(fence, false);
+    amdgpu_bo_fence(bo, fence, true);
+    dma_fence_put(fence);
+
+out_unlock:
+    if (unlikely(r < 0))
+        DRM_ERROR("failed to clear page tables (%ld)\n", r);
+    ttm_eu_backoff_reservation(&ticket, &list);
+
+    return r;
+}
+
+static u64 amdgpu_sem_bo_vaddr(struct amdgpu_device *adev) {
+       u64 addr = AMDGPU_VA_RESERVED_SIZE;
+
+       /* TODO:Find va address for sem bo mapping */
+       return addr;
+}
+
+static int amdgpu_userqueue_sem_addr_map(struct amdgpu_device *adev,
+                                        struct amdgpu_usermode_queue 
+*q) {
+    struct amdgpu_userq_sec_sem_data *sem_bo_data;
+    struct amdgpu_bo *sem_obj = adev->userq.sem.sem_obj;
+    struct ttm_validate_buffer csa_tv;
+    struct amdgpu_bo_list_entry pd;
+    struct ww_acquire_ctx ticket;
+    struct amdgpu_vm *vm = q->vm;
+    struct amdgpu_bo_va *bo_va;
+    struct amdgpu_sync sync;
+    struct list_head list;
+    int r;
+
+    amdgpu_sync_create(&sync);
+    INIT_LIST_HEAD(&list);
+    INIT_LIST_HEAD(&csa_tv.head);
+
+    sem_bo_data = &q->sem_data;
+
+    csa_tv.bo = &sem_obj->tbo;
+    csa_tv.num_shared = 1;
+
+    list_add(&csa_tv.head, &list);
+    amdgpu_vm_get_pd_bo(vm, &list, &pd);
+
+    r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
+    if (r) {
+        DRM_ERROR("failed to reserve userqueue sec sem object BO: err=%d\n", r);
+        return r;
+    }
+
+    bo_va = amdgpu_vm_bo_add(adev, vm, sem_obj);
+    if (!bo_va) {
+        ttm_eu_backoff_reservation(&ticket, &list);
+        DRM_ERROR("failed to create bo_va for userqueue sec sem object BO\n");
+        return -ENOMEM;
+    }
+
+    sem_bo_data->sem_gpu_addr = amdgpu_sem_bo_vaddr(adev);
+    r = amdgpu_vm_bo_map(adev, bo_va, sem_bo_data->sem_gpu_addr, 0,
+                        AMDGPU_MAX_USERQUEUE_SEC_SEM * sizeof(u64),
+                        AMDGPU_PTE_READABLE | AMDGPU_PTE_READABLE);
+
+    if (r) {
+        DRM_ERROR("failed to do bo_map on sec sem object BO, err=%d\n", r);
+        goto error;
+    }
+
+    r = amdgpu_vm_bo_update(adev, bo_va, false);
+    if (r) {
+        DRM_ERROR("failed to do vm_bo_update on sec sem object BO\n");
+        goto error;
+    }
+    amdgpu_sync_fence(&sync, bo_va->last_pt_update);
+
+    r = amdgpu_vm_update_pdes(adev, vm, false);
+    if (r) {
+        DRM_ERROR("failed to update pdes on sec sem object BO\n");
+        goto error;
+    }
+    amdgpu_sync_fence(&sync, vm->last_update);
+
+    amdgpu_sync_wait(&sync, false);
+    ttm_eu_backoff_reservation(&ticket, &list);
+
+    amdgpu_sync_free(&sync);
+    sem_bo_data->sem_data_va = bo_va;
+    return 0;
+
+error:
+    amdgpu_vm_bo_del(adev, bo_va);
+    ttm_eu_backoff_reservation(&ticket, &list);
+    amdgpu_sync_free(&sync);
+    return r;
+}
+
+int amdgpu_userqueue_sec_sem_get(struct amdgpu_device *adev,
+                            struct amdgpu_usermode_queue *q,
+                            u64 *gpu_addr) {
+    unsigned long offset = find_first_zero_bit(adev->userq.sem.used, adev->userq.sem.num_sem);
+    u32 sem_offset;
+    int r;
+
+    if (offset < adev->userq.sem.num_sem) {
+        __set_bit(offset, adev->userq.sem.used);
+        sem_offset = offset << 6; /* convert to qw offset */
+    } else {
+        return -EINVAL;
+    }
+
+    r = amdgpu_userqueue_sem_addr_map(adev, q);
+    if (r) {
+        DRM_ERROR("failed to map sec sem object BO");
+        amdgpu_userqueue_sem_addr_unmap(adev, q);
+        return r;
+    }
+
+    *gpu_addr = sem_offset + q->sem_data.sem_gpu_addr;
+
+    return 0;
+}
+
+void amdgpu_userqueue_sec_sem_free(struct amdgpu_device *adev,
+                                struct amdgpu_usermode_queue *q,
+                                u32 sem) {
+    int r;
+
+    r = amdgpu_userqueue_sem_addr_unmap(adev, q);
+    if (r)
+        DRM_ERROR("failed to unmap sec sem object BO");
+
+    sem >>= 6;
+    if (sem < adev->userq.sem.num_sem)
+        __clear_bit(sem, adev->userq.sem.used); }
+
+int
+amdgpu_userqueue_sec_sem_init(struct amdgpu_device *adev) {
+    int r;
+
+    if (adev->userq.sem.sem_obj == NULL) {
+        /*
+        * AMDGPU_MAX_USERQUEUE_SEC_SEM * sizeof(u64) = AMDGPU_MAX_USERQUEUE_SEC_SEM
+        * 64bit slots
+        */
+        r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_USERQUEUE_SEC_SEM * sizeof(u64),
+                                    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->userq.sem.sem_obj,
+                                    &adev->userq.sem.gpu_addr, NULL);
+        if (r) {
+            DRM_ERROR("Create userqueue SEM bo failed, err %d\n", r);
+            return r;
+        }
+
+        adev->userq.sem.num_sem = AMDGPU_MAX_USERQUEUE_SEC_SEM;
+        memset(&adev->userq.sem.used, 0, sizeof(adev->userq.sem.used));
+    }
+
+    return 0;
+}
+
+void
+amdgpu_userqueue_sec_sem_fini(struct amdgpu_device *adev) {
+    if (adev->userq.sem.sem_obj) {
+        amdgpu_bo_free_kernel(&adev->userq.sem.sem_obj,
+                &adev->userq.sem.gpu_addr,
+                NULL);
+        adev->userq.sem.sem_obj = NULL;
+    }
+}
\ No newline at end of file
diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
index 8bf3c0be6937..630d9b5d2423 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
@@ -25,6 +25,12 @@
 #define AMDGPU_USERMODE_QUEUE_H_
 
 #define AMDGPU_MAX_USERQ 512
+#define AMDGPU_MAX_USERQUEUE_SEC_SEM 64
+
+struct amdgpu_userq_sec_sem_data {
+	u64 sem_gpu_addr;
+	struct amdgpu_bo_va *sem_data_va;
+};
 
 struct amdgpu_userq_ctx {
 	struct amdgpu_bo *obj;
@@ -52,7 +58,11 @@ struct amdgpu_usermode_queue {
 	struct amdgpu_vm    	*vm;
 	struct amdgpu_userq_ctx proc_ctx;
 	struct amdgpu_userq_ctx gang_ctx;
+
+	struct amdgpu_userq_sec_sem_data sem_data;
 	struct list_head 	list;
 };
 
+int amdgpu_userqueue_sec_sem_init(struct amdgpu_device *adev); void 
+amdgpu_userqueue_sec_sem_fini(struct amdgpu_device *adev);
 #endif
diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h
index d0a285708ba5..e0bfb67d91f4 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h
@@ -35,8 +35,8 @@ struct amdgpu_usermode_queue_mqd
 	uint32_t fw_work_area_base_hi; // offset: 5  (0x5)
 	uint32_t shadow_initialized; // offset: 6  (0x6)
 	uint32_t ib_vmid; // offset: 7  (0x7)
-	uint32_t reserved_8; // offset: 8  (0x8)
-	uint32_t reserved_9; // offset: 9  (0x9)
+	uint32_t fenceaddress_lo; // offset: 8  (0x8)
+	uint32_t fenceaddress_high; // offset: 9  (0x9)
 	uint32_t reserved_10; // offset: 10  (0xA)
 	uint32_t reserved_11; // offset: 11  (0xB)
 	uint32_t reserved_12; // offset: 12  (0xC)
--
2.34.1

^ permalink raw reply related	[flat|nested] 64+ messages in thread

* Re: [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work
  2022-12-23 19:36 ` [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work Shashank Sharma
  2022-12-24 18:19   ` Oded Gabbay
@ 2022-12-25 15:44   ` Christian König
  2022-12-26 10:41     ` Shashank Sharma
  2022-12-29 17:41   ` Alex Deucher
  2023-01-04  8:55   ` Zhu, Jiadong
  3 siblings, 1 reply; 64+ messages in thread
From: Christian König @ 2022-12-25 15:44 UTC (permalink / raw)
  To: Shashank Sharma, amd-gfx
  Cc: Alex Deucher, Christian Koenig, arvind.yadav, arunpravin.paneerselvam

On 23.12.22 at 20:36, Shashank Sharma wrote:
> This patch adds skeleton code for usermode queue creation. It
> typically contains:
> - A new structure to keep all the user queue data in one place.
> - An IOCTL function to create/free a usermode queue.
> - A function to generate unique index for the queue.
> - A global ptr in amdgpu_dev
>
> Cc: Alex Deucher <alexander.deucher@amd.com>
> Cc: Christian Koenig <christian.koenig@amd.com>
> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/Makefile           |   2 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h           |   6 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h       |   1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 187 ++++++++++++++++++
>   .../drm/amd/include/amdgpu_usermode_queue.h   |  50 +++++
>   5 files changed, 246 insertions(+)
>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>   create mode 100644 drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
> index 6ad39cf71bdd..e2a34ee57bfb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -209,6 +209,8 @@ amdgpu-y += \
>   # add amdkfd interfaces
>   amdgpu-y += amdgpu_amdkfd.o
>   
> +# add usermode queue
> +amdgpu-y += amdgpu_userqueue.o
>   
>   ifneq ($(CONFIG_HSA_AMD),)
>   AMDKFD_PATH := ../amdkfd
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 8639a4f9c6e8..4b566fcfca18 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -749,6 +749,11 @@ struct amdgpu_mqd {
>   			struct amdgpu_mqd_prop *p);
>   };
>   
> +struct amdgpu_userq_globals {
> +	struct ida ida;
> +	struct mutex userq_mutex;
> +};
> +
>   #define AMDGPU_RESET_MAGIC_NUM 64
>   #define AMDGPU_MAX_DF_PERFMONS 4
>   #define AMDGPU_PRODUCT_NAME_LEN 64
> @@ -955,6 +960,7 @@ struct amdgpu_device {
>   	bool                            enable_mes_kiq;
>   	struct amdgpu_mes               mes;
>   	struct amdgpu_mqd               mqds[AMDGPU_HW_IP_NUM];
> +	struct amdgpu_userq_globals	userq;

This is a pretty big NAK to this. User mode queues should absolutely not 
be global!

This must be per fpriv, see how amdgpu_ctx/amdgpu_ctx_mgr is designed.

Or is that for the interface with the MES? If yes, then that should be 
part of the MES code, not here.
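
For illustration only (invented names, loosely modeled on how
amdgpu_ctx_mgr hangs off amdgpu_fpriv; not part of the posted series),
the per-fpriv variant could look roughly like this:

    /* Hypothetical per-file-descriptor bookkeeping instead of adev->userq */
    struct amdgpu_userq_mgr {
            struct idr              userq_idr;      /* queue_id -> struct amdgpu_usermode_queue */
            struct mutex            userq_mutex;
            struct amdgpu_device    *adev;
    };
    /* embedded in struct amdgpu_fpriv and torn down when the file is closed */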

>   
>   	/* df */
>   	struct amdgpu_df                df;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> index 0fa0e56daf67..f7413859b14f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> @@ -57,6 +57,7 @@ struct amdgpu_ctx {
>   	unsigned long			ras_counter_ce;
>   	unsigned long			ras_counter_ue;
>   	uint32_t			stable_pstate;
> +	struct amdgpu_usermode_queue	*userq;

Why should we have this in the ctx here???

Regards,
Christian.

>   };
>   
>   struct amdgpu_ctx_mgr {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> new file mode 100644
> index 000000000000..3b6e8f75495c
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> @@ -0,0 +1,187 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#include "amdgpu.h"
> +#include "amdgpu_vm.h"
> +#include "amdgpu_mes.h"
> +#include "amdgpu_usermode_queue.h"
> +#include "soc15_common.h"
> +
> +#define CHECK_ACCESS(a) (access_ok((const void __user *)a, sizeof(__u64)))
> +
> +static int
> +amdgpu_userqueue_index(struct amdgpu_device *adev)
> +{
> +    int index;
> +    struct amdgpu_userq_globals *uqg = &adev->userq;
> +
> +    index = ida_simple_get(&uqg->ida, 2, AMDGPU_MAX_USERQ, GFP_KERNEL);
> +    return index;
> +}
> +
> +static void
> +amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
> +{
> +    struct amdgpu_userq_globals *uqg = &adev->userq;
> +
> +    ida_simple_remove(&uqg->ida, queue->queue_id);
> +}
> +
> +static int
> +amdgpu_userqueue_validate_input(struct amdgpu_device *adev, struct drm_amdgpu_userq_mqd *mqd_in)
> +{
> +    if (mqd_in->queue_va == 0 || mqd_in->doorbell_handle == 0 || mqd_in->doorbell_offset == 0) {
> +        DRM_ERROR("Invalid queue object address\n");
> +        return -EINVAL;
> +    }
> +
> +    if (mqd_in->queue_size == 0 || mqd_in->rptr_va == 0 || mqd_in->wptr_va == 0) {
> +        DRM_ERROR("Invalid queue object value\n");
> +        return -EINVAL;
> +    }
> +
> +    if (mqd_in->ip_type < AMDGPU_HW_IP_GFX || mqd_in->ip_type >= AMDGPU_HW_IP_NUM) {
> +        DRM_ERROR("Invalid HW IP type 0x%x\n", mqd_in->ip_type);
> +        return -EINVAL;
> +    }
> +
> +    if (!CHECK_ACCESS(mqd_in->queue_va) || !CHECK_ACCESS(mqd_in->rptr_va) ||
> +        !CHECK_ACCESS(mqd_in->wptr_va)) {
> +            DRM_ERROR("Invalid mapping of queue ptrs, access error\n");
> +            return -EINVAL;
> +    }
> +
> +    DRM_DEBUG_DRIVER("Input parameters to create queue are valid\n");
> +    return 0;
> +}
> +
> +int amdgpu_userqueue_create(struct amdgpu_device *adev, struct drm_file *filp,
> +                            union drm_amdgpu_userq *args)
> +{
> +    int r, pasid;
> +    struct amdgpu_usermode_queue *queue;
> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
> +    struct amdgpu_vm *vm = &fpriv->vm;
> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, args->in.ctx_id);
> +    struct drm_amdgpu_userq_mqd *mqd_in = &args->in.mqd;
> +
> +    if (!ctx) {
> +        DRM_ERROR("Invalid GPU context\n");
> +        return -EINVAL;
> +    }
> +
> +    if (vm->pasid < 0) {
> +        DRM_WARN("No PASID info found\n");
> +        pasid = 0;
> +    }
> +
> +    mutex_lock(&adev->userq.userq_mutex);
> +
> +    queue = kzalloc(sizeof(struct amdgpu_usermode_queue), GFP_KERNEL);
> +    if (!queue) {
> +        DRM_ERROR("Failed to allocate memory for queue\n");
> +        mutex_unlock(&adev->userq.userq_mutex);
> +        return -ENOMEM;
> +    }
> +
> +    r = amdgpu_userqueue_validate_input(adev, mqd_in);
> +    if (r < 0) {
> +        DRM_ERROR("Invalid input to create queue\n");
> +        goto free_queue;
> +    }
> +
> +    queue->vm = vm;
> +    queue->pasid = pasid;
> +    queue->wptr_gpu_addr = mqd_in->wptr_va;
> +    queue->rptr_gpu_addr = mqd_in->rptr_va;
> +    queue->queue_size = mqd_in->queue_size;
> +    queue->queue_type = mqd_in->ip_type;
> +    queue->paging = false;
> +    queue->flags = mqd_in->flags;
> +    queue->queue_id = amdgpu_userqueue_index(adev);
> +
> +    ctx->userq = queue;
> +    args->out.q_id = queue->queue_id;
> +    args->out.flags = 0;
> +    mutex_unlock(&adev->userq.userq_mutex);
> +    return 0;
> +
> +free_queue:
> +    amdgpu_userqueue_remove_index(adev, queue);
> +    mutex_unlock(&adev->userq.userq_mutex);
> +    kfree(queue);
> +    return r;
> +}
> +
> +void amdgpu_userqueue_destroy(struct amdgpu_device *adev, struct drm_file *filp,
> +                              union drm_amdgpu_userq *args)
> +{
> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, args->in.ctx_id);
> +    struct amdgpu_usermode_queue *queue = ctx->userq;
> +
> +    mutex_lock(&adev->userq.userq_mutex);
> +    amdgpu_userqueue_remove_index(adev, queue);
> +    ctx->userq = NULL;
> +    mutex_unlock(&adev->userq.userq_mutex);
> +    kfree(queue);
> +}
> +
> +int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
> +		       struct drm_file *filp)
> +{
> +    union drm_amdgpu_userq *args = data;
> +    struct amdgpu_device *adev = drm_to_adev(dev);
> +    int r = 0;
> +
> +    switch (args->in.op) {
> +    case AMDGPU_USERQ_OP_CREATE:
> +        r = amdgpu_userqueue_create(adev, filp, args);
> +        if (r)
> +            DRM_ERROR("Failed to create usermode queue\n");
> +        break;
> +
> +    case AMDGPU_USERQ_OP_FREE:
> +        amdgpu_userqueue_destroy(adev, filp, args);
> +        break;
> +
> +    default:
> +        DRM_ERROR("Invalid user queue op specified: %d\n", args->in.op);
> +        return -EINVAL;
> +    }
> +
> +    return r;
> +}
> +
> +int amdgpu_userqueue_init(struct amdgpu_device *adev)
> +{
> +    struct amdgpu_userq_globals *uqg = &adev->userq;
> +
> +    mutex_init(&uqg->userq_mutex);
> +    return 0;
> +}
> +
> +void amdgpu_userqueue_fini(struct amdgpu_device *adev)
> +{
> +
> +}
> diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
> new file mode 100644
> index 000000000000..c1fe39ffaf72
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
> @@ -0,0 +1,50 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#ifndef AMDGPU_USERMODE_QUEUE_H_
> +#define AMDGPU_USERMODE_QUEUE_H_
> +
> +#define AMDGPU_MAX_USERQ 512
> +
> +struct amdgpu_usermode_queue {
> +	int		queue_id;
> +	int		queue_type;
> +	int		queue_size;
> +	int		paging;
> +	int		pasid;
> +	int		use_doorbell;
> +	int		doorbell_index;
> +
> +	uint64_t	mqd_gpu_addr;
> +	uint64_t	wptr_gpu_addr;
> +	uint64_t	rptr_gpu_addr;
> +	uint64_t	queue_gpu_addr;
> +	uint64_t	flags;
> +	void 		*mqd_cpu_ptr;
> +
> +	struct amdgpu_bo	*mqd_obj;
> +	struct amdgpu_vm    	*vm;
> +	struct list_head 	list;
> +};
> +
> +#endif


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work
  2022-12-24 18:19   ` Oded Gabbay
@ 2022-12-26 10:34     ` Shashank Sharma
  0 siblings, 0 replies; 64+ messages in thread
From: Shashank Sharma @ 2022-12-26 10:34 UTC (permalink / raw)
  To: Oded Gabbay
  Cc: Alex Deucher, arvind.yadav, Christian Koenig, amd-gfx,
	arunpravin.paneerselvam

Hello Oded,

Thank you for your comments,

On 24/12/2022 19:19, Oded Gabbay wrote:
> On Fri, Dec 23, 2022 at 9:37 PM Shashank Sharma <shashank.sharma@amd.com> wrote:
>> This patch adds skeleton code for usermode queue creation. It
>> typically contains:
>> - A new structure to keep all the user queue data in one place.
>> - An IOCTL function to create/free a usermode queue.
>> - A function to generate unique index for the queue.
>> - A global ptr in amdgpu_dev
>>
>> Cc: Alex Deucher <alexander.deucher@amd.com>
>> Cc: Christian Koenig <christian.koenig@amd.com>
>> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/Makefile           |   2 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h           |   6 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h       |   1 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 187 ++++++++++++++++++
>>   .../drm/amd/include/amdgpu_usermode_queue.h   |  50 +++++
>>   5 files changed, 246 insertions(+)
>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>   create mode 100644 drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
>> index 6ad39cf71bdd..e2a34ee57bfb 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>> @@ -209,6 +209,8 @@ amdgpu-y += \
>>   # add amdkfd interfaces
>>   amdgpu-y += amdgpu_amdkfd.o
>>
>> +# add usermode queue
>> +amdgpu-y += amdgpu_userqueue.o
>>
>>   ifneq ($(CONFIG_HSA_AMD),)
>>   AMDKFD_PATH := ../amdkfd
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 8639a4f9c6e8..4b566fcfca18 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -749,6 +749,11 @@ struct amdgpu_mqd {
>>                          struct amdgpu_mqd_prop *p);
>>   };
>>
>> +struct amdgpu_userq_globals {
>> +       struct ida ida;
>> +       struct mutex userq_mutex;
>> +};
>> +
>>   #define AMDGPU_RESET_MAGIC_NUM 64
>>   #define AMDGPU_MAX_DF_PERFMONS 4
>>   #define AMDGPU_PRODUCT_NAME_LEN 64
>> @@ -955,6 +960,7 @@ struct amdgpu_device {
>>          bool                            enable_mes_kiq;
>>          struct amdgpu_mes               mes;
>>          struct amdgpu_mqd               mqds[AMDGPU_HW_IP_NUM];
>> +       struct amdgpu_userq_globals     userq;
>>
>>          /* df */
>>          struct amdgpu_df                df;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>> index 0fa0e56daf67..f7413859b14f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>> @@ -57,6 +57,7 @@ struct amdgpu_ctx {
>>          unsigned long                   ras_counter_ce;
>>          unsigned long                   ras_counter_ue;
>>          uint32_t                        stable_pstate;
>> +       struct amdgpu_usermode_queue    *userq;
>>   };
>>
>>   struct amdgpu_ctx_mgr {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> new file mode 100644
>> index 000000000000..3b6e8f75495c
>> --- /dev/null
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> @@ -0,0 +1,187 @@
>> +/*
>> + * Copyright 2022 Advanced Micro Devices, Inc.
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>> + * OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> + */
>> +
>> +#include "amdgpu.h"
>> +#include "amdgpu_vm.h"
>> +#include "amdgpu_mes.h"
>> +#include "amdgpu_usermode_queue.h"
>> +#include "soc15_common.h"
>> +
>> +#define CHECK_ACCESS(a) (access_ok((const void __user *)a, sizeof(__u64)))
>> +
>> +static int
>> +amdgpu_userqueue_index(struct amdgpu_device *adev)
>> +{
>> +    int index;
>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>> +
>> +    index = ida_simple_get(&uqg->ida, 2, AMDGPU_MAX_USERQ, GFP_KERNEL);
>> +    return index;
>> +}
>> +
>> +static void
>> +amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
>> +{
>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>> +
>> +    ida_simple_remove(&uqg->ida, queue->queue_id);
>> +}
>> +
>> +static int
>> +amdgpu_userqueue_validate_input(struct amdgpu_device *adev, struct drm_amdgpu_userq_mqd *mqd_in)
>> +{
>> +    if (mqd_in->queue_va == 0 || mqd_in->doorbell_handle == 0 || mqd_in->doorbell_offset == 0) {
>> +        DRM_ERROR("Invalid queue object address\n");
>> +        return -EINVAL;
>> +    }
>> +
>> +    if (mqd_in->queue_size == 0 || mqd_in->rptr_va == 0 || mqd_in->wptr_va == 0) {
>> +        DRM_ERROR("Invalid queue object value\n");
>> +        return -EINVAL;
>> +    }
>> +
>> +    if (mqd_in->ip_type < AMDGPU_HW_IP_GFX || mqd_in->ip_type >= AMDGPU_HW_IP_NUM) {
>> +        DRM_ERROR("Invalid HW IP type 0x%x\n", mqd_in->ip_type);
>> +        return -EINVAL;
>> +    }
>> +
>> +    if (!CHECK_ACCESS(mqd_in->queue_va) || !CHECK_ACCESS(mqd_in->rptr_va) ||
>> +        !CHECK_ACCESS(mqd_in->wptr_va)) {
>> +            DRM_ERROR("Invalid mapping of queue ptrs, access error\n");
>> +            return -EINVAL;
>> +    }
>> +
>> +    DRM_DEBUG_DRIVER("Input parameters to create queue are valid\n");
>> +    return 0;
>> +}
>> +
>> +int amdgpu_userqueue_create(struct amdgpu_device *adev, struct drm_file *filp,
>> +                            union drm_amdgpu_userq *args)
>> +{
>> +    int r, pasid;
>> +    struct amdgpu_usermode_queue *queue;
>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>> +    struct amdgpu_vm *vm = &fpriv->vm;
>> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, args->in.ctx_id);
>> +    struct drm_amdgpu_userq_mqd *mqd_in = &args->in.mqd;
>> +
>> +    if (!ctx) {
>> +        DRM_ERROR("Invalid GPU context\n");
>> +        return -EINVAL;
>> +    }
>> +
>> +    if (vm->pasid < 0) {
>> +        DRM_WARN("No PASID info found\n");
>> +        pasid = 0;
>> +    }
>> +
>> +    mutex_lock(&adev->userq.userq_mutex);
>> +
>> +    queue = kzalloc(sizeof(struct amdgpu_usermode_queue), GFP_KERNEL);
>> +    if (!queue) {
>> +        DRM_ERROR("Failed to allocate memory for queue\n");
>> +        mutex_unlock(&adev->userq.userq_mutex);
>> +        return -ENOMEM;
>> +    }
>> +
>> +    r = amdgpu_userqueue_validate_input(adev, mqd_in);
>> +    if (r < 0) {
>> +        DRM_ERROR("Invalid input to create queue\n");
>> +        goto free_queue;
>> +    }
>> +
>> +    queue->vm = vm;
>> +    queue->pasid = pasid;
>> +    queue->wptr_gpu_addr = mqd_in->wptr_va;
>> +    queue->rptr_gpu_addr = mqd_in->rptr_va;
>> +    queue->queue_size = mqd_in->queue_size;
>> +    queue->queue_type = mqd_in->ip_type;
>> +    queue->paging = false;
>> +    queue->flags = mqd_in->flags;
>> +    queue->queue_id = amdgpu_userqueue_index(adev);
>> +
>> +    ctx->userq = queue;
> It looks like you have a single userq per context, and here you simply
> override the userq pointer.
> Maybe I've missed it, but where do you protect against a user
> accidentally creating two user queues ? It will cause a memory leak as
> you don't release the previous q.
> I would imagine you should reject the user from creating another userq
> until it frees the current userq.

Noted, this is a valid expectation, will add a check here.
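
Roughly along these lines near the top of amdgpu_userqueue_create(),
keeping the current one-queue-per-context design for now (sketch only):

    mutex_lock(&adev->userq.userq_mutex);
    if (ctx->userq) {
        DRM_ERROR("Usermode queue already exists for this context\n");
        mutex_unlock(&adev->userq.userq_mutex);
        return -EBUSY;
    }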

- Shashank

> Oded
>
>> +    args->out.q_id = queue->queue_id;
>> +    args->out.flags = 0;
>> +    mutex_unlock(&adev->userq.userq_mutex);
>> +    return 0;
>> +
>> +free_queue:
>> +    amdgpu_userqueue_remove_index(adev, queue);
>> +    mutex_unlock(&adev->userq.userq_mutex);
>> +    kfree(queue);
>> +    return r;
>> +}
>> +
>> +void amdgpu_userqueue_destroy(struct amdgpu_device *adev, struct drm_file *filp,
>> +                              union drm_amdgpu_userq *args)
>> +{
>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, args->in.ctx_id);
>> +    struct amdgpu_usermode_queue *queue = ctx->userq;
>> +
>> +    mutex_lock(&adev->userq.userq_mutex);
>> +    amdgpu_userqueue_remove_index(adev, queue);
>> +    ctx->userq = NULL;
>> +    mutex_unlock(&adev->userq.userq_mutex);
>> +    kfree(queue);
>> +}
>> +
>> +int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
>> +                      struct drm_file *filp)
>> +{
>> +    union drm_amdgpu_userq *args = data;
>> +    struct amdgpu_device *adev = drm_to_adev(dev);
>> +    int r = 0;
>> +
>> +    switch (args->in.op) {
>> +    case AMDGPU_USERQ_OP_CREATE:
>> +        r = amdgpu_userqueue_create(adev, filp, args);
>> +        if (r)
>> +            DRM_ERROR("Failed to create usermode queue\n");
>> +        break;
>> +
>> +    case AMDGPU_USERQ_OP_FREE:
>> +        amdgpu_userqueue_destroy(adev, filp, args);
>> +        break;
>> +
>> +    default:
>> +        DRM_ERROR("Invalid user queue op specified: %d\n", args->in.op);
>> +        return -EINVAL;
>> +    }
>> +
>> +    return r;
>> +}
>> +
>> +int amdgpu_userqueue_init(struct amdgpu_device *adev)
>> +{
>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>> +
>> +    mutex_init(&uqg->userq_mutex);
>> +    return 0;
>> +}
>> +
>> +void amdgpu_userqueue_fini(struct amdgpu_device *adev)
>> +{
>> +
>> +}
>> diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>> new file mode 100644
>> index 000000000000..c1fe39ffaf72
>> --- /dev/null
>> +++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>> @@ -0,0 +1,50 @@
>> +/*
>> + * Copyright 2022 Advanced Micro Devices, Inc.
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>> + * OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> + */
>> +
>> +#ifndef AMDGPU_USERMODE_QUEUE_H_
>> +#define AMDGPU_USERMODE_QUEUE_H_
>> +
>> +#define AMDGPU_MAX_USERQ 512
>> +
>> +struct amdgpu_usermode_queue {
>> +       int             queue_id;
>> +       int             queue_type;
>> +       int             queue_size;
>> +       int             paging;
>> +       int             pasid;
>> +       int             use_doorbell;
>> +       int             doorbell_index;
>> +
>> +       uint64_t        mqd_gpu_addr;
>> +       uint64_t        wptr_gpu_addr;
>> +       uint64_t        rptr_gpu_addr;
>> +       uint64_t        queue_gpu_addr;
>> +       uint64_t        flags;
>> +       void            *mqd_cpu_ptr;
>> +
>> +       struct amdgpu_bo        *mqd_obj;
>> +       struct amdgpu_vm        *vm;
>> +       struct list_head        list;
>> +};
>> +
>> +#endif
>> --
>> 2.34.1
>>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work
  2022-12-25 15:44   ` Christian König
@ 2022-12-26 10:41     ` Shashank Sharma
  2023-01-02 12:39       ` Christian König
  0 siblings, 1 reply; 64+ messages in thread
From: Shashank Sharma @ 2022-12-26 10:41 UTC (permalink / raw)
  To: Christian König, amd-gfx
  Cc: Alex Deucher, Christian Koenig, arvind.yadav, arunpravin.paneerselvam

Hello Christian,

On 25/12/2022 16:44, Christian König wrote:
> On 23.12.22 at 20:36, Shashank Sharma wrote:
>> This patch adds skeleton code for usermode queue creation. It
>> typically contains:
>> - A new structure to keep all the user queue data in one place.
>> - An IOCTL function to create/free a usermode queue.
>> - A function to generate unique index for the queue.
>> - A global ptr in amdgpu_dev
>>
>> Cc: Alex Deucher <alexander.deucher@amd.com>
>> Cc: Christian Koenig <christian.koenig@amd.com>
>> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/Makefile           |   2 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h           |   6 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h       |   1 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 187 ++++++++++++++++++
>>   .../drm/amd/include/amdgpu_usermode_queue.h   |  50 +++++
>>   5 files changed, 246 insertions(+)
>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>   create mode 100644 drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>> index 6ad39cf71bdd..e2a34ee57bfb 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>> @@ -209,6 +209,8 @@ amdgpu-y += \
>>   # add amdkfd interfaces
>>   amdgpu-y += amdgpu_amdkfd.o
>>   +# add usermode queue
>> +amdgpu-y += amdgpu_userqueue.o
>>     ifneq ($(CONFIG_HSA_AMD),)
>>   AMDKFD_PATH := ../amdkfd
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 8639a4f9c6e8..4b566fcfca18 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -749,6 +749,11 @@ struct amdgpu_mqd {
>>               struct amdgpu_mqd_prop *p);
>>   };
>>   +struct amdgpu_userq_globals {
>> +    struct ida ida;
>> +    struct mutex userq_mutex;
>> +};
>> +
>>   #define AMDGPU_RESET_MAGIC_NUM 64
>>   #define AMDGPU_MAX_DF_PERFMONS 4
>>   #define AMDGPU_PRODUCT_NAME_LEN 64
>> @@ -955,6 +960,7 @@ struct amdgpu_device {
>>       bool                            enable_mes_kiq;
>>       struct amdgpu_mes               mes;
>>       struct amdgpu_mqd               mqds[AMDGPU_HW_IP_NUM];
>> +    struct amdgpu_userq_globals    userq;
>
> This is a pretty big NAK to this. User mode queues should absolutely 
> not be global!
>
> This must be per fpriv, see how amdgpu_ctx/amdgpu_ctx_mgr is designed.
Noted,
>
> Or is that for the interface with the MES? If yes then that should be
> part of the MES code, not here.
This is actually to keep a mutex and keep an IDR object. I will first 
check how amdgpu_ctx handles it, as you suggested.
>
>>         /* df */
>>       struct amdgpu_df                df;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>> index 0fa0e56daf67..f7413859b14f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>> @@ -57,6 +57,7 @@ struct amdgpu_ctx {
>>       unsigned long            ras_counter_ce;
>>       unsigned long            ras_counter_ue;
>>       uint32_t            stable_pstate;
>> +    struct amdgpu_usermode_queue    *userq;
>
> Why should we have this in the ctx here???

We are allocating a few things dynamically for the queue, which remain
valid until we destroy the queue. We also need to save this queue
container somewhere for the destroy function, and I thought it would make
sense to keep it with the context ptr, as this is how we are identifying
the incoming request.

- Shashank

>
> Regards,
> Christian.
>
>>   };
>>     struct amdgpu_ctx_mgr {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> new file mode 100644
>> index 000000000000..3b6e8f75495c
>> --- /dev/null
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> @@ -0,0 +1,187 @@
>> +/*
>> + * Copyright 2022 Advanced Micro Devices, Inc.
>> + *
>> + * Permission is hereby granted, free of charge, to any person 
>> obtaining a
>> + * copy of this software and associated documentation files (the 
>> "Software"),
>> + * to deal in the Software without restriction, including without 
>> limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, 
>> sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom 
>> the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be 
>> included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
>> EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>> MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO 
>> EVENT SHALL
>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
>> DAMAGES OR
>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>> OTHERWISE,
>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
>> USE OR
>> + * OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> + */
>> +
>> +#include "amdgpu.h"
>> +#include "amdgpu_vm.h"
>> +#include "amdgpu_mes.h"
>> +#include "amdgpu_usermode_queue.h"
>> +#include "soc15_common.h"
>> +
>> +#define CHECK_ACCESS(a) (access_ok((const void __user *)a, 
>> sizeof(__u64)))
>> +
>> +static int
>> +amdgpu_userqueue_index(struct amdgpu_device *adev)
>> +{
>> +    int index;
>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>> +
>> +    index = ida_simple_get(&uqg->ida, 2, AMDGPU_MAX_USERQ, GFP_KERNEL);
>> +    return index;
>> +}
>> +
>> +static void
>> +amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct 
>> amdgpu_usermode_queue *queue)
>> +{
>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>> +
>> +    ida_simple_remove(&uqg->ida, queue->queue_id);
>> +}
>> +
>> +static int
>> +amdgpu_userqueue_validate_input(struct amdgpu_device *adev, struct 
>> drm_amdgpu_userq_mqd *mqd_in)
>> +{
>> +    if (mqd_in->queue_va == 0 || mqd_in->doorbell_handle == 0 || 
>> mqd_in->doorbell_offset == 0) {
>> +        DRM_ERROR("Invalid queue object address\n");
>> +        return -EINVAL;
>> +    }
>> +
>> +    if (mqd_in->queue_size == 0 || mqd_in->rptr_va == 0 || 
>> mqd_in->wptr_va == 0) {
>> +        DRM_ERROR("Invalid queue object value\n");
>> +        return -EINVAL;
>> +    }
>> +
>> +    if (mqd_in->ip_type < AMDGPU_HW_IP_GFX || mqd_in->ip_type >= 
>> AMDGPU_HW_IP_NUM) {
>> +        DRM_ERROR("Invalid HW IP type 0x%x\n", mqd_in->ip_type);
>> +        return -EINVAL;
>> +    }
>> +
>> +    if (!CHECK_ACCESS(mqd_in->queue_va) || 
>> !CHECK_ACCESS(mqd_in->rptr_va) ||
>> +        !CHECK_ACCESS(mqd_in->wptr_va)) {
>> +            DRM_ERROR("Invalid mapping of queue ptrs, access error\n");
>> +            return -EINVAL;
>> +    }
>> +
>> +    DRM_DEBUG_DRIVER("Input parameters to create queue are valid\n");
>> +    return 0;
>> +}
>> +
>> +int amdgpu_userqueue_create(struct amdgpu_device *adev, struct 
>> drm_file *filp,
>> +                            union drm_amdgpu_userq *args)
>> +{
>> +    int r, pasid;
>> +    struct amdgpu_usermode_queue *queue;
>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>> +    struct amdgpu_vm *vm = &fpriv->vm;
>> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, args->in.ctx_id);
>> +    struct drm_amdgpu_userq_mqd *mqd_in = &args->in.mqd;
>> +
>> +    if (!ctx) {
>> +        DRM_ERROR("Invalid GPU context\n");
>> +        return -EINVAL;
>> +    }
>> +
>> +    if (vm->pasid < 0) {
>> +        DRM_WARN("No PASID info found\n");
>> +        pasid = 0;
>> +    }
>> +
>> +    mutex_lock(&adev->userq.userq_mutex);
>> +
>> +    queue = kzalloc(sizeof(struct amdgpu_usermode_queue), GFP_KERNEL);
>> +    if (!queue) {
>> +        DRM_ERROR("Failed to allocate memory for queue\n");
>> +        mutex_unlock(&adev->userq.userq_mutex);
>> +        return -ENOMEM;
>> +    }
>> +
>> +    r = amdgpu_userqueue_validate_input(adev, mqd_in);
>> +    if (r < 0) {
>> +        DRM_ERROR("Invalid input to create queue\n");
>> +        goto free_queue;
>> +    }
>> +
>> +    queue->vm = vm;
>> +    queue->pasid = pasid;
>> +    queue->wptr_gpu_addr = mqd_in->wptr_va;
>> +    queue->rptr_gpu_addr = mqd_in->rptr_va;
>> +    queue->queue_size = mqd_in->queue_size;
>> +    queue->queue_type = mqd_in->ip_type;
>> +    queue->paging = false;
>> +    queue->flags = mqd_in->flags;
>> +    queue->queue_id = amdgpu_userqueue_index(adev);
>> +
>> +    ctx->userq = queue;
>> +    args->out.q_id = queue->queue_id;
>> +    args->out.flags = 0;
>> +    mutex_unlock(&adev->userq.userq_mutex);
>> +    return 0;
>> +
>> +free_queue:
>> +    amdgpu_userqueue_remove_index(adev, queue);
>> +    mutex_unlock(&adev->userq.userq_mutex);
>> +    kfree(queue);
>> +    return r;
>> +}
>> +
>> +void amdgpu_userqueue_destroy(struct amdgpu_device *adev, struct 
>> drm_file *filp,
>> +                              union drm_amdgpu_userq *args)
>> +{
>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, args->in.ctx_id);
>> +    struct amdgpu_usermode_queue *queue = ctx->userq;
>> +
>> +    mutex_lock(&adev->userq.userq_mutex);
>> +    amdgpu_userqueue_remove_index(adev, queue);
>> +    ctx->userq = NULL;
>> +    mutex_unlock(&adev->userq.userq_mutex);
>> +    kfree(queue);
>> +}
>> +
>> +int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
>> +               struct drm_file *filp)
>> +{
>> +    union drm_amdgpu_userq *args = data;
>> +    struct amdgpu_device *adev = drm_to_adev(dev);
>> +    int r = 0;
>> +
>> +    switch (args->in.op) {
>> +    case AMDGPU_USERQ_OP_CREATE:
>> +        r = amdgpu_userqueue_create(adev, filp, args);
>> +        if (r)
>> +            DRM_ERROR("Failed to create usermode queue\n");
>> +        break;
>> +
>> +    case AMDGPU_USERQ_OP_FREE:
>> +        amdgpu_userqueue_destroy(adev, filp, args);
>> +        break;
>> +
>> +    default:
>> +        DRM_ERROR("Invalid user queue op specified: %d\n", 
>> args->in.op);
>> +        return -EINVAL;
>> +    }
>> +
>> +    return r;
>> +}
>> +
>> +int amdgpu_userqueue_init(struct amdgpu_device *adev)
>> +{
>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>> +
>> +    mutex_init(&uqg->userq_mutex);
>> +    return 0;
>> +}
>> +
>> +void amdgpu_userqueue_fini(struct amdgpu_device *adev)
>> +{
>> +
>> +}
>> diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h 
>> b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>> new file mode 100644
>> index 000000000000..c1fe39ffaf72
>> --- /dev/null
>> +++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>> @@ -0,0 +1,50 @@
>> +/*
>> + * Copyright 2022 Advanced Micro Devices, Inc.
>> + *
>> + * Permission is hereby granted, free of charge, to any person 
>> obtaining a
>> + * copy of this software and associated documentation files (the 
>> "Software"),
>> + * to deal in the Software without restriction, including without 
>> limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, 
>> sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom 
>> the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be 
>> included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
>> EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>> MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO 
>> EVENT SHALL
>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
>> DAMAGES OR
>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>> OTHERWISE,
>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
>> USE OR
>> + * OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> + */
>> +
>> +#ifndef AMDGPU_USERMODE_QUEUE_H_
>> +#define AMDGPU_USERMODE_QUEUE_H_
>> +
>> +#define AMDGPU_MAX_USERQ 512
>> +
>> +struct amdgpu_usermode_queue {
>> +    int        queue_id;
>> +    int        queue_type;
>> +    int        queue_size;
>> +    int        paging;
>> +    int        pasid;
>> +    int        use_doorbell;
>> +    int        doorbell_index;
>> +
>> +    uint64_t    mqd_gpu_addr;
>> +    uint64_t    wptr_gpu_addr;
>> +    uint64_t    rptr_gpu_addr;
>> +    uint64_t    queue_gpu_addr;
>> +    uint64_t    flags;
>> +    void         *mqd_cpu_ptr;
>> +
>> +    struct amdgpu_bo    *mqd_obj;
>> +    struct amdgpu_vm        *vm;
>> +    struct list_head     list;
>> +};
>> +
>> +#endif
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 7/7] drm/amdgpu: Secure semaphore for usermode queue
  2022-12-25 10:07   ` Zhang, Yifan
@ 2022-12-27  9:32     ` Arunpravin Paneer Selvam
  0 siblings, 0 replies; 64+ messages in thread
From: Arunpravin Paneer Selvam @ 2022-12-27  9:32 UTC (permalink / raw)
  To: Zhang, Yifan, Sharma, Shashank, amd-gfx
  Cc: Deucher, Alexander, Koenig, Christian, Yadav, Arvind



On 12/25/2022 3:37 PM, Zhang, Yifan wrote:
> [Public]
>
> From: Arunpravin Paneer Selvam <arunpravin.paneerselvam@amd.com>
>
> This is a WIP patch, which adds a kernel implementation of a secure semaphore for the usermode queues. The UAPI for the same is yet to be implemented.
>
> The idea is to create an RO page and map it to each process requesting a user mode queue, and give them a unique offset in the page, which can be polled (like wait_mem) for sync.
>
> Cc: Alex Deucher <alexander.deucher@amd.com>
> Cc: Christian Koenig <christian.koenig@amd.com>
> Cc: Shashank Sharma <shashank.sharma@amd.com>
>
> Signed-off-by: Arunpravin Paneer Selvam <arunpravin.paneerselvam@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/Makefile           |   1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h           |   8 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c |   7 +-
>   .../amd/amdgpu/amdgpu_userqueue_secure_sem.c  | 245 ++++++++++++++++++
>   .../drm/amd/include/amdgpu_usermode_queue.h   |  10 +
>   .../amd/include/amdgpu_usermode_queue_mqd.h   |   4 +-
>   6 files changed, 272 insertions(+), 3 deletions(-)
>  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_secure_sem.c
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
> index e2a34ee57bfb..daec7bb9ab3b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -211,6 +211,7 @@ amdgpu-y += amdgpu_amdkfd.o
>   
>   # add usermode queue
>   amdgpu-y += amdgpu_userqueue.o
> +amdgpu-y += amdgpu_userqueue_secure_sem.o
>   
>   ifneq ($(CONFIG_HSA_AMD),)
>   AMDKFD_PATH := ../amdkfd
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 4b566fcfca18..7325c01efc90 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -749,9 +749,17 @@ struct amdgpu_mqd {
>   			struct amdgpu_mqd_prop *p);
>   };
>   
> +struct amdgpu_userq_sec_sem {
> +	struct amdgpu_bo *sem_obj;
> +	u64 gpu_addr;
> +	u32 num_sem;
> +	unsigned long used[DIV_ROUND_UP(64, BITS_PER_LONG)];
> +};
> +
>   struct amdgpu_userq_globals {
>   	struct ida ida;
>   	struct mutex userq_mutex;
> +	struct amdgpu_userq_sec_sem sem;
>   };
>   
>   #define AMDGPU_RESET_MAGIC_NUM 64
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> index b164e24247ca..2af634bbe3dc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> @@ -261,6 +261,10 @@ amdgpu_userqueue_setup_mqd(struct amdgpu_device *adev, struct amdgpu_usermode_qu
>       /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
>       mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
>   
> +    /* Setup semaphore fence address */
> +    mqd->fenceaddress_lo = queue->sem_data.sem_gpu_addr & 0xFFFFFFFC;
> +    mqd->fenceaddress_lo = upper_32_bits(queue->sem_data.sem_gpu_addr) & 0xFFFF;
> +
>
> It should be mqd->fenceaddress_high = upper_32_bits(queue->sem_data.sem_gpu_addr) & 0xFFFF
yes, I think it got messed up in integration.
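
For clarity, the intended assignment is along these lines (sketch of the
fix, using the kernel's lower_32_bits()/upper_32_bits() helpers):

    /* program both halves of the semaphore fence address */
    mqd->fenceaddress_lo   = lower_32_bits(queue->sem_data.sem_gpu_addr) & 0xFFFFFFFC;
    mqd->fenceaddress_high = upper_32_bits(queue->sem_data.sem_gpu_addr) & 0xFFFF;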

Thanks,
Arun.
>
>       /* activate the queue */
>       mqd->cp_gfx_hqd_active = 1;
>   }
> @@ -472,10 +476,11 @@ int amdgpu_userqueue_init(struct amdgpu_device *adev)
>       struct amdgpu_userq_globals *uqg = &adev->userq;
>   
>       mutex_init(&uqg->userq_mutex);
> +    amdgpu_userqueue_sec_sem_init(adev);
>       return 0;
>   }
>   
>   void amdgpu_userqueue_fini(struct amdgpu_device *adev)  {
> -
> +    amdgpu_userqueue_sec_sem_fini(adev);
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_secure_sem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_secure_sem.c
> new file mode 100644
> index 000000000000..6e6a7d62a300
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_secure_sem.c
> @@ -0,0 +1,245 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person
> +obtaining a
> + * copy of this software and associated documentation files (the
> +"Software"),
> + * to deal in the Software without restriction, including without
> +limitation
> + * the rights to use, copy, modify, merge, publish, distribute,
> +sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom
> +the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be
> +included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> +EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> +MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
> +SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
> +DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> +OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
> +OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +#include "amdgpu.h"
> +#include "amdgpu_mes.h"
> +#include "amdgpu_usermode_queue.h"
> +#include "amdgpu_usermode_queue_mqd.h"
> +
> +static int amdgpu_userqueue_sem_addr_unmap(struct amdgpu_device *adev,
> +					   struct amdgpu_usermode_queue *q) {
> +    struct amdgpu_userq_sec_sem_data *sem_bo_data = &q->sem_data;
> +    struct amdgpu_bo_va *bo_va = sem_bo_data->sem_data_va;
> +    struct amdgpu_vm *vm = bo_va->base.vm;
> +    struct amdgpu_bo *bo = adev->userq.sem.sem_obj;
> +    struct amdgpu_bo_list_entry vm_pd;
> +    struct list_head list, duplicates;
> +    struct dma_fence *fence = NULL;
> +    struct ttm_validate_buffer tv;
> +    struct ww_acquire_ctx ticket;
> +    long r = 0;
> +
> +    INIT_LIST_HEAD(&list);
> +    INIT_LIST_HEAD(&duplicates);
> +
> +    tv.bo = &bo->tbo;
> +    tv.num_shared = 2;
> +    list_add(&tv.head, &list);
> +
> +    amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
> +
> +    r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
> +    if (r) {
> +        DRM_ERROR("leaking bo va because we fail to reserve bo (%ld)\n", r);
> +        return r;
> +    }
> +
> +    amdgpu_vm_bo_del(adev, bo_va);
> +    if (!amdgpu_vm_ready(vm))
> +        goto out_unlock;
> +
> +    r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, &fence);
> +    if (r)
> +        goto out_unlock;
> +    if (fence) {
> +        amdgpu_bo_fence(bo, fence, true);
> +        fence = NULL;
> +    }
> +
> +    r = amdgpu_vm_clear_freed(adev, vm, &fence);
> +    if (r || !fence)
> +        goto out_unlock;
> +
> +    dma_fence_wait(fence, false);
> +    amdgpu_bo_fence(bo, fence, true);
> +    dma_fence_put(fence);
> +
> +out_unlock:
> +    if (unlikely(r < 0))
> +        DRM_ERROR("failed to clear page tables (%ld)\n", r);
> +    ttm_eu_backoff_reservation(&ticket, &list);
> +
> +    return r;
> +}
> +
> +static u64 amdgpu_sem_bo_vaddr(struct amdgpu_device *adev) {
> +       u64 addr = AMDGPU_VA_RESERVED_SIZE;
> +
> +       /* TODO:Find va address for sem bo mapping */
> +       return addr;
> +}
> +
> +static int amdgpu_userqueue_sem_addr_map(struct amdgpu_device *adev,
> +                                        struct amdgpu_usermode_queue
> +*q) {
> +    struct amdgpu_userq_sec_sem_data *sem_bo_data;
> +    struct amdgpu_bo *sem_obj = adev->userq.sem.sem_obj;
> +    struct ttm_validate_buffer csa_tv;
> +    struct amdgpu_bo_list_entry pd;
> +    struct ww_acquire_ctx ticket;
> +    struct amdgpu_vm *vm = q->vm;
> +    struct amdgpu_bo_va *bo_va;
> +    struct amdgpu_sync sync;
> +    struct list_head list;
> +    int r;
> +
> +    amdgpu_sync_create(&sync);
> +    INIT_LIST_HEAD(&list);
> +    INIT_LIST_HEAD(&csa_tv.head);
> +
> +    sem_bo_data = &q->sem_data;
> +
> +    csa_tv.bo = &sem_obj->tbo;
> +    csa_tv.num_shared = 1;
> +
> +    list_add(&csa_tv.head, &list);
> +    amdgpu_vm_get_pd_bo(vm, &list, &pd);
> +
> +    r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
> +    if (r) {
> +        DRM_ERROR("failed to reserve userqueue sec sem object BO: err=%d\n", r);
> +        return r;
> +    }
> +
> +    bo_va = amdgpu_vm_bo_add(adev, vm, sem_obj);
> +    if (!bo_va) {
> +        ttm_eu_backoff_reservation(&ticket, &list);
> +        DRM_ERROR("failed to create bo_va for userqueue sec sem object BO\n");
> +        return -ENOMEM;
> +    }
> +
> +    sem_bo_data->sem_gpu_addr = amdgpu_sem_bo_vaddr(adev);
> +    r = amdgpu_vm_bo_map(adev, bo_va, sem_bo_data->sem_gpu_addr, 0,
> +                        AMDGPU_MAX_USERQUEUE_SEC_SEM * sizeof(u64),
> +                        AMDGPU_PTE_READABLE | AMDGPU_PTE_READABLE);
> +
> +    if (r) {
> +        DRM_ERROR("failed to do bo_map on sec sem object BO, err=%d\n", r);
> +        goto error;
> +    }
> +
> +    r = amdgpu_vm_bo_update(adev, bo_va, false);
> +    if (r) {
> +        DRM_ERROR("failed to do vm_bo_update on sec sem object BO\n");
> +        goto error;
> +    }
> +    amdgpu_sync_fence(&sync, bo_va->last_pt_update);
> +
> +    r = amdgpu_vm_update_pdes(adev, vm, false);
> +    if (r) {
> +        DRM_ERROR("failed to update pdes on sec sem object BO\n");
> +        goto error;
> +    }
> +    amdgpu_sync_fence(&sync, vm->last_update);
> +
> +    amdgpu_sync_wait(&sync, false);
> +    ttm_eu_backoff_reservation(&ticket, &list);
> +
> +    amdgpu_sync_free(&sync);
> +    sem_bo_data->sem_data_va = bo_va;
> +    return 0;
> +
> +error:
> +    amdgpu_vm_bo_del(adev, bo_va);
> +    ttm_eu_backoff_reservation(&ticket, &list);
> +    amdgpu_sync_free(&sync);
> +    return r;
> +}
> +
> +int amdgpu_userqueue_sec_sem_get(struct amdgpu_device *adev,
> +                            struct amdgpu_usermode_queue *q,
> +                            u64 *gpu_addr) {
> +    unsigned long offset = find_first_zero_bit(adev->userq.sem.used, adev->userq.sem.num_sem);
> +    u32 sem_offset;
> +    int r;
> +
> +    if (offset < adev->userq.sem.num_sem) {
> +        __set_bit(offset, adev->userq.sem.used);
> +        sem_offset = offset << 6; /* convert to qw offset */
> +    } else {
> +        return -EINVAL;
> +    }
> +
> +    r = amdgpu_userqueue_sem_addr_map(adev, q);
> +    if (r) {
> +        DRM_ERROR("failed to map sec sem object BO");
> +        amdgpu_userqueue_sem_addr_unmap(adev, q);
> +        return r;
> +    }
> +
> +    *gpu_addr = sem_offset + q->sem_data.sem_gpu_addr;
> +
> +    return 0;
> +}
> +
> +void amdgpu_userqueue_sec_sem_free(struct amdgpu_device *adev,
> +                                struct amdgpu_usermode_queue *q,
> +                                u32 sem) {
> +    int r;
> +
> +    r = amdgpu_userqueue_sem_addr_unmap(adev, q);
> +    if (r)
> +        DRM_ERROR("failed to unmap sec sem object BO");
> +
> +    sem >>= 6;
> +    if (sem < adev->userq.sem.num_sem)
> +        __clear_bit(sem, adev->userq.sem.used);
> +}
> +
> +int
> +amdgpu_userqueue_sec_sem_init(struct amdgpu_device *adev) {
> +    int r;
> +
> +    if (adev->userq.sem.sem_obj == NULL) {
> +        /*
> +        * AMDGPU_MAX_USERQUEUE_SEC_SEM * sizeof(u64) = AMDGPU_MAX_USERQUEUE_SEC_SEM
> +        * 64bit slots
> +        */
> +        r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_USERQUEUE_SEC_SEM * sizeof(u64),
> +                                    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->userq.sem.sem_obj,
> +                                    &adev->userq.sem.gpu_addr, NULL);
> +        if (r) {
> +            DRM_ERROR("Create userqueue SEM bo failed, err %d\n", r);
> +            return r;
> +        }
> +
> +        adev->userq.sem.num_sem = AMDGPU_MAX_USERQUEUE_SEC_SEM;
> +        memset(&adev->userq.sem.used, 0, sizeof(adev->userq.sem.used));
> +    }
> +
> +    return 0;
> +}
> +
> +void
> +amdgpu_userqueue_sec_sem_fini(struct amdgpu_device *adev) {
> +    if (adev->userq.sem.sem_obj) {
> +        amdgpu_bo_free_kernel(&adev->userq.sem.sem_obj,
> +                &adev->userq.sem.gpu_addr,
> +                NULL);
> +        adev->userq.sem.sem_obj = NULL;
> +    }
> +}
> \ No newline at end of file
> diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
> index 8bf3c0be6937..630d9b5d2423 100644
> --- a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
> +++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
> @@ -25,6 +25,12 @@
>   #define AMDGPU_USERMODE_QUEUE_H_
>   
>   #define AMDGPU_MAX_USERQ 512
> +#define AMDGPU_MAX_USERQUEUE_SEC_SEM 64
> +
> +struct amdgpu_userq_sec_sem_data {
> +	u64 sem_gpu_addr;
> +	struct amdgpu_bo_va *sem_data_va;
> +};
>   
>   struct amdgpu_userq_ctx {
>   	struct amdgpu_bo *obj;
> @@ -52,7 +58,11 @@ struct amdgpu_usermode_queue {
>   	struct amdgpu_vm    	*vm;
>   	struct amdgpu_userq_ctx proc_ctx;
>   	struct amdgpu_userq_ctx gang_ctx;
> +
> +	struct amdgpu_userq_sec_sem_data sem_data;
>   	struct list_head 	list;
>   };
>   
> +int amdgpu_userqueue_sec_sem_init(struct amdgpu_device *adev);
> +void amdgpu_userqueue_sec_sem_fini(struct amdgpu_device *adev);
>   #endif
> diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h
> index d0a285708ba5..e0bfb67d91f4 100644
> --- a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h
> +++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h
> @@ -35,8 +35,8 @@ struct amdgpu_usermode_queue_mqd
>   	uint32_t fw_work_area_base_hi; // offset: 5  (0x5)
>   	uint32_t shadow_initialized; // offset: 6  (0x6)
>   	uint32_t ib_vmid; // offset: 7  (0x7)
> -	uint32_t reserved_8; // offset: 8  (0x8)
> -	uint32_t reserved_9; // offset: 9  (0x9)
> +	uint32_t fenceaddress_lo; // offset: 8  (0x8)
> +	uint32_t fenceaddress_high; // offset: 9  (0x9)
>   	uint32_t reserved_10; // offset: 10  (0xA)
>   	uint32_t reserved_11; // offset: 11  (0xB)
>   	uint32_t reserved_12; // offset: 12  (0xC)
> --
> 2.34.1


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management
  2022-12-24 20:20   ` Bas Nieuwenhuizen
@ 2022-12-27 16:58     ` Alex Deucher
  2023-01-02 11:27       ` Christian König
  0 siblings, 1 reply; 64+ messages in thread
From: Alex Deucher @ 2022-12-27 16:58 UTC (permalink / raw)
  To: Bas Nieuwenhuizen
  Cc: arunpravin.paneerselvam, Shashank Sharma, amd-gfx, arvind.yadav,
	Alex Deucher, Christian Koenig

On Sat, Dec 24, 2022 at 3:21 PM Bas Nieuwenhuizen
<bas@basnieuwenhuizen.nl> wrote:
>
> On Fri, Dec 23, 2022 at 8:37 PM Shashank Sharma <shashank.sharma@amd.com> wrote:
> >
> > From: Alex Deucher <alexander.deucher@amd.com>
> >
> > This patch introduces a new UAPI/IOCTL for usermode graphics
> > queue. The userspace app will fill this structure and request
> > the graphics driver to add a graphics work queue for it. The
> > output of this UAPI is a queue id.
> >
> > This UAPI maps the queue into the GPU, so the graphics app can start
> > submitting work to the queue as soon as the call returns.
> >
> > Cc: Alex Deucher <alexander.deucher@amd.com>
> > Cc: Christian Koenig <christian.koenig@amd.com>
> > Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> > Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
> > ---
> >  include/uapi/drm/amdgpu_drm.h | 52 +++++++++++++++++++++++++++++++++++
> >  1 file changed, 52 insertions(+)
> >
> > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> > index 0d93ec132ebb..a3d0dd6f62c5 100644
> > --- a/include/uapi/drm/amdgpu_drm.h
> > +++ b/include/uapi/drm/amdgpu_drm.h
> > @@ -54,6 +54,7 @@ extern "C" {
> >  #define DRM_AMDGPU_VM                  0x13
> >  #define DRM_AMDGPU_FENCE_TO_HANDLE     0x14
> >  #define DRM_AMDGPU_SCHED               0x15
> > +#define DRM_AMDGPU_USERQ               0x16
> >
> >  #define DRM_IOCTL_AMDGPU_GEM_CREATE    DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> >  #define DRM_IOCTL_AMDGPU_GEM_MMAP      DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> > @@ -71,6 +72,7 @@ extern "C" {
> >  #define DRM_IOCTL_AMDGPU_VM            DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
> >  #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
> >  #define DRM_IOCTL_AMDGPU_SCHED         DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> > +#define DRM_IOCTL_AMDGPU_USERQ         DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
> >
> >  /**
> >   * DOC: memory domains
> > @@ -288,6 +290,56 @@ union drm_amdgpu_ctx {
> >         union drm_amdgpu_ctx_out out;
> >  };
> >
> > +/* user queue IOCTL */
> > +#define AMDGPU_USERQ_OP_CREATE 1
> > +#define AMDGPU_USERQ_OP_FREE   2
> > +
> > +#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)
> > +#define AMDGPU_USERQ_MQD_FLAGS_AQL     (1 << 1)
>
> Can we document what AQL means here?

AQL is the packet format used by KFD/ROCm.  The idea is to be able to
create queues that support either format (AQL or PM4).
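
As a trivial illustration with the flags from this header (sketch only;
want_aql is a made-up variable):

    struct drm_amdgpu_userq_mqd mqd = {0};

    /* pick the packet format the queue should accept; default is PM4 */
    mqd.flags = want_aql ? AMDGPU_USERQ_MQD_FLAGS_AQL : 0;
    mqd.ip_type = AMDGPU_HW_IP_GFX;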

>
>
> > +
> > +struct drm_amdgpu_userq_mqd {
> > +       /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
> > +       __u32   flags;
> > +       /** IP type: AMDGPU_HW_IP_* */
> > +       __u32   ip_type;
> > +       /** GEM object handle */
> > +       __u32   doorbell_handle;
> > +       /** Doorbell offset in dwords */
> > +       __u32   doorbell_offset;
>
> What are the doorbell handle/offset for? I don't see any of them used
> in the rest of the series (we only check the handle isn't 0, which
> isn't enough validation for a GEM handle to consider it valid), and
> the kernel seems to allocate some kind of doorbell index in patch 4.
> Does userspace need to know about that one? (similarly use_doorbell in
> that patch seems like it is never explicitly written to)

The doorbell is how you trigger the engine to start processing the
user queue.  The idea is that each user process allocates a page of
doorbell space (one of the PCI BARs) and then each 64 bit segment in
that page could be used for a user mode queue.  So the UMD writes its
data to the queue, updates the wptr, and then writes to the doorbell
to tell the firmware to start processing the queue.
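
In pseudo-code, the userspace side of that flow looks something like the
sketch below (illustrative only, not actual UMD code; ring, wptr and
doorbell are assumed to be the mappings that correspond to queue_va,
wptr_va and the doorbell BO from drm_amdgpu_userq_mqd):

#include <stdint.h>

/* A real implementation also needs the appropriate memory barriers. */
static void submit(uint32_t *ring, uint64_t ring_size_dw,
                   volatile uint64_t *wptr, volatile uint64_t *doorbell,
                   const uint32_t *pkt, uint32_t ndw)
{
        uint64_t w = *wptr;
        uint32_t i;

        for (i = 0; i < ndw; i++)        /* 1. write PM4/AQL packets   */
                ring[(w + i) % ring_size_dw] = pkt[i];

        *wptr = w + ndw;                 /* 2. publish the new wptr    */
        *doorbell = w + ndw;             /* 3. ring the doorbell so    */
                                         /*    firmware picks up work  */
}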

>
> The other questions I have are about how this interacts with memory
> management. Does this have access to all BOs allocated with
> AMDGPU_GEM_CREATE_VM_ALWAYS_VALID? What about imported BOs? How does
> this interact with VA unmap/map operations? (AFAICT we have no way to
> tell if pagetable modifying operations are complete from userspace for
> now). What happens if we need to spill BOs from VRAM due to
> (cross-process) memory pressure?

Effectively everything you map on the GPU would be valid.  If there is
memory pressure, the kernel driver will behave similarly to KFD.  It
will unmap the queues (which preempts all work on the engines), do any
memory migrations, and then map the queues again.
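
In rough pseudo-code (purely illustrative, the helper names below do not
exist in the driver):

static int userq_handle_memory_pressure(struct amdgpu_device *adev)
{
        int r;

        /* preempt: unmap every user queue so the engines stop fetching */
        r = userq_unmap_all_queues(adev);
        if (r)
                return r;

        /* let TTM migrate/evict buffers to resolve the pressure */
        r = userq_migrate_buffers(adev);
        if (r)
                return r;

        /* map the queues again; userspace resumes without noticing */
        return userq_map_all_queues(adev);
}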

Alex

>
> > +       /** GPU virtual address of the queue */
> > +       __u64   queue_va;
> > +       /** Size of the queue in bytes */
> > +       __u64   queue_size;
> > +       /** GPU virtual address of the rptr */
> > +       __u64   rptr_va;
> > +       /** GPU virtual address of the wptr */
> > +       __u64   wptr_va;
> > +};
> > +
> > +struct drm_amdgpu_userq_in {
> > +       /** AMDGPU_USERQ_OP_* */
> > +       __u32   op;
> > +       /** Flags */
> > +       __u32   flags;
> > +       /** Context handle to associate the queue with */
> > +       __u32   ctx_id;
> > +       __u32   pad;
> > +       /** Queue descriptor */
> > +       struct drm_amdgpu_userq_mqd mqd;
> > +};
> > +
> > +struct drm_amdgpu_userq_out {
> > +       /** Queue handle */
> > +       __u32   q_id;
> > +       /** Flags */
> > +       __u32   flags;
> > +};
> > +
> > +union drm_amdgpu_userq {
> > +       struct drm_amdgpu_userq_in in;
> > +       struct drm_amdgpu_userq_out out;
> > +};
> > +
> >  /* vm ioctl */
> >  #define AMDGPU_VM_OP_RESERVE_VMID      1
> >  #define AMDGPU_VM_OP_UNRESERVE_VMID    2
> > --
> > 2.34.1
> >

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work
  2022-12-23 19:36 ` [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work Shashank Sharma
  2022-12-24 18:19   ` Oded Gabbay
  2022-12-25 15:44   ` Christian König
@ 2022-12-29 17:41   ` Alex Deucher
  2023-01-02 13:53     ` Christian König
  2023-01-03  9:16     ` Shashank Sharma
  2023-01-04  8:55   ` Zhu, Jiadong
  3 siblings, 2 replies; 64+ messages in thread
From: Alex Deucher @ 2022-12-29 17:41 UTC (permalink / raw)
  To: Shashank Sharma
  Cc: Alex Deucher, arvind.yadav, Christian Koenig, amd-gfx,
	arunpravin.paneerselvam

On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma <shashank.sharma@amd.com> wrote:
>
> This patch adds skeleton code for usermode queue creation. It
> typically contains:
> - A new structure to keep all the user queue data in one place.
> - An IOCTL function to create/free a usermode queue.
> - A function to generate unique index for the queue.
> - A global ptr in amdgpu_dev
>
> Cc: Alex Deucher <alexander.deucher@amd.com>
> Cc: Christian Koenig <christian.koenig@amd.com>
> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/Makefile           |   2 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h           |   6 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h       |   1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 187 ++++++++++++++++++
>  .../drm/amd/include/amdgpu_usermode_queue.h   |  50 +++++
>  5 files changed, 246 insertions(+)
>  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>  create mode 100644 drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
> index 6ad39cf71bdd..e2a34ee57bfb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -209,6 +209,8 @@ amdgpu-y += \
>  # add amdkfd interfaces
>  amdgpu-y += amdgpu_amdkfd.o
>
> +# add usermode queue
> +amdgpu-y += amdgpu_userqueue.o
>
>  ifneq ($(CONFIG_HSA_AMD),)
>  AMDKFD_PATH := ../amdkfd
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 8639a4f9c6e8..4b566fcfca18 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -749,6 +749,11 @@ struct amdgpu_mqd {
>                         struct amdgpu_mqd_prop *p);
>  };
>
> +struct amdgpu_userq_globals {
> +       struct ida ida;
> +       struct mutex userq_mutex;
> +};
> +
>  #define AMDGPU_RESET_MAGIC_NUM 64
>  #define AMDGPU_MAX_DF_PERFMONS 4
>  #define AMDGPU_PRODUCT_NAME_LEN 64
> @@ -955,6 +960,7 @@ struct amdgpu_device {
>         bool                            enable_mes_kiq;
>         struct amdgpu_mes               mes;
>         struct amdgpu_mqd               mqds[AMDGPU_HW_IP_NUM];
> +       struct amdgpu_userq_globals     userq;
>
>         /* df */
>         struct amdgpu_df                df;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> index 0fa0e56daf67..f7413859b14f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> @@ -57,6 +57,7 @@ struct amdgpu_ctx {
>         unsigned long                   ras_counter_ce;
>         unsigned long                   ras_counter_ue;
>         uint32_t                        stable_pstate;
> +       struct amdgpu_usermode_queue    *userq;

There can be multiple queues per context.  We should make this a list.
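
i.e. something along these lines (untested sketch, reusing the list_head
that amdgpu_usermode_queue already carries):

/* amdgpu_ctx.h: replace the single pointer with a list head */
struct list_head        userq_list;

/* queue creation: */
list_add_tail(&queue->list, &ctx->userq_list);

/* context teardown then has to walk and free all of them: */
struct amdgpu_usermode_queue *q, *tmp;

list_for_each_entry_safe(q, tmp, &ctx->userq_list, list) {
        list_del(&q->list);
        amdgpu_userqueue_remove_index(adev, q);
        kfree(q);
}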

>  };
>
>  struct amdgpu_ctx_mgr {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> new file mode 100644
> index 000000000000..3b6e8f75495c
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> @@ -0,0 +1,187 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#include "amdgpu.h"
> +#include "amdgpu_vm.h"
> +#include "amdgpu_mes.h"
> +#include "amdgpu_usermode_queue.h"
> +#include "soc15_common.h"
> +
> +#define CHECK_ACCESS(a) (access_ok((const void __user *)a, sizeof(__u64)))
> +
> +static int
> +amdgpu_userqueue_index(struct amdgpu_device *adev)
> +{
> +    int index;
> +    struct amdgpu_userq_globals *uqg = &adev->userq;
> +
> +    index = ida_simple_get(&uqg->ida, 2, AMDGPU_MAX_USERQ, GFP_KERNEL);
> +    return index;
> +}
> +
> +static void
> +amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
> +{
> +    struct amdgpu_userq_globals *uqg = &adev->userq;
> +
> +    ida_simple_remove(&uqg->ida, queue->queue_id);
> +}
> +
> +static int
> +amdgpu_userqueue_validate_input(struct amdgpu_device *adev, struct drm_amdgpu_userq_mqd *mqd_in)
> +{
> +    if (mqd_in->queue_va == 0 || mqd_in->doorbell_handle == 0 || mqd_in->doorbell_offset == 0) {
> +        DRM_ERROR("Invalid queue object address\n");
> +        return -EINVAL;
> +    }
> +
> +    if (mqd_in->queue_size == 0 || mqd_in->rptr_va == 0 || mqd_in->wptr_va == 0) {
> +        DRM_ERROR("Invalid queue object value\n");
> +        return -EINVAL;
> +    }
> +
> +    if (mqd_in->ip_type < AMDGPU_HW_IP_GFX || mqd_in->ip_type >= AMDGPU_HW_IP_NUM) {
> +        DRM_ERROR("Invalid HW IP type 0x%x\n", mqd_in->ip_type);
> +        return -EINVAL;
> +    }
> +
> +    if (!CHECK_ACCESS(mqd_in->queue_va) || !CHECK_ACCESS(mqd_in->rptr_va) ||
> +        !CHECK_ACCESS(mqd_in->wptr_va)) {
> +            DRM_ERROR("Invalid mapping of queue ptrs, access error\n");
> +            return -EINVAL;
> +    }

Need to check the flags as well.
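
E.g. reject anything we don't define (the mask name is made up here, since
the UAPI does not define any flag bits yet):

if (mqd_in->flags & ~AMDGPU_USERQ_MQD_FLAGS_VALID) {
        DRM_ERROR("Invalid userqueue flags 0x%llx\n", mqd_in->flags);
        return -EINVAL;
}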

> +
> +    DRM_DEBUG_DRIVER("Input parameters to create queue are valid\n");
> +    return 0;
> +}
> +
> +int amdgpu_userqueue_create(struct amdgpu_device *adev, struct drm_file *filp,
> +                            union drm_amdgpu_userq *args)
> +{
> +    int r, pasid;
> +    struct amdgpu_usermode_queue *queue;
> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
> +    struct amdgpu_vm *vm = &fpriv->vm;
> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, args->in.ctx_id);
> +    struct drm_amdgpu_userq_mqd *mqd_in = &args->in.mqd;
> +
> +    if (!ctx) {
> +        DRM_ERROR("Invalid GPU context\n");
> +        return -EINVAL;
> +    }
> +
> +    if (vm->pasid < 0) {
> +        DRM_WARN("No PASID info found\n");
> +        pasid = 0;
> +    }
> +
> +    mutex_lock(&adev->userq.userq_mutex);
> +
> +    queue = kzalloc(sizeof(struct amdgpu_usermode_queue), GFP_KERNEL);
> +    if (!queue) {
> +        DRM_ERROR("Failed to allocate memory for queue\n");
> +        mutex_unlock(&adev->userq.userq_mutex);
> +        return -ENOMEM;
> +    }
> +
> +    r = amdgpu_userqueue_validate_input(adev, mqd_in);
> +    if (r < 0) {
> +        DRM_ERROR("Invalid input to create queue\n");
> +        goto free_queue;
> +    }
> +
> +    queue->vm = vm;
> +    queue->pasid = pasid;
> +    queue->wptr_gpu_addr = mqd_in->wptr_va;
> +    queue->rptr_gpu_addr = mqd_in->rptr_va;
> +    queue->queue_size = mqd_in->queue_size;
> +    queue->queue_type = mqd_in->ip_type;
> +    queue->paging = false;
> +    queue->flags = mqd_in->flags;
> +    queue->queue_id = amdgpu_userqueue_index(adev);
> +
> +    ctx->userq = queue;
> +    args->out.q_id = queue->queue_id;
> +    args->out.flags = 0;
> +    mutex_unlock(&adev->userq.userq_mutex);
> +    return 0;
> +
> +free_queue:
> +    amdgpu_userqueue_remove_index(adev, queue);
> +    mutex_unlock(&adev->userq.userq_mutex);
> +    kfree(queue);
> +    return r;
> +}
> +
> +void amdgpu_userqueue_destroy(struct amdgpu_device *adev, struct drm_file *filp,
> +                              union drm_amdgpu_userq *args)
> +{
> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, args->in.ctx_id);
> +    struct amdgpu_usermode_queue *queue = ctx->userq;
> +
> +    mutex_lock(&adev->userq.userq_mutex);
> +    amdgpu_userqueue_remove_index(adev, queue);
> +    ctx->userq = NULL;
> +    mutex_unlock(&adev->userq.userq_mutex);
> +    kfree(queue);
> +}
> +
> +int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
> +                      struct drm_file *filp)
> +{
> +    union drm_amdgpu_userq *args = data;
> +    struct amdgpu_device *adev = drm_to_adev(dev);
> +    int r = 0;
> +
> +    switch (args->in.op) {
> +    case AMDGPU_USERQ_OP_CREATE:
> +        r = amdgpu_userqueue_create(adev, filp, args);
> +        if (r)
> +            DRM_ERROR("Failed to create usermode queue\n");
> +        break;
> +
> +    case AMDGPU_USERQ_OP_FREE:
> +        amdgpu_userqueue_destroy(adev, filp, args);
> +        break;
> +
> +    default:
> +        DRM_ERROR("Invalid user queue op specified: %d\n", args->in.op);
> +        return -EINVAL;
> +    }
> +
> +    return r;
> +}
> +
> +int amdgpu_userqueue_init(struct amdgpu_device *adev)
> +{
> +    struct amdgpu_userq_globals *uqg = &adev->userq;
> +
> +    mutex_init(&uqg->userq_mutex);
> +    return 0;
> +}
> +
> +void amdgpu_userqueue_fini(struct amdgpu_device *adev)
> +{
> +
> +}
> diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
> new file mode 100644
> index 000000000000..c1fe39ffaf72
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
> @@ -0,0 +1,50 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#ifndef AMDGPU_USERMODE_QUEUE_H_
> +#define AMDGPU_USERMODE_QUEUE_H_
> +
> +#define AMDGPU_MAX_USERQ 512
> +
> +struct amdgpu_usermode_queue {
> +       int             queue_id;
> +       int             queue_type;
> +       int             queue_size;
> +       int             paging;
> +       int             pasid;
> +       int             use_doorbell;
> +       int             doorbell_index;
> +
> +       uint64_t        mqd_gpu_addr;
> +       uint64_t        wptr_gpu_addr;
> +       uint64_t        rptr_gpu_addr;
> +       uint64_t        queue_gpu_addr;
> +       uint64_t        flags;
> +       void            *mqd_cpu_ptr;
> +
> +       struct amdgpu_bo        *mqd_obj;
> +       struct amdgpu_vm        *vm;
> +       struct list_head        list;
> +};
> +
> +#endif
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 3/7] drm/amdgpu: Create MQD for userspace queue
  2022-12-23 19:36 ` [RFC 3/7] drm/amdgpu: Create MQD for userspace queue Shashank Sharma
@ 2022-12-29 17:47   ` Alex Deucher
  2023-01-03  9:36     ` Shashank Sharma
  0 siblings, 1 reply; 64+ messages in thread
From: Alex Deucher @ 2022-12-29 17:47 UTC (permalink / raw)
  To: Shashank Sharma
  Cc: Alex Deucher, arvind.yadav, Christian Koenig, amd-gfx,
	arunpravin.paneerselvam

On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma <shashank.sharma@amd.com> wrote:
>
> From: Arvind Yadav <arvind.yadav@amd.com>
>
> MQD describes the properties of a user queue to the HW, and allows it to
> accurately configure the queue while mapping it in GPU HW. This patch
> adds:
> - A new header file which contains the MQD definition
> - A new function which creates an MQD object and fills it with userqueue
>   data
>
> Cc: Alex Deucher <alexander.deucher@amd.com>
> Cc: Christian Koenig <christian.koenig@amd.com>
>
> Signed-off-by: Arvind Yadav <arvind.yadav@amd.com>
> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 138 +++++
>  .../amd/include/amdgpu_usermode_queue_mqd.h   | 544 ++++++++++++++++++
>  2 files changed, 682 insertions(+)
>  create mode 100644 drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> index 3b6e8f75495c..a91cc304cb9e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> @@ -25,7 +25,10 @@
>  #include "amdgpu_vm.h"
>  #include "amdgpu_mes.h"
>  #include "amdgpu_usermode_queue.h"
> +#include "amdgpu_usermode_queue_mqd.h"
>  #include "soc15_common.h"
> +#include "gc/gc_11_0_0_offset.h"
> +#include "gc/gc_11_0_0_sh_mask.h"

Don't add IP specific code to this file.

>
>  #define CHECK_ACCESS(a) (access_ok((const void __user *)a, sizeof(__u64)))
>
> @@ -47,6 +50,134 @@ amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct amdgpu_usermode
>      ida_simple_remove(&uqg->ida, queue->queue_id);
>  }
>
> +static void
> +amdgpu_userqueue_setup_mqd(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)

This should be a ring callback or some new IP level callback to init
an MQD since we'll need this for multiple IP types and generations of
IPs.
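
Something along these lines (all names invented, just to illustrate the
split between the generic and the IP specific code):

/* generic side */
struct amdgpu_userq_funcs {
        int  (*mqd_create)(struct amdgpu_device *adev,
                           struct amdgpu_usermode_queue *queue);
        void (*mqd_destroy)(struct amdgpu_usermode_queue *queue);
};

/* gfx_v11_0.c fills in and registers its own implementation: */
adev->userq_funcs[AMDGPU_HW_IP_GFX] = &gfx_v11_0_userq_funcs;

/* amdgpu_userqueue.c only ever dispatches on the IP type: */
r = adev->userq_funcs[queue->queue_type]->mqd_create(adev, queue);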

> +{
> +    struct amdgpu_usermode_queue_mqd *mqd = queue->mqd_cpu_ptr;
> +    uint64_t hqd_gpu_addr, wb_gpu_addr;
> +    uint32_t tmp;
> +    uint32_t rb_bufsz;
> +
> +    /* set up gfx hqd wptr */
> +    mqd->cp_gfx_hqd_wptr = 0;
> +    mqd->cp_gfx_hqd_wptr_hi = 0;
> +
> +    /* set the pointer to the MQD */
> +    mqd->cp_mqd_base_addr = queue->mqd_gpu_addr & 0xfffffffc;
> +    mqd->cp_mqd_base_addr_hi = upper_32_bits(queue->mqd_gpu_addr);
> +
> +    /* set up mqd control */
> +    tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL);
> +    tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
> +    tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
> +    tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
> +    mqd->cp_gfx_mqd_control = tmp;
> +
> +    /* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
> +    tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID);
> +    tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
> +    mqd->cp_gfx_hqd_vmid = 0;
> +
> +    /* set up default queue priority level
> +    * 0x0 = low priority, 0x1 = high priority */
> +    tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
> +    tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
> +    mqd->cp_gfx_hqd_queue_priority = tmp;
> +
> +    /* set up time quantum */
> +    tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
> +    tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
> +    mqd->cp_gfx_hqd_quantum = tmp;
> +
> +    /* set up gfx hqd base. this is similar as CP_RB_BASE */
> +    hqd_gpu_addr = queue->queue_gpu_addr >> 8;
> +    mqd->cp_gfx_hqd_base = hqd_gpu_addr;
> +    mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
> +
> +    /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */
> +    wb_gpu_addr = queue->rptr_gpu_addr;
> +    mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
> +    mqd->cp_gfx_hqd_rptr_addr_hi =
> +    upper_32_bits(wb_gpu_addr) & 0xffff;
> +
> +    /* set up rb_wptr_poll addr */
> +    wb_gpu_addr = queue->wptr_gpu_addr;
> +    mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
> +    mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
> +
> +    /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
> +    rb_bufsz = order_base_2(queue->queue_size / 4) - 1;
> +    tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL);
> +    tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
> +    tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
> +#ifdef __BIG_ENDIAN
> +    tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
> +#endif
> +    mqd->cp_gfx_hqd_cntl = tmp;
> +
> +    /* set up cp_doorbell_control */
> +    tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
> +    if (queue->use_doorbell) {
> +        tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
> +                    DOORBELL_OFFSET, queue->doorbell_index);
> +        tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
> +                    DOORBELL_EN, 1);
> +    } else {
> +        tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
> +                    DOORBELL_EN, 0);
> +    }
> +    mqd->cp_rb_doorbell_control = tmp;
> +
> +    /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
> +    mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
> +
> +    /* activate the queue */
> +    mqd->cp_gfx_hqd_active = 1;
> +}
> +
> +static int
> +amdgpu_userqueue_create_mqd(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
> +{
> +    int r;
> +    int size = sizeof(struct amdgpu_usermode_queue_mqd);
> +
> +    r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
> +                                AMDGPU_GEM_DOMAIN_VRAM,
> +                                &queue->mqd_obj,
> +                                &queue->mqd_gpu_addr,
> +                                &queue->mqd_cpu_ptr);
> +    if (r) {
> +        DRM_ERROR("Failed to allocate bo for userqueue (%d)", r);
> +        return r;
> +    }
> +
> +    memset(queue->mqd_cpu_ptr, 0, size);
> +    r = amdgpu_bo_reserve(queue->mqd_obj, false);
> +    if (unlikely(r != 0)) {
> +        DRM_ERROR("Failed to reserve mqd for userqueue (%d)", r);
> +        goto free_mqd;
> +    }
> +
> +    /* Fill MQD with userqueue data */
> +    amdgpu_userqueue_setup_mqd(adev, queue);
> +    amdgpu_bo_unreserve(queue->mqd_obj);
> +    return 0;
> +
> +free_mqd:
> +    amdgpu_bo_free_kernel(&queue->mqd_obj,
> +                           &queue->mqd_gpu_addr,
> +                           &queue->mqd_cpu_ptr);
> +    return r;
> +}
> +
> +static void
> +amdgpu_userqueue_destroy_mqd(struct amdgpu_usermode_queue *queue)
> +{
> +    amdgpu_bo_free_kernel(&queue->mqd_obj,
> +                           &queue->mqd_gpu_addr,
> +                           &queue->mqd_cpu_ptr);
> +}
> +
>  static int
>  amdgpu_userqueue_validate_input(struct amdgpu_device *adev, struct drm_amdgpu_userq_mqd *mqd_in)
>  {
> @@ -120,6 +251,12 @@ int amdgpu_userqueue_create(struct amdgpu_device *adev, struct drm_file *filp,
>      queue->flags = mqd_in->flags;
>      queue->queue_id = amdgpu_userqueue_index(adev);
>
> +    r = amdgpu_userqueue_create_mqd(adev, queue);
> +    if (r < 0) {
> +        DRM_ERROR("Failed to create mqd for queue\n");
> +        goto free_queue;
> +    }
> +
>      ctx->userq = queue;
>      args->out.q_id = queue->queue_id;
>      args->out.flags = 0;
> @@ -141,6 +278,7 @@ void amdgpu_userqueue_destroy(struct amdgpu_device *adev, struct drm_file *filp,
>      struct amdgpu_usermode_queue *queue = ctx->userq;
>
>      mutex_lock(&adev->userq.userq_mutex);
> +    amdgpu_userqueue_destroy_mqd(queue);
>      amdgpu_userqueue_remove_index(adev, queue);
>      ctx->userq = NULL;
>      mutex_unlock(&adev->userq.userq_mutex);
> diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h
> new file mode 100644
> index 000000000000..d0a285708ba5
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h
> @@ -0,0 +1,544 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#ifndef AMDGPU_USERMODE_QUEUE_MQD_H_
> +#define AMDGPU_USERMODE_QUEUE_MQD_H_
> +
> +/*MQD struct for usermode Queue*/
> +struct amdgpu_usermode_queue_mqd

This is specific to GC 11.  Every IP and version will have its own MQD
format.  That should live in the IP specific code, not the generic
code.  We already have the generic MQD parameters that we need from
the userq IOCTL.

Alex

> +{
> +       uint32_t shadow_base_lo; // offset: 0  (0x0)
> +       uint32_t shadow_base_hi; // offset: 1  (0x1)
> +       uint32_t gds_bkup_base_lo ; // offset: 2  (0x2)
> +       uint32_t gds_bkup_base_hi ; // offset: 3  (0x3)
> +       uint32_t fw_work_area_base_lo; // offset: 4  (0x4)
> +       uint32_t fw_work_area_base_hi; // offset: 5  (0x5)
> +       uint32_t shadow_initialized; // offset: 6  (0x6)
> +       uint32_t ib_vmid; // offset: 7  (0x7)
> +       uint32_t reserved_8; // offset: 8  (0x8)
> +       uint32_t reserved_9; // offset: 9  (0x9)
> +       uint32_t reserved_10; // offset: 10  (0xA)
> +       uint32_t reserved_11; // offset: 11  (0xB)
> +       uint32_t reserved_12; // offset: 12  (0xC)
> +       uint32_t reserved_13; // offset: 13  (0xD)
> +       uint32_t reserved_14; // offset: 14  (0xE)
> +       uint32_t reserved_15; // offset: 15  (0xF)
> +       uint32_t reserved_16; // offset: 16  (0x10)
> +       uint32_t reserved_17; // offset: 17  (0x11)
> +       uint32_t reserved_18; // offset: 18  (0x12)
> +       uint32_t reserved_19; // offset: 19  (0x13)
> +       uint32_t reserved_20; // offset: 20  (0x14)
> +       uint32_t reserved_21; // offset: 21  (0x15)
> +       uint32_t reserved_22; // offset: 22  (0x16)
> +       uint32_t reserved_23; // offset: 23  (0x17)
> +       uint32_t reserved_24; // offset: 24  (0x18)
> +       uint32_t reserved_25; // offset: 25  (0x19)
> +       uint32_t reserved_26; // offset: 26  (0x1A)
> +       uint32_t reserved_27; // offset: 27  (0x1B)
> +       uint32_t reserved_28; // offset: 28  (0x1C)
> +       uint32_t reserved_29; // offset: 29  (0x1D)
> +       uint32_t reserved_30; // offset: 30  (0x1E)
> +       uint32_t reserved_31; // offset: 31  (0x1F)
> +       uint32_t reserved_32; // offset: 32  (0x20)
> +       uint32_t reserved_33; // offset: 33  (0x21)
> +       uint32_t reserved_34; // offset: 34  (0x22)
> +       uint32_t reserved_35; // offset: 35  (0x23)
> +       uint32_t reserved_36; // offset: 36  (0x24)
> +       uint32_t reserved_37; // offset: 37  (0x25)
> +       uint32_t reserved_38; // offset: 38  (0x26)
> +       uint32_t reserved_39; // offset: 39  (0x27)
> +       uint32_t reserved_40; // offset: 40  (0x28)
> +       uint32_t reserved_41; // offset: 41  (0x29)
> +       uint32_t reserved_42; // offset: 42  (0x2A)
> +       uint32_t reserved_43; // offset: 43  (0x2B)
> +       uint32_t reserved_44; // offset: 44  (0x2C)
> +       uint32_t reserved_45; // offset: 45  (0x2D)
> +       uint32_t reserved_46; // offset: 46  (0x2E)
> +       uint32_t reserved_47; // offset: 47  (0x2F)
> +       uint32_t reserved_48; // offset: 48  (0x30)
> +       uint32_t reserved_49; // offset: 49  (0x31)
> +       uint32_t reserved_50; // offset: 50  (0x32)
> +       uint32_t reserved_51; // offset: 51  (0x33)
> +       uint32_t reserved_52; // offset: 52  (0x34)
> +       uint32_t reserved_53; // offset: 53  (0x35)
> +       uint32_t reserved_54; // offset: 54  (0x36)
> +       uint32_t reserved_55; // offset: 55  (0x37)
> +       uint32_t reserved_56; // offset: 56  (0x38)
> +       uint32_t reserved_57; // offset: 57  (0x39)
> +       uint32_t reserved_58; // offset: 58  (0x3A)
> +       uint32_t reserved_59; // offset: 59  (0x3B)
> +       uint32_t reserved_60; // offset: 60  (0x3C)
> +       uint32_t reserved_61; // offset: 61  (0x3D)
> +       uint32_t reserved_62; // offset: 62  (0x3E)
> +       uint32_t reserved_63; // offset: 63  (0x3F)
> +       uint32_t reserved_64; // offset: 64  (0x40)
> +       uint32_t reserved_65; // offset: 65  (0x41)
> +       uint32_t reserved_66; // offset: 66  (0x42)
> +       uint32_t reserved_67; // offset: 67  (0x43)
> +       uint32_t reserved_68; // offset: 68  (0x44)
> +       uint32_t reserved_69; // offset: 69  (0x45)
> +       uint32_t reserved_70; // offset: 70  (0x46)
> +       uint32_t reserved_71; // offset: 71  (0x47)
> +       uint32_t reserved_72; // offset: 72  (0x48)
> +       uint32_t reserved_73; // offset: 73  (0x49)
> +       uint32_t reserved_74; // offset: 74  (0x4A)
> +       uint32_t reserved_75; // offset: 75  (0x4B)
> +       uint32_t reserved_76; // offset: 76  (0x4C)
> +       uint32_t reserved_77; // offset: 77  (0x4D)
> +       uint32_t reserved_78; // offset: 78  (0x4E)
> +       uint32_t reserved_79; // offset: 79  (0x4F)
> +       uint32_t reserved_80; // offset: 80  (0x50)
> +       uint32_t reserved_81; // offset: 81  (0x51)
> +       uint32_t reserved_82; // offset: 82  (0x52)
> +       uint32_t reserved_83; // offset: 83  (0x53)
> +       uint32_t checksum_lo; // offset: 84  (0x54)
> +       uint32_t checksum_hi; // offset: 85  (0x55)
> +       uint32_t cp_mqd_query_time_lo; // offset: 86  (0x56)
> +       uint32_t cp_mqd_query_time_hi; // offset: 87  (0x57)
> +       uint32_t reserved_88; // offset: 88  (0x58)
> +       uint32_t reserved_89; // offset: 89  (0x59)
> +       uint32_t reserved_90; // offset: 90  (0x5A)
> +       uint32_t reserved_91; // offset: 91  (0x5B)
> +       uint32_t cp_mqd_query_wave_count; // offset: 92  (0x5C)
> +       uint32_t cp_mqd_query_gfx_hqd_rptr; // offset: 93  (0x5D)
> +       uint32_t cp_mqd_query_gfx_hqd_wptr; // offset: 94  (0x5E)
> +       uint32_t cp_mqd_query_gfx_hqd_offset; // offset: 95  (0x5F)
> +       uint32_t reserved_96; // offset: 96  (0x60)
> +       uint32_t reserved_97; // offset: 97  (0x61)
> +       uint32_t reserved_98; // offset: 98  (0x62)
> +       uint32_t reserved_99; // offset: 99  (0x63)
> +       uint32_t reserved_100; // offset: 100  (0x64)
> +       uint32_t reserved_101; // offset: 101  (0x65)
> +       uint32_t reserved_102; // offset: 102  (0x66)
> +       uint32_t reserved_103; // offset: 103  (0x67)
> +       uint32_t task_shader_control_buf_addr_lo; // offset: 104  (0x68)
> +       uint32_t task_shader_control_buf_addr_hi; // offset: 105  (0x69)
> +       uint32_t task_shader_read_rptr_lo; // offset: 106  (0x6A)
> +       uint32_t task_shader_read_rptr_hi; // offset: 107  (0x6B)
> +       uint32_t task_shader_num_entries; // offset: 108  (0x6C)
> +       uint32_t task_shader_num_entries_bits; // offset: 109  (0x6D)
> +       uint32_t task_shader_ring_buffer_addr_lo; // offset: 110  (0x6E)
> +       uint32_t task_shader_ring_buffer_addr_hi; // offset: 111  (0x6F)
> +       uint32_t reserved_112; // offset: 112  (0x70)
> +       uint32_t reserved_113; // offset: 113  (0x71)
> +       uint32_t reserved_114; // offset: 114  (0x72)
> +       uint32_t reserved_115; // offset: 115  (0x73)
> +       uint32_t reserved_116; // offset: 116  (0x74)
> +       uint32_t reserved_117; // offset: 117  (0x75)
> +       uint32_t reserved_118; // offset: 118  (0x76)
> +       uint32_t reserved_119; // offset: 119  (0x77)
> +       uint32_t reserved_120; // offset: 120  (0x78)
> +       uint32_t reserved_121; // offset: 121  (0x79)
> +       uint32_t reserved_122; // offset: 122  (0x7A)
> +       uint32_t reserved_123; // offset: 123  (0x7B)
> +       uint32_t reserved_124; // offset: 124  (0x7C)
> +       uint32_t reserved_125; // offset: 125  (0x7D)
> +       uint32_t reserved_126; // offset: 126  (0x7E)
> +       uint32_t reserved_127; // offset: 127  (0x7F)
> +       uint32_t cp_mqd_base_addr; // offset: 128  (0x80)
> +       uint32_t cp_mqd_base_addr_hi; // offset: 129  (0x81)
> +       uint32_t cp_gfx_hqd_active; // offset: 130  (0x82)
> +       uint32_t cp_gfx_hqd_vmid; // offset: 131  (0x83)
> +       uint32_t reserved_131; // offset: 132  (0x84)
> +       uint32_t reserved_132; // offset: 133  (0x85)
> +       uint32_t cp_gfx_hqd_queue_priority; // offset: 134  (0x86)
> +       uint32_t cp_gfx_hqd_quantum; // offset: 135  (0x87)
> +       uint32_t cp_gfx_hqd_base; // offset: 136  (0x88)
> +       uint32_t cp_gfx_hqd_base_hi; // offset: 137  (0x89)
> +       uint32_t cp_gfx_hqd_rptr; // offset: 138  (0x8A)
> +       uint32_t cp_gfx_hqd_rptr_addr; // offset: 139  (0x8B)
> +       uint32_t cp_gfx_hqd_rptr_addr_hi; // offset: 140  (0x8C)
> +       uint32_t cp_rb_wptr_poll_addr_lo; // offset: 141  (0x8D)
> +       uint32_t cp_rb_wptr_poll_addr_hi; // offset: 142  (0x8E)
> +       uint32_t cp_rb_doorbell_control; // offset: 143  (0x8F)
> +       uint32_t cp_gfx_hqd_offset; // offset: 144  (0x90)
> +       uint32_t cp_gfx_hqd_cntl; // offset: 145  (0x91)
> +       uint32_t reserved_146; // offset: 146  (0x92)
> +       uint32_t reserved_147; // offset: 147  (0x93)
> +       uint32_t cp_gfx_hqd_csmd_rptr; // offset: 148  (0x94)
> +       uint32_t cp_gfx_hqd_wptr; // offset: 149  (0x95)
> +       uint32_t cp_gfx_hqd_wptr_hi; // offset: 150  (0x96)
> +       uint32_t reserved_151; // offset: 151  (0x97)
> +       uint32_t reserved_152; // offset: 152  (0x98)
> +       uint32_t reserved_153; // offset: 153  (0x99)
> +       uint32_t reserved_154; // offset: 154  (0x9A)
> +       uint32_t reserved_155; // offset: 155  (0x9B)
> +       uint32_t cp_gfx_hqd_mapped; // offset: 156  (0x9C)
> +       uint32_t cp_gfx_hqd_que_mgr_control; // offset: 157  (0x9D)
> +       uint32_t reserved_158; // offset: 158  (0x9E)
> +       uint32_t reserved_159; // offset: 159  (0x9F)
> +       uint32_t cp_gfx_hqd_hq_status0; // offset: 160  (0xA0)
> +       uint32_t cp_gfx_hqd_hq_control0; // offset: 161  (0xA1)
> +       uint32_t cp_gfx_mqd_control; // offset: 162  (0xA2)
> +       uint32_t reserved_163; // offset: 163  (0xA3)
> +       uint32_t reserved_164; // offset: 164  (0xA4)
> +       uint32_t reserved_165; // offset: 165  (0xA5)
> +       uint32_t reserved_166; // offset: 166  (0xA6)
> +       uint32_t reserved_167; // offset: 167  (0xA7)
> +       uint32_t reserved_168; // offset: 168  (0xA8)
> +       uint32_t reserved_169; // offset: 169  (0xA9)
> +       uint32_t cp_num_prim_needed_count0_lo; // offset: 170  (0xAA)
> +       uint32_t cp_num_prim_needed_count0_hi; // offset: 171  (0xAB)
> +       uint32_t cp_num_prim_needed_count1_lo; // offset: 172  (0xAC)
> +       uint32_t cp_num_prim_needed_count1_hi; // offset: 173  (0xAD)
> +       uint32_t cp_num_prim_needed_count2_lo; // offset: 174  (0xAE)
> +       uint32_t cp_num_prim_needed_count2_hi; // offset: 175  (0xAF)
> +       uint32_t cp_num_prim_needed_count3_lo; // offset: 176  (0xB0)
> +       uint32_t cp_num_prim_needed_count3_hi; // offset: 177  (0xB1)
> +       uint32_t cp_num_prim_written_count0_lo; // offset: 178  (0xB2)
> +       uint32_t cp_num_prim_written_count0_hi; // offset: 179  (0xB3)
> +       uint32_t cp_num_prim_written_count1_lo; // offset: 180  (0xB4)
> +       uint32_t cp_num_prim_written_count1_hi; // offset: 181  (0xB5)
> +       uint32_t cp_num_prim_written_count2_lo; // offset: 182  (0xB6)
> +       uint32_t cp_num_prim_written_count2_hi; // offset: 183  (0xB7)
> +       uint32_t cp_num_prim_written_count3_lo; // offset: 184  (0xB8)
> +       uint32_t cp_num_prim_written_count3_hi; // offset: 185  (0xB9)
> +       uint32_t reserved_186; // offset: 186  (0xBA)
> +       uint32_t reserved_187; // offset: 187  (0xBB)
> +       uint32_t reserved_188; // offset: 188  (0xBC)
> +       uint32_t reserved_189; // offset: 189  (0xBD)
> +       uint32_t reserved_190; // offset: 190  (0xBE)
> +       uint32_t reserved_191; // offset: 191  (0xBF)
> +       uint32_t reserved_192; // offset: 192  (0xC0)
> +       uint32_t reserved_193; // offset: 193  (0xC1)
> +       uint32_t reserved_194; // offset: 194  (0xC2)
> +       uint32_t reserved_195; // offset: 195  (0xC3)
> +       uint32_t reserved_196; // offset: 196  (0xC4)
> +       uint32_t reserved_197; // offset: 197  (0xC5)
> +       uint32_t reserved_198; // offset: 198  (0xC6)
> +       uint32_t reserved_199; // offset: 199  (0xC7)
> +       uint32_t reserved_200; // offset: 200  (0xC8)
> +       uint32_t reserved_201; // offset: 201  (0xC9)
> +       uint32_t reserved_202; // offset: 202  (0xCA)
> +       uint32_t reserved_203; // offset: 203  (0xCB)
> +       uint32_t reserved_204; // offset: 204  (0xCC)
> +       uint32_t reserved_205; // offset: 205  (0xCD)
> +       uint32_t reserved_206; // offset: 206  (0xCE)
> +       uint32_t reserved_207; // offset: 207  (0xCF)
> +       uint32_t reserved_208; // offset: 208  (0xD0)
> +       uint32_t reserved_209; // offset: 209  (0xD1)
> +       uint32_t reserved_210; // offset: 210  (0xD2)
> +       uint32_t reserved_211; // offset: 211  (0xD3)
> +       uint32_t reserved_212; // offset: 212  (0xD4)
> +       uint32_t reserved_213; // offset: 213  (0xD5)
> +       uint32_t reserved_214; // offset: 214  (0xD6)
> +       uint32_t reserved_215; // offset: 215  (0xD7)
> +       uint32_t reserved_216; // offset: 216  (0xD8)
> +       uint32_t reserved_217; // offset: 217  (0xD9)
> +       uint32_t reserved_218; // offset: 218  (0xDA)
> +       uint32_t reserved_219; // offset: 219  (0xDB)
> +       uint32_t reserved_220; // offset: 220  (0xDC)
> +       uint32_t reserved_221; // offset: 221  (0xDD)
> +       uint32_t reserved_222; // offset: 222  (0xDE)
> +       uint32_t reserved_223; // offset: 223  (0xDF)
> +       uint32_t reserved_224; // offset: 224  (0xE0)
> +       uint32_t reserved_225; // offset: 225  (0xE1)
> +       uint32_t reserved_226; // offset: 226  (0xE2)
> +       uint32_t reserved_227; // offset: 227  (0xE3)
> +       uint32_t reserved_228; // offset: 228  (0xE4)
> +       uint32_t reserved_229; // offset: 229  (0xE5)
> +       uint32_t reserved_230; // offset: 230  (0xE6)
> +       uint32_t reserved_231; // offset: 231  (0xE7)
> +       uint32_t reserved_232; // offset: 232  (0xE8)
> +       uint32_t reserved_233; // offset: 233  (0xE9)
> +       uint32_t reserved_234; // offset: 234  (0xEA)
> +       uint32_t reserved_235; // offset: 235  (0xEB)
> +       uint32_t reserved_236; // offset: 236  (0xEC)
> +       uint32_t reserved_237; // offset: 237  (0xED)
> +       uint32_t reserved_238; // offset: 238  (0xEE)
> +       uint32_t reserved_239; // offset: 239  (0xEF)
> +       uint32_t reserved_240; // offset: 240  (0xF0)
> +       uint32_t reserved_241; // offset: 241  (0xF1)
> +       uint32_t reserved_242; // offset: 242  (0xF2)
> +       uint32_t reserved_243; // offset: 243  (0xF3)
> +       uint32_t reserved_244; // offset: 244  (0xF4)
> +       uint32_t reserved_245; // offset: 245  (0xF5)
> +       uint32_t reserved_246; // offset: 246  (0xF6)
> +       uint32_t reserved_247; // offset: 247  (0xF7)
> +       uint32_t reserved_248; // offset: 248  (0xF8)
> +       uint32_t reserved_249; // offset: 249  (0xF9)
> +       uint32_t reserved_250; // offset: 250  (0xFA)
> +       uint32_t reserved_251; // offset: 251  (0xFB)
> +       uint32_t reserved_252; // offset: 252  (0xFC)
> +       uint32_t reserved_253; // offset: 253  (0xFD)
> +       uint32_t reserved_254; // offset: 254  (0xFE)
> +       uint32_t reserved_255; // offset: 255  (0xFF)
> +       uint32_t reserved_256; // offset: 256  (0x100)
> +       uint32_t reserved_257; // offset: 257  (0x101)
> +       uint32_t reserved_258; // offset: 258  (0x102)
> +       uint32_t reserved_259; // offset: 259  (0x103)
> +       uint32_t reserved_260; // offset: 260  (0x104)
> +       uint32_t reserved_261; // offset: 261  (0x105)
> +       uint32_t reserved_262; // offset: 262  (0x106)
> +       uint32_t reserved_263; // offset: 263  (0x107)
> +       uint32_t reserved_264; // offset: 264  (0x108)
> +       uint32_t reserved_265; // offset: 265  (0x109)
> +       uint32_t reserved_266; // offset: 266  (0x10A)
> +       uint32_t reserved_267; // offset: 267  (0x10B)
> +       uint32_t reserved_268; // offset: 268  (0x10C)
> +       uint32_t reserved_269; // offset: 269  (0x10D)
> +       uint32_t reserved_270; // offset: 270  (0x10E)
> +       uint32_t reserved_271; // offset: 271  (0x10F)
> +       uint32_t reserved_272; // offset: 272  (0x110)
> +       uint32_t reserved_273; // offset: 273  (0x111)
> +       uint32_t reserved_274; // offset: 274  (0x112)
> +       uint32_t reserved_275; // offset: 275  (0x113)
> +       uint32_t reserved_276; // offset: 276  (0x114)
> +       uint32_t reserved_277; // offset: 277  (0x115)
> +       uint32_t reserved_278; // offset: 278  (0x116)
> +       uint32_t reserved_279; // offset: 279  (0x117)
> +       uint32_t reserved_280; // offset: 280  (0x118)
> +       uint32_t reserved_281; // offset: 281  (0x119)
> +       uint32_t reserved_282; // offset: 282  (0x11A)
> +       uint32_t reserved_283; // offset: 283  (0x11B)
> +       uint32_t reserved_284; // offset: 284  (0x11C)
> +       uint32_t reserved_285; // offset: 285  (0x11D)
> +       uint32_t reserved_286; // offset: 286  (0x11E)
> +       uint32_t reserved_287; // offset: 287  (0x11F)
> +       uint32_t reserved_288; // offset: 288  (0x120)
> +       uint32_t reserved_289; // offset: 289  (0x121)
> +       uint32_t reserved_290; // offset: 290  (0x122)
> +       uint32_t reserved_291; // offset: 291  (0x123)
> +       uint32_t reserved_292; // offset: 292  (0x124)
> +       uint32_t reserved_293; // offset: 293  (0x125)
> +       uint32_t reserved_294; // offset: 294  (0x126)
> +       uint32_t reserved_295; // offset: 295  (0x127)
> +       uint32_t reserved_296; // offset: 296  (0x128)
> +       uint32_t reserved_297; // offset: 297  (0x129)
> +       uint32_t reserved_298; // offset: 298  (0x12A)
> +       uint32_t reserved_299; // offset: 299  (0x12B)
> +       uint32_t reserved_300; // offset: 300  (0x12C)
> +       uint32_t reserved_301; // offset: 301  (0x12D)
> +       uint32_t reserved_302; // offset: 302  (0x12E)
> +       uint32_t reserved_303; // offset: 303  (0x12F)
> +       uint32_t reserved_304; // offset: 304  (0x130)
> +       uint32_t reserved_305; // offset: 305  (0x131)
> +       uint32_t reserved_306; // offset: 306  (0x132)
> +       uint32_t reserved_307; // offset: 307  (0x133)
> +       uint32_t reserved_308; // offset: 308  (0x134)
> +       uint32_t reserved_309; // offset: 309  (0x135)
> +       uint32_t reserved_310; // offset: 310  (0x136)
> +       uint32_t reserved_311; // offset: 311  (0x137)
> +       uint32_t reserved_312; // offset: 312  (0x138)
> +       uint32_t reserved_313; // offset: 313  (0x139)
> +       uint32_t reserved_314; // offset: 314  (0x13A)
> +       uint32_t reserved_315; // offset: 315  (0x13B)
> +       uint32_t reserved_316; // offset: 316  (0x13C)
> +       uint32_t reserved_317; // offset: 317  (0x13D)
> +       uint32_t reserved_318; // offset: 318  (0x13E)
> +       uint32_t reserved_319; // offset: 319  (0x13F)
> +       uint32_t reserved_320; // offset: 320  (0x140)
> +       uint32_t reserved_321; // offset: 321  (0x141)
> +       uint32_t reserved_322; // offset: 322  (0x142)
> +       uint32_t reserved_323; // offset: 323  (0x143)
> +       uint32_t reserved_324; // offset: 324  (0x144)
> +       uint32_t reserved_325; // offset: 325  (0x145)
> +       uint32_t reserved_326; // offset: 326  (0x146)
> +       uint32_t reserved_327; // offset: 327  (0x147)
> +       uint32_t reserved_328; // offset: 328  (0x148)
> +       uint32_t reserved_329; // offset: 329  (0x149)
> +       uint32_t reserved_330; // offset: 330  (0x14A)
> +       uint32_t reserved_331; // offset: 331  (0x14B)
> +       uint32_t reserved_332; // offset: 332  (0x14C)
> +       uint32_t reserved_333; // offset: 333  (0x14D)
> +       uint32_t reserved_334; // offset: 334  (0x14E)
> +       uint32_t reserved_335; // offset: 335  (0x14F)
> +       uint32_t reserved_336; // offset: 336  (0x150)
> +       uint32_t reserved_337; // offset: 337  (0x151)
> +       uint32_t reserved_338; // offset: 338  (0x152)
> +       uint32_t reserved_339; // offset: 339  (0x153)
> +       uint32_t reserved_340; // offset: 340  (0x154)
> +       uint32_t reserved_341; // offset: 341  (0x155)
> +       uint32_t reserved_342; // offset: 342  (0x156)
> +       uint32_t reserved_343; // offset: 343  (0x157)
> +       uint32_t reserved_344; // offset: 344  (0x158)
> +       uint32_t reserved_345; // offset: 345  (0x159)
> +       uint32_t reserved_346; // offset: 346  (0x15A)
> +       uint32_t reserved_347; // offset: 347  (0x15B)
> +       uint32_t reserved_348; // offset: 348  (0x15C)
> +       uint32_t reserved_349; // offset: 349  (0x15D)
> +       uint32_t reserved_350; // offset: 350  (0x15E)
> +       uint32_t reserved_351; // offset: 351  (0x15F)
> +       uint32_t reserved_352; // offset: 352  (0x160)
> +       uint32_t reserved_353; // offset: 353  (0x161)
> +       uint32_t reserved_354; // offset: 354  (0x162)
> +       uint32_t reserved_355; // offset: 355  (0x163)
> +       uint32_t reserved_356; // offset: 356  (0x164)
> +       uint32_t reserved_357; // offset: 357  (0x165)
> +       uint32_t reserved_358; // offset: 358  (0x166)
> +       uint32_t reserved_359; // offset: 359  (0x167)
> +       uint32_t reserved_360; // offset: 360  (0x168)
> +       uint32_t reserved_361; // offset: 361  (0x169)
> +       uint32_t reserved_362; // offset: 362  (0x16A)
> +       uint32_t reserved_363; // offset: 363  (0x16B)
> +       uint32_t reserved_364; // offset: 364  (0x16C)
> +       uint32_t reserved_365; // offset: 365  (0x16D)
> +       uint32_t reserved_366; // offset: 366  (0x16E)
> +       uint32_t reserved_367; // offset: 367  (0x16F)
> +       uint32_t reserved_368; // offset: 368  (0x170)
> +       uint32_t reserved_369; // offset: 369  (0x171)
> +       uint32_t reserved_370; // offset: 370  (0x172)
> +       uint32_t reserved_371; // offset: 371  (0x173)
> +       uint32_t reserved_372; // offset: 372  (0x174)
> +       uint32_t reserved_373; // offset: 373  (0x175)
> +       uint32_t reserved_374; // offset: 374  (0x176)
> +       uint32_t reserved_375; // offset: 375  (0x177)
> +       uint32_t reserved_376; // offset: 376  (0x178)
> +       uint32_t reserved_377; // offset: 377  (0x179)
> +       uint32_t reserved_378; // offset: 378  (0x17A)
> +       uint32_t reserved_379; // offset: 379  (0x17B)
> +       uint32_t reserved_380; // offset: 380  (0x17C)
> +       uint32_t reserved_381; // offset: 381  (0x17D)
> +       uint32_t reserved_382; // offset: 382  (0x17E)
> +       uint32_t reserved_383; // offset: 383  (0x17F)
> +       uint32_t reserved_384; // offset: 384  (0x180)
> +       uint32_t reserved_385; // offset: 385  (0x181)
> +       uint32_t reserved_386; // offset: 386  (0x182)
> +       uint32_t reserved_387; // offset: 387  (0x183)
> +       uint32_t reserved_388; // offset: 388  (0x184)
> +       uint32_t reserved_389; // offset: 389  (0x185)
> +       uint32_t reserved_390; // offset: 390  (0x186)
> +       uint32_t reserved_391; // offset: 391  (0x187)
> +       uint32_t reserved_392; // offset: 392  (0x188)
> +       uint32_t reserved_393; // offset: 393  (0x189)
> +       uint32_t reserved_394; // offset: 394  (0x18A)
> +       uint32_t reserved_395; // offset: 395  (0x18B)
> +       uint32_t reserved_396; // offset: 396  (0x18C)
> +       uint32_t reserved_397; // offset: 397  (0x18D)
> +       uint32_t reserved_398; // offset: 398  (0x18E)
> +       uint32_t reserved_399; // offset: 399  (0x18F)
> +       uint32_t reserved_400; // offset: 400  (0x190)
> +       uint32_t reserved_401; // offset: 401  (0x191)
> +       uint32_t reserved_402; // offset: 402  (0x192)
> +       uint32_t reserved_403; // offset: 403  (0x193)
> +       uint32_t reserved_404; // offset: 404  (0x194)
> +       uint32_t reserved_405; // offset: 405  (0x195)
> +       uint32_t reserved_406; // offset: 406  (0x196)
> +       uint32_t reserved_407; // offset: 407  (0x197)
> +       uint32_t reserved_408; // offset: 408  (0x198)
> +       uint32_t reserved_409; // offset: 409  (0x199)
> +       uint32_t reserved_410; // offset: 410  (0x19A)
> +       uint32_t reserved_411; // offset: 411  (0x19B)
> +       uint32_t reserved_412; // offset: 412  (0x19C)
> +       uint32_t reserved_413; // offset: 413  (0x19D)
> +       uint32_t reserved_414; // offset: 414  (0x19E)
> +       uint32_t reserved_415; // offset: 415  (0x19F)
> +       uint32_t reserved_416; // offset: 416  (0x1A0)
> +       uint32_t reserved_417; // offset: 417  (0x1A1)
> +       uint32_t reserved_418; // offset: 418  (0x1A2)
> +       uint32_t reserved_419; // offset: 419  (0x1A3)
> +       uint32_t reserved_420; // offset: 420  (0x1A4)
> +       uint32_t reserved_421; // offset: 421  (0x1A5)
> +       uint32_t reserved_422; // offset: 422  (0x1A6)
> +       uint32_t reserved_423; // offset: 423  (0x1A7)
> +       uint32_t reserved_424; // offset: 424  (0x1A8)
> +       uint32_t reserved_425; // offset: 425  (0x1A9)
> +       uint32_t reserved_426; // offset: 426  (0x1AA)
> +       uint32_t reserved_427; // offset: 427  (0x1AB)
> +       uint32_t reserved_428; // offset: 428  (0x1AC)
> +       uint32_t reserved_429; // offset: 429  (0x1AD)
> +       uint32_t reserved_430; // offset: 430  (0x1AE)
> +       uint32_t reserved_431; // offset: 431  (0x1AF)
> +       uint32_t reserved_432; // offset: 432  (0x1B0)
> +       uint32_t reserved_433; // offset: 433  (0x1B1)
> +       uint32_t reserved_434; // offset: 434  (0x1B2)
> +       uint32_t reserved_435; // offset: 435  (0x1B3)
> +       uint32_t reserved_436; // offset: 436  (0x1B4)
> +       uint32_t reserved_437; // offset: 437  (0x1B5)
> +       uint32_t reserved_438; // offset: 438  (0x1B6)
> +       uint32_t reserved_439; // offset: 439  (0x1B7)
> +       uint32_t reserved_440; // offset: 440  (0x1B8)
> +       uint32_t reserved_441; // offset: 441  (0x1B9)
> +       uint32_t reserved_442; // offset: 442  (0x1BA)
> +       uint32_t reserved_443; // offset: 443  (0x1BB)
> +       uint32_t reserved_444; // offset: 444  (0x1BC)
> +       uint32_t reserved_445; // offset: 445  (0x1BD)
> +       uint32_t reserved_446; // offset: 446  (0x1BE)
> +       uint32_t reserved_447; // offset: 447  (0x1BF)
> +       uint32_t reserved_448; // offset: 448  (0x1C0)
> +       uint32_t reserved_449; // offset: 449  (0x1C1)
> +       uint32_t reserved_450; // offset: 450  (0x1C2)
> +       uint32_t reserved_451; // offset: 451  (0x1C3)
> +       uint32_t reserved_452; // offset: 452  (0x1C4)
> +       uint32_t reserved_453; // offset: 453  (0x1C5)
> +       uint32_t reserved_454; // offset: 454  (0x1C6)
> +       uint32_t reserved_455; // offset: 455  (0x1C7)
> +       uint32_t reserved_456; // offset: 456  (0x1C8)
> +       uint32_t reserved_457; // offset: 457  (0x1C9)
> +       uint32_t reserved_458; // offset: 458  (0x1CA)
> +       uint32_t reserved_459; // offset: 459  (0x1CB)
> +       uint32_t reserved_460; // offset: 460  (0x1CC)
> +       uint32_t reserved_461; // offset: 461  (0x1CD)
> +       uint32_t reserved_462; // offset: 462  (0x1CE)
> +       uint32_t reserved_463; // offset: 463  (0x1CF)
> +       uint32_t reserved_464; // offset: 464  (0x1D0)
> +       uint32_t reserved_465; // offset: 465  (0x1D1)
> +       uint32_t reserved_466; // offset: 466  (0x1D2)
> +       uint32_t reserved_467; // offset: 467  (0x1D3)
> +       uint32_t reserved_468; // offset: 468  (0x1D4)
> +       uint32_t reserved_469; // offset: 469  (0x1D5)
> +       uint32_t reserved_470; // offset: 470  (0x1D6)
> +       uint32_t reserved_471; // offset: 471  (0x1D7)
> +       uint32_t reserved_472; // offset: 472  (0x1D8)
> +       uint32_t reserved_473; // offset: 473  (0x1D9)
> +       uint32_t reserved_474; // offset: 474  (0x1DA)
> +       uint32_t reserved_475; // offset: 475  (0x1DB)
> +       uint32_t reserved_476; // offset: 476  (0x1DC)
> +       uint32_t reserved_477; // offset: 477  (0x1DD)
> +       uint32_t reserved_478; // offset: 478  (0x1DE)
> +       uint32_t reserved_479; // offset: 479  (0x1DF)
> +       uint32_t reserved_480; // offset: 480  (0x1E0)
> +       uint32_t reserved_481; // offset: 481  (0x1E1)
> +       uint32_t reserved_482; // offset: 482  (0x1E2)
> +       uint32_t reserved_483; // offset: 483  (0x1E3)
> +       uint32_t reserved_484; // offset: 484  (0x1E4)
> +       uint32_t reserved_485; // offset: 485  (0x1E5)
> +       uint32_t reserved_486; // offset: 486  (0x1E6)
> +       uint32_t reserved_487; // offset: 487  (0x1E7)
> +       uint32_t reserved_488; // offset: 488  (0x1E8)
> +       uint32_t reserved_489; // offset: 489  (0x1E9)
> +       uint32_t reserved_490; // offset: 490  (0x1EA)
> +       uint32_t reserved_491; // offset: 491  (0x1EB)
> +       uint32_t reserved_492; // offset: 492  (0x1EC)
> +       uint32_t reserved_493; // offset: 493  (0x1ED)
> +       uint32_t reserved_494; // offset: 494  (0x1EE)
> +       uint32_t reserved_495; // offset: 495  (0x1EF)
> +       uint32_t reserved_496; // offset: 496  (0x1F0)
> +       uint32_t reserved_497; // offset: 497  (0x1F1)
> +       uint32_t reserved_498; // offset: 498  (0x1F2)
> +       uint32_t reserved_499; // offset: 499  (0x1F3)
> +       uint32_t reserved_500; // offset: 500  (0x1F4)
> +       uint32_t reserved_501; // offset: 501  (0x1F5)
> +       uint32_t reserved_502; // offset: 502  (0x1F6)
> +       uint32_t reserved_503; // offset: 503  (0x1F7)
> +       uint32_t reserved_504; // offset: 504  (0x1F8)
> +       uint32_t reserved_505; // offset: 505  (0x1F9)
> +       uint32_t reserved_506; // offset: 506  (0x1FA)
> +       uint32_t reserved_507; // offset: 507  (0x1FB)
> +       uint32_t reserved_508; // offset: 508  (0x1FC)
> +       uint32_t reserved_509; // offset: 509  (0x1FD)
> +       uint32_t reserved_510; // offset: 510  (0x1FE)
> +       uint32_t reserved_511; // offset: 511  (0x1FF)
> +};
> +
> +#endif
> \ No newline at end of file
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 4/7] drm/amdgpu: Allocate doorbell slot for user queue
  2022-12-23 19:36 ` [RFC 4/7] drm/amdgpu: Allocate doorbell slot for user queue Shashank Sharma
@ 2022-12-29 17:50   ` Alex Deucher
  2023-01-03  9:37     ` Shashank Sharma
  0 siblings, 1 reply; 64+ messages in thread
From: Alex Deucher @ 2022-12-29 17:50 UTC (permalink / raw)
  To: Shashank Sharma
  Cc: Alex Deucher, arvind.yadav, Christian Koenig, amd-gfx,
	arunpravin.paneerselvam

On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma <shashank.sharma@amd.com> wrote:
>
> This patch allocates a doorbell slot in the doorbell BAR for the usermode
> queue. We use the unique queue-id to get this slot from MES.

We should manage the doorbell BAR just like VRAM.  I had a set of
patches to convert doorbell memory to GEM objects.  The user should be
able to allocate doorbell memory via the GEM IOCTL just like VRAM or
GTT.  Then when the user calls the USERQ IOCTL, we can just look up
the GEM object from the handle and then calculate the doorbell offset
based on the offset of the GEM object from the start of the BAR.
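
Very roughly, the USERQ path would then do something like this (sketch
only; the doorbell-as-GEM work isn't merged, so the helper names here are
invented):

struct drm_gem_object *gobj;
struct amdgpu_bo *db_bo;

gobj = drm_gem_object_lookup(filp, mqd_in->doorbell_handle);
if (!gobj)
        return -ENOENT;
db_bo = gem_to_amdgpu_bo(gobj);

/* doorbell index = offset of the BO from the start of the doorbell BAR
 * (in dwords) plus the page-internal offset userspace passed in
 */
queue->doorbell_index = amdgpu_doorbell_bo_offset(adev, db_bo) / sizeof(u32) +
                        mqd_in->doorbell_offset;
drm_gem_object_put(gobj);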

Alex

>
> Cc: Alex Deucher <alexander.deucher@amd.com>
> Cc: Christian Koenig <christian.koenig@amd.com>
> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 28 +++++++++++++++++++
>  1 file changed, 28 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> index a91cc304cb9e..b566ce4cb7f0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> @@ -50,6 +50,25 @@ amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct amdgpu_usermode
>      ida_simple_remove(&uqg->ida, queue->queue_id);
>  }
>
> +static int
> +amdgpu_userqueue_get_doorbell(struct amdgpu_device *adev,
> +                    struct amdgpu_usermode_queue *queue)
> +{
> +    int r;
> +    unsigned int doorbell_index;
> +
> +    r = amdgpu_mes_alloc_process_doorbells(adev, &doorbell_index);
> +       if (r < 0) {
> +        DRM_ERROR("Failed to allocate doorbell for user queue\n");
> +        return r;
> +    }
> +
> +    /* We are using the unique queue-id to get the doorbell here */
> +    queue->doorbell_index = amdgpu_mes_get_doorbell_dw_offset_in_bar(adev,
> +                           doorbell_index, queue->queue_id);
> +    return 0;
> +}
> +
>  static void
>  amdgpu_userqueue_setup_mqd(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
>  {
> @@ -257,12 +276,21 @@ int amdgpu_userqueue_create(struct amdgpu_device *adev, struct drm_file *filp,
>          goto free_queue;
>      }
>
> +    r = amdgpu_userqueue_get_doorbell(adev, queue);
> +    if (r) {
> +        DRM_ERROR("Failed to create doorbell for queue\n");
> +        goto free_mqd;
> +    }
> +
>      ctx->userq = queue;
>      args->out.q_id = queue->queue_id;
>      args->out.flags = 0;
>      mutex_unlock(&adev->userq.userq_mutex);
>      return 0;
>
> +free_mqd:
> +    amdgpu_userqueue_destroy_mqd(queue);
> +
>  free_queue:
>      amdgpu_userqueue_remove_index(adev, queue);
>      mutex_unlock(&adev->userq.userq_mutex);
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 6/7] drm/amdgpu: Map userqueue into HW
  2022-12-23 19:36 ` [RFC 6/7] drm/amdgpu: Map userqueue into HW Shashank Sharma
@ 2022-12-29 17:51   ` Alex Deucher
  2023-01-03  9:38     ` Shashank Sharma
  0 siblings, 1 reply; 64+ messages in thread
From: Alex Deucher @ 2022-12-29 17:51 UTC (permalink / raw)
  To: Shashank Sharma
  Cc: Alex Deucher, arvind.yadav, Christian Koenig, amd-gfx,
	arunpravin.paneerselvam

On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma <shashank.sharma@amd.com> wrote:
>
> This patch adds the functions to map/unmap the usermode queue into/from
> the HW, using the prepared MQD and other objects. After this mapping, the
> queue will be ready to accept workloads.

This should also be a callback into IP specific code.  It will be
different for each IP type and version.
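
The per-IP function table suggested on patch 3 could simply grow map/unmap
hooks for this, e.g. (names invented):

int  (*queue_map)(struct amdgpu_device *adev,
                  struct amdgpu_usermode_queue *queue);
void (*queue_unmap)(struct amdgpu_device *adev,
                    struct amdgpu_usermode_queue *queue);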

Alex

>
> Cc: Alex Deucher <alexander.deucher@amd.com>
> Cc: Christian Koenig <christian.koenig@amd.com>
>
> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 71 +++++++++++++++++++
>  1 file changed, 71 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> index 2a854a5e2f70..b164e24247ca 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> @@ -50,6 +50,67 @@ amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct amdgpu_usermode
>      ida_simple_remove(&uqg->ida, queue->queue_id);
>  }
>
> +static int amdgpu_userqueue_map(struct amdgpu_device *adev,
> +                    struct amdgpu_usermode_queue *queue)
> +{
> +    int r;
> +    struct mes_add_queue_input queue_input;
> +
> +    memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
> +
> +    queue_input.process_va_start = 0;
> +    queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
> +    queue_input.process_quantum = 100000; /* 10ms */
> +    queue_input.gang_quantum = 10000; /* 1ms */
> +    queue_input.paging = false;
> +
> +    queue_input.gang_context_addr = queue->gang_ctx.gpu_addr;
> +    queue_input.process_context_addr = queue->proc_ctx.gpu_addr;
> +    queue_input.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
> +    queue_input.gang_global_priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
> +
> +    queue_input.process_id = queue->pasid;
> +    queue_input.queue_type = queue->queue_type;
> +    queue_input.mqd_addr = queue->mqd_gpu_addr;
> +    queue_input.wptr_addr = queue->wptr_gpu_addr;
> +    queue_input.queue_size = queue->queue_size >> 2;
> +    queue_input.doorbell_offset = queue->doorbell_index;
> +    queue_input.page_table_base_addr =  queue->vm->pd_phys_addr;
> +
> +    amdgpu_mes_lock(&adev->mes);
> +    r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
> +    amdgpu_mes_unlock(&adev->mes);
> +    if (r) {
> +        DRM_ERROR("Failed to map queue in HW, err (%d)\n", r);
> +        return r;
> +    }
> +
> +    DRM_DEBUG_DRIVER("Queue %d mapped successfully\n", queue->queue_id);
> +    return 0;
> +}
> +
> +static void amdgpu_userqueue_unmap(struct amdgpu_device *adev,
> +                    struct amdgpu_usermode_queue *queue)
> +{
> +    int r;
> +    struct mes_remove_queue_input queue_input;
> +
> +    memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
> +    queue_input.doorbell_offset = queue->doorbell_index;
> +    queue_input.gang_context_addr = queue->gang_ctx.gpu_addr;
> +
> +    amdgpu_mes_lock(&adev->mes);
> +    r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
> +    amdgpu_mes_unlock(&adev->mes);
> +
> +    if (r) {
> +        DRM_ERROR("Failed to unmap usermode queue %d\n", queue->queue_id);
> +        return;
> +    }
> +
> +    DRM_DEBUG_DRIVER("Usermode queue %d unmapped\n", queue->queue_id);
> +}
> +
>  static int
>  amdgpu_userqueue_get_doorbell(struct amdgpu_device *adev,
>                      struct amdgpu_usermode_queue *queue)
> @@ -338,12 +399,21 @@ int amdgpu_userqueue_create(struct amdgpu_device *adev, struct drm_file *filp,
>          goto free_mqd;
>      }
>
> +    r = amdgpu_userqueue_map(adev, queue);
> +    if (r < 0) {
> +        DRM_ERROR("Failed to map queue\n");
> +        goto free_ctx;
> +    }
> +
>      ctx->userq = queue;
>      args->out.q_id = queue->queue_id;
>      args->out.flags = 0;
>      mutex_unlock(&adev->userq.userq_mutex);
>      return 0;
>
> +free_ctx:
> +    amdgpu_userqueue_free_context(adev, queue);
> +
>  free_mqd:
>      amdgpu_userqueue_destroy_mqd(queue);
>
> @@ -362,6 +432,7 @@ void amdgpu_userqueue_destroy(struct amdgpu_device *adev, struct drm_file *filp,
>      struct amdgpu_usermode_queue *queue = ctx->userq;
>
>      mutex_lock(&adev->userq.userq_mutex);
> +    amdgpu_userqueue_unmap(adev, queue);
>      amdgpu_userqueue_free_context(adev, queue);
>      amdgpu_userqueue_destroy_mqd(queue);
>      amdgpu_userqueue_remove_index(adev, queue);
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 5/7] drm/amdgpu: Create context for usermode queue
  2022-12-23 19:36 ` [RFC 5/7] drm/amdgpu: Create context for usermode queue Shashank Sharma
@ 2022-12-29 17:54   ` Alex Deucher
  2023-01-03  9:40     ` Shashank Sharma
  0 siblings, 1 reply; 64+ messages in thread
From: Alex Deucher @ 2022-12-29 17:54 UTC (permalink / raw)
  To: Shashank Sharma
  Cc: Alex Deucher, arvind.yadav, Christian Koenig, amd-gfx,
	arunpravin.paneerselvam

On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma <shashank.sharma@amd.com> wrote:
>
> The FW expects us to allocate at least one page as process
> context space, and one for gang context space. This patch adds
> objects for the same.

This should be handled in the IP specific code for the MQD creation.
Each IP may have different requirements for MQD related metadata.

Alex


>
> Cc: Alex Deucher <alexander.deucher@amd.com>
> Cc: Christian Koenig <christian.koenig@amd.com>
>
> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 57 +++++++++++++++++++
>  .../drm/amd/include/amdgpu_usermode_queue.h   |  8 +++
>  2 files changed, 65 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> index b566ce4cb7f0..2a854a5e2f70 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> @@ -69,6 +69,56 @@ amdgpu_userqueue_get_doorbell(struct amdgpu_device *adev,
>      return 0;
>  }
>
> +static int
> +amdgpu_userqueue_create_context(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
> +{
> +    int r;
> +    struct amdgpu_userq_ctx *pctx = &queue->proc_ctx;
> +    struct amdgpu_userq_ctx *gctx = &queue->gang_ctx;
> +    /*
> +     * The FW expects at least one page of space allocated for
> +     * process context related work, and one for gang context.
> +     */
> +    r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
> +                                AMDGPU_GEM_DOMAIN_VRAM,
> +                                &pctx->obj,
> +                                &pctx->gpu_addr,
> +                                &pctx->cpu_ptr);
> +    if (r) {
> +        DRM_ERROR("Failed to allocate proc bo for userqueue (%d)", r);
> +        return r;
> +    }
> +
> +    r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
> +                                AMDGPU_GEM_DOMAIN_VRAM,
> +                                &gctx->obj,
> +                                &gctx->gpu_addr,
> +                                &gctx->cpu_ptr);
> +    if (r) {
> +        DRM_ERROR("Failed to allocate proc bo for userqueue (%d)", r);
> +        amdgpu_bo_free_kernel(&pctx->obj,
> +                              &pctx->gpu_addr,
> +                              &pctx->cpu_ptr);
> +        return r;
> +    }
> +
> +    return 0;
> +}
> +
> +static void
> +amdgpu_userqueue_free_context(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
> +{
> +    struct amdgpu_userq_ctx *pctx = &queue->proc_ctx;
> +    struct amdgpu_userq_ctx *gctx = &queue->gang_ctx;
> +
> +    amdgpu_bo_free_kernel(&pctx->obj,
> +                          &pctx->gpu_addr,
> +                          &pctx->cpu_ptr);
> +    amdgpu_bo_free_kernel(&gctx->obj,
> +                          &gctx->gpu_addr,
> +                          &gctx->cpu_ptr);
> +}
> +
>  static void
>  amdgpu_userqueue_setup_mqd(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
>  {
> @@ -282,6 +332,12 @@ int amdgpu_userqueue_create(struct amdgpu_device *adev, struct drm_file *filp,
>          goto free_mqd;
>      }
>
> +    r = amdgpu_userqueue_create_context(adev, queue);
> +    if (r < 0) {
> +        DRM_ERROR("Failed to create context for queue\n");
> +        goto free_mqd;
> +    }
> +
>      ctx->userq = queue;
>      args->out.q_id = queue->queue_id;
>      args->out.flags = 0;
> @@ -306,6 +362,7 @@ void amdgpu_userqueue_destroy(struct amdgpu_device *adev, struct drm_file *filp,
>      struct amdgpu_usermode_queue *queue = ctx->userq;
>
>      mutex_lock(&adev->userq.userq_mutex);
> +    amdgpu_userqueue_free_context(adev, queue);
>      amdgpu_userqueue_destroy_mqd(queue);
>      amdgpu_userqueue_remove_index(adev, queue);
>      ctx->userq = NULL;
> diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
> index c1fe39ffaf72..8bf3c0be6937 100644
> --- a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
> +++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
> @@ -26,6 +26,12 @@
>
>  #define AMDGPU_MAX_USERQ 512
>
> +struct amdgpu_userq_ctx {
> +       struct amdgpu_bo *obj;
> +       uint64_t gpu_addr;
> +       void    *cpu_ptr;
> +};
> +
>  struct amdgpu_usermode_queue {
>         int             queue_id;
>         int             queue_type;
> @@ -44,6 +50,8 @@ struct amdgpu_usermode_queue {
>
>         struct amdgpu_bo        *mqd_obj;
>         struct amdgpu_vm        *vm;
> +       struct amdgpu_userq_ctx proc_ctx;
> +       struct amdgpu_userq_ctx gang_ctx;
>         struct list_head        list;
>  };
>
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 0/7] RFC: Usermode queue for AMDGPU driver
  2022-12-23 19:36 [RFC 0/7] RFC: Usermode queue for AMDGPU driver Shashank Sharma
                   ` (6 preceding siblings ...)
  2022-12-23 19:36 ` [RFC 7/7] drm/amdgpu: Secure semaphore for usermode queue Shashank Sharma
@ 2022-12-29 18:02 ` Alex Deucher
  2023-01-03  9:43   ` Shashank Sharma
  7 siblings, 1 reply; 64+ messages in thread
From: Alex Deucher @ 2022-12-29 18:02 UTC (permalink / raw)
  To: Shashank Sharma
  Cc: Alex Deucher, arvind.yadav, Christian Koenig, amd-gfx,
	arunpravin.paneerselvam

On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma <shashank.sharma@amd.com> wrote:
>
> This is a RFC series to implement usermode graphics queues for AMDGPU
> driver (Navi 3X and above). The idea of usermode graphics queue is to
> allow direct workload submission from a userspace graphics process who
> has amdgpu graphics context.
>
> Once we have some initial feedback on the design, we will publish a
> follow up V1 series with a libdrm consumer test.

I think this should look more like the following:
1. Convert doorbells to full fledged GEM objects just like vram.  Then
update the GEM IOCTL to allow allocation of doorbell BOs.
2. Store MQD data per amdgpu_ctx.
3. Create secure semaphore pool and map RO into each GPUVM.
4. Add callbacks to each IP type that supports user mode queues.
These callbacks should handle the IP specific MQD initialization and
mapping/unmapping details including allocation of BOs for the MQD
itself and any relevant metadata.  The USERQ IOCTL handler will look
up the callback based on the IP type specified in the IOCTL.

Alex
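
For point 1 above, one possible shape against the existing GEM create UAPI;
the AMDGPU_GEM_DOMAIN_DOORBELL value is an assumption made up for this
sketch and is not defined anywhere in the series:

/* hypothetical new GEM domain so doorbell pages are allocated like BOs */
#define AMDGPU_GEM_DOMAIN_DOORBELL	0x80

	union drm_amdgpu_gem_create gem = {
		.in.bo_size      = 4096,	/* one doorbell page */
		.in.alignment    = 4096,
		.in.domains      = AMDGPU_GEM_DOMAIN_DOORBELL,
		.in.domain_flags = 0,
	};

	/*
	 * drmCommandWriteRead(fd, DRM_AMDGPU_GEM_CREATE, &gem, sizeof(gem))
	 * would then return a GEM handle that the USERQ IOCTL can reference
	 * through doorbell_handle.
	 */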

>
> Cc: Alex Deucher <alexander.deucher@amd.com>
> Cc: Christian Koenig <christian.koenig@amd.com>
>
> Alex Deucher (1):
>   drm/amdgpu: UAPI for user queue management
>
> Arunpravin Paneer Selvam (1):
>   drm/amdgpu: Secure semaphore for usermode queue
>
> Arvind Yadav (1):
>   drm/amdgpu: Create MQD for userspace queue
>
> Shashank Sharma (4):
>   drm/amdgpu: Add usermode queue for gfx work
>   drm/amdgpu: Allocate doorbell slot for user queue
>   drm/amdgpu: Create context for usermode queue
>   drm/amdgpu: Map userqueue into HW
>
>  drivers/gpu/drm/amd/amdgpu/Makefile           |   3 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  14 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h       |   1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 486 ++++++++++++++++
>  .../amd/amdgpu/amdgpu_userqueue_secure_sem.c  | 245 ++++++++
>  .../drm/amd/include/amdgpu_usermode_queue.h   |  68 +++
>  .../amd/include/amdgpu_usermode_queue_mqd.h   | 544 ++++++++++++++++++
>  include/uapi/drm/amdgpu_drm.h                 |  52 ++
>  8 files changed, 1413 insertions(+)
>  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_secure_sem.c
>  create mode 100644 drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>  create mode 100644 drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h
>
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management
  2022-12-27 16:58     ` Alex Deucher
@ 2023-01-02 11:27       ` Christian König
  2023-01-03 19:51         ` Alex Deucher
  0 siblings, 1 reply; 64+ messages in thread
From: Christian König @ 2023-01-02 11:27 UTC (permalink / raw)
  To: Alex Deucher, Bas Nieuwenhuizen
  Cc: Shashank Sharma, arunpravin.paneerselvam, arvind.yadav, amd-gfx,
	Alex Deucher, Christian Koenig

Am 27.12.22 um 17:58 schrieb Alex Deucher:
> On Sat, Dec 24, 2022 at 3:21 PM Bas Nieuwenhuizen
> <bas@basnieuwenhuizen.nl> wrote:
>> On Fri, Dec 23, 2022 at 8:37 PM Shashank Sharma <shashank.sharma@amd.com> wrote:
>>> From: Alex Deucher <alexander.deucher@amd.com>
>>>
>>> This patch introduces a new UAPI/IOCTL for usermode graphics
>>> queue. The userspace app will fill this structure and request
>>> the graphics driver to add a graphics work queue for it. The
>>> output of this UAPI is a queue id.
>>>
>>> This UAPI maps the queue into GPU, so the graphics app can start
>>> submitting work to the queue as soon as the call returns.
>>>
>>> Cc: Alex Deucher <alexander.deucher@amd.com>
>>> Cc: Christian Koenig <christian.koenig@amd.com>
>>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>>> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
>>> ---
>>>   include/uapi/drm/amdgpu_drm.h | 52 +++++++++++++++++++++++++++++++++++
>>>   1 file changed, 52 insertions(+)
>>>
>>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>>> index 0d93ec132ebb..a3d0dd6f62c5 100644
>>> --- a/include/uapi/drm/amdgpu_drm.h
>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>> @@ -54,6 +54,7 @@ extern "C" {
>>>   #define DRM_AMDGPU_VM                  0x13
>>>   #define DRM_AMDGPU_FENCE_TO_HANDLE     0x14
>>>   #define DRM_AMDGPU_SCHED               0x15
>>> +#define DRM_AMDGPU_USERQ               0x16
>>>
>>>   #define DRM_IOCTL_AMDGPU_GEM_CREATE    DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>>>   #define DRM_IOCTL_AMDGPU_GEM_MMAP      DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
>>> @@ -71,6 +72,7 @@ extern "C" {
>>>   #define DRM_IOCTL_AMDGPU_VM            DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
>>>   #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
>>>   #define DRM_IOCTL_AMDGPU_SCHED         DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
>>> +#define DRM_IOCTL_AMDGPU_USERQ         DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
>>>
>>>   /**
>>>    * DOC: memory domains
>>> @@ -288,6 +290,56 @@ union drm_amdgpu_ctx {
>>>          union drm_amdgpu_ctx_out out;
>>>   };
>>>
>>> +/* user queue IOCTL */
>>> +#define AMDGPU_USERQ_OP_CREATE 1
>>> +#define AMDGPU_USERQ_OP_FREE   2
>>> +
>>> +#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)
>>> +#define AMDGPU_USERQ_MQD_FLAGS_AQL     (1 << 1)
>> Can we document what AQL means here?
> AQL is the packet format used by KFD/ROCm.  The idea is to be able to
> create queues that support either format (AQL or PM4).

Could we make that a separate queue type? E.g. like SDMA, GFX, Compute?

It's not really a flag which can be applied independently of the queue.

Regards,
Christian.
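
For illustration only, making AQL a queue type of its own rather than a
flag could look roughly like this in the UAPI; the names and values below
are invented for the example:

/* instead of AMDGPU_USERQ_MQD_FLAGS_AQL, distinct queue types */
#define AMDGPU_USERQ_TYPE_GFX		1	/* PM4, gfx        */
#define AMDGPU_USERQ_TYPE_COMPUTE	2	/* PM4, compute    */
#define AMDGPU_USERQ_TYPE_COMPUTE_AQL	3	/* AQL, ROCm style */
#define AMDGPU_USERQ_TYPE_SDMA		4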

>
>>
>>> +
>>> +struct drm_amdgpu_userq_mqd {
>>> +       /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
>>> +       __u32   flags;
>>> +       /** IP type: AMDGPU_HW_IP_* */
>>> +       __u32   ip_type;
>>> +       /** GEM object handle */
>>> +       __u32   doorbell_handle;
>>> +       /** Doorbell offset in dwords */
>>> +       __u32   doorbell_offset;
>> What are the doorbell handle/offset for? I don't see any of them used
>> in the rest of the series (we only check the handle isn't 0, which
>> isn't enough validation for a GEM handle to consider it valid), and
>> the kernel seems to allocate some kind of doorbell index in patch 4.
>> Does userspace need to know about that one? (similarly use_doorbell in
>> that patch seems like it is never explicitly written to)
> The doorbell is how you trigger the engine to start processing the
> user queue.  The idea is that each user process allocates a page of
> doorbell space (one of the PCI BARs) and then each 64 bit segment in
> that page could be used for a user mode queue.  So the UMD writes its
> data to the queue, updates the wptr, and then writes to the doorbell
> to tell the firmware to start processing the queue.
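
For illustration, the submission flow described above looks roughly like
this from the UMD side, assuming the ring, wptr and doorbell pages have
already been mapped into the process; this is only a sketch, real code
needs proper memory barriers and packet alignment handling:

#include <stdint.h>

static void userq_submit(uint32_t *ring, uint64_t ring_dw,
			 volatile uint64_t *wptr, volatile uint64_t *doorbell,
			 const uint32_t *pkt, uint32_t ndw)
{
	uint64_t w = *wptr;
	uint32_t i;

	/* copy the PM4 packets into the user-owned ring buffer */
	for (i = 0; i < ndw; i++)
		ring[(w + i) % ring_dw] = pkt[i];

	/* publish the new write pointer ... */
	*wptr = w + ndw;

	/* ... and write it to the doorbell so the firmware starts fetching */
	*doorbell = w + ndw;
}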
>
>> The other questions I have are about how this interacts with memory
>> management. Does this have access to all BOs allocated with
>> AMDGPU_GEM_CREATE_VM_ALWAYS_VALID? What about imported BOs? How does
>> this interact with VA unmap/map operations? (AFAICT we have no way to
>> tell if pagetable modifying operations are complete from userspace for
>> now). What happens if we need to spill BOs from VRAM due to
>> (cross-process) memory pressure?
> Effectively everything you map on the GPU would be valid.  If there is
> memory pressure, the kernel driver will behave similarly to KFD.  It
> will unmap the queues (which preempts all work on the engines), do any
> memory migrations, and then map the queues again.
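
The eviction handling described above would roughly follow this shape; both
helpers named here are hypothetical, the point is only the
unmap / migrate / map ordering:

	/* removing the queues from the HW preempts any work using them */
	amdgpu_userqueue_unmap_all(adev);		/* hypothetical */

	/* TTM migrates the BOs and the page tables are updated here */

	/* once memory has settled, the queues are added back again */
	amdgpu_userqueue_map_all(adev);			/* hypothetical */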
>
> Alex
>
>>> +       /** GPU virtual address of the queue */
>>> +       __u64   queue_va;
>>> +       /** Size of the queue in bytes */
>>> +       __u64   queue_size;
>>> +       /** GPU virtual address of the rptr */
>>> +       __u64   rptr_va;
>>> +       /** GPU virtual address of the wptr */
>>> +       __u64   wptr_va;
>>> +};
>>> +
>>> +struct drm_amdgpu_userq_in {
>>> +       /** AMDGPU_USERQ_OP_* */
>>> +       __u32   op;
>>> +       /** Flags */
>>> +       __u32   flags;
>>> +       /** Context handle to associate the queue with */
>>> +       __u32   ctx_id;
>>> +       __u32   pad;
>>> +       /** Queue descriptor */
>>> +       struct drm_amdgpu_userq_mqd mqd;
>>> +};
>>> +
>>> +struct drm_amdgpu_userq_out {
>>> +       /** Queue handle */
>>> +       __u32   q_id;
>>> +       /** Flags */
>>> +       __u32   flags;
>>> +};
>>> +
>>> +union drm_amdgpu_userq {
>>> +       struct drm_amdgpu_userq_in in;
>>> +       struct drm_amdgpu_userq_out out;
>>> +};
>>> +
>>>   /* vm ioctl */
>>>   #define AMDGPU_VM_OP_RESERVE_VMID      1
>>>   #define AMDGPU_VM_OP_UNRESERVE_VMID    2
>>> --
>>> 2.34.1
>>>


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work
  2022-12-26 10:41     ` Shashank Sharma
@ 2023-01-02 12:39       ` Christian König
  2023-01-03  9:12         ` Shashank Sharma
  0 siblings, 1 reply; 64+ messages in thread
From: Christian König @ 2023-01-02 12:39 UTC (permalink / raw)
  To: Shashank Sharma, amd-gfx
  Cc: Alex Deucher, Christian Koenig, arvind.yadav, arunpravin.paneerselvam

Hi Shashank,

Am 26.12.22 um 11:41 schrieb Shashank Sharma:
> [SNIP]
>>>         /* df */
>>>       struct amdgpu_df                df;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>> index 0fa0e56daf67..f7413859b14f 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>> @@ -57,6 +57,7 @@ struct amdgpu_ctx {
>>>       unsigned long            ras_counter_ce;
>>>       unsigned long            ras_counter_ue;
>>>       uint32_t            stable_pstate;
>>> +    struct amdgpu_usermode_queue    *userq;
>>
>> Why should we have this in the ctx here???
>
> We are allocating a few things dynamically for the queue, which would
> be valid until we destroy this queue. Also we need to save this queue
> container at some place for the destroy function, and I thought it
> would make sense to keep this with the context ptr, as this is how we
> are identifying the incoming request.

I have absolutely no idea how you end up with that design.

The ctx object is the CS IOCTL context, that is not even remotely 
related to anything the user queues should be doing.

Please completely drop that relationship and don't use any of the ctx 
object stuff in the user queue code.

Christian.

>
> - Shashank
>
>>
>> Regards,
>> Christian.
>>
>>>   };
>>>     struct amdgpu_ctx_mgr {
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>> new file mode 100644
>>> index 000000000000..3b6e8f75495c
>>> --- /dev/null
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>> @@ -0,0 +1,187 @@
>>> +/*
>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>> + *
>>> + * Permission is hereby granted, free of charge, to any person 
>>> obtaining a
>>> + * copy of this software and associated documentation files (the 
>>> "Software"),
>>> + * to deal in the Software without restriction, including without 
>>> limitation
>>> + * the rights to use, copy, modify, merge, publish, distribute, 
>>> sublicense,
>>> + * and/or sell copies of the Software, and to permit persons to 
>>> whom the
>>> + * Software is furnished to do so, subject to the following 
>>> conditions:
>>> + *
>>> + * The above copyright notice and this permission notice shall be 
>>> included in
>>> + * all copies or substantial portions of the Software.
>>> + *
>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
>>> EXPRESS OR
>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>> MERCHANTABILITY,
>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO 
>>> EVENT SHALL
>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
>>> DAMAGES OR
>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>>> OTHERWISE,
>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
>>> USE OR
>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>> + *
>>> + */
>>> +
>>> +#include "amdgpu.h"
>>> +#include "amdgpu_vm.h"
>>> +#include "amdgpu_mes.h"
>>> +#include "amdgpu_usermode_queue.h"
>>> +#include "soc15_common.h"
>>> +
>>> +#define CHECK_ACCESS(a) (access_ok((const void __user *)a, 
>>> sizeof(__u64)))
>>> +
>>> +static int
>>> +amdgpu_userqueue_index(struct amdgpu_device *adev)
>>> +{
>>> +    int index;
>>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>>> +
>>> +    index = ida_simple_get(&uqg->ida, 2, AMDGPU_MAX_USERQ, 
>>> GFP_KERNEL);
>>> +    return index;
>>> +}
>>> +
>>> +static void
>>> +amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct 
>>> amdgpu_usermode_queue *queue)
>>> +{
>>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>>> +
>>> +    ida_simple_remove(&uqg->ida, queue->queue_id);
>>> +}
>>> +
>>> +static int
>>> +amdgpu_userqueue_validate_input(struct amdgpu_device *adev, struct 
>>> drm_amdgpu_userq_mqd *mqd_in)
>>> +{
>>> +    if (mqd_in->queue_va == 0 || mqd_in->doorbell_handle == 0 || 
>>> mqd_in->doorbell_offset == 0) {
>>> +        DRM_ERROR("Invalid queue object address\n");
>>> +        return -EINVAL;
>>> +    }
>>> +
>>> +    if (mqd_in->queue_size == 0 || mqd_in->rptr_va == 0 || 
>>> mqd_in->wptr_va == 0) {
>>> +        DRM_ERROR("Invalid queue object value\n");
>>> +        return -EINVAL;
>>> +    }
>>> +
>>> +    if (mqd_in->ip_type < AMDGPU_HW_IP_GFX || mqd_in->ip_type >= 
>>> AMDGPU_HW_IP_NUM) {
>>> +        DRM_ERROR("Invalid HW IP type 0x%x\n", mqd_in->ip_type);
>>> +        return -EINVAL;
>>> +    }
>>> +
>>> +    if (!CHECK_ACCESS(mqd_in->queue_va) || 
>>> !CHECK_ACCESS(mqd_in->rptr_va) ||
>>> +        !CHECK_ACCESS(mqd_in->wptr_va)) {
>>> +            DRM_ERROR("Invalid mapping of queue ptrs, access 
>>> error\n");
>>> +            return -EINVAL;
>>> +    }
>>> +
>>> +    DRM_DEBUG_DRIVER("Input parameters to create queue are valid\n");
>>> +    return 0;
>>> +}
>>> +
>>> +int amdgpu_userqueue_create(struct amdgpu_device *adev, struct 
>>> drm_file *filp,
>>> +                            union drm_amdgpu_userq *args)
>>> +{
>>> +    int r, pasid;
>>> +    struct amdgpu_usermode_queue *queue;
>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>> +    struct amdgpu_vm *vm = &fpriv->vm;
>>> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, args->in.ctx_id);
>>> +    struct drm_amdgpu_userq_mqd *mqd_in = &args->in.mqd;
>>> +
>>> +    if (!ctx) {
>>> +        DRM_ERROR("Invalid GPU context\n");
>>> +        return -EINVAL;
>>> +    }
>>> +
>>> +    if (vm->pasid < 0) {
>>> +        DRM_WARN("No PASID info found\n");
>>> +        pasid = 0;
>>> +    }
>>> +
>>> +    mutex_lock(&adev->userq.userq_mutex);
>>> +
>>> +    queue = kzalloc(sizeof(struct amdgpu_usermode_queue), GFP_KERNEL);
>>> +    if (!queue) {
>>> +        DRM_ERROR("Failed to allocate memory for queue\n");
>>> +        mutex_unlock(&adev->userq.userq_mutex);
>>> +        return -ENOMEM;
>>> +    }
>>> +
>>> +    r = amdgpu_userqueue_validate_input(adev, mqd_in);
>>> +    if (r < 0) {
>>> +        DRM_ERROR("Invalid input to create queue\n");
>>> +        goto free_queue;
>>> +    }
>>> +
>>> +    queue->vm = vm;
>>> +    queue->pasid = pasid;
>>> +    queue->wptr_gpu_addr = mqd_in->wptr_va;
>>> +    queue->rptr_gpu_addr = mqd_in->rptr_va;
>>> +    queue->queue_size = mqd_in->queue_size;
>>> +    queue->queue_type = mqd_in->ip_type;
>>> +    queue->paging = false;
>>> +    queue->flags = mqd_in->flags;
>>> +    queue->queue_id = amdgpu_userqueue_index(adev);
>>> +
>>> +    ctx->userq = queue;
>>> +    args->out.q_id = queue->queue_id;
>>> +    args->out.flags = 0;
>>> +    mutex_unlock(&adev->userq.userq_mutex);
>>> +    return 0;
>>> +
>>> +free_queue:
>>> +    amdgpu_userqueue_remove_index(adev, queue);
>>> +    mutex_unlock(&adev->userq.userq_mutex);
>>> +    kfree(queue);
>>> +    return r;
>>> +}
>>> +
>>> +void amdgpu_userqueue_destroy(struct amdgpu_device *adev, struct 
>>> drm_file *filp,
>>> +                              union drm_amdgpu_userq *args)
>>> +{
>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, args->in.ctx_id);
>>> +    struct amdgpu_usermode_queue *queue = ctx->userq;
>>> +
>>> +    mutex_lock(&adev->userq.userq_mutex);
>>> +    amdgpu_userqueue_remove_index(adev, queue);
>>> +    ctx->userq = NULL;
>>> +    mutex_unlock(&adev->userq.userq_mutex);
>>> +    kfree(queue);
>>> +}
>>> +
>>> +int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
>>> +               struct drm_file *filp)
>>> +{
>>> +    union drm_amdgpu_userq *args = data;
>>> +    struct amdgpu_device *adev = drm_to_adev(dev);
>>> +    int r = 0;
>>> +
>>> +    switch (args->in.op) {
>>> +    case AMDGPU_USERQ_OP_CREATE:
>>> +        r = amdgpu_userqueue_create(adev, filp, args);
>>> +        if (r)
>>> +            DRM_ERROR("Failed to create usermode queue\n");
>>> +        break;
>>> +
>>> +    case AMDGPU_USERQ_OP_FREE:
>>> +        amdgpu_userqueue_destroy(adev, filp, args);
>>> +        break;
>>> +
>>> +    default:
>>> +        DRM_ERROR("Invalid user queue op specified: %d\n", 
>>> args->in.op);
>>> +        return -EINVAL;
>>> +    }
>>> +
>>> +    return r;
>>> +}
>>> +
>>> +int amdgpu_userqueue_init(struct amdgpu_device *adev)
>>> +{
>>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>>> +
>>> +    mutex_init(&uqg->userq_mutex);
>>> +    return 0;
>>> +}
>>> +
>>> +void amdgpu_userqueue_fini(struct amdgpu_device *adev)
>>> +{
>>> +
>>> +}
>>> diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h 
>>> b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>>> new file mode 100644
>>> index 000000000000..c1fe39ffaf72
>>> --- /dev/null
>>> +++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>>> @@ -0,0 +1,50 @@
>>> +/*
>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>> + *
>>> + * Permission is hereby granted, free of charge, to any person 
>>> obtaining a
>>> + * copy of this software and associated documentation files (the 
>>> "Software"),
>>> + * to deal in the Software without restriction, including without 
>>> limitation
>>> + * the rights to use, copy, modify, merge, publish, distribute, 
>>> sublicense,
>>> + * and/or sell copies of the Software, and to permit persons to 
>>> whom the
>>> + * Software is furnished to do so, subject to the following 
>>> conditions:
>>> + *
>>> + * The above copyright notice and this permission notice shall be 
>>> included in
>>> + * all copies or substantial portions of the Software.
>>> + *
>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
>>> EXPRESS OR
>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>> MERCHANTABILITY,
>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO 
>>> EVENT SHALL
>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
>>> DAMAGES OR
>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>>> OTHERWISE,
>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
>>> USE OR
>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>> + *
>>> + */
>>> +
>>> +#ifndef AMDGPU_USERMODE_QUEUE_H_
>>> +#define AMDGPU_USERMODE_QUEUE_H_
>>> +
>>> +#define AMDGPU_MAX_USERQ 512
>>> +
>>> +struct amdgpu_usermode_queue {
>>> +    int        queue_id;
>>> +    int        queue_type;
>>> +    int        queue_size;
>>> +    int        paging;
>>> +    int        pasid;
>>> +    int        use_doorbell;
>>> +    int        doorbell_index;
>>> +
>>> +    uint64_t    mqd_gpu_addr;
>>> +    uint64_t    wptr_gpu_addr;
>>> +    uint64_t    rptr_gpu_addr;
>>> +    uint64_t    queue_gpu_addr;
>>> +    uint64_t    flags;
>>> +    void         *mqd_cpu_ptr;
>>> +
>>> +    struct amdgpu_bo    *mqd_obj;
>>> +    struct amdgpu_vm        *vm;
>>> +    struct list_head     list;
>>> +};
>>> +
>>> +#endif
>>


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management
  2022-12-23 19:36 ` [RFC 1/7] drm/amdgpu: UAPI for user queue management Shashank Sharma
  2022-12-24 20:20   ` Bas Nieuwenhuizen
@ 2023-01-02 13:26   ` Christian König
  2023-01-03 14:23     ` Alex Deucher
  2023-01-03 18:29   ` Felix Kuehling
  2 siblings, 1 reply; 64+ messages in thread
From: Christian König @ 2023-01-02 13:26 UTC (permalink / raw)
  To: Shashank Sharma, amd-gfx
  Cc: Alex Deucher, arvind.yadav, arunpravin.paneerselvam

Am 23.12.22 um 20:36 schrieb Shashank Sharma:
> From: Alex Deucher <alexander.deucher@amd.com>
>
> This patch introduces a new UAPI/IOCTL for usermode graphics
> queue. The userspace app will fill this structure and request
> the graphics driver to add a graphics work queue for it. The
> output of this UAPI is a queue id.
>
> This UAPI maps the queue into GPU, so the graphics app can start
> submitting work to the queue as soon as the call returns.
>
> Cc: Alex Deucher <alexander.deucher@amd.com>
> Cc: Christian Koenig <christian.koenig@amd.com>
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
> ---
>   include/uapi/drm/amdgpu_drm.h | 52 +++++++++++++++++++++++++++++++++++
>   1 file changed, 52 insertions(+)
>
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 0d93ec132ebb..a3d0dd6f62c5 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -54,6 +54,7 @@ extern "C" {
>   #define DRM_AMDGPU_VM			0x13
>   #define DRM_AMDGPU_FENCE_TO_HANDLE	0x14
>   #define DRM_AMDGPU_SCHED		0x15
> +#define DRM_AMDGPU_USERQ		0x16
>   
>   #define DRM_IOCTL_AMDGPU_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>   #define DRM_IOCTL_AMDGPU_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> @@ -71,6 +72,7 @@ extern "C" {
>   #define DRM_IOCTL_AMDGPU_VM		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
>   #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
>   #define DRM_IOCTL_AMDGPU_SCHED		DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> +#define DRM_IOCTL_AMDGPU_USERQ		DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
>   
>   /**
>    * DOC: memory domains
> @@ -288,6 +290,56 @@ union drm_amdgpu_ctx {
>   	union drm_amdgpu_ctx_out out;
>   };
>   
> +/* user queue IOCTL */
> +#define AMDGPU_USERQ_OP_CREATE	1
> +#define AMDGPU_USERQ_OP_FREE	2
> +
> +#define AMDGPU_USERQ_MQD_FLAGS_SECURE	(1 << 0)
> +#define AMDGPU_USERQ_MQD_FLAGS_AQL	(1 << 1)
> +
> +struct drm_amdgpu_userq_mqd {
> +	/** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
> +	__u32	flags;
> +	/** IP type: AMDGPU_HW_IP_* */
> +	__u32	ip_type;
> +	/** GEM object handle */
> +	__u32   doorbell_handle;
> +	/** Doorbell offset in dwords */
> +	__u32   doorbell_offset;
> +	/** GPU virtual address of the queue */
> +	__u64   queue_va;
> +	/** Size of the queue in bytes */
> +	__u64   queue_size;
> +	/** GPU virtual address of the rptr */
> +	__u64   rptr_va;
> +	/** GPU virtual address of the wptr */
> +	__u64   wptr_va;

We should probably note somewhere that those are inputs to the queue and 
need to be allocated by userspace somewhere.

> +};
> +
> +struct drm_amdgpu_userq_in {
> +	/** AMDGPU_USERQ_OP_* */
> +	__u32	op;
> +	/** Flags */
> +	__u32	flags;

> +	/** Context handle to associate the queue with */
> +	__u32	ctx_id;

Uff, this is just blunt nonsense. Queues are not related to ctx objects 
in any way possible.

> +	__u32	pad;
> +	/** Queue descriptor */
> +	struct drm_amdgpu_userq_mqd mqd;
> +};
> +
> +struct drm_amdgpu_userq_out {
> +	/** Queue handle */
> +	__u32	q_id;
> +	/** Flags */
> +	__u32	flags;
> +};
> +
> +union drm_amdgpu_userq {
> +	struct drm_amdgpu_userq_in in;
> +	struct drm_amdgpu_userq_out out;
> +};
> +
>   /* vm ioctl */
>   #define AMDGPU_VM_OP_RESERVE_VMID	1
>   #define AMDGPU_VM_OP_UNRESERVE_VMID	2


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work
  2022-12-29 17:41   ` Alex Deucher
@ 2023-01-02 13:53     ` Christian König
  2023-01-03  9:32       ` Shashank Sharma
  2023-01-03  9:16     ` Shashank Sharma
  1 sibling, 1 reply; 64+ messages in thread
From: Christian König @ 2023-01-02 13:53 UTC (permalink / raw)
  To: Alex Deucher, Shashank Sharma
  Cc: Alex Deucher, amd-gfx, Christian Koenig, arvind.yadav,
	arunpravin.paneerselvam

Am 29.12.22 um 18:41 schrieb Alex Deucher:
> On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma <shashank.sharma@amd.com> wrote:
>> This patch adds skeleton code for usermode queue creation. It
>> typically contains:
>> - A new structure to keep all the user queue data in one place.
>> - An IOCTL function to create/free a usermode queue.
>> - A function to generate unique index for the queue.
>> - A global ptr in amdgpu_dev
>>
>> Cc: Alex Deucher <alexander.deucher@amd.com>
>> Cc: Christian Koenig <christian.koenig@amd.com>
>> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/Makefile           |   2 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h           |   6 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h       |   1 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 187 ++++++++++++++++++
>>   .../drm/amd/include/amdgpu_usermode_queue.h   |  50 +++++
>>   5 files changed, 246 insertions(+)
>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>   create mode 100644 drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
>> index 6ad39cf71bdd..e2a34ee57bfb 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>> @@ -209,6 +209,8 @@ amdgpu-y += \
>>   # add amdkfd interfaces
>>   amdgpu-y += amdgpu_amdkfd.o
>>
>> +# add usermode queue
>> +amdgpu-y += amdgpu_userqueue.o
>>
>>   ifneq ($(CONFIG_HSA_AMD),)
>>   AMDKFD_PATH := ../amdkfd
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 8639a4f9c6e8..4b566fcfca18 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -749,6 +749,11 @@ struct amdgpu_mqd {
>>                          struct amdgpu_mqd_prop *p);
>>   };
>>
>> +struct amdgpu_userq_globals {
>> +       struct ida ida;
>> +       struct mutex userq_mutex;
>> +};
>> +
>>   #define AMDGPU_RESET_MAGIC_NUM 64
>>   #define AMDGPU_MAX_DF_PERFMONS 4
>>   #define AMDGPU_PRODUCT_NAME_LEN 64
>> @@ -955,6 +960,7 @@ struct amdgpu_device {
>>          bool                            enable_mes_kiq;
>>          struct amdgpu_mes               mes;
>>          struct amdgpu_mqd               mqds[AMDGPU_HW_IP_NUM];
>> +       struct amdgpu_userq_globals     userq;
>>
>>          /* df */
>>          struct amdgpu_df                df;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>> index 0fa0e56daf67..f7413859b14f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>> @@ -57,6 +57,7 @@ struct amdgpu_ctx {
>>          unsigned long                   ras_counter_ce;
>>          unsigned long                   ras_counter_ue;
>>          uint32_t                        stable_pstate;
>> +       struct amdgpu_usermode_queue    *userq;
> There can be multiple queues per context.  We should make this a list.
>
>>   };
>>
>>   struct amdgpu_ctx_mgr {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> new file mode 100644
>> index 000000000000..3b6e8f75495c
>> --- /dev/null
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> @@ -0,0 +1,187 @@
>> +/*
>> + * Copyright 2022 Advanced Micro Devices, Inc.
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>> + * OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> + */
>> +
>> +#include "amdgpu.h"
>> +#include "amdgpu_vm.h"
>> +#include "amdgpu_mes.h"
>> +#include "amdgpu_usermode_queue.h"
>> +#include "soc15_common.h"
>> +
>> +#define CHECK_ACCESS(a) (access_ok((const void __user *)a, sizeof(__u64)))

You seem to have a very very big misunderstanding here.

access_ok() is used for CPU pointer validation, but these are pointers
into the GPUVM address space. This is something completely different!

Regards,
Christian.
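
One possible direction is to validate the addresses against the GPUVM
mappings instead, along the lines of the lookup the CS code already does;
treat this as a sketch, the exact locking requirements would still need to
be checked:

static int amdgpu_userqueue_validate_va(struct amdgpu_vm *vm, uint64_t va)
{
	struct amdgpu_bo_va_mapping *mapping;

	/* look the address up in the GPUVM, not in the CPU address space */
	mapping = amdgpu_vm_bo_lookup_mapping(vm, va / AMDGPU_GPU_PAGE_SIZE);
	if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
		return -EINVAL;

	return 0;
}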

>> +
>> +static int
>> +amdgpu_userqueue_index(struct amdgpu_device *adev)
>> +{
>> +    int index;
>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>> +
>> +    index = ida_simple_get(&uqg->ida, 2, AMDGPU_MAX_USERQ, GFP_KERNEL);
>> +    return index;
>> +}
>> +
>> +static void
>> +amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
>> +{
>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>> +
>> +    ida_simple_remove(&uqg->ida, queue->queue_id);
>> +}
>> +
>> +static int
>> +amdgpu_userqueue_validate_input(struct amdgpu_device *adev, struct drm_amdgpu_userq_mqd *mqd_in)
>> +{
>> +    if (mqd_in->queue_va == 0 || mqd_in->doorbell_handle == 0 || mqd_in->doorbell_offset == 0) {
>> +        DRM_ERROR("Invalid queue object address\n");
>> +        return -EINVAL;
>> +    }
>> +
>> +    if (mqd_in->queue_size == 0 || mqd_in->rptr_va == 0 || mqd_in->wptr_va == 0) {
>> +        DRM_ERROR("Invalid queue object value\n");
>> +        return -EINVAL;
>> +    }
>> +
>> +    if (mqd_in->ip_type < AMDGPU_HW_IP_GFX || mqd_in->ip_type >= AMDGPU_HW_IP_NUM) {
>> +        DRM_ERROR("Invalid HW IP type 0x%x\n", mqd_in->ip_type);
>> +        return -EINVAL;
>> +    }
>> +
>> +    if (!CHECK_ACCESS(mqd_in->queue_va) || !CHECK_ACCESS(mqd_in->rptr_va) ||
>> +        !CHECK_ACCESS(mqd_in->wptr_va)) {
>> +            DRM_ERROR("Invalid mapping of queue ptrs, access error\n");
>> +            return -EINVAL;
>> +    }
> Need to check the flags as well.
>
>> +
>> +    DRM_DEBUG_DRIVER("Input parameters to create queue are valid\n");
>> +    return 0;
>> +}
>> +
>> +int amdgpu_userqueue_create(struct amdgpu_device *adev, struct drm_file *filp,
>> +                            union drm_amdgpu_userq *args)
>> +{
>> +    int r, pasid;
>> +    struct amdgpu_usermode_queue *queue;
>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>> +    struct amdgpu_vm *vm = &fpriv->vm;
>> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, args->in.ctx_id);
>> +    struct drm_amdgpu_userq_mqd *mqd_in = &args->in.mqd;
>> +
>> +    if (!ctx) {
>> +        DRM_ERROR("Invalid GPU context\n");
>> +        return -EINVAL;
>> +    }
>> +
>> +    if (vm->pasid < 0) {
>> +        DRM_WARN("No PASID info found\n");
>> +        pasid = 0;
>> +    }
>> +
>> +    mutex_lock(&adev->userq.userq_mutex);
>> +
>> +    queue = kzalloc(sizeof(struct amdgpu_usermode_queue), GFP_KERNEL);
>> +    if (!queue) {
>> +        DRM_ERROR("Failed to allocate memory for queue\n");
>> +        mutex_unlock(&adev->userq.userq_mutex);
>> +        return -ENOMEM;
>> +    }
>> +
>> +    r = amdgpu_userqueue_validate_input(adev, mqd_in);
>> +    if (r < 0) {
>> +        DRM_ERROR("Invalid input to create queue\n");
>> +        goto free_queue;
>> +    }
>> +
>> +    queue->vm = vm;
>> +    queue->pasid = pasid;
>> +    queue->wptr_gpu_addr = mqd_in->wptr_va;
>> +    queue->rptr_gpu_addr = mqd_in->rptr_va;
>> +    queue->queue_size = mqd_in->queue_size;
>> +    queue->queue_type = mqd_in->ip_type;
>> +    queue->paging = false;
>> +    queue->flags = mqd_in->flags;
>> +    queue->queue_id = amdgpu_userqueue_index(adev);
>> +
>> +    ctx->userq = queue;
>> +    args->out.q_id = queue->queue_id;
>> +    args->out.flags = 0;
>> +    mutex_unlock(&adev->userq.userq_mutex);
>> +    return 0;
>> +
>> +free_queue:
>> +    amdgpu_userqueue_remove_index(adev, queue);
>> +    mutex_unlock(&adev->userq.userq_mutex);
>> +    kfree(queue);
>> +    return r;
>> +}
>> +
>> +void amdgpu_userqueue_destroy(struct amdgpu_device *adev, struct drm_file *filp,
>> +                              union drm_amdgpu_userq *args)
>> +{
>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, args->in.ctx_id);
>> +    struct amdgpu_usermode_queue *queue = ctx->userq;
>> +
>> +    mutex_lock(&adev->userq.userq_mutex);
>> +    amdgpu_userqueue_remove_index(adev, queue);
>> +    ctx->userq = NULL;
>> +    mutex_unlock(&adev->userq.userq_mutex);
>> +    kfree(queue);
>> +}
>> +
>> +int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
>> +                      struct drm_file *filp)
>> +{
>> +    union drm_amdgpu_userq *args = data;
>> +    struct amdgpu_device *adev = drm_to_adev(dev);
>> +    int r = 0;
>> +
>> +    switch (args->in.op) {
>> +    case AMDGPU_USERQ_OP_CREATE:
>> +        r = amdgpu_userqueue_create(adev, filp, args);
>> +        if (r)
>> +            DRM_ERROR("Failed to create usermode queue\n");
>> +        break;
>> +
>> +    case AMDGPU_USERQ_OP_FREE:
>> +        amdgpu_userqueue_destroy(adev, filp, args);
>> +        break;
>> +
>> +    default:
>> +        DRM_ERROR("Invalid user queue op specified: %d\n", args->in.op);
>> +        return -EINVAL;
>> +    }
>> +
>> +    return r;
>> +}
>> +
>> +int amdgpu_userqueue_init(struct amdgpu_device *adev)
>> +{
>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>> +
>> +    mutex_init(&uqg->userq_mutex);
>> +    return 0;
>> +}
>> +
>> +void amdgpu_userqueue_fini(struct amdgpu_device *adev)
>> +{
>> +
>> +}
>> diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>> new file mode 100644
>> index 000000000000..c1fe39ffaf72
>> --- /dev/null
>> +++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>> @@ -0,0 +1,50 @@
>> +/*
>> + * Copyright 2022 Advanced Micro Devices, Inc.
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>> + * OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> + */
>> +
>> +#ifndef AMDGPU_USERMODE_QUEUE_H_
>> +#define AMDGPU_USERMODE_QUEUE_H_
>> +
>> +#define AMDGPU_MAX_USERQ 512
>> +
>> +struct amdgpu_usermode_queue {
>> +       int             queue_id;
>> +       int             queue_type;
>> +       int             queue_size;
>> +       int             paging;
>> +       int             pasid;
>> +       int             use_doorbell;
>> +       int             doorbell_index;
>> +
>> +       uint64_t        mqd_gpu_addr;
>> +       uint64_t        wptr_gpu_addr;
>> +       uint64_t        rptr_gpu_addr;
>> +       uint64_t        queue_gpu_addr;
>> +       uint64_t        flags;
>> +       void            *mqd_cpu_ptr;
>> +
>> +       struct amdgpu_bo        *mqd_obj;
>> +       struct amdgpu_vm        *vm;
>> +       struct list_head        list;
>> +};
>> +
>> +#endif
>> --
>> 2.34.1
>>


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work
  2023-01-02 12:39       ` Christian König
@ 2023-01-03  9:12         ` Shashank Sharma
  2023-01-03  9:15           ` Christian König
  0 siblings, 1 reply; 64+ messages in thread
From: Shashank Sharma @ 2023-01-03  9:12 UTC (permalink / raw)
  To: Christian König, amd-gfx
  Cc: Alex Deucher, Christian Koenig, arvind.yadav, arunpravin.paneerselvam


On 02/01/2023 13:39, Christian König wrote:
> Hi Shashank,
>
> Am 26.12.22 um 11:41 schrieb Shashank Sharma:
>> [SNIP]
>>>>         /* df */
>>>>       struct amdgpu_df                df;
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>>> index 0fa0e56daf67..f7413859b14f 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>>> @@ -57,6 +57,7 @@ struct amdgpu_ctx {
>>>>       unsigned long            ras_counter_ce;
>>>>       unsigned long            ras_counter_ue;
>>>>       uint32_t            stable_pstate;
>>>> +    struct amdgpu_usermode_queue    *userq;
>>>
>>> Why should we have this in the ctx here???
>>
>> We are allocating a few things dynamically for the queue, which would
>> be valid until we destroy this queue. Also we need to save this queue
>> container at some place for the destroy function, and I thought it
>> would make sense to keep this with the context ptr, as this is how we
>> are identifying the incoming request.
>
> I have absolutely no idea how you end up with that design.
>
> The ctx object is the CS IOCTL context, that is not even remotely 
> related to anything the user queues should be doing.
>
> Please completely drop that relationship and don't use any of the ctx 
> object stuff in the user queue code.
>
Historically the workload submission always came with a context (due to
the CS IOCTL), so we thought it would make sense to keep that relationship
in the new workload submission method. Would you prefer this new
submission to be independent of the AMDGPU context?

- Shashank


> Christian.
>
>>
>> - Shashank
>>
>>>
>>> Regards,
>>> Christian.
>>>
>>>>   };
>>>>     struct amdgpu_ctx_mgr {
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>>> new file mode 100644
>>>> index 000000000000..3b6e8f75495c
>>>> --- /dev/null
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>>> @@ -0,0 +1,187 @@
>>>> +/*
>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>> + *
>>>> + * Permission is hereby granted, free of charge, to any person 
>>>> obtaining a
>>>> + * copy of this software and associated documentation files (the 
>>>> "Software"),
>>>> + * to deal in the Software without restriction, including without 
>>>> limitation
>>>> + * the rights to use, copy, modify, merge, publish, distribute, 
>>>> sublicense,
>>>> + * and/or sell copies of the Software, and to permit persons to 
>>>> whom the
>>>> + * Software is furnished to do so, subject to the following 
>>>> conditions:
>>>> + *
>>>> + * The above copyright notice and this permission notice shall be 
>>>> included in
>>>> + * all copies or substantial portions of the Software.
>>>> + *
>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
>>>> EXPRESS OR
>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>>> MERCHANTABILITY,
>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO 
>>>> EVENT SHALL
>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
>>>> DAMAGES OR
>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>>>> OTHERWISE,
>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
>>>> USE OR
>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>> + *
>>>> + */
>>>> +
>>>> +#include "amdgpu.h"
>>>> +#include "amdgpu_vm.h"
>>>> +#include "amdgpu_mes.h"
>>>> +#include "amdgpu_usermode_queue.h"
>>>> +#include "soc15_common.h"
>>>> +
>>>> +#define CHECK_ACCESS(a) (access_ok((const void __user *)a, 
>>>> sizeof(__u64)))
>>>> +
>>>> +static int
>>>> +amdgpu_userqueue_index(struct amdgpu_device *adev)
>>>> +{
>>>> +    int index;
>>>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>>>> +
>>>> +    index = ida_simple_get(&uqg->ida, 2, AMDGPU_MAX_USERQ, 
>>>> GFP_KERNEL);
>>>> +    return index;
>>>> +}
>>>> +
>>>> +static void
>>>> +amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct 
>>>> amdgpu_usermode_queue *queue)
>>>> +{
>>>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>>>> +
>>>> +    ida_simple_remove(&uqg->ida, queue->queue_id);
>>>> +}
>>>> +
>>>> +static int
>>>> +amdgpu_userqueue_validate_input(struct amdgpu_device *adev, struct 
>>>> drm_amdgpu_userq_mqd *mqd_in)
>>>> +{
>>>> +    if (mqd_in->queue_va == 0 || mqd_in->doorbell_handle == 0 || 
>>>> mqd_in->doorbell_offset == 0) {
>>>> +        DRM_ERROR("Invalid queue object address\n");
>>>> +        return -EINVAL;
>>>> +    }
>>>> +
>>>> +    if (mqd_in->queue_size == 0 || mqd_in->rptr_va == 0 || 
>>>> mqd_in->wptr_va == 0) {
>>>> +        DRM_ERROR("Invalid queue object value\n");
>>>> +        return -EINVAL;
>>>> +    }
>>>> +
>>>> +    if (mqd_in->ip_type < AMDGPU_HW_IP_GFX || mqd_in->ip_type >= 
>>>> AMDGPU_HW_IP_NUM) {
>>>> +        DRM_ERROR("Invalid HW IP type 0x%x\n", mqd_in->ip_type);
>>>> +        return -EINVAL;
>>>> +    }
>>>> +
>>>> +    if (!CHECK_ACCESS(mqd_in->queue_va) || 
>>>> !CHECK_ACCESS(mqd_in->rptr_va) ||
>>>> +        !CHECK_ACCESS(mqd_in->wptr_va)) {
>>>> +            DRM_ERROR("Invalid mapping of queue ptrs, access 
>>>> error\n");
>>>> +            return -EINVAL;
>>>> +    }
>>>> +
>>>> +    DRM_DEBUG_DRIVER("Input parameters to create queue are valid\n");
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +int amdgpu_userqueue_create(struct amdgpu_device *adev, struct 
>>>> drm_file *filp,
>>>> +                            union drm_amdgpu_userq *args)
>>>> +{
>>>> +    int r, pasid;
>>>> +    struct amdgpu_usermode_queue *queue;
>>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>>> +    struct amdgpu_vm *vm = &fpriv->vm;
>>>> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, args->in.ctx_id);
>>>> +    struct drm_amdgpu_userq_mqd *mqd_in = &args->in.mqd;
>>>> +
>>>> +    if (!ctx) {
>>>> +        DRM_ERROR("Invalid GPU context\n");
>>>> +        return -EINVAL;
>>>> +    }
>>>> +
>>>> +    if (vm->pasid < 0) {
>>>> +        DRM_WARN("No PASID info found\n");
>>>> +        pasid = 0;
>>>> +    }
>>>> +
>>>> +    mutex_lock(&adev->userq.userq_mutex);
>>>> +
>>>> +    queue = kzalloc(sizeof(struct amdgpu_usermode_queue), 
>>>> GFP_KERNEL);
>>>> +    if (!queue) {
>>>> +        DRM_ERROR("Failed to allocate memory for queue\n");
>>>> +        mutex_unlock(&adev->userq.userq_mutex);
>>>> +        return -ENOMEM;
>>>> +    }
>>>> +
>>>> +    r = amdgpu_userqueue_validate_input(adev, mqd_in);
>>>> +    if (r < 0) {
>>>> +        DRM_ERROR("Invalid input to create queue\n");
>>>> +        goto free_queue;
>>>> +    }
>>>> +
>>>> +    queue->vm = vm;
>>>> +    queue->pasid = pasid;
>>>> +    queue->wptr_gpu_addr = mqd_in->wptr_va;
>>>> +    queue->rptr_gpu_addr = mqd_in->rptr_va;
>>>> +    queue->queue_size = mqd_in->queue_size;
>>>> +    queue->queue_type = mqd_in->ip_type;
>>>> +    queue->paging = false;
>>>> +    queue->flags = mqd_in->flags;
>>>> +    queue->queue_id = amdgpu_userqueue_index(adev);
>>>> +
>>>> +    ctx->userq = queue;
>>>> +    args->out.q_id = queue->queue_id;
>>>> +    args->out.flags = 0;
>>>> +    mutex_unlock(&adev->userq.userq_mutex);
>>>> +    return 0;
>>>> +
>>>> +free_queue:
>>>> +    amdgpu_userqueue_remove_index(adev, queue);
>>>> +    mutex_unlock(&adev->userq.userq_mutex);
>>>> +    kfree(queue);
>>>> +    return r;
>>>> +}
>>>> +
>>>> +void amdgpu_userqueue_destroy(struct amdgpu_device *adev, struct 
>>>> drm_file *filp,
>>>> +                              union drm_amdgpu_userq *args)
>>>> +{
>>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>>> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, args->in.ctx_id);
>>>> +    struct amdgpu_usermode_queue *queue = ctx->userq;
>>>> +
>>>> +    mutex_lock(&adev->userq.userq_mutex);
>>>> +    amdgpu_userqueue_remove_index(adev, queue);
>>>> +    ctx->userq = NULL;
>>>> +    mutex_unlock(&adev->userq.userq_mutex);
>>>> +    kfree(queue);
>>>> +}
>>>> +
>>>> +int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
>>>> +               struct drm_file *filp)
>>>> +{
>>>> +    union drm_amdgpu_userq *args = data;
>>>> +    struct amdgpu_device *adev = drm_to_adev(dev);
>>>> +    int r = 0;
>>>> +
>>>> +    switch (args->in.op) {
>>>> +    case AMDGPU_USERQ_OP_CREATE:
>>>> +        r = amdgpu_userqueue_create(adev, filp, args);
>>>> +        if (r)
>>>> +            DRM_ERROR("Failed to create usermode queue\n");
>>>> +        break;
>>>> +
>>>> +    case AMDGPU_USERQ_OP_FREE:
>>>> +        amdgpu_userqueue_destroy(adev, filp, args);
>>>> +        break;
>>>> +
>>>> +    default:
>>>> +        DRM_ERROR("Invalid user queue op specified: %d\n", 
>>>> args->in.op);
>>>> +        return -EINVAL;
>>>> +    }
>>>> +
>>>> +    return r;
>>>> +}
>>>> +
>>>> +int amdgpu_userqueue_init(struct amdgpu_device *adev)
>>>> +{
>>>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>>>> +
>>>> +    mutex_init(&uqg->userq_mutex);
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +void amdgpu_userqueue_fini(struct amdgpu_device *adev)
>>>> +{
>>>> +
>>>> +}
>>>> diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h 
>>>> b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>>>> new file mode 100644
>>>> index 000000000000..c1fe39ffaf72
>>>> --- /dev/null
>>>> +++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>>>> @@ -0,0 +1,50 @@
>>>> +/*
>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>> + *
>>>> + * Permission is hereby granted, free of charge, to any person 
>>>> obtaining a
>>>> + * copy of this software and associated documentation files (the 
>>>> "Software"),
>>>> + * to deal in the Software without restriction, including without 
>>>> limitation
>>>> + * the rights to use, copy, modify, merge, publish, distribute, 
>>>> sublicense,
>>>> + * and/or sell copies of the Software, and to permit persons to 
>>>> whom the
>>>> + * Software is furnished to do so, subject to the following 
>>>> conditions:
>>>> + *
>>>> + * The above copyright notice and this permission notice shall be 
>>>> included in
>>>> + * all copies or substantial portions of the Software.
>>>> + *
>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
>>>> EXPRESS OR
>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>>> MERCHANTABILITY,
>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO 
>>>> EVENT SHALL
>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
>>>> DAMAGES OR
>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>>>> OTHERWISE,
>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
>>>> USE OR
>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>> + *
>>>> + */
>>>> +
>>>> +#ifndef AMDGPU_USERMODE_QUEUE_H_
>>>> +#define AMDGPU_USERMODE_QUEUE_H_
>>>> +
>>>> +#define AMDGPU_MAX_USERQ 512
>>>> +
>>>> +struct amdgpu_usermode_queue {
>>>> +    int        queue_id;
>>>> +    int        queue_type;
>>>> +    int        queue_size;
>>>> +    int        paging;
>>>> +    int        pasid;
>>>> +    int        use_doorbell;
>>>> +    int        doorbell_index;
>>>> +
>>>> +    uint64_t    mqd_gpu_addr;
>>>> +    uint64_t    wptr_gpu_addr;
>>>> +    uint64_t    rptr_gpu_addr;
>>>> +    uint64_t    queue_gpu_addr;
>>>> +    uint64_t    flags;
>>>> +    void         *mqd_cpu_ptr;
>>>> +
>>>> +    struct amdgpu_bo    *mqd_obj;
>>>> +    struct amdgpu_vm        *vm;
>>>> +    struct list_head     list;
>>>> +};
>>>> +
>>>> +#endif
>>>
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work
  2023-01-03  9:12         ` Shashank Sharma
@ 2023-01-03  9:15           ` Christian König
  2023-01-03  9:22             ` Shashank Sharma
  0 siblings, 1 reply; 64+ messages in thread
From: Christian König @ 2023-01-03  9:15 UTC (permalink / raw)
  To: Shashank Sharma, amd-gfx
  Cc: Alex Deucher, Christian Koenig, arvind.yadav, arunpravin.paneerselvam

Am 03.01.23 um 10:12 schrieb Shashank Sharma:
>
> On 02/01/2023 13:39, Christian König wrote:
>> Hi Shashank,
>>
>> Am 26.12.22 um 11:41 schrieb Shashank Sharma:
>>> [SNIP]
>>>>>         /* df */
>>>>>       struct amdgpu_df                df;
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>>>> index 0fa0e56daf67..f7413859b14f 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>>>> @@ -57,6 +57,7 @@ struct amdgpu_ctx {
>>>>>       unsigned long            ras_counter_ce;
>>>>>       unsigned long            ras_counter_ue;
>>>>>       uint32_t            stable_pstate;
>>>>> +    struct amdgpu_usermode_queue    *userq;
>>>>
>>>> Why should we have this in the ctx here???
>>>
>>> We are allocating a few things dynamically for the queue, which 
>>> would be valid until we destroy this queue. Also we need to save 
>>> this queue
>>>
>>> container at some place for the destroy function,  and I thought it 
>>> would make sense to keep this with the context ptr, as this is how 
>>> we are
>>>
>>> identifying the incoming request.
>>
>> I have absolutely no idea how you end up with that design.
>>
>> The ctx object is the CS IOCTL context, that is not even remotely 
>> related to anything the user queues should be doing.
>>
>> Please completely drop that relationship and don't use any of the ctx 
>> object stuff in the user queue code.
>>
> Historically the workload submission always came with a context (due 
> to CS IOCTL), so we thought it would make sense to still have its 
> relevance in the new workload submission method. Would you prefer this 
> new submission to be independent of AMDGPU context ?

Well, it's not about preference; the point is that this doesn't make any sense at all.

See the amdgpu_ctx object contains the resulting fence pointers for the 
CS IOCTL as well as information necessary for the CS IOCTL to work (e.g. 
scheduler entities etc...).

I don't see how anything from that stuff would be useful for the MES or 
user queues.

Christian.

>
> - Shashank
>
>
>> Christian.
>>
>>>
>>> - Shashank
>>>
>>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>>   };
>>>>>     struct amdgpu_ctx_mgr {
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>>>> new file mode 100644
>>>>> index 000000000000..3b6e8f75495c
>>>>> --- /dev/null
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>>>> @@ -0,0 +1,187 @@
>>>>> +/*
>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>> + *
>>>>> + * Permission is hereby granted, free of charge, to any person 
>>>>> obtaining a
>>>>> + * copy of this software and associated documentation files (the 
>>>>> "Software"),
>>>>> + * to deal in the Software without restriction, including without 
>>>>> limitation
>>>>> + * the rights to use, copy, modify, merge, publish, distribute, 
>>>>> sublicense,
>>>>> + * and/or sell copies of the Software, and to permit persons to 
>>>>> whom the
>>>>> + * Software is furnished to do so, subject to the following 
>>>>> conditions:
>>>>> + *
>>>>> + * The above copyright notice and this permission notice shall be 
>>>>> included in
>>>>> + * all copies or substantial portions of the Software.
>>>>> + *
>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 
>>>>> KIND, EXPRESS OR
>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>>>> MERCHANTABILITY,
>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO 
>>>>> EVENT SHALL
>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
>>>>> DAMAGES OR
>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>>>>> OTHERWISE,
>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
>>>>> USE OR
>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>> + *
>>>>> + */
>>>>> +
>>>>> +#include "amdgpu.h"
>>>>> +#include "amdgpu_vm.h"
>>>>> +#include "amdgpu_mes.h"
>>>>> +#include "amdgpu_usermode_queue.h"
>>>>> +#include "soc15_common.h"
>>>>> +
>>>>> +#define CHECK_ACCESS(a) (access_ok((const void __user *)a, 
>>>>> sizeof(__u64)))
>>>>> +
>>>>> +static int
>>>>> +amdgpu_userqueue_index(struct amdgpu_device *adev)
>>>>> +{
>>>>> +    int index;
>>>>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>>>>> +
>>>>> +    index = ida_simple_get(&uqg->ida, 2, AMDGPU_MAX_USERQ, 
>>>>> GFP_KERNEL);
>>>>> +    return index;
>>>>> +}
>>>>> +
>>>>> +static void
>>>>> +amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct 
>>>>> amdgpu_usermode_queue *queue)
>>>>> +{
>>>>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>>>>> +
>>>>> +    ida_simple_remove(&uqg->ida, queue->queue_id);
>>>>> +}
>>>>> +
>>>>> +static int
>>>>> +amdgpu_userqueue_validate_input(struct amdgpu_device *adev, 
>>>>> struct drm_amdgpu_userq_mqd *mqd_in)
>>>>> +{
>>>>> +    if (mqd_in->queue_va == 0 || mqd_in->doorbell_handle == 0 || 
>>>>> mqd_in->doorbell_offset == 0) {
>>>>> +        DRM_ERROR("Invalid queue object address\n");
>>>>> +        return -EINVAL;
>>>>> +    }
>>>>> +
>>>>> +    if (mqd_in->queue_size == 0 || mqd_in->rptr_va == 0 || 
>>>>> mqd_in->wptr_va == 0) {
>>>>> +        DRM_ERROR("Invalid queue object value\n");
>>>>> +        return -EINVAL;
>>>>> +    }
>>>>> +
>>>>> +    if (mqd_in->ip_type < AMDGPU_HW_IP_GFX || mqd_in->ip_type >= 
>>>>> AMDGPU_HW_IP_NUM) {
>>>>> +        DRM_ERROR("Invalid HW IP type 0x%x\n", mqd_in->ip_type);
>>>>> +        return -EINVAL;
>>>>> +    }
>>>>> +
>>>>> +    if (!CHECK_ACCESS(mqd_in->queue_va) || 
>>>>> !CHECK_ACCESS(mqd_in->rptr_va) ||
>>>>> +        !CHECK_ACCESS(mqd_in->wptr_va)) {
>>>>> +            DRM_ERROR("Invalid mapping of queue ptrs, access 
>>>>> error\n");
>>>>> +            return -EINVAL;
>>>>> +    }
>>>>> +
>>>>> +    DRM_DEBUG_DRIVER("Input parameters to create queue are 
>>>>> valid\n");
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>> +int amdgpu_userqueue_create(struct amdgpu_device *adev, struct 
>>>>> drm_file *filp,
>>>>> +                            union drm_amdgpu_userq *args)
>>>>> +{
>>>>> +    int r, pasid;
>>>>> +    struct amdgpu_usermode_queue *queue;
>>>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>>>> +    struct amdgpu_vm *vm = &fpriv->vm;
>>>>> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, args->in.ctx_id);
>>>>> +    struct drm_amdgpu_userq_mqd *mqd_in = &args->in.mqd;
>>>>> +
>>>>> +    if (!ctx) {
>>>>> +        DRM_ERROR("Invalid GPU context\n");
>>>>> +        return -EINVAL;
>>>>> +    }
>>>>> +
>>>>> +    if (vm->pasid < 0) {
>>>>> +        DRM_WARN("No PASID info found\n");
>>>>> +        pasid = 0;
>>>>> +    }
>>>>> +
>>>>> +    mutex_lock(&adev->userq.userq_mutex);
>>>>> +
>>>>> +    queue = kzalloc(sizeof(struct amdgpu_usermode_queue), 
>>>>> GFP_KERNEL);
>>>>> +    if (!queue) {
>>>>> +        DRM_ERROR("Failed to allocate memory for queue\n");
>>>>> +        mutex_unlock(&adev->userq.userq_mutex);
>>>>> +        return -ENOMEM;
>>>>> +    }
>>>>> +
>>>>> +    r = amdgpu_userqueue_validate_input(adev, mqd_in);
>>>>> +    if (r < 0) {
>>>>> +        DRM_ERROR("Invalid input to create queue\n");
>>>>> +        goto free_queue;
>>>>> +    }
>>>>> +
>>>>> +    queue->vm = vm;
>>>>> +    queue->pasid = pasid;
>>>>> +    queue->wptr_gpu_addr = mqd_in->wptr_va;
>>>>> +    queue->rptr_gpu_addr = mqd_in->rptr_va;
>>>>> +    queue->queue_size = mqd_in->queue_size;
>>>>> +    queue->queue_type = mqd_in->ip_type;
>>>>> +    queue->paging = false;
>>>>> +    queue->flags = mqd_in->flags;
>>>>> +    queue->queue_id = amdgpu_userqueue_index(adev);
>>>>> +
>>>>> +    ctx->userq = queue;
>>>>> +    args->out.q_id = queue->queue_id;
>>>>> +    args->out.flags = 0;
>>>>> +    mutex_unlock(&adev->userq.userq_mutex);
>>>>> +    return 0;
>>>>> +
>>>>> +free_queue:
>>>>> +    amdgpu_userqueue_remove_index(adev, queue);
>>>>> +    mutex_unlock(&adev->userq.userq_mutex);
>>>>> +    kfree(queue);
>>>>> +    return r;
>>>>> +}
>>>>> +
>>>>> +void amdgpu_userqueue_destroy(struct amdgpu_device *adev, struct 
>>>>> drm_file *filp,
>>>>> +                              union drm_amdgpu_userq *args)
>>>>> +{
>>>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>>>> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, args->in.ctx_id);
>>>>> +    struct amdgpu_usermode_queue *queue = ctx->userq;
>>>>> +
>>>>> +    mutex_lock(&adev->userq.userq_mutex);
>>>>> +    amdgpu_userqueue_remove_index(adev, queue);
>>>>> +    ctx->userq = NULL;
>>>>> +    mutex_unlock(&adev->userq.userq_mutex);
>>>>> +    kfree(queue);
>>>>> +}
>>>>> +
>>>>> +int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
>>>>> +               struct drm_file *filp)
>>>>> +{
>>>>> +    union drm_amdgpu_userq *args = data;
>>>>> +    struct amdgpu_device *adev = drm_to_adev(dev);
>>>>> +    int r = 0;
>>>>> +
>>>>> +    switch (args->in.op) {
>>>>> +    case AMDGPU_USERQ_OP_CREATE:
>>>>> +        r = amdgpu_userqueue_create(adev, filp, args);
>>>>> +        if (r)
>>>>> +            DRM_ERROR("Failed to create usermode queue\n");
>>>>> +        break;
>>>>> +
>>>>> +    case AMDGPU_USERQ_OP_FREE:
>>>>> +        amdgpu_userqueue_destroy(adev, filp, args);
>>>>> +        break;
>>>>> +
>>>>> +    default:
>>>>> +        DRM_ERROR("Invalid user queue op specified: %d\n", 
>>>>> args->in.op);
>>>>> +        return -EINVAL;
>>>>> +    }
>>>>> +
>>>>> +    return r;
>>>>> +}
>>>>> +
>>>>> +int amdgpu_userqueue_init(struct amdgpu_device *adev)
>>>>> +{
>>>>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>>>>> +
>>>>> +    mutex_init(&uqg->userq_mutex);
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>> +void amdgpu_userqueue_fini(struct amdgpu_device *adev)
>>>>> +{
>>>>> +
>>>>> +}
>>>>> diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h 
>>>>> b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>>>>> new file mode 100644
>>>>> index 000000000000..c1fe39ffaf72
>>>>> --- /dev/null
>>>>> +++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>>>>> @@ -0,0 +1,50 @@
>>>>> +/*
>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>> + *
>>>>> + * Permission is hereby granted, free of charge, to any person 
>>>>> obtaining a
>>>>> + * copy of this software and associated documentation files (the 
>>>>> "Software"),
>>>>> + * to deal in the Software without restriction, including without 
>>>>> limitation
>>>>> + * the rights to use, copy, modify, merge, publish, distribute, 
>>>>> sublicense,
>>>>> + * and/or sell copies of the Software, and to permit persons to 
>>>>> whom the
>>>>> + * Software is furnished to do so, subject to the following 
>>>>> conditions:
>>>>> + *
>>>>> + * The above copyright notice and this permission notice shall be 
>>>>> included in
>>>>> + * all copies or substantial portions of the Software.
>>>>> + *
>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 
>>>>> KIND, EXPRESS OR
>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>>>> MERCHANTABILITY,
>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO 
>>>>> EVENT SHALL
>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
>>>>> DAMAGES OR
>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>>>>> OTHERWISE,
>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
>>>>> USE OR
>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>> + *
>>>>> + */
>>>>> +
>>>>> +#ifndef AMDGPU_USERMODE_QUEUE_H_
>>>>> +#define AMDGPU_USERMODE_QUEUE_H_
>>>>> +
>>>>> +#define AMDGPU_MAX_USERQ 512
>>>>> +
>>>>> +struct amdgpu_usermode_queue {
>>>>> +    int        queue_id;
>>>>> +    int        queue_type;
>>>>> +    int        queue_size;
>>>>> +    int        paging;
>>>>> +    int        pasid;
>>>>> +    int        use_doorbell;
>>>>> +    int        doorbell_index;
>>>>> +
>>>>> +    uint64_t    mqd_gpu_addr;
>>>>> +    uint64_t    wptr_gpu_addr;
>>>>> +    uint64_t    rptr_gpu_addr;
>>>>> +    uint64_t    queue_gpu_addr;
>>>>> +    uint64_t    flags;
>>>>> +    void         *mqd_cpu_ptr;
>>>>> +
>>>>> +    struct amdgpu_bo    *mqd_obj;
>>>>> +    struct amdgpu_vm        *vm;
>>>>> +    struct list_head     list;
>>>>> +};
>>>>> +
>>>>> +#endif
>>>>
>>


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work
  2022-12-29 17:41   ` Alex Deucher
  2023-01-02 13:53     ` Christian König
@ 2023-01-03  9:16     ` Shashank Sharma
  1 sibling, 0 replies; 64+ messages in thread
From: Shashank Sharma @ 2023-01-03  9:16 UTC (permalink / raw)
  To: Alex Deucher
  Cc: Alex Deucher, arvind.yadav, Christian Koenig, amd-gfx,
	arunpravin.paneerselvam


On 29/12/2022 18:41, Alex Deucher wrote:
> On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma <shashank.sharma@amd.com> wrote:
>> This patch adds skeleton code for usermode queue creation. It
>> typically contains:
>> - A new structure to keep all the user queue data in one place.
>> - An IOCTL function to create/free a usermode queue.
>> - A function to generate unique index for the queue.
>> - A global ptr in amdgpu_dev
>>
>> Cc: Alex Deucher <alexander.deucher@amd.com>
>> Cc: Christian Koenig <christian.koenig@amd.com>
>> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/Makefile           |   2 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h           |   6 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h       |   1 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 187 ++++++++++++++++++
>>   .../drm/amd/include/amdgpu_usermode_queue.h   |  50 +++++
>>   5 files changed, 246 insertions(+)
>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>   create mode 100644 drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
>> index 6ad39cf71bdd..e2a34ee57bfb 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>> @@ -209,6 +209,8 @@ amdgpu-y += \
>>   # add amdkfd interfaces
>>   amdgpu-y += amdgpu_amdkfd.o
>>
>> +# add usermode queue
>> +amdgpu-y += amdgpu_userqueue.o
>>
>>   ifneq ($(CONFIG_HSA_AMD),)
>>   AMDKFD_PATH := ../amdkfd
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 8639a4f9c6e8..4b566fcfca18 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -749,6 +749,11 @@ struct amdgpu_mqd {
>>                          struct amdgpu_mqd_prop *p);
>>   };
>>
>> +struct amdgpu_userq_globals {
>> +       struct ida ida;
>> +       struct mutex userq_mutex;
>> +};
>> +
>>   #define AMDGPU_RESET_MAGIC_NUM 64
>>   #define AMDGPU_MAX_DF_PERFMONS 4
>>   #define AMDGPU_PRODUCT_NAME_LEN 64
>> @@ -955,6 +960,7 @@ struct amdgpu_device {
>>          bool                            enable_mes_kiq;
>>          struct amdgpu_mes               mes;
>>          struct amdgpu_mqd               mqds[AMDGPU_HW_IP_NUM];
>> +       struct amdgpu_userq_globals     userq;
>>
>>          /* df */
>>          struct amdgpu_df                df;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>> index 0fa0e56daf67..f7413859b14f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>> @@ -57,6 +57,7 @@ struct amdgpu_ctx {
>>          unsigned long                   ras_counter_ce;
>>          unsigned long                   ras_counter_ue;
>>          uint32_t                        stable_pstate;
>> +       struct amdgpu_usermode_queue    *userq;
> There can be multiple queues per context.  We should make this a list.

Noted, will change it into a list. We are still discussing (in another 
thread) whether we have to move this from the context to some place else.
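
For illustration, a minimal sketch of what list-based bookkeeping could 
look like (the owning structure and field names are placeholders, not 
part of the posted patch; locking via the RFC's userq_mutex is assumed):

    /* sketch only: replace the single ctx->userq pointer with a list */
    struct list_head	userq_list;	/* in the owning structure */

    /* on create, under adev->userq.userq_mutex */
    list_add_tail(&queue->list, &owner->userq_list);

    /* on destroy, find the queue by its id before freeing it */
    struct amdgpu_usermode_queue *q, *found = NULL;

    list_for_each_entry(q, &owner->userq_list, list) {
    	if (q->queue_id == queue_id) {
    		found = q;
    		break;
    	}
    }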

>>   };
>>
>>   struct amdgpu_ctx_mgr {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> new file mode 100644
>> index 000000000000..3b6e8f75495c
>> --- /dev/null
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> @@ -0,0 +1,187 @@
>> +/*
>> + * Copyright 2022 Advanced Micro Devices, Inc.
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>> + * OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> + */
>> +
>> +#include "amdgpu.h"
>> +#include "amdgpu_vm.h"
>> +#include "amdgpu_mes.h"
>> +#include "amdgpu_usermode_queue.h"
>> +#include "soc15_common.h"
>> +
>> +#define CHECK_ACCESS(a) (access_ok((const void __user *)a, sizeof(__u64)))
>> +
>> +static int
>> +amdgpu_userqueue_index(struct amdgpu_device *adev)
>> +{
>> +    int index;
>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>> +
>> +    index = ida_simple_get(&uqg->ida, 2, AMDGPU_MAX_USERQ, GFP_KERNEL);
>> +    return index;
>> +}
>> +
>> +static void
>> +amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
>> +{
>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>> +
>> +    ida_simple_remove(&uqg->ida, queue->queue_id);
>> +}
>> +
>> +static int
>> +amdgpu_userqueue_validate_input(struct amdgpu_device *adev, struct drm_amdgpu_userq_mqd *mqd_in)
>> +{
>> +    if (mqd_in->queue_va == 0 || mqd_in->doorbell_handle == 0 || mqd_in->doorbell_offset == 0) {
>> +        DRM_ERROR("Invalid queue object address\n");
>> +        return -EINVAL;
>> +    }
>> +
>> +    if (mqd_in->queue_size == 0 || mqd_in->rptr_va == 0 || mqd_in->wptr_va == 0) {
>> +        DRM_ERROR("Invalid queue object value\n");
>> +        return -EINVAL;
>> +    }
>> +
>> +    if (mqd_in->ip_type < AMDGPU_HW_IP_GFX || mqd_in->ip_type >= AMDGPU_HW_IP_NUM) {
>> +        DRM_ERROR("Invalid HW IP type 0x%x\n", mqd_in->ip_type);
>> +        return -EINVAL;
>> +    }
>> +
>> +    if (!CHECK_ACCESS(mqd_in->queue_va) || !CHECK_ACCESS(mqd_in->rptr_va) ||
>> +        !CHECK_ACCESS(mqd_in->wptr_va)) {
>> +            DRM_ERROR("Invalid mapping of queue ptrs, access error\n");
>> +            return -EINVAL;
>> +    }
> Need to check the flags as well.

Noted

- Shashank
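
For illustration, a flags check in amdgpu_userqueue_validate_input() 
could be as simple as rejecting unknown bits (the mask name below is a 
placeholder; the RFC does not define any queue flags yet):

    /* sketch only: AMDGPU_USERQ_MQD_FLAGS_VALID_MASK is a placeholder */
    if (mqd_in->flags & ~AMDGPU_USERQ_MQD_FLAGS_VALID_MASK) {
    	DRM_ERROR("Invalid user queue flags 0x%llx\n",
    		  (unsigned long long)mqd_in->flags);
    	return -EINVAL;
    }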

>
>> +
>> +    DRM_DEBUG_DRIVER("Input parameters to create queue are valid\n");
>> +    return 0;
>> +}
>> +
>> +int amdgpu_userqueue_create(struct amdgpu_device *adev, struct drm_file *filp,
>> +                            union drm_amdgpu_userq *args)
>> +{
>> +    int r, pasid;
>> +    struct amdgpu_usermode_queue *queue;
>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>> +    struct amdgpu_vm *vm = &fpriv->vm;
>> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, args->in.ctx_id);
>> +    struct drm_amdgpu_userq_mqd *mqd_in = &args->in.mqd;
>> +
>> +    if (!ctx) {
>> +        DRM_ERROR("Invalid GPU context\n");
>> +        return -EINVAL;
>> +    }
>> +
>> +    if (vm->pasid < 0) {
>> +        DRM_WARN("No PASID info found\n");
>> +        pasid = 0;
>> +    }
>> +
>> +    mutex_lock(&adev->userq.userq_mutex);
>> +
>> +    queue = kzalloc(sizeof(struct amdgpu_usermode_queue), GFP_KERNEL);
>> +    if (!queue) {
>> +        DRM_ERROR("Failed to allocate memory for queue\n");
>> +        mutex_unlock(&adev->userq.userq_mutex);
>> +        return -ENOMEM;
>> +    }
>> +
>> +    r = amdgpu_userqueue_validate_input(adev, mqd_in);
>> +    if (r < 0) {
>> +        DRM_ERROR("Invalid input to create queue\n");
>> +        goto free_queue;
>> +    }
>> +
>> +    queue->vm = vm;
>> +    queue->pasid = pasid;
>> +    queue->wptr_gpu_addr = mqd_in->wptr_va;
>> +    queue->rptr_gpu_addr = mqd_in->rptr_va;
>> +    queue->queue_size = mqd_in->queue_size;
>> +    queue->queue_type = mqd_in->ip_type;
>> +    queue->paging = false;
>> +    queue->flags = mqd_in->flags;
>> +    queue->queue_id = amdgpu_userqueue_index(adev);
>> +
>> +    ctx->userq = queue;
>> +    args->out.q_id = queue->queue_id;
>> +    args->out.flags = 0;
>> +    mutex_unlock(&adev->userq.userq_mutex);
>> +    return 0;
>> +
>> +free_queue:
>> +    amdgpu_userqueue_remove_index(adev, queue);
>> +    mutex_unlock(&adev->userq.userq_mutex);
>> +    kfree(queue);
>> +    return r;
>> +}
>> +
>> +void amdgpu_userqueue_destroy(struct amdgpu_device *adev, struct drm_file *filp,
>> +                              union drm_amdgpu_userq *args)
>> +{
>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, args->in.ctx_id);
>> +    struct amdgpu_usermode_queue *queue = ctx->userq;
>> +
>> +    mutex_lock(&adev->userq.userq_mutex);
>> +    amdgpu_userqueue_remove_index(adev, queue);
>> +    ctx->userq = NULL;
>> +    mutex_unlock(&adev->userq.userq_mutex);
>> +    kfree(queue);
>> +}
>> +
>> +int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
>> +                      struct drm_file *filp)
>> +{
>> +    union drm_amdgpu_userq *args = data;
>> +    struct amdgpu_device *adev = drm_to_adev(dev);
>> +    int r = 0;
>> +
>> +    switch (args->in.op) {
>> +    case AMDGPU_USERQ_OP_CREATE:
>> +        r = amdgpu_userqueue_create(adev, filp, args);
>> +        if (r)
>> +            DRM_ERROR("Failed to create usermode queue\n");
>> +        break;
>> +
>> +    case AMDGPU_USERQ_OP_FREE:
>> +        amdgpu_userqueue_destroy(adev, filp, args);
>> +        break;
>> +
>> +    default:
>> +        DRM_ERROR("Invalid user queue op specified: %d\n", args->in.op);
>> +        return -EINVAL;
>> +    }
>> +
>> +    return r;
>> +}
>> +
>> +int amdgpu_userqueue_init(struct amdgpu_device *adev)
>> +{
>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>> +
>> +    mutex_init(&uqg->userq_mutex);
>> +    return 0;
>> +}
>> +
>> +void amdgpu_userqueue_fini(struct amdgpu_device *adev)
>> +{
>> +
>> +}
>> diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>> new file mode 100644
>> index 000000000000..c1fe39ffaf72
>> --- /dev/null
>> +++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>> @@ -0,0 +1,50 @@
>> +/*
>> + * Copyright 2022 Advanced Micro Devices, Inc.
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>> + * OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> + */
>> +
>> +#ifndef AMDGPU_USERMODE_QUEUE_H_
>> +#define AMDGPU_USERMODE_QUEUE_H_
>> +
>> +#define AMDGPU_MAX_USERQ 512
>> +
>> +struct amdgpu_usermode_queue {
>> +       int             queue_id;
>> +       int             queue_type;
>> +       int             queue_size;
>> +       int             paging;
>> +       int             pasid;
>> +       int             use_doorbell;
>> +       int             doorbell_index;
>> +
>> +       uint64_t        mqd_gpu_addr;
>> +       uint64_t        wptr_gpu_addr;
>> +       uint64_t        rptr_gpu_addr;
>> +       uint64_t        queue_gpu_addr;
>> +       uint64_t        flags;
>> +       void            *mqd_cpu_ptr;
>> +
>> +       struct amdgpu_bo        *mqd_obj;
>> +       struct amdgpu_vm        *vm;
>> +       struct list_head        list;
>> +};
>> +
>> +#endif
>> --
>> 2.34.1
>>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work
  2023-01-03  9:15           ` Christian König
@ 2023-01-03  9:22             ` Shashank Sharma
  2023-01-03  9:35               ` Christian König
  0 siblings, 1 reply; 64+ messages in thread
From: Shashank Sharma @ 2023-01-03  9:22 UTC (permalink / raw)
  To: Christian König, amd-gfx
  Cc: Alex Deucher, Christian Koenig, arvind.yadav, arunpravin.paneerselvam


On 03/01/2023 10:15, Christian König wrote:
> Am 03.01.23 um 10:12 schrieb Shashank Sharma:
>>
>> On 02/01/2023 13:39, Christian König wrote:
>>> Hi Shashank,
>>>
>>> Am 26.12.22 um 11:41 schrieb Shashank Sharma:
>>>> [SNIP]
>>>>>>         /* df */
>>>>>>       struct amdgpu_df                df;
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>>>>> index 0fa0e56daf67..f7413859b14f 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>>>>> @@ -57,6 +57,7 @@ struct amdgpu_ctx {
>>>>>>       unsigned long            ras_counter_ce;
>>>>>>       unsigned long            ras_counter_ue;
>>>>>>       uint32_t            stable_pstate;
>>>>>> +    struct amdgpu_usermode_queue    *userq;
>>>>>
>>>>> Why should we have this in the ctx here???
>>>>
>>>> We are allocating a few things dynamically for the queue, which 
>>>> would be valid until we destroy this queue. Also we need to save 
>>>> this queue
>>>>
>>>> container at some place for the destroy function,  and I thought it 
>>>> would make sense to keep this with the context ptr, as this is how 
>>>> we are
>>>>
>>>> identifying the incoming request.
>>>
>>> I have absolutely no idea how you end up with that design.
>>>
>>> The ctx object is the CS IOCTL context, that is not even remotely 
>>> related to anything the user queues should be doing.
>>>
>>> Please completely drop that relationship and don't use any of the 
>>> ctx object stuff in the user queue code.
>>>
>> Historically the workload submission always came with a context (due 
>> to CS IOCTL), so we thought it would make sense to still have its 
>> relevance in the new workload submission method. Would you prefer 
>> this new submission to be independent of AMDGPU context ?
>
> Well, it's not about preference; the point is that this doesn't make any sense at all.
>
> See the amdgpu_ctx object contains the resulting fence pointers for 
> the CS IOCTL as well as information necessary for the CS IOCTL to work 
> (e.g. scheduler entities etc...).
>
> I don't see how anything from that stuff would be useful for the MES 
> or user queues.
>
> Christian.


I am getting your point, and it makes sense as well. But in such a 
scenario, we might have to create something parallel (say an 
AMDGPU_USERQ_CTX) which does very much the same thing.

We can still do it to make a logically separate entity, but any 
suggestions on where to keep this userq_ctx ptr (if not in adev, and 
also not in ctx)?

- Shashank


>
>>
>> - Shashank
>>
>>
>>> Christian.
>>>
>>>>
>>>> - Shashank
>>>>
>>>>>
>>>>> Regards,
>>>>> Christian.
>>>>>
>>>>>>   };
>>>>>>     struct amdgpu_ctx_mgr {
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>>>>> new file mode 100644
>>>>>> index 000000000000..3b6e8f75495c
>>>>>> --- /dev/null
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>>>>> @@ -0,0 +1,187 @@
>>>>>> +/*
>>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>>> + *
>>>>>> + * Permission is hereby granted, free of charge, to any person 
>>>>>> obtaining a
>>>>>> + * copy of this software and associated documentation files (the 
>>>>>> "Software"),
>>>>>> + * to deal in the Software without restriction, including 
>>>>>> without limitation
>>>>>> + * the rights to use, copy, modify, merge, publish, distribute, 
>>>>>> sublicense,
>>>>>> + * and/or sell copies of the Software, and to permit persons to 
>>>>>> whom the
>>>>>> + * Software is furnished to do so, subject to the following 
>>>>>> conditions:
>>>>>> + *
>>>>>> + * The above copyright notice and this permission notice shall 
>>>>>> be included in
>>>>>> + * all copies or substantial portions of the Software.
>>>>>> + *
>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 
>>>>>> KIND, EXPRESS OR
>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>>>>> MERCHANTABILITY,
>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO 
>>>>>> EVENT SHALL
>>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
>>>>>> DAMAGES OR
>>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>>>>>> OTHERWISE,
>>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR 
>>>>>> THE USE OR
>>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>>> + *
>>>>>> + */
>>>>>> +
>>>>>> +#include "amdgpu.h"
>>>>>> +#include "amdgpu_vm.h"
>>>>>> +#include "amdgpu_mes.h"
>>>>>> +#include "amdgpu_usermode_queue.h"
>>>>>> +#include "soc15_common.h"
>>>>>> +
>>>>>> +#define CHECK_ACCESS(a) (access_ok((const void __user *)a, 
>>>>>> sizeof(__u64)))
>>>>>> +
>>>>>> +static int
>>>>>> +amdgpu_userqueue_index(struct amdgpu_device *adev)
>>>>>> +{
>>>>>> +    int index;
>>>>>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>>>>>> +
>>>>>> +    index = ida_simple_get(&uqg->ida, 2, AMDGPU_MAX_USERQ, 
>>>>>> GFP_KERNEL);
>>>>>> +    return index;
>>>>>> +}
>>>>>> +
>>>>>> +static void
>>>>>> +amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct 
>>>>>> amdgpu_usermode_queue *queue)
>>>>>> +{
>>>>>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>>>>>> +
>>>>>> +    ida_simple_remove(&uqg->ida, queue->queue_id);
>>>>>> +}
>>>>>> +
>>>>>> +static int
>>>>>> +amdgpu_userqueue_validate_input(struct amdgpu_device *adev, 
>>>>>> struct drm_amdgpu_userq_mqd *mqd_in)
>>>>>> +{
>>>>>> +    if (mqd_in->queue_va == 0 || mqd_in->doorbell_handle == 0 || 
>>>>>> mqd_in->doorbell_offset == 0) {
>>>>>> +        DRM_ERROR("Invalid queue object address\n");
>>>>>> +        return -EINVAL;
>>>>>> +    }
>>>>>> +
>>>>>> +    if (mqd_in->queue_size == 0 || mqd_in->rptr_va == 0 || 
>>>>>> mqd_in->wptr_va == 0) {
>>>>>> +        DRM_ERROR("Invalid queue object value\n");
>>>>>> +        return -EINVAL;
>>>>>> +    }
>>>>>> +
>>>>>> +    if (mqd_in->ip_type < AMDGPU_HW_IP_GFX || mqd_in->ip_type >= 
>>>>>> AMDGPU_HW_IP_NUM) {
>>>>>> +        DRM_ERROR("Invalid HW IP type 0x%x\n", mqd_in->ip_type);
>>>>>> +        return -EINVAL;
>>>>>> +    }
>>>>>> +
>>>>>> +    if (!CHECK_ACCESS(mqd_in->queue_va) || 
>>>>>> !CHECK_ACCESS(mqd_in->rptr_va) ||
>>>>>> +        !CHECK_ACCESS(mqd_in->wptr_va)) {
>>>>>> +            DRM_ERROR("Invalid mapping of queue ptrs, access 
>>>>>> error\n");
>>>>>> +            return -EINVAL;
>>>>>> +    }
>>>>>> +
>>>>>> +    DRM_DEBUG_DRIVER("Input parameters to create queue are 
>>>>>> valid\n");
>>>>>> +    return 0;
>>>>>> +}
>>>>>> +
>>>>>> +int amdgpu_userqueue_create(struct amdgpu_device *adev, struct 
>>>>>> drm_file *filp,
>>>>>> +                            union drm_amdgpu_userq *args)
>>>>>> +{
>>>>>> +    int r, pasid;
>>>>>> +    struct amdgpu_usermode_queue *queue;
>>>>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>>>>> +    struct amdgpu_vm *vm = &fpriv->vm;
>>>>>> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, 
>>>>>> args->in.ctx_id);
>>>>>> +    struct drm_amdgpu_userq_mqd *mqd_in = &args->in.mqd;
>>>>>> +
>>>>>> +    if (!ctx) {
>>>>>> +        DRM_ERROR("Invalid GPU context\n");
>>>>>> +        return -EINVAL;
>>>>>> +    }
>>>>>> +
>>>>>> +    if (vm->pasid < 0) {
>>>>>> +        DRM_WARN("No PASID info found\n");
>>>>>> +        pasid = 0;
>>>>>> +    }
>>>>>> +
>>>>>> +    mutex_lock(&adev->userq.userq_mutex);
>>>>>> +
>>>>>> +    queue = kzalloc(sizeof(struct amdgpu_usermode_queue), 
>>>>>> GFP_KERNEL);
>>>>>> +    if (!queue) {
>>>>>> +        DRM_ERROR("Failed to allocate memory for queue\n");
>>>>>> +        mutex_unlock(&adev->userq.userq_mutex);
>>>>>> +        return -ENOMEM;
>>>>>> +    }
>>>>>> +
>>>>>> +    r = amdgpu_userqueue_validate_input(adev, mqd_in);
>>>>>> +    if (r < 0) {
>>>>>> +        DRM_ERROR("Invalid input to create queue\n");
>>>>>> +        goto free_queue;
>>>>>> +    }
>>>>>> +
>>>>>> +    queue->vm = vm;
>>>>>> +    queue->pasid = pasid;
>>>>>> +    queue->wptr_gpu_addr = mqd_in->wptr_va;
>>>>>> +    queue->rptr_gpu_addr = mqd_in->rptr_va;
>>>>>> +    queue->queue_size = mqd_in->queue_size;
>>>>>> +    queue->queue_type = mqd_in->ip_type;
>>>>>> +    queue->paging = false;
>>>>>> +    queue->flags = mqd_in->flags;
>>>>>> +    queue->queue_id = amdgpu_userqueue_index(adev);
>>>>>> +
>>>>>> +    ctx->userq = queue;
>>>>>> +    args->out.q_id = queue->queue_id;
>>>>>> +    args->out.flags = 0;
>>>>>> +    mutex_unlock(&adev->userq.userq_mutex);
>>>>>> +    return 0;
>>>>>> +
>>>>>> +free_queue:
>>>>>> +    amdgpu_userqueue_remove_index(adev, queue);
>>>>>> +    mutex_unlock(&adev->userq.userq_mutex);
>>>>>> +    kfree(queue);
>>>>>> +    return r;
>>>>>> +}
>>>>>> +
>>>>>> +void amdgpu_userqueue_destroy(struct amdgpu_device *adev, struct 
>>>>>> drm_file *filp,
>>>>>> +                              union drm_amdgpu_userq *args)
>>>>>> +{
>>>>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>>>>> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, 
>>>>>> args->in.ctx_id);
>>>>>> +    struct amdgpu_usermode_queue *queue = ctx->userq;
>>>>>> +
>>>>>> +    mutex_lock(&adev->userq.userq_mutex);
>>>>>> +    amdgpu_userqueue_remove_index(adev, queue);
>>>>>> +    ctx->userq = NULL;
>>>>>> +    mutex_unlock(&adev->userq.userq_mutex);
>>>>>> +    kfree(queue);
>>>>>> +}
>>>>>> +
>>>>>> +int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
>>>>>> +               struct drm_file *filp)
>>>>>> +{
>>>>>> +    union drm_amdgpu_userq *args = data;
>>>>>> +    struct amdgpu_device *adev = drm_to_adev(dev);
>>>>>> +    int r = 0;
>>>>>> +
>>>>>> +    switch (args->in.op) {
>>>>>> +    case AMDGPU_USERQ_OP_CREATE:
>>>>>> +        r = amdgpu_userqueue_create(adev, filp, args);
>>>>>> +        if (r)
>>>>>> +            DRM_ERROR("Failed to create usermode queue\n");
>>>>>> +        break;
>>>>>> +
>>>>>> +    case AMDGPU_USERQ_OP_FREE:
>>>>>> +        amdgpu_userqueue_destroy(adev, filp, args);
>>>>>> +        break;
>>>>>> +
>>>>>> +    default:
>>>>>> +        DRM_ERROR("Invalid user queue op specified: %d\n", 
>>>>>> args->in.op);
>>>>>> +        return -EINVAL;
>>>>>> +    }
>>>>>> +
>>>>>> +    return r;
>>>>>> +}
>>>>>> +
>>>>>> +int amdgpu_userqueue_init(struct amdgpu_device *adev)
>>>>>> +{
>>>>>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>>>>>> +
>>>>>> +    mutex_init(&uqg->userq_mutex);
>>>>>> +    return 0;
>>>>>> +}
>>>>>> +
>>>>>> +void amdgpu_userqueue_fini(struct amdgpu_device *adev)
>>>>>> +{
>>>>>> +
>>>>>> +}
>>>>>> diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h 
>>>>>> b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>>>>>> new file mode 100644
>>>>>> index 000000000000..c1fe39ffaf72
>>>>>> --- /dev/null
>>>>>> +++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>>>>>> @@ -0,0 +1,50 @@
>>>>>> +/*
>>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>>> + *
>>>>>> + * Permission is hereby granted, free of charge, to any person 
>>>>>> obtaining a
>>>>>> + * copy of this software and associated documentation files (the 
>>>>>> "Software"),
>>>>>> + * to deal in the Software without restriction, including 
>>>>>> without limitation
>>>>>> + * the rights to use, copy, modify, merge, publish, distribute, 
>>>>>> sublicense,
>>>>>> + * and/or sell copies of the Software, and to permit persons to 
>>>>>> whom the
>>>>>> + * Software is furnished to do so, subject to the following 
>>>>>> conditions:
>>>>>> + *
>>>>>> + * The above copyright notice and this permission notice shall 
>>>>>> be included in
>>>>>> + * all copies or substantial portions of the Software.
>>>>>> + *
>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 
>>>>>> KIND, EXPRESS OR
>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>>>>> MERCHANTABILITY,
>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO 
>>>>>> EVENT SHALL
>>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
>>>>>> DAMAGES OR
>>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>>>>>> OTHERWISE,
>>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR 
>>>>>> THE USE OR
>>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>>> + *
>>>>>> + */
>>>>>> +
>>>>>> +#ifndef AMDGPU_USERMODE_QUEUE_H_
>>>>>> +#define AMDGPU_USERMODE_QUEUE_H_
>>>>>> +
>>>>>> +#define AMDGPU_MAX_USERQ 512
>>>>>> +
>>>>>> +struct amdgpu_usermode_queue {
>>>>>> +    int        queue_id;
>>>>>> +    int        queue_type;
>>>>>> +    int        queue_size;
>>>>>> +    int        paging;
>>>>>> +    int        pasid;
>>>>>> +    int        use_doorbell;
>>>>>> +    int        doorbell_index;
>>>>>> +
>>>>>> +    uint64_t    mqd_gpu_addr;
>>>>>> +    uint64_t    wptr_gpu_addr;
>>>>>> +    uint64_t    rptr_gpu_addr;
>>>>>> +    uint64_t    queue_gpu_addr;
>>>>>> +    uint64_t    flags;
>>>>>> +    void         *mqd_cpu_ptr;
>>>>>> +
>>>>>> +    struct amdgpu_bo    *mqd_obj;
>>>>>> +    struct amdgpu_vm        *vm;
>>>>>> +    struct list_head     list;
>>>>>> +};
>>>>>> +
>>>>>> +#endif
>>>>>
>>>
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work
  2023-01-02 13:53     ` Christian König
@ 2023-01-03  9:32       ` Shashank Sharma
  0 siblings, 0 replies; 64+ messages in thread
From: Shashank Sharma @ 2023-01-03  9:32 UTC (permalink / raw)
  To: Christian König, Alex Deucher
  Cc: Alex Deucher, amd-gfx, Christian Koenig, arvind.yadav,
	arunpravin.paneerselvam


On 02/01/2023 14:53, Christian König wrote:
> Am 29.12.22 um 18:41 schrieb Alex Deucher:
>> On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma 
>> <shashank.sharma@amd.com> wrote:
>>> This patch adds skeleton code for usermode queue creation. It
>>> typically contains:
>>> - A new structure to keep all the user queue data in one place.
>>> - An IOCTL function to create/free a usermode queue.
>>> - A function to generate unique index for the queue.
>>> - A global ptr in amdgpu_dev
>>>
>>> Cc: Alex Deucher <alexander.deucher@amd.com>
>>> Cc: Christian Koenig <christian.koenig@amd.com>
>>> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/Makefile           |   2 +
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h           |   6 +
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h       |   1 +
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 187 
>>> ++++++++++++++++++
>>>   .../drm/amd/include/amdgpu_usermode_queue.h   |  50 +++++
>>>   5 files changed, 246 insertions(+)
>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>>   create mode 100644 
>>> drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
>>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>>> index 6ad39cf71bdd..e2a34ee57bfb 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>>> @@ -209,6 +209,8 @@ amdgpu-y += \
>>>   # add amdkfd interfaces
>>>   amdgpu-y += amdgpu_amdkfd.o
>>>
>>> +# add usermode queue
>>> +amdgpu-y += amdgpu_userqueue.o
>>>
>>>   ifneq ($(CONFIG_HSA_AMD),)
>>>   AMDKFD_PATH := ../amdkfd
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index 8639a4f9c6e8..4b566fcfca18 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -749,6 +749,11 @@ struct amdgpu_mqd {
>>>                          struct amdgpu_mqd_prop *p);
>>>   };
>>>
>>> +struct amdgpu_userq_globals {
>>> +       struct ida ida;
>>> +       struct mutex userq_mutex;
>>> +};
>>> +
>>>   #define AMDGPU_RESET_MAGIC_NUM 64
>>>   #define AMDGPU_MAX_DF_PERFMONS 4
>>>   #define AMDGPU_PRODUCT_NAME_LEN 64
>>> @@ -955,6 +960,7 @@ struct amdgpu_device {
>>>          bool                            enable_mes_kiq;
>>>          struct amdgpu_mes               mes;
>>>          struct amdgpu_mqd mqds[AMDGPU_HW_IP_NUM];
>>> +       struct amdgpu_userq_globals     userq;
>>>
>>>          /* df */
>>>          struct amdgpu_df                df;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>> index 0fa0e56daf67..f7413859b14f 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>> @@ -57,6 +57,7 @@ struct amdgpu_ctx {
>>>          unsigned long                   ras_counter_ce;
>>>          unsigned long                   ras_counter_ue;
>>>          uint32_t                        stable_pstate;
>>> +       struct amdgpu_usermode_queue    *userq;
>> There can be multiple queues per context.  We should make this a list.
>>
>>>   };
>>>
>>>   struct amdgpu_ctx_mgr {
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>> new file mode 100644
>>> index 000000000000..3b6e8f75495c
>>> --- /dev/null
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>> @@ -0,0 +1,187 @@
>>> +/*
>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>> + *
>>> + * Permission is hereby granted, free of charge, to any person 
>>> obtaining a
>>> + * copy of this software and associated documentation files (the 
>>> "Software"),
>>> + * to deal in the Software without restriction, including without 
>>> limitation
>>> + * the rights to use, copy, modify, merge, publish, distribute, 
>>> sublicense,
>>> + * and/or sell copies of the Software, and to permit persons to 
>>> whom the
>>> + * Software is furnished to do so, subject to the following 
>>> conditions:
>>> + *
>>> + * The above copyright notice and this permission notice shall be 
>>> included in
>>> + * all copies or substantial portions of the Software.
>>> + *
>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
>>> EXPRESS OR
>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>> MERCHANTABILITY,
>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO 
>>> EVENT SHALL
>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
>>> DAMAGES OR
>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>>> OTHERWISE,
>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
>>> USE OR
>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>> + *
>>> + */
>>> +
>>> +#include "amdgpu.h"
>>> +#include "amdgpu_vm.h"
>>> +#include "amdgpu_mes.h"
>>> +#include "amdgpu_usermode_queue.h"
>>> +#include "soc15_common.h"
>>> +
>>> +#define CHECK_ACCESS(a) (access_ok((const void __user *)a, 
>>> sizeof(__u64)))
>
> You seem to have a very very big misunderstanding here.
>
> access_ok() is used for CPU pointer validation, but these are pointers 
> into the GPUVM address space. This is something completely different!

Thanks, it seems like there is a misunderstanding on my side about the 
definition of these input parameters; let me follow up.

- Shashank
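
For reference, validating a GPU VA means checking the client's VM 
mappings rather than calling access_ok(). A rough sketch (assuming 
amdgpu_vm_bo_lookup_mapping() is a suitable helper here; that choice is 
an assumption, not what the series settled on, and VM locking is 
omitted for brevity):

    /* sketch only: is this GPU VA actually mapped in the client's VM? */
    static bool amdgpu_userqueue_va_mapped(struct amdgpu_vm *vm, u64 va)
    {
    	struct amdgpu_bo_va_mapping *mapping;

    	/* the lookup helper works on GPU-page-granular addresses */
    	mapping = amdgpu_vm_bo_lookup_mapping(vm, va >> AMDGPU_GPU_PAGE_SHIFT);

    	return mapping != NULL;
    }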


>
> Regards,
> Christian.
>
>>> +
>>> +static int
>>> +amdgpu_userqueue_index(struct amdgpu_device *adev)
>>> +{
>>> +    int index;
>>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>>> +
>>> +    index = ida_simple_get(&uqg->ida, 2, AMDGPU_MAX_USERQ, 
>>> GFP_KERNEL);
>>> +    return index;
>>> +}
>>> +
>>> +static void
>>> +amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct 
>>> amdgpu_usermode_queue *queue)
>>> +{
>>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>>> +
>>> +    ida_simple_remove(&uqg->ida, queue->queue_id);
>>> +}
>>> +
>>> +static int
>>> +amdgpu_userqueue_validate_input(struct amdgpu_device *adev, struct 
>>> drm_amdgpu_userq_mqd *mqd_in)
>>> +{
>>> +    if (mqd_in->queue_va == 0 || mqd_in->doorbell_handle == 0 || 
>>> mqd_in->doorbell_offset == 0) {
>>> +        DRM_ERROR("Invalid queue object address\n");
>>> +        return -EINVAL;
>>> +    }
>>> +
>>> +    if (mqd_in->queue_size == 0 || mqd_in->rptr_va == 0 || 
>>> mqd_in->wptr_va == 0) {
>>> +        DRM_ERROR("Invalid queue object value\n");
>>> +        return -EINVAL;
>>> +    }
>>> +
>>> +    if (mqd_in->ip_type < AMDGPU_HW_IP_GFX || mqd_in->ip_type >= 
>>> AMDGPU_HW_IP_NUM) {
>>> +        DRM_ERROR("Invalid HW IP type 0x%x\n", mqd_in->ip_type);
>>> +        return -EINVAL;
>>> +    }
>>> +
>>> +    if (!CHECK_ACCESS(mqd_in->queue_va) || 
>>> !CHECK_ACCESS(mqd_in->rptr_va) ||
>>> +        !CHECK_ACCESS(mqd_in->wptr_va)) {
>>> +            DRM_ERROR("Invalid mapping of queue ptrs, access 
>>> error\n");
>>> +            return -EINVAL;
>>> +    }
>> Need to check the flags as well.
>>
>>> +
>>> +    DRM_DEBUG_DRIVER("Input parameters to create queue are valid\n");
>>> +    return 0;
>>> +}
>>> +
>>> +int amdgpu_userqueue_create(struct amdgpu_device *adev, struct 
>>> drm_file *filp,
>>> +                            union drm_amdgpu_userq *args)
>>> +{
>>> +    int r, pasid;
>>> +    struct amdgpu_usermode_queue *queue;
>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>> +    struct amdgpu_vm *vm = &fpriv->vm;
>>> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, args->in.ctx_id);
>>> +    struct drm_amdgpu_userq_mqd *mqd_in = &args->in.mqd;
>>> +
>>> +    if (!ctx) {
>>> +        DRM_ERROR("Invalid GPU context\n");
>>> +        return -EINVAL;
>>> +    }
>>> +
>>> +    if (vm->pasid < 0) {
>>> +        DRM_WARN("No PASID info found\n");
>>> +        pasid = 0;
>>> +    }
>>> +
>>> +    mutex_lock(&adev->userq.userq_mutex);
>>> +
>>> +    queue = kzalloc(sizeof(struct amdgpu_usermode_queue), GFP_KERNEL);
>>> +    if (!queue) {
>>> +        DRM_ERROR("Failed to allocate memory for queue\n");
>>> +        mutex_unlock(&adev->userq.userq_mutex);
>>> +        return -ENOMEM;
>>> +    }
>>> +
>>> +    r = amdgpu_userqueue_validate_input(adev, mqd_in);
>>> +    if (r < 0) {
>>> +        DRM_ERROR("Invalid input to create queue\n");
>>> +        goto free_queue;
>>> +    }
>>> +
>>> +    queue->vm = vm;
>>> +    queue->pasid = pasid;
>>> +    queue->wptr_gpu_addr = mqd_in->wptr_va;
>>> +    queue->rptr_gpu_addr = mqd_in->rptr_va;
>>> +    queue->queue_size = mqd_in->queue_size;
>>> +    queue->queue_type = mqd_in->ip_type;
>>> +    queue->paging = false;
>>> +    queue->flags = mqd_in->flags;
>>> +    queue->queue_id = amdgpu_userqueue_index(adev);
>>> +
>>> +    ctx->userq = queue;
>>> +    args->out.q_id = queue->queue_id;
>>> +    args->out.flags = 0;
>>> +    mutex_unlock(&adev->userq.userq_mutex);
>>> +    return 0;
>>> +
>>> +free_queue:
>>> +    amdgpu_userqueue_remove_index(adev, queue);
>>> +    mutex_unlock(&adev->userq.userq_mutex);
>>> +    kfree(queue);
>>> +    return r;
>>> +}
>>> +
>>> +void amdgpu_userqueue_destroy(struct amdgpu_device *adev, struct 
>>> drm_file *filp,
>>> +                              union drm_amdgpu_userq *args)
>>> +{
>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, args->in.ctx_id);
>>> +    struct amdgpu_usermode_queue *queue = ctx->userq;
>>> +
>>> +    mutex_lock(&adev->userq.userq_mutex);
>>> +    amdgpu_userqueue_remove_index(adev, queue);
>>> +    ctx->userq = NULL;
>>> +    mutex_unlock(&adev->userq.userq_mutex);
>>> +    kfree(queue);
>>> +}
>>> +
>>> +int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
>>> +                      struct drm_file *filp)
>>> +{
>>> +    union drm_amdgpu_userq *args = data;
>>> +    struct amdgpu_device *adev = drm_to_adev(dev);
>>> +    int r = 0;
>>> +
>>> +    switch (args->in.op) {
>>> +    case AMDGPU_USERQ_OP_CREATE:
>>> +        r = amdgpu_userqueue_create(adev, filp, args);
>>> +        if (r)
>>> +            DRM_ERROR("Failed to create usermode queue\n");
>>> +        break;
>>> +
>>> +    case AMDGPU_USERQ_OP_FREE:
>>> +        amdgpu_userqueue_destroy(adev, filp, args);
>>> +        break;
>>> +
>>> +    default:
>>> +        DRM_ERROR("Invalid user queue op specified: %d\n", 
>>> args->in.op);
>>> +        return -EINVAL;
>>> +    }
>>> +
>>> +    return r;
>>> +}
>>> +
>>> +int amdgpu_userqueue_init(struct amdgpu_device *adev)
>>> +{
>>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>>> +
>>> +    mutex_init(&uqg->userq_mutex);
>>> +    return 0;
>>> +}
>>> +
>>> +void amdgpu_userqueue_fini(struct amdgpu_device *adev)
>>> +{
>>> +
>>> +}
>>> diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h 
>>> b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>>> new file mode 100644
>>> index 000000000000..c1fe39ffaf72
>>> --- /dev/null
>>> +++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>>> @@ -0,0 +1,50 @@
>>> +/*
>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>> + *
>>> + * Permission is hereby granted, free of charge, to any person 
>>> obtaining a
>>> + * copy of this software and associated documentation files (the 
>>> "Software"),
>>> + * to deal in the Software without restriction, including without 
>>> limitation
>>> + * the rights to use, copy, modify, merge, publish, distribute, 
>>> sublicense,
>>> + * and/or sell copies of the Software, and to permit persons to 
>>> whom the
>>> + * Software is furnished to do so, subject to the following 
>>> conditions:
>>> + *
>>> + * The above copyright notice and this permission notice shall be 
>>> included in
>>> + * all copies or substantial portions of the Software.
>>> + *
>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
>>> EXPRESS OR
>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>> MERCHANTABILITY,
>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO 
>>> EVENT SHALL
>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
>>> DAMAGES OR
>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>>> OTHERWISE,
>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
>>> USE OR
>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>> + *
>>> + */
>>> +
>>> +#ifndef AMDGPU_USERMODE_QUEUE_H_
>>> +#define AMDGPU_USERMODE_QUEUE_H_
>>> +
>>> +#define AMDGPU_MAX_USERQ 512
>>> +
>>> +struct amdgpu_usermode_queue {
>>> +       int             queue_id;
>>> +       int             queue_type;
>>> +       int             queue_size;
>>> +       int             paging;
>>> +       int             pasid;
>>> +       int             use_doorbell;
>>> +       int             doorbell_index;
>>> +
>>> +       uint64_t        mqd_gpu_addr;
>>> +       uint64_t        wptr_gpu_addr;
>>> +       uint64_t        rptr_gpu_addr;
>>> +       uint64_t        queue_gpu_addr;
>>> +       uint64_t        flags;
>>> +       void            *mqd_cpu_ptr;
>>> +
>>> +       struct amdgpu_bo        *mqd_obj;
>>> +       struct amdgpu_vm        *vm;
>>> +       struct list_head        list;
>>> +};
>>> +
>>> +#endif
>>> -- 
>>> 2.34.1
>>>
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work
  2023-01-03  9:22             ` Shashank Sharma
@ 2023-01-03  9:35               ` Christian König
  2023-01-03 14:34                 ` Alex Deucher
  0 siblings, 1 reply; 64+ messages in thread
From: Christian König @ 2023-01-03  9:35 UTC (permalink / raw)
  To: Shashank Sharma, amd-gfx
  Cc: Alex Deucher, Christian Koenig, arvind.yadav, arunpravin.paneerselvam

Am 03.01.23 um 10:22 schrieb Shashank Sharma:
>
> On 03/01/2023 10:15, Christian König wrote:
>> Am 03.01.23 um 10:12 schrieb Shashank Sharma:
>>>
>>> On 02/01/2023 13:39, Christian König wrote:
>>>> Hi Shashank,
>>>>
>>>> Am 26.12.22 um 11:41 schrieb Shashank Sharma:
>>>>> [SNIP]
>>>>>>>         /* df */
>>>>>>>       struct amdgpu_df                df;
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>>>>>> index 0fa0e56daf67..f7413859b14f 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>>>>>> @@ -57,6 +57,7 @@ struct amdgpu_ctx {
>>>>>>>       unsigned long            ras_counter_ce;
>>>>>>>       unsigned long            ras_counter_ue;
>>>>>>>       uint32_t            stable_pstate;
>>>>>>> +    struct amdgpu_usermode_queue    *userq;
>>>>>>
>>>>>> Why should we have this in the ctx here???
>>>>>
>>>>> We are allocating a few things dynamically for the queue, which 
>>>>> would be valid until we destroy this queue. Also we need to save 
>>>>> this queue
>>>>>
>>>>> container at some place for the destroy function,  and I thought 
>>>>> it would make sense to keep this with the context ptr, as this is 
>>>>> how we are
>>>>>
>>>>> identifying the incoming request.
>>>>
>>>> I have absolutely no idea how you end up with that design.
>>>>
>>>> The ctx object is the CS IOCTL context, that is not even remotely 
>>>> related to anything the user queues should be doing.
>>>>
>>>> Please completely drop that relationship and don't use any of the 
>>>> ctx object stuff in the user queue code.
>>>>
>>> Historically the workload submission always came with a context (due 
>>> to CS IOCTL), so we thought it would make sense to still have its 
>>> relevance in the new workload submission method. Would you prefer 
>>> this new submission to be independent of AMDGPU context ?
>>
>> Well, it's not about preference; the point is that this doesn't make any sense at all.
>>
>> See the amdgpu_ctx object contains the resulting fence pointers for 
>> the CS IOCTL as well as information necessary for the CS IOCTL to 
>> work (e.g. scheduler entities etc...).
>>
>> I don't see how anything from that stuff would be useful for the MES 
>> or user queues.
>>
>> Christian.
>
>
> I am getting your point, and it makes sense as well. But in such a 
> scenario, we might have to create something parallel (say an 
> AMDGPU_USERQ_CTX) which does very much the same thing.
>
> We can still do it to make a logically separate entity, but any 
> suggestions on where to keep this userq_ctx ptr (if not in adev, and 
> also not in ctx)?


Take a look at the amdgpu_ctx_mgr object with its mutex and idr, and how 
it is embedded into the amdgpu_fpriv object. It should become pretty 
clear from there.

I don't think we need a userq_ctx or similar; each userq should be an 
independent object. What we need is a userq_mgr object which holds the 
collection of all the userq objects the client application has created 
through its fpriv connection to the driver.

Regards,
Christian.
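
A rough sketch of what such a manager could look like, mirroring 
amdgpu_ctx_mgr (the names, the placement and the idr range below are 
illustrative only, not part of the posted series):

    struct amdgpu_userq_mgr {
    	struct amdgpu_device	*adev;
    	struct mutex		userq_lock;
    	struct idr		userq_idr;	/* queue_id -> amdgpu_usermode_queue */
    };

    /* embedded in amdgpu_fpriv, next to the existing ctx_mgr */

    /* on create, under mgr->userq_lock */
    queue->queue_id = idr_alloc(&mgr->userq_idr, queue, 1,
    				AMDGPU_MAX_USERQ, GFP_KERNEL);

    /* on destroy, under mgr->userq_lock */
    queue = idr_remove(&mgr->userq_idr, queue_id);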

>
> - Shashank
>
>
>>
>>>
>>> - Shashank
>>>
>>>
>>>> Christian.
>>>>
>>>>>
>>>>> - Shashank
>>>>>
>>>>>>
>>>>>> Regards,
>>>>>> Christian.
>>>>>>
>>>>>>>   };
>>>>>>>     struct amdgpu_ctx_mgr {
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>>>>>> new file mode 100644
>>>>>>> index 000000000000..3b6e8f75495c
>>>>>>> --- /dev/null
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>>>>>> @@ -0,0 +1,187 @@
>>>>>>> +/*
>>>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>>>> + *
>>>>>>> + * Permission is hereby granted, free of charge, to any person 
>>>>>>> obtaining a
>>>>>>> + * copy of this software and associated documentation files 
>>>>>>> (the "Software"),
>>>>>>> + * to deal in the Software without restriction, including 
>>>>>>> without limitation
>>>>>>> + * the rights to use, copy, modify, merge, publish, distribute, 
>>>>>>> sublicense,
>>>>>>> + * and/or sell copies of the Software, and to permit persons to 
>>>>>>> whom the
>>>>>>> + * Software is furnished to do so, subject to the following 
>>>>>>> conditions:
>>>>>>> + *
>>>>>>> + * The above copyright notice and this permission notice shall 
>>>>>>> be included in
>>>>>>> + * all copies or substantial portions of the Software.
>>>>>>> + *
>>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 
>>>>>>> KIND, EXPRESS OR
>>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>>>>>> MERCHANTABILITY,
>>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO 
>>>>>>> EVENT SHALL
>>>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY 
>>>>>>> CLAIM, DAMAGES OR
>>>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>>>>>>> OTHERWISE,
>>>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR 
>>>>>>> THE USE OR
>>>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>>>> + *
>>>>>>> + */
>>>>>>> +
>>>>>>> +#include "amdgpu.h"
>>>>>>> +#include "amdgpu_vm.h"
>>>>>>> +#include "amdgpu_mes.h"
>>>>>>> +#include "amdgpu_usermode_queue.h"
>>>>>>> +#include "soc15_common.h"
>>>>>>> +
>>>>>>> +#define CHECK_ACCESS(a) (access_ok((const void __user *)a, 
>>>>>>> sizeof(__u64)))
>>>>>>> +
>>>>>>> +static int
>>>>>>> +amdgpu_userqueue_index(struct amdgpu_device *adev)
>>>>>>> +{
>>>>>>> +    int index;
>>>>>>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>>>>>>> +
>>>>>>> +    index = ida_simple_get(&uqg->ida, 2, AMDGPU_MAX_USERQ, 
>>>>>>> GFP_KERNEL);
>>>>>>> +    return index;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void
>>>>>>> +amdgpu_userqueue_remove_index(struct amdgpu_device *adev, 
>>>>>>> struct amdgpu_usermode_queue *queue)
>>>>>>> +{
>>>>>>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>>>>>>> +
>>>>>>> +    ida_simple_remove(&uqg->ida, queue->queue_id);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static int
>>>>>>> +amdgpu_userqueue_validate_input(struct amdgpu_device *adev, 
>>>>>>> struct drm_amdgpu_userq_mqd *mqd_in)
>>>>>>> +{
>>>>>>> +    if (mqd_in->queue_va == 0 || mqd_in->doorbell_handle == 0 
>>>>>>> || mqd_in->doorbell_offset == 0) {
>>>>>>> +        DRM_ERROR("Invalid queue object address\n");
>>>>>>> +        return -EINVAL;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    if (mqd_in->queue_size == 0 || mqd_in->rptr_va == 0 || 
>>>>>>> mqd_in->wptr_va == 0) {
>>>>>>> +        DRM_ERROR("Invalid queue object value\n");
>>>>>>> +        return -EINVAL;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    if (mqd_in->ip_type < AMDGPU_HW_IP_GFX || mqd_in->ip_type 
>>>>>>> >= AMDGPU_HW_IP_NUM) {
>>>>>>> +        DRM_ERROR("Invalid HW IP type 0x%x\n", mqd_in->ip_type);
>>>>>>> +        return -EINVAL;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    if (!CHECK_ACCESS(mqd_in->queue_va) || 
>>>>>>> !CHECK_ACCESS(mqd_in->rptr_va) ||
>>>>>>> +        !CHECK_ACCESS(mqd_in->wptr_va)) {
>>>>>>> +            DRM_ERROR("Invalid mapping of queue ptrs, access 
>>>>>>> error\n");
>>>>>>> +            return -EINVAL;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    DRM_DEBUG_DRIVER("Input parameters to create queue are 
>>>>>>> valid\n");
>>>>>>> +    return 0;
>>>>>>> +}
>>>>>>> +
>>>>>>> +int amdgpu_userqueue_create(struct amdgpu_device *adev, struct 
>>>>>>> drm_file *filp,
>>>>>>> +                            union drm_amdgpu_userq *args)
>>>>>>> +{
>>>>>>> +    int r, pasid;
>>>>>>> +    struct amdgpu_usermode_queue *queue;
>>>>>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>>>>>> +    struct amdgpu_vm *vm = &fpriv->vm;
>>>>>>> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, 
>>>>>>> args->in.ctx_id);
>>>>>>> +    struct drm_amdgpu_userq_mqd *mqd_in = &args->in.mqd;
>>>>>>> +
>>>>>>> +    if (!ctx) {
>>>>>>> +        DRM_ERROR("Invalid GPU context\n");
>>>>>>> +        return -EINVAL;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    if (vm->pasid < 0) {
>>>>>>> +        DRM_WARN("No PASID info found\n");
>>>>>>> +        pasid = 0;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    mutex_lock(&adev->userq.userq_mutex);
>>>>>>> +
>>>>>>> +    queue = kzalloc(sizeof(struct amdgpu_usermode_queue), 
>>>>>>> GFP_KERNEL);
>>>>>>> +    if (!queue) {
>>>>>>> +        DRM_ERROR("Failed to allocate memory for queue\n");
>>>>>>> + mutex_unlock(&adev->userq.userq_mutex);
>>>>>>> +        return -ENOMEM;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    r = amdgpu_userqueue_validate_input(adev, mqd_in);
>>>>>>> +    if (r < 0) {
>>>>>>> +        DRM_ERROR("Invalid input to create queue\n");
>>>>>>> +        goto free_queue;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    queue->vm = vm;
>>>>>>> +    queue->pasid = pasid;
>>>>>>> +    queue->wptr_gpu_addr = mqd_in->wptr_va;
>>>>>>> +    queue->rptr_gpu_addr = mqd_in->rptr_va;
>>>>>>> +    queue->queue_size = mqd_in->queue_size;
>>>>>>> +    queue->queue_type = mqd_in->ip_type;
>>>>>>> +    queue->paging = false;
>>>>>>> +    queue->flags = mqd_in->flags;
>>>>>>> +    queue->queue_id = amdgpu_userqueue_index(adev);
>>>>>>> +
>>>>>>> +    ctx->userq = queue;
>>>>>>> +    args->out.q_id = queue->queue_id;
>>>>>>> +    args->out.flags = 0;
>>>>>>> +    mutex_unlock(&adev->userq.userq_mutex);
>>>>>>> +    return 0;
>>>>>>> +
>>>>>>> +free_queue:
>>>>>>> +    amdgpu_userqueue_remove_index(adev, queue);
>>>>>>> +    mutex_unlock(&adev->userq.userq_mutex);
>>>>>>> +    kfree(queue);
>>>>>>> +    return r;
>>>>>>> +}
>>>>>>> +
>>>>>>> +void amdgpu_userqueue_destroy(struct amdgpu_device *adev, 
>>>>>>> struct drm_file *filp,
>>>>>>> +                              union drm_amdgpu_userq *args)
>>>>>>> +{
>>>>>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>>>>>> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, 
>>>>>>> args->in.ctx_id);
>>>>>>> +    struct amdgpu_usermode_queue *queue = ctx->userq;
>>>>>>> +
>>>>>>> +    mutex_lock(&adev->userq.userq_mutex);
>>>>>>> +    amdgpu_userqueue_remove_index(adev, queue);
>>>>>>> +    ctx->userq = NULL;
>>>>>>> +    mutex_unlock(&adev->userq.userq_mutex);
>>>>>>> +    kfree(queue);
>>>>>>> +}
>>>>>>> +
>>>>>>> +int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
>>>>>>> +               struct drm_file *filp)
>>>>>>> +{
>>>>>>> +    union drm_amdgpu_userq *args = data;
>>>>>>> +    struct amdgpu_device *adev = drm_to_adev(dev);
>>>>>>> +    int r = 0;
>>>>>>> +
>>>>>>> +    switch (args->in.op) {
>>>>>>> +    case AMDGPU_USERQ_OP_CREATE:
>>>>>>> +        r = amdgpu_userqueue_create(adev, filp, args);
>>>>>>> +        if (r)
>>>>>>> +            DRM_ERROR("Failed to create usermode queue\n");
>>>>>>> +        break;
>>>>>>> +
>>>>>>> +    case AMDGPU_USERQ_OP_FREE:
>>>>>>> +        amdgpu_userqueue_destroy(adev, filp, args);
>>>>>>> +        break;
>>>>>>> +
>>>>>>> +    default:
>>>>>>> +        DRM_ERROR("Invalid user queue op specified: %d\n", 
>>>>>>> args->in.op);
>>>>>>> +        return -EINVAL;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    return r;
>>>>>>> +}
>>>>>>> +
>>>>>>> +int amdgpu_userqueue_init(struct amdgpu_device *adev)
>>>>>>> +{
>>>>>>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>>>>>>> +
>>>>>>> +    mutex_init(&uqg->userq_mutex);
>>>>>>> +    return 0;
>>>>>>> +}
>>>>>>> +
>>>>>>> +void amdgpu_userqueue_fini(struct amdgpu_device *adev)
>>>>>>> +{
>>>>>>> +
>>>>>>> +}
>>>>>>> diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h 
>>>>>>> b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>>>>>>> new file mode 100644
>>>>>>> index 000000000000..c1fe39ffaf72
>>>>>>> --- /dev/null
>>>>>>> +++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>>>>>>> @@ -0,0 +1,50 @@
>>>>>>> +/*
>>>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>>>> + *
>>>>>>> + * Permission is hereby granted, free of charge, to any person 
>>>>>>> obtaining a
>>>>>>> + * copy of this software and associated documentation files 
>>>>>>> (the "Software"),
>>>>>>> + * to deal in the Software without restriction, including 
>>>>>>> without limitation
>>>>>>> + * the rights to use, copy, modify, merge, publish, distribute, 
>>>>>>> sublicense,
>>>>>>> + * and/or sell copies of the Software, and to permit persons to 
>>>>>>> whom the
>>>>>>> + * Software is furnished to do so, subject to the following 
>>>>>>> conditions:
>>>>>>> + *
>>>>>>> + * The above copyright notice and this permission notice shall 
>>>>>>> be included in
>>>>>>> + * all copies or substantial portions of the Software.
>>>>>>> + *
>>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 
>>>>>>> KIND, EXPRESS OR
>>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>>>>>> MERCHANTABILITY,
>>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO 
>>>>>>> EVENT SHALL
>>>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY 
>>>>>>> CLAIM, DAMAGES OR
>>>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>>>>>>> OTHERWISE,
>>>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR 
>>>>>>> THE USE OR
>>>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>>>> + *
>>>>>>> + */
>>>>>>> +
>>>>>>> +#ifndef AMDGPU_USERMODE_QUEUE_H_
>>>>>>> +#define AMDGPU_USERMODE_QUEUE_H_
>>>>>>> +
>>>>>>> +#define AMDGPU_MAX_USERQ 512
>>>>>>> +
>>>>>>> +struct amdgpu_usermode_queue {
>>>>>>> +    int        queue_id;
>>>>>>> +    int        queue_type;
>>>>>>> +    int        queue_size;
>>>>>>> +    int        paging;
>>>>>>> +    int        pasid;
>>>>>>> +    int        use_doorbell;
>>>>>>> +    int        doorbell_index;
>>>>>>> +
>>>>>>> +    uint64_t    mqd_gpu_addr;
>>>>>>> +    uint64_t    wptr_gpu_addr;
>>>>>>> +    uint64_t    rptr_gpu_addr;
>>>>>>> +    uint64_t    queue_gpu_addr;
>>>>>>> +    uint64_t    flags;
>>>>>>> +    void         *mqd_cpu_ptr;
>>>>>>> +
>>>>>>> +    struct amdgpu_bo    *mqd_obj;
>>>>>>> +    struct amdgpu_vm        *vm;
>>>>>>> +    struct list_head     list;
>>>>>>> +};
>>>>>>> +
>>>>>>> +#endif
>>>>>>
>>>>
>>


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 3/7] drm/amdgpu: Create MQD for userspace queue
  2022-12-29 17:47   ` Alex Deucher
@ 2023-01-03  9:36     ` Shashank Sharma
  2023-01-03 18:37       ` Felix Kuehling
  0 siblings, 1 reply; 64+ messages in thread
From: Shashank Sharma @ 2023-01-03  9:36 UTC (permalink / raw)
  To: Alex Deucher
  Cc: Alex Deucher, arvind.yadav, Christian Koenig, amd-gfx,
	arunpravin.paneerselvam


On 29/12/2022 18:47, Alex Deucher wrote:
> On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma <shashank.sharma@amd.com> wrote:
>> From: Arvind Yadav <arvind.yadav@amd.com>
>>
>> MQD describes the properties of a user queue to the HW, and allows it to
>> accurately configure the queue while mapping it in GPU HW. This patch
>> adds:
>> - A new header file which contains the MQD definition
>> - A new function which creates an MQD object and fills it with userqueue
>>    data
>>
>> Cc: Alex Deucher <alexander.deucher@amd.com>
>> Cc: Christian Koenig <christian.koenig@amd.com>
>>
>> Signed-off-by: Arvind Yadav <arvind.yadav@amd.com>
>> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 138 +++++
>>   .../amd/include/amdgpu_usermode_queue_mqd.h   | 544 ++++++++++++++++++
>>   2 files changed, 682 insertions(+)
>>   create mode 100644 drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> index 3b6e8f75495c..a91cc304cb9e 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> @@ -25,7 +25,10 @@
>>   #include "amdgpu_vm.h"
>>   #include "amdgpu_mes.h"
>>   #include "amdgpu_usermode_queue.h"
>> +#include "amdgpu_usermode_queue_mqd.h"
>>   #include "soc15_common.h"
>> +#include "gc/gc_11_0_0_offset.h"
>> +#include "gc/gc_11_0_0_sh_mask.h"
> Don't add IP specific code to this file.
Noted,
>>   #define CHECK_ACCESS(a) (access_ok((const void __user *)a, sizeof(__u64)))
>>
>> @@ -47,6 +50,134 @@ amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct amdgpu_usermode
>>       ida_simple_remove(&uqg->ida, queue->queue_id);
>>   }
>>
>> +static void
>> +amdgpu_userqueue_setup_mqd(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
> This should be a ring callback or some new IP level callback to init
> an MQD since we'll need this for multiple IP types and generations of
> IPs.

We are still using the MES ring which has an existing callback for this, 
but I think it allows an IP specific callback as well. I will check that 
out.
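
A rough sketch of how such an IP-level hook could be wired up, assuming a
per-IP function table installed at IP init time. The table, field and
member names here are illustrative only, not what the series implements.

/*
 * Illustrative only: a per-IP callback table so the generic userqueue
 * code never touches GC-version-specific registers or MQD layouts.
 */
struct amdgpu_userq_funcs {
	int (*init_mqd)(struct amdgpu_device *adev,
			struct amdgpu_usermode_queue *queue);
	void (*free_mqd)(struct amdgpu_device *adev,
			 struct amdgpu_usermode_queue *queue);
};

/* The generic create path would then reduce to a dispatch: */
static int amdgpu_userqueue_init_mqd(struct amdgpu_device *adev,
				     struct amdgpu_usermode_queue *queue)
{
	const struct amdgpu_userq_funcs *funcs =
		adev->userq_funcs[queue->queue_type];	/* hypothetical table */

	if (!funcs || !funcs->init_mqd)
		return -EOPNOTSUPP;

	return funcs->init_mqd(adev, queue);
}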

>> +{
>> +    struct amdgpu_usermode_queue_mqd *mqd = queue->mqd_cpu_ptr;
>> +    uint64_t hqd_gpu_addr, wb_gpu_addr;
>> +    uint32_t tmp;
>> +    uint32_t rb_bufsz;
>> +
>> +    /* set up gfx hqd wptr */
>> +    mqd->cp_gfx_hqd_wptr = 0;
>> +    mqd->cp_gfx_hqd_wptr_hi = 0;
>> +
>> +    /* set the pointer to the MQD */
>> +    mqd->cp_mqd_base_addr = queue->mqd_gpu_addr & 0xfffffffc;
>> +    mqd->cp_mqd_base_addr_hi = upper_32_bits(queue->mqd_gpu_addr);
>> +
>> +    /* set up mqd control */
>> +    tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL);
>> +    tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
>> +    tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
>> +    tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
>> +    mqd->cp_gfx_mqd_control = tmp;
>> +
>> +    /* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */
>> +    tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID);
>> +    tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
>> +    mqd->cp_gfx_hqd_vmid = 0;
>> +
>> +    /* set up default queue priority level
>> +    * 0x0 = low priority, 0x1 = high priority */
>> +    tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
>> +    tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
>> +    mqd->cp_gfx_hqd_queue_priority = tmp;
>> +
>> +    /* set up time quantum */
>> +    tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
>> +    tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
>> +    mqd->cp_gfx_hqd_quantum = tmp;
>> +
>> +    /* set up gfx hqd base. this is similar as CP_RB_BASE */
>> +    hqd_gpu_addr = queue->queue_gpu_addr >> 8;
>> +    mqd->cp_gfx_hqd_base = hqd_gpu_addr;
>> +    mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
>> +
>> +    /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */
>> +    wb_gpu_addr = queue->rptr_gpu_addr;
>> +    mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
>> +    mqd->cp_gfx_hqd_rptr_addr_hi =
>> +    upper_32_bits(wb_gpu_addr) & 0xffff;
>> +
>> +    /* set up rb_wptr_poll addr */
>> +    wb_gpu_addr = queue->wptr_gpu_addr;
>> +    mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
>> +    mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
>> +
>> +    /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
>> +    rb_bufsz = order_base_2(queue->queue_size / 4) - 1;
>> +    tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL);
>> +    tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
>> +    tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
>> +#ifdef __BIG_ENDIAN
>> +    tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
>> +#endif
>> +    mqd->cp_gfx_hqd_cntl = tmp;
>> +
>> +    /* set up cp_doorbell_control */
>> +    tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
>> +    if (queue->use_doorbell) {
>> +        tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
>> +                    DOORBELL_OFFSET, queue->doorbell_index);
>> +        tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
>> +                    DOORBELL_EN, 1);
>> +    } else {
>> +        tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
>> +                    DOORBELL_EN, 0);
>> +    }
>> +    mqd->cp_rb_doorbell_control = tmp;
>> +
>> +    /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
>> +    mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
>> +
>> +    /* activate the queue */
>> +    mqd->cp_gfx_hqd_active = 1;
>> +}
>> +
>> +static int
>> +amdgpu_userqueue_create_mqd(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
>> +{
>> +    int r;
>> +    int size = sizeof(struct amdgpu_usermode_queue_mqd);
>> +
>> +    r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
>> +                                AMDGPU_GEM_DOMAIN_VRAM,
>> +                                &queue->mqd_obj,
>> +                                &queue->mqd_gpu_addr,
>> +                                &queue->mqd_cpu_ptr);
>> +    if (r) {
>> +        DRM_ERROR("Failed to allocate bo for userqueue (%d)", r);
>> +        return r;
>> +    }
>> +
>> +    memset(queue->mqd_cpu_ptr, 0, size);
>> +    r = amdgpu_bo_reserve(queue->mqd_obj, false);
>> +    if (unlikely(r != 0)) {
>> +        DRM_ERROR("Failed to reserve mqd for userqueue (%d)", r);
>> +        goto free_mqd;
>> +    }
>> +
>> +    /* Fill MQD with userqueue data */
>> +    amdgpu_userqueue_setup_mqd(adev, queue);
>> +    amdgpu_bo_unreserve(queue->mqd_obj);
>> +    return 0;
>> +
>> +free_mqd:
>> +    amdgpu_bo_free_kernel(&queue->mqd_obj,
>> +                           &queue->mqd_gpu_addr,
>> +                           &queue->mqd_cpu_ptr);
>> +    return r;
>> +}
>> +
>> +static void
>> +amdgpu_userqueue_destroy_mqd(struct amdgpu_usermode_queue *queue)
>> +{
>> +    amdgpu_bo_free_kernel(&queue->mqd_obj,
>> +                           &queue->mqd_gpu_addr,
>> +                           &queue->mqd_cpu_ptr);
>> +}
>> +
>>   static int
>>   amdgpu_userqueue_validate_input(struct amdgpu_device *adev, struct drm_amdgpu_userq_mqd *mqd_in)
>>   {
>> @@ -120,6 +251,12 @@ int amdgpu_userqueue_create(struct amdgpu_device *adev, struct drm_file *filp,
>>       queue->flags = mqd_in->flags;
>>       queue->queue_id = amdgpu_userqueue_index(adev);
>>
>> +    r = amdgpu_userqueue_create_mqd(adev, queue);
>> +    if (r < 0) {
>> +        DRM_ERROR("Failed to create mqd for queue\n");
>> +        goto free_queue;
>> +    }
>> +
>>       ctx->userq = queue;
>>       args->out.q_id = queue->queue_id;
>>       args->out.flags = 0;
>> @@ -141,6 +278,7 @@ void amdgpu_userqueue_destroy(struct amdgpu_device *adev, struct drm_file *filp,
>>       struct amdgpu_usermode_queue *queue = ctx->userq;
>>
>>       mutex_lock(&adev->userq.userq_mutex);
>> +    amdgpu_userqueue_destroy_mqd(queue);
>>       amdgpu_userqueue_remove_index(adev, queue);
>>       ctx->userq = NULL;
>>       mutex_unlock(&adev->userq.userq_mutex);
>> diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h
>> new file mode 100644
>> index 000000000000..d0a285708ba5
>> --- /dev/null
>> +++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h
>> @@ -0,0 +1,544 @@
>> +/*
>> + * Copyright 2022 Advanced Micro Devices, Inc.
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>> + * OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> + */
>> +
>> +#ifndef AMDGPU_USERMODE_QUEUE_MQD_H_
>> +#define AMDGPU_USERMODE_QUEUE_MQD_H_
>> +
>> +/*MQD struct for usermode Queue*/
>> +struct amdgpu_usermode_queue_mqd
> This is specific to GC 11.  Every IP and version will have its own MQD
> format.  That should live in the IP specific code, not the generic
> code.  We already have the generic MQD parameters that we need from
> the userq IOCTL.

Noted, we can separate out the generic parameters from the 
generation-specific ones, and will try to wrap them in a generic structure.

- Shashank
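
One possible shape for that split, with purely illustrative field names: a
small generic wrapper that the common code understands, while the actual
HW MQD layout (such as the GC 11 structure in the patch above) stays
opaque and is owned by the IP-specific code.

/*
 * Sketch of a generic/HW split (names illustrative). The common code only
 * ever sees this wrapper; the IP code allocates the backing BO and casts
 * cpu_ptr to its own layout when filling the MQD.
 */
struct amdgpu_userq_mqd {
	struct amdgpu_bo	*obj;		/* backing BO */
	uint64_t		gpu_addr;	/* GPU VA of the HW MQD */
	void			*cpu_ptr;	/* cast to the IP layout by IP code */
	uint32_t		size;		/* size of the IP-specific layout */
};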

>
> Alex
>
>> +{
>> +       uint32_t shadow_base_lo; // offset: 0  (0x0)
>> +       uint32_t shadow_base_hi; // offset: 1  (0x1)
>> +       uint32_t gds_bkup_base_lo ; // offset: 2  (0x2)
>> +       uint32_t gds_bkup_base_hi ; // offset: 3  (0x3)
>> +       uint32_t fw_work_area_base_lo; // offset: 4  (0x4)
>> +       uint32_t fw_work_area_base_hi; // offset: 5  (0x5)
>> +       uint32_t shadow_initialized; // offset: 6  (0x6)
>> +       uint32_t ib_vmid; // offset: 7  (0x7)
>> +       uint32_t reserved_8; // offset: 8  (0x8)
>> +       uint32_t reserved_9; // offset: 9  (0x9)
>> +       uint32_t reserved_10; // offset: 10  (0xA)
>> +       uint32_t reserved_11; // offset: 11  (0xB)
>> +       uint32_t reserved_12; // offset: 12  (0xC)
>> +       uint32_t reserved_13; // offset: 13  (0xD)
>> +       uint32_t reserved_14; // offset: 14  (0xE)
>> +       uint32_t reserved_15; // offset: 15  (0xF)
>> +       uint32_t reserved_16; // offset: 16  (0x10)
>> +       uint32_t reserved_17; // offset: 17  (0x11)
>> +       uint32_t reserved_18; // offset: 18  (0x12)
>> +       uint32_t reserved_19; // offset: 19  (0x13)
>> +       uint32_t reserved_20; // offset: 20  (0x14)
>> +       uint32_t reserved_21; // offset: 21  (0x15)
>> +       uint32_t reserved_22; // offset: 22  (0x16)
>> +       uint32_t reserved_23; // offset: 23  (0x17)
>> +       uint32_t reserved_24; // offset: 24  (0x18)
>> +       uint32_t reserved_25; // offset: 25  (0x19)
>> +       uint32_t reserved_26; // offset: 26  (0x1A)
>> +       uint32_t reserved_27; // offset: 27  (0x1B)
>> +       uint32_t reserved_28; // offset: 28  (0x1C)
>> +       uint32_t reserved_29; // offset: 29  (0x1D)
>> +       uint32_t reserved_30; // offset: 30  (0x1E)
>> +       uint32_t reserved_31; // offset: 31  (0x1F)
>> +       uint32_t reserved_32; // offset: 32  (0x20)
>> +       uint32_t reserved_33; // offset: 33  (0x21)
>> +       uint32_t reserved_34; // offset: 34  (0x22)
>> +       uint32_t reserved_35; // offset: 35  (0x23)
>> +       uint32_t reserved_36; // offset: 36  (0x24)
>> +       uint32_t reserved_37; // offset: 37  (0x25)
>> +       uint32_t reserved_38; // offset: 38  (0x26)
>> +       uint32_t reserved_39; // offset: 39  (0x27)
>> +       uint32_t reserved_40; // offset: 40  (0x28)
>> +       uint32_t reserved_41; // offset: 41  (0x29)
>> +       uint32_t reserved_42; // offset: 42  (0x2A)
>> +       uint32_t reserved_43; // offset: 43  (0x2B)
>> +       uint32_t reserved_44; // offset: 44  (0x2C)
>> +       uint32_t reserved_45; // offset: 45  (0x2D)
>> +       uint32_t reserved_46; // offset: 46  (0x2E)
>> +       uint32_t reserved_47; // offset: 47  (0x2F)
>> +       uint32_t reserved_48; // offset: 48  (0x30)
>> +       uint32_t reserved_49; // offset: 49  (0x31)
>> +       uint32_t reserved_50; // offset: 50  (0x32)
>> +       uint32_t reserved_51; // offset: 51  (0x33)
>> +       uint32_t reserved_52; // offset: 52  (0x34)
>> +       uint32_t reserved_53; // offset: 53  (0x35)
>> +       uint32_t reserved_54; // offset: 54  (0x36)
>> +       uint32_t reserved_55; // offset: 55  (0x37)
>> +       uint32_t reserved_56; // offset: 56  (0x38)
>> +       uint32_t reserved_57; // offset: 57  (0x39)
>> +       uint32_t reserved_58; // offset: 58  (0x3A)
>> +       uint32_t reserved_59; // offset: 59  (0x3B)
>> +       uint32_t reserved_60; // offset: 60  (0x3C)
>> +       uint32_t reserved_61; // offset: 61  (0x3D)
>> +       uint32_t reserved_62; // offset: 62  (0x3E)
>> +       uint32_t reserved_63; // offset: 63  (0x3F)
>> +       uint32_t reserved_64; // offset: 64  (0x40)
>> +       uint32_t reserved_65; // offset: 65  (0x41)
>> +       uint32_t reserved_66; // offset: 66  (0x42)
>> +       uint32_t reserved_67; // offset: 67  (0x43)
>> +       uint32_t reserved_68; // offset: 68  (0x44)
>> +       uint32_t reserved_69; // offset: 69  (0x45)
>> +       uint32_t reserved_70; // offset: 70  (0x46)
>> +       uint32_t reserved_71; // offset: 71  (0x47)
>> +       uint32_t reserved_72; // offset: 72  (0x48)
>> +       uint32_t reserved_73; // offset: 73  (0x49)
>> +       uint32_t reserved_74; // offset: 74  (0x4A)
>> +       uint32_t reserved_75; // offset: 75  (0x4B)
>> +       uint32_t reserved_76; // offset: 76  (0x4C)
>> +       uint32_t reserved_77; // offset: 77  (0x4D)
>> +       uint32_t reserved_78; // offset: 78  (0x4E)
>> +       uint32_t reserved_79; // offset: 79  (0x4F)
>> +       uint32_t reserved_80; // offset: 80  (0x50)
>> +       uint32_t reserved_81; // offset: 81  (0x51)
>> +       uint32_t reserved_82; // offset: 82  (0x52)
>> +       uint32_t reserved_83; // offset: 83  (0x53)
>> +       uint32_t checksum_lo; // offset: 84  (0x54)
>> +       uint32_t checksum_hi; // offset: 85  (0x55)
>> +       uint32_t cp_mqd_query_time_lo; // offset: 86  (0x56)
>> +       uint32_t cp_mqd_query_time_hi; // offset: 87  (0x57)
>> +       uint32_t reserved_88; // offset: 88  (0x58)
>> +       uint32_t reserved_89; // offset: 89  (0x59)
>> +       uint32_t reserved_90; // offset: 90  (0x5A)
>> +       uint32_t reserved_91; // offset: 91  (0x5B)
>> +       uint32_t cp_mqd_query_wave_count; // offset: 92  (0x5C)
>> +       uint32_t cp_mqd_query_gfx_hqd_rptr; // offset: 93  (0x5D)
>> +       uint32_t cp_mqd_query_gfx_hqd_wptr; // offset: 94  (0x5E)
>> +       uint32_t cp_mqd_query_gfx_hqd_offset; // offset: 95  (0x5F)
>> +       uint32_t reserved_96; // offset: 96  (0x60)
>> +       uint32_t reserved_97; // offset: 97  (0x61)
>> +       uint32_t reserved_98; // offset: 98  (0x62)
>> +       uint32_t reserved_99; // offset: 99  (0x63)
>> +       uint32_t reserved_100; // offset: 100  (0x64)
>> +       uint32_t reserved_101; // offset: 101  (0x65)
>> +       uint32_t reserved_102; // offset: 102  (0x66)
>> +       uint32_t reserved_103; // offset: 103  (0x67)
>> +       uint32_t task_shader_control_buf_addr_lo; // offset: 104  (0x68)
>> +       uint32_t task_shader_control_buf_addr_hi; // offset: 105  (0x69)
>> +       uint32_t task_shader_read_rptr_lo; // offset: 106  (0x6A)
>> +       uint32_t task_shader_read_rptr_hi; // offset: 107  (0x6B)
>> +       uint32_t task_shader_num_entries; // offset: 108  (0x6C)
>> +       uint32_t task_shader_num_entries_bits; // offset: 109  (0x6D)
>> +       uint32_t task_shader_ring_buffer_addr_lo; // offset: 110  (0x6E)
>> +       uint32_t task_shader_ring_buffer_addr_hi; // offset: 111  (0x6F)
>> +       uint32_t reserved_112; // offset: 112  (0x70)
>> +       uint32_t reserved_113; // offset: 113  (0x71)
>> +       uint32_t reserved_114; // offset: 114  (0x72)
>> +       uint32_t reserved_115; // offset: 115  (0x73)
>> +       uint32_t reserved_116; // offset: 116  (0x74)
>> +       uint32_t reserved_117; // offset: 117  (0x75)
>> +       uint32_t reserved_118; // offset: 118  (0x76)
>> +       uint32_t reserved_119; // offset: 119  (0x77)
>> +       uint32_t reserved_120; // offset: 120  (0x78)
>> +       uint32_t reserved_121; // offset: 121  (0x79)
>> +       uint32_t reserved_122; // offset: 122  (0x7A)
>> +       uint32_t reserved_123; // offset: 123  (0x7B)
>> +       uint32_t reserved_124; // offset: 124  (0x7C)
>> +       uint32_t reserved_125; // offset: 125  (0x7D)
>> +       uint32_t reserved_126; // offset: 126  (0x7E)
>> +       uint32_t reserved_127; // offset: 127  (0x7F)
>> +       uint32_t cp_mqd_base_addr; // offset: 128  (0x80)
>> +       uint32_t cp_mqd_base_addr_hi; // offset: 129  (0x81)
>> +       uint32_t cp_gfx_hqd_active; // offset: 130  (0x82)
>> +       uint32_t cp_gfx_hqd_vmid; // offset: 131  (0x83)
>> +       uint32_t reserved_131; // offset: 132  (0x84)
>> +       uint32_t reserved_132; // offset: 133  (0x85)
>> +       uint32_t cp_gfx_hqd_queue_priority; // offset: 134  (0x86)
>> +       uint32_t cp_gfx_hqd_quantum; // offset: 135  (0x87)
>> +       uint32_t cp_gfx_hqd_base; // offset: 136  (0x88)
>> +       uint32_t cp_gfx_hqd_base_hi; // offset: 137  (0x89)
>> +       uint32_t cp_gfx_hqd_rptr; // offset: 138  (0x8A)
>> +       uint32_t cp_gfx_hqd_rptr_addr; // offset: 139  (0x8B)
>> +       uint32_t cp_gfx_hqd_rptr_addr_hi; // offset: 140  (0x8C)
>> +       uint32_t cp_rb_wptr_poll_addr_lo; // offset: 141  (0x8D)
>> +       uint32_t cp_rb_wptr_poll_addr_hi; // offset: 142  (0x8E)
>> +       uint32_t cp_rb_doorbell_control; // offset: 143  (0x8F)
>> +       uint32_t cp_gfx_hqd_offset; // offset: 144  (0x90)
>> +       uint32_t cp_gfx_hqd_cntl; // offset: 145  (0x91)
>> +       uint32_t reserved_146; // offset: 146  (0x92)
>> +       uint32_t reserved_147; // offset: 147  (0x93)
>> +       uint32_t cp_gfx_hqd_csmd_rptr; // offset: 148  (0x94)
>> +       uint32_t cp_gfx_hqd_wptr; // offset: 149  (0x95)
>> +       uint32_t cp_gfx_hqd_wptr_hi; // offset: 150  (0x96)
>> +       uint32_t reserved_151; // offset: 151  (0x97)
>> +       uint32_t reserved_152; // offset: 152  (0x98)
>> +       uint32_t reserved_153; // offset: 153  (0x99)
>> +       uint32_t reserved_154; // offset: 154  (0x9A)
>> +       uint32_t reserved_155; // offset: 155  (0x9B)
>> +       uint32_t cp_gfx_hqd_mapped; // offset: 156  (0x9C)
>> +       uint32_t cp_gfx_hqd_que_mgr_control; // offset: 157  (0x9D)
>> +       uint32_t reserved_158; // offset: 158  (0x9E)
>> +       uint32_t reserved_159; // offset: 159  (0x9F)
>> +       uint32_t cp_gfx_hqd_hq_status0; // offset: 160  (0xA0)
>> +       uint32_t cp_gfx_hqd_hq_control0; // offset: 161  (0xA1)
>> +       uint32_t cp_gfx_mqd_control; // offset: 162  (0xA2)
>> +       uint32_t reserved_163; // offset: 163  (0xA3)
>> +       uint32_t reserved_164; // offset: 164  (0xA4)
>> +       uint32_t reserved_165; // offset: 165  (0xA5)
>> +       uint32_t reserved_166; // offset: 166  (0xA6)
>> +       uint32_t reserved_167; // offset: 167  (0xA7)
>> +       uint32_t reserved_168; // offset: 168  (0xA8)
>> +       uint32_t reserved_169; // offset: 169  (0xA9)
>> +       uint32_t cp_num_prim_needed_count0_lo; // offset: 170  (0xAA)
>> +       uint32_t cp_num_prim_needed_count0_hi; // offset: 171  (0xAB)
>> +       uint32_t cp_num_prim_needed_count1_lo; // offset: 172  (0xAC)
>> +       uint32_t cp_num_prim_needed_count1_hi; // offset: 173  (0xAD)
>> +       uint32_t cp_num_prim_needed_count2_lo; // offset: 174  (0xAE)
>> +       uint32_t cp_num_prim_needed_count2_hi; // offset: 175  (0xAF)
>> +       uint32_t cp_num_prim_needed_count3_lo; // offset: 176  (0xB0)
>> +       uint32_t cp_num_prim_needed_count3_hi; // offset: 177  (0xB1)
>> +       uint32_t cp_num_prim_written_count0_lo; // offset: 178  (0xB2)
>> +       uint32_t cp_num_prim_written_count0_hi; // offset: 179  (0xB3)
>> +       uint32_t cp_num_prim_written_count1_lo; // offset: 180  (0xB4)
>> +       uint32_t cp_num_prim_written_count1_hi; // offset: 181  (0xB5)
>> +       uint32_t cp_num_prim_written_count2_lo; // offset: 182  (0xB6)
>> +       uint32_t cp_num_prim_written_count2_hi; // offset: 183  (0xB7)
>> +       uint32_t cp_num_prim_written_count3_lo; // offset: 184  (0xB8)
>> +       uint32_t cp_num_prim_written_count3_hi; // offset: 185  (0xB9)
>> +       uint32_t reserved_186; // offset: 186  (0xBA)
>> +       uint32_t reserved_187; // offset: 187  (0xBB)
>> +       uint32_t reserved_188; // offset: 188  (0xBC)
>> +       uint32_t reserved_189; // offset: 189  (0xBD)
>> +       uint32_t reserved_190; // offset: 190  (0xBE)
>> +       uint32_t reserved_191; // offset: 191  (0xBF)
>> +       uint32_t reserved_192; // offset: 192  (0xC0)
>> +       uint32_t reserved_193; // offset: 193  (0xC1)
>> +       uint32_t reserved_194; // offset: 194  (0xC2)
>> +       uint32_t reserved_195; // offset: 195  (0xC3)
>> +       uint32_t reserved_196; // offset: 196  (0xC4)
>> +       uint32_t reserved_197; // offset: 197  (0xC5)
>> +       uint32_t reserved_198; // offset: 198  (0xC6)
>> +       uint32_t reserved_199; // offset: 199  (0xC7)
>> +       uint32_t reserved_200; // offset: 200  (0xC8)
>> +       uint32_t reserved_201; // offset: 201  (0xC9)
>> +       uint32_t reserved_202; // offset: 202  (0xCA)
>> +       uint32_t reserved_203; // offset: 203  (0xCB)
>> +       uint32_t reserved_204; // offset: 204  (0xCC)
>> +       uint32_t reserved_205; // offset: 205  (0xCD)
>> +       uint32_t reserved_206; // offset: 206  (0xCE)
>> +       uint32_t reserved_207; // offset: 207  (0xCF)
>> +       uint32_t reserved_208; // offset: 208  (0xD0)
>> +       uint32_t reserved_209; // offset: 209  (0xD1)
>> +       uint32_t reserved_210; // offset: 210  (0xD2)
>> +       uint32_t reserved_211; // offset: 211  (0xD3)
>> +       uint32_t reserved_212; // offset: 212  (0xD4)
>> +       uint32_t reserved_213; // offset: 213  (0xD5)
>> +       uint32_t reserved_214; // offset: 214  (0xD6)
>> +       uint32_t reserved_215; // offset: 215  (0xD7)
>> +       uint32_t reserved_216; // offset: 216  (0xD8)
>> +       uint32_t reserved_217; // offset: 217  (0xD9)
>> +       uint32_t reserved_218; // offset: 218  (0xDA)
>> +       uint32_t reserved_219; // offset: 219  (0xDB)
>> +       uint32_t reserved_220; // offset: 220  (0xDC)
>> +       uint32_t reserved_221; // offset: 221  (0xDD)
>> +       uint32_t reserved_222; // offset: 222  (0xDE)
>> +       uint32_t reserved_223; // offset: 223  (0xDF)
>> +       uint32_t reserved_224; // offset: 224  (0xE0)
>> +       uint32_t reserved_225; // offset: 225  (0xE1)
>> +       uint32_t reserved_226; // offset: 226  (0xE2)
>> +       uint32_t reserved_227; // offset: 227  (0xE3)
>> +       uint32_t reserved_228; // offset: 228  (0xE4)
>> +       uint32_t reserved_229; // offset: 229  (0xE5)
>> +       uint32_t reserved_230; // offset: 230  (0xE6)
>> +       uint32_t reserved_231; // offset: 231  (0xE7)
>> +       uint32_t reserved_232; // offset: 232  (0xE8)
>> +       uint32_t reserved_233; // offset: 233  (0xE9)
>> +       uint32_t reserved_234; // offset: 234  (0xEA)
>> +       uint32_t reserved_235; // offset: 235  (0xEB)
>> +       uint32_t reserved_236; // offset: 236  (0xEC)
>> +       uint32_t reserved_237; // offset: 237  (0xED)
>> +       uint32_t reserved_238; // offset: 238  (0xEE)
>> +       uint32_t reserved_239; // offset: 239  (0xEF)
>> +       uint32_t reserved_240; // offset: 240  (0xF0)
>> +       uint32_t reserved_241; // offset: 241  (0xF1)
>> +       uint32_t reserved_242; // offset: 242  (0xF2)
>> +       uint32_t reserved_243; // offset: 243  (0xF3)
>> +       uint32_t reserved_244; // offset: 244  (0xF4)
>> +       uint32_t reserved_245; // offset: 245  (0xF5)
>> +       uint32_t reserved_246; // offset: 246  (0xF6)
>> +       uint32_t reserved_247; // offset: 247  (0xF7)
>> +       uint32_t reserved_248; // offset: 248  (0xF8)
>> +       uint32_t reserved_249; // offset: 249  (0xF9)
>> +       uint32_t reserved_250; // offset: 250  (0xFA)
>> +       uint32_t reserved_251; // offset: 251  (0xFB)
>> +       uint32_t reserved_252; // offset: 252  (0xFC)
>> +       uint32_t reserved_253; // offset: 253  (0xFD)
>> +       uint32_t reserved_254; // offset: 254  (0xFE)
>> +       uint32_t reserved_255; // offset: 255  (0xFF)
>> +       uint32_t reserved_256; // offset: 256  (0x100)
>> +       uint32_t reserved_257; // offset: 257  (0x101)
>> +       uint32_t reserved_258; // offset: 258  (0x102)
>> +       uint32_t reserved_259; // offset: 259  (0x103)
>> +       uint32_t reserved_260; // offset: 260  (0x104)
>> +       uint32_t reserved_261; // offset: 261  (0x105)
>> +       uint32_t reserved_262; // offset: 262  (0x106)
>> +       uint32_t reserved_263; // offset: 263  (0x107)
>> +       uint32_t reserved_264; // offset: 264  (0x108)
>> +       uint32_t reserved_265; // offset: 265  (0x109)
>> +       uint32_t reserved_266; // offset: 266  (0x10A)
>> +       uint32_t reserved_267; // offset: 267  (0x10B)
>> +       uint32_t reserved_268; // offset: 268  (0x10C)
>> +       uint32_t reserved_269; // offset: 269  (0x10D)
>> +       uint32_t reserved_270; // offset: 270  (0x10E)
>> +       uint32_t reserved_271; // offset: 271  (0x10F)
>> +       uint32_t reserved_272; // offset: 272  (0x110)
>> +       uint32_t reserved_273; // offset: 273  (0x111)
>> +       uint32_t reserved_274; // offset: 274  (0x112)
>> +       uint32_t reserved_275; // offset: 275  (0x113)
>> +       uint32_t reserved_276; // offset: 276  (0x114)
>> +       uint32_t reserved_277; // offset: 277  (0x115)
>> +       uint32_t reserved_278; // offset: 278  (0x116)
>> +       uint32_t reserved_279; // offset: 279  (0x117)
>> +       uint32_t reserved_280; // offset: 280  (0x118)
>> +       uint32_t reserved_281; // offset: 281  (0x119)
>> +       uint32_t reserved_282; // offset: 282  (0x11A)
>> +       uint32_t reserved_283; // offset: 283  (0x11B)
>> +       uint32_t reserved_284; // offset: 284  (0x11C)
>> +       uint32_t reserved_285; // offset: 285  (0x11D)
>> +       uint32_t reserved_286; // offset: 286  (0x11E)
>> +       uint32_t reserved_287; // offset: 287  (0x11F)
>> +       uint32_t reserved_288; // offset: 288  (0x120)
>> +       uint32_t reserved_289; // offset: 289  (0x121)
>> +       uint32_t reserved_290; // offset: 290  (0x122)
>> +       uint32_t reserved_291; // offset: 291  (0x123)
>> +       uint32_t reserved_292; // offset: 292  (0x124)
>> +       uint32_t reserved_293; // offset: 293  (0x125)
>> +       uint32_t reserved_294; // offset: 294  (0x126)
>> +       uint32_t reserved_295; // offset: 295  (0x127)
>> +       uint32_t reserved_296; // offset: 296  (0x128)
>> +       uint32_t reserved_297; // offset: 297  (0x129)
>> +       uint32_t reserved_298; // offset: 298  (0x12A)
>> +       uint32_t reserved_299; // offset: 299  (0x12B)
>> +       uint32_t reserved_300; // offset: 300  (0x12C)
>> +       uint32_t reserved_301; // offset: 301  (0x12D)
>> +       uint32_t reserved_302; // offset: 302  (0x12E)
>> +       uint32_t reserved_303; // offset: 303  (0x12F)
>> +       uint32_t reserved_304; // offset: 304  (0x130)
>> +       uint32_t reserved_305; // offset: 305  (0x131)
>> +       uint32_t reserved_306; // offset: 306  (0x132)
>> +       uint32_t reserved_307; // offset: 307  (0x133)
>> +       uint32_t reserved_308; // offset: 308  (0x134)
>> +       uint32_t reserved_309; // offset: 309  (0x135)
>> +       uint32_t reserved_310; // offset: 310  (0x136)
>> +       uint32_t reserved_311; // offset: 311  (0x137)
>> +       uint32_t reserved_312; // offset: 312  (0x138)
>> +       uint32_t reserved_313; // offset: 313  (0x139)
>> +       uint32_t reserved_314; // offset: 314  (0x13A)
>> +       uint32_t reserved_315; // offset: 315  (0x13B)
>> +       uint32_t reserved_316; // offset: 316  (0x13C)
>> +       uint32_t reserved_317; // offset: 317  (0x13D)
>> +       uint32_t reserved_318; // offset: 318  (0x13E)
>> +       uint32_t reserved_319; // offset: 319  (0x13F)
>> +       uint32_t reserved_320; // offset: 320  (0x140)
>> +       uint32_t reserved_321; // offset: 321  (0x141)
>> +       uint32_t reserved_322; // offset: 322  (0x142)
>> +       uint32_t reserved_323; // offset: 323  (0x143)
>> +       uint32_t reserved_324; // offset: 324  (0x144)
>> +       uint32_t reserved_325; // offset: 325  (0x145)
>> +       uint32_t reserved_326; // offset: 326  (0x146)
>> +       uint32_t reserved_327; // offset: 327  (0x147)
>> +       uint32_t reserved_328; // offset: 328  (0x148)
>> +       uint32_t reserved_329; // offset: 329  (0x149)
>> +       uint32_t reserved_330; // offset: 330  (0x14A)
>> +       uint32_t reserved_331; // offset: 331  (0x14B)
>> +       uint32_t reserved_332; // offset: 332  (0x14C)
>> +       uint32_t reserved_333; // offset: 333  (0x14D)
>> +       uint32_t reserved_334; // offset: 334  (0x14E)
>> +       uint32_t reserved_335; // offset: 335  (0x14F)
>> +       uint32_t reserved_336; // offset: 336  (0x150)
>> +       uint32_t reserved_337; // offset: 337  (0x151)
>> +       uint32_t reserved_338; // offset: 338  (0x152)
>> +       uint32_t reserved_339; // offset: 339  (0x153)
>> +       uint32_t reserved_340; // offset: 340  (0x154)
>> +       uint32_t reserved_341; // offset: 341  (0x155)
>> +       uint32_t reserved_342; // offset: 342  (0x156)
>> +       uint32_t reserved_343; // offset: 343  (0x157)
>> +       uint32_t reserved_344; // offset: 344  (0x158)
>> +       uint32_t reserved_345; // offset: 345  (0x159)
>> +       uint32_t reserved_346; // offset: 346  (0x15A)
>> +       uint32_t reserved_347; // offset: 347  (0x15B)
>> +       uint32_t reserved_348; // offset: 348  (0x15C)
>> +       uint32_t reserved_349; // offset: 349  (0x15D)
>> +       uint32_t reserved_350; // offset: 350  (0x15E)
>> +       uint32_t reserved_351; // offset: 351  (0x15F)
>> +       uint32_t reserved_352; // offset: 352  (0x160)
>> +       uint32_t reserved_353; // offset: 353  (0x161)
>> +       uint32_t reserved_354; // offset: 354  (0x162)
>> +       uint32_t reserved_355; // offset: 355  (0x163)
>> +       uint32_t reserved_356; // offset: 356  (0x164)
>> +       uint32_t reserved_357; // offset: 357  (0x165)
>> +       uint32_t reserved_358; // offset: 358  (0x166)
>> +       uint32_t reserved_359; // offset: 359  (0x167)
>> +       uint32_t reserved_360; // offset: 360  (0x168)
>> +       uint32_t reserved_361; // offset: 361  (0x169)
>> +       uint32_t reserved_362; // offset: 362  (0x16A)
>> +       uint32_t reserved_363; // offset: 363  (0x16B)
>> +       uint32_t reserved_364; // offset: 364  (0x16C)
>> +       uint32_t reserved_365; // offset: 365  (0x16D)
>> +       uint32_t reserved_366; // offset: 366  (0x16E)
>> +       uint32_t reserved_367; // offset: 367  (0x16F)
>> +       uint32_t reserved_368; // offset: 368  (0x170)
>> +       uint32_t reserved_369; // offset: 369  (0x171)
>> +       uint32_t reserved_370; // offset: 370  (0x172)
>> +       uint32_t reserved_371; // offset: 371  (0x173)
>> +       uint32_t reserved_372; // offset: 372  (0x174)
>> +       uint32_t reserved_373; // offset: 373  (0x175)
>> +       uint32_t reserved_374; // offset: 374  (0x176)
>> +       uint32_t reserved_375; // offset: 375  (0x177)
>> +       uint32_t reserved_376; // offset: 376  (0x178)
>> +       uint32_t reserved_377; // offset: 377  (0x179)
>> +       uint32_t reserved_378; // offset: 378  (0x17A)
>> +       uint32_t reserved_379; // offset: 379  (0x17B)
>> +       uint32_t reserved_380; // offset: 380  (0x17C)
>> +       uint32_t reserved_381; // offset: 381  (0x17D)
>> +       uint32_t reserved_382; // offset: 382  (0x17E)
>> +       uint32_t reserved_383; // offset: 383  (0x17F)
>> +       uint32_t reserved_384; // offset: 384  (0x180)
>> +       uint32_t reserved_385; // offset: 385  (0x181)
>> +       uint32_t reserved_386; // offset: 386  (0x182)
>> +       uint32_t reserved_387; // offset: 387  (0x183)
>> +       uint32_t reserved_388; // offset: 388  (0x184)
>> +       uint32_t reserved_389; // offset: 389  (0x185)
>> +       uint32_t reserved_390; // offset: 390  (0x186)
>> +       uint32_t reserved_391; // offset: 391  (0x187)
>> +       uint32_t reserved_392; // offset: 392  (0x188)
>> +       uint32_t reserved_393; // offset: 393  (0x189)
>> +       uint32_t reserved_394; // offset: 394  (0x18A)
>> +       uint32_t reserved_395; // offset: 395  (0x18B)
>> +       uint32_t reserved_396; // offset: 396  (0x18C)
>> +       uint32_t reserved_397; // offset: 397  (0x18D)
>> +       uint32_t reserved_398; // offset: 398  (0x18E)
>> +       uint32_t reserved_399; // offset: 399  (0x18F)
>> +       uint32_t reserved_400; // offset: 400  (0x190)
>> +       uint32_t reserved_401; // offset: 401  (0x191)
>> +       uint32_t reserved_402; // offset: 402  (0x192)
>> +       uint32_t reserved_403; // offset: 403  (0x193)
>> +       uint32_t reserved_404; // offset: 404  (0x194)
>> +       uint32_t reserved_405; // offset: 405  (0x195)
>> +       uint32_t reserved_406; // offset: 406  (0x196)
>> +       uint32_t reserved_407; // offset: 407  (0x197)
>> +       uint32_t reserved_408; // offset: 408  (0x198)
>> +       uint32_t reserved_409; // offset: 409  (0x199)
>> +       uint32_t reserved_410; // offset: 410  (0x19A)
>> +       uint32_t reserved_411; // offset: 411  (0x19B)
>> +       uint32_t reserved_412; // offset: 412  (0x19C)
>> +       uint32_t reserved_413; // offset: 413  (0x19D)
>> +       uint32_t reserved_414; // offset: 414  (0x19E)
>> +       uint32_t reserved_415; // offset: 415  (0x19F)
>> +       uint32_t reserved_416; // offset: 416  (0x1A0)
>> +       uint32_t reserved_417; // offset: 417  (0x1A1)
>> +       uint32_t reserved_418; // offset: 418  (0x1A2)
>> +       uint32_t reserved_419; // offset: 419  (0x1A3)
>> +       uint32_t reserved_420; // offset: 420  (0x1A4)
>> +       uint32_t reserved_421; // offset: 421  (0x1A5)
>> +       uint32_t reserved_422; // offset: 422  (0x1A6)
>> +       uint32_t reserved_423; // offset: 423  (0x1A7)
>> +       uint32_t reserved_424; // offset: 424  (0x1A8)
>> +       uint32_t reserved_425; // offset: 425  (0x1A9)
>> +       uint32_t reserved_426; // offset: 426  (0x1AA)
>> +       uint32_t reserved_427; // offset: 427  (0x1AB)
>> +       uint32_t reserved_428; // offset: 428  (0x1AC)
>> +       uint32_t reserved_429; // offset: 429  (0x1AD)
>> +       uint32_t reserved_430; // offset: 430  (0x1AE)
>> +       uint32_t reserved_431; // offset: 431  (0x1AF)
>> +       uint32_t reserved_432; // offset: 432  (0x1B0)
>> +       uint32_t reserved_433; // offset: 433  (0x1B1)
>> +       uint32_t reserved_434; // offset: 434  (0x1B2)
>> +       uint32_t reserved_435; // offset: 435  (0x1B3)
>> +       uint32_t reserved_436; // offset: 436  (0x1B4)
>> +       uint32_t reserved_437; // offset: 437  (0x1B5)
>> +       uint32_t reserved_438; // offset: 438  (0x1B6)
>> +       uint32_t reserved_439; // offset: 439  (0x1B7)
>> +       uint32_t reserved_440; // offset: 440  (0x1B8)
>> +       uint32_t reserved_441; // offset: 441  (0x1B9)
>> +       uint32_t reserved_442; // offset: 442  (0x1BA)
>> +       uint32_t reserved_443; // offset: 443  (0x1BB)
>> +       uint32_t reserved_444; // offset: 444  (0x1BC)
>> +       uint32_t reserved_445; // offset: 445  (0x1BD)
>> +       uint32_t reserved_446; // offset: 446  (0x1BE)
>> +       uint32_t reserved_447; // offset: 447  (0x1BF)
>> +       uint32_t reserved_448; // offset: 448  (0x1C0)
>> +       uint32_t reserved_449; // offset: 449  (0x1C1)
>> +       uint32_t reserved_450; // offset: 450  (0x1C2)
>> +       uint32_t reserved_451; // offset: 451  (0x1C3)
>> +       uint32_t reserved_452; // offset: 452  (0x1C4)
>> +       uint32_t reserved_453; // offset: 453  (0x1C5)
>> +       uint32_t reserved_454; // offset: 454  (0x1C6)
>> +       uint32_t reserved_455; // offset: 455  (0x1C7)
>> +       uint32_t reserved_456; // offset: 456  (0x1C8)
>> +       uint32_t reserved_457; // offset: 457  (0x1C9)
>> +       uint32_t reserved_458; // offset: 458  (0x1CA)
>> +       uint32_t reserved_459; // offset: 459  (0x1CB)
>> +       uint32_t reserved_460; // offset: 460  (0x1CC)
>> +       uint32_t reserved_461; // offset: 461  (0x1CD)
>> +       uint32_t reserved_462; // offset: 462  (0x1CE)
>> +       uint32_t reserved_463; // offset: 463  (0x1CF)
>> +       uint32_t reserved_464; // offset: 464  (0x1D0)
>> +       uint32_t reserved_465; // offset: 465  (0x1D1)
>> +       uint32_t reserved_466; // offset: 466  (0x1D2)
>> +       uint32_t reserved_467; // offset: 467  (0x1D3)
>> +       uint32_t reserved_468; // offset: 468  (0x1D4)
>> +       uint32_t reserved_469; // offset: 469  (0x1D5)
>> +       uint32_t reserved_470; // offset: 470  (0x1D6)
>> +       uint32_t reserved_471; // offset: 471  (0x1D7)
>> +       uint32_t reserved_472; // offset: 472  (0x1D8)
>> +       uint32_t reserved_473; // offset: 473  (0x1D9)
>> +       uint32_t reserved_474; // offset: 474  (0x1DA)
>> +       uint32_t reserved_475; // offset: 475  (0x1DB)
>> +       uint32_t reserved_476; // offset: 476  (0x1DC)
>> +       uint32_t reserved_477; // offset: 477  (0x1DD)
>> +       uint32_t reserved_478; // offset: 478  (0x1DE)
>> +       uint32_t reserved_479; // offset: 479  (0x1DF)
>> +       uint32_t reserved_480; // offset: 480  (0x1E0)
>> +       uint32_t reserved_481; // offset: 481  (0x1E1)
>> +       uint32_t reserved_482; // offset: 482  (0x1E2)
>> +       uint32_t reserved_483; // offset: 483  (0x1E3)
>> +       uint32_t reserved_484; // offset: 484  (0x1E4)
>> +       uint32_t reserved_485; // offset: 485  (0x1E5)
>> +       uint32_t reserved_486; // offset: 486  (0x1E6)
>> +       uint32_t reserved_487; // offset: 487  (0x1E7)
>> +       uint32_t reserved_488; // offset: 488  (0x1E8)
>> +       uint32_t reserved_489; // offset: 489  (0x1E9)
>> +       uint32_t reserved_490; // offset: 490  (0x1EA)
>> +       uint32_t reserved_491; // offset: 491  (0x1EB)
>> +       uint32_t reserved_492; // offset: 492  (0x1EC)
>> +       uint32_t reserved_493; // offset: 493  (0x1ED)
>> +       uint32_t reserved_494; // offset: 494  (0x1EE)
>> +       uint32_t reserved_495; // offset: 495  (0x1EF)
>> +       uint32_t reserved_496; // offset: 496  (0x1F0)
>> +       uint32_t reserved_497; // offset: 497  (0x1F1)
>> +       uint32_t reserved_498; // offset: 498  (0x1F2)
>> +       uint32_t reserved_499; // offset: 499  (0x1F3)
>> +       uint32_t reserved_500; // offset: 500  (0x1F4)
>> +       uint32_t reserved_501; // offset: 501  (0x1F5)
>> +       uint32_t reserved_502; // offset: 502  (0x1F6)
>> +       uint32_t reserved_503; // offset: 503  (0x1F7)
>> +       uint32_t reserved_504; // offset: 504  (0x1F8)
>> +       uint32_t reserved_505; // offset: 505  (0x1F9)
>> +       uint32_t reserved_506; // offset: 506  (0x1FA)
>> +       uint32_t reserved_507; // offset: 507  (0x1FB)
>> +       uint32_t reserved_508; // offset: 508  (0x1FC)
>> +       uint32_t reserved_509; // offset: 509  (0x1FD)
>> +       uint32_t reserved_510; // offset: 510  (0x1FE)
>> +       uint32_t reserved_511; // offset: 511  (0x1FF)
>> +};
>> +
>> +#endif
>> \ No newline at end of file
>> --
>> 2.34.1
>>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 4/7] drm/amdgpu: Allocate doorbell slot for user queue
  2022-12-29 17:50   ` Alex Deucher
@ 2023-01-03  9:37     ` Shashank Sharma
  0 siblings, 0 replies; 64+ messages in thread
From: Shashank Sharma @ 2023-01-03  9:37 UTC (permalink / raw)
  To: Alex Deucher
  Cc: Alex Deucher, arvind.yadav, Christian Koenig, amd-gfx,
	arunpravin.paneerselvam


On 29/12/2022 18:50, Alex Deucher wrote:
> On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma <shashank.sharma@amd.com> wrote:
>> This patch allocates a doorbell slot in the BAR for the usermode queue.
>> We are using the unique queue-id to get this slot from MES.
> We should manage the doorbell BAR just like VRAM.  I had a set of
> patches to convert doorbell memory to GEM objects.  The user should be
> able to allocate doorbell memory via the GEM IOCTL just like VRAM or
> GTT.  Then when the user calls the USERQ IOCTL, we can just look up
> the GEM object from the handle and then calculate the doorbell offset
> based on the offset of the GEM object from the start of the BAR.
>
> Alex

Noted,

- Shashank
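
Assuming doorbell pages really are exposed as GEM objects as described
above (they are not in this series), the USERQ IOCTL side could look
roughly like the sketch below. The helper name, the db_offset_in_obj
parameter and the exact offset math are made up for illustration, and the
GPU-offset semantics for a doorbell BO are an assumption of the sketch.

#include "amdgpu.h"

static int amdgpu_userqueue_doorbell_from_handle(struct drm_file *filp,
						 uint32_t handle,
						 uint32_t db_offset_in_obj,
						 uint32_t *db_index)
{
	struct drm_gem_object *gobj = drm_gem_object_lookup(filp, handle);
	struct amdgpu_bo *bo;

	if (!gobj)
		return -ENOENT;

	bo = gem_to_amdgpu_bo(gobj);
	/* dword offset of the BO from the start of the doorbell BAR, plus
	 * the offset the application chose inside its own doorbell page */
	*db_index = (amdgpu_bo_gpu_offset(bo) >> 2) + db_offset_in_obj;

	drm_gem_object_put(gobj);
	return 0;
}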

>> Cc: Alex Deucher <alexander.deucher@amd.com>
>> Cc: Christian Koenig <christian.koenig@amd.com>
>> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 28 +++++++++++++++++++
>>   1 file changed, 28 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> index a91cc304cb9e..b566ce4cb7f0 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> @@ -50,6 +50,25 @@ amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct amdgpu_usermode
>>       ida_simple_remove(&uqg->ida, queue->queue_id);
>>   }
>>
>> +static int
>> +amdgpu_userqueue_get_doorbell(struct amdgpu_device *adev,
>> +                    struct amdgpu_usermode_queue *queue)
>> +{
>> +    int r;
>> +    unsigned int doorbell_index;
>> +
>> +    r = amdgpu_mes_alloc_process_doorbells(adev, &doorbell_index);
>> +       if (r < 0) {
>> +        DRM_ERROR("Failed to allocate doorbell for user queue\n");
>> +        return r;
>> +    }
>> +
>> +    /* We are using unique queue-id to get doorbell here */
>> +    queue->doorbell_index = amdgpu_mes_get_doorbell_dw_offset_in_bar(adev,
>> +                           doorbell_index, queue->queue_id);
>> +    return 0;
>> +}
>> +
>>   static void
>>   amdgpu_userqueue_setup_mqd(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
>>   {
>> @@ -257,12 +276,21 @@ int amdgpu_userqueue_create(struct amdgpu_device *adev, struct drm_file *filp,
>>           goto free_queue;
>>       }
>>
>> +    r = amdgpu_userqueue_get_doorbell(adev, queue);
>> +    if (r) {
>> +        DRM_ERROR("Failed to create doorbell for queue\n");
>> +        goto free_mqd;
>> +    }
>> +
>>       ctx->userq = queue;
>>       args->out.q_id = queue->queue_id;
>>       args->out.flags = 0;
>>       mutex_unlock(&adev->userq.userq_mutex);
>>       return 0;
>>
>> +free_mqd:
>> +    amdgpu_userqueue_destroy_mqd(queue);
>> +
>>   free_queue:
>>       amdgpu_userqueue_remove_index(adev, queue);
>>       mutex_unlock(&adev->userq.userq_mutex);
>> --
>> 2.34.1
>>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 6/7] drm/amdgpu: Map userqueue into HW
  2022-12-29 17:51   ` Alex Deucher
@ 2023-01-03  9:38     ` Shashank Sharma
  0 siblings, 0 replies; 64+ messages in thread
From: Shashank Sharma @ 2023-01-03  9:38 UTC (permalink / raw)
  To: Alex Deucher
  Cc: Alex Deucher, arvind.yadav, Christian Koenig, amd-gfx,
	arunpravin.paneerselvam


On 29/12/2022 18:51, Alex Deucher wrote:
> On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma <shashank.sharma@amd.com> wrote:
>> This patch adds the function to map/unmap the usermode queue into the HW,
>> using the prepared MQD and other objects. After this mapping, the queue
>> will be ready to accept the workload.
> This should also be a callback into IP specific code.  It will be
> different for each IP type and version.
>
> Alex

Noted, so far we have two IP specific functions, .init_mqd() and .map()

- Shashank
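
Continuing the illustrative callback table sketched earlier in the thread,
map/unmap would simply join init_mqd, so a MES-based GFX11 implementation
and any future implementation can sit behind the same interface. Again,
these names are hypothetical, not part of the posted series.

/* Hypothetical per-IP table, extended with map/unmap: */
struct amdgpu_userq_funcs {
	int  (*init_mqd)(struct amdgpu_device *adev,
			 struct amdgpu_usermode_queue *queue);
	void (*free_mqd)(struct amdgpu_device *adev,
			 struct amdgpu_usermode_queue *queue);
	int  (*map)(struct amdgpu_device *adev,
		    struct amdgpu_usermode_queue *queue);
	void (*unmap)(struct amdgpu_device *adev,
		      struct amdgpu_usermode_queue *queue);
};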

>> Cc: Alex Deucher <alexander.deucher@amd.com>
>> Cc: Christian Koenig <christian.koenig@amd.com>
>>
>> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 71 +++++++++++++++++++
>>   1 file changed, 71 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> index 2a854a5e2f70..b164e24247ca 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> @@ -50,6 +50,67 @@ amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct amdgpu_usermode
>>       ida_simple_remove(&uqg->ida, queue->queue_id);
>>   }
>>
>> +static int amdgpu_userqueue_map(struct amdgpu_device *adev,
>> +                    struct amdgpu_usermode_queue *queue)
>> +{
>> +    int r;
>> +    struct mes_add_queue_input queue_input;
>> +
>> +    memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
>> +
>> +    queue_input.process_va_start = 0;
>> +    queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
>> +    queue_input.process_quantum = 100000; /* 10ms */
>> +    queue_input.gang_quantum = 10000; /* 1ms */
>> +    queue_input.paging = false;
>> +
>> +    queue_input.gang_context_addr = queue->gang_ctx.gpu_addr;
>> +    queue_input.process_context_addr = queue->proc_ctx.gpu_addr;
>> +    queue_input.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
>> +    queue_input.gang_global_priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
>> +
>> +    queue_input.process_id = queue->pasid;
>> +    queue_input.queue_type = queue->queue_type;
>> +    queue_input.mqd_addr = queue->mqd_gpu_addr;
>> +    queue_input.wptr_addr = queue->wptr_gpu_addr;
>> +    queue_input.queue_size = queue->queue_size >> 2;
>> +    queue_input.doorbell_offset = queue->doorbell_index;
>> +    queue_input.page_table_base_addr =  queue->vm->pd_phys_addr;
>> +
>> +    amdgpu_mes_lock(&adev->mes);
>> +    r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
>> +    amdgpu_mes_unlock(&adev->mes);
>> +    if (r) {
>> +        DRM_ERROR("Failed to map queue in HW, err (%d)\n", r);
>> +        return r;
>> +    }
>> +
>> +    DRM_DEBUG_DRIVER("Queue %d mapped successfully\n", queue->queue_id);
>> +    return 0;
>> +}
>> +
>> +static void amdgpu_userqueue_unmap(struct amdgpu_device *adev,
>> +                    struct amdgpu_usermode_queue *queue)
>> +{
>> +    int r;
>> +    struct mes_remove_queue_input queue_input;
>> +
>> +    memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
>> +    queue_input.doorbell_offset = queue->doorbell_index;
>> +    queue_input.gang_context_addr = queue->gang_ctx.gpu_addr;
>> +
>> +    amdgpu_mes_lock(&adev->mes);
>> +    r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
>> +    amdgpu_mes_unlock(&adev->mes);
>> +
>> +    if (r) {
>> +        DRM_ERROR("Failed to unmap usermode queue %d\n", queue->queue_id);
>> +        return;
>> +    }
>> +
>> +    DRM_DEBUG_DRIVER("Usermode queue %d unmapped\n", queue->queue_id);
>> +}
>> +
>>   static int
>>   amdgpu_userqueue_get_doorbell(struct amdgpu_device *adev,
>>                       struct amdgpu_usermode_queue *queue)
>> @@ -338,12 +399,21 @@ int amdgpu_userqueue_create(struct amdgpu_device *adev, struct drm_file *filp,
>>           goto free_mqd;
>>       }
>>
>> +    r = amdgpu_userqueue_map(adev, queue);
>> +    if (r < 0) {
>> +        DRM_ERROR("Failed to map queue\n");
>> +        goto free_ctx;
>> +    }
>> +
>>       ctx->userq = queue;
>>       args->out.q_id = queue->queue_id;
>>       args->out.flags = 0;
>>       mutex_unlock(&adev->userq.userq_mutex);
>>       return 0;
>>
>> +free_ctx:
>> +    amdgpu_userqueue_free_context(adev, queue);
>> +
>>   free_mqd:
>>       amdgpu_userqueue_destroy_mqd(queue);
>>
>> @@ -362,6 +432,7 @@ void amdgpu_userqueue_destroy(struct amdgpu_device *adev, struct drm_file *filp,
>>       struct amdgpu_usermode_queue *queue = ctx->userq;
>>
>>       mutex_lock(&adev->userq.userq_mutex);
>> +    amdgpu_userqueue_unmap(adev, queue);
>>       amdgpu_userqueue_free_context(adev, queue);
>>       amdgpu_userqueue_destroy_mqd(queue);
>>       amdgpu_userqueue_remove_index(adev, queue);
>> --
>> 2.34.1
>>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 5/7] drm/amdgpu: Create context for usermode queue
  2022-12-29 17:54   ` Alex Deucher
@ 2023-01-03  9:40     ` Shashank Sharma
  2023-01-03 14:48       ` Alex Deucher
  0 siblings, 1 reply; 64+ messages in thread
From: Shashank Sharma @ 2023-01-03  9:40 UTC (permalink / raw)
  To: Alex Deucher
  Cc: Alex Deucher, arvind.yadav, Christian Koenig, amd-gfx,
	arunpravin.paneerselvam


On 29/12/2022 18:54, Alex Deucher wrote:
> On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma <shashank.sharma@amd.com> wrote:
>> The FW expects us to allocate at least one page as process
>> context space, and one for gang context space. This patch adds
>> objects for the same.
> This should be handled in the IP specific code for the MQD creation.
> Each IP may have different requirements for MQD related metadata.
>
> Alex

Noted, so three IP-specific functions so far:

.init_mqd(), .map() and .create_ctx_space().
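
And the USERQ IOCTL handler would then just look up the right set of
callbacks based on the ip_type coming in from userspace, something like
this (sketch only; the adev->userq_funcs[] table is hypothetical):

    const struct amdgpu_userq_funcs *uq_funcs;

    uq_funcs = adev->userq_funcs[mqd_in->ip_type];
    if (!uq_funcs)
        return -EINVAL;

    r = uq_funcs->init_mqd(adev, queue);
    if (r)
        goto free_queue;

    r = uq_funcs->map(adev, queue);
    if (r)
        goto free_mqd;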

- Shashank

>
>> Cc: Alex Deucher <alexander.deucher@amd.com>
>> Cc: Christian Koenig <christian.koenig@amd.com>
>>
>> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 57 +++++++++++++++++++
>>   .../drm/amd/include/amdgpu_usermode_queue.h   |  8 +++
>>   2 files changed, 65 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> index b566ce4cb7f0..2a854a5e2f70 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> @@ -69,6 +69,56 @@ amdgpu_userqueue_get_doorbell(struct amdgpu_device *adev,
>>       return 0;
>>   }
>>
>> +static int
>> +amdgpu_userqueue_create_context(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
>> +{
>> +    int r;
>> +    struct amdgpu_userq_ctx *pctx = &queue->proc_ctx;
>> +    struct amdgpu_userq_ctx *gctx = &queue->gang_ctx;
>> +    /*
>> +     * The FW expects atleast one page space allocated for
>> +     * process context related work, and one for gang context.
>> +     */
>> +    r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
>> +                                AMDGPU_GEM_DOMAIN_VRAM,
>> +                                &pctx->obj,
>> +                                &pctx->gpu_addr,
>> +                                &pctx->cpu_ptr);
>> +    if (r) {
>> +        DRM_ERROR("Failed to allocate proc bo for userqueue (%d)", r);
>> +        return r;
>> +    }
>> +
>> +    r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
>> +                                AMDGPU_GEM_DOMAIN_VRAM,
>> +                                &gctx->obj,
>> +                                &gctx->gpu_addr,
>> +                                &gctx->cpu_ptr);
>> +    if (r) {
>> +        DRM_ERROR("Failed to allocate proc bo for userqueue (%d)", r);
>> +        amdgpu_bo_free_kernel(&pctx->obj,
>> +                              &pctx->gpu_addr,
>> +                              &pctx->cpu_ptr);
>> +        return r;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static void
>> +amdgpu_userqueue_free_context(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
>> +{
>> +    struct amdgpu_userq_ctx *pctx = &queue->proc_ctx;
>> +    struct amdgpu_userq_ctx *gctx = &queue->gang_ctx;
>> +
>> +    amdgpu_bo_free_kernel(&pctx->obj,
>> +                          &pctx->gpu_addr,
>> +                          &pctx->cpu_ptr);
>> +    amdgpu_bo_free_kernel(&pctx->obj,
>> +                          &gctx->gpu_addr,
>> +                          &gctx->cpu_ptr);
>> +}
>> +
>>   static void
>>   amdgpu_userqueue_setup_mqd(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
>>   {
>> @@ -282,6 +332,12 @@ int amdgpu_userqueue_create(struct amdgpu_device *adev, struct drm_file *filp,
>>           goto free_mqd;
>>       }
>>
>> +    r = amdgpu_userqueue_create_context(adev, queue);
>> +    if (r < 0) {
>> +        DRM_ERROR("Failed to create context for queue\n");
>> +        goto free_mqd;
>> +    }
>> +
>>       ctx->userq = queue;
>>       args->out.q_id = queue->queue_id;
>>       args->out.flags = 0;
>> @@ -306,6 +362,7 @@ void amdgpu_userqueue_destroy(struct amdgpu_device *adev, struct drm_file *filp,
>>       struct amdgpu_usermode_queue *queue = ctx->userq;
>>
>>       mutex_lock(&adev->userq.userq_mutex);
>> +    amdgpu_userqueue_free_context(adev, queue);
>>       amdgpu_userqueue_destroy_mqd(queue);
>>       amdgpu_userqueue_remove_index(adev, queue);
>>       ctx->userq = NULL;
>> diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>> index c1fe39ffaf72..8bf3c0be6937 100644
>> --- a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>> +++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>> @@ -26,6 +26,12 @@
>>
>>   #define AMDGPU_MAX_USERQ 512
>>
>> +struct amdgpu_userq_ctx {
>> +       struct amdgpu_bo *obj;
>> +       uint64_t gpu_addr;
>> +       void    *cpu_ptr;
>> +};
>> +
>>   struct amdgpu_usermode_queue {
>>          int             queue_id;
>>          int             queue_type;
>> @@ -44,6 +50,8 @@ struct amdgpu_usermode_queue {
>>
>>          struct amdgpu_bo        *mqd_obj;
>>          struct amdgpu_vm        *vm;
>> +       struct amdgpu_userq_ctx proc_ctx;
>> +       struct amdgpu_userq_ctx gang_ctx;
>>          struct list_head        list;
>>   };
>>
>> --
>> 2.34.1
>>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 0/7] RFC: Usermode queue for AMDGPU driver
  2022-12-29 18:02 ` [RFC 0/7] RFC: Usermode queue for AMDGPU driver Alex Deucher
@ 2023-01-03  9:43   ` Shashank Sharma
  2023-01-03  9:47     ` Christian König
  0 siblings, 1 reply; 64+ messages in thread
From: Shashank Sharma @ 2023-01-03  9:43 UTC (permalink / raw)
  To: Alex Deucher
  Cc: Alex Deucher, arvind.yadav, Christian Koenig, amd-gfx,
	arunpravin.paneerselvam


On 29/12/2022 19:02, Alex Deucher wrote:
> On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma <shashank.sharma@amd.com> wrote:
>> This is a RFC series to implement usermode graphics queues for AMDGPU
>> driver (Navi 3X and above). The idea of usermode graphics queue is to
>> allow direct workload submission from a userspace graphics process who
>> has amdgpu graphics context.
>>
>> Once we have some initial feedback on the design, we will publish a
>> follow up V1 series with a libdrm consumer test.
> I think this should look more like the following:
> 1. Convert doorbells to full fledged GEM objects just like vram.  Then
> update the GEM IOCTL to allow allocation of doorbell BOs.
> 2. Store MQD data per amdgpu_ctx.

If my understanding of the comments is correct, we have conflicting
opinions here on where to store the MQD data. @Christian?

> 3. Create secure semaphore pool and map RO into each GPUVM.
> 4. Add callbacks to each IP type that supports user mode queues.
> These callbacks should handle the IP specific MQD initialization and
> mapping/unmapping details including allocation of BOs for the MQD
> itself and any relevant metadata.  The USERQ IOCTL handler will look
> up the callback based on the IP type specified in the IOCTL.

Noted.

Shashank

>
> Alex
>
>> Cc: Alex Deucher <alexander.deucher@amd.com>
>> Cc: Christian Koenig <christian.koenig@amd.com>
>>
>> Alex Deucher (1):
>>    drm/amdgpu: UAPI for user queue management
>>
>> Arunpravin Paneer Selvam (1):
>>    drm/amdgpu: Secure semaphore for usermode queue
>>
>> Arvind Yadav (1):
>>    drm/amdgpu: Create MQD for userspace queue
>>
>> Shashank Sharma (4):
>>    drm/amdgpu: Add usermode queue for gfx work
>>    drm/amdgpu: Allocate doorbell slot for user queue
>>    drm/amdgpu: Create context for usermode queue
>>    drm/amdgpu: Map userqueue into HW
>>
>>   drivers/gpu/drm/amd/amdgpu/Makefile           |   3 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  14 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h       |   1 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 486 ++++++++++++++++
>>   .../amd/amdgpu/amdgpu_userqueue_secure_sem.c  | 245 ++++++++
>>   .../drm/amd/include/amdgpu_usermode_queue.h   |  68 +++
>>   .../amd/include/amdgpu_usermode_queue_mqd.h   | 544 ++++++++++++++++++
>>   include/uapi/drm/amdgpu_drm.h                 |  52 ++
>>   8 files changed, 1413 insertions(+)
>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_secure_sem.c
>>   create mode 100644 drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>>   create mode 100644 drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h
>>
>> --
>> 2.34.1
>>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 0/7] RFC: Usermode queue for AMDGPU driver
  2023-01-03  9:43   ` Shashank Sharma
@ 2023-01-03  9:47     ` Christian König
  2023-01-03 10:00       ` Shashank Sharma
  0 siblings, 1 reply; 64+ messages in thread
From: Christian König @ 2023-01-03  9:47 UTC (permalink / raw)
  To: Shashank Sharma, Alex Deucher
  Cc: Alex Deucher, amd-gfx, Christian Koenig, arvind.yadav,
	arunpravin.paneerselvam

Am 03.01.23 um 10:43 schrieb Shashank Sharma:
>
> On 29/12/2022 19:02, Alex Deucher wrote:
>> On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma 
>> <shashank.sharma@amd.com> wrote:
>>> This is a RFC series to implement usermode graphics queues for AMDGPU
>>> driver (Navi 3X and above). The idea of usermode graphics queue is to
>>> allow direct workload submission from a userspace graphics process who
>>> has amdgpu graphics context.
>>>
>>> Once we have some initial feedback on the design, we will publish a
>>> follow up V1 series with a libdrm consumer test.
>> I think this should look more like the following:
>> 1. Convert doorbells to full fledged GEM objects just like vram.  Then
>> update the GEM IOCTL to allow allocation of doorbell BOs.
>> 2. Store MQD data per amdgpu_ctx.
>
> If my understanding of the comments is correct, we are having 
> conflicting opinions here on where to save the MQD data. @Christian ?

You need something like an amdgpu_userq object which holds the BO with 
the MQD the hardware is using as well as anything else necessary for the 
queue.
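
Roughly like this (just a sketch, the exact members will follow from
the IP specific MQD requirements):

struct amdgpu_userq {
    int                 queue_id;
    struct amdgpu_bo    *mqd_obj;       /* MQD the HW/MES actually consumes */
    uint64_t            mqd_gpu_addr;
    void                *mqd_cpu_ptr;
    int                 doorbell_index;
    uint64_t            wptr_gpu_addr;
    uint64_t            rptr_gpu_addr;
    /* plus gang/process ctx BOs and whatever else the queue needs */
};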

Regards,
Christian.

>
>> 3. Create secure semaphore pool and map RO into each GPUVM.
>> 4. Add callbacks to each IP type that supports user mode queues.
>> These callbacks should handle the IP specific MQD initialization and
>> mapping/unmapping details including allocation of BOs for the MQD
>> itself and any relevant metadata.  The USERQ IOCTL handler will look
>> up the callback based on the IP type specified in the IOCTL.
>
> Noted.
>
> Shashank
>
>>
>> Alex
>>
>>> Cc: Alex Deucher <alexander.deucher@amd.com>
>>> Cc: Christian Koenig <christian.koenig@amd.com>
>>>
>>> Alex Deucher (1):
>>>    drm/amdgpu: UAPI for user queue management
>>>
>>> Arunpravin Paneer Selvam (1):
>>>    drm/amdgpu: Secure semaphore for usermode queue
>>>
>>> Arvind Yadav (1):
>>>    drm/amdgpu: Create MQD for userspace queue
>>>
>>> Shashank Sharma (4):
>>>    drm/amdgpu: Add usermode queue for gfx work
>>>    drm/amdgpu: Allocate doorbell slot for user queue
>>>    drm/amdgpu: Create context for usermode queue
>>>    drm/amdgpu: Map userqueue into HW
>>>
>>>   drivers/gpu/drm/amd/amdgpu/Makefile           |   3 +
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  14 +
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h       |   1 +
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 486 ++++++++++++++++
>>>   .../amd/amdgpu/amdgpu_userqueue_secure_sem.c  | 245 ++++++++
>>>   .../drm/amd/include/amdgpu_usermode_queue.h   |  68 +++
>>>   .../amd/include/amdgpu_usermode_queue_mqd.h   | 544 
>>> ++++++++++++++++++
>>>   include/uapi/drm/amdgpu_drm.h                 |  52 ++
>>>   8 files changed, 1413 insertions(+)
>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>>   create mode 100644 
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_secure_sem.c
>>>   create mode 100644 
>>> drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>>>   create mode 100644 
>>> drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h
>>>
>>> -- 
>>> 2.34.1
>>>


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 0/7] RFC: Usermode queue for AMDGPU driver
  2023-01-03  9:47     ` Christian König
@ 2023-01-03 10:00       ` Shashank Sharma
  2023-01-03 10:02         ` Christian König
  0 siblings, 1 reply; 64+ messages in thread
From: Shashank Sharma @ 2023-01-03 10:00 UTC (permalink / raw)
  To: Christian König, Alex Deucher
  Cc: Alex Deucher, amd-gfx, Christian Koenig, arvind.yadav,
	arunpravin.paneerselvam


On 03/01/2023 10:47, Christian König wrote:
> Am 03.01.23 um 10:43 schrieb Shashank Sharma:
>>
>> On 29/12/2022 19:02, Alex Deucher wrote:
>>> On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma 
>>> <shashank.sharma@amd.com> wrote:
>>>> This is a RFC series to implement usermode graphics queues for AMDGPU
>>>> driver (Navi 3X and above). The idea of usermode graphics queue is to
>>>> allow direct workload submission from a userspace graphics process who
>>>> has amdgpu graphics context.
>>>>
>>>> Once we have some initial feedback on the design, we will publish a
>>>> follow up V1 series with a libdrm consumer test.
>>> I think this should look more like the following:
>>> 1. Convert doorbells to full fledged GEM objects just like vram.  Then
>>> update the GEM IOCTL to allow allocation of doorbell BOs.
>>> 2. Store MQD data per amdgpu_ctx.
>>
>> If my understanding of the comments is correct, we are having 
>> conflicting opinions here on where to save the MQD data. @Christian ?
>
> You need something like an amdgpu_userq object which holds the BO with 
> the MQD the hardware is using as well as anything else necessary for 
> the queue.

And we will store it in the amdgpu fpriv (driver_private) area, probably
by using something like an amdgpu_userq_mgr, similar to amdgpu_ctx_mgr.

- Shashank

>
> Regards,
> Christian.
>
>>
>>> 3. Create secure semaphore pool and map RO into each GPUVM.
>>> 4. Add callbacks to each IP type that supports user mode queues.
>>> These callbacks should handle the IP specific MQD initialization and
>>> mapping/unmapping details including allocation of BOs for the MQD
>>> itself and any relevant metadata.  The USERQ IOCTL handler will look
>>> up the callback based on the IP type specified in the IOCTL.
>>
>> Noted.
>>
>> Shashank
>>
>>>
>>> Alex
>>>
>>>> Cc: Alex Deucher <alexander.deucher@amd.com>
>>>> Cc: Christian Koenig <christian.koenig@amd.com>
>>>>
>>>> Alex Deucher (1):
>>>>    drm/amdgpu: UAPI for user queue management
>>>>
>>>> Arunpravin Paneer Selvam (1):
>>>>    drm/amdgpu: Secure semaphore for usermode queue
>>>>
>>>> Arvind Yadav (1):
>>>>    drm/amdgpu: Create MQD for userspace queue
>>>>
>>>> Shashank Sharma (4):
>>>>    drm/amdgpu: Add usermode queue for gfx work
>>>>    drm/amdgpu: Allocate doorbell slot for user queue
>>>>    drm/amdgpu: Create context for usermode queue
>>>>    drm/amdgpu: Map userqueue into HW
>>>>
>>>>   drivers/gpu/drm/amd/amdgpu/Makefile           |   3 +
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  14 +
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h       |   1 +
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 486 ++++++++++++++++
>>>>   .../amd/amdgpu/amdgpu_userqueue_secure_sem.c  | 245 ++++++++
>>>>   .../drm/amd/include/amdgpu_usermode_queue.h   |  68 +++
>>>>   .../amd/include/amdgpu_usermode_queue_mqd.h   | 544 
>>>> ++++++++++++++++++
>>>>   include/uapi/drm/amdgpu_drm.h                 |  52 ++
>>>>   8 files changed, 1413 insertions(+)
>>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>>>   create mode 100644 
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_secure_sem.c
>>>>   create mode 100644 
>>>> drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>>>>   create mode 100644 
>>>> drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h
>>>>
>>>> -- 
>>>> 2.34.1
>>>>
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 0/7] RFC: Usermode queue for AMDGPU driver
  2023-01-03 10:00       ` Shashank Sharma
@ 2023-01-03 10:02         ` Christian König
  0 siblings, 0 replies; 64+ messages in thread
From: Christian König @ 2023-01-03 10:02 UTC (permalink / raw)
  To: Shashank Sharma, Alex Deucher
  Cc: Alex Deucher, amd-gfx, Christian Koenig, arvind.yadav,
	arunpravin.paneerselvam

Am 03.01.23 um 11:00 schrieb Shashank Sharma:
>
> On 03/01/2023 10:47, Christian König wrote:
>> Am 03.01.23 um 10:43 schrieb Shashank Sharma:
>>>
>>> On 29/12/2022 19:02, Alex Deucher wrote:
>>>> On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma 
>>>> <shashank.sharma@amd.com> wrote:
>>>>> This is a RFC series to implement usermode graphics queues for AMDGPU
>>>>> driver (Navi 3X and above). The idea of usermode graphics queue is to
>>>>> allow direct workload submission from a userspace graphics process 
>>>>> who
>>>>> has amdgpu graphics context.
>>>>>
>>>>> Once we have some initial feedback on the design, we will publish a
>>>>> follow up V1 series with a libdrm consumer test.
>>>> I think this should look more like the following:
>>>> 1. Convert doorbells to full fledged GEM objects just like vram.  Then
>>>> update the GEM IOCTL to allow allocation of doorbell BOs.
>>>> 2. Store MQD data per amdgpu_ctx.
>>>
>>> If my understanding of the comments is correct, we are having 
>>> conflicting opinions here on where to save the MQD data. @Christian ?
>>
>> You need something like an amdgpu_userq object which holds the BO 
>> with the MQD the hardware is using as well as anything else necessary 
>> for the queue.
>
> And we will store it in the amdgpu fpriv (driver_private) area, probably
> by using something like an amdgpu_userq_mgr, similar to amdgpu_ctx_mgr.

Exactly that, yes. The amdgpu_userq_mgr keeps the idr/mutex and 
everything necessary per client while the amdgpu_userq object represents 
the queue itself.
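
As a rough sketch (field names are just illustrative, mirroring what
amdgpu_ctx_mgr does today), the per-client part would be something like:

/* one instance per client, embedded in struct amdgpu_fpriv next to ctx_mgr */
struct amdgpu_userq_mgr {
    struct idr            userq_idr;    /* queue_id -> struct amdgpu_userq */
    struct mutex          userq_mutex;
    struct amdgpu_device  *adev;
};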

Christian.

>
> - Shashank
>
>>
>> Regards,
>> Christian.
>>
>>>
>>>> 3. Create secure semaphore pool and map RO into each GPUVM.
>>>> 4. Add callbacks to each IP type that supports user mode queues.
>>>> These callbacks should handle the IP specific MQD initialization and
>>>> mapping/unmapping details including allocation of BOs for the MQD
>>>> itself and any relevant metadata.  The USERQ IOCTL handler will look
>>>> up the callback based on the IP type specified in the IOCTL.
>>>
>>> Noted.
>>>
>>> Shashank
>>>
>>>>
>>>> Alex
>>>>
>>>>> Cc: Alex Deucher <alexander.deucher@amd.com>
>>>>> Cc: Christian Koenig <christian.koenig@amd.com>
>>>>>
>>>>> Alex Deucher (1):
>>>>>    drm/amdgpu: UAPI for user queue management
>>>>>
>>>>> Arunpravin Paneer Selvam (1):
>>>>>    drm/amdgpu: Secure semaphore for usermode queue
>>>>>
>>>>> Arvind Yadav (1):
>>>>>    drm/amdgpu: Create MQD for userspace queue
>>>>>
>>>>> Shashank Sharma (4):
>>>>>    drm/amdgpu: Add usermode queue for gfx work
>>>>>    drm/amdgpu: Allocate doorbell slot for user queue
>>>>>    drm/amdgpu: Create context for usermode queue
>>>>>    drm/amdgpu: Map userqueue into HW
>>>>>
>>>>>   drivers/gpu/drm/amd/amdgpu/Makefile           |   3 +
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  14 +
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h       |   1 +
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 486 
>>>>> ++++++++++++++++
>>>>>   .../amd/amdgpu/amdgpu_userqueue_secure_sem.c  | 245 ++++++++
>>>>>   .../drm/amd/include/amdgpu_usermode_queue.h   |  68 +++
>>>>>   .../amd/include/amdgpu_usermode_queue_mqd.h   | 544 
>>>>> ++++++++++++++++++
>>>>>   include/uapi/drm/amdgpu_drm.h                 |  52 ++
>>>>>   8 files changed, 1413 insertions(+)
>>>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>>>>   create mode 100644 
>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_secure_sem.c
>>>>>   create mode 100644 
>>>>> drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>>>>>   create mode 100644 
>>>>> drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h
>>>>>
>>>>> -- 
>>>>> 2.34.1
>>>>>
>>


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management
  2023-01-02 13:26   ` Christian König
@ 2023-01-03 14:23     ` Alex Deucher
  0 siblings, 0 replies; 64+ messages in thread
From: Alex Deucher @ 2023-01-03 14:23 UTC (permalink / raw)
  To: Christian König
  Cc: Alex Deucher, arunpravin.paneerselvam, arvind.yadav, amd-gfx,
	Shashank Sharma

On Mon, Jan 2, 2023 at 8:26 AM Christian König <christian.koenig@amd.com> wrote:
>
> Am 23.12.22 um 20:36 schrieb Shashank Sharma:
> > From: Alex Deucher <alexander.deucher@amd.com>
> >
> > This patch intorduces new UAPI/IOCTL for usermode graphics
> > queue. The userspace app will fill this structure and request
> > the graphics driver to add a graphics work queue for it. The
> > output of this UAPI is a queue id.
> >
> > This UAPI maps the queue into GPU, so the graphics app can start
> > submitting work to the queue as soon as the call returns.
> >
> > Cc: Alex Deucher <alexander.deucher@amd.com>
> > Cc: Christian Koenig <christian.koenig@amd.com>
> > Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> > Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
> > ---
> >   include/uapi/drm/amdgpu_drm.h | 52 +++++++++++++++++++++++++++++++++++
> >   1 file changed, 52 insertions(+)
> >
> > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> > index 0d93ec132ebb..a3d0dd6f62c5 100644
> > --- a/include/uapi/drm/amdgpu_drm.h
> > +++ b/include/uapi/drm/amdgpu_drm.h
> > @@ -54,6 +54,7 @@ extern "C" {
> >   #define DRM_AMDGPU_VM                       0x13
> >   #define DRM_AMDGPU_FENCE_TO_HANDLE  0x14
> >   #define DRM_AMDGPU_SCHED            0x15
> > +#define DRM_AMDGPU_USERQ             0x16
> >
> >   #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> >   #define DRM_IOCTL_AMDGPU_GEM_MMAP   DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> > @@ -71,6 +72,7 @@ extern "C" {
> >   #define DRM_IOCTL_AMDGPU_VM         DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
> >   #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
> >   #define DRM_IOCTL_AMDGPU_SCHED              DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> > +#define DRM_IOCTL_AMDGPU_USERQ               DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
> >
> >   /**
> >    * DOC: memory domains
> > @@ -288,6 +290,56 @@ union drm_amdgpu_ctx {
> >       union drm_amdgpu_ctx_out out;
> >   };
> >
> > +/* user queue IOCTL */
> > +#define AMDGPU_USERQ_OP_CREATE       1
> > +#define AMDGPU_USERQ_OP_FREE 2
> > +
> > +#define AMDGPU_USERQ_MQD_FLAGS_SECURE        (1 << 0)
> > +#define AMDGPU_USERQ_MQD_FLAGS_AQL   (1 << 1)
> > +
> > +struct drm_amdgpu_userq_mqd {
> > +     /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
> > +     __u32   flags;
> > +     /** IP type: AMDGPU_HW_IP_* */
> > +     __u32   ip_type;
> > +     /** GEM object handle */
> > +     __u32   doorbell_handle;
> > +     /** Doorbell offset in dwords */
> > +     __u32   doorbell_offset;
> > +     /** GPU virtual address of the queue */
> > +     __u64   queue_va;
> > +     /** Size of the queue in bytes */
> > +     __u64   queue_size;
> > +     /** GPU virtual address of the rptr */
> > +     __u64   rptr_va;
> > +     /** GPU virtual address of the wptr */
> > +     __u64   wptr_va;
>
> We should probably note somewhere that those are inputs to the queue and
> need to be allocated by userspace somewhere.
>
> > +};
> > +
> > +struct drm_amdgpu_userq_in {
> > +     /** AMDGPU_USERQ_OP_* */
> > +     __u32   op;
> > +     /** Flags */
> > +     __u32   flags;
>
> > +     /** Context handle to associate the queue with */
> > +     __u32   ctx_id;
>
> Uff, this is just blunt nonsense. Queues are not related to ctx objects
> in any way possible.

I thought we wanted to have queues associated with contexts for
hang/guilty tracking.

Alex

>
> > +     __u32   pad;
> > +     /** Queue descriptor */
> > +     struct drm_amdgpu_userq_mqd mqd;
> > +};
> > +
> > +struct drm_amdgpu_userq_out {
> > +     /** Queue handle */
> > +     __u32   q_id;
> > +     /** Flags */
> > +     __u32   flags;
> > +};
> > +
> > +union drm_amdgpu_userq {
> > +     struct drm_amdgpu_userq_in in;
> > +     struct drm_amdgpu_userq_out out;
> > +};
> > +
> >   /* vm ioctl */
> >   #define AMDGPU_VM_OP_RESERVE_VMID   1
> >   #define AMDGPU_VM_OP_UNRESERVE_VMID 2
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work
  2023-01-03  9:35               ` Christian König
@ 2023-01-03 14:34                 ` Alex Deucher
  2023-01-03 14:50                   ` Christian König
  0 siblings, 1 reply; 64+ messages in thread
From: Alex Deucher @ 2023-01-03 14:34 UTC (permalink / raw)
  To: Christian König
  Cc: arunpravin.paneerselvam, Shashank Sharma, arvind.yadav, amd-gfx,
	Alex Deucher, Christian Koenig

On Tue, Jan 3, 2023 at 4:35 AM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Am 03.01.23 um 10:22 schrieb Shashank Sharma:
> >
> > On 03/01/2023 10:15, Christian König wrote:
> >> Am 03.01.23 um 10:12 schrieb Shashank Sharma:
> >>>
> >>> On 02/01/2023 13:39, Christian König wrote:
> >>>> Hi Shashank,
> >>>>
> >>>> Am 26.12.22 um 11:41 schrieb Shashank Sharma:
> >>>>> [SNIP]
> >>>>>>>         /* df */
> >>>>>>>       struct amdgpu_df                df;
> >>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> >>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> >>>>>>> index 0fa0e56daf67..f7413859b14f 100644
> >>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> >>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> >>>>>>> @@ -57,6 +57,7 @@ struct amdgpu_ctx {
> >>>>>>>       unsigned long            ras_counter_ce;
> >>>>>>>       unsigned long            ras_counter_ue;
> >>>>>>>       uint32_t            stable_pstate;
> >>>>>>> +    struct amdgpu_usermode_queue    *userq;
> >>>>>>
> >>>>>> Why should we have this in the ctx here???
> >>>>>
> >>>>> We are allocating a few things dynamically for the queue, which
> >>>>> would be valid until we destroy this queue. Also we need to save
> >>>>> this queue
> >>>>>
> >>>>> container at some place for the destroy function,  and I thought
> >>>>> it would make sense to keep this with the context ptr, as this is
> >>>>> how we are
> >>>>>
> >>>>> identifying the incoming request.
> >>>>
> >>>> I have absolutely no idea how you end up with that design.
> >>>>
> >>>> The ctx object is the CS IOCTL context, that is not even remotely
> >>>> related to anything the user queues should be doing.
> >>>>
> >>>> Please completely drop that relationship and don't use any of the
> >>>> ctx object stuff in the user queue code.
> >>>>
> >>> Historically the workload submission always came with a context (due
> >>> to CS IOCTL), so we thought it would make sense to still have its
> >>> relevance in the new workload submission method. Would you prefer
> >>> this new submission to be independent of AMDGPU context ?
> >>
> >> Well not prefer, the point is that this doesn't make any sense at all.
> >>
> >> See the amdgpu_ctx object contains the resulting fence pointers for
> >> the CS IOCTL as well as information necessary for the CS IOCTL to
> >> work (e.g. scheduler entities etc...).
> >>
> >> I don't see how anything from that stuff would be useful for the MES
> >> or user queues.
> >>
> >> Christian.
> >
> >
> > I am getting your point, and it makes sense as well. But in such
> > scenario, we might have to create something parallel to
> > AMDGPU_USERQ_CTX which is doing very much the same.
> >
> > We can still do it to make a logically separate entity, but any
> > suggestions on where to keep this udev_ctx ptr (if not in adev, as
> > well as not ctx) ?
>
>
> Take a look at the amdgpu_ctx_mgr object with the mutex and the idr and
> how this is embedded into the amdgpu_fpriv object. It should become
> pretty clear from there on.
>
> I don't think we need a userq_ctx or similar, each userq should be an
> independent object. What we need is a userq_mgr object which holds the
> collection of all the userq objects the client application has created
> through its fpriv connection to the driver.

Don't we want to associate the queues with a ctx for guilty tracking
purposes when there is a hang?

Alex

>
> Regards,
> Christian.
>
> >
> > - Shashank
> >
> >
> >>
> >>>
> >>> - Shashank
> >>>
> >>>
> >>>> Christian.
> >>>>
> >>>>>
> >>>>> - Shashank
> >>>>>
> >>>>>>
> >>>>>> Regards,
> >>>>>> Christian.
> >>>>>>
> >>>>>>>   };
> >>>>>>>     struct amdgpu_ctx_mgr {
> >>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> >>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> >>>>>>> new file mode 100644
> >>>>>>> index 000000000000..3b6e8f75495c
> >>>>>>> --- /dev/null
> >>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> >>>>>>> @@ -0,0 +1,187 @@
> >>>>>>> +/*
> >>>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
> >>>>>>> + *
> >>>>>>> + * Permission is hereby granted, free of charge, to any person
> >>>>>>> obtaining a
> >>>>>>> + * copy of this software and associated documentation files
> >>>>>>> (the "Software"),
> >>>>>>> + * to deal in the Software without restriction, including
> >>>>>>> without limitation
> >>>>>>> + * the rights to use, copy, modify, merge, publish, distribute,
> >>>>>>> sublicense,
> >>>>>>> + * and/or sell copies of the Software, and to permit persons to
> >>>>>>> whom the
> >>>>>>> + * Software is furnished to do so, subject to the following
> >>>>>>> conditions:
> >>>>>>> + *
> >>>>>>> + * The above copyright notice and this permission notice shall
> >>>>>>> be included in
> >>>>>>> + * all copies or substantial portions of the Software.
> >>>>>>> + *
> >>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
> >>>>>>> KIND, EXPRESS OR
> >>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> >>>>>>> MERCHANTABILITY,
> >>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
> >>>>>>> EVENT SHALL
> >>>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY
> >>>>>>> CLAIM, DAMAGES OR
> >>>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> >>>>>>> OTHERWISE,
> >>>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
> >>>>>>> THE USE OR
> >>>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
> >>>>>>> + *
> >>>>>>> + */
> >>>>>>> +
> >>>>>>> +#include "amdgpu.h"
> >>>>>>> +#include "amdgpu_vm.h"
> >>>>>>> +#include "amdgpu_mes.h"
> >>>>>>> +#include "amdgpu_usermode_queue.h"
> >>>>>>> +#include "soc15_common.h"
> >>>>>>> +
> >>>>>>> +#define CHECK_ACCESS(a) (access_ok((const void __user *)a,
> >>>>>>> sizeof(__u64)))
> >>>>>>> +
> >>>>>>> +static int
> >>>>>>> +amdgpu_userqueue_index(struct amdgpu_device *adev)
> >>>>>>> +{
> >>>>>>> +    int index;
> >>>>>>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
> >>>>>>> +
> >>>>>>> +    index = ida_simple_get(&uqg->ida, 2, AMDGPU_MAX_USERQ,
> >>>>>>> GFP_KERNEL);
> >>>>>>> +    return index;
> >>>>>>> +}
> >>>>>>> +
> >>>>>>> +static void
> >>>>>>> +amdgpu_userqueue_remove_index(struct amdgpu_device *adev,
> >>>>>>> struct amdgpu_usermode_queue *queue)
> >>>>>>> +{
> >>>>>>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
> >>>>>>> +
> >>>>>>> +    ida_simple_remove(&uqg->ida, queue->queue_id);
> >>>>>>> +}
> >>>>>>> +
> >>>>>>> +static int
> >>>>>>> +amdgpu_userqueue_validate_input(struct amdgpu_device *adev,
> >>>>>>> struct drm_amdgpu_userq_mqd *mqd_in)
> >>>>>>> +{
> >>>>>>> +    if (mqd_in->queue_va == 0 || mqd_in->doorbell_handle == 0
> >>>>>>> || mqd_in->doorbell_offset == 0) {
> >>>>>>> +        DRM_ERROR("Invalid queue object address\n");
> >>>>>>> +        return -EINVAL;
> >>>>>>> +    }
> >>>>>>> +
> >>>>>>> +    if (mqd_in->queue_size == 0 || mqd_in->rptr_va == 0 ||
> >>>>>>> mqd_in->wptr_va == 0) {
> >>>>>>> +        DRM_ERROR("Invalid queue object value\n");
> >>>>>>> +        return -EINVAL;
> >>>>>>> +    }
> >>>>>>> +
> >>>>>>> +    if (mqd_in->ip_type < AMDGPU_HW_IP_GFX || mqd_in->ip_type
> >>>>>>> >= AMDGPU_HW_IP_NUM) {
> >>>>>>> +        DRM_ERROR("Invalid HW IP type 0x%x\n", mqd_in->ip_type);
> >>>>>>> +        return -EINVAL;
> >>>>>>> +    }
> >>>>>>> +
> >>>>>>> +    if (!CHECK_ACCESS(mqd_in->queue_va) ||
> >>>>>>> !CHECK_ACCESS(mqd_in->rptr_va) ||
> >>>>>>> +        !CHECK_ACCESS(mqd_in->wptr_va)) {
> >>>>>>> +            DRM_ERROR("Invalid mapping of queue ptrs, access
> >>>>>>> error\n");
> >>>>>>> +            return -EINVAL;
> >>>>>>> +    }
> >>>>>>> +
> >>>>>>> +    DRM_DEBUG_DRIVER("Input parameters to create queue are
> >>>>>>> valid\n");
> >>>>>>> +    return 0;
> >>>>>>> +}
> >>>>>>> +
> >>>>>>> +int amdgpu_userqueue_create(struct amdgpu_device *adev, struct
> >>>>>>> drm_file *filp,
> >>>>>>> +                            union drm_amdgpu_userq *args)
> >>>>>>> +{
> >>>>>>> +    int r, pasid;
> >>>>>>> +    struct amdgpu_usermode_queue *queue;
> >>>>>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
> >>>>>>> +    struct amdgpu_vm *vm = &fpriv->vm;
> >>>>>>> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv,
> >>>>>>> args->in.ctx_id);
> >>>>>>> +    struct drm_amdgpu_userq_mqd *mqd_in = &args->in.mqd;
> >>>>>>> +
> >>>>>>> +    if (!ctx) {
> >>>>>>> +        DRM_ERROR("Invalid GPU context\n");
> >>>>>>> +        return -EINVAL;
> >>>>>>> +    }
> >>>>>>> +
> >>>>>>> +    if (vm->pasid < 0) {
> >>>>>>> +        DRM_WARN("No PASID info found\n");
> >>>>>>> +        pasid = 0;
> >>>>>>> +    }
> >>>>>>> +
> >>>>>>> +    mutex_lock(&adev->userq.userq_mutex);
> >>>>>>> +
> >>>>>>> +    queue = kzalloc(sizeof(struct amdgpu_usermode_queue),
> >>>>>>> GFP_KERNEL);
> >>>>>>> +    if (!queue) {
> >>>>>>> +        DRM_ERROR("Failed to allocate memory for queue\n");
> >>>>>>> + mutex_unlock(&adev->userq.userq_mutex);
> >>>>>>> +        return -ENOMEM;
> >>>>>>> +    }
> >>>>>>> +
> >>>>>>> +    r = amdgpu_userqueue_validate_input(adev, mqd_in);
> >>>>>>> +    if (r < 0) {
> >>>>>>> +        DRM_ERROR("Invalid input to create queue\n");
> >>>>>>> +        goto free_queue;
> >>>>>>> +    }
> >>>>>>> +
> >>>>>>> +    queue->vm = vm;
> >>>>>>> +    queue->pasid = pasid;
> >>>>>>> +    queue->wptr_gpu_addr = mqd_in->wptr_va;
> >>>>>>> +    queue->rptr_gpu_addr = mqd_in->rptr_va;
> >>>>>>> +    queue->queue_size = mqd_in->queue_size;
> >>>>>>> +    queue->queue_type = mqd_in->ip_type;
> >>>>>>> +    queue->paging = false;
> >>>>>>> +    queue->flags = mqd_in->flags;
> >>>>>>> +    queue->queue_id = amdgpu_userqueue_index(adev);
> >>>>>>> +
> >>>>>>> +    ctx->userq = queue;
> >>>>>>> +    args->out.q_id = queue->queue_id;
> >>>>>>> +    args->out.flags = 0;
> >>>>>>> +    mutex_unlock(&adev->userq.userq_mutex);
> >>>>>>> +    return 0;
> >>>>>>> +
> >>>>>>> +free_queue:
> >>>>>>> +    amdgpu_userqueue_remove_index(adev, queue);
> >>>>>>> +    mutex_unlock(&adev->userq.userq_mutex);
> >>>>>>> +    kfree(queue);
> >>>>>>> +    return r;
> >>>>>>> +}
> >>>>>>> +
> >>>>>>> +void amdgpu_userqueue_destroy(struct amdgpu_device *adev,
> >>>>>>> struct drm_file *filp,
> >>>>>>> +                              union drm_amdgpu_userq *args)
> >>>>>>> +{
> >>>>>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
> >>>>>>> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv,
> >>>>>>> args->in.ctx_id);
> >>>>>>> +    struct amdgpu_usermode_queue *queue = ctx->userq;
> >>>>>>> +
> >>>>>>> +    mutex_lock(&adev->userq.userq_mutex);
> >>>>>>> +    amdgpu_userqueue_remove_index(adev, queue);
> >>>>>>> +    ctx->userq = NULL;
> >>>>>>> +    mutex_unlock(&adev->userq.userq_mutex);
> >>>>>>> +    kfree(queue);
> >>>>>>> +}
> >>>>>>> +
> >>>>>>> +int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
> >>>>>>> +               struct drm_file *filp)
> >>>>>>> +{
> >>>>>>> +    union drm_amdgpu_userq *args = data;
> >>>>>>> +    struct amdgpu_device *adev = drm_to_adev(dev);
> >>>>>>> +    int r = 0;
> >>>>>>> +
> >>>>>>> +    switch (args->in.op) {
> >>>>>>> +    case AMDGPU_USERQ_OP_CREATE:
> >>>>>>> +        r = amdgpu_userqueue_create(adev, filp, args);
> >>>>>>> +        if (r)
> >>>>>>> +            DRM_ERROR("Failed to create usermode queue\n");
> >>>>>>> +        break;
> >>>>>>> +
> >>>>>>> +    case AMDGPU_USERQ_OP_FREE:
> >>>>>>> +        amdgpu_userqueue_destroy(adev, filp, args);
> >>>>>>> +        break;
> >>>>>>> +
> >>>>>>> +    default:
> >>>>>>> +        DRM_ERROR("Invalid user queue op specified: %d\n",
> >>>>>>> args->in.op);
> >>>>>>> +        return -EINVAL;
> >>>>>>> +    }
> >>>>>>> +
> >>>>>>> +    return r;
> >>>>>>> +}
> >>>>>>> +
> >>>>>>> +int amdgpu_userqueue_init(struct amdgpu_device *adev)
> >>>>>>> +{
> >>>>>>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
> >>>>>>> +
> >>>>>>> +    mutex_init(&uqg->userq_mutex);
> >>>>>>> +    return 0;
> >>>>>>> +}
> >>>>>>> +
> >>>>>>> +void amdgpu_userqueue_fini(struct amdgpu_device *adev)
> >>>>>>> +{
> >>>>>>> +
> >>>>>>> +}
> >>>>>>> diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
> >>>>>>> b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
> >>>>>>> new file mode 100644
> >>>>>>> index 000000000000..c1fe39ffaf72
> >>>>>>> --- /dev/null
> >>>>>>> +++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
> >>>>>>> @@ -0,0 +1,50 @@
> >>>>>>> +/*
> >>>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
> >>>>>>> + *
> >>>>>>> + * Permission is hereby granted, free of charge, to any person
> >>>>>>> obtaining a
> >>>>>>> + * copy of this software and associated documentation files
> >>>>>>> (the "Software"),
> >>>>>>> + * to deal in the Software without restriction, including
> >>>>>>> without limitation
> >>>>>>> + * the rights to use, copy, modify, merge, publish, distribute,
> >>>>>>> sublicense,
> >>>>>>> + * and/or sell copies of the Software, and to permit persons to
> >>>>>>> whom the
> >>>>>>> + * Software is furnished to do so, subject to the following
> >>>>>>> conditions:
> >>>>>>> + *
> >>>>>>> + * The above copyright notice and this permission notice shall
> >>>>>>> be included in
> >>>>>>> + * all copies or substantial portions of the Software.
> >>>>>>> + *
> >>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
> >>>>>>> KIND, EXPRESS OR
> >>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> >>>>>>> MERCHANTABILITY,
> >>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
> >>>>>>> EVENT SHALL
> >>>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY
> >>>>>>> CLAIM, DAMAGES OR
> >>>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> >>>>>>> OTHERWISE,
> >>>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
> >>>>>>> THE USE OR
> >>>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
> >>>>>>> + *
> >>>>>>> + */
> >>>>>>> +
> >>>>>>> +#ifndef AMDGPU_USERMODE_QUEUE_H_
> >>>>>>> +#define AMDGPU_USERMODE_QUEUE_H_
> >>>>>>> +
> >>>>>>> +#define AMDGPU_MAX_USERQ 512
> >>>>>>> +
> >>>>>>> +struct amdgpu_usermode_queue {
> >>>>>>> +    int        queue_id;
> >>>>>>> +    int        queue_type;
> >>>>>>> +    int        queue_size;
> >>>>>>> +    int        paging;
> >>>>>>> +    int        pasid;
> >>>>>>> +    int        use_doorbell;
> >>>>>>> +    int        doorbell_index;
> >>>>>>> +
> >>>>>>> +    uint64_t    mqd_gpu_addr;
> >>>>>>> +    uint64_t    wptr_gpu_addr;
> >>>>>>> +    uint64_t    rptr_gpu_addr;
> >>>>>>> +    uint64_t    queue_gpu_addr;
> >>>>>>> +    uint64_t    flags;
> >>>>>>> +    void         *mqd_cpu_ptr;
> >>>>>>> +
> >>>>>>> +    struct amdgpu_bo    *mqd_obj;
> >>>>>>> +    struct amdgpu_vm        *vm;
> >>>>>>> +    struct list_head     list;
> >>>>>>> +};
> >>>>>>> +
> >>>>>>> +#endif
> >>>>>>
> >>>>
> >>
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 5/7] drm/amdgpu: Create context for usermode queue
  2023-01-03  9:40     ` Shashank Sharma
@ 2023-01-03 14:48       ` Alex Deucher
  0 siblings, 0 replies; 64+ messages in thread
From: Alex Deucher @ 2023-01-03 14:48 UTC (permalink / raw)
  To: Shashank Sharma
  Cc: Alex Deucher, arvind.yadav, Christian Koenig, amd-gfx,
	arunpravin.paneerselvam

On Tue, Jan 3, 2023 at 4:40 AM Shashank Sharma <shashank.sharma@amd.com> wrote:
>
>
> On 29/12/2022 18:54, Alex Deucher wrote:
> > On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma <shashank.sharma@amd.com> wrote:
> >> The FW expects us to allocate at least one page as process
> >> context space, and one for gang context space. This patch adds
> >> objects for the same.
> > This should be handled in the IP specific code for the MQD creation.
> > Each IP may have different requirements for MQD related metadata.
> >
> > Alex
>
> Noted, so 3 IP specific functions so far,
>
> .init_mqd(), .map() and .create_ctx_space().
>

I think this can be handled in init_mqd().  No need for a separate callback.
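
I.e. the IP specific init_mqd() would allocate the MQD BO and, right
there, any FW metadata that IP needs (for this FW, the one page of
process context and one page of gang context). A rough sketch, with a
purely illustrative function name:

static int gfx_v11_userq_init_mqd(struct amdgpu_device *adev,
                                  struct amdgpu_usermode_queue *queue)
{
    int r;

    /* ... allocate and initialize the MQD BO as in the current patch ... */

    /* FW metadata: one page of process ctx + one page of gang ctx */
    r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
                                AMDGPU_GEM_DOMAIN_VRAM,
                                &queue->proc_ctx.obj,
                                &queue->proc_ctx.gpu_addr,
                                &queue->proc_ctx.cpu_ptr);
    if (r)
        return r;

    r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
                                AMDGPU_GEM_DOMAIN_VRAM,
                                &queue->gang_ctx.obj,
                                &queue->gang_ctx.gpu_addr,
                                &queue->gang_ctx.cpu_ptr);
    if (r)
        amdgpu_bo_free_kernel(&queue->proc_ctx.obj,
                              &queue->proc_ctx.gpu_addr,
                              &queue->proc_ctx.cpu_ptr);

    return r;
}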

Alex

> - Shashank
>
> >
> >> Cc: Alex Deucher <alexander.deucher@amd.com>
> >> Cc: Christian Koenig <christian.koenig@amd.com>
> >>
> >> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 57 +++++++++++++++++++
> >>   .../drm/amd/include/amdgpu_usermode_queue.h   |  8 +++
> >>   2 files changed, 65 insertions(+)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> >> index b566ce4cb7f0..2a854a5e2f70 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> >> @@ -69,6 +69,56 @@ amdgpu_userqueue_get_doorbell(struct amdgpu_device *adev,
> >>       return 0;
> >>   }
> >>
> >> +static int
> >> +amdgpu_userqueue_create_context(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
> >> +{
> >> +    int r;
> >> +    struct amdgpu_userq_ctx *pctx = &queue->proc_ctx;
> >> +    struct amdgpu_userq_ctx *gctx = &queue->gang_ctx;
> >> +    /*
> >> +     * The FW expects atleast one page space allocated for
> >> +     * process context related work, and one for gang context.
> >> +     */
> >> +    r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
> >> +                                AMDGPU_GEM_DOMAIN_VRAM,
> >> +                                &pctx->obj,
> >> +                                &pctx->gpu_addr,
> >> +                                &pctx->cpu_ptr);
> >> +    if (r) {
> >> +        DRM_ERROR("Failed to allocate proc bo for userqueue (%d)", r);
> >> +        return r;
> >> +    }
> >> +
> >> +    r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
> >> +                                AMDGPU_GEM_DOMAIN_VRAM,
> >> +                                &gctx->obj,
> >> +                                &gctx->gpu_addr,
> >> +                                &gctx->cpu_ptr);
> >> +    if (r) {
> >> +        DRM_ERROR("Failed to allocate proc bo for userqueue (%d)", r);
> >> +        amdgpu_bo_free_kernel(&pctx->obj,
> >> +                              &pctx->gpu_addr,
> >> +                              &pctx->cpu_ptr);
> >> +        return r;
> >> +    }
> >> +
> >> +    return 0;
> >> +}
> >> +
> >> +static void
> >> +amdgpu_userqueue_free_context(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
> >> +{
> >> +    struct amdgpu_userq_ctx *pctx = &queue->proc_ctx;
> >> +    struct amdgpu_userq_ctx *gctx = &queue->gang_ctx;
> >> +
> >> +    amdgpu_bo_free_kernel(&pctx->obj,
> >> +                          &pctx->gpu_addr,
> >> +                          &pctx->cpu_ptr);
> >> +    amdgpu_bo_free_kernel(&pctx->obj,
> >> +                          &gctx->gpu_addr,
> >> +                          &gctx->cpu_ptr);
> >> +}
> >> +
> >>   static void
> >>   amdgpu_userqueue_setup_mqd(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
> >>   {
> >> @@ -282,6 +332,12 @@ int amdgpu_userqueue_create(struct amdgpu_device *adev, struct drm_file *filp,
> >>           goto free_mqd;
> >>       }
> >>
> >> +    r = amdgpu_userqueue_create_context(adev, queue);
> >> +    if (r < 0) {
> >> +        DRM_ERROR("Failed to create context for queue\n");
> >> +        goto free_mqd;
> >> +    }
> >> +
> >>       ctx->userq = queue;
> >>       args->out.q_id = queue->queue_id;
> >>       args->out.flags = 0;
> >> @@ -306,6 +362,7 @@ void amdgpu_userqueue_destroy(struct amdgpu_device *adev, struct drm_file *filp,
> >>       struct amdgpu_usermode_queue *queue = ctx->userq;
> >>
> >>       mutex_lock(&adev->userq.userq_mutex);
> >> +    amdgpu_userqueue_free_context(adev, queue);
> >>       amdgpu_userqueue_destroy_mqd(queue);
> >>       amdgpu_userqueue_remove_index(adev, queue);
> >>       ctx->userq = NULL;
> >> diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
> >> index c1fe39ffaf72..8bf3c0be6937 100644
> >> --- a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
> >> +++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
> >> @@ -26,6 +26,12 @@
> >>
> >>   #define AMDGPU_MAX_USERQ 512
> >>
> >> +struct amdgpu_userq_ctx {
> >> +       struct amdgpu_bo *obj;
> >> +       uint64_t gpu_addr;
> >> +       void    *cpu_ptr;
> >> +};
> >> +
> >>   struct amdgpu_usermode_queue {
> >>          int             queue_id;
> >>          int             queue_type;
> >> @@ -44,6 +50,8 @@ struct amdgpu_usermode_queue {
> >>
> >>          struct amdgpu_bo        *mqd_obj;
> >>          struct amdgpu_vm        *vm;
> >> +       struct amdgpu_userq_ctx proc_ctx;
> >> +       struct amdgpu_userq_ctx gang_ctx;
> >>          struct list_head        list;
> >>   };
> >>
> >> --
> >> 2.34.1
> >>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work
  2023-01-03 14:34                 ` Alex Deucher
@ 2023-01-03 14:50                   ` Christian König
  0 siblings, 0 replies; 64+ messages in thread
From: Christian König @ 2023-01-03 14:50 UTC (permalink / raw)
  To: Alex Deucher
  Cc: arunpravin.paneerselvam, Shashank Sharma, arvind.yadav, amd-gfx,
	Alex Deucher, Christian Koenig

Am 03.01.23 um 15:34 schrieb Alex Deucher:
> On Tue, Jan 3, 2023 at 4:35 AM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> Am 03.01.23 um 10:22 schrieb Shashank Sharma:
>>> On 03/01/2023 10:15, Christian König wrote:
>>>> Am 03.01.23 um 10:12 schrieb Shashank Sharma:
>>>>> On 02/01/2023 13:39, Christian König wrote:
>>>>>> Hi Shashank,
>>>>>>
>>>>>> Am 26.12.22 um 11:41 schrieb Shashank Sharma:
>>>>>>> [SNIP]
>>>>>>>>>          /* df */
>>>>>>>>>        struct amdgpu_df                df;
>>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>>>>>>>> index 0fa0e56daf67..f7413859b14f 100644
>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>>>>>>>> @@ -57,6 +57,7 @@ struct amdgpu_ctx {
>>>>>>>>>        unsigned long            ras_counter_ce;
>>>>>>>>>        unsigned long            ras_counter_ue;
>>>>>>>>>        uint32_t            stable_pstate;
>>>>>>>>> +    struct amdgpu_usermode_queue    *userq;
>>>>>>>> Why should we have this in the ctx here???
>>>>>>> We are allocating a few things dynamically for the queue, which
>>>>>>> would be valid until we destroy this queue. Also we need to save
>>>>>>> this queue
>>>>>>>
>>>>>>> container at some place for the destroy function,  and I thought
>>>>>>> it would make sense to keep this with the context ptr, as this is
>>>>>>> how we are
>>>>>>>
>>>>>>> identifying the incoming request.
>>>>>> I have absolutely no idea how you end up with that design.
>>>>>>
>>>>>> The ctx object is the CS IOCTL context, that is not even remotely
>>>>>> related to anything the user queues should be doing.
>>>>>>
>>>>>> Please completely drop that relationship and don't use any of the
>>>>>> ctx object stuff in the user queue code.
>>>>>>
>>>>> Historically the workload submission always came with a context (due
>>>>> to CS IOCTL), so we thought it would make sense to still have its
>>>>> relevance in the new workload submission method. Would you prefer
>>>>> this new submission to be independent of AMDGPU context ?
>>>> Well not prefer, the point is that this doesn't make any sense at all.
>>>>
>>>> See the amdgpu_ctx object contains the resulting fence pointers for
>>>> the CS IOCTL as well as information necessary for the CS IOCTL to
>>>> work (e.g. scheduler entities etc...).
>>>>
>>>> I don't see how anything from that stuff would be useful for the MES
>>>> or user queues.
>>>>
>>>> Christian.
>>>
>>> I am getting your point, and it makes sense as well. But in such
>>> scenario, we might have to create something parallel to
>>> AMDGPU_USERQ_CTX which is doing very much the same.
>>>
>>> We can still do it to make a logically separate entity, but any
>>> suggestions on where to keep this udev_ctx ptr (if not in adev, as
>>> well as not ctx) ?
>>
>> Take a look at the amdgpu_ctx_mgr object with the mutex and the idr and
>> how this is embedded into the amdgpu_fpriv object. It should become
>> pretty clear from there on.
>>
>> I don't think we need a userq_ctx or similar, each userq should be an
>> independent object. What we need is a userq_mgr object which holds the
>> collection of all the userq objects the client application has created
>> through its fpriv connection to the driver.
> Don't we want to associate the queues to a ctx for guilty tracking
> purposes when there is a hang?

Nope, absolutely not.

The hang detection around the context was just another design bug we 
inherited from the windows driver.

What we should do instead is to use the error field in the dma_fence 
object just like every other driver and component does.
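
E.g. on a detected hang/reset the driver would just flag the affected
fence before signaling it, something like:

    /* sketch: mark the fence instead of tracking a guilty ctx */
    dma_fence_set_error(fence, -ETIME);
    dma_fence_signal(fence);

and userspace then gets the error back when it waits on or queries that
fence.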

Christian.

>
> Alex
>
>> Regards,
>> Christian.
>>
>>> - Shashank
>>>
>>>
>>>>> - Shashank
>>>>>
>>>>>
>>>>>> Christian.
>>>>>>
>>>>>>> - Shashank
>>>>>>>
>>>>>>>> Regards,
>>>>>>>> Christian.
>>>>>>>>
>>>>>>>>>    };
>>>>>>>>>      struct amdgpu_ctx_mgr {
>>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>>>>>>>> new file mode 100644
>>>>>>>>> index 000000000000..3b6e8f75495c
>>>>>>>>> --- /dev/null
>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>>>>>>>> @@ -0,0 +1,187 @@
>>>>>>>>> +/*
>>>>>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>>>>>> + *
>>>>>>>>> + * Permission is hereby granted, free of charge, to any person
>>>>>>>>> obtaining a
>>>>>>>>> + * copy of this software and associated documentation files
>>>>>>>>> (the "Software"),
>>>>>>>>> + * to deal in the Software without restriction, including
>>>>>>>>> without limitation
>>>>>>>>> + * the rights to use, copy, modify, merge, publish, distribute,
>>>>>>>>> sublicense,
>>>>>>>>> + * and/or sell copies of the Software, and to permit persons to
>>>>>>>>> whom the
>>>>>>>>> + * Software is furnished to do so, subject to the following
>>>>>>>>> conditions:
>>>>>>>>> + *
>>>>>>>>> + * The above copyright notice and this permission notice shall
>>>>>>>>> be included in
>>>>>>>>> + * all copies or substantial portions of the Software.
>>>>>>>>> + *
>>>>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
>>>>>>>>> KIND, EXPRESS OR
>>>>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>>>>>>>> MERCHANTABILITY,
>>>>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
>>>>>>>>> EVENT SHALL
>>>>>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY
>>>>>>>>> CLAIM, DAMAGES OR
>>>>>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>>>>>>>>> OTHERWISE,
>>>>>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
>>>>>>>>> THE USE OR
>>>>>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>>>>>> + *
>>>>>>>>> + */
>>>>>>>>> +
>>>>>>>>> +#include "amdgpu.h"
>>>>>>>>> +#include "amdgpu_vm.h"
>>>>>>>>> +#include "amdgpu_mes.h"
>>>>>>>>> +#include "amdgpu_usermode_queue.h"
>>>>>>>>> +#include "soc15_common.h"
>>>>>>>>> +
>>>>>>>>> +#define CHECK_ACCESS(a) (access_ok((const void __user *)a,
>>>>>>>>> sizeof(__u64)))
>>>>>>>>> +
>>>>>>>>> +static int
>>>>>>>>> +amdgpu_userqueue_index(struct amdgpu_device *adev)
>>>>>>>>> +{
>>>>>>>>> +    int index;
>>>>>>>>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>>>>>>>>> +
>>>>>>>>> +    index = ida_simple_get(&uqg->ida, 2, AMDGPU_MAX_USERQ,
>>>>>>>>> GFP_KERNEL);
>>>>>>>>> +    return index;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +static void
>>>>>>>>> +amdgpu_userqueue_remove_index(struct amdgpu_device *adev,
>>>>>>>>> struct amdgpu_usermode_queue *queue)
>>>>>>>>> +{
>>>>>>>>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>>>>>>>>> +
>>>>>>>>> +    ida_simple_remove(&uqg->ida, queue->queue_id);
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +static int
>>>>>>>>> +amdgpu_userqueue_validate_input(struct amdgpu_device *adev,
>>>>>>>>> struct drm_amdgpu_userq_mqd *mqd_in)
>>>>>>>>> +{
>>>>>>>>> +    if (mqd_in->queue_va == 0 || mqd_in->doorbell_handle == 0
>>>>>>>>> || mqd_in->doorbell_offset == 0) {
>>>>>>>>> +        DRM_ERROR("Invalid queue object address\n");
>>>>>>>>> +        return -EINVAL;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    if (mqd_in->queue_size == 0 || mqd_in->rptr_va == 0 ||
>>>>>>>>> mqd_in->wptr_va == 0) {
>>>>>>>>> +        DRM_ERROR("Invalid queue object value\n");
>>>>>>>>> +        return -EINVAL;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    if (mqd_in->ip_type < AMDGPU_HW_IP_GFX || mqd_in->ip_type
>>>>>>>>>> = AMDGPU_HW_IP_NUM) {
>>>>>>>>> +        DRM_ERROR("Invalid HW IP type 0x%x\n", mqd_in->ip_type);
>>>>>>>>> +        return -EINVAL;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    if (!CHECK_ACCESS(mqd_in->queue_va) ||
>>>>>>>>> !CHECK_ACCESS(mqd_in->rptr_va) ||
>>>>>>>>> +        !CHECK_ACCESS(mqd_in->wptr_va)) {
>>>>>>>>> +            DRM_ERROR("Invalid mapping of queue ptrs, access error\n");
>>>>>>>>> +            return -EINVAL;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    DRM_DEBUG_DRIVER("Input parameters to create queue are valid\n");
>>>>>>>>> +    return 0;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +int amdgpu_userqueue_create(struct amdgpu_device *adev, struct
>>>>>>>>> drm_file *filp,
>>>>>>>>> +                            union drm_amdgpu_userq *args)
>>>>>>>>> +{
>>>>>>>>> +    int r, pasid;
>>>>>>>>> +    struct amdgpu_usermode_queue *queue;
>>>>>>>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>>>>>>>> +    struct amdgpu_vm *vm = &fpriv->vm;
>>>>>>>>> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv,
>>>>>>>>> args->in.ctx_id);
>>>>>>>>> +    struct drm_amdgpu_userq_mqd *mqd_in = &args->in.mqd;
>>>>>>>>> +
>>>>>>>>> +    if (!ctx) {
>>>>>>>>> +        DRM_ERROR("Invalid GPU context\n");
>>>>>>>>> +        return -EINVAL;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    if (vm->pasid < 0) {
>>>>>>>>> +        DRM_WARN("No PASID info found\n");
>>>>>>>>> +        pasid = 0;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    mutex_lock(&adev->userq.userq_mutex);
>>>>>>>>> +
>>>>>>>>> +    queue = kzalloc(sizeof(struct amdgpu_usermode_queue),
>>>>>>>>> GFP_KERNEL);
>>>>>>>>> +    if (!queue) {
>>>>>>>>> +        DRM_ERROR("Failed to allocate memory for queue\n");
>>>>>>>>> +        mutex_unlock(&adev->userq.userq_mutex);
>>>>>>>>> +        return -ENOMEM;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    r = amdgpu_userqueue_validate_input(adev, mqd_in);
>>>>>>>>> +    if (r < 0) {
>>>>>>>>> +        DRM_ERROR("Invalid input to create queue\n");
>>>>>>>>> +        goto free_queue;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    queue->vm = vm;
>>>>>>>>> +    queue->pasid = pasid;
>>>>>>>>> +    queue->wptr_gpu_addr = mqd_in->wptr_va;
>>>>>>>>> +    queue->rptr_gpu_addr = mqd_in->rptr_va;
>>>>>>>>> +    queue->queue_size = mqd_in->queue_size;
>>>>>>>>> +    queue->queue_type = mqd_in->ip_type;
>>>>>>>>> +    queue->paging = false;
>>>>>>>>> +    queue->flags = mqd_in->flags;
>>>>>>>>> +    queue->queue_id = amdgpu_userqueue_index(adev);
>>>>>>>>> +
>>>>>>>>> +    ctx->userq = queue;
>>>>>>>>> +    args->out.q_id = queue->queue_id;
>>>>>>>>> +    args->out.flags = 0;
>>>>>>>>> +    mutex_unlock(&adev->userq.userq_mutex);
>>>>>>>>> +    return 0;
>>>>>>>>> +
>>>>>>>>> +free_queue:
>>>>>>>>> +    amdgpu_userqueue_remove_index(adev, queue);
>>>>>>>>> +    mutex_unlock(&adev->userq.userq_mutex);
>>>>>>>>> +    kfree(queue);
>>>>>>>>> +    return r;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +void amdgpu_userqueue_destroy(struct amdgpu_device *adev,
>>>>>>>>> struct drm_file *filp,
>>>>>>>>> +                              union drm_amdgpu_userq *args)
>>>>>>>>> +{
>>>>>>>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>>>>>>>> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv,
>>>>>>>>> args->in.ctx_id);
>>>>>>>>> +    struct amdgpu_usermode_queue *queue = ctx->userq;
>>>>>>>>> +
>>>>>>>>> +    mutex_lock(&adev->userq.userq_mutex);
>>>>>>>>> +    amdgpu_userqueue_remove_index(adev, queue);
>>>>>>>>> +    ctx->userq = NULL;
>>>>>>>>> +    mutex_unlock(&adev->userq.userq_mutex);
>>>>>>>>> +    kfree(queue);
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
>>>>>>>>> +               struct drm_file *filp)
>>>>>>>>> +{
>>>>>>>>> +    union drm_amdgpu_userq *args = data;
>>>>>>>>> +    struct amdgpu_device *adev = drm_to_adev(dev);
>>>>>>>>> +    int r = 0;
>>>>>>>>> +
>>>>>>>>> +    switch (args->in.op) {
>>>>>>>>> +    case AMDGPU_USERQ_OP_CREATE:
>>>>>>>>> +        r = amdgpu_userqueue_create(adev, filp, args);
>>>>>>>>> +        if (r)
>>>>>>>>> +            DRM_ERROR("Failed to create usermode queue\n");
>>>>>>>>> +        break;
>>>>>>>>> +
>>>>>>>>> +    case AMDGPU_USERQ_OP_FREE:
>>>>>>>>> +        amdgpu_userqueue_destroy(adev, filp, args);
>>>>>>>>> +        break;
>>>>>>>>> +
>>>>>>>>> +    default:
>>>>>>>>> +        DRM_ERROR("Invalid user queue op specified: %d\n",
>>>>>>>>> args->in.op);
>>>>>>>>> +        return -EINVAL;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    return r;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +int amdgpu_userqueue_init(struct amdgpu_device *adev)
>>>>>>>>> +{
>>>>>>>>> +    struct amdgpu_userq_globals *uqg = &adev->userq;
>>>>>>>>> +
>>>>>>>>> +    mutex_init(&uqg->userq_mutex);
>>>>>>>>> +    return 0;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +void amdgpu_userqueue_fini(struct amdgpu_device *adev)
>>>>>>>>> +{
>>>>>>>>> +
>>>>>>>>> +}
>>>>>>>>> diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>>>>>>>>> b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>>>>>>>>> new file mode 100644
>>>>>>>>> index 000000000000..c1fe39ffaf72
>>>>>>>>> --- /dev/null
>>>>>>>>> +++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>>>>>>>>> @@ -0,0 +1,50 @@
>>>>>>>>> +/*
>>>>>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>>>>>> + *
>>>>>>>>> + * Permission is hereby granted, free of charge, to any person
>>>>>>>>> obtaining a
>>>>>>>>> + * copy of this software and associated documentation files
>>>>>>>>> (the "Software"),
>>>>>>>>> + * to deal in the Software without restriction, including
>>>>>>>>> without limitation
>>>>>>>>> + * the rights to use, copy, modify, merge, publish, distribute,
>>>>>>>>> sublicense,
>>>>>>>>> + * and/or sell copies of the Software, and to permit persons to
>>>>>>>>> whom the
>>>>>>>>> + * Software is furnished to do so, subject to the following
>>>>>>>>> conditions:
>>>>>>>>> + *
>>>>>>>>> + * The above copyright notice and this permission notice shall
>>>>>>>>> be included in
>>>>>>>>> + * all copies or substantial portions of the Software.
>>>>>>>>> + *
>>>>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
>>>>>>>>> KIND, EXPRESS OR
>>>>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>>>>>>>> MERCHANTABILITY,
>>>>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
>>>>>>>>> EVENT SHALL
>>>>>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY
>>>>>>>>> CLAIM, DAMAGES OR
>>>>>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>>>>>>>>> OTHERWISE,
>>>>>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
>>>>>>>>> THE USE OR
>>>>>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>>>>>> + *
>>>>>>>>> + */
>>>>>>>>> +
>>>>>>>>> +#ifndef AMDGPU_USERMODE_QUEUE_H_
>>>>>>>>> +#define AMDGPU_USERMODE_QUEUE_H_
>>>>>>>>> +
>>>>>>>>> +#define AMDGPU_MAX_USERQ 512
>>>>>>>>> +
>>>>>>>>> +struct amdgpu_usermode_queue {
>>>>>>>>> +    int        queue_id;
>>>>>>>>> +    int        queue_type;
>>>>>>>>> +    int        queue_size;
>>>>>>>>> +    int        paging;
>>>>>>>>> +    int        pasid;
>>>>>>>>> +    int        use_doorbell;
>>>>>>>>> +    int        doorbell_index;
>>>>>>>>> +
>>>>>>>>> +    uint64_t    mqd_gpu_addr;
>>>>>>>>> +    uint64_t    wptr_gpu_addr;
>>>>>>>>> +    uint64_t    rptr_gpu_addr;
>>>>>>>>> +    uint64_t    queue_gpu_addr;
>>>>>>>>> +    uint64_t    flags;
>>>>>>>>> +    void         *mqd_cpu_ptr;
>>>>>>>>> +
>>>>>>>>> +    struct amdgpu_bo    *mqd_obj;
>>>>>>>>> +    struct amdgpu_vm        *vm;
>>>>>>>>> +    struct list_head     list;
>>>>>>>>> +};
>>>>>>>>> +
>>>>>>>>> +#endif
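
For readers who want to see the proposed UAPI from the userspace side, below is a minimal sketch of a create call. It assumes the ring buffer, rptr/wptr BOs and the doorbell BO were already allocated and GPU-mapped by the UMD; the header paths, helper name and error handling are illustrative and not part of this series.

/* Hedged sketch only: exercises the DRM_AMDGPU_USERQ ioctl proposed above. */
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <amdgpu_drm.h>        /* UAPI header carrying the new userq defines */

static int create_user_queue(int drm_fd, uint32_t ctx_id,
                             uint32_t doorbell_handle, uint32_t doorbell_offset,
                             uint64_t queue_va, uint64_t queue_size,
                             uint64_t rptr_va, uint64_t wptr_va,
                             uint32_t *q_id)
{
    union drm_amdgpu_userq args;
    int r;

    memset(&args, 0, sizeof(args));
    args.in.op = AMDGPU_USERQ_OP_CREATE;
    args.in.ctx_id = ctx_id;                        /* amdgpu context owning the queue  */
    args.in.mqd.ip_type = AMDGPU_HW_IP_GFX;         /* a graphics user queue            */
    args.in.mqd.doorbell_handle = doorbell_handle;  /* GEM handle of the doorbell page  */
    args.in.mqd.doorbell_offset = doorbell_offset;  /* dword offset inside that page    */
    args.in.mqd.queue_va = queue_va;                /* GPU VA of the ring buffer        */
    args.in.mqd.queue_size = queue_size;
    args.in.mqd.rptr_va = rptr_va;
    args.in.mqd.wptr_va = wptr_va;

    r = drmCommandWriteRead(drm_fd, DRM_AMDGPU_USERQ, &args, sizeof(args));
    if (r)
        return r;

    *q_id = args.out.q_id;                          /* handle for AMDGPU_USERQ_OP_FREE  */
    return 0;
}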


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management
  2022-12-23 19:36 ` [RFC 1/7] drm/amdgpu: UAPI for user queue management Shashank Sharma
  2022-12-24 20:20   ` Bas Nieuwenhuizen
  2023-01-02 13:26   ` Christian König
@ 2023-01-03 18:29   ` Felix Kuehling
  2023-01-03 19:17     ` Liu, Shaoyun
  2023-01-03 19:18     ` Alex Deucher
  2 siblings, 2 replies; 64+ messages in thread
From: Felix Kuehling @ 2023-01-03 18:29 UTC (permalink / raw)
  To: Shashank Sharma, amd-gfx
  Cc: Alex Deucher, Christian Koenig, arvind.yadav, arunpravin.paneerselvam

Am 2022-12-23 um 14:36 schrieb Shashank Sharma:
> From: Alex Deucher <alexander.deucher@amd.com>
>
> This patch intorduces new UAPI/IOCTL for usermode graphics
> queue. The userspace app will fill this structure and request
> the graphics driver to add a graphics work queue for it. The
> output of this UAPI is a queue id.
>
> This UAPI maps the queue into GPU, so the graphics app can start
> submitting work to the queue as soon as the call returns.
>
> Cc: Alex Deucher <alexander.deucher@amd.com>
> Cc: Christian Koenig <christian.koenig@amd.com>
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
> ---
>   include/uapi/drm/amdgpu_drm.h | 52 +++++++++++++++++++++++++++++++++++
>   1 file changed, 52 insertions(+)
>
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 0d93ec132ebb..a3d0dd6f62c5 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -54,6 +54,7 @@ extern "C" {
>   #define DRM_AMDGPU_VM			0x13
>   #define DRM_AMDGPU_FENCE_TO_HANDLE	0x14
>   #define DRM_AMDGPU_SCHED		0x15
> +#define DRM_AMDGPU_USERQ		0x16
>   
>   #define DRM_IOCTL_AMDGPU_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>   #define DRM_IOCTL_AMDGPU_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> @@ -71,6 +72,7 @@ extern "C" {
>   #define DRM_IOCTL_AMDGPU_VM		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
>   #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
>   #define DRM_IOCTL_AMDGPU_SCHED		DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> +#define DRM_IOCTL_AMDGPU_USERQ		DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
>   
>   /**
>    * DOC: memory domains
> @@ -288,6 +290,56 @@ union drm_amdgpu_ctx {
>   	union drm_amdgpu_ctx_out out;
>   };
>   
> +/* user queue IOCTL */
> +#define AMDGPU_USERQ_OP_CREATE	1
> +#define AMDGPU_USERQ_OP_FREE	2
> +
> +#define AMDGPU_USERQ_MQD_FLAGS_SECURE	(1 << 0)

What does "secure" mean here? I don't see this flag referenced anywhere 
in the rest of the patch series.

Regards,
   Felix


> +#define AMDGPU_USERQ_MQD_FLAGS_AQL	(1 << 1)
> +
> +struct drm_amdgpu_userq_mqd {
> +	/** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
> +	__u32	flags;
> +	/** IP type: AMDGPU_HW_IP_* */
> +	__u32	ip_type;
> +	/** GEM object handle */
> +	__u32   doorbell_handle;
> +	/** Doorbell offset in dwords */
> +	__u32   doorbell_offset;
> +	/** GPU virtual address of the queue */
> +	__u64   queue_va;
> +	/** Size of the queue in bytes */
> +	__u64   queue_size;
> +	/** GPU virtual address of the rptr */
> +	__u64   rptr_va;
> +	/** GPU virtual address of the wptr */
> +	__u64   wptr_va;
> +};
> +
> +struct drm_amdgpu_userq_in {
> +	/** AMDGPU_USERQ_OP_* */
> +	__u32	op;
> +	/** Flags */
> +	__u32	flags;
> +	/** Context handle to associate the queue with */
> +	__u32	ctx_id;
> +	__u32	pad;
> +	/** Queue descriptor */
> +	struct drm_amdgpu_userq_mqd mqd;
> +};
> +
> +struct drm_amdgpu_userq_out {
> +	/** Queue handle */
> +	__u32	q_id;
> +	/** Flags */
> +	__u32	flags;
> +};
> +
> +union drm_amdgpu_userq {
> +	struct drm_amdgpu_userq_in in;
> +	struct drm_amdgpu_userq_out out;
> +};
> +
>   /* vm ioctl */
>   #define AMDGPU_VM_OP_RESERVE_VMID	1
>   #define AMDGPU_VM_OP_UNRESERVE_VMID	2

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 3/7] drm/amdgpu: Create MQD for userspace queue
  2023-01-03  9:36     ` Shashank Sharma
@ 2023-01-03 18:37       ` Felix Kuehling
  2023-01-04  6:21         ` Yadav, Arvind
  0 siblings, 1 reply; 64+ messages in thread
From: Felix Kuehling @ 2023-01-03 18:37 UTC (permalink / raw)
  To: Shashank Sharma, Alex Deucher
  Cc: Alex Deucher, amd-gfx, Christian Koenig, arvind.yadav,
	arunpravin.paneerselvam

Am 2023-01-03 um 04:36 schrieb Shashank Sharma:
>>> /*MQD struct for usermode Queue*/
>>> +struct amdgpu_usermode_queue_mqd
>> This is specific to GC 11.  Every IP and version will have its own MQD
>> format.  That should live in the IP specific code, not the generic
>> code.  We already have the generic MQD parameters that we need from
>> the userq IOCTL.
>
> Noted, we can separate out the generic parameters from gen specific 
> parameter, and will try to wrap it around the generic structure.
>
> - Shashank

Is there a reason why you can't use "struct v11_compute_mqd" from 
v11_structs.h?

Regards,
   Felix


^ permalink raw reply	[flat|nested] 64+ messages in thread

* RE: [RFC 1/7] drm/amdgpu: UAPI for user queue management
  2023-01-03 18:29   ` Felix Kuehling
@ 2023-01-03 19:17     ` Liu, Shaoyun
  2023-01-03 19:22       ` Alex Deucher
  2023-01-03 19:18     ` Alex Deucher
  1 sibling, 1 reply; 64+ messages in thread
From: Liu, Shaoyun @ 2023-01-03 19:17 UTC (permalink / raw)
  To: Kuehling, Felix, Sharma, Shashank, amd-gfx
  Cc: Deucher, Alexander, Koenig, Christian, Yadav, Arvind,
	Paneer Selvam, Arunpravin

[AMD Official Use Only - General]

Hsakmt has the interfaces for compute user queues. Do we want a unified API for both graphics and compute?

Regards
Shaoyun.liu

-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Felix Kuehling
Sent: Tuesday, January 3, 2023 1:30 PM
To: Sharma, Shashank <Shashank.Sharma@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; Yadav, Arvind <Arvind.Yadav@amd.com>; Paneer Selvam, Arunpravin <Arunpravin.PaneerSelvam@amd.com>
Subject: Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management

Am 2022-12-23 um 14:36 schrieb Shashank Sharma:
> From: Alex Deucher <alexander.deucher@amd.com>
>
> This patch intorduces new UAPI/IOCTL for usermode graphics queue. The
> userspace app will fill this structure and request the graphics driver
> to add a graphics work queue for it. The output of this UAPI is a
> queue id.
>
> This UAPI maps the queue into GPU, so the graphics app can start
> submitting work to the queue as soon as the call returns.
>
> Cc: Alex Deucher <alexander.deucher@amd.com>
> Cc: Christian Koenig <christian.koenig@amd.com>
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
> ---
>   include/uapi/drm/amdgpu_drm.h | 52 +++++++++++++++++++++++++++++++++++
>   1 file changed, 52 insertions(+)
>
> diff --git a/include/uapi/drm/amdgpu_drm.h
> b/include/uapi/drm/amdgpu_drm.h index 0d93ec132ebb..a3d0dd6f62c5
> 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -54,6 +54,7 @@ extern "C" {
>   #define DRM_AMDGPU_VM                       0x13
>   #define DRM_AMDGPU_FENCE_TO_HANDLE  0x14
>   #define DRM_AMDGPU_SCHED            0x15
> +#define DRM_AMDGPU_USERQ             0x16
>
>   #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>   #define DRM_IOCTL_AMDGPU_GEM_MMAP   DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> @@ -71,6 +72,7 @@ extern "C" {
>   #define DRM_IOCTL_AMDGPU_VM         DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
>   #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
>   #define DRM_IOCTL_AMDGPU_SCHED              DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> +#define DRM_IOCTL_AMDGPU_USERQ               DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
>
>   /**
>    * DOC: memory domains
> @@ -288,6 +290,56 @@ union drm_amdgpu_ctx {
>       union drm_amdgpu_ctx_out out;
>   };
>
> +/* user queue IOCTL */
> +#define AMDGPU_USERQ_OP_CREATE       1
> +#define AMDGPU_USERQ_OP_FREE 2
> +
> +#define AMDGPU_USERQ_MQD_FLAGS_SECURE        (1 << 0)

What does "secure" mean here? I don't see this flag referenced anywhere in the rest of the patch series.

Regards,
   Felix


> +#define AMDGPU_USERQ_MQD_FLAGS_AQL   (1 << 1)
> +
> +struct drm_amdgpu_userq_mqd {
> +     /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
> +     __u32   flags;
> +     /** IP type: AMDGPU_HW_IP_* */
> +     __u32   ip_type;
> +     /** GEM object handle */
> +     __u32   doorbell_handle;
> +     /** Doorbell offset in dwords */
> +     __u32   doorbell_offset;
> +     /** GPU virtual address of the queue */
> +     __u64   queue_va;
> +     /** Size of the queue in bytes */
> +     __u64   queue_size;
> +     /** GPU virtual address of the rptr */
> +     __u64   rptr_va;
> +     /** GPU virtual address of the wptr */
> +     __u64   wptr_va;
> +};
> +
> +struct drm_amdgpu_userq_in {
> +     /** AMDGPU_USERQ_OP_* */
> +     __u32   op;
> +     /** Flags */
> +     __u32   flags;
> +     /** Context handle to associate the queue with */
> +     __u32   ctx_id;
> +     __u32   pad;
> +     /** Queue descriptor */
> +     struct drm_amdgpu_userq_mqd mqd;
> +};
> +
> +struct drm_amdgpu_userq_out {
> +     /** Queue handle */
> +     __u32   q_id;
> +     /** Flags */
> +     __u32   flags;
> +};
> +
> +union drm_amdgpu_userq {
> +     struct drm_amdgpu_userq_in in;
> +     struct drm_amdgpu_userq_out out;
> +};
> +
>   /* vm ioctl */
>   #define AMDGPU_VM_OP_RESERVE_VMID   1
>   #define AMDGPU_VM_OP_UNRESERVE_VMID 2

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management
  2023-01-03 18:29   ` Felix Kuehling
  2023-01-03 19:17     ` Liu, Shaoyun
@ 2023-01-03 19:18     ` Alex Deucher
  1 sibling, 0 replies; 64+ messages in thread
From: Alex Deucher @ 2023-01-03 19:18 UTC (permalink / raw)
  To: Felix Kuehling
  Cc: arunpravin.paneerselvam, Shashank Sharma, arvind.yadav, amd-gfx,
	Alex Deucher, Christian Koenig

On Tue, Jan 3, 2023 at 1:30 PM Felix Kuehling <felix.kuehling@amd.com> wrote:
>
> Am 2022-12-23 um 14:36 schrieb Shashank Sharma:
> > From: Alex Deucher <alexander.deucher@amd.com>
> >
> > This patch intorduces new UAPI/IOCTL for usermode graphics
> > queue. The userspace app will fill this structure and request
> > the graphics driver to add a graphics work queue for it. The
> > output of this UAPI is a queue id.
> >
> > This UAPI maps the queue into GPU, so the graphics app can start
> > submitting work to the queue as soon as the call returns.
> >
> > Cc: Alex Deucher <alexander.deucher@amd.com>
> > Cc: Christian Koenig <christian.koenig@amd.com>
> > Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> > Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
> > ---
> >   include/uapi/drm/amdgpu_drm.h | 52 +++++++++++++++++++++++++++++++++++
> >   1 file changed, 52 insertions(+)
> >
> > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> > index 0d93ec132ebb..a3d0dd6f62c5 100644
> > --- a/include/uapi/drm/amdgpu_drm.h
> > +++ b/include/uapi/drm/amdgpu_drm.h
> > @@ -54,6 +54,7 @@ extern "C" {
> >   #define DRM_AMDGPU_VM                       0x13
> >   #define DRM_AMDGPU_FENCE_TO_HANDLE  0x14
> >   #define DRM_AMDGPU_SCHED            0x15
> > +#define DRM_AMDGPU_USERQ             0x16
> >
> >   #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> >   #define DRM_IOCTL_AMDGPU_GEM_MMAP   DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> > @@ -71,6 +72,7 @@ extern "C" {
> >   #define DRM_IOCTL_AMDGPU_VM         DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
> >   #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
> >   #define DRM_IOCTL_AMDGPU_SCHED              DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> > +#define DRM_IOCTL_AMDGPU_USERQ               DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
> >
> >   /**
> >    * DOC: memory domains
> > @@ -288,6 +290,56 @@ union drm_amdgpu_ctx {
> >       union drm_amdgpu_ctx_out out;
> >   };
> >
> > +/* user queue IOCTL */
> > +#define AMDGPU_USERQ_OP_CREATE       1
> > +#define AMDGPU_USERQ_OP_FREE 2
> > +
> > +#define AMDGPU_USERQ_MQD_FLAGS_SECURE        (1 << 0)
>
> What does "secure" mean here? I don't see this flag referenced anywhere
> in the rest of the patch series.

It means the queue operates in secure mode (i.e. encrypted for content
protection -- TMZ in hardware parlance).

Alex

>
> Regards,
>    Felix
>
>
> > +#define AMDGPU_USERQ_MQD_FLAGS_AQL   (1 << 1)
> > +
> > +struct drm_amdgpu_userq_mqd {
> > +     /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
> > +     __u32   flags;
> > +     /** IP type: AMDGPU_HW_IP_* */
> > +     __u32   ip_type;
> > +     /** GEM object handle */
> > +     __u32   doorbell_handle;
> > +     /** Doorbell offset in dwords */
> > +     __u32   doorbell_offset;
> > +     /** GPU virtual address of the queue */
> > +     __u64   queue_va;
> > +     /** Size of the queue in bytes */
> > +     __u64   queue_size;
> > +     /** GPU virtual address of the rptr */
> > +     __u64   rptr_va;
> > +     /** GPU virtual address of the wptr */
> > +     __u64   wptr_va;
> > +};
> > +
> > +struct drm_amdgpu_userq_in {
> > +     /** AMDGPU_USERQ_OP_* */
> > +     __u32   op;
> > +     /** Flags */
> > +     __u32   flags;
> > +     /** Context handle to associate the queue with */
> > +     __u32   ctx_id;
> > +     __u32   pad;
> > +     /** Queue descriptor */
> > +     struct drm_amdgpu_userq_mqd mqd;
> > +};
> > +
> > +struct drm_amdgpu_userq_out {
> > +     /** Queue handle */
> > +     __u32   q_id;
> > +     /** Flags */
> > +     __u32   flags;
> > +};
> > +
> > +union drm_amdgpu_userq {
> > +     struct drm_amdgpu_userq_in in;
> > +     struct drm_amdgpu_userq_out out;
> > +};
> > +
> >   /* vm ioctl */
> >   #define AMDGPU_VM_OP_RESERVE_VMID   1
> >   #define AMDGPU_VM_OP_UNRESERVE_VMID 2

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management
  2023-01-03 19:17     ` Liu, Shaoyun
@ 2023-01-03 19:22       ` Alex Deucher
  2023-01-03 19:25         ` Liu, Shaoyun
  0 siblings, 1 reply; 64+ messages in thread
From: Alex Deucher @ 2023-01-03 19:22 UTC (permalink / raw)
  To: Liu, Shaoyun
  Cc: Sharma, Shashank, Kuehling, Felix, Paneer Selvam, Arunpravin,
	Yadav, Arvind, amd-gfx, Deucher, Alexander, Koenig, Christian

On Tue, Jan 3, 2023 at 2:17 PM Liu, Shaoyun <Shaoyun.Liu@amd.com> wrote:
>
> [AMD Official Use Only - General]
>
> Hsakmt  has  the  interfaces for compute user queue. Do we want a unify API for both  graphic and compute  ?

Yeah, that is the eventual goal, hence the flag for AQL vs PM4.

Alex

>
> Regards
> Shaoyun.liu
>
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Felix Kuehling
> Sent: Tuesday, January 3, 2023 1:30 PM
> To: Sharma, Shashank <Shashank.Sharma@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; Yadav, Arvind <Arvind.Yadav@amd.com>; Paneer Selvam, Arunpravin <Arunpravin.PaneerSelvam@amd.com>
> Subject: Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management
>
> Am 2022-12-23 um 14:36 schrieb Shashank Sharma:
> > From: Alex Deucher <alexander.deucher@amd.com>
> >
> > This patch intorduces new UAPI/IOCTL for usermode graphics queue. The
> > userspace app will fill this structure and request the graphics driver
> > to add a graphics work queue for it. The output of this UAPI is a
> > queue id.
> >
> > This UAPI maps the queue into GPU, so the graphics app can start
> > submitting work to the queue as soon as the call returns.
> >
> > Cc: Alex Deucher <alexander.deucher@amd.com>
> > Cc: Christian Koenig <christian.koenig@amd.com>
> > Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> > Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
> > ---
> >   include/uapi/drm/amdgpu_drm.h | 52 +++++++++++++++++++++++++++++++++++
> >   1 file changed, 52 insertions(+)
> >
> > diff --git a/include/uapi/drm/amdgpu_drm.h
> > b/include/uapi/drm/amdgpu_drm.h index 0d93ec132ebb..a3d0dd6f62c5
> > 100644
> > --- a/include/uapi/drm/amdgpu_drm.h
> > +++ b/include/uapi/drm/amdgpu_drm.h
> > @@ -54,6 +54,7 @@ extern "C" {
> >   #define DRM_AMDGPU_VM                       0x13
> >   #define DRM_AMDGPU_FENCE_TO_HANDLE  0x14
> >   #define DRM_AMDGPU_SCHED            0x15
> > +#define DRM_AMDGPU_USERQ             0x16
> >
> >   #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> >   #define DRM_IOCTL_AMDGPU_GEM_MMAP   DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> > @@ -71,6 +72,7 @@ extern "C" {
> >   #define DRM_IOCTL_AMDGPU_VM         DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
> >   #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
> >   #define DRM_IOCTL_AMDGPU_SCHED              DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> > +#define DRM_IOCTL_AMDGPU_USERQ               DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
> >
> >   /**
> >    * DOC: memory domains
> > @@ -288,6 +290,56 @@ union drm_amdgpu_ctx {
> >       union drm_amdgpu_ctx_out out;
> >   };
> >
> > +/* user queue IOCTL */
> > +#define AMDGPU_USERQ_OP_CREATE       1
> > +#define AMDGPU_USERQ_OP_FREE 2
> > +
> > +#define AMDGPU_USERQ_MQD_FLAGS_SECURE        (1 << 0)
>
> What does "secure" mean here? I don't see this flag referenced anywhere in the rest of the patch series.
>
> Regards,
>    Felix
>
>
> > +#define AMDGPU_USERQ_MQD_FLAGS_AQL   (1 << 1)
> > +
> > +struct drm_amdgpu_userq_mqd {
> > +     /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
> > +     __u32   flags;
> > +     /** IP type: AMDGPU_HW_IP_* */
> > +     __u32   ip_type;
> > +     /** GEM object handle */
> > +     __u32   doorbell_handle;
> > +     /** Doorbell offset in dwords */
> > +     __u32   doorbell_offset;
> > +     /** GPU virtual address of the queue */
> > +     __u64   queue_va;
> > +     /** Size of the queue in bytes */
> > +     __u64   queue_size;
> > +     /** GPU virtual address of the rptr */
> > +     __u64   rptr_va;
> > +     /** GPU virtual address of the wptr */
> > +     __u64   wptr_va;
> > +};
> > +
> > +struct drm_amdgpu_userq_in {
> > +     /** AMDGPU_USERQ_OP_* */
> > +     __u32   op;
> > +     /** Flags */
> > +     __u32   flags;
> > +     /** Context handle to associate the queue with */
> > +     __u32   ctx_id;
> > +     __u32   pad;
> > +     /** Queue descriptor */
> > +     struct drm_amdgpu_userq_mqd mqd;
> > +};
> > +
> > +struct drm_amdgpu_userq_out {
> > +     /** Queue handle */
> > +     __u32   q_id;
> > +     /** Flags */
> > +     __u32   flags;
> > +};
> > +
> > +union drm_amdgpu_userq {
> > +     struct drm_amdgpu_userq_in in;
> > +     struct drm_amdgpu_userq_out out;
> > +};
> > +
> >   /* vm ioctl */
> >   #define AMDGPU_VM_OP_RESERVE_VMID   1
> >   #define AMDGPU_VM_OP_UNRESERVE_VMID 2

^ permalink raw reply	[flat|nested] 64+ messages in thread

* RE: [RFC 1/7] drm/amdgpu: UAPI for user queue management
  2023-01-03 19:22       ` Alex Deucher
@ 2023-01-03 19:25         ` Liu, Shaoyun
  2023-01-03 19:52           ` Alex Deucher
  0 siblings, 1 reply; 64+ messages in thread
From: Liu, Shaoyun @ 2023-01-03 19:25 UTC (permalink / raw)
  To: Alex Deucher
  Cc: Sharma, Shashank, Kuehling, Felix, Paneer Selvam, Arunpravin,
	Yadav, Arvind, amd-gfx, Deucher, Alexander, Koenig, Christian

[AMD Official Use Only - General]

What about the existing ROCm apps that already use the hsakmt APIs for user queues?

Shaoyun.liu

-----Original Message-----
From: Alex Deucher <alexdeucher@gmail.com>
Sent: Tuesday, January 3, 2023 2:22 PM
To: Liu, Shaoyun <Shaoyun.Liu@amd.com>
Cc: Kuehling, Felix <Felix.Kuehling@amd.com>; Sharma, Shashank <Shashank.Sharma@amd.com>; amd-gfx@lists.freedesktop.org; Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; Yadav, Arvind <Arvind.Yadav@amd.com>; Paneer Selvam, Arunpravin <Arunpravin.PaneerSelvam@amd.com>
Subject: Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management

On Tue, Jan 3, 2023 at 2:17 PM Liu, Shaoyun <Shaoyun.Liu@amd.com> wrote:
>
> [AMD Official Use Only - General]
>
> Hsakmt  has  the  interfaces for compute user queue. Do we want a unify API for both  graphic and compute  ?

Yeah, that is the eventual goal, hence the flag for AQL vs PM4.

Alex

>
> Regards
> Shaoyun.liu
>
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
> Felix Kuehling
> Sent: Tuesday, January 3, 2023 1:30 PM
> To: Sharma, Shashank <Shashank.Sharma@amd.com>;
> amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian
> <Christian.Koenig@amd.com>; Yadav, Arvind <Arvind.Yadav@amd.com>;
> Paneer Selvam, Arunpravin <Arunpravin.PaneerSelvam@amd.com>
> Subject: Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management
>
> Am 2022-12-23 um 14:36 schrieb Shashank Sharma:
> > From: Alex Deucher <alexander.deucher@amd.com>
> >
> > This patch intorduces new UAPI/IOCTL for usermode graphics queue.
> > The userspace app will fill this structure and request the graphics
> > driver to add a graphics work queue for it. The output of this UAPI
> > is a queue id.
> >
> > This UAPI maps the queue into GPU, so the graphics app can start
> > submitting work to the queue as soon as the call returns.
> >
> > Cc: Alex Deucher <alexander.deucher@amd.com>
> > Cc: Christian Koenig <christian.koenig@amd.com>
> > Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> > Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
> > ---
> >   include/uapi/drm/amdgpu_drm.h | 52 +++++++++++++++++++++++++++++++++++
> >   1 file changed, 52 insertions(+)
> >
> > diff --git a/include/uapi/drm/amdgpu_drm.h
> > b/include/uapi/drm/amdgpu_drm.h index 0d93ec132ebb..a3d0dd6f62c5
> > 100644
> > --- a/include/uapi/drm/amdgpu_drm.h
> > +++ b/include/uapi/drm/amdgpu_drm.h
> > @@ -54,6 +54,7 @@ extern "C" {
> >   #define DRM_AMDGPU_VM                       0x13
> >   #define DRM_AMDGPU_FENCE_TO_HANDLE  0x14
> >   #define DRM_AMDGPU_SCHED            0x15
> > +#define DRM_AMDGPU_USERQ             0x16
> >
> >   #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> >   #define DRM_IOCTL_AMDGPU_GEM_MMAP   DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> > @@ -71,6 +72,7 @@ extern "C" {
> >   #define DRM_IOCTL_AMDGPU_VM         DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
> >   #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
> >   #define DRM_IOCTL_AMDGPU_SCHED              DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> > +#define DRM_IOCTL_AMDGPU_USERQ               DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
> >
> >   /**
> >    * DOC: memory domains
> > @@ -288,6 +290,56 @@ union drm_amdgpu_ctx {
> >       union drm_amdgpu_ctx_out out;
> >   };
> >
> > +/* user queue IOCTL */
> > +#define AMDGPU_USERQ_OP_CREATE       1
> > +#define AMDGPU_USERQ_OP_FREE 2
> > +
> > +#define AMDGPU_USERQ_MQD_FLAGS_SECURE        (1 << 0)
>
> What does "secure" mean here? I don't see this flag referenced anywhere in the rest of the patch series.
>
> Regards,
>    Felix
>
>
> > +#define AMDGPU_USERQ_MQD_FLAGS_AQL   (1 << 1)
> > +
> > +struct drm_amdgpu_userq_mqd {
> > +     /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
> > +     __u32   flags;
> > +     /** IP type: AMDGPU_HW_IP_* */
> > +     __u32   ip_type;
> > +     /** GEM object handle */
> > +     __u32   doorbell_handle;
> > +     /** Doorbell offset in dwords */
> > +     __u32   doorbell_offset;
> > +     /** GPU virtual address of the queue */
> > +     __u64   queue_va;
> > +     /** Size of the queue in bytes */
> > +     __u64   queue_size;
> > +     /** GPU virtual address of the rptr */
> > +     __u64   rptr_va;
> > +     /** GPU virtual address of the wptr */
> > +     __u64   wptr_va;
> > +};
> > +
> > +struct drm_amdgpu_userq_in {
> > +     /** AMDGPU_USERQ_OP_* */
> > +     __u32   op;
> > +     /** Flags */
> > +     __u32   flags;
> > +     /** Context handle to associate the queue with */
> > +     __u32   ctx_id;
> > +     __u32   pad;
> > +     /** Queue descriptor */
> > +     struct drm_amdgpu_userq_mqd mqd; };
> > +
> > +struct drm_amdgpu_userq_out {
> > +     /** Queue handle */
> > +     __u32   q_id;
> > +     /** Flags */
> > +     __u32   flags;
> > +};
> > +
> > +union drm_amdgpu_userq {
> > +     struct drm_amdgpu_userq_in in;
> > +     struct drm_amdgpu_userq_out out; };
> > +
> >   /* vm ioctl */
> >   #define AMDGPU_VM_OP_RESERVE_VMID   1
> >   #define AMDGPU_VM_OP_UNRESERVE_VMID 2

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management
  2023-01-02 11:27       ` Christian König
@ 2023-01-03 19:51         ` Alex Deucher
  0 siblings, 0 replies; 64+ messages in thread
From: Alex Deucher @ 2023-01-03 19:51 UTC (permalink / raw)
  To: Christian König
  Cc: Shashank Sharma, arunpravin.paneerselvam, arvind.yadav, amd-gfx,
	Bas Nieuwenhuizen, Alex Deucher, Christian Koenig

On Mon, Jan 2, 2023 at 6:27 AM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Am 27.12.22 um 17:58 schrieb Alex Deucher:
> > On Sat, Dec 24, 2022 at 3:21 PM Bas Nieuwenhuizen
> > <bas@basnieuwenhuizen.nl> wrote:
> >> On Fri, Dec 23, 2022 at 8:37 PM Shashank Sharma <shashank.sharma@amd.com> wrote:
> >>> From: Alex Deucher <alexander.deucher@amd.com>
> >>>
> >>> This patch intorduces new UAPI/IOCTL for usermode graphics
> >>> queue. The userspace app will fill this structure and request
> >>> the graphics driver to add a graphics work queue for it. The
> >>> output of this UAPI is a queue id.
> >>>
> >>> This UAPI maps the queue into GPU, so the graphics app can start
> >>> submitting work to the queue as soon as the call returns.
> >>>
> >>> Cc: Alex Deucher <alexander.deucher@amd.com>
> >>> Cc: Christian Koenig <christian.koenig@amd.com>
> >>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> >>> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
> >>> ---
> >>>   include/uapi/drm/amdgpu_drm.h | 52 +++++++++++++++++++++++++++++++++++
> >>>   1 file changed, 52 insertions(+)
> >>>
> >>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> >>> index 0d93ec132ebb..a3d0dd6f62c5 100644
> >>> --- a/include/uapi/drm/amdgpu_drm.h
> >>> +++ b/include/uapi/drm/amdgpu_drm.h
> >>> @@ -54,6 +54,7 @@ extern "C" {
> >>>   #define DRM_AMDGPU_VM                  0x13
> >>>   #define DRM_AMDGPU_FENCE_TO_HANDLE     0x14
> >>>   #define DRM_AMDGPU_SCHED               0x15
> >>> +#define DRM_AMDGPU_USERQ               0x16
> >>>
> >>>   #define DRM_IOCTL_AMDGPU_GEM_CREATE    DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> >>>   #define DRM_IOCTL_AMDGPU_GEM_MMAP      DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> >>> @@ -71,6 +72,7 @@ extern "C" {
> >>>   #define DRM_IOCTL_AMDGPU_VM            DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
> >>>   #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
> >>>   #define DRM_IOCTL_AMDGPU_SCHED         DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> >>> +#define DRM_IOCTL_AMDGPU_USERQ         DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
> >>>
> >>>   /**
> >>>    * DOC: memory domains
> >>> @@ -288,6 +290,56 @@ union drm_amdgpu_ctx {
> >>>          union drm_amdgpu_ctx_out out;
> >>>   };
> >>>
> >>> +/* user queue IOCTL */
> >>> +#define AMDGPU_USERQ_OP_CREATE 1
> >>> +#define AMDGPU_USERQ_OP_FREE   2
> >>> +
> >>> +#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)
> >>> +#define AMDGPU_USERQ_MQD_FLAGS_AQL     (1 << 1)
> >> Can we document what AQL means here?
> > AQL is the packet format used by KFD/ROCm.  The idea is to be able to
> > create queues that support either format (AQL or PM4).
>
> Could we make that a separate queue type? E.g. like SDMA, GFX, Compute?
>
> It's not really a flag which can be applied independent of the queue.

I guess so, but the IP types we already expose don't differentiate between queue types:
#define AMDGPU_HW_IP_GFX          0
#define AMDGPU_HW_IP_COMPUTE      1
#define AMDGPU_HW_IP_DMA          2
#define AMDGPU_HW_IP_UVD          3
#define AMDGPU_HW_IP_VCE          4
#define AMDGPU_HW_IP_UVD_ENC      5
#define AMDGPU_HW_IP_VCN_DEC      6
/*
 * From VCN4, AMDGPU_HW_IP_VCN_ENC is re-used to support
 * both encoding and decoding jobs.
 */
#define AMDGPU_HW_IP_VCN_ENC      7
#define AMDGPU_HW_IP_VCN_JPEG     8
#define AMDGPU_HW_IP_NUM          9

I suppose we could add a new AMDGPU_HW_IP_COMPUTE_AQL.

Alex
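
Put in terms of the proposed UAPI, the two shapes under discussion would look roughly as below; this is a sketch only, and the second constant is hypothetical and not defined anywhere in this series.

#include <amdgpu_drm.h>   /* for the AMDGPU_HW_IP_* and AMDGPU_USERQ_MQD_FLAGS_* defines */

/* 'mqd' is the struct drm_amdgpu_userq_mqd from patch 1. */
static void select_compute_aql(struct drm_amdgpu_userq_mqd *mqd)
{
    /* Option 1 (this RFC): AQL chosen via an MQD flag on a compute queue */
    mqd->ip_type = AMDGPU_HW_IP_COMPUTE;
    mqd->flags  |= AMDGPU_USERQ_MQD_FLAGS_AQL;

    /* Option 2 (floated above): a dedicated IP/queue type instead of a flag */
    /* mqd->ip_type = AMDGPU_HW_IP_COMPUTE_AQL;  -- hypothetical new define   */
}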

>
> Regards,
> Christian.
>
> >
> >>
> >>> +
> >>> +struct drm_amdgpu_userq_mqd {
> >>> +       /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
> >>> +       __u32   flags;
> >>> +       /** IP type: AMDGPU_HW_IP_* */
> >>> +       __u32   ip_type;
> >>> +       /** GEM object handle */
> >>> +       __u32   doorbell_handle;
> >>> +       /** Doorbell offset in dwords */
> >>> +       __u32   doorbell_offset;
> >> What are the doorbell handle/offset for? I don't see any of them used
> >> in the rest of the series (we only check the handle isn't 0, which
> >> isn't enough validation for a GEM handle to consider it valid), and
> >> the kernel seems to allocate some kind of doorbell index in patch 4.
> >> Does userspace need to know about that one? (similarly use_doorbell in
> >> that patch seems like it is never explicitly written to)
> > The doorbell is how you trigger the engine to start processing the
> > user queue.  The idea is that each user process allocates a page of
> > doorbell space (one of the PCI BARs) and then each 64 bit segment in
> > that page could be used for a user mode queue.  So the UMD writes its
> > data to the queue, updates the wptr, and then writes to the doorbell
> > to tell the firmware to start processing the queue.
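
A rough UMD-side sketch of that flow is shown below; the buffer names are illustrative, and a real implementation also needs a store fence so the ring and wptr writes are visible before the doorbell write.

/*
 * 'ring', 'wptr_cpu' and 'doorbell' are assumed CPU mappings of the queue
 * buffer, the wptr BO and the process doorbell page; ring_mask is the ring
 * size in dwords minus one.
 */
#include <stdint.h>

static void submit_to_user_queue(uint32_t *ring, uint32_t ring_mask,
                                 uint64_t *wptr_cpu, volatile uint64_t *doorbell,
                                 const uint32_t *pkt, uint32_t ndw)
{
    uint64_t wptr = *wptr_cpu;
    uint32_t i;

    for (i = 0; i < ndw; i++)          /* 1. copy PM4 packets into the ring      */
        ring[(wptr + i) & ring_mask] = pkt[i];

    *wptr_cpu = wptr + ndw;            /* 2. publish the new write pointer       */
    /* a store fence belongs here so the packets land before the doorbell write */
    *doorbell = wptr + ndw;            /* 3. ring the doorbell: firmware starts
                                        *    processing the queue                */
}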
> >
> >> The other questions I have are about how this interacts with memory
> >> management. Does this have access to all BOs allocated with
> >> AMDGPU_GEM_CREATE_VM_ALWAYS_VALID? What about imported BOs? How does
> >> this interact with VA unmap/map operations? (AFAICT we have no way to
> >> tell if pagetable modifying operations are complete from userspace for
> >> now). What happens if we need to spill BOs from VRAM due to
> >> (cross-process) memory pressure?
> > Effectively everything you map on the GPU would be valid.  If there is
> > memory pressure, the kernel driver will behave similarly to KFD.  It
> > will unmap the queues (which preempts all work on the engines), do any
> > memory migrations, and then map the queues again.
> >
> > Alex
> >
> >>> +       /** GPU virtual address of the queue */
> >>> +       __u64   queue_va;
> >>> +       /** Size of the queue in bytes */
> >>> +       __u64   queue_size;
> >>> +       /** GPU virtual address of the rptr */
> >>> +       __u64   rptr_va;
> >>> +       /** GPU virtual address of the wptr */
> >>> +       __u64   wptr_va;
> >>> +};
> >>> +
> >>> +struct drm_amdgpu_userq_in {
> >>> +       /** AMDGPU_USERQ_OP_* */
> >>> +       __u32   op;
> >>> +       /** Flags */
> >>> +       __u32   flags;
> >>> +       /** Context handle to associate the queue with */
> >>> +       __u32   ctx_id;
> >>> +       __u32   pad;
> >>> +       /** Queue descriptor */
> >>> +       struct drm_amdgpu_userq_mqd mqd;
> >>> +};
> >>> +
> >>> +struct drm_amdgpu_userq_out {
> >>> +       /** Queue handle */
> >>> +       __u32   q_id;
> >>> +       /** Flags */
> >>> +       __u32   flags;
> >>> +};
> >>> +
> >>> +union drm_amdgpu_userq {
> >>> +       struct drm_amdgpu_userq_in in;
> >>> +       struct drm_amdgpu_userq_out out;
> >>> +};
> >>> +
> >>>   /* vm ioctl */
> >>>   #define AMDGPU_VM_OP_RESERVE_VMID      1
> >>>   #define AMDGPU_VM_OP_UNRESERVE_VMID    2
> >>> --
> >>> 2.34.1
> >>>
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management
  2023-01-03 19:25         ` Liu, Shaoyun
@ 2023-01-03 19:52           ` Alex Deucher
  2023-01-03 20:05             ` Felix Kuehling
  0 siblings, 1 reply; 64+ messages in thread
From: Alex Deucher @ 2023-01-03 19:52 UTC (permalink / raw)
  To: Liu, Shaoyun
  Cc: Sharma, Shashank, Kuehling, Felix, Paneer Selvam, Arunpravin,
	Yadav, Arvind, amd-gfx, Deucher, Alexander, Koenig, Christian

On Tue, Jan 3, 2023 at 2:25 PM Liu, Shaoyun <Shaoyun.Liu@amd.com> wrote:
>
> [AMD Official Use Only - General]
>
> What about the existing rocm apps that already use the  hsakmt APIs for user queue ?

We'd have to keep both APIs around for existing chips for backwards
compatibility.

Alex

>
> Shaoyun.liu
>
> -----Original Message-----
> From: Alex Deucher <alexdeucher@gmail.com>
> Sent: Tuesday, January 3, 2023 2:22 PM
> To: Liu, Shaoyun <Shaoyun.Liu@amd.com>
> Cc: Kuehling, Felix <Felix.Kuehling@amd.com>; Sharma, Shashank <Shashank.Sharma@amd.com>; amd-gfx@lists.freedesktop.org; Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; Yadav, Arvind <Arvind.Yadav@amd.com>; Paneer Selvam, Arunpravin <Arunpravin.PaneerSelvam@amd.com>
> Subject: Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management
>
> On Tue, Jan 3, 2023 at 2:17 PM Liu, Shaoyun <Shaoyun.Liu@amd.com> wrote:
> >
> > [AMD Official Use Only - General]
> >
> > Hsakmt  has  the  interfaces for compute user queue. Do we want a unify API for both  graphic and compute  ?
>
> Yeah, that is the eventual goal, hence the flag for AQL vs PM4.
>
> Alex
>
> >
> > Regards
> > Shaoyun.liu
> >
> > -----Original Message-----
> > From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
> > Felix Kuehling
> > Sent: Tuesday, January 3, 2023 1:30 PM
> > To: Sharma, Shashank <Shashank.Sharma@amd.com>;
> > amd-gfx@lists.freedesktop.org
> > Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian
> > <Christian.Koenig@amd.com>; Yadav, Arvind <Arvind.Yadav@amd.com>;
> > Paneer Selvam, Arunpravin <Arunpravin.PaneerSelvam@amd.com>
> > Subject: Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management
> >
> > Am 2022-12-23 um 14:36 schrieb Shashank Sharma:
> > > From: Alex Deucher <alexander.deucher@amd.com>
> > >
> > > This patch intorduces new UAPI/IOCTL for usermode graphics queue.
> > > The userspace app will fill this structure and request the graphics
> > > driver to add a graphics work queue for it. The output of this UAPI
> > > is a queue id.
> > >
> > > This UAPI maps the queue into GPU, so the graphics app can start
> > > submitting work to the queue as soon as the call returns.
> > >
> > > Cc: Alex Deucher <alexander.deucher@amd.com>
> > > Cc: Christian Koenig <christian.koenig@amd.com>
> > > Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> > > Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
> > > ---
> > >   include/uapi/drm/amdgpu_drm.h | 52 +++++++++++++++++++++++++++++++++++
> > >   1 file changed, 52 insertions(+)
> > >
> > > diff --git a/include/uapi/drm/amdgpu_drm.h
> > > b/include/uapi/drm/amdgpu_drm.h index 0d93ec132ebb..a3d0dd6f62c5
> > > 100644
> > > --- a/include/uapi/drm/amdgpu_drm.h
> > > +++ b/include/uapi/drm/amdgpu_drm.h
> > > @@ -54,6 +54,7 @@ extern "C" {
> > >   #define DRM_AMDGPU_VM                       0x13
> > >   #define DRM_AMDGPU_FENCE_TO_HANDLE  0x14
> > >   #define DRM_AMDGPU_SCHED            0x15
> > > +#define DRM_AMDGPU_USERQ             0x16
> > >
> > >   #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> > >   #define DRM_IOCTL_AMDGPU_GEM_MMAP   DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> > > @@ -71,6 +72,7 @@ extern "C" {
> > >   #define DRM_IOCTL_AMDGPU_VM         DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
> > >   #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
> > >   #define DRM_IOCTL_AMDGPU_SCHED              DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> > > +#define DRM_IOCTL_AMDGPU_USERQ               DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
> > >
> > >   /**
> > >    * DOC: memory domains
> > > @@ -288,6 +290,56 @@ union drm_amdgpu_ctx {
> > >       union drm_amdgpu_ctx_out out;
> > >   };
> > >
> > > +/* user queue IOCTL */
> > > +#define AMDGPU_USERQ_OP_CREATE       1
> > > +#define AMDGPU_USERQ_OP_FREE 2
> > > +
> > > +#define AMDGPU_USERQ_MQD_FLAGS_SECURE        (1 << 0)
> >
> > What does "secure" mean here? I don't see this flag referenced anywhere in the rest of the patch series.
> >
> > Regards,
> >    Felix
> >
> >
> > > +#define AMDGPU_USERQ_MQD_FLAGS_AQL   (1 << 1)
> > > +
> > > +struct drm_amdgpu_userq_mqd {
> > > +     /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
> > > +     __u32   flags;
> > > +     /** IP type: AMDGPU_HW_IP_* */
> > > +     __u32   ip_type;
> > > +     /** GEM object handle */
> > > +     __u32   doorbell_handle;
> > > +     /** Doorbell offset in dwords */
> > > +     __u32   doorbell_offset;
> > > +     /** GPU virtual address of the queue */
> > > +     __u64   queue_va;
> > > +     /** Size of the queue in bytes */
> > > +     __u64   queue_size;
> > > +     /** GPU virtual address of the rptr */
> > > +     __u64   rptr_va;
> > > +     /** GPU virtual address of the wptr */
> > > +     __u64   wptr_va;
> > > +};
> > > +
> > > +struct drm_amdgpu_userq_in {
> > > +     /** AMDGPU_USERQ_OP_* */
> > > +     __u32   op;
> > > +     /** Flags */
> > > +     __u32   flags;
> > > +     /** Context handle to associate the queue with */
> > > +     __u32   ctx_id;
> > > +     __u32   pad;
> > > +     /** Queue descriptor */
> > > +     struct drm_amdgpu_userq_mqd mqd; };
> > > +
> > > +struct drm_amdgpu_userq_out {
> > > +     /** Queue handle */
> > > +     __u32   q_id;
> > > +     /** Flags */
> > > +     __u32   flags;
> > > +};
> > > +
> > > +union drm_amdgpu_userq {
> > > +     struct drm_amdgpu_userq_in in;
> > > +     struct drm_amdgpu_userq_out out; };
> > > +
> > >   /* vm ioctl */
> > >   #define AMDGPU_VM_OP_RESERVE_VMID   1
> > >   #define AMDGPU_VM_OP_UNRESERVE_VMID 2

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management
  2023-01-03 19:52           ` Alex Deucher
@ 2023-01-03 20:05             ` Felix Kuehling
  0 siblings, 0 replies; 64+ messages in thread
From: Felix Kuehling @ 2023-01-03 20:05 UTC (permalink / raw)
  To: Alex Deucher, Liu, Shaoyun
  Cc: Paneer Selvam, Arunpravin, Sharma, Shashank, Yadav, Arvind,
	amd-gfx, Deucher, Alexander, Koenig, Christian

I think at some point ROCr could start using libdrm APIs for memory 
management and user mode queues on kernels and GPUs that support this. I 
think more work is required on the memory management side, though. ROCr 
would fall back to libhsakmt on older kernels and older GPUs (pre-GFX11).

Regards,
   Felix


Am 2023-01-03 um 14:52 schrieb Alex Deucher:
> On Tue, Jan 3, 2023 at 2:25 PM Liu, Shaoyun <Shaoyun.Liu@amd.com> wrote:
>> [AMD Official Use Only - General]
>>
>> What about the existing rocm apps that already use the  hsakmt APIs for user queue ?
> We'd have to keep both APIs around for existing chips for backwards
> compatibility.
>
> Alex
>
>> Shaoyun.liu
>>
>> -----Original Message-----
>> From: Alex Deucher <alexdeucher@gmail.com>
>> Sent: Tuesday, January 3, 2023 2:22 PM
>> To: Liu, Shaoyun <Shaoyun.Liu@amd.com>
>> Cc: Kuehling, Felix <Felix.Kuehling@amd.com>; Sharma, Shashank <Shashank.Sharma@amd.com>; amd-gfx@lists.freedesktop.org; Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; Yadav, Arvind <Arvind.Yadav@amd.com>; Paneer Selvam, Arunpravin <Arunpravin.PaneerSelvam@amd.com>
>> Subject: Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management
>>
>> On Tue, Jan 3, 2023 at 2:17 PM Liu, Shaoyun <Shaoyun.Liu@amd.com> wrote:
>>> [AMD Official Use Only - General]
>>>
>>> Hsakmt  has  the  interfaces for compute user queue. Do we want a unify API for both  graphic and compute  ?
>> Yeah, that is the eventual goal, hence the flag for AQL vs PM4.
>>
>> Alex
>>
>>> Regards
>>> Shaoyun.liu
>>>
>>> -----Original Message-----
>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
>>> Felix Kuehling
>>> Sent: Tuesday, January 3, 2023 1:30 PM
>>> To: Sharma, Shashank <Shashank.Sharma@amd.com>;
>>> amd-gfx@lists.freedesktop.org
>>> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian
>>> <Christian.Koenig@amd.com>; Yadav, Arvind <Arvind.Yadav@amd.com>;
>>> Paneer Selvam, Arunpravin <Arunpravin.PaneerSelvam@amd.com>
>>> Subject: Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management
>>>
>>> Am 2022-12-23 um 14:36 schrieb Shashank Sharma:
>>>> From: Alex Deucher <alexander.deucher@amd.com>
>>>>
>>>> This patch intorduces new UAPI/IOCTL for usermode graphics queue.
>>>> The userspace app will fill this structure and request the graphics
>>>> driver to add a graphics work queue for it. The output of this UAPI
>>>> is a queue id.
>>>>
>>>> This UAPI maps the queue into GPU, so the graphics app can start
>>>> submitting work to the queue as soon as the call returns.
>>>>
>>>> Cc: Alex Deucher <alexander.deucher@amd.com>
>>>> Cc: Christian Koenig <christian.koenig@amd.com>
>>>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>>>> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
>>>> ---
>>>>    include/uapi/drm/amdgpu_drm.h | 52 +++++++++++++++++++++++++++++++++++
>>>>    1 file changed, 52 insertions(+)
>>>>
>>>> diff --git a/include/uapi/drm/amdgpu_drm.h
>>>> b/include/uapi/drm/amdgpu_drm.h index 0d93ec132ebb..a3d0dd6f62c5
>>>> 100644
>>>> --- a/include/uapi/drm/amdgpu_drm.h
>>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>>> @@ -54,6 +54,7 @@ extern "C" {
>>>>    #define DRM_AMDGPU_VM                       0x13
>>>>    #define DRM_AMDGPU_FENCE_TO_HANDLE  0x14
>>>>    #define DRM_AMDGPU_SCHED            0x15
>>>> +#define DRM_AMDGPU_USERQ             0x16
>>>>
>>>>    #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>>>>    #define DRM_IOCTL_AMDGPU_GEM_MMAP   DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
>>>> @@ -71,6 +72,7 @@ extern "C" {
>>>>    #define DRM_IOCTL_AMDGPU_VM         DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
>>>>    #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
>>>>    #define DRM_IOCTL_AMDGPU_SCHED              DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
>>>> +#define DRM_IOCTL_AMDGPU_USERQ               DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
>>>>
>>>>    /**
>>>>     * DOC: memory domains
>>>> @@ -288,6 +290,56 @@ union drm_amdgpu_ctx {
>>>>        union drm_amdgpu_ctx_out out;
>>>>    };
>>>>
>>>> +/* user queue IOCTL */
>>>> +#define AMDGPU_USERQ_OP_CREATE       1
>>>> +#define AMDGPU_USERQ_OP_FREE 2
>>>> +
>>>> +#define AMDGPU_USERQ_MQD_FLAGS_SECURE        (1 << 0)
>>> What does "secure" mean here? I don't see this flag referenced anywhere in the rest of the patch series.
>>>
>>> Regards,
>>>     Felix
>>>
>>>
>>>> +#define AMDGPU_USERQ_MQD_FLAGS_AQL   (1 << 1)
>>>> +
>>>> +struct drm_amdgpu_userq_mqd {
>>>> +     /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
>>>> +     __u32   flags;
>>>> +     /** IP type: AMDGPU_HW_IP_* */
>>>> +     __u32   ip_type;
>>>> +     /** GEM object handle */
>>>> +     __u32   doorbell_handle;
>>>> +     /** Doorbell offset in dwords */
>>>> +     __u32   doorbell_offset;
>>>> +     /** GPU virtual address of the queue */
>>>> +     __u64   queue_va;
>>>> +     /** Size of the queue in bytes */
>>>> +     __u64   queue_size;
>>>> +     /** GPU virtual address of the rptr */
>>>> +     __u64   rptr_va;
>>>> +     /** GPU virtual address of the wptr */
>>>> +     __u64   wptr_va;
>>>> +};
>>>> +
>>>> +struct drm_amdgpu_userq_in {
>>>> +     /** AMDGPU_USERQ_OP_* */
>>>> +     __u32   op;
>>>> +     /** Flags */
>>>> +     __u32   flags;
>>>> +     /** Context handle to associate the queue with */
>>>> +     __u32   ctx_id;
>>>> +     __u32   pad;
>>>> +     /** Queue descriptor */
>>>> +     struct drm_amdgpu_userq_mqd mqd; };
>>>> +
>>>> +struct drm_amdgpu_userq_out {
>>>> +     /** Queue handle */
>>>> +     __u32   q_id;
>>>> +     /** Flags */
>>>> +     __u32   flags;
>>>> +};
>>>> +
>>>> +union drm_amdgpu_userq {
>>>> +     struct drm_amdgpu_userq_in in;
>>>> +     struct drm_amdgpu_userq_out out; };
>>>> +
>>>>    /* vm ioctl */
>>>>    #define AMDGPU_VM_OP_RESERVE_VMID   1
>>>>    #define AMDGPU_VM_OP_UNRESERVE_VMID 2

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 3/7] drm/amdgpu: Create MQD for userspace queue
  2023-01-03 18:37       ` Felix Kuehling
@ 2023-01-04  6:21         ` Yadav, Arvind
  2023-01-04  9:10           ` Christian König
  2023-01-04 14:28           ` Alex Deucher
  0 siblings, 2 replies; 64+ messages in thread
From: Yadav, Arvind @ 2023-01-04  6:21 UTC (permalink / raw)
  To: Felix Kuehling, Shashank Sharma, Alex Deucher
  Cc: Alex Deucher, amd-gfx, Christian Koenig, arvind.yadav,
	arunpravin.paneerselvam


On 1/4/2023 12:07 AM, Felix Kuehling wrote:
> Am 2023-01-03 um 04:36 schrieb Shashank Sharma:
>>>> /*MQD struct for usermode Queue*/
>>>> +struct amdgpu_usermode_queue_mqd
>>> This is specific to GC 11.  Every IP and version will have its own MQD
>>> format.  That should live in the IP specific code, not the generic
>>> code.  We already have the generic MQD parameters that we need from
>>> the userq IOCTL.
>>
>> Noted, we can separate out the generic parameters from gen specific 
>> parameter, and will try to wrap it around the generic structure.
>>
>> - Shashank
>
> Is there a reason why you can't use "struct v11_compute_mqd" from 
> v11_structs.h?

Hi Felix,

Yes, v11_compute_mqd does not have the members below, which are needed
for the usermode queue.

     uint32_t shadow_base_lo; // offset: 0  (0x0)
     uint32_t shadow_base_hi; // offset: 1  (0x1)
     uint32_t gds_bkup_base_lo; // offset: 2  (0x2)
     uint32_t gds_bkup_base_hi; // offset: 3  (0x3)
     uint32_t fw_work_area_base_lo; // offset: 4  (0x4)
     uint32_t fw_work_area_base_hi; // offset: 5  (0x5)
     uint32_t shadow_initialized; // offset: 6  (0x6)
     uint32_t ib_vmid; // offset: 7  (0x7)

So we had to add new MQD structs.

thanks

~arvind
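
To sketch the separation agreed on above (generic parameters taken straight from the userq IOCTL, IP/version specific code owning the real MQD layout), one possible shape is shown below. All names here are illustrative only and not part of this series.

struct amdgpu_userq_props {          /* generic, filled from drm_amdgpu_userq_mqd */
    uint64_t queue_va;
    uint64_t queue_size;
    uint64_t rptr_va;
    uint64_t wptr_va;
    uint32_t doorbell_index;
    uint32_t flags;
};

struct amdgpu_userq_ip_funcs {       /* implemented per IP/version, e.g. GC 11    */
    int (*mqd_create)(struct amdgpu_device *adev,
                      struct amdgpu_usermode_queue *queue,
                      struct amdgpu_userq_props *props);
    void (*mqd_destroy)(struct amdgpu_device *adev,
                        struct amdgpu_usermode_queue *queue);
};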

>
> Regards,
>   Felix
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* RE: [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work
  2022-12-23 19:36 ` [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work Shashank Sharma
                     ` (2 preceding siblings ...)
  2022-12-29 17:41   ` Alex Deucher
@ 2023-01-04  8:55   ` Zhu, Jiadong
  2023-01-04  8:58     ` Shashank Sharma
  3 siblings, 1 reply; 64+ messages in thread
From: Zhu, Jiadong @ 2023-01-04  8:55 UTC (permalink / raw)
  To: Sharma, Shashank, amd-gfx
  Cc: Deucher, Alexander, Paneer Selvam, Arunpravin, Koenig, Christian,
	Yadav, Arvind, Sharma, Shashank

[AMD Official Use Only - General]

Hi Shashank,

I can't find where amdgpu_userq_ioctl is called. Should DRM_IOCTL_DEF_DRV(amdgpu_userq_ioctl...) be added somewhere to expose the ioctl?

Thanks,
Jiadong
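
For reference, exposing the ioctl would normally take an entry in the amdgpu ioctl table (amdgpu_ioctls_kms[]), roughly like the line below; the access flags shown are only illustrative, not taken from this series.

DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, DRM_AUTH | DRM_RENDER_ALLOW),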

-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Shashank Sharma
Sent: Saturday, December 24, 2022 3:37 AM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Sharma, Shashank <Shashank.Sharma@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; Yadav, Arvind <Arvind.Yadav@amd.com>; Paneer Selvam, Arunpravin <Arunpravin.PaneerSelvam@amd.com>
Subject: [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work

This patch adds skeleton code for usermode queue creation. It typically contains:
- A new structure to keep all the user queue data in one place.
- An IOCTL function to create/free a usermode queue.
- A function to generate unique index for the queue.
- A global ptr in amdgpu_dev

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/Makefile           |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu.h           |   6 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h       |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 187 ++++++++++++++++++
 .../drm/amd/include/amdgpu_usermode_queue.h   |  50 +++++
 5 files changed, 246 insertions(+)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
 create mode 100644 drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 6ad39cf71bdd..e2a34ee57bfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -209,6 +209,8 @@ amdgpu-y += \
 # add amdkfd interfaces
 amdgpu-y += amdgpu_amdkfd.o

+# add usermode queue
+amdgpu-y += amdgpu_userqueue.o

 ifneq ($(CONFIG_HSA_AMD),)
 AMDKFD_PATH := ../amdkfd
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 8639a4f9c6e8..4b566fcfca18 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -749,6 +749,11 @@ struct amdgpu_mqd {
                        struct amdgpu_mqd_prop *p);
 };

+struct amdgpu_userq_globals {
+       struct ida ida;
+       struct mutex userq_mutex;
+};
+
 #define AMDGPU_RESET_MAGIC_NUM 64
 #define AMDGPU_MAX_DF_PERFMONS 4
 #define AMDGPU_PRODUCT_NAME_LEN 64
@@ -955,6 +960,7 @@ struct amdgpu_device {
        bool                            enable_mes_kiq;
        struct amdgpu_mes               mes;
        struct amdgpu_mqd               mqds[AMDGPU_HW_IP_NUM];
+       struct amdgpu_userq_globals     userq;

        /* df */
        struct amdgpu_df                df;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index 0fa0e56daf67..f7413859b14f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -57,6 +57,7 @@ struct amdgpu_ctx {
        unsigned long                   ras_counter_ce;
        unsigned long                   ras_counter_ue;
        uint32_t                        stable_pstate;
+       struct amdgpu_usermode_queue    *userq;
 };

 struct amdgpu_ctx_mgr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
new file mode 100644
index 000000000000..3b6e8f75495c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+obtaining a
+ * copy of this software and associated documentation files (the
+"Software"),
+ * to deal in the Software without restriction, including without
+limitation
+ * the rights to use, copy, modify, merge, publish, distribute,
+sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom
+the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
+SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
+DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_mes.h"
+#include "amdgpu_usermode_queue.h"
+#include "soc15_common.h"
+
+#define CHECK_ACCESS(a) (access_ok((const void __user *)a,
+sizeof(__u64)))
+
+static int
+amdgpu_userqueue_index(struct amdgpu_device *adev) {
+    int index;
+    struct amdgpu_userq_globals *uqg = &adev->userq;
+
+    index = ida_simple_get(&uqg->ida, 2, AMDGPU_MAX_USERQ, GFP_KERNEL);
+    return index;
+}
+
+static void
+amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct
+amdgpu_usermode_queue *queue) {
+    struct amdgpu_userq_globals *uqg = &adev->userq;
+
+    ida_simple_remove(&uqg->ida, queue->queue_id); }
+
+static int
+amdgpu_userqueue_validate_input(struct amdgpu_device *adev, struct
+drm_amdgpu_userq_mqd *mqd_in) {
+    if (mqd_in->queue_va == 0 || mqd_in->doorbell_handle == 0 || mqd_in->doorbell_offset == 0) {
+        DRM_ERROR("Invalid queue object address\n");
+        return -EINVAL;
+    }
+
+    if (mqd_in->queue_size == 0 || mqd_in->rptr_va == 0 || mqd_in->wptr_va == 0) {
+        DRM_ERROR("Invalid queue object value\n");
+        return -EINVAL;
+    }
+
+    if (mqd_in->ip_type < AMDGPU_HW_IP_GFX || mqd_in->ip_type >= AMDGPU_HW_IP_NUM) {
+        DRM_ERROR("Invalid HW IP type 0x%x\n", mqd_in->ip_type);
+        return -EINVAL;
+    }
+
+    if (!CHECK_ACCESS(mqd_in->queue_va) || !CHECK_ACCESS(mqd_in->rptr_va) ||
+        !CHECK_ACCESS(mqd_in->wptr_va)) {
+            DRM_ERROR("Invalid mapping of queue ptrs, access error\n");
+            return -EINVAL;
+    }
+
+    DRM_DEBUG_DRIVER("Input parameters to create queue are valid\n");
+    return 0;
+}
+
+int amdgpu_userqueue_create(struct amdgpu_device *adev, struct drm_file *filp,
+                            union drm_amdgpu_userq *args) {
+    int r, pasid;
+    struct amdgpu_usermode_queue *queue;
+    struct amdgpu_fpriv *fpriv = filp->driver_priv;
+    struct amdgpu_vm *vm = &fpriv->vm;
+    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, args->in.ctx_id);
+    struct drm_amdgpu_userq_mqd *mqd_in = &args->in.mqd;
+
+    if (!ctx) {
+        DRM_ERROR("Invalid GPU context\n");
+        return -EINVAL;
+    }
+
+    if (vm->pasid < 0) {
+        DRM_WARN("No PASID info found\n");
+        pasid = 0;
+    }
+
+    mutex_lock(&adev->userq.userq_mutex);
+
+    queue = kzalloc(sizeof(struct amdgpu_usermode_queue), GFP_KERNEL);
+    if (!queue) {
+        DRM_ERROR("Failed to allocate memory for queue\n");
+        mutex_unlock(&adev->userq.userq_mutex);
+        return -ENOMEM;
+    }
+
+    r = amdgpu_userqueue_validate_input(adev, mqd_in);
+    if (r < 0) {
+        DRM_ERROR("Invalid input to create queue\n");
+        goto free_queue;
+    }
+
+    queue->vm = vm;
+    queue->pasid = pasid;
+    queue->wptr_gpu_addr = mqd_in->wptr_va;
+    queue->rptr_gpu_addr = mqd_in->rptr_va;
+    queue->queue_size = mqd_in->queue_size;
+    queue->queue_type = mqd_in->ip_type;
+    queue->paging = false;
+    queue->flags = mqd_in->flags;
+    queue->queue_id = amdgpu_userqueue_index(adev);
+
+    ctx->userq = queue;
+    args->out.q_id = queue->queue_id;
+    args->out.flags = 0;
+    mutex_unlock(&adev->userq.userq_mutex);
+    return 0;
+
+free_queue:
+    amdgpu_userqueue_remove_index(adev, queue);
+    mutex_unlock(&adev->userq.userq_mutex);
+    kfree(queue);
+    return r;
+}
+
+void amdgpu_userqueue_destroy(struct amdgpu_device *adev, struct drm_file *filp,
+                              union drm_amdgpu_userq *args) {
+    struct amdgpu_fpriv *fpriv = filp->driver_priv;
+    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, args->in.ctx_id);
+    struct amdgpu_usermode_queue *queue = ctx->userq;
+
+    mutex_lock(&adev->userq.userq_mutex);
+    amdgpu_userqueue_remove_index(adev, queue);
+    ctx->userq = NULL;
+    mutex_unlock(&adev->userq.userq_mutex);
+    kfree(queue);
+}
+
+int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
+                      struct drm_file *filp)
+{
+    union drm_amdgpu_userq *args = data;
+    struct amdgpu_device *adev = drm_to_adev(dev);
+    int r = 0;
+
+    switch (args->in.op) {
+    case AMDGPU_USERQ_OP_CREATE:
+        r = amdgpu_userqueue_create(adev, filp, args);
+        if (r)
+            DRM_ERROR("Failed to create usermode queue\n");
+        break;
+
+    case AMDGPU_USERQ_OP_FREE:
+        amdgpu_userqueue_destroy(adev, filp, args);
+        break;
+
+    default:
+        DRM_ERROR("Invalid user queue op specified: %d\n", args->in.op);
+        return -EINVAL;
+    }
+
+    return r;
+}
+
+int amdgpu_userqueue_init(struct amdgpu_device *adev) {
+    struct amdgpu_userq_globals *uqg = &adev->userq;
+
+    mutex_init(&uqg->userq_mutex);
+    return 0;
+}
+
+void amdgpu_userqueue_fini(struct amdgpu_device *adev) {
+
+}
diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
new file mode 100644
index 000000000000..c1fe39ffaf72
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+obtaining a
+ * copy of this software and associated documentation files (the
+"Software"),
+ * to deal in the Software without restriction, including without
+limitation
+ * the rights to use, copy, modify, merge, publish, distribute,
+sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom
+the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
+SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
+DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef AMDGPU_USERMODE_QUEUE_H_
+#define AMDGPU_USERMODE_QUEUE_H_
+
+#define AMDGPU_MAX_USERQ 512
+
+struct amdgpu_usermode_queue {
+       int             queue_id;
+       int             queue_type;
+       int             queue_size;
+       int             paging;
+       int             pasid;
+       int             use_doorbell;
+       int             doorbell_index;
+
+       uint64_t        mqd_gpu_addr;
+       uint64_t        wptr_gpu_addr;
+       uint64_t        rptr_gpu_addr;
+       uint64_t        queue_gpu_addr;
+       uint64_t        flags;
+       void            *mqd_cpu_ptr;
+
+       struct amdgpu_bo        *mqd_obj;
+       struct amdgpu_vm        *vm;
+       struct list_head        list;
+};
+
+#endif
--
2.34.1


^ permalink raw reply related	[flat|nested] 64+ messages in thread

* Re: [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work
  2023-01-04  8:55   ` Zhu, Jiadong
@ 2023-01-04  8:58     ` Shashank Sharma
  0 siblings, 0 replies; 64+ messages in thread
From: Shashank Sharma @ 2023-01-04  8:58 UTC (permalink / raw)
  To: Zhu, Jiadong, amd-gfx
  Cc: Deucher, Alexander, Koenig, Christian, Yadav, Arvind,
	Paneer Selvam, Arunpravin

Hello Jiadong,

Please check the first patch of the series, where we added the UAPI.
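
For reference, exposing it is a one-line addition to the existing ioctl
table in amdgpu_drv.c. A rough sketch (DRM_IOCTL_DEF_DRV and
amdgpu_ioctls_kms are the existing DRM/amdgpu pieces; the access flags
shown are my assumption, following the other amdgpu ioctls):

/* Sketch only: hook the new ioctl into the amdgpu ioctl table. */
const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
    /* ... existing amdgpu ioctls ... */
    DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl,
                      DRM_AUTH | DRM_RENDER_ALLOW),
};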


- Shashank

On 04/01/2023 09:55, Zhu, Jiadong wrote:
> [AMD Official Use Only - General]
>
> Hi Shashank,
>
> I don't find how amdgpu_userq_ioctl is called, shall DRM_IOCTL_DEF_DRV(amdgpu_userq_ioctl...) be added somewhere to expose the ioctl?
>
> Thanks,
> Jiadong
>
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Shashank Sharma
> Sent: Saturday, December 24, 2022 3:37 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Sharma, Shashank <Shashank.Sharma@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; Yadav, Arvind <Arvind.Yadav@amd.com>; Paneer Selvam, Arunpravin <Arunpravin.PaneerSelvam@amd.com>
> Subject: [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work
>
> This patch adds skeleton code for usermode queue creation. It typically contains:
> - A new structure to keep all the user queue data in one place.
> - An IOCTL function to create/free a usermode queue.
> - A function to generate unique index for the queue.
> - A global ptr in amdgpu_dev
>
> Cc: Alex Deucher <alexander.deucher@amd.com>
> Cc: Christian Koenig <christian.koenig@amd.com>
> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/Makefile           |   2 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h           |   6 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h       |   1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 187 ++++++++++++++++++
>   .../drm/amd/include/amdgpu_usermode_queue.h   |  50 +++++
>   5 files changed, 246 insertions(+)
>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>   create mode 100644 drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
> index 6ad39cf71bdd..e2a34ee57bfb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -209,6 +209,8 @@ amdgpu-y += \
>   # add amdkfd interfaces
>   amdgpu-y += amdgpu_amdkfd.o
>
> +# add usermode queue
> +amdgpu-y += amdgpu_userqueue.o
>
>   ifneq ($(CONFIG_HSA_AMD),)
>   AMDKFD_PATH := ../amdkfd
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 8639a4f9c6e8..4b566fcfca18 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -749,6 +749,11 @@ struct amdgpu_mqd {
>                          struct amdgpu_mqd_prop *p);
>   };
>
> +struct amdgpu_userq_globals {
> +       struct ida ida;
> +       struct mutex userq_mutex;
> +};
> +
>   #define AMDGPU_RESET_MAGIC_NUM 64
>   #define AMDGPU_MAX_DF_PERFMONS 4
>   #define AMDGPU_PRODUCT_NAME_LEN 64
> @@ -955,6 +960,7 @@ struct amdgpu_device {
>          bool                            enable_mes_kiq;
>          struct amdgpu_mes               mes;
>          struct amdgpu_mqd               mqds[AMDGPU_HW_IP_NUM];
> +       struct amdgpu_userq_globals     userq;
>
>          /* df */
>          struct amdgpu_df                df;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> index 0fa0e56daf67..f7413859b14f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> @@ -57,6 +57,7 @@ struct amdgpu_ctx {
>          unsigned long                   ras_counter_ce;
>          unsigned long                   ras_counter_ue;
>          uint32_t                        stable_pstate;
> +       struct amdgpu_usermode_queue    *userq;
>   };
>
>   struct amdgpu_ctx_mgr {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> new file mode 100644
> index 000000000000..3b6e8f75495c
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> @@ -0,0 +1,187 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person
> +obtaining a
> + * copy of this software and associated documentation files (the
> +"Software"),
> + * to deal in the Software without restriction, including without
> +limitation
> + * the rights to use, copy, modify, merge, publish, distribute,
> +sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom
> +the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be
> +included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> +EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> +MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
> +SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
> +DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> +OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
> +OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#include "amdgpu.h"
> +#include "amdgpu_vm.h"
> +#include "amdgpu_mes.h"
> +#include "amdgpu_usermode_queue.h"
> +#include "soc15_common.h"
> +
> +#define CHECK_ACCESS(a) (access_ok((const void __user *)a,
> +sizeof(__u64)))
> +
> +static int
> +amdgpu_userqueue_index(struct amdgpu_device *adev) {
> +    int index;
> +    struct amdgpu_userq_globals *uqg = &adev->userq;
> +
> +    index = ida_simple_get(&uqg->ida, 2, AMDGPU_MAX_USERQ, GFP_KERNEL);
> +    return index;
> +}
> +
> +static void
> +amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct
> +amdgpu_usermode_queue *queue) {
> +    struct amdgpu_userq_globals *uqg = &adev->userq;
> +
> +    ida_simple_remove(&uqg->ida, queue->queue_id); }
> +
> +static int
> +amdgpu_userqueue_validate_input(struct amdgpu_device *adev, struct
> +drm_amdgpu_userq_mqd *mqd_in) {
> +    if (mqd_in->queue_va == 0 || mqd_in->doorbell_handle == 0 || mqd_in->doorbell_offset == 0) {
> +        DRM_ERROR("Invalid queue object address\n");
> +        return -EINVAL;
> +    }
> +
> +    if (mqd_in->queue_size == 0 || mqd_in->rptr_va == 0 || mqd_in->wptr_va == 0) {
> +        DRM_ERROR("Invalid queue object value\n");
> +        return -EINVAL;
> +    }
> +
> +    if (mqd_in->ip_type < AMDGPU_HW_IP_GFX || mqd_in->ip_type >= AMDGPU_HW_IP_NUM) {
> +        DRM_ERROR("Invalid HW IP type 0x%x\n", mqd_in->ip_type);
> +        return -EINVAL;
> +    }
> +
> +    if (!CHECK_ACCESS(mqd_in->queue_va) || !CHECK_ACCESS(mqd_in->rptr_va) ||
> +        !CHECK_ACCESS(mqd_in->wptr_va)) {
> +            DRM_ERROR("Invalid mapping of queue ptrs, access error\n");
> +            return -EINVAL;
> +    }
> +
> +    DRM_DEBUG_DRIVER("Input parameters to create queue are valid\n");
> +    return 0;
> +}
> +
> +int amdgpu_userqueue_create(struct amdgpu_device *adev, struct drm_file *filp,
> +                            union drm_amdgpu_userq *args) {
> +    int r, pasid;
> +    struct amdgpu_usermode_queue *queue;
> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
> +    struct amdgpu_vm *vm = &fpriv->vm;
> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, args->in.ctx_id);
> +    struct drm_amdgpu_userq_mqd *mqd_in = &args->in.mqd;
> +
> +    if (!ctx) {
> +        DRM_ERROR("Invalid GPU context\n");
> +        return -EINVAL;
> +    }
> +
> +    if (vm->pasid < 0) {
> +        DRM_WARN("No PASID info found\n");
> +        pasid = 0;
> +    }
> +
> +    mutex_lock(&adev->userq.userq_mutex);
> +
> +    queue = kzalloc(sizeof(struct amdgpu_usermode_queue), GFP_KERNEL);
> +    if (!queue) {
> +        DRM_ERROR("Failed to allocate memory for queue\n");
> +        mutex_unlock(&adev->userq.userq_mutex);
> +        return -ENOMEM;
> +    }
> +
> +    r = amdgpu_userqueue_validate_input(adev, mqd_in);
> +    if (r < 0) {
> +        DRM_ERROR("Invalid input to create queue\n");
> +        goto free_queue;
> +    }
> +
> +    queue->vm = vm;
> +    queue->pasid = pasid;
> +    queue->wptr_gpu_addr = mqd_in->wptr_va;
> +    queue->rptr_gpu_addr = mqd_in->rptr_va;
> +    queue->queue_size = mqd_in->queue_size;
> +    queue->queue_type = mqd_in->ip_type;
> +    queue->paging = false;
> +    queue->flags = mqd_in->flags;
> +    queue->queue_id = amdgpu_userqueue_index(adev);
> +
> +    ctx->userq = queue;
> +    args->out.q_id = queue->queue_id;
> +    args->out.flags = 0;
> +    mutex_unlock(&adev->userq.userq_mutex);
> +    return 0;
> +
> +free_queue:
> +    amdgpu_userqueue_remove_index(adev, queue);
> +    mutex_unlock(&adev->userq.userq_mutex);
> +    kfree(queue);
> +    return r;
> +}
> +
> +void amdgpu_userqueue_destroy(struct amdgpu_device *adev, struct drm_file *filp,
> +                              union drm_amdgpu_userq *args) {
> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
> +    struct amdgpu_ctx *ctx = amdgpu_ctx_get(fpriv, args->in.ctx_id);
> +    struct amdgpu_usermode_queue *queue = ctx->userq;
> +
> +    mutex_lock(&adev->userq.userq_mutex);
> +    amdgpu_userqueue_remove_index(adev, queue);
> +    ctx->userq = NULL;
> +    mutex_unlock(&adev->userq.userq_mutex);
> +    kfree(queue);
> +}
> +
> +int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
> +                      struct drm_file *filp)
> +{
> +    union drm_amdgpu_userq *args = data;
> +    struct amdgpu_device *adev = drm_to_adev(dev);
> +    int r = 0;
> +
> +    switch (args->in.op) {
> +    case AMDGPU_USERQ_OP_CREATE:
> +        r = amdgpu_userqueue_create(adev, filp, args);
> +        if (r)
> +            DRM_ERROR("Failed to create usermode queue\n");
> +        break;
> +
> +    case AMDGPU_USERQ_OP_FREE:
> +        amdgpu_userqueue_destroy(adev, filp, args);
> +        break;
> +
> +    default:
> +        DRM_ERROR("Invalid user queue op specified: %d\n", args->in.op);
> +        return -EINVAL;
> +    }
> +
> +    return r;
> +}
> +
> +int amdgpu_userqueue_init(struct amdgpu_device *adev) {
> +    struct amdgpu_userq_globals *uqg = &adev->userq;
> +
> +    mutex_init(&uqg->userq_mutex);
> +    return 0;
> +}
> +
> +void amdgpu_userqueue_fini(struct amdgpu_device *adev) {
> +
> +}
> diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
> new file mode 100644
> index 000000000000..c1fe39ffaf72
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
> @@ -0,0 +1,50 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person
> +obtaining a
> + * copy of this software and associated documentation files (the
> +"Software"),
> + * to deal in the Software without restriction, including without
> +limitation
> + * the rights to use, copy, modify, merge, publish, distribute,
> +sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom
> +the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be
> +included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> +EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> +MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
> +SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
> +DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> +OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
> +OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#ifndef AMDGPU_USERMODE_QUEUE_H_
> +#define AMDGPU_USERMODE_QUEUE_H_
> +
> +#define AMDGPU_MAX_USERQ 512
> +
> +struct amdgpu_usermode_queue {
> +       int             queue_id;
> +       int             queue_type;
> +       int             queue_size;
> +       int             paging;
> +       int             pasid;
> +       int             use_doorbell;
> +       int             doorbell_index;
> +
> +       uint64_t        mqd_gpu_addr;
> +       uint64_t        wptr_gpu_addr;
> +       uint64_t        rptr_gpu_addr;
> +       uint64_t        queue_gpu_addr;
> +       uint64_t        flags;
> +       void            *mqd_cpu_ptr;
> +
> +       struct amdgpu_bo        *mqd_obj;
> +       struct amdgpu_vm        *vm;
> +       struct list_head        list;
> +};
> +
> +#endif
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 3/7] drm/amdgpu: Create MQD for userspace queue
  2023-01-04  6:21         ` Yadav, Arvind
@ 2023-01-04  9:10           ` Christian König
  2023-01-04  9:13             ` Shashank Sharma
  2023-01-04 14:28           ` Alex Deucher
  1 sibling, 1 reply; 64+ messages in thread
From: Christian König @ 2023-01-04  9:10 UTC (permalink / raw)
  To: Yadav, Arvind, Felix Kuehling, Shashank Sharma, Alex Deucher
  Cc: Alex Deucher, amd-gfx, arvind.yadav, arunpravin.paneerselvam

On 04.01.23 at 07:21, Yadav, Arvind wrote:
>
> On 1/4/2023 12:07 AM, Felix Kuehling wrote:
>> Am 2023-01-03 um 04:36 schrieb Shashank Sharma:
>>>>> /*MQD struct for usermode Queue*/
>>>>> +struct amdgpu_usermode_queue_mqd
>>>> This is specific to GC 11.  Every IP and version will have its own MQD
>>>> format.  That should live in the IP specific code, not the generic
>>>> code.  We already have the generic MQD parameters that we need from
>>>> the userq IOCTL.
>>>
>>> Noted, we can separate out the generic parameters from gen specific 
>>> parameter, and will try to wrap it around the generic structure.
>>>
>>> - Shashank
>>
>> Is there a reason why you can't use "struct v11_compute_mqd" from 
>> v11_structs.h?
>
> Hi Felix,
>
> Yes,  V11_compute_mqd does not have these below member which is needed 
> for usermode queue.
>
>     uint32_t shadow_base_lo; // offset: 0  (0x0)
>     uint32_t shadow_base_hi; // offset: 1  (0x1)
>     uint32_t gds_bkup_base_lo ; // offset: 2  (0x2)
>     uint32_t gds_bkup_base_hi ; // offset: 3  (0x3)
>     uint32_t fw_work_area_base_lo; // offset: 4  (0x4)
>     uint32_t fw_work_area_base_hi; // offset: 5  (0x5)
>     uint32_t shadow_initialized; // offset: 6  (0x6)
>     uint32_t ib_vmid; // offset: 7  (0x7)
>
> So we had to add new MQD structs.

Would it make more sense to update the existing MQD structures rather than
adding new ones?

Regards,
Christian.

>
> thanks
>
> ~arvind
>
>>
>> Regards,
>>   Felix
>>


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 3/7] drm/amdgpu: Create MQD for userspace queue
  2023-01-04  9:10           ` Christian König
@ 2023-01-04  9:13             ` Shashank Sharma
  2023-01-04  9:17               ` Christian König
  0 siblings, 1 reply; 64+ messages in thread
From: Shashank Sharma @ 2023-01-04  9:13 UTC (permalink / raw)
  To: Christian König, Yadav, Arvind, Felix Kuehling, Alex Deucher
  Cc: Alex Deucher, amd-gfx, arvind.yadav, arunpravin.paneerselvam


On 04/01/2023 10:10, Christian König wrote:
> Am 04.01.23 um 07:21 schrieb Yadav, Arvind:
>>
>> On 1/4/2023 12:07 AM, Felix Kuehling wrote:
>>> Am 2023-01-03 um 04:36 schrieb Shashank Sharma:
>>>>>> /*MQD struct for usermode Queue*/
>>>>>> +struct amdgpu_usermode_queue_mqd
>>>>> This is specific to GC 11.  Every IP and version will have its own 
>>>>> MQD
>>>>> format.  That should live in the IP specific code, not the generic
>>>>> code.  We already have the generic MQD parameters that we need from
>>>>> the userq IOCTL.
>>>>
>>>> Noted, we can separate out the generic parameters from gen specific 
>>>> parameter, and will try to wrap it around the generic structure.
>>>>
>>>> - Shashank
>>>
>>> Is there a reason why you can't use "struct v11_compute_mqd" from 
>>> v11_structs.h?
>>
>> Hi Felix,
>>
>> Yes,  V11_compute_mqd does not have these below member which is 
>> needed for usermode queue.
>>
>>     uint32_t shadow_base_lo; // offset: 0  (0x0)
>>     uint32_t shadow_base_hi; // offset: 1  (0x1)
>>     uint32_t gds_bkup_base_lo ; // offset: 2  (0x2)
>>     uint32_t gds_bkup_base_hi ; // offset: 3  (0x3)
>>     uint32_t fw_work_area_base_lo; // offset: 4  (0x4)
>>     uint32_t fw_work_area_base_hi; // offset: 5  (0x5)
>>     uint32_t shadow_initialized; // offset: 6  (0x6)
>>     uint32_t ib_vmid; // offset: 7  (0x7)
>>
>> So we had to add new MQD structs.
>
> Would it make more sense to update the existing MQD structures than 
> adding new ones?
>
IMO it might be a bit complicated in the bring-up state, but we can take a
note to convert this structure into a union of the two, or maybe rename it
into a superset structure.

- Shashank

> Regards,
> Christian.
>
>>
>> thanks
>>
>> ~arvind
>>
>>>
>>> Regards,
>>>   Felix
>>>
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 3/7] drm/amdgpu: Create MQD for userspace queue
  2023-01-04  9:13             ` Shashank Sharma
@ 2023-01-04  9:17               ` Christian König
  2023-01-04  9:23                 ` Shashank Sharma
  0 siblings, 1 reply; 64+ messages in thread
From: Christian König @ 2023-01-04  9:17 UTC (permalink / raw)
  To: Shashank Sharma, Yadav, Arvind, Felix Kuehling, Alex Deucher
  Cc: Alex Deucher, amd-gfx, arvind.yadav, arunpravin.paneerselvam

On 04.01.23 at 10:13, Shashank Sharma wrote:
>
> On 04/01/2023 10:10, Christian König wrote:
>> Am 04.01.23 um 07:21 schrieb Yadav, Arvind:
>>>
>>> On 1/4/2023 12:07 AM, Felix Kuehling wrote:
>>>> Am 2023-01-03 um 04:36 schrieb Shashank Sharma:
>>>>>>> /*MQD struct for usermode Queue*/
>>>>>>> +struct amdgpu_usermode_queue_mqd
>>>>>> This is specific to GC 11.  Every IP and version will have its 
>>>>>> own MQD
>>>>>> format.  That should live in the IP specific code, not the generic
>>>>>> code.  We already have the generic MQD parameters that we need from
>>>>>> the userq IOCTL.
>>>>>
>>>>> Noted, we can separate out the generic parameters from gen 
>>>>> specific parameter, and will try to wrap it around the generic 
>>>>> structure.
>>>>>
>>>>> - Shashank
>>>>
>>>> Is there a reason why you can't use "struct v11_compute_mqd" from 
>>>> v11_structs.h?
>>>
>>> Hi Felix,
>>>
>>> Yes,  V11_compute_mqd does not have these below member which is 
>>> needed for usermode queue.
>>>
>>>     uint32_t shadow_base_lo; // offset: 0  (0x0)
>>>     uint32_t shadow_base_hi; // offset: 1  (0x1)
>>>     uint32_t gds_bkup_base_lo ; // offset: 2  (0x2)
>>>     uint32_t gds_bkup_base_hi ; // offset: 3  (0x3)
>>>     uint32_t fw_work_area_base_lo; // offset: 4  (0x4)
>>>     uint32_t fw_work_area_base_hi; // offset: 5  (0x5)
>>>     uint32_t shadow_initialized; // offset: 6  (0x6)
>>>     uint32_t ib_vmid; // offset: 7  (0x7)
>>>
>>> So we had to add new MQD structs.
>>
>> Would it make more sense to update the existing MQD structures than 
>> adding new ones?
>>
> Imo, It might be a bit complicated in the bring-up state, but we can 
> take a note of converting this structure into a union of two, or may 
> be renaming it into a superset structure.

Union? Does that mean we have fields which are specific to each version of
the struct?

BTW: Could we drop the "// offset:" stuff? This could cause problems 
with automated checkers.

Christian.

>
> - Shashank
>
>> Regards,
>> Christian.
>>
>>>
>>> thanks
>>>
>>> ~arvind
>>>
>>>>
>>>> Regards,
>>>>   Felix
>>>>
>>


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 3/7] drm/amdgpu: Create MQD for userspace queue
  2023-01-04  9:17               ` Christian König
@ 2023-01-04  9:23                 ` Shashank Sharma
  2023-01-04 14:35                   ` Felix Kuehling
  0 siblings, 1 reply; 64+ messages in thread
From: Shashank Sharma @ 2023-01-04  9:23 UTC (permalink / raw)
  To: Christian König, Yadav, Arvind, Felix Kuehling, Alex Deucher
  Cc: Alex Deucher, amd-gfx, arvind.yadav, arunpravin.paneerselvam


On 04/01/2023 10:17, Christian König wrote:
> Am 04.01.23 um 10:13 schrieb Shashank Sharma:
>>
>> On 04/01/2023 10:10, Christian König wrote:
>>> Am 04.01.23 um 07:21 schrieb Yadav, Arvind:
>>>>
>>>> On 1/4/2023 12:07 AM, Felix Kuehling wrote:
>>>>> Am 2023-01-03 um 04:36 schrieb Shashank Sharma:
>>>>>>>> /*MQD struct for usermode Queue*/
>>>>>>>> +struct amdgpu_usermode_queue_mqd
>>>>>>> This is specific to GC 11.  Every IP and version will have its 
>>>>>>> own MQD
>>>>>>> format.  That should live in the IP specific code, not the generic
>>>>>>> code.  We already have the generic MQD parameters that we need from
>>>>>>> the userq IOCTL.
>>>>>>
>>>>>> Noted, we can separate out the generic parameters from gen 
>>>>>> specific parameter, and will try to wrap it around the generic 
>>>>>> structure.
>>>>>>
>>>>>> - Shashank
>>>>>
>>>>> Is there a reason why you can't use "struct v11_compute_mqd" from 
>>>>> v11_structs.h?
>>>>
>>>> Hi Felix,
>>>>
>>>> Yes,  V11_compute_mqd does not have these below member which is 
>>>> needed for usermode queue.
>>>>
>>>>     uint32_t shadow_base_lo; // offset: 0  (0x0)
>>>>     uint32_t shadow_base_hi; // offset: 1  (0x1)
>>>>     uint32_t gds_bkup_base_lo ; // offset: 2  (0x2)
>>>>     uint32_t gds_bkup_base_hi ; // offset: 3  (0x3)
>>>>     uint32_t fw_work_area_base_lo; // offset: 4  (0x4)
>>>>     uint32_t fw_work_area_base_hi; // offset: 5  (0x5)
>>>>     uint32_t shadow_initialized; // offset: 6  (0x6)
>>>>     uint32_t ib_vmid; // offset: 7  (0x7)
>>>>
>>>> So we had to add new MQD structs.
>>>
>>> Would it make more sense to update the existing MQD structures than 
>>> adding new ones?
>>>
>> Imo, It might be a bit complicated in the bring-up state, but we can 
>> take a note of converting this structure into a union of two, or may 
>> be renaming it into a superset structure.
>
> Union? Does that mean we have stuff which is individual for both 
> versions of the struct?
So far it seems like the GFX MQD structure is a superset of the two, but we
have not compared them side by side yet, so I feel we can defer this task
for some time (but add it to the to-do list).
>
> BTW: Could we drop the "// offset:" stuff? This could cause problems 
> with automated checkers.

Sure, we will do it.

- Shashank

>
> Christian.
>
>>
>> - Shashank
>>
>>> Regards,
>>> Christian.
>>>
>>>>
>>>> thanks
>>>>
>>>> ~arvind
>>>>
>>>>>
>>>>> Regards,
>>>>>   Felix
>>>>>
>>>
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 3/7] drm/amdgpu: Create MQD for userspace queue
  2023-01-04  6:21         ` Yadav, Arvind
  2023-01-04  9:10           ` Christian König
@ 2023-01-04 14:28           ` Alex Deucher
  1 sibling, 0 replies; 64+ messages in thread
From: Alex Deucher @ 2023-01-04 14:28 UTC (permalink / raw)
  To: Yadav, Arvind
  Cc: Shashank Sharma, Felix Kuehling, arunpravin.paneerselvam,
	amd-gfx, arvind.yadav, Alex Deucher, Christian Koenig

On Wed, Jan 4, 2023 at 1:21 AM Yadav, Arvind <arvyadav@amd.com> wrote:
>
>
> On 1/4/2023 12:07 AM, Felix Kuehling wrote:
> > Am 2023-01-03 um 04:36 schrieb Shashank Sharma:
> >>>> /*MQD struct for usermode Queue*/
> >>>> +struct amdgpu_usermode_queue_mqd
> >>> This is specific to GC 11.  Every IP and version will have its own MQD
> >>> format.  That should live in the IP specific code, not the generic
> >>> code.  We already have the generic MQD parameters that we need from
> >>> the userq IOCTL.
> >>
> >> Noted, we can separate out the generic parameters from gen specific
> >> parameter, and will try to wrap it around the generic structure.
> >>
> >> - Shashank
> >
> > Is there a reason why you can't use "struct v11_compute_mqd" from
> > v11_structs.h?
>
> Hi Felix,
>
> Yes,  V11_compute_mqd does not have these below member which is needed
> for usermode queue.
>
>      uint32_t shadow_base_lo; // offset: 0  (0x0)
>      uint32_t shadow_base_hi; // offset: 1  (0x1)
>      uint32_t gds_bkup_base_lo ; // offset: 2  (0x2)
>      uint32_t gds_bkup_base_hi ; // offset: 3  (0x3)
>      uint32_t fw_work_area_base_lo; // offset: 4  (0x4)
>      uint32_t fw_work_area_base_hi; // offset: 5  (0x5)
>      uint32_t shadow_initialized; // offset: 6  (0x6)
>      uint32_t ib_vmid; // offset: 7  (0x7)
>
> So we had to add new MQD structs.

Just update the existing structure.  It's the same.  Only reserved
fields are getting updated.
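
Concretely, that update would look roughly like the sketch below in
v11_structs.h, assuming the leading dwords of struct v11_gfx_mqd are
currently reserved placeholders; the rest of the layout stays untouched:

/* Sketch of the suggested v11_structs.h change: name the reserved leading
 * dwords instead of introducing a new usermode-queue-only struct.
 */
struct v11_gfx_mqd {
    uint32_t shadow_base_lo;
    uint32_t shadow_base_hi;
    uint32_t gds_bkup_base_lo;
    uint32_t gds_bkup_base_hi;
    uint32_t fw_work_area_base_lo;
    uint32_t fw_work_area_base_hi;
    uint32_t shadow_initialized;
    uint32_t ib_vmid;
    /* ... remaining dwords unchanged ... */
};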

Alex

>
> thanks
>
> ~arvind
>
> >
> > Regards,
> >   Felix
> >

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 3/7] drm/amdgpu: Create MQD for userspace queue
  2023-01-04  9:23                 ` Shashank Sharma
@ 2023-01-04 14:35                   ` Felix Kuehling
  2023-01-04 14:38                     ` Yadav, Arvind
  2023-01-04 14:41                     ` Shashank Sharma
  0 siblings, 2 replies; 64+ messages in thread
From: Felix Kuehling @ 2023-01-04 14:35 UTC (permalink / raw)
  To: Shashank Sharma, Christian König, Yadav, Arvind, Alex Deucher
  Cc: Alex Deucher, amd-gfx, arvind.yadav, arunpravin.paneerselvam

On 2023-01-04 at 04:23, Shashank Sharma wrote:
>
> On 04/01/2023 10:17, Christian König wrote:
>> Am 04.01.23 um 10:13 schrieb Shashank Sharma:
>>>
>>> On 04/01/2023 10:10, Christian König wrote:
>>>> Am 04.01.23 um 07:21 schrieb Yadav, Arvind:
>>>>>
>>>>> On 1/4/2023 12:07 AM, Felix Kuehling wrote:
>>>>>> Am 2023-01-03 um 04:36 schrieb Shashank Sharma:
>>>>>>>>> /*MQD struct for usermode Queue*/
>>>>>>>>> +struct amdgpu_usermode_queue_mqd
>>>>>>>> This is specific to GC 11.  Every IP and version will have its 
>>>>>>>> own MQD
>>>>>>>> format.  That should live in the IP specific code, not the generic
>>>>>>>> code.  We already have the generic MQD parameters that we need 
>>>>>>>> from
>>>>>>>> the userq IOCTL.
>>>>>>>
>>>>>>> Noted, we can separate out the generic parameters from gen 
>>>>>>> specific parameter, and will try to wrap it around the generic 
>>>>>>> structure.
>>>>>>>
>>>>>>> - Shashank
>>>>>>
>>>>>> Is there a reason why you can't use "struct v11_compute_mqd" from 
>>>>>> v11_structs.h?
>>>>>
>>>>> Hi Felix,
>>>>>
>>>>> Yes,  V11_compute_mqd does not have these below member which is 
>>>>> needed for usermode queue.
>>>>>
>>>>>     uint32_t shadow_base_lo; // offset: 0  (0x0)
>>>>>     uint32_t shadow_base_hi; // offset: 1  (0x1)
>>>>>     uint32_t gds_bkup_base_lo ; // offset: 2  (0x2)
>>>>>     uint32_t gds_bkup_base_hi ; // offset: 3  (0x3)
>>>>>     uint32_t fw_work_area_base_lo; // offset: 4  (0x4)
>>>>>     uint32_t fw_work_area_base_hi; // offset: 5  (0x5)
>>>>>     uint32_t shadow_initialized; // offset: 6  (0x6)
>>>>>     uint32_t ib_vmid; // offset: 7  (0x7)
>>>>>
>>>>> So we had to add new MQD structs.
>>>>
>>>> Would it make more sense to update the existing MQD structures than 
>>>> adding new ones?
>>>>
>>> Imo, It might be a bit complicated in the bring-up state, but we can 
>>> take a note of converting this structure into a union of two, or may 
>>> be renaming it into a superset structure.
>>
>> Union? Does that mean we have stuff which is individual for both 
>> versions of the struct?
> So far it seems like Gfx MQD structure is a superset of two, but we 
> have not compared them neck-to-neck yet, hence I feel like we can 
> defer this task for sometime (but add into to-do list).

v11_gfx_mqd has these fields reserved. Updating the definition with the 
fields you need should not be a problem. v11_gfx_mqd is already used in 
gfx_v11_0.c.

The firmware shouldn't care much whether a queue is a kernel mode queue 
or a user mode queue. The MQD layout should be the same. So having two 
different structure definitions in two different places doesn't make 
sense. I don't think it's wise to leave this for cleanup later. That 
would only cause churn and ultimately more work than doing the right 
thing in the first place.
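
As a minimal sketch of what the shared definition buys (assuming
v11_gfx_mqd gains the named fields discussed above; the helper name and
its scope are hypothetical):

static void amdgpu_userq_gfx_v11_init_mqd(struct amdgpu_usermode_queue *queue)
{
    /* queue->mqd_cpu_ptr is the CPU mapping of the MQD BO from this series */
    struct v11_gfx_mqd *mqd = queue->mqd_cpu_ptr;

    memset(mqd, 0, sizeof(*mqd));
    /* The ring/doorbell programming can then follow what gfx_v11_0.c
     * already does for kernel queues; only the shadow/FW work area fields
     * are user-queue specific. */
}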

Regards,
   Felix



>>
>> BTW: Could we drop the "// offset:" stuff? This could cause problems 
>> with automated checkers.
>
> Sure, we will do it.
>
> - Shashank
>
>>
>> Christian.
>>
>>>
>>> - Shashank
>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>>
>>>>> thanks
>>>>>
>>>>> ~arvind
>>>>>
>>>>>>
>>>>>> Regards,
>>>>>>   Felix
>>>>>>
>>>>
>>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 3/7] drm/amdgpu: Create MQD for userspace queue
  2023-01-04 14:35                   ` Felix Kuehling
@ 2023-01-04 14:38                     ` Yadav, Arvind
  2023-01-04 14:41                     ` Shashank Sharma
  1 sibling, 0 replies; 64+ messages in thread
From: Yadav, Arvind @ 2023-01-04 14:38 UTC (permalink / raw)
  To: Felix Kuehling, Shashank Sharma, Christian König, Alex Deucher
  Cc: Alex Deucher, amd-gfx, arvind.yadav, arunpravin.paneerselvam


On 1/4/2023 8:05 PM, Felix Kuehling wrote:
> Am 2023-01-04 um 04:23 schrieb Shashank Sharma:
>>
>> On 04/01/2023 10:17, Christian König wrote:
>>> Am 04.01.23 um 10:13 schrieb Shashank Sharma:
>>>>
>>>> On 04/01/2023 10:10, Christian König wrote:
>>>>> Am 04.01.23 um 07:21 schrieb Yadav, Arvind:
>>>>>>
>>>>>> On 1/4/2023 12:07 AM, Felix Kuehling wrote:
>>>>>>> Am 2023-01-03 um 04:36 schrieb Shashank Sharma:
>>>>>>>>>> /*MQD struct for usermode Queue*/
>>>>>>>>>> +struct amdgpu_usermode_queue_mqd
>>>>>>>>> This is specific to GC 11.  Every IP and version will have its 
>>>>>>>>> own MQD
>>>>>>>>> format.  That should live in the IP specific code, not the 
>>>>>>>>> generic
>>>>>>>>> code.  We already have the generic MQD parameters that we need 
>>>>>>>>> from
>>>>>>>>> the userq IOCTL.
>>>>>>>>
>>>>>>>> Noted, we can separate out the generic parameters from gen 
>>>>>>>> specific parameter, and will try to wrap it around the generic 
>>>>>>>> structure.
>>>>>>>>
>>>>>>>> - Shashank
>>>>>>>
>>>>>>> Is there a reason why you can't use "struct v11_compute_mqd" 
>>>>>>> from v11_structs.h?
>>>>>>
>>>>>> Hi Felix,
>>>>>>
>>>>>> Yes,  V11_compute_mqd does not have these below member which is 
>>>>>> needed for usermode queue.
>>>>>>
>>>>>>     uint32_t shadow_base_lo; // offset: 0  (0x0)
>>>>>>     uint32_t shadow_base_hi; // offset: 1  (0x1)
>>>>>>     uint32_t gds_bkup_base_lo ; // offset: 2  (0x2)
>>>>>>     uint32_t gds_bkup_base_hi ; // offset: 3  (0x3)
>>>>>>     uint32_t fw_work_area_base_lo; // offset: 4  (0x4)
>>>>>>     uint32_t fw_work_area_base_hi; // offset: 5  (0x5)
>>>>>>     uint32_t shadow_initialized; // offset: 6  (0x6)
>>>>>>     uint32_t ib_vmid; // offset: 7  (0x7)
>>>>>>
>>>>>> So we had to add new MQD structs.
>>>>>
>>>>> Would it make more sense to update the existing MQD structures 
>>>>> than adding new ones?
>>>>>
>>>> Imo, It might be a bit complicated in the bring-up state, but we 
>>>> can take a note of converting this structure into a union of two, 
>>>> or may be renaming it into a superset structure.
>>>
>>> Union? Does that mean we have stuff which is individual for both 
>>> versions of the struct?
>> So far it seems like Gfx MQD structure is a superset of two, but we 
>> have not compared them neck-to-neck yet, hence I feel like we can 
>> defer this task for sometime (but add into to-do list).
>
> v11_gfx_mqd has these fields reserved. Updating the definition with 
> the fields you need should not be a problem. v11_gfx_mqd is already 
> used in gfx_v11_0.c.
>
> The firmware shouldn't care much whether a queue is a kernel mode 
> queue or a user mode queue. The MQD layout should be the same. So 
> having two different structure definitions in two different places 
> doesn't make sense. I don't think it's wise to leave this for cleanup 
> later. That would only cause churn and ultimately more work than doing 
> the right thing in the first place.
>
Thank you for your comment. We will update as per your suggestions.

~arvind

> Regards,
>   Felix
>
>
>
>>>
>>> BTW: Could we drop the "// offset:" stuff? This could cause problems 
>>> with automated checkers.
>>
>> Sure, we will do it.
>>
>> - Shashank
>>
>>>
>>> Christian.
>>>
>>>>
>>>> - Shashank
>>>>
>>>>> Regards,
>>>>> Christian.
>>>>>
>>>>>>
>>>>>> thanks
>>>>>>
>>>>>> ~arvind
>>>>>>
>>>>>>>
>>>>>>> Regards,
>>>>>>>   Felix
>>>>>>>
>>>>>
>>>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [RFC 3/7] drm/amdgpu: Create MQD for userspace queue
  2023-01-04 14:35                   ` Felix Kuehling
  2023-01-04 14:38                     ` Yadav, Arvind
@ 2023-01-04 14:41                     ` Shashank Sharma
  1 sibling, 0 replies; 64+ messages in thread
From: Shashank Sharma @ 2023-01-04 14:41 UTC (permalink / raw)
  To: Felix Kuehling, Christian König, Yadav, Arvind, Alex Deucher
  Cc: Alex Deucher, amd-gfx, arvind.yadav, arunpravin.paneerselvam


On 04/01/2023 15:35, Felix Kuehling wrote:
> Am 2023-01-04 um 04:23 schrieb Shashank Sharma:
>>
>> On 04/01/2023 10:17, Christian König wrote:
>>> Am 04.01.23 um 10:13 schrieb Shashank Sharma:
>>>>
>>>> On 04/01/2023 10:10, Christian König wrote:
>>>>> Am 04.01.23 um 07:21 schrieb Yadav, Arvind:
>>>>>>
>>>>>> On 1/4/2023 12:07 AM, Felix Kuehling wrote:
>>>>>>> Am 2023-01-03 um 04:36 schrieb Shashank Sharma:
>>>>>>>>>> /*MQD struct for usermode Queue*/
>>>>>>>>>> +struct amdgpu_usermode_queue_mqd
>>>>>>>>> This is specific to GC 11.  Every IP and version will have its 
>>>>>>>>> own MQD
>>>>>>>>> format.  That should live in the IP specific code, not the 
>>>>>>>>> generic
>>>>>>>>> code.  We already have the generic MQD parameters that we need 
>>>>>>>>> from
>>>>>>>>> the userq IOCTL.
>>>>>>>>
>>>>>>>> Noted, we can separate out the generic parameters from gen 
>>>>>>>> specific parameter, and will try to wrap it around the generic 
>>>>>>>> structure.
>>>>>>>>
>>>>>>>> - Shashank
>>>>>>>
>>>>>>> Is there a reason why you can't use "struct v11_compute_mqd" 
>>>>>>> from v11_structs.h?
>>>>>>
>>>>>> Hi Felix,
>>>>>>
>>>>>> Yes,  V11_compute_mqd does not have these below member which is 
>>>>>> needed for usermode queue.
>>>>>>
>>>>>>     uint32_t shadow_base_lo; // offset: 0  (0x0)
>>>>>>     uint32_t shadow_base_hi; // offset: 1  (0x1)
>>>>>>     uint32_t gds_bkup_base_lo ; // offset: 2  (0x2)
>>>>>>     uint32_t gds_bkup_base_hi ; // offset: 3  (0x3)
>>>>>>     uint32_t fw_work_area_base_lo; // offset: 4  (0x4)
>>>>>>     uint32_t fw_work_area_base_hi; // offset: 5  (0x5)
>>>>>>     uint32_t shadow_initialized; // offset: 6  (0x6)
>>>>>>     uint32_t ib_vmid; // offset: 7  (0x7)
>>>>>>
>>>>>> So we had to add new MQD structs.
>>>>>
>>>>> Would it make more sense to update the existing MQD structures 
>>>>> than adding new ones?
>>>>>
>>>> Imo, It might be a bit complicated in the bring-up state, but we 
>>>> can take a note of converting this structure into a union of two, 
>>>> or may be renaming it into a superset structure.
>>>
>>> Union? Does that mean we have stuff which is individual for both 
>>> versions of the struct?
>> So far it seems like Gfx MQD structure is a superset of two, but we 
>> have not compared them neck-to-neck yet, hence I feel like we can 
>> defer this task for sometime (but add into to-do list).
>
> v11_gfx_mqd has these fields reserved. Updating the definition with 
> the fields you need should not be a problem. v11_gfx_mqd is already 
> used in gfx_v11_0.c.
>
> The firmware shouldn't care much whether a queue is a kernel mode 
> queue or a user mode queue. The MQD layout should be the same. So 
> having two different structure definitions in two different places 
> doesn't make sense. I don't think it's wise to leave this for cleanup 
> later. That would only cause churn and ultimately more work than doing 
> the right thing in the first place.
>
> Regards,
>   Felix
>
Hey Felix, noted. We will try to reuse the same structure.

- Shashank

>
>
>>>
>>> BTW: Could we drop the "// offset:" stuff? This could cause problems 
>>> with automated checkers.
>>
>> Sure, we will do it.
>>
>> - Shashank
>>
>>>
>>> Christian.
>>>
>>>>
>>>> - Shashank
>>>>
>>>>> Regards,
>>>>> Christian.
>>>>>
>>>>>>
>>>>>> thanks
>>>>>>
>>>>>> ~arvind
>>>>>>
>>>>>>>
>>>>>>> Regards,
>>>>>>>   Felix
>>>>>>>
>>>>>
>>>

^ permalink raw reply	[flat|nested] 64+ messages in thread

end of thread, other threads:[~2023-01-04 14:41 UTC | newest]

Thread overview: 64+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-12-23 19:36 [RFC 0/7] RFC: Usermode queue for AMDGPU driver Shashank Sharma
2022-12-23 19:36 ` [RFC 1/7] drm/amdgpu: UAPI for user queue management Shashank Sharma
2022-12-24 20:20   ` Bas Nieuwenhuizen
2022-12-27 16:58     ` Alex Deucher
2023-01-02 11:27       ` Christian König
2023-01-03 19:51         ` Alex Deucher
2023-01-02 13:26   ` Christian König
2023-01-03 14:23     ` Alex Deucher
2023-01-03 18:29   ` Felix Kuehling
2023-01-03 19:17     ` Liu, Shaoyun
2023-01-03 19:22       ` Alex Deucher
2023-01-03 19:25         ` Liu, Shaoyun
2023-01-03 19:52           ` Alex Deucher
2023-01-03 20:05             ` Felix Kuehling
2023-01-03 19:18     ` Alex Deucher
2022-12-23 19:36 ` [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work Shashank Sharma
2022-12-24 18:19   ` Oded Gabbay
2022-12-26 10:34     ` Shashank Sharma
2022-12-25 15:44   ` Christian König
2022-12-26 10:41     ` Shashank Sharma
2023-01-02 12:39       ` Christian König
2023-01-03  9:12         ` Shashank Sharma
2023-01-03  9:15           ` Christian König
2023-01-03  9:22             ` Shashank Sharma
2023-01-03  9:35               ` Christian König
2023-01-03 14:34                 ` Alex Deucher
2023-01-03 14:50                   ` Christian König
2022-12-29 17:41   ` Alex Deucher
2023-01-02 13:53     ` Christian König
2023-01-03  9:32       ` Shashank Sharma
2023-01-03  9:16     ` Shashank Sharma
2023-01-04  8:55   ` Zhu, Jiadong
2023-01-04  8:58     ` Shashank Sharma
2022-12-23 19:36 ` [RFC 3/7] drm/amdgpu: Create MQD for userspace queue Shashank Sharma
2022-12-29 17:47   ` Alex Deucher
2023-01-03  9:36     ` Shashank Sharma
2023-01-03 18:37       ` Felix Kuehling
2023-01-04  6:21         ` Yadav, Arvind
2023-01-04  9:10           ` Christian König
2023-01-04  9:13             ` Shashank Sharma
2023-01-04  9:17               ` Christian König
2023-01-04  9:23                 ` Shashank Sharma
2023-01-04 14:35                   ` Felix Kuehling
2023-01-04 14:38                     ` Yadav, Arvind
2023-01-04 14:41                     ` Shashank Sharma
2023-01-04 14:28           ` Alex Deucher
2022-12-23 19:36 ` [RFC 4/7] drm/amdgpu: Allocate doorbell slot for user queue Shashank Sharma
2022-12-29 17:50   ` Alex Deucher
2023-01-03  9:37     ` Shashank Sharma
2022-12-23 19:36 ` [RFC 5/7] drm/amdgpu: Create context for usermode queue Shashank Sharma
2022-12-29 17:54   ` Alex Deucher
2023-01-03  9:40     ` Shashank Sharma
2023-01-03 14:48       ` Alex Deucher
2022-12-23 19:36 ` [RFC 6/7] drm/amdgpu: Map userqueue into HW Shashank Sharma
2022-12-29 17:51   ` Alex Deucher
2023-01-03  9:38     ` Shashank Sharma
2022-12-23 19:36 ` [RFC 7/7] drm/amdgpu: Secure semaphore for usermode queue Shashank Sharma
2022-12-25 10:07   ` Zhang, Yifan
2022-12-27  9:32     ` Arunpravin Paneer Selvam
2022-12-29 18:02 ` [RFC 0/7] RFC: Usermode queue for AMDGPU driver Alex Deucher
2023-01-03  9:43   ` Shashank Sharma
2023-01-03  9:47     ` Christian König
2023-01-03 10:00       ` Shashank Sharma
2023-01-03 10:02         ` Christian König
