All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3)
@ 2022-09-09  1:50 jiadong.zhu
  2022-09-09  1:50 ` [PATCH 2/4] drm/amdgpu: Add software ring callbacks for gfx9(v3) jiadong.zhu
                   ` (5 more replies)
  0 siblings, 6 replies; 28+ messages in thread
From: jiadong.zhu @ 2022-09-09  1:50 UTC (permalink / raw)
  To: amd-gfx; +Cc: Ray.Huang, Jiadong.Zhu

From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>

The software rings are created to support priority
contexts while there is only one hardware queue
for gfx.

Every software ring has its own fence driver and can
be used as an ordinary ring for the gpu_scheduler.
Multiple software rings are bound to a real ring
with the ring muxer. The packets committed on the
software rings are copied to the real ring.

v2: use array to store software ring entry.
v3: remove unnecessary prints.

Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/Makefile          |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h      |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h     |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 182 +++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  67 ++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  | 204 +++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h  |  48 +++++
 7 files changed, 509 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 3e0e2eb7e235..85224bc81ce5 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
 	amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
 	amdgpu_fw_attestation.o amdgpu_securedisplay.o \
-	amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
+	amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
+	amdgpu_sw_ring.o amdgpu_ring_mux.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 53526ffb2ce1..0de8e3cd0f1c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -33,6 +33,7 @@
 #include "amdgpu_imu.h"
 #include "soc15.h"
 #include "amdgpu_ras.h"
+#include "amdgpu_ring_mux.h"
 
 /* GFX current status */
 #define AMDGPU_GFX_NORMAL_MODE			0x00000000L
@@ -346,6 +347,8 @@ struct amdgpu_gfx {
 	struct amdgpu_gfx_ras		*ras;
 
 	bool				is_poweron;
+
+	struct amdgpu_ring_mux			muxer;
 };
 
 #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 7d89a52091c0..fe33a683bfba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -278,6 +278,9 @@ struct amdgpu_ring {
 	bool			is_mes_queue;
 	uint32_t		hw_queue_id;
 	struct amdgpu_mes_ctx_data *mes_ctx;
+
+	bool			is_sw_ring;
+
 };
 
 #define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib)))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
new file mode 100644
index 000000000000..ea4a3c66119a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <drm/drm_print.h>
+
+#include "amdgpu_ring_mux.h"
+#include "amdgpu_ring.h"
+
+#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ/2)
+
+static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
+	u64 s_begin, u64 s_end);
+
+int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+	mux->real_ring = ring;
+	memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
+	mux->num_ring_entries = 0;
+	spin_lock_init(&mux->lock);
+	return 0;
+}
+
+void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux)
+{
+	memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
+	mux->num_ring_entries = 0;
+}
+
+int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+	struct amdgpu_mux_entry *e;
+
+	if (mux->num_ring_entries == AMDGPU_MAX_GFX_RINGS) {
+		DRM_ERROR("adding sw ring exceeds max gfx num\n");
+		return -ENOMEM;
+	}
+
+	e = &mux->ring_entries[mux->num_ring_entries++];
+
+	e->ring = ring;
+	e->start_ptr_in_hw_ring = 0;
+	e->end_ptr_in_hw_ring = 0;
+	e->sw_cptr = 0;
+	e->sw_rptr = 0;
+	e->sw_wptr = 0;
+
+	return 0;
+}
+
+static struct amdgpu_mux_entry *amdgpu_get_sw_entry(struct amdgpu_ring_mux *mux,
+				struct amdgpu_ring *ring)
+{
+	struct amdgpu_mux_entry *e;
+	int i;
+
+	e = NULL;
+	for (i = 0; i < mux->num_ring_entries; i++) {
+		if (mux->ring_entries[i].ring == ring) {
+			e = &mux->ring_entries[i];
+			break;
+		}
+	}
+
+	return e;
+}
+
+void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr)
+{
+	struct amdgpu_mux_entry *e;
+
+	e = amdgpu_get_sw_entry(mux, ring);
+	if (!e) {
+		DRM_ERROR("cannot find entry for sw ring\n");
+		return;
+	}
+
+	spin_lock(&mux->lock);
+	e->sw_cptr = e->sw_wptr;
+	e->sw_wptr = wptr;
+	e->start_ptr_in_hw_ring = mux->real_ring->wptr;
+
+	if (copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr) == 0) {
+		e->end_ptr_in_hw_ring = mux->real_ring->wptr;
+		amdgpu_ring_commit(mux->real_ring);
+	}
+
+	spin_unlock(&mux->lock);
+}
+
+u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+	struct amdgpu_mux_entry *e;
+
+	e = amdgpu_get_sw_entry(mux, ring);
+	if (!e) {
+		DRM_ERROR("cannot find entry for sw ring\n");
+		return 0;
+	}
+
+	return e->sw_wptr;
+}
+
+u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+	struct amdgpu_mux_entry *e;
+	u64 r_rptr, r_wptr, offset, start, end;
+
+	e = amdgpu_get_sw_entry(mux, ring);
+	if (!e) {
+		DRM_ERROR("no sw entry found!\n");
+		return 0;
+	}
+
+	r_rptr = amdgpu_ring_get_rptr(mux->real_ring);
+	r_wptr = amdgpu_ring_get_wptr(mux->real_ring);
+
+	if (r_wptr < r_rptr)
+		r_wptr += mux->real_ring->ring_size >> 2;
+
+	start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask;
+	end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask;
+	if (start > end)
+		end += mux->real_ring->ring_size >> 2;
+	if (r_rptr <= end && r_rptr >= start) {
+		offset = r_rptr - start;
+		e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask;
+	} else if (r_rptr < start) {
+		e->sw_rptr = e->sw_cptr;
+	} else {
+		e->sw_rptr = e->sw_wptr;
+	}
+
+	return e->sw_rptr;
+}
+
+/* copy packets on the sw ring in range [begin, end) */
+static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
+	u64 s_begin, u64 s_end)
+{
+	u64 begin, end, r_begin, r_end;
+	struct amdgpu_ring *real_ring = mux->real_ring;
+
+	begin = s_begin & ring->buf_mask;
+	end = s_end & ring->buf_mask;
+
+	r_begin = real_ring->wptr & real_ring->buf_mask;
+	if (begin == end)
+		return -ERANGE;
+	if (begin > end) {
+		amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) + end - begin);
+		amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[begin],
+			(ring->ring_size >> 2) - begin);
+		amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[0], end);
+	} else {
+		amdgpu_ring_alloc(real_ring, end - begin);
+		amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[begin], end - begin);
+	}
+
+	r_end = real_ring->wptr & real_ring->buf_mask;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
new file mode 100644
index 000000000000..d058c43bb063
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_RING_MUX__
+#define __AMDGPU_RING_MUX__
+
+#include <linux/timer.h>
+#include <linux/spinlock.h>
+#include "amdgpu_ring.h"
+
+struct amdgpu_ring;
+/*
+ * start_ptr_in_hw_ring - last copied start location on the hw ring
+ * end_ptr_in_hw_ring - last copied end location on the hw ring
+ * sw_cptr - the begin of the copy ptr in the sw ring
+ * sw_rptr - the read ptr in the sw ring
+ * sw_wptr - the write ptr in the sw ring
+ */
+struct amdgpu_mux_entry {
+	struct amdgpu_ring	*ring;
+	u64 start_ptr_in_hw_ring;
+	u64 end_ptr_in_hw_ring;
+
+	u64 sw_cptr;
+	u64 sw_rptr;
+	u64 sw_wptr;
+};
+
+struct amdgpu_ring_mux {
+	struct amdgpu_ring *real_ring;
+
+	struct amdgpu_mux_entry ring_entries[AMDGPU_MAX_GFX_RINGS];
+
+	unsigned num_ring_entries;
+
+	spinlock_t			lock;
+
+};
+
+int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux);
+int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr);
+u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
new file mode 100644
index 000000000000..452d0ff37758
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
@@ -0,0 +1,204 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include "amdgpu_sw_ring.h"
+#include "amdgpu_ring_mux.h"
+
+#define amdgpu_ring_get_gpu_addr(ring, offset)				\
+	(ring->is_mes_queue ?						\
+	 (ring->mes_ctx->meta_data_gpu_addr + offset) :			\
+	 (ring->adev->wb.gpu_addr + offset * 4))
+
+#define amdgpu_ring_get_cpu_addr(ring, offset)				\
+	(ring->is_mes_queue ?						\
+	 (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \
+	 (&ring->adev->wb.wb[offset]))
+
+
+int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
+		     unsigned int max_dw, struct amdgpu_irq_src *irq_src,
+		     unsigned int irq_type, unsigned int hw_prio,
+		     atomic_t *sched_score)
+{
+	int r;
+	int sched_hw_submission = amdgpu_sched_hw_submission;
+	u32 *num_sched;
+	u32 hw_ip;
+
+	BUG_ON(!ring->is_sw_ring);
+
+	if (ring->adev == NULL) {
+		if (adev->num_rings >= AMDGPU_MAX_RINGS)
+			return -EINVAL;
+
+		ring->adev = adev;
+		ring->num_hw_submission = sched_hw_submission;
+		ring->sched_score = sched_score;
+		ring->vmid_wait = dma_fence_get_stub();
+
+		if (!ring->is_mes_queue) {
+			ring->idx = adev->num_rings++;
+			adev->rings[ring->idx] = ring;
+		}
+
+		r = amdgpu_fence_driver_init_ring(ring);
+		if (r)
+			return r;
+	}
+
+	r = amdgpu_device_wb_get(adev, &ring->fence_offs);
+	if (r) {
+		dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
+		return r;
+	}
+
+	r = amdgpu_device_wb_get(adev, &ring->fence_offs);
+	if (r) {
+		dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
+		return r;
+	}
+
+	r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
+	if (r) {
+		dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r);
+		return r;
+	}
+
+	r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
+	if (r) {
+		dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r);
+		return r;
+	}
+
+	ring->fence_gpu_addr =
+		amdgpu_ring_get_gpu_addr(ring, ring->fence_offs);
+	ring->fence_cpu_addr =
+		amdgpu_ring_get_cpu_addr(ring, ring->fence_offs);
+
+	ring->trail_fence_gpu_addr =
+		amdgpu_ring_get_gpu_addr(ring, ring->trail_fence_offs);
+	ring->trail_fence_cpu_addr =
+		amdgpu_ring_get_cpu_addr(ring, ring->trail_fence_offs);
+
+	ring->cond_exe_gpu_addr =
+		amdgpu_ring_get_gpu_addr(ring, ring->cond_exe_offs);
+	ring->cond_exe_cpu_addr =
+		amdgpu_ring_get_cpu_addr(ring, ring->cond_exe_offs);
+
+	/* always set cond_exec_polling to CONTINUE */
+	*ring->cond_exe_cpu_addr = 1;
+
+	r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
+	if (r) {
+		dev_err(adev->dev, "failed initializing fences (%d).\n", r);
+		return r;
+	}
+
+	ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission);
+
+	ring->buf_mask = (ring->ring_size / 4) - 1;
+	ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
+		0xffffffffffffffff : ring->buf_mask;
+
+	/* Allocate ring buffer */
+	if (ring->ring == NULL) {
+		ring->ring = kzalloc(ring->ring_size + ring->funcs->extra_dw, GFP_KERNEL);
+		if (!ring->ring) {
+			dev_err(adev->dev, "(%d) swring create failed\n", r);
+			return r;
+		}
+
+		amdgpu_ring_clear_ring(ring);
+	}
+
+	ring->max_dw = max_dw;
+	ring->hw_prio = hw_prio;
+
+	if (!ring->no_scheduler) {
+		hw_ip = ring->funcs->type;
+		num_sched = &adev->gpu_sched[hw_ip][hw_prio].num_scheds;
+		adev->gpu_sched[hw_ip][hw_prio].sched[(*num_sched)++] =
+			&ring->sched;
+	}
+
+	return 0;
+}
+
+u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+	BUG_ON(!ring->is_sw_ring);
+	return amdgpu_ring_get_rptr_from_mux(mux, ring);
+}
+
+u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+	BUG_ON(!ring->is_sw_ring);
+	return amdgpu_ring_get_wptr_from_mux(mux, ring);
+}
+
+void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring)
+{
+	BUG_ON(!ring->is_sw_ring);
+}
+
+void amdgpu_sw_ring_commit(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+	BUG_ON(!ring->is_sw_ring);
+	amdgpu_ring_set_wptr_to_mux(mux, ring, ring->wptr);
+}
+
+void amdgpu_sw_ring_fini(struct amdgpu_ring *ring)
+{
+	BUG_ON(!ring->is_sw_ring);
+
+	/* Not to finish a ring which is not initialized */
+	if (!(ring->adev) ||
+	    (!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
+		return;
+
+	ring->sched.ready = false;
+
+	amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
+	amdgpu_device_wb_free(ring->adev, ring->fence_offs);
+
+	kfree((void *)ring->ring);
+
+	dma_fence_put(ring->vmid_wait);
+	ring->vmid_wait = NULL;
+	ring->me = 0;
+
+	ring->adev->rings[ring->idx] = NULL;
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
new file mode 100644
index 000000000000..c05d8a94ad0c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <drm/amdgpu_drm.h>
+#include <drm/gpu_scheduler.h>
+#include <drm/drm_print.h>
+
+#include "amdgpu_irq.h"
+#include "amdgpu_ring.h"
+#include "amdgpu.h"
+
+#ifndef __AMDGPU_SWRING_H__
+#define __AMDGPU_SWRING_H__
+
+int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *sw_ring,
+		     unsigned int max_dw, struct amdgpu_irq_src *irq_src,
+		     unsigned int irq_type, unsigned int hw_prio,
+		     atomic_t *sched_score);
+void amdgpu_sw_ring_fini(struct amdgpu_ring *ring);
+u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring);
+u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring);
+void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring);
+void amdgpu_sw_ring_commit(struct amdgpu_ring *ring);
+
+void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
+void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
+
+#endif
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 2/4] drm/amdgpu: Add software ring callbacks for gfx9(v3)
  2022-09-09  1:50 [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3) jiadong.zhu
@ 2022-09-09  1:50 ` jiadong.zhu
  2022-09-09 16:46   ` Andrey Grodzovsky
  2022-09-13 15:23   ` Luben Tuikov
  2022-09-09  1:50 ` [PATCH 3/4] drm/amdgpu: Modify unmap_queue format for gfx9(v2) jiadong.zhu
                   ` (4 subsequent siblings)
  5 siblings, 2 replies; 28+ messages in thread
From: jiadong.zhu @ 2022-09-09  1:50 UTC (permalink / raw)
  To: amd-gfx; +Cc: Ray.Huang, Jiadong.Zhu

From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>

Set ring functions with software ring callbacks
on gfx9.

The software rings can be tested with the
debugfs_test_ib case.

v2: set sw_ring 2 to enable software ring by default.
v3: remove the parameter for software ring enablement.

Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h      |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |  16 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   3 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 116 +++++++++++++++++++++--
 5 files changed, 128 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 96d058c4cd4b..525df0b4d55f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -207,6 +207,7 @@ extern bool amdgpu_ignore_bad_page_threshold;
 extern struct amdgpu_watchdog_timer amdgpu_watchdog_timer;
 extern int amdgpu_async_gfx_ring;
 extern int amdgpu_mcbp;
+extern int amdgpu_sw_ring;
 extern int amdgpu_discovery;
 extern int amdgpu_mes;
 extern int amdgpu_mes_kiq;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 0de8e3cd0f1c..5eec82014f0a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -348,6 +348,8 @@ struct amdgpu_gfx {
 
 	bool				is_poweron;
 
+	/* software rings */
+	unsigned						num_sw_gfx_rings;
 	struct amdgpu_ring_mux			muxer;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 13db99d653bd..5b70a2c36d81 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -33,6 +33,7 @@
 
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
+#include "amdgpu_sw_ring.h"
 #include "atom.h"
 
 /*
@@ -121,6 +122,11 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
 {
 	uint32_t count;
 
+	if (ring->is_sw_ring) {
+		amdgpu_sw_ring_commit(ring);
+		return;
+	}
+
 	/* We pad to match fetch size */
 	count = ring->funcs->align_mask + 1 -
 		(ring->wptr & ring->funcs->align_mask);
@@ -183,6 +189,11 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 	u32 *num_sched;
 	u32 hw_ip;
 
+	if (adev->gfx.num_sw_gfx_rings > 0 && ring->is_sw_ring) {
+		return amdgpu_sw_ring_init(adev, ring, max_dw, irq_src, irq_type,
+			hw_prio, sched_score);
+	}
+
 	/* Set the hw submission limit higher for KIQ because
 	 * it's used for a number of gfx/compute tasks by both
 	 * KFD and KGD which may have outstanding fences and
@@ -343,7 +354,10 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
  */
 void amdgpu_ring_fini(struct amdgpu_ring *ring)
 {
-
+	if (ring->is_sw_ring) {
+		amdgpu_sw_ring_fini(ring);
+		return;
+	}
 	/* Not to finish a ring which is not initialized */
 	if (!(ring->adev) ||
 	    (!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index fe33a683bfba..ba6d8c753f7e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -38,7 +38,8 @@ struct amdgpu_vm;
 /* max number of rings */
 #define AMDGPU_MAX_RINGS		28
 #define AMDGPU_MAX_HWIP_RINGS		8
-#define AMDGPU_MAX_GFX_RINGS		2
+/* 2 software rings and 1 real ring */
+#define AMDGPU_MAX_GFX_RINGS		3
 #define AMDGPU_MAX_COMPUTE_RINGS	8
 #define AMDGPU_MAX_VCE_RINGS		3
 #define AMDGPU_MAX_UVD_ENC_RINGS	2
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 5349ca4d19e3..774e44e1074a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -47,6 +47,7 @@
 
 #include "amdgpu_ras.h"
 
+#include "amdgpu_sw_ring.h"
 #include "gfx_v9_4.h"
 #include "gfx_v9_0.h"
 #include "gfx_v9_4_2.h"
@@ -55,7 +56,8 @@
 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
 #include "asic_reg/gc/gc_9_0_default.h"
 
-#define GFX9_NUM_GFX_RINGS     1
+#define GFX9_NUM_GFX_RINGS     3
+#define GFX9_NUM_SW_GFX_RINGS  2
 #define GFX9_MEC_HPD_SIZE 4096
 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
@@ -2270,6 +2272,7 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
 static int gfx_v9_0_sw_init(void *handle)
 {
 	int i, j, k, r, ring_id;
+	unsigned int hw_prio;
 	struct amdgpu_ring *ring;
 	struct amdgpu_kiq *kiq;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -2356,13 +2359,40 @@ static int gfx_v9_0_sw_init(void *handle)
 			sprintf(ring->name, "gfx_%d", i);
 		ring->use_doorbell = true;
 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
+		ring->is_sw_ring = (adev->gfx.num_sw_gfx_rings > 1) && (i > 0);
+
+		if (adev->gfx.num_sw_gfx_rings > 1 && i == 2)
+			hw_prio = AMDGPU_RING_PRIO_2;
+		else
+			hw_prio = AMDGPU_RING_PRIO_DEFAULT;
+		if (adev->gfx.num_sw_gfx_rings > 0 && i == 0)
+			ring->no_scheduler = true;
+
 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
 				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
-				     AMDGPU_RING_PRIO_DEFAULT, NULL);
+				     hw_prio, NULL);
 		if (r)
 			return r;
+
+		if (ring->is_sw_ring)
+			ring->wptr = 0;
 	}
 
+	/*init the muxer and add sw rings */
+	if (adev->gfx.num_sw_gfx_rings > 0) {
+		r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0]);
+		if (r) {
+			DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
+			return r;
+		}
+		for (i = 1; i < adev->gfx.num_gfx_rings; i++) {
+			r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer, &adev->gfx.gfx_ring[i]);
+			if (r) {
+				DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
+				return r;
+			}
+		}
+	}
 	/* set up the compute queues - allocate horizontally across pipes */
 	ring_id = 0;
 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
@@ -2413,6 +2443,9 @@ static int gfx_v9_0_sw_fini(void *handle)
 	int i;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	if (adev->gfx.num_sw_gfx_rings > 0)
+		amdgpu_ring_mux_fini(&adev->gfx.muxer);
+
 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
@@ -4709,8 +4742,9 @@ static int gfx_v9_0_early_init(void *handle)
 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
 		adev->gfx.num_gfx_rings = 0;
-	else
-		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
+
+	adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
+	adev->gfx.num_sw_gfx_rings = GFX9_NUM_SW_GFX_RINGS;
 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
 					  AMDGPU_MAX_COMPUTE_RINGS);
 	gfx_v9_0_set_kiq_pm4_funcs(adev);
@@ -5877,7 +5911,11 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
 
 	switch (me_id) {
 	case 0:
-		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
+		if (adev->gfx.num_sw_gfx_rings > 1) {
+			for (i = 1; i <= adev->gfx.num_sw_gfx_rings; i++)
+				amdgpu_fence_process(&adev->gfx.gfx_ring[i]);
+		} else
+			amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
 		break;
 	case 1:
 	case 2:
@@ -6882,6 +6920,62 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
 };
 
+
+static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
+	.type = AMDGPU_RING_TYPE_GFX,
+	.align_mask = 0xff,
+	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
+	.support_64bit_ptrs = true,
+	.secure_submission_supported = true,
+	.vmhub = AMDGPU_GFXHUB_0,
+	.get_rptr = amdgpu_sw_ring_get_rptr_gfx,
+	.get_wptr = amdgpu_sw_ring_get_wptr_gfx,
+	.set_wptr = amdgpu_sw_ring_set_wptr_gfx,
+	.emit_frame_size = /* totally 242 maximum if 16 IBs */
+		5 +  /* COND_EXEC */
+		7 +  /* PIPELINE_SYNC */
+		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+		2 + /* VM_FLUSH */
+		8 +  /* FENCE for VM_FLUSH */
+		20 + /* GDS switch */
+		4 + /* double SWITCH_BUFFER,
+		     * the first COND_EXEC jump to the place just
+		     * prior to this double SWITCH_BUFFER
+		     */
+		5 + /* COND_EXEC */
+		7 +	 /*	HDP_flush */
+		4 +	 /*	VGT_flush */
+		14 + /*	CE_META */
+		31 + /*	DE_META */
+		3 + /* CNTX_CTRL */
+		5 + /* HDP_INVL */
+		8 + 8 + /* FENCE x2 */
+		2 + /* SWITCH_BUFFER */
+		7, /* gfx_v9_0_emit_mem_sync */
+	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
+	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
+	.emit_fence = gfx_v9_0_ring_emit_fence,
+	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
+	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
+	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
+	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
+	.test_ring = gfx_v9_0_ring_test_ring,
+	.test_ib = gfx_v9_0_ring_test_ib,
+	.insert_nop = amdgpu_ring_insert_nop,
+	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.emit_switch_buffer = gfx_v9_ring_emit_sb,
+	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
+	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
+	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
+	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
+	.emit_wreg = gfx_v9_0_ring_emit_wreg,
+	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
+	.soft_recovery = gfx_v9_0_ring_soft_recovery,
+	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
+};
+
 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
 	.type = AMDGPU_RING_TYPE_COMPUTE,
 	.align_mask = 0xff,
@@ -6956,9 +7050,15 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
 
 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
 
-	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
-		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
-
+	if (adev->gfx.num_sw_gfx_rings > 0) {
+		/* the first ring is the real ring */
+		adev->gfx.gfx_ring[0].funcs = &gfx_v9_0_ring_funcs_gfx;
+		for (i = 1; i <= adev->gfx.num_sw_gfx_rings; i++)
+			adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
+	} else {
+		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+			adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
+	}
 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
 }
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 3/4] drm/amdgpu: Modify unmap_queue format for gfx9(v2)
  2022-09-09  1:50 [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3) jiadong.zhu
  2022-09-09  1:50 ` [PATCH 2/4] drm/amdgpu: Add software ring callbacks for gfx9(v3) jiadong.zhu
@ 2022-09-09  1:50 ` jiadong.zhu
  2022-09-09 16:48   ` Andrey Grodzovsky
  2022-09-09  1:50 ` [PATCH 4/4] drm/amdgpu: Implement OS triggered MCBP(v2) jiadong.zhu
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 28+ messages in thread
From: jiadong.zhu @ 2022-09-09  1:50 UTC (permalink / raw)
  To: amd-gfx; +Cc: Ray.Huang, Jiadong.Zhu

From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>

1. Modify the unmap_queue package on gfx9.
   Add trailing fence to track the preemption done.
2. Modify emit_ce_meta emit_de_meta functions
   for the resumed ibs.

v2: restyle code not to use ternary operator.

Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 181 +++++++++++++++++++----
 drivers/gpu/drm/amd/amdgpu/soc15d.h      |   2 +
 3 files changed, 155 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index ba6d8c753f7e..d3155dc86c07 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -60,6 +60,7 @@ enum amdgpu_ring_priority_level {
 #define AMDGPU_FENCE_FLAG_64BIT         (1 << 0)
 #define AMDGPU_FENCE_FLAG_INT           (1 << 1)
 #define AMDGPU_FENCE_FLAG_TC_WB_ONLY    (1 << 2)
+#define AMDGPU_FENCE_FLAG_EXEC          (1 << 3)
 
 #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 774e44e1074a..89a5c45b1006 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -753,7 +753,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
 				struct amdgpu_cu_info *cu_info);
 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
 					  void *ras_error_status);
@@ -826,9 +826,10 @@ static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
 			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
 
 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
-		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
-		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
-		amdgpu_ring_write(kiq_ring, seq);
+		amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
+		amdgpu_ring_write(kiq_ring, 0);
+		amdgpu_ring_write(kiq_ring, 0);
+
 	} else {
 		amdgpu_ring_write(kiq_ring, 0);
 		amdgpu_ring_write(kiq_ring, 0);
@@ -5356,11 +5357,16 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 
 	control |= ib->length_dw | (vmid << 24);
 
-	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
+	if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
 		control |= INDIRECT_BUFFER_PRE_ENB(1);
 
+		if (flags & AMDGPU_IB_PREEMPTED)
+			control |= INDIRECT_BUFFER_PRE_RESUME(1);
+
 		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
-			gfx_v9_0_ring_emit_de_meta(ring);
+			gfx_v9_0_ring_emit_de_meta(ring,
+				 (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ?
+					true : false);
 	}
 
 	amdgpu_ring_write(ring, header);
@@ -5415,17 +5421,23 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+	bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
+	uint32_t dw2 = 0;
 
 	/* RELEASE_MEM - flush caches, send int */
 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
-	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
-					       EOP_TC_NC_ACTION_EN) :
-					      (EOP_TCL1_ACTION_EN |
-					       EOP_TC_ACTION_EN |
-					       EOP_TC_WB_ACTION_EN |
-					       EOP_TC_MD_ACTION_EN)) |
-				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
-				 EVENT_INDEX(5)));
+
+	if (writeback) {
+		dw2 = EOP_TC_WB_ACTION_EN | EOP_TC_NC_ACTION_EN;
+	} else {
+		dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
+				EOP_TC_WB_ACTION_EN | EOP_TC_MD_ACTION_EN;
+	}
+	dw2 |= EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
+	if (exec)
+		dw2 |= EOP_EXEC;
+
+	amdgpu_ring_write(ring, dw2);
 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
 
 	/*
@@ -5530,33 +5542,135 @@ static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, 0);
 }
 
-static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
+static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
 {
+	struct amdgpu_device *adev = ring->adev;
 	struct v9_ce_ib_state ce_payload = {0};
-	uint64_t csa_addr;
+	uint64_t offset, ce_payload_gpu_addr;
+	void *ce_payload_cpu_addr;
 	int cnt;
 
 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
-	csa_addr = amdgpu_csa_vaddr(ring->adev);
+
+	if (ring->is_mes_queue) {
+		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+				  gfx[0].gfx_meta_data) +
+			offsetof(struct v9_gfx_meta_data, ce_payload);
+		ce_payload_gpu_addr =
+			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+		ce_payload_cpu_addr =
+			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+	} else {
+		offset = offsetof(struct v9_gfx_meta_data, ce_payload);
+		ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+	}
 
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
 				 WRITE_DATA_DST_SEL(8) |
 				 WR_CONFIRM) |
 				 WRITE_DATA_CACHE_POLICY(0));
-	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
-	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
-	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
+	amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
+	amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
+
+	if (resume)
+		amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
+					   sizeof(ce_payload) >> 2);
+	else
+		amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
+					   sizeof(ce_payload) >> 2);
+}
+
+static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+	int i, r = 0;
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+	struct amdgpu_ring *kiq_ring = &kiq->ring;
+	unsigned long flags;
+
+	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+		return -EINVAL;
+
+	spin_lock_irqsave(&kiq->ring_lock, flags);
+
+	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+		spin_unlock_irqrestore(&kiq->ring_lock, flags);
+		return -ENOMEM;
+	}
+
+	/* assert preemption condition */
+	amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+	ring->trail_seq += 1;
+	amdgpu_ring_alloc(ring, 13);
+	gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
+				  ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC);
+	/*reset the CP_VMID_PREEMPT after trailing fence*/
+	amdgpu_ring_emit_wreg(ring,
+				  SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
+				  0x0);
+
+	/* assert IB preemption, emit the trailing fence */
+	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
+				   ring->trail_fence_gpu_addr,
+				   ring->trail_seq);
+
+	amdgpu_ring_commit(kiq_ring);
+	spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+	/* poll the trailing fence */
+	for (i = 0; i < adev->usec_timeout; i++) {
+		if (ring->trail_seq ==
+		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+			break;
+		udelay(1);
+	}
+
+	if (i >= adev->usec_timeout) {
+		r = -EINVAL;
+		DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
+	}
+
+	amdgpu_ring_commit(ring);
+
+	/* deassert preemption condition */
+	amdgpu_ring_set_preempt_cond_exec(ring, true);
+	return r;
 }
 
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
 {
+	struct amdgpu_device *adev = ring->adev;
 	struct v9_de_ib_state de_payload = {0};
-	uint64_t csa_addr, gds_addr;
+	uint64_t offset, gds_addr, de_payload_gpu_addr;
+	void *de_payload_cpu_addr;
 	int cnt;
 
-	csa_addr = amdgpu_csa_vaddr(ring->adev);
-	gds_addr = csa_addr + 4096;
+	if (ring->is_mes_queue) {
+		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+				  gfx[0].gfx_meta_data) +
+			offsetof(struct v9_gfx_meta_data, de_payload);
+		de_payload_gpu_addr =
+			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+		de_payload_cpu_addr =
+			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+
+		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+				  gfx[0].gds_backup) +
+			offsetof(struct v9_gfx_meta_data, de_payload);
+		gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+	} else {
+		offset = offsetof(struct v9_gfx_meta_data, de_payload);
+		de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+		de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+
+		gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
+				 AMDGPU_CSA_SIZE - adev->gds.gds_size,
+				 PAGE_SIZE);
+	}
+
 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
 
@@ -5566,9 +5680,15 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
 				 WRITE_DATA_DST_SEL(8) |
 				 WR_CONFIRM) |
 				 WRITE_DATA_CACHE_POLICY(0));
-	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
-	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
-	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
+	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
+	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
+
+	if (resume)
+		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
+					   sizeof(de_payload) >> 2);
+	else
+		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
+					   sizeof(de_payload) >> 2);
 }
 
 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
@@ -5584,8 +5704,10 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
 {
 	uint32_t dw2 = 0;
 
-	if (amdgpu_sriov_vf(ring->adev))
-		gfx_v9_0_ring_emit_ce_meta(ring);
+	if (amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp)
+		gfx_v9_0_ring_emit_ce_meta(ring,
+					(!amdgpu_sriov_vf(ring->adev) &&
+						flags & AMDGPU_IB_PREEMPTED) ? true : false);
 
 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
@@ -6912,6 +7034,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
+	.preempt_ib = gfx_v9_0_ring_preempt_ib,
 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h
index 799925d22fc8..614e9f8467fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15d.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h
@@ -162,6 +162,7 @@
 		 * 2 - Bypass
 		 */
 #define     INDIRECT_BUFFER_PRE_ENB(x)		 ((x) << 21)
+#define     INDIRECT_BUFFER_PRE_RESUME(x)           ((x) << 30)
 #define	PACKET3_COPY_DATA				0x40
 #define	PACKET3_PFP_SYNC_ME				0x42
 #define	PACKET3_COND_WRITE				0x45
@@ -184,6 +185,7 @@
 #define		EOP_TC_ACTION_EN                        (1 << 17) /* L2 */
 #define		EOP_TC_NC_ACTION_EN			(1 << 19)
 #define		EOP_TC_MD_ACTION_EN			(1 << 21) /* L2 metadata */
+#define		EOP_EXEC					(1 << 28) /* For Trailing Fence */
 
 #define		DATA_SEL(x)                             ((x) << 29)
 		/* 0 - discard
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 4/4] drm/amdgpu: Implement OS triggered MCBP(v2)
  2022-09-09  1:50 [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3) jiadong.zhu
  2022-09-09  1:50 ` [PATCH 2/4] drm/amdgpu: Add software ring callbacks for gfx9(v3) jiadong.zhu
  2022-09-09  1:50 ` [PATCH 3/4] drm/amdgpu: Modify unmap_queue format for gfx9(v2) jiadong.zhu
@ 2022-09-09  1:50 ` jiadong.zhu
  2022-09-09 17:02   ` Andrey Grodzovsky
  2022-09-13 15:47   ` Luben Tuikov
  2022-09-09 14:24 ` [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3) Christian König
                   ` (2 subsequent siblings)
  5 siblings, 2 replies; 28+ messages in thread
From: jiadong.zhu @ 2022-09-09  1:50 UTC (permalink / raw)
  To: amd-gfx; +Cc: Ray.Huang, Jiadong.Zhu

From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>

Trigger MCBP according to the priority of the
software rings and the hw fence signaling
condition.

The muxer records the latest locations from the
software ring, which are used to resubmit packages
in preemption scenarios.

v2: update comment style

Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/Makefile          |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c       |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c     | 101 ++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h     |  29 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c     |  12 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h     |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 163 ++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  16 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  |  26 +++
 9 files changed, 351 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 85224bc81ce5..24c5aa19bbf2 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -59,7 +59,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
 	amdgpu_fw_attestation.o amdgpu_securedisplay.o \
 	amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
-	amdgpu_sw_ring.o amdgpu_ring_mux.o
+	amdgpu_sw_ring.o amdgpu_ring_mux.o amdgpu_mcbp.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 258cffe3c06a..af86d87e2f3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -211,6 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		}
 	}
 
+	amdgpu_ring_ib_begin(ring);
 	if (job && ring->funcs->init_cond_exec)
 		patch_offset = amdgpu_ring_init_cond_exec(ring);
 
@@ -285,6 +286,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	    ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
 		ring->funcs->emit_wave_limit(ring, false);
 
+	amdgpu_ring_ib_end(ring);
 	amdgpu_ring_commit(ring);
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c
new file mode 100644
index 000000000000..2a12101a7699
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <drm/gpu_scheduler.h>
+
+#include "amdgpu.h"
+#include "amdgpu_mcbp.h"
+#include "amdgpu_ring.h"
+
+/* trigger mcbp and find if we need resubmit */
+int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux)
+{
+	struct amdgpu_mux_entry *e;
+	struct amdgpu_ring *ring = NULL;
+	int i;
+
+	DRM_INFO("%s in\n", __func__);
+
+	spin_lock(&mux->lock);
+
+	amdgpu_ring_preempt_ib(mux->real_ring);
+
+	ring = NULL;
+	for (i = 0; i < mux->num_ring_entries; i++) {
+		e = &mux->ring_entries[i];
+		if (e->ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
+			ring = e->ring;
+			break;
+		}
+	}
+
+	if (!ring) {
+		DRM_ERROR("cannot find low priority ring\n");
+		return -ENOENT;
+	}
+
+	amdgpu_fence_process(ring);
+
+	DRM_INFO("after preempted ring_prio(%d) last_seq(%x) sync_seq(%x)\n",
+		ring->hw_prio, atomic_read(&ring->fence_drv.last_seq), ring->fence_drv.sync_seq);
+
+	if (atomic_read(&ring->fence_drv.last_seq) !=
+	    ring->fence_drv.sync_seq) {
+		DRM_INFO("schedule resubmit\n");
+		mux->s_resubmit = true;
+		amdgpu_ring_mux_schedule_resubmit(mux);
+	}
+
+	spin_unlock(&mux->lock);
+	return 0;
+}
+
+
+/*scan on low prio rings to have unsignaled fence and high ring has no fence.*/
+int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux)
+{
+	struct amdgpu_ring *ring;
+	uint32_t seq, last_seq;
+	int i, need_preempt;
+
+	need_preempt = 0;
+	for (i = 0; i < mux->num_ring_entries; i++) {
+		ring = mux->ring_entries[i].ring;
+		last_seq = atomic_read(&ring->fence_drv.last_seq);
+		seq = READ_ONCE(ring->fence_drv.sync_seq);
+		DRM_INFO("ring(%p) prio(%d) last_seq(%x) seq(%x)\n",
+			ring, ring->hw_prio, last_seq, seq);
+		if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT && last_seq < seq)
+			return 0;
+		if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && last_seq < seq)
+			need_preempt = 1;
+	}
+
+	DRM_INFO("%s return %d\n", __func__, need_preempt && !mux->s_resubmit);
+	return need_preempt && !mux->s_resubmit;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h
new file mode 100644
index 000000000000..0033bcba8d03
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_MCBP_H__
+#define __AMDGPU_MCBP_H__
+
+int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux);
+int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 5b70a2c36d81..6d7f8a40e308 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -583,3 +583,15 @@ int amdgpu_ring_init_mqd(struct amdgpu_ring *ring)
 
 	return mqd_mgr->init_mqd(adev, ring->mqd_ptr, &prop);
 }
+
+void amdgpu_ring_ib_begin(struct amdgpu_ring *ring)
+{
+	if (ring->is_sw_ring)
+		amdgpu_sw_ring_ib_begin(ring);
+}
+
+void amdgpu_ring_ib_end(struct amdgpu_ring *ring)
+{
+	if (ring->is_sw_ring)
+		amdgpu_sw_ring_ib_end(ring);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index d3155dc86c07..399037b0d6e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -311,6 +311,9 @@ struct amdgpu_ring {
 #define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)
 
 int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
+void amdgpu_ring_ib_begin(struct amdgpu_ring *ring);
+void amdgpu_ring_ib_end(struct amdgpu_ring *ring);
+
 void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
 void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 void amdgpu_ring_commit(struct amdgpu_ring *ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
index ea4a3c66119a..0c9b639b844e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -20,28 +20,60 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-
+#include <linux/slab.h>
 #include <drm/drm_print.h>
 
 #include "amdgpu_ring_mux.h"
+#include "amdgpu_mcbp.h"
 #include "amdgpu_ring.h"
 
 #define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ/2)
 
+static struct kmem_cache *amdgpu_mux_chunk_slab;
+
 static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
 	u64 s_begin, u64 s_end);
+static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux);
+static void amdgpu_mux_resubmit_fallback(struct timer_list *t);
 
 int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
 {
 	mux->real_ring = ring;
+
 	memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
 	mux->num_ring_entries = 0;
+
+	mux->s_resubmit = false;
+
+	amdgpu_mux_chunk_slab = kmem_cache_create(
+		"amdgpu_mux_chunk", sizeof(struct amdgpu_mux_chunk), 0,
+		SLAB_HWCACHE_ALIGN, NULL);
+	if (!amdgpu_mux_chunk_slab) {
+		DRM_ERROR("create amdgpu_mux_chunk cache failed\n");
+		return -ENOMEM;
+	}
+
 	spin_lock_init(&mux->lock);
+
+	timer_setup(&mux->resubmit_timer, amdgpu_mux_resubmit_fallback, 0);
+
 	return 0;
 }
 
 void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux)
 {
+	struct amdgpu_mux_entry *e;
+	struct amdgpu_mux_chunk *chunk, *chunk2;
+	int i;
+
+	for (i = 0; i < mux->num_ring_entries; i++) {
+		e = &mux->ring_entries[i];
+		list_for_each_entry_safe(chunk, chunk2, &e->list, entry) {
+			list_del(&chunk->entry);
+			kmem_cache_free(amdgpu_mux_chunk_slab, chunk);
+		}
+	}
+	kmem_cache_destroy(amdgpu_mux_chunk_slab);
 	memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
 	mux->num_ring_entries = 0;
 }
@@ -64,6 +96,8 @@ int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring
 	e->sw_rptr = 0;
 	e->sw_wptr = 0;
 
+	INIT_LIST_HEAD(&e->list);
+
 	return 0;
 }
 
@@ -180,3 +214,130 @@ static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring
 
 	return 0;
 }
+
+void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux)
+{
+	mod_timer(&mux->resubmit_timer, jiffies + AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT);
+}
+
+void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+	struct amdgpu_mux_entry *e;
+	struct amdgpu_mux_chunk *chunk;
+
+	if (mux->s_resubmit)
+		amdgpu_mux_resubmit_chunks(mux);
+
+	e = amdgpu_get_sw_entry(mux, ring);
+	if (!e) {
+		DRM_ERROR("cannot find entry!\n");
+		return;
+	}
+
+	chunk = kmem_cache_alloc(amdgpu_mux_chunk_slab, GFP_KERNEL);
+	if (!chunk) {
+		DRM_ERROR("alloc amdgpu_mux_chunk_slab failed\n");
+		return;
+	}
+
+	chunk->start = ring->wptr;
+	list_add_tail(&chunk->entry, &e->list);
+}
+
+static void scan_and_remove_signaled_chunk(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+	uint32_t last_seq, size = 0;
+	struct amdgpu_mux_entry *e;
+	struct amdgpu_mux_chunk *chunk, *tmp;
+
+	e = amdgpu_get_sw_entry(mux, ring);
+	if (!e) {
+		DRM_ERROR("cannot find entry!\n");
+		return;
+	}
+
+	last_seq = atomic_read(&ring->fence_drv.last_seq);
+
+	list_for_each_entry_safe(chunk, tmp, &e->list, entry) {
+		if (chunk->sync_seq <= last_seq) {
+			list_del(&chunk->entry);
+			kmem_cache_free(amdgpu_mux_chunk_slab, chunk);
+		} else {
+			size++;
+		}
+	}
+}
+
+void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+	struct amdgpu_mux_entry *e;
+	struct amdgpu_mux_chunk *chunk;
+
+	e = amdgpu_get_sw_entry(mux, ring);
+	if (!e) {
+		DRM_ERROR("cannot find entry!\n");
+		return;
+	}
+
+	chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry);
+	if (!chunk) {
+		DRM_ERROR("cannot find chunk!\n");
+		return;
+	}
+
+	chunk->end = ring->wptr;
+	chunk->sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
+
+	scan_and_remove_signaled_chunk(mux, ring);
+}
+
+static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux)
+{
+	struct amdgpu_mux_entry *e = NULL;
+	struct amdgpu_mux_chunk *chunk;
+	uint32_t seq, last_seq;
+	int i;
+
+	/*find low priority entries:*/
+	spin_lock(&mux->lock);
+
+	for (i = 0; i < mux->num_ring_entries; i++) {
+		if (mux->ring_entries[i].ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
+				e = &mux->ring_entries[i];
+			break;
+		}
+	}
+
+	if (!e) {
+		DRM_ERROR("%s no low priority ring found\n", __func__);
+		return;
+	}
+
+	last_seq = atomic_read(&e->ring->fence_drv.last_seq);
+	seq = READ_ONCE(e->ring->fence_drv.sync_seq);
+	if (seq == last_seq) {
+		DRM_INFO("skip as fence signaled seq=%x\n", seq);
+		return;
+	}
+	DRM_INFO("begin to copy resubmit chunks\n");
+
+	/*resubmit all the fences between (last_seq, seq]*/
+	list_for_each_entry(chunk, &e->list, entry) {
+		if (chunk->sync_seq > last_seq) {
+			copy_pkt_from_sw_ring(mux, e->ring, chunk->start, chunk->end);
+			amdgpu_ring_commit(mux->real_ring);
+		}
+	}
+	spin_unlock(&mux->lock);
+
+	del_timer(&mux->resubmit_timer);
+	mux->s_resubmit = false;
+}
+
+static void amdgpu_mux_resubmit_fallback(struct timer_list *t)
+{
+	struct amdgpu_ring_mux *mux = from_timer(mux, t, resubmit_timer);
+
+	DRM_INFO("calling %s\n", __func__);
+	amdgpu_mux_resubmit_chunks(mux);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
index d058c43bb063..1d91c235061a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
@@ -44,17 +44,27 @@ struct amdgpu_mux_entry {
 	u64 sw_cptr;
 	u64 sw_rptr;
 	u64 sw_wptr;
+
+	struct list_head list;
 };
 
 struct amdgpu_ring_mux {
 	struct amdgpu_ring *real_ring;
 
 	struct amdgpu_mux_entry ring_entries[AMDGPU_MAX_GFX_RINGS];
-
 	unsigned num_ring_entries;
 
 	spinlock_t			lock;
 
+	bool s_resubmit;
+	struct timer_list		resubmit_timer;
+};
+
+struct amdgpu_mux_chunk {
+	struct list_head entry;
+	uint32_t sync_seq;
+	u64 start;
+	u64 end;
 };
 
 int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
@@ -64,4 +74,8 @@ void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring
 u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
 u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
 
+void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
index 452d0ff37758..143a84c18534 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
@@ -26,6 +26,7 @@
 
 #include "amdgpu_sw_ring.h"
 #include "amdgpu_ring_mux.h"
+#include "amdgpu_mcbp.h"
 
 #define amdgpu_ring_get_gpu_addr(ring, offset)				\
 	(ring->is_mes_queue ?						\
@@ -202,3 +203,28 @@ void amdgpu_sw_ring_fini(struct amdgpu_ring *ring)
 	ring->adev->rings[ring->idx] = NULL;
 }
 
+void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+	BUG_ON(!ring->is_sw_ring);
+	if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) {
+		if (amdgpu_mcbp_scan(mux) > 0)
+			amdgpu_mcbp_trigger_preempt(mux);
+		return;
+	}
+
+	amdgpu_ring_mux_start_ib(mux, ring);
+}
+
+void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+	BUG_ON(!ring->is_sw_ring);
+	if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT)
+		return;
+	amdgpu_ring_mux_end_ib(mux, ring);
+}
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3)
  2022-09-09  1:50 [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3) jiadong.zhu
                   ` (2 preceding siblings ...)
  2022-09-09  1:50 ` [PATCH 4/4] drm/amdgpu: Implement OS triggered MCBP(v2) jiadong.zhu
@ 2022-09-09 14:24 ` Christian König
  2022-09-12 14:31   ` Luben Tuikov
  2022-09-09 16:45 ` Andrey Grodzovsky
  2022-09-13 15:12 ` Luben Tuikov
  5 siblings, 1 reply; 28+ messages in thread
From: Christian König @ 2022-09-09 14:24 UTC (permalink / raw)
  To: amd-gfx, Andrey Grodzovsky, Tuikov, Luben; +Cc: Ray.Huang, jiadong.zhu

Andrey and Luben please take a look at this set here and help with 
reviewing it.

Thanks,
Christian.

Am 09.09.22 um 03:50 schrieb jiadong.zhu@amd.com:
> From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>
>
> The software ring is created to support priority
> contexts while there is only one hardware queue
> for gfx.
>
> Every software ring has its own fence driver and can
> be used as an ordinary ring for the gpu_scheduler.
> Multiple software rings are bound to a real ring
> with the ring muxer. The packets committed on a
> software ring are copied to the real ring.
>
> v2: use array to store software ring entry.
> v3: remove unnecessary prints.
>
> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/Makefile          |   3 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h      |   3 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h     |   3 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 182 +++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  67 ++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  | 204 +++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h  |  48 +++++
>   7 files changed, 509 insertions(+), 1 deletion(-)
>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
> index 3e0e2eb7e235..85224bc81ce5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>   	amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
>   	amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
>   	amdgpu_fw_attestation.o amdgpu_securedisplay.o \
> -	amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
> +	amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
> +	amdgpu_sw_ring.o amdgpu_ring_mux.o
>   
>   amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 53526ffb2ce1..0de8e3cd0f1c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -33,6 +33,7 @@
>   #include "amdgpu_imu.h"
>   #include "soc15.h"
>   #include "amdgpu_ras.h"
> +#include "amdgpu_ring_mux.h"
>   
>   /* GFX current status */
>   #define AMDGPU_GFX_NORMAL_MODE			0x00000000L
> @@ -346,6 +347,8 @@ struct amdgpu_gfx {
>   	struct amdgpu_gfx_ras		*ras;
>   
>   	bool				is_poweron;
> +
> +	struct amdgpu_ring_mux			muxer;
>   };
>   
>   #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 7d89a52091c0..fe33a683bfba 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -278,6 +278,9 @@ struct amdgpu_ring {
>   	bool			is_mes_queue;
>   	uint32_t		hw_queue_id;
>   	struct amdgpu_mes_ctx_data *mes_ctx;
> +
> +	bool			is_sw_ring;
> +
>   };
>   
>   #define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib)))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
> new file mode 100644
> index 000000000000..ea4a3c66119a
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
> @@ -0,0 +1,182 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#include <drm/drm_print.h>
> +
> +#include "amdgpu_ring_mux.h"
> +#include "amdgpu_ring.h"
> +
> +#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ/2)
> +
> +static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
> +	u64 s_begin, u64 s_end);
> +
> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
> +{
> +	mux->real_ring = ring;
> +	memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
> +	mux->num_ring_entries = 0;
> +	spin_lock_init(&mux->lock);
> +	return 0;
> +}
> +
> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux)
> +{
> +	memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
> +	mux->num_ring_entries = 0;
> +}
> +
> +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_mux_entry *e;
> +
> +	if (mux->num_ring_entries == AMDGPU_MAX_GFX_RINGS) {
> +		DRM_ERROR("adding sw ring exceeds max gfx num\n");
> +		return -ENOMEM;
> +	}
> +
> +	e = &mux->ring_entries[mux->num_ring_entries++];
> +
> +	e->ring = ring;
> +	e->start_ptr_in_hw_ring = 0;
> +	e->end_ptr_in_hw_ring = 0;
> +	e->sw_cptr = 0;
> +	e->sw_rptr = 0;
> +	e->sw_wptr = 0;
> +
> +	return 0;
> +}
> +
> +static struct amdgpu_mux_entry *amdgpu_get_sw_entry(struct amdgpu_ring_mux *mux,
> +				struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_mux_entry *e;
> +	int i;
> +
> +	e = NULL;
> +	for (i = 0; i < mux->num_ring_entries; i++) {
> +		if (mux->ring_entries[i].ring == ring) {
> +			e = &mux->ring_entries[i];
> +			break;
> +		}
> +	}
> +
> +	return e;
> +}
> +
> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr)
> +{
> +	struct amdgpu_mux_entry *e;
> +
> +	e = amdgpu_get_sw_entry(mux, ring);
> +	if (!e) {
> +		DRM_ERROR("cannot find entry for sw ring\n");
> +		return;
> +	}
> +
> +	spin_lock(&mux->lock);
> +	e->sw_cptr = e->sw_wptr;
> +	e->sw_wptr = wptr;
> +	e->start_ptr_in_hw_ring = mux->real_ring->wptr;
> +
> +	if (copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr) == 0) {
> +		e->end_ptr_in_hw_ring = mux->real_ring->wptr;
> +		amdgpu_ring_commit(mux->real_ring);
> +	}
> +
> +	spin_unlock(&mux->lock);
> +}
> +
> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_mux_entry *e;
> +
> +	e = amdgpu_get_sw_entry(mux, ring);
> +	if (!e) {
> +		DRM_ERROR("cannot find entry for sw ring\n");
> +		return 0;
> +	}
> +
> +	return e->sw_wptr;
> +}
> +
> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_mux_entry *e;
> +	u64 r_rptr, r_wptr, offset, start, end;
> +
> +	e = amdgpu_get_sw_entry(mux, ring);
> +	if (!e) {
> +		DRM_ERROR("no sw entry found!\n");
> +		return 0;
> +	}
> +
> +	r_rptr = amdgpu_ring_get_rptr(mux->real_ring);
> +	r_wptr = amdgpu_ring_get_wptr(mux->real_ring);
> +
> +	if (r_wptr < r_rptr)
> +		r_wptr += mux->real_ring->ring_size >> 2;
> +
> +	start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask;
> +	end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask;
> +	if (start > end)
> +		end += mux->real_ring->ring_size >> 2;
> +	if (r_rptr <= end && r_rptr >= start) {
> +		offset = r_rptr - start;
> +		e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask;
> +	} else if (r_rptr < start) {
> +		e->sw_rptr = e->sw_cptr;
> +	} else {
> +		e->sw_rptr = e->sw_wptr;
> +	}
> +
> +	return e->sw_rptr;
> +}
> +
> +/*copy packages on sw ring range[begin, end) */
> +static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
> +	u64 s_begin, u64 s_end)
> +{
> +	u64 begin, end, r_begin, r_end;
> +	struct amdgpu_ring *real_ring = mux->real_ring;
> +
> +	begin = s_begin & ring->buf_mask;
> +	end = s_end & ring->buf_mask;
> +
> +	r_begin = real_ring->wptr & real_ring->buf_mask;
> +	if (begin == end)
> +		return -ERANGE;
> +	if (begin > end) {
> +		amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) + end - begin);
> +		amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[begin],
> +			(ring->ring_size >> 2) - begin);
> +		amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[0], end);
> +	} else {
> +		amdgpu_ring_alloc(real_ring, end - begin);
> +		amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[begin], end - begin);
> +	}
> +
> +	r_end = real_ring->wptr & real_ring->buf_mask;
> +
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> new file mode 100644
> index 000000000000..d058c43bb063
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> @@ -0,0 +1,67 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#ifndef __AMDGPU_RING_MUX__
> +#define __AMDGPU_RING_MUX__
> +
> +#include <linux/timer.h>
> +#include <linux/spinlock.h>
> +#include "amdgpu_ring.h"
> +
> +struct amdgpu_ring;
> +/*
> + * start_ptr_in_hw_ring - last copied start loc on hw ring
> + * end_ptr_in_hw_ring - last copied end loc on hw ring
> + * sw_cptr - the beginning of the copy ptr in the sw ring
> + * sw_rptr - the read ptr in the sw ring
> + * sw_wptr - the write ptr in the sw ring
> + */
> +struct amdgpu_mux_entry {
> +	struct amdgpu_ring	*ring;
> +	u64 start_ptr_in_hw_ring;
> +	u64 end_ptr_in_hw_ring;
> +
> +	u64 sw_cptr;
> +	u64 sw_rptr;
> +	u64 sw_wptr;
> +};
> +
> +struct amdgpu_ring_mux {
> +	struct amdgpu_ring *real_ring;
> +
> +	struct amdgpu_mux_entry ring_entries[AMDGPU_MAX_GFX_RINGS];
> +
> +	unsigned num_ring_entries;
> +
> +	spinlock_t			lock;
> +
> +};
> +
> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux);
> +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr);
> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
> +
> +#endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
> new file mode 100644
> index 000000000000..452d0ff37758
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
> @@ -0,0 +1,204 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + * All Rights Reserved.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the
> + * "Software"), to deal in the Software without restriction, including
> + * without limitation the rights to use, copy, modify, merge, publish,
> + * distribute, sub license, and/or sell copies of the Software, and to
> + * permit persons to whom the Software is furnished to do so, subject to
> + * the following conditions:
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
> + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
> + * USE OR OTHER DEALINGS IN THE SOFTWARE.
> + *
> + * The above copyright notice and this permission notice (including the
> + * next paragraph) shall be included in all copies or substantial portions
> + * of the Software.
> + *
> + */
> +
> +#include "amdgpu_sw_ring.h"
> +#include "amdgpu_ring_mux.h"
> +
> +#define amdgpu_ring_get_gpu_addr(ring, offset)				\
> +	(ring->is_mes_queue ?						\
> +	 (ring->mes_ctx->meta_data_gpu_addr + offset) :			\
> +	 (ring->adev->wb.gpu_addr + offset * 4))
> +
> +#define amdgpu_ring_get_cpu_addr(ring, offset)				\
> +	(ring->is_mes_queue ?						\
> +	 (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \
> +	 (&ring->adev->wb.wb[offset]))
> +
> +
> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
> +		     unsigned int max_dw, struct amdgpu_irq_src *irq_src,
> +		     unsigned int irq_type, unsigned int hw_prio,
> +		     atomic_t *sched_score)
> +{
> +	int r;
> +	int sched_hw_submission = amdgpu_sched_hw_submission;
> +	u32 *num_sched;
> +	u32 hw_ip;
> +
> +	BUG_ON(!ring->is_sw_ring);
> +
> +	if (ring->adev == NULL) {
> +		if (adev->num_rings >= AMDGPU_MAX_RINGS)
> +			return -EINVAL;
> +
> +		ring->adev = adev;
> +		ring->num_hw_submission = sched_hw_submission;
> +		ring->sched_score = sched_score;
> +		ring->vmid_wait = dma_fence_get_stub();
> +
> +		if (!ring->is_mes_queue) {
> +			ring->idx = adev->num_rings++;
> +			adev->rings[ring->idx] = ring;
> +		}
> +
> +		r = amdgpu_fence_driver_init_ring(ring);
> +		if (r)
> +			return r;
> +	}
> +
> +	r = amdgpu_device_wb_get(adev, &ring->fence_offs);
> +	if (r) {
> +		dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
> +		return r;
> +	}
> +
> +	r = amdgpu_device_wb_get(adev, &ring->fence_offs);
> +	if (r) {
> +		dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
> +		return r;
> +	}
> +
> +	r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
> +	if (r) {
> +		dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r);
> +		return r;
> +	}
> +
> +	r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
> +	if (r) {
> +		dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r);
> +		return r;
> +	}
> +
> +	ring->fence_gpu_addr =
> +		amdgpu_ring_get_gpu_addr(ring, ring->fence_offs);
> +	ring->fence_cpu_addr =
> +		amdgpu_ring_get_cpu_addr(ring, ring->fence_offs);
> +
> +	ring->trail_fence_gpu_addr =
> +		amdgpu_ring_get_gpu_addr(ring, ring->trail_fence_offs);
> +	ring->trail_fence_cpu_addr =
> +		amdgpu_ring_get_cpu_addr(ring, ring->trail_fence_offs);
> +
> +	ring->cond_exe_gpu_addr =
> +		amdgpu_ring_get_gpu_addr(ring, ring->cond_exe_offs);
> +	ring->cond_exe_cpu_addr =
> +		amdgpu_ring_get_cpu_addr(ring, ring->cond_exe_offs);
> +
> +	/* always set cond_exec_polling to CONTINUE */
> +	*ring->cond_exe_cpu_addr = 1;
> +
> +	r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
> +	if (r) {
> +		dev_err(adev->dev, "failed initializing fences (%d).\n", r);
> +		return r;
> +	}
> +
> +	ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission);
> +
> +	ring->buf_mask = (ring->ring_size / 4) - 1;
> +	ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
> +		0xffffffffffffffff : ring->buf_mask;
> +
> +	/* Allocate ring buffer */
> +	if (ring->ring == NULL) {
> +		ring->ring = kzalloc(ring->ring_size + ring->funcs->extra_dw, GFP_KERNEL);
> +		if (!ring->ring) {
> +			dev_err(adev->dev, "(%d) swring create failed\n", r);
> +			return r;
> +		}
> +
> +		amdgpu_ring_clear_ring(ring);
> +	}
> +
> +	ring->max_dw = max_dw;
> +	ring->hw_prio = hw_prio;
> +
> +	if (!ring->no_scheduler) {
> +		hw_ip = ring->funcs->type;
> +		num_sched = &adev->gpu_sched[hw_ip][hw_prio].num_scheds;
> +		adev->gpu_sched[hw_ip][hw_prio].sched[(*num_sched)++] =
> +			&ring->sched;
> +	}
> +
> +	return 0;
> +}
> +
> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_device *adev = ring->adev;
> +	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
> +
> +	BUG_ON(!ring->is_sw_ring);
> +	return amdgpu_ring_get_rptr_from_mux(mux, ring);
> +}
> +
> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_device *adev = ring->adev;
> +	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
> +
> +	BUG_ON(!ring->is_sw_ring);
> +	return amdgpu_ring_get_wptr_from_mux(mux, ring);
> +}
> +
> +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring)
> +{
> +	BUG_ON(!ring->is_sw_ring);
> +}
> +
> +void amdgpu_sw_ring_commit(struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_device *adev = ring->adev;
> +	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
> +
> +	BUG_ON(!ring->is_sw_ring);
> +	amdgpu_ring_set_wptr_to_mux(mux, ring, ring->wptr);
> +}
> +
> +void amdgpu_sw_ring_fini(struct amdgpu_ring *ring)
> +{
> +	BUG_ON(!ring->is_sw_ring);
> +
> +	/* Not to finish a ring which is not initialized */
> +	if (!(ring->adev) ||
> +	    (!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
> +		return;
> +
> +	ring->sched.ready = false;
> +
> +	amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
> +	amdgpu_device_wb_free(ring->adev, ring->fence_offs);
> +
> +	kfree((void *)ring->ring);
> +
> +	dma_fence_put(ring->vmid_wait);
> +	ring->vmid_wait = NULL;
> +	ring->me = 0;
> +
> +	ring->adev->rings[ring->idx] = NULL;
> +}
> +
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
> new file mode 100644
> index 000000000000..c05d8a94ad0c
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
> @@ -0,0 +1,48 @@
> +/*
> + * Copyright 2012 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#include <drm/amdgpu_drm.h>
> +#include <drm/gpu_scheduler.h>
> +#include <drm/drm_print.h>
> +
> +#include "amdgpu_irq.h"
> +#include "amdgpu_ring.h"
> +#include "amdgpu.h"
> +
> +#ifndef __AMDGPU_SWRING_H__
> +#define __AMDGPU_SWRING_H__
> +
> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *sw_ring,
> +		     unsigned int max_dw, struct amdgpu_irq_src *irq_src,
> +		     unsigned int irq_type, unsigned int hw_prio,
> +		     atomic_t *sched_score);
> +void amdgpu_sw_ring_fini(struct amdgpu_ring *ring);
> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring);
> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring);
> +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring);
> +void amdgpu_sw_ring_commit(struct amdgpu_ring *ring);
> +
> +void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
> +void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
> +
> +#endif


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3)
  2022-09-09  1:50 [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3) jiadong.zhu
                   ` (3 preceding siblings ...)
  2022-09-09 14:24 ` [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3) Christian König
@ 2022-09-09 16:45 ` Andrey Grodzovsky
  2022-09-12 10:20   ` Christian König
  2022-09-13 15:12 ` Luben Tuikov
  5 siblings, 1 reply; 28+ messages in thread
From: Andrey Grodzovsky @ 2022-09-09 16:45 UTC (permalink / raw)
  To: jiadong.zhu, amd-gfx; +Cc: Ray.Huang


On 2022-09-08 21:50, jiadong.zhu@amd.com wrote:
> From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>
>
> The software ring is created to support priority
> contexts while there is only one hardware queue
> for gfx.
>
> Every software ring has its own fence driver and can
> be used as an ordinary ring for the gpu_scheduler.
> Multiple software rings are bound to a real ring
> with the ring muxer. The packets committed on a
> software ring are copied to the real ring.
>
> v2: use array to store software ring entry.
> v3: remove unnecessary prints.
>
> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/Makefile          |   3 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h      |   3 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h     |   3 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 182 +++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  67 ++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  | 204 +++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h  |  48 +++++
>   7 files changed, 509 insertions(+), 1 deletion(-)
>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
> index 3e0e2eb7e235..85224bc81ce5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>   	amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
>   	amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
>   	amdgpu_fw_attestation.o amdgpu_securedisplay.o \
> -	amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
> +	amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
> +	amdgpu_sw_ring.o amdgpu_ring_mux.o
>   
>   amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 53526ffb2ce1..0de8e3cd0f1c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -33,6 +33,7 @@
>   #include "amdgpu_imu.h"
>   #include "soc15.h"
>   #include "amdgpu_ras.h"
> +#include "amdgpu_ring_mux.h"
>   
>   /* GFX current status */
>   #define AMDGPU_GFX_NORMAL_MODE			0x00000000L
> @@ -346,6 +347,8 @@ struct amdgpu_gfx {
>   	struct amdgpu_gfx_ras		*ras;
>   
>   	bool				is_poweron;
> +
> +	struct amdgpu_ring_mux			muxer;
>   };
>   
>   #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 7d89a52091c0..fe33a683bfba 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -278,6 +278,9 @@ struct amdgpu_ring {
>   	bool			is_mes_queue;
>   	uint32_t		hw_queue_id;
>   	struct amdgpu_mes_ctx_data *mes_ctx;
> +
> +	bool			is_sw_ring;
> +
>   };
>   
>   #define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib)))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
> new file mode 100644
> index 000000000000..ea4a3c66119a
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
> @@ -0,0 +1,182 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#include <drm/drm_print.h>
> +
> +#include "amdgpu_ring_mux.h"
> +#include "amdgpu_ring.h"
> +
> +#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ/2)
> +
> +static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
> +	u64 s_begin, u64 s_end);
> +
> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
> +{
> +	mux->real_ring = ring;
> +	memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
> +	mux->num_ring_entries = 0;
> +	spin_lock_init(&mux->lock);
> +	return 0;
> +}
> +
> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux)
> +{
> +	memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
> +	mux->num_ring_entries = 0;
> +}
> +
> +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_mux_entry *e;
> +
> +	if (mux->num_ring_entries == AMDGPU_MAX_GFX_RINGS) {
> +		DRM_ERROR("adding sw ring exceeds max gfx num\n");
> +		return -ENOMEM;
> +	}
> +
> +	e = &mux->ring_entries[mux->num_ring_entries++];
> +
> +	e->ring = ring;
> +	e->start_ptr_in_hw_ring = 0;
> +	e->end_ptr_in_hw_ring = 0;
> +	e->sw_cptr = 0;
> +	e->sw_rptr = 0;
> +	e->sw_wptr = 0;
> +
> +	return 0;
> +}
> +
> +static struct amdgpu_mux_entry *amdgpu_get_sw_entry(struct amdgpu_ring_mux *mux,
> +				struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_mux_entry *e;
> +	int i;
> +
> +	e = NULL;
> +	for (i = 0; i < mux->num_ring_entries; i++) {
> +		if (mux->ring_entries[i].ring == ring) {
> +			e = &mux->ring_entries[i];
> +			break;
> +		}
> +	}
> +
> +	return e;
> +}
> +
> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr)
> +{
> +	struct amdgpu_mux_entry *e;
> +
> +	e = amdgpu_get_sw_entry(mux, ring);
> +	if (!e) {
> +		DRM_ERROR("cannot find entry for sw ring\n");
> +		return;
> +	}
> +
> +	spin_lock(&mux->lock);


A more general question: I assume the spinlock here protects against
concurrent runs of amdgpu_ib_schedule. For them to be even theoretically
concurrent, they must come from direct submissions to HW (because any
scheduler-mediated submission is serialized through the dedicated
scheduler worker thread). But in that case, why do we protect only here?
If I am not missing something, there is no overall per-HW-ring lock when
calling amdgpu_ib_schedule today, and we do a lot of HW accesses to the
ring there which should probably be protected from concurrent accesses.

So, can anyone answer this question?


> +	e->sw_cptr = e->sw_wptr;
> +	e->sw_wptr = wptr;
> +	e->start_ptr_in_hw_ring = mux->real_ring->wptr;
> +
> +	if (copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr) == 0) {
> +		e->end_ptr_in_hw_ring = mux->real_ring->wptr;
> +		amdgpu_ring_commit(mux->real_ring);
> +	}
> +
> +	spin_unlock(&mux->lock);
> +}
> +
> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_mux_entry *e;
> +
> +	e = amdgpu_get_sw_entry(mux, ring);
> +	if (!e) {
> +		DRM_ERROR("cannot find entry for sw ring\n");
> +		return 0;
> +	}
> +
> +	return e->sw_wptr;
> +}
> +
> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_mux_entry *e;
> +	u64 r_rptr, r_wptr, offset, start, end;
> +
> +	e = amdgpu_get_sw_entry(mux, ring);
> +	if (!e) {
> +		DRM_ERROR("no sw entry found!\n");
> +		return 0;
> +	}
> +
> +	r_rptr = amdgpu_ring_get_rptr(mux->real_ring);
> +	r_wptr = amdgpu_ring_get_wptr(mux->real_ring);
> +
> +	if (r_wptr < r_rptr)
> +		r_wptr += mux->real_ring->ring_size >> 2;
> +
> +	start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask;
> +	end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask;
> +	if (start > end)
> +		end += mux->real_ring->ring_size >> 2;
> +	if (r_rptr <= end && r_rptr >= start) {
> +		offset = r_rptr - start;
> +		e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask;
> +	} else if (r_rptr < start) {
> +		e->sw_rptr = e->sw_cptr;
> +	} else {
> +		e->sw_rptr = e->sw_wptr;
> +	}
> +
> +	return e->sw_rptr;
> +}
> +
> +/*copy packages on sw ring range[begin, end) */
> +static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
> +	u64 s_begin, u64 s_end)
> +{
> +	u64 begin, end, r_begin, r_end;
> +	struct amdgpu_ring *real_ring = mux->real_ring;
> +
> +	begin = s_begin & ring->buf_mask;
> +	end = s_end & ring->buf_mask;
> +
> +	r_begin = real_ring->wptr & real_ring->buf_mask;
> +	if (begin == end)
> +		return -ERANGE;
> +	if (begin > end) {
> +		amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) + end - begin);
> +		amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[begin],
> +			(ring->ring_size >> 2) - begin);
> +		amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[0], end);
> +	} else {
> +		amdgpu_ring_alloc(real_ring, end - begin);
> +		amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[begin], end - begin);
> +	}
> +
> +	r_end = real_ring->wptr & real_ring->buf_mask;
> +
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> new file mode 100644
> index 000000000000..d058c43bb063
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> @@ -0,0 +1,67 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#ifndef __AMDGPU_RING_MUX__
> +#define __AMDGPU_RING_MUX__
> +
> +#include <linux/timer.h>
> +#include <linux/spinlock.h>
> +#include "amdgpu_ring.h"
> +
> +struct amdgpu_ring;
> +/*
> + * start_ptr_in_hw_ring - last copied start loc on hw ring
> + * end_ptr_in_hw_ring - last copied end loc on hw ring
> + *sw_cptr -the begin of copy ptr in sw ring
> + *sw_rptr; the read ptr in sw ring
> + *sw_wptr; the write ptr in sw ring
> + */
> +struct amdgpu_mux_entry {
> +	struct amdgpu_ring	*ring;
> +	u64 start_ptr_in_hw_ring;
> +	u64 end_ptr_in_hw_ring;
> +
> +	u64 sw_cptr;
> +	u64 sw_rptr;
> +	u64 sw_wptr;
> +};
> +
> +struct amdgpu_ring_mux {
> +	struct amdgpu_ring *real_ring;
> +
> +	struct amdgpu_mux_entry ring_entries[AMDGPU_MAX_GFX_RINGS];
> +
> +	unsigned num_ring_entries;
> +
> +	spinlock_t			lock;
> +
> +};
> +
> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux);
> +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr);
> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
> +
> +#endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
> new file mode 100644
> index 000000000000..452d0ff37758
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
> @@ -0,0 +1,204 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + * All Rights Reserved.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the
> + * "Software"), to deal in the Software without restriction, including
> + * without limitation the rights to use, copy, modify, merge, publish,
> + * distribute, sub license, and/or sell copies of the Software, and to
> + * permit persons to whom the Software is furnished to do so, subject to
> + * the following conditions:
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
> + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
> + * USE OR OTHER DEALINGS IN THE SOFTWARE.
> + *
> + * The above copyright notice and this permission notice (including the
> + * next paragraph) shall be included in all copies or substantial portions
> + * of the Software.
> + *
> + */
> +
> +#include "amdgpu_sw_ring.h"
> +#include "amdgpu_ring_mux.h"
> +
> +#define amdgpu_ring_get_gpu_addr(ring, offset)				\
> +	(ring->is_mes_queue ?						\
> +	 (ring->mes_ctx->meta_data_gpu_addr + offset) :			\
> +	 (ring->adev->wb.gpu_addr + offset * 4))
> +
> +#define amdgpu_ring_get_cpu_addr(ring, offset)				\
> +	(ring->is_mes_queue ?						\
> +	 (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \
> +	 (&ring->adev->wb.wb[offset]))
> +
> +
> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
> +		     unsigned int max_dw, struct amdgpu_irq_src *irq_src,
> +		     unsigned int irq_type, unsigned int hw_prio,
> +		     atomic_t *sched_score)
> +{
> +	int r;
> +	int sched_hw_submission = amdgpu_sched_hw_submission;
> +	u32 *num_sched;
> +	u32 hw_ip;
> +
> +	BUG_ON(!ring->is_sw_ring);
> +
> +	if (ring->adev == NULL) {
> +		if (adev->num_rings >= AMDGPU_MAX_RINGS)
> +			return -EINVAL;
> +
> +		ring->adev = adev;
> +		ring->num_hw_submission = sched_hw_submission;
> +		ring->sched_score = sched_score;
> +		ring->vmid_wait = dma_fence_get_stub();
> +
> +		if (!ring->is_mes_queue) {
> +			ring->idx = adev->num_rings++;
> +			adev->rings[ring->idx] = ring;
> +		}
> +
> +		r = amdgpu_fence_driver_init_ring(ring);
> +		if (r)
> +			return r;
> +	}
> +
> +	r = amdgpu_device_wb_get(adev, &ring->fence_offs);
> +	if (r) {
> +		dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
> +		return r;
> +	}
> +
> +	r = amdgpu_device_wb_get(adev, &ring->fence_offs);
> +	if (r) {
> +		dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
> +		return r;
> +	}


Looks like an accidental copy-paste duplicate of the block above.

> +
> +	r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
> +	if (r) {
> +		dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r);
> +		return r;
> +	}
> +
> +	r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
> +	if (r) {
> +		dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r);
> +		return r;
> +	}
> +
> +	ring->fence_gpu_addr =
> +		amdgpu_ring_get_gpu_addr(ring, ring->fence_offs);
> +	ring->fence_cpu_addr =
> +		amdgpu_ring_get_cpu_addr(ring, ring->fence_offs);
> +
> +	ring->trail_fence_gpu_addr =
> +		amdgpu_ring_get_gpu_addr(ring, ring->trail_fence_offs);
> +	ring->trail_fence_cpu_addr =
> +		amdgpu_ring_get_cpu_addr(ring, ring->trail_fence_offs);
> +
> +	ring->cond_exe_gpu_addr =
> +		amdgpu_ring_get_gpu_addr(ring, ring->cond_exe_offs);
> +	ring->cond_exe_cpu_addr =
> +		amdgpu_ring_get_cpu_addr(ring, ring->cond_exe_offs);
> +
> +	/* always set cond_exec_polling to CONTINUE */
> +	*ring->cond_exe_cpu_addr = 1;
> +
> +	r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
> +	if (r) {
> +		dev_err(adev->dev, "failed initializing fences (%d).\n", r);
> +		return r;
> +	}
> +
> +	ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission);
> +
> +	ring->buf_mask = (ring->ring_size / 4) - 1;
> +	ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
> +		0xffffffffffffffff : ring->buf_mask;
> +
> +	/* Allocate ring buffer */
> +	if (ring->ring == NULL) {
> +		ring->ring = kzalloc(ring->ring_size + ring->funcs->extra_dw, GFP_KERNEL);
> +		if (!ring->ring) {
> +			dev_err(adev->dev, "(%d) swring create failed\n", r);
> +			return r;
> +		}
> +
> +		amdgpu_ring_clear_ring(ring);
> +	}
> +
> +	ring->max_dw = max_dw;
> +	ring->hw_prio = hw_prio;
> +
> +	if (!ring->no_scheduler) {
> +		hw_ip = ring->funcs->type;
> +		num_sched = &adev->gpu_sched[hw_ip][hw_prio].num_scheds;
> +		adev->gpu_sched[hw_ip][hw_prio].sched[(*num_sched)++] =
> +			&ring->sched;
> +	}
> +
> +	return 0;
> +}


In general, I see this function is largely a one-to-one subset of
amdgpu_ring_init. Could you see a way to refactor it so that this function
is the base, and for the HW-related code that differs (like the BO
allocation for the ring buffer) you add if (!ring->is_sw_ring) ... around
those code snippets? That would avoid substantial code duplication.

Andrey


> +
> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_device *adev = ring->adev;
> +	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
> +
> +	BUG_ON(!ring->is_sw_ring);
> +	return amdgpu_ring_get_rptr_from_mux(mux, ring);
> +}
> +
> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_device *adev = ring->adev;
> +	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
> +
> +	BUG_ON(!ring->is_sw_ring);
> +	return amdgpu_ring_get_wptr_from_mux(mux, ring);
> +}
> +
> +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring)
> +{
> +	BUG_ON(!ring->is_sw_ring);
> +}
> +
> +void amdgpu_sw_ring_commit(struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_device *adev = ring->adev;
> +	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
> +
> +	BUG_ON(!ring->is_sw_ring);
> +	amdgpu_ring_set_wptr_to_mux(mux, ring, ring->wptr);
> +}
> +
> +void amdgpu_sw_ring_fini(struct amdgpu_ring *ring)
> +{
> +	BUG_ON(!ring->is_sw_ring);
> +
> +	/* Not to finish a ring which is not initialized */
> +	if (!(ring->adev) ||
> +	    (!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
> +		return;
> +
> +	ring->sched.ready = false;
> +
> +	amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
> +	amdgpu_device_wb_free(ring->adev, ring->fence_offs);
> +
> +	kfree((void *)ring->ring);
> +
> +	dma_fence_put(ring->vmid_wait);
> +	ring->vmid_wait = NULL;
> +	ring->me = 0;
> +
> +	ring->adev->rings[ring->idx] = NULL;
> +}
> +
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
> new file mode 100644
> index 000000000000..c05d8a94ad0c
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
> @@ -0,0 +1,48 @@
> +/*
> + * Copyright 2012 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#include <drm/amdgpu_drm.h>
> +#include <drm/gpu_scheduler.h>
> +#include <drm/drm_print.h>
> +
> +#include "amdgpu_irq.h"
> +#include "amdgpu_ring.h"
> +#include "amdgpu.h"
> +
> +#ifndef __AMDGPU_SWRING_H__
> +#define __AMDGPU_SWRING_H__
> +
> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *sw_ring,
> +		     unsigned int max_dw, struct amdgpu_irq_src *irq_src,
> +		     unsigned int irq_type, unsigned int hw_prio,
> +		     atomic_t *sched_score);
> +void amdgpu_sw_ring_fini(struct amdgpu_ring *ring);
> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring);
> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring);
> +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring);
> +void amdgpu_sw_ring_commit(struct amdgpu_ring *ring);
> +
> +void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
> +void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
> +
> +#endif

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 2/4] drm/amdgpu: Add software ring callbacks for gfx9(v3)
  2022-09-09  1:50 ` [PATCH 2/4] drm/amdgpu: Add software ring callbacks for gfx9(v3) jiadong.zhu
@ 2022-09-09 16:46   ` Andrey Grodzovsky
  2022-09-13 15:23   ` Luben Tuikov
  1 sibling, 0 replies; 28+ messages in thread
From: Andrey Grodzovsky @ 2022-09-09 16:46 UTC (permalink / raw)
  To: jiadong.zhu, amd-gfx; +Cc: Ray.Huang

Acked-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>

Andrey

On 2022-09-08 21:50, jiadong.zhu@amd.com wrote:
> From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>
>
> Set ring functions with software ring callbacks
> on gfx9.
>
> The software ring could be tested by debugfs_test_ib
> case.
>
> v2: set sw_ring 2 to enable software ring by default.
> v3: remove the parameter for software ring enablement.
>
> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h      |   1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |   2 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |  16 +++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   3 +-
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 116 +++++++++++++++++++++--
>   5 files changed, 128 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 96d058c4cd4b..525df0b4d55f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -207,6 +207,7 @@ extern bool amdgpu_ignore_bad_page_threshold;
>   extern struct amdgpu_watchdog_timer amdgpu_watchdog_timer;
>   extern int amdgpu_async_gfx_ring;
>   extern int amdgpu_mcbp;
> +extern int amdgpu_sw_ring;
>   extern int amdgpu_discovery;
>   extern int amdgpu_mes;
>   extern int amdgpu_mes_kiq;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 0de8e3cd0f1c..5eec82014f0a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -348,6 +348,8 @@ struct amdgpu_gfx {
>   
>   	bool				is_poweron;
>   
> +	/*software ring*/
> +	unsigned						num_sw_gfx_rings;
>   	struct amdgpu_ring_mux			muxer;
>   };
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index 13db99d653bd..5b70a2c36d81 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -33,6 +33,7 @@
>   
>   #include <drm/amdgpu_drm.h>
>   #include "amdgpu.h"
> +#include "amdgpu_sw_ring.h"
>   #include "atom.h"
>   
>   /*
> @@ -121,6 +122,11 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
>   {
>   	uint32_t count;
>   
> +	if (ring->is_sw_ring) {
> +		amdgpu_sw_ring_commit(ring);
> +		return;
> +	}
> +
>   	/* We pad to match fetch size */
>   	count = ring->funcs->align_mask + 1 -
>   		(ring->wptr & ring->funcs->align_mask);
> @@ -183,6 +189,11 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
>   	u32 *num_sched;
>   	u32 hw_ip;
>   
> +	if (adev->gfx.num_sw_gfx_rings > 0 && ring->is_sw_ring) {
> +		return amdgpu_sw_ring_init(adev, ring, max_dw, irq_src, irq_type,
> +			hw_prio, sched_score);
> +	}
> +
>   	/* Set the hw submission limit higher for KIQ because
>   	 * it's used for a number of gfx/compute tasks by both
>   	 * KFD and KGD which may have outstanding fences and
> @@ -343,7 +354,10 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
>    */
>   void amdgpu_ring_fini(struct amdgpu_ring *ring)
>   {
> -
> +	if (ring->is_sw_ring) {
> +		amdgpu_sw_ring_fini(ring);
> +		return;
> +	}
>   	/* Not to finish a ring which is not initialized */
>   	if (!(ring->adev) ||
>   	    (!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index fe33a683bfba..ba6d8c753f7e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -38,7 +38,8 @@ struct amdgpu_vm;
>   /* max number of rings */
>   #define AMDGPU_MAX_RINGS		28
>   #define AMDGPU_MAX_HWIP_RINGS		8
> -#define AMDGPU_MAX_GFX_RINGS		2
> +/*2 software ring and 1 real ring*/
> +#define AMDGPU_MAX_GFX_RINGS		3
>   #define AMDGPU_MAX_COMPUTE_RINGS	8
>   #define AMDGPU_MAX_VCE_RINGS		3
>   #define AMDGPU_MAX_UVD_ENC_RINGS	2
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 5349ca4d19e3..774e44e1074a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -47,6 +47,7 @@
>   
>   #include "amdgpu_ras.h"
>   
> +#include "amdgpu_sw_ring.h"
>   #include "gfx_v9_4.h"
>   #include "gfx_v9_0.h"
>   #include "gfx_v9_4_2.h"
> @@ -55,7 +56,8 @@
>   #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
>   #include "asic_reg/gc/gc_9_0_default.h"
>   
> -#define GFX9_NUM_GFX_RINGS     1
> +#define GFX9_NUM_GFX_RINGS     3
> +#define GFX9_NUM_SW_GFX_RINGS  2
>   #define GFX9_MEC_HPD_SIZE 4096
>   #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
>   #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
> @@ -2270,6 +2272,7 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
>   static int gfx_v9_0_sw_init(void *handle)
>   {
>   	int i, j, k, r, ring_id;
> +	unsigned int hw_prio;
>   	struct amdgpu_ring *ring;
>   	struct amdgpu_kiq *kiq;
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> @@ -2356,13 +2359,40 @@ static int gfx_v9_0_sw_init(void *handle)
>   			sprintf(ring->name, "gfx_%d", i);
>   		ring->use_doorbell = true;
>   		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
> +		ring->is_sw_ring = (adev->gfx.num_sw_gfx_rings > 1) && (i > 0);
> +
> +		if (adev->gfx.num_sw_gfx_rings > 1 && i == 2)
> +			hw_prio = AMDGPU_RING_PRIO_2;
> +		else
> +			hw_prio = AMDGPU_RING_PRIO_DEFAULT;
> +		if (adev->gfx.num_sw_gfx_rings > 0 && i == 0)
> +			ring->no_scheduler = true;
> +
>   		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
>   				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
> -				     AMDGPU_RING_PRIO_DEFAULT, NULL);
> +				     hw_prio, NULL);
>   		if (r)
>   			return r;
> +
> +		if (ring->is_sw_ring)
> +			ring->wptr = 0;
>   	}
>   
> +	/*init the muxer and add sw rings */
> +	if (adev->gfx.num_sw_gfx_rings > 0) {
> +		r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0]);
> +		if (r) {
> +			DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
> +			return r;
> +		}
> +		for (i = 1; i < adev->gfx.num_gfx_rings; i++) {
> +			r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer, &adev->gfx.gfx_ring[i]);
> +			if (r) {
> +				DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
> +				return r;
> +			}
> +		}
> +	}
>   	/* set up the compute queues - allocate horizontally across pipes */
>   	ring_id = 0;
>   	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
> @@ -2413,6 +2443,9 @@ static int gfx_v9_0_sw_fini(void *handle)
>   	int i;
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>   
> +	if (adev->gfx.num_sw_gfx_rings > 0)
> +		amdgpu_ring_mux_fini(&adev->gfx.muxer);
> +
>   	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
>   		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
>   	for (i = 0; i < adev->gfx.num_compute_rings; i++)
> @@ -4709,8 +4742,9 @@ static int gfx_v9_0_early_init(void *handle)
>   	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
>   	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
>   		adev->gfx.num_gfx_rings = 0;
> -	else
> -		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
> +
> +	adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
> +	adev->gfx.num_sw_gfx_rings = GFX9_NUM_SW_GFX_RINGS;
>   	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
>   					  AMDGPU_MAX_COMPUTE_RINGS);
>   	gfx_v9_0_set_kiq_pm4_funcs(adev);
> @@ -5877,7 +5911,11 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
>   
>   	switch (me_id) {
>   	case 0:
> -		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
> +		if (adev->gfx.num_sw_gfx_rings > 1) {
> +			for (i = 1; i <= adev->gfx.num_sw_gfx_rings; i++)
> +				amdgpu_fence_process(&adev->gfx.gfx_ring[i]);
> +		} else
> +			amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
>   		break;
>   	case 1:
>   	case 2:
> @@ -6882,6 +6920,62 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
>   	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
>   };
>   
> +
> +static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
> +	.type = AMDGPU_RING_TYPE_GFX,
> +	.align_mask = 0xff,
> +	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
> +	.support_64bit_ptrs = true,
> +	.secure_submission_supported = true,
> +	.vmhub = AMDGPU_GFXHUB_0,
> +	.get_rptr = amdgpu_sw_ring_get_rptr_gfx,
> +	.get_wptr = amdgpu_sw_ring_get_wptr_gfx,
> +	.set_wptr = amdgpu_sw_ring_set_wptr_gfx,
> +	.emit_frame_size = /* totally 242 maximum if 16 IBs */
> +		5 +  /* COND_EXEC */
> +		7 +  /* PIPELINE_SYNC */
> +		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> +		2 + /* VM_FLUSH */
> +		8 +  /* FENCE for VM_FLUSH */
> +		20 + /* GDS switch */
> +		4 + /* double SWITCH_BUFFER,
> +		     * the first COND_EXEC jump to the place just
> +		     * prior to this double SWITCH_BUFFER
> +		     */
> +		5 + /* COND_EXEC */
> +		7 +	 /*	HDP_flush */
> +		4 +	 /*	VGT_flush */
> +		14 + /*	CE_META */
> +		31 + /*	DE_META */
> +		3 + /* CNTX_CTRL */
> +		5 + /* HDP_INVL */
> +		8 + 8 + /* FENCE x2 */
> +		2 + /* SWITCH_BUFFER */
> +		7, /* gfx_v9_0_emit_mem_sync */
> +	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
> +	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
> +	.emit_fence = gfx_v9_0_ring_emit_fence,
> +	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
> +	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
> +	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
> +	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
> +	.test_ring = gfx_v9_0_ring_test_ring,
> +	.test_ib = gfx_v9_0_ring_test_ib,
> +	.insert_nop = amdgpu_ring_insert_nop,
> +	.pad_ib = amdgpu_ring_generic_pad_ib,
> +	.emit_switch_buffer = gfx_v9_ring_emit_sb,
> +	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
> +	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
> +	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
> +	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
> +	.emit_wreg = gfx_v9_0_ring_emit_wreg,
> +	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
> +	.soft_recovery = gfx_v9_0_ring_soft_recovery,
> +	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
> +};
> +
>   static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
>   	.type = AMDGPU_RING_TYPE_COMPUTE,
>   	.align_mask = 0xff,
> @@ -6956,9 +7050,15 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
>   
>   	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
>   
> -	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
> -		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
> -
> +	if (adev->gfx.num_sw_gfx_rings > 0) {
> +		//first one is the real ring
> +		adev->gfx.gfx_ring[0].funcs = &gfx_v9_0_ring_funcs_gfx;
> +		for (i = 1; i <= adev->gfx.num_sw_gfx_rings; i++)
> +			adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
> +	} else {
> +		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
> +			adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
> +	}
>   	for (i = 0; i < adev->gfx.num_compute_rings; i++)
>   		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
>   }

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 3/4] drm/amdgpu: Modify unmap_queue format for gfx9(v2)
  2022-09-09  1:50 ` [PATCH 3/4] drm/amdgpu: Modify unmap_queue format for gfx9(v2) jiadong.zhu
@ 2022-09-09 16:48   ` Andrey Grodzovsky
  0 siblings, 0 replies; 28+ messages in thread
From: Andrey Grodzovsky @ 2022-09-09 16:48 UTC (permalink / raw)
  To: jiadong.zhu, amd-gfx; +Cc: Ray.Huang

Really can't say too much here as I am not really familiar with queues 
map/unmap...

Andrey

On 2022-09-08 21:50, jiadong.zhu@amd.com wrote:
> From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>
>
> 1. Modify the unmap_queue package on gfx9.
>     Add trailing fence to track the preemption done.
> 2. Modify emit_ce_meta emit_de_meta functions
>     for the resumed ibs.
>
> v2: restyle code not to use ternary operator.
>
> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   1 +
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 181 +++++++++++++++++++----
>   drivers/gpu/drm/amd/amdgpu/soc15d.h      |   2 +
>   3 files changed, 155 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index ba6d8c753f7e..d3155dc86c07 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -60,6 +60,7 @@ enum amdgpu_ring_priority_level {
>   #define AMDGPU_FENCE_FLAG_64BIT         (1 << 0)
>   #define AMDGPU_FENCE_FLAG_INT           (1 << 1)
>   #define AMDGPU_FENCE_FLAG_TC_WB_ONLY    (1 << 2)
> +#define AMDGPU_FENCE_FLAG_EXEC          (1 << 3)
>   
>   #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 774e44e1074a..89a5c45b1006 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -753,7 +753,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
>   static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
>   				struct amdgpu_cu_info *cu_info);
>   static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
> -static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
> +static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
>   static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
>   static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
>   					  void *ras_error_status);
> @@ -826,9 +826,10 @@ static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
>   			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
>   
>   	if (action == PREEMPT_QUEUES_NO_UNMAP) {
> -		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
> -		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
> -		amdgpu_ring_write(kiq_ring, seq);
> +		amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
> +		amdgpu_ring_write(kiq_ring, 0);
> +		amdgpu_ring_write(kiq_ring, 0);
> +
>   	} else {
>   		amdgpu_ring_write(kiq_ring, 0);
>   		amdgpu_ring_write(kiq_ring, 0);
> @@ -5356,11 +5357,16 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
>   
>   	control |= ib->length_dw | (vmid << 24);
>   
> -	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
> +	if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
>   		control |= INDIRECT_BUFFER_PRE_ENB(1);
>   
> +		if (flags & AMDGPU_IB_PREEMPTED)
> +			control |= INDIRECT_BUFFER_PRE_RESUME(1);
> +
>   		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
> -			gfx_v9_0_ring_emit_de_meta(ring);
> +			gfx_v9_0_ring_emit_de_meta(ring,
> +				 (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ?
> +					true : false);
>   	}
>   
>   	amdgpu_ring_write(ring, header);
> @@ -5415,17 +5421,23 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
>   	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
>   	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
>   	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
> +	bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
> +	uint32_t dw2 = 0;
>   
>   	/* RELEASE_MEM - flush caches, send int */
>   	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
> -	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
> -					       EOP_TC_NC_ACTION_EN) :
> -					      (EOP_TCL1_ACTION_EN |
> -					       EOP_TC_ACTION_EN |
> -					       EOP_TC_WB_ACTION_EN |
> -					       EOP_TC_MD_ACTION_EN)) |
> -				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
> -				 EVENT_INDEX(5)));
> +
> +	if (writeback) {
> +		dw2 = EOP_TC_WB_ACTION_EN | EOP_TC_NC_ACTION_EN;
> +	} else {
> +		dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
> +				EOP_TC_WB_ACTION_EN | EOP_TC_MD_ACTION_EN;
> +	}
> +	dw2 |= EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
> +	if (exec)
> +		dw2 |= EOP_EXEC;
> +
> +	amdgpu_ring_write(ring, dw2);
>   	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
>   
>   	/*
> @@ -5530,33 +5542,135 @@ static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
>   	amdgpu_ring_write(ring, 0);
>   }
>   
> -static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
> +static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
>   {
> +	struct amdgpu_device *adev = ring->adev;
>   	struct v9_ce_ib_state ce_payload = {0};
> -	uint64_t csa_addr;
> +	uint64_t offset, ce_payload_gpu_addr;
> +	void *ce_payload_cpu_addr;
>   	int cnt;
>   
>   	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
> -	csa_addr = amdgpu_csa_vaddr(ring->adev);
> +
> +	if (ring->is_mes_queue) {
> +		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
> +				  gfx[0].gfx_meta_data) +
> +			offsetof(struct v9_gfx_meta_data, ce_payload);
> +		ce_payload_gpu_addr =
> +			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
> +		ce_payload_cpu_addr =
> +			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
> +	} else {
> +		offset = offsetof(struct v9_gfx_meta_data, ce_payload);
> +		ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
> +		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
> +	}
>   
>   	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
>   	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
>   				 WRITE_DATA_DST_SEL(8) |
>   				 WR_CONFIRM) |
>   				 WRITE_DATA_CACHE_POLICY(0));
> -	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
> -	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
> -	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
> +	amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
> +	amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
> +
> +	if (resume)
> +		amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
> +					   sizeof(ce_payload) >> 2);
> +	else
> +		amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
> +					   sizeof(ce_payload) >> 2);
> +}
> +
> +static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
> +{
> +	int i, r = 0;
> +	struct amdgpu_device *adev = ring->adev;
> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> +	struct amdgpu_ring *kiq_ring = &kiq->ring;
> +	unsigned long flags;
> +
> +	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
> +		return -EINVAL;
> +
> +	spin_lock_irqsave(&kiq->ring_lock, flags);
> +
> +	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
> +		spin_unlock_irqrestore(&kiq->ring_lock, flags);
> +		return -ENOMEM;
> +	}
> +
> +	/* assert preemption condition */
> +	amdgpu_ring_set_preempt_cond_exec(ring, false);
> +
> +	ring->trail_seq += 1;
> +	amdgpu_ring_alloc(ring, 13);
> +	gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
> +				  ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC);
> +	/*reset the CP_VMID_PREEMPT after trailing fence*/
> +	amdgpu_ring_emit_wreg(ring,
> +				  SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
> +				  0x0);
> +
> +	/* assert IB preemption, emit the trailing fence */
> +	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
> +				   ring->trail_fence_gpu_addr,
> +				   ring->trail_seq);
> +
> +	amdgpu_ring_commit(kiq_ring);
> +	spin_unlock_irqrestore(&kiq->ring_lock, flags);
> +
> +	/* poll the trailing fence */
> +	for (i = 0; i < adev->usec_timeout; i++) {
> +		if (ring->trail_seq ==
> +		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
> +			break;
> +		udelay(1);
> +	}
> +
> +	if (i >= adev->usec_timeout) {
> +		r = -EINVAL;
> +		DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
> +	}
> +
> +	amdgpu_ring_commit(ring);
> +
> +	/* deassert preemption condition */
> +	amdgpu_ring_set_preempt_cond_exec(ring, true);
> +	return r;
>   }
>   
> -static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
> +static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
>   {
> +	struct amdgpu_device *adev = ring->adev;
>   	struct v9_de_ib_state de_payload = {0};
> -	uint64_t csa_addr, gds_addr;
> +	uint64_t offset, gds_addr, de_payload_gpu_addr;
> +	void *de_payload_cpu_addr;
>   	int cnt;
>   
> -	csa_addr = amdgpu_csa_vaddr(ring->adev);
> -	gds_addr = csa_addr + 4096;
> +	if (ring->is_mes_queue) {
> +		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
> +				  gfx[0].gfx_meta_data) +
> +			offsetof(struct v9_gfx_meta_data, de_payload);
> +		de_payload_gpu_addr =
> +			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
> +		de_payload_cpu_addr =
> +			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
> +
> +		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
> +				  gfx[0].gds_backup) +
> +			offsetof(struct v9_gfx_meta_data, de_payload);
> +		gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
> +	} else {
> +		offset = offsetof(struct v9_gfx_meta_data, de_payload);
> +		de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
> +		de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
> +
> +		gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
> +				 AMDGPU_CSA_SIZE - adev->gds.gds_size,
> +				 PAGE_SIZE);
> +	}
> +
>   	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
>   	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
>   
> @@ -5566,9 +5680,15 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
>   				 WRITE_DATA_DST_SEL(8) |
>   				 WR_CONFIRM) |
>   				 WRITE_DATA_CACHE_POLICY(0));
> -	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
> -	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
> -	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
> +	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
> +	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
> +
> +	if (resume)
> +		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
> +					   sizeof(de_payload) >> 2);
> +	else
> +		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
> +					   sizeof(de_payload) >> 2);
>   }
>   
>   static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
> @@ -5584,8 +5704,10 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
>   {
>   	uint32_t dw2 = 0;
>   
> -	if (amdgpu_sriov_vf(ring->adev))
> -		gfx_v9_0_ring_emit_ce_meta(ring);
> +	if (amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp)
> +		gfx_v9_0_ring_emit_ce_meta(ring,
> +					(!amdgpu_sriov_vf(ring->adev) &&
> +						flags & AMDGPU_IB_PREEMPTED) ? true : false);
>   
>   	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
>   	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
> @@ -6912,6 +7034,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
>   	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
>   	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
>   	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
> +	.preempt_ib = gfx_v9_0_ring_preempt_ib,
>   	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
>   	.emit_wreg = gfx_v9_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h
> index 799925d22fc8..614e9f8467fb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h
> @@ -162,6 +162,7 @@
>   		 * 2 - Bypass
>   		 */
>   #define     INDIRECT_BUFFER_PRE_ENB(x)		 ((x) << 21)
> +#define     INDIRECT_BUFFER_PRE_RESUME(x)           ((x) << 30)
>   #define	PACKET3_COPY_DATA				0x40
>   #define	PACKET3_PFP_SYNC_ME				0x42
>   #define	PACKET3_COND_WRITE				0x45
> @@ -184,6 +185,7 @@
>   #define		EOP_TC_ACTION_EN                        (1 << 17) /* L2 */
>   #define		EOP_TC_NC_ACTION_EN			(1 << 19)
>   #define		EOP_TC_MD_ACTION_EN			(1 << 21) /* L2 metadata */
> +#define		EOP_EXEC					(1 << 28) /* For Trailing Fence */
>   
>   #define		DATA_SEL(x)                             ((x) << 29)
>   		/* 0 - discard

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 4/4] drm/amdgpu: Implement OS triggered MCBP(v2)
  2022-09-09  1:50 ` [PATCH 4/4] drm/amdgpu: Implement OS triggered MCBP(v2) jiadong.zhu
@ 2022-09-09 17:02   ` Andrey Grodzovsky
  2022-09-13  1:32     ` Zhu, Jiadong
  2022-09-13 15:47   ` Luben Tuikov
  1 sibling, 1 reply; 28+ messages in thread
From: Andrey Grodzovsky @ 2022-09-09 17:02 UTC (permalink / raw)
  To: jiadong.zhu, amd-gfx; +Cc: Ray.Huang


On 2022-09-08 21:50, jiadong.zhu@amd.com wrote:
> From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>
>
> Trigger MCBP according to the priority of the
> software rings and the hw fence signaling
> condition.
>
> The muxer records some latest locations from the
> software ring which are used to resubmit packages
> in preemption scenarios.
>
> v2: update comment style
>
> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/Makefile          |   2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c       |   2 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c     | 101 ++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h     |  29 ++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c     |  12 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h     |   3 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 163 ++++++++++++++++++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  16 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  |  26 +++
>   9 files changed, 351 insertions(+), 3 deletions(-)
>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c
>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
> index 85224bc81ce5..24c5aa19bbf2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -59,7 +59,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>   	amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
>   	amdgpu_fw_attestation.o amdgpu_securedisplay.o \
>   	amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
> -	amdgpu_sw_ring.o amdgpu_ring_mux.o
> +	amdgpu_sw_ring.o amdgpu_ring_mux.o amdgpu_mcbp.o
>   
>   amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 258cffe3c06a..af86d87e2f3b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -211,6 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>   		}
>   	}
>   
> +	amdgpu_ring_ib_begin(ring);
>   	if (job && ring->funcs->init_cond_exec)
>   		patch_offset = amdgpu_ring_init_cond_exec(ring);
>   
> @@ -285,6 +286,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>   	    ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
>   		ring->funcs->emit_wave_limit(ring, false);
>   
> +	amdgpu_ring_ib_end(ring);
>   	amdgpu_ring_commit(ring);
>   	return 0;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c
> new file mode 100644
> index 000000000000..2a12101a7699
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c
> @@ -0,0 +1,101 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#include <linux/delay.h>
> +#include <linux/kernel.h>
> +#include <linux/firmware.h>
> +#include <linux/module.h>
> +#include <linux/pci.h>
> +#include <drm/gpu_scheduler.h>
> +
> +#include "amdgpu.h"
> +#include "amdgpu_mcbp.h"
> +#include "amdgpu_ring.h"
> +
> +/* trigger mcbp and find if we need resubmit */
> +int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux)
> +{
> +	struct amdgpu_mux_entry *e;
> +	struct amdgpu_ring *ring = NULL;
> +	int i;
> +
> +	DRM_INFO("%s in\n", __func__);
> +
> +	spin_lock(&mux->lock);


Same comment/question about locking as in patch 1


> +
> +	amdgpu_ring_preempt_ib(mux->real_ring);
> +
> +	ring = NULL;
> +	for (i = 0; i < mux->num_ring_entries; i++) {
> +		e = &mux->ring_entries[i];
> +		if (e->ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
> +			ring = e->ring;
> +			break;
> +		}
> +	}
> +
> +	if (!ring) {
> +		DRM_ERROR("cannot find low priority ring\n");
> +		return -ENOENT;
> +	}
> +
> +	amdgpu_fence_process(ring);


What's the role of fence signaling here (sorry, I am not very 
knowledgeable about how exactly mcbp works) ?


> +
> +	DRM_INFO("after preempted ring_prio(%d) last_seq(%x) sync_seq(%x)\n",
> +		ring->hw_prio, atomic_read(&ring->fence_drv.last_seq), ring->fence_drv.sync_seq);
> +
> +	if (atomic_read(&ring->fence_drv.last_seq) !=
> +	    ring->fence_drv.sync_seq) {
> +		DRM_INFO("schedule resubmit\n");
> +		mux->s_resubmit = true;
> +		amdgpu_ring_mux_schedule_resubmit(mux);
> +	}
> +
> +	spin_unlock(&mux->lock);
> +	return 0;
> +}
> +
> +
> +/*scan on low prio rings to have unsignaled fence and high ring has no fence.*/
> +int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux)
> +{
> +	struct amdgpu_ring *ring;
> +	uint32_t seq, last_seq;
> +	int i, need_preempt;
> +
> +	need_preempt = 0;
> +	for (i = 0; i < mux->num_ring_entries; i++) {
> +		ring = mux->ring_entries[i].ring;
> +		last_seq = atomic_read(&ring->fence_drv.last_seq);
> +		seq = READ_ONCE(ring->fence_drv.sync_seq);
> +		DRM_INFO("ring(%p) prio(%d) last_seq(%x) seq(%x)\n",
> +			ring, ring->hw_prio, last_seq, seq);
> +		if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT && last_seq < seq)
> +			return 0;
> +		if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && last_seq < seq)
> +			need_preempt = 1;
> +	}
> +
> +	DRM_INFO("%s return %d\n", __func__, need_preempt && !mux->s_resubmit);
> +	return need_preempt && !mux->s_resubmit;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h
> new file mode 100644
> index 000000000000..0033bcba8d03
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h
> @@ -0,0 +1,29 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#ifndef __AMDGPU_MCBP_H__
> +#define __AMDGPU_MCBP_H__
> +
> +int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux);
> +int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux);
> +#endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index 5b70a2c36d81..6d7f8a40e308 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -583,3 +583,15 @@ int amdgpu_ring_init_mqd(struct amdgpu_ring *ring)
>   
>   	return mqd_mgr->init_mqd(adev, ring->mqd_ptr, &prop);
>   }
> +
> +void amdgpu_ring_ib_begin(struct amdgpu_ring *ring)
> +{
> +	if (ring->is_sw_ring)
> +		amdgpu_sw_ring_ib_begin(ring);
> +}
> +
> +void amdgpu_ring_ib_end(struct amdgpu_ring *ring)
> +{
> +	if (ring->is_sw_ring)
> +		amdgpu_sw_ring_ib_end(ring);
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index d3155dc86c07..399037b0d6e6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -311,6 +311,9 @@ struct amdgpu_ring {
>   #define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)
>   
>   int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
> +void amdgpu_ring_ib_begin(struct amdgpu_ring *ring);
> +void amdgpu_ring_ib_end(struct amdgpu_ring *ring);
> +
>   void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
>   void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
>   void amdgpu_ring_commit(struct amdgpu_ring *ring);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
> index ea4a3c66119a..0c9b639b844e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
> @@ -20,28 +20,60 @@
>    * OTHER DEALINGS IN THE SOFTWARE.
>    *
>    */
> -
> +#include <linux/slab.h>
>   #include <drm/drm_print.h>
>   
>   #include "amdgpu_ring_mux.h"
> +#include "amdgpu_mcbp.h"
>   #include "amdgpu_ring.h"
>   
>   #define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ/2)
>   
> +static struct kmem_cache *amdgpu_mux_chunk_slab;
> +
>   static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
>   	u64 s_begin, u64 s_end);
> +static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux);
> +static void amdgpu_mux_resubmit_fallback(struct timer_list *t);
>   
>   int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
>   {
>   	mux->real_ring = ring;
> +
>   	memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
>   	mux->num_ring_entries = 0;
> +
> +	mux->s_resubmit = false;
> +
> +	amdgpu_mux_chunk_slab = kmem_cache_create(
> +		"amdgpu_mux_chunk", sizeof(struct amdgpu_mux_chunk), 0,
> +		SLAB_HWCACHE_ALIGN, NULL);
> +	if (!amdgpu_mux_chunk_slab) {
> +		DRM_ERROR("create amdgpu_mux_chunk cache failed\n");
> +		return -ENOMEM;
> +	}
> +
>   	spin_lock_init(&mux->lock);
> +
> +	timer_setup(&mux->resubmit_timer, amdgpu_mux_resubmit_fallback, 0);
> +
>   	return 0;
>   }
>   
>   void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux)
>   {
> +	struct amdgpu_mux_entry *e;
> +	struct amdgpu_mux_chunk *chunk, *chunk2;
> +	int i;
> +
> +	for (i = 0; i < mux->num_ring_entries; i++) {
> +		e = &mux->ring_entries[i];
> +		list_for_each_entry_safe(chunk, chunk2, &e->list, entry) {
> +			list_del(&chunk->entry);
> +			kmem_cache_free(amdgpu_mux_chunk_slab, chunk);
> +		}
> +	}
> +	kmem_cache_destroy(amdgpu_mux_chunk_slab);
>   	memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
>   	mux->num_ring_entries = 0;
>   }
> @@ -64,6 +96,8 @@ int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring
>   	e->sw_rptr = 0;
>   	e->sw_wptr = 0;
>   
> +	INIT_LIST_HEAD(&e->list);
> +
>   	return 0;
>   }
>   
> @@ -180,3 +214,130 @@ static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring
>   
>   	return 0;
>   }
> +
> +void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux)
> +{
> +	mod_timer(&mux->resubmit_timer, jiffies + AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT);
> +}
> +
> +void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_mux_entry *e;
> +	struct amdgpu_mux_chunk *chunk;
> +
> +	if (mux->s_resubmit)
> +		amdgpu_mux_resubmit_chunks(mux);
> +
> +	e = amdgpu_get_sw_entry(mux, ring);
> +	if (!e) {
> +		DRM_ERROR("cannot find entry!\n");
> +		return;
> +	}
> +
> +	chunk = kmem_cache_alloc(amdgpu_mux_chunk_slab, GFP_KERNEL);
> +	if (!chunk) {
> +		DRM_ERROR("alloc amdgpu_mux_chunk_slab failed\n");
> +		return;
> +	}
> +
> +	chunk->start = ring->wptr;
> +	list_add_tail(&chunk->entry, &e->list);
> +}
> +
> +static void scan_and_remove_signaled_chunk(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
> +{
> +	uint32_t last_seq, size = 0;
> +	struct amdgpu_mux_entry *e;
> +	struct amdgpu_mux_chunk *chunk, *tmp;
> +
> +	e = amdgpu_get_sw_entry(mux, ring);
> +	if (!e) {
> +		DRM_ERROR("cannot find entry!\n");
> +		return;
> +	}
> +
> +	last_seq = atomic_read(&ring->fence_drv.last_seq);
> +
> +	list_for_each_entry_safe(chunk, tmp, &e->list, entry) {
> +		if (chunk->sync_seq <= last_seq) {
> +			list_del(&chunk->entry);
> +			kmem_cache_free(amdgpu_mux_chunk_slab, chunk);
> +		} else {
> +			size++;


What's the role of size here ? Seems to have no impact.

Andrey


> +		}
> +	}
> +}
> +
> +void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_mux_entry *e;
> +	struct amdgpu_mux_chunk *chunk;
> +
> +	e = amdgpu_get_sw_entry(mux, ring);
> +	if (!e) {
> +		DRM_ERROR("cannot find entry!\n");
> +		return;
> +	}
> +
> +	chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry);
> +	if (!chunk) {
> +		DRM_ERROR("cannot find chunk!\n");
> +		return;
> +	}
> +
> +	chunk->end = ring->wptr;
> +	chunk->sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
> +
> +	scan_and_remove_signaled_chunk(mux, ring);
> +}
> +
> +static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux)
> +{
> +	struct amdgpu_mux_entry *e = NULL;
> +	struct amdgpu_mux_chunk *chunk;
> +	uint32_t seq, last_seq;
> +	int i;
> +
> +	/*find low priority entries:*/
> +	spin_lock(&mux->lock);
> +
> +	for (i = 0; i < mux->num_ring_entries; i++) {
> +		if (mux->ring_entries[i].ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
> +				e = &mux->ring_entries[i];
> +			break;
> +		}
> +	}
> +
> +	if (!e) {
> +		DRM_ERROR("%s no low priority ring found\n", __func__);
> +		return;
> +	}
> +
> +	last_seq = atomic_read(&e->ring->fence_drv.last_seq);
> +	seq = READ_ONCE(e->ring->fence_drv.sync_seq);
> +	if (seq == last_seq) {
> +		DRM_INFO("skip as fence signaled seq=%x\n", seq);
> +		return;
> +	}
> +	DRM_INFO("begin to copy resubmit chunks\n");
> +
> +	/*resubmit all the fences between (last_seq, seq]*/
> +	list_for_each_entry(chunk, &e->list, entry) {
> +		if (chunk->sync_seq > last_seq) {
> +			copy_pkt_from_sw_ring(mux, e->ring, chunk->start, chunk->end);
> +			amdgpu_ring_commit(mux->real_ring);
> +		}
> +	}
> +	spin_unlock(&mux->lock);
> +
> +	del_timer(&mux->resubmit_timer);
> +	mux->s_resubmit = false;
> +}
> +
> +static void amdgpu_mux_resubmit_fallback(struct timer_list *t)
> +{
> +	struct amdgpu_ring_mux *mux = from_timer(mux, t, resubmit_timer);
> +
> +	DRM_INFO("calling %s\n", __func__);
> +	amdgpu_mux_resubmit_chunks(mux);
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> index d058c43bb063..1d91c235061a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> @@ -44,17 +44,27 @@ struct amdgpu_mux_entry {
>   	u64 sw_cptr;
>   	u64 sw_rptr;
>   	u64 sw_wptr;
> +
> +	struct list_head list;
>   };
>   
>   struct amdgpu_ring_mux {
>   	struct amdgpu_ring *real_ring;
>   
>   	struct amdgpu_mux_entry ring_entries[AMDGPU_MAX_GFX_RINGS];
> -
>   	unsigned num_ring_entries;
>   
>   	spinlock_t			lock;
>   
> +	bool s_resubmit;
> +	struct timer_list		resubmit_timer;
> +};
> +
> +struct amdgpu_mux_chunk {
> +	struct list_head entry;
> +	uint32_t sync_seq;
> +	u64 start;
> +	u64 end;
>   };
>   
>   int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
> @@ -64,4 +74,8 @@ void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring
>   u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
>   u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
>   
> +void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
> +void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
> +void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux);
> +
>   #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
> index 452d0ff37758..143a84c18534 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
> @@ -26,6 +26,7 @@
>   
>   #include "amdgpu_sw_ring.h"
>   #include "amdgpu_ring_mux.h"
> +#include "amdgpu_mcbp.h"
>   
>   #define amdgpu_ring_get_gpu_addr(ring, offset)				\
>   	(ring->is_mes_queue ?						\
> @@ -202,3 +203,28 @@ void amdgpu_sw_ring_fini(struct amdgpu_ring *ring)
>   	ring->adev->rings[ring->idx] = NULL;
>   }
>   
> +void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_device *adev = ring->adev;
> +	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
> +
> +	BUG_ON(!ring->is_sw_ring);
> +	if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) {
> +		if (amdgpu_mcbp_scan(mux) > 0)
> +			amdgpu_mcbp_trigger_preempt(mux);
> +		return;
> +	}
> +
> +	amdgpu_ring_mux_start_ib(mux, ring);
> +}
> +
> +void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_device *adev = ring->adev;
> +	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
> +
> +	BUG_ON(!ring->is_sw_ring);
> +	if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT)
> +		return;
> +	amdgpu_ring_mux_end_ib(mux, ring);
> +}

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3)
  2022-09-09 16:45 ` Andrey Grodzovsky
@ 2022-09-12 10:20   ` Christian König
  2022-09-12 13:22     ` Andrey Grodzovsky
  0 siblings, 1 reply; 28+ messages in thread
From: Christian König @ 2022-09-12 10:20 UTC (permalink / raw)
  To: Andrey Grodzovsky, jiadong.zhu, amd-gfx; +Cc: Ray.Huang

Am 09.09.22 um 18:45 schrieb Andrey Grodzovsky:
>
> On 2022-09-08 21:50, jiadong.zhu@amd.com wrote:
>> From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>
>>
>> The software ring is created to support priority
>> context while there is only one hardware queue
>> for gfx.
>>
>> Every software rings has its fence driver and could
>> be used as an ordinary ring for the gpu_scheduler.
>> Multiple software rings are binded to a real ring
>> with the ring muxer. The packages committed on the
>> software ring are copied to the real ring.
>>
>> v2: use array to store software ring entry.
>> v3: remove unnecessary prints.
>>
>> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/Makefile          |   3 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h      |   3 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h     |   3 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 182 +++++++++++++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  67 ++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  | 204 +++++++++++++++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h  |  48 +++++
>>   7 files changed, 509 insertions(+), 1 deletion(-)
>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>> index 3e0e2eb7e235..85224bc81ce5 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>> @@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>>       amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o 
>> amdgpu_nbio.o \
>>       amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
>>       amdgpu_fw_attestation.o amdgpu_securedisplay.o \
>> -    amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
>> +    amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
>> +    amdgpu_sw_ring.o amdgpu_ring_mux.o
>>     amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> index 53526ffb2ce1..0de8e3cd0f1c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> @@ -33,6 +33,7 @@
>>   #include "amdgpu_imu.h"
>>   #include "soc15.h"
>>   #include "amdgpu_ras.h"
>> +#include "amdgpu_ring_mux.h"
>>     /* GFX current status */
>>   #define AMDGPU_GFX_NORMAL_MODE            0x00000000L
>> @@ -346,6 +347,8 @@ struct amdgpu_gfx {
>>       struct amdgpu_gfx_ras        *ras;
>>         bool                is_poweron;
>> +
>> +    struct amdgpu_ring_mux            muxer;
>>   };
>>     #define amdgpu_gfx_get_gpu_clock_counter(adev) 
>> (adev)->gfx.funcs->get_gpu_clock_counter((adev))
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> index 7d89a52091c0..fe33a683bfba 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> @@ -278,6 +278,9 @@ struct amdgpu_ring {
>>       bool            is_mes_queue;
>>       uint32_t        hw_queue_id;
>>       struct amdgpu_mes_ctx_data *mes_ctx;
>> +
>> +    bool            is_sw_ring;
>> +
>>   };
>>     #define amdgpu_ring_parse_cs(r, p, job, ib) 
>> ((r)->funcs->parse_cs((p), (job), (ib)))
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>> new file mode 100644
>> index 000000000000..ea4a3c66119a
>> --- /dev/null
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>> @@ -0,0 +1,182 @@
>> +/*
>> + * Copyright 2022 Advanced Micro Devices, Inc.
>> + *
>> + * Permission is hereby granted, free of charge, to any person 
>> obtaining a
>> + * copy of this software and associated documentation files (the 
>> "Software"),
>> + * to deal in the Software without restriction, including without 
>> limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, 
>> sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom 
>> the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be 
>> included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
>> EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>> MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO 
>> EVENT SHALL
>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
>> DAMAGES OR
>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>> OTHERWISE,
>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
>> USE OR
>> + * OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> + */
>> +
>> +#include <drm/drm_print.h>
>> +
>> +#include "amdgpu_ring_mux.h"
>> +#include "amdgpu_ring.h"
>> +
>> +#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ/2)
>> +
>> +static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, struct 
>> amdgpu_ring *ring,
>> +    u64 s_begin, u64 s_end);
>> +
>> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct 
>> amdgpu_ring *ring)
>> +{
>> +    mux->real_ring = ring;
>> +    memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
>> +    mux->num_ring_entries = 0;
>> +    spin_lock_init(&mux->lock);
>> +    return 0;
>> +}
>> +
>> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux)
>> +{
>> +    memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
>> +    mux->num_ring_entries = 0;
>> +}
>> +
>> +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct 
>> amdgpu_ring *ring)
>> +{
>> +    struct amdgpu_mux_entry *e;
>> +
>> +    if (mux->num_ring_entries == AMDGPU_MAX_GFX_RINGS) {
>> +        DRM_ERROR("adding sw ring exceeds max gfx num\n");
>> +        return -ENOMEM;
>> +    }
>> +
>> +    e = &mux->ring_entries[mux->num_ring_entries++];
>> +
>> +    e->ring = ring;
>> +    e->start_ptr_in_hw_ring = 0;
>> +    e->end_ptr_in_hw_ring = 0;
>> +    e->sw_cptr = 0;
>> +    e->sw_rptr = 0;
>> +    e->sw_wptr = 0;
>> +
>> +    return 0;
>> +}
>> +
>> +static struct amdgpu_mux_entry *amdgpu_get_sw_entry(struct 
>> amdgpu_ring_mux *mux,
>> +                struct amdgpu_ring *ring)
>> +{
>> +    struct amdgpu_mux_entry *e;
>> +    int i;
>> +
>> +    e = NULL;
>> +    for (i = 0; i < mux->num_ring_entries; i++) {
>> +        if (mux->ring_entries[i].ring == ring) {
>> +            e = &mux->ring_entries[i];
>> +            break;
>> +        }
>> +    }
>> +
>> +    return e;
>> +}
>> +
>> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, struct 
>> amdgpu_ring *ring, u64 wptr)
>> +{
>> +    struct amdgpu_mux_entry *e;
>> +
>> +    e = amdgpu_get_sw_entry(mux, ring);
>> +    if (!e) {
>> +        DRM_ERROR("cannot find entry for sw ring\n");
>> +        return;
>> +    }
>> +
>> +    spin_lock(&mux->lock);
>
>
> A bit more generic question, I assume the spinlock here protects from 
> concurrent runs
> of amdgpu_ib_schedule. For them to be even theoretically concurrent it 
> must be from
> direct submissions to HW (because any scheduler mediated submission is 
> serialized though
> the dedicated scheduler worker thread). But in such case why we 
> protect only here ? If i am
> not missing something there is no total per HW ring lock when calling 
> amdgpu_ib_schedule today
> and we do a lot of HW accesses there to ring  which should probably be 
> protected from
> concurrent accesses.
>
> So if any one can answer this question ?

Well what we have is in general two schedulers which push their work 
into one hardware ring.

So we need a lock to make sure that only one is modifying the hw ring at 
the same time.

 From the implementation I think we first write the commands into a 
shadow ring buffer and then copy them over to the real hw ring here.

So this is the only place where we actually touch the hw ring buffer and 
need to grab the lock.

Did I get this right?

Thanks,
Christian.

>
>
>> +    e->sw_cptr = e->sw_wptr;
>> +    e->sw_wptr = wptr;
>> +    e->start_ptr_in_hw_ring = mux->real_ring->wptr;
>> +
>> +    if (copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr) == 0) {
>> +        e->end_ptr_in_hw_ring = mux->real_ring->wptr;
>> +        amdgpu_ring_commit(mux->real_ring);
>> +    }
>> +
>> +    spin_unlock(&mux->lock);
>> +}
>> +
>> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, 
>> struct amdgpu_ring *ring)
>> +{
>> +    struct amdgpu_mux_entry *e;
>> +
>> +    e = amdgpu_get_sw_entry(mux, ring);
>> +    if (!e) {
>> +        DRM_ERROR("cannot find entry for sw ring\n");
>> +        return 0;
>> +    }
>> +
>> +    return e->sw_wptr;
>> +}
>> +
>> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, 
>> struct amdgpu_ring *ring)
>> +{
>> +    struct amdgpu_mux_entry *e;
>> +    u64 r_rptr, r_wptr, offset, start, end;
>> +
>> +    e = amdgpu_get_sw_entry(mux, ring);
>> +    if (!e) {
>> +        DRM_ERROR("no sw entry found!\n");
>> +        return 0;
>> +    }
>> +
>> +    r_rptr = amdgpu_ring_get_rptr(mux->real_ring);
>> +    r_wptr = amdgpu_ring_get_wptr(mux->real_ring);
>> +
>> +    if (r_wptr < r_rptr)
>> +        r_wptr += mux->real_ring->ring_size >> 2;
>> +
>> +    start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask;
>> +    end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask;
>> +    if (start > end)
>> +        end += mux->real_ring->ring_size >> 2;
>> +    if (r_rptr <= end && r_rptr >= start) {
>> +        offset = r_rptr - start;
>> +        e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask;
>> +    } else if (r_rptr < start) {
>> +        e->sw_rptr = e->sw_cptr;
>> +    } else {
>> +        e->sw_rptr = e->sw_wptr;
>> +    }
>> +
>> +    return e->sw_rptr;
>> +}
>> +
>> +/*copy packages on sw ring range[begin, end) */
>> +static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, struct 
>> amdgpu_ring *ring,
>> +    u64 s_begin, u64 s_end)
>> +{
>> +    u64 begin, end, r_begin, r_end;
>> +    struct amdgpu_ring *real_ring = mux->real_ring;
>> +
>> +    begin = s_begin & ring->buf_mask;
>> +    end = s_end & ring->buf_mask;
>> +
>> +    r_begin = real_ring->wptr & real_ring->buf_mask;
>> +    if (begin == end)
>> +        return -ERANGE;
>> +    if (begin > end) {
>> +        amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) + end - 
>> begin);
>> +        amdgpu_ring_write_multiple(real_ring, (void 
>> *)&ring->ring[begin],
>> +            (ring->ring_size >> 2) - begin);
>> +        amdgpu_ring_write_multiple(real_ring, (void 
>> *)&ring->ring[0], end);
>> +    } else {
>> +        amdgpu_ring_alloc(real_ring, end - begin);
>> +        amdgpu_ring_write_multiple(real_ring, (void 
>> *)&ring->ring[begin], end - begin);
>> +    }
>> +
>> +    r_end = real_ring->wptr & real_ring->buf_mask;
>> +
>> +    return 0;
>> +}
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>> new file mode 100644
>> index 000000000000..d058c43bb063
>> --- /dev/null
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>> @@ -0,0 +1,67 @@
>> +/*
>> + * Copyright 2022 Advanced Micro Devices, Inc.
>> + *
>> + * Permission is hereby granted, free of charge, to any person 
>> obtaining a
>> + * copy of this software and associated documentation files (the 
>> "Software"),
>> + * to deal in the Software without restriction, including without 
>> limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, 
>> sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom 
>> the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be 
>> included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
>> EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>> MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO 
>> EVENT SHALL
>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
>> DAMAGES OR
>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>> OTHERWISE,
>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
>> USE OR
>> + * OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> + */
>> +
>> +#ifndef __AMDGPU_RING_MUX__
>> +#define __AMDGPU_RING_MUX__
>> +
>> +#include <linux/timer.h>
>> +#include <linux/spinlock.h>
>> +#include "amdgpu_ring.h"
>> +
>> +struct amdgpu_ring;
>> +/*
>> + * start_ptr_in_hw_ring - last copied start loc on hw ring
>> + * end_ptr_in_hw_ring - last copied end loc on hw ring
>> + *sw_cptr -the begin of copy ptr in sw ring
>> + *sw_rptr; the read ptr in sw ring
>> + *sw_wptr; the write ptr in sw ring
>> + */
>> +struct amdgpu_mux_entry {
>> +    struct amdgpu_ring    *ring;
>> +    u64 start_ptr_in_hw_ring;
>> +    u64 end_ptr_in_hw_ring;
>> +
>> +    u64 sw_cptr;
>> +    u64 sw_rptr;
>> +    u64 sw_wptr;
>> +};
>> +
>> +struct amdgpu_ring_mux {
>> +    struct amdgpu_ring *real_ring;
>> +
>> +    struct amdgpu_mux_entry ring_entries[AMDGPU_MAX_GFX_RINGS];
>> +
>> +    unsigned num_ring_entries;
>> +
>> +    spinlock_t            lock;
>> +
>> +};
>> +
>> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct 
>> amdgpu_ring *ring);
>> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux);
>> +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct 
>> amdgpu_ring *ring);
>> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, struct 
>> amdgpu_ring *ring, u64 wptr);
>> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, 
>> struct amdgpu_ring *ring);
>> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, 
>> struct amdgpu_ring *ring);
>> +
>> +#endif
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>> new file mode 100644
>> index 000000000000..452d0ff37758
>> --- /dev/null
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>> @@ -0,0 +1,204 @@
>> +/*
>> + * Copyright 2022 Advanced Micro Devices, Inc.
>> + * All Rights Reserved.
>> + *
>> + * Permission is hereby granted, free of charge, to any person 
>> obtaining a
>> + * copy of this software and associated documentation files (the
>> + * "Software"), to deal in the Software without restriction, including
>> + * without limitation the rights to use, copy, modify, merge, publish,
>> + * distribute, sub license, and/or sell copies of the Software, and to
>> + * permit persons to whom the Software is furnished to do so, 
>> subject to
>> + * the following conditions:
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
>> EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>> MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO 
>> EVENT SHALL
>> + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR 
>> ANY CLAIM,
>> + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
>> TORT OR
>> + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
>> SOFTWARE OR THE
>> + * USE OR OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> + * The above copyright notice and this permission notice (including the
>> + * next paragraph) shall be included in all copies or substantial 
>> portions
>> + * of the Software.
>> + *
>> + */
>> +
>> +#include "amdgpu_sw_ring.h"
>> +#include "amdgpu_ring_mux.h"
>> +
>> +#define amdgpu_ring_get_gpu_addr(ring, offset)                \
>> +    (ring->is_mes_queue ?                        \
>> +     (ring->mes_ctx->meta_data_gpu_addr + offset) :            \
>> +     (ring->adev->wb.gpu_addr + offset * 4))
>> +
>> +#define amdgpu_ring_get_cpu_addr(ring, offset)                \
>> +    (ring->is_mes_queue ?                        \
>> +     (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \
>> +     (&ring->adev->wb.wb[offset]))
>> +
>> +
>> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct 
>> amdgpu_ring *ring,
>> +             unsigned int max_dw, struct amdgpu_irq_src *irq_src,
>> +             unsigned int irq_type, unsigned int hw_prio,
>> +             atomic_t *sched_score)
>> +{
>> +    int r;
>> +    int sched_hw_submission = amdgpu_sched_hw_submission;
>> +    u32 *num_sched;
>> +    u32 hw_ip;
>> +
>> +    BUG_ON(!ring->is_sw_ring);
>> +
>> +    if (ring->adev == NULL) {
>> +        if (adev->num_rings >= AMDGPU_MAX_RINGS)
>> +            return -EINVAL;
>> +
>> +        ring->adev = adev;
>> +        ring->num_hw_submission = sched_hw_submission;
>> +        ring->sched_score = sched_score;
>> +        ring->vmid_wait = dma_fence_get_stub();
>> +
>> +        if (!ring->is_mes_queue) {
>> +            ring->idx = adev->num_rings++;
>> +            adev->rings[ring->idx] = ring;
>> +        }
>> +
>> +        r = amdgpu_fence_driver_init_ring(ring);
>> +        if (r)
>> +            return r;
>> +    }
>> +
>> +    r = amdgpu_device_wb_get(adev, &ring->fence_offs);
>> +    if (r) {
>> +        dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", 
>> r);
>> +        return r;
>> +    }
>> +
>> +    r = amdgpu_device_wb_get(adev, &ring->fence_offs);
>> +    if (r) {
>> +        dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", 
>> r);
>> +        return r;
>> +    }
>
>
> Looks like a typo copy pase duplicate of the above
>
>> +
>> +    r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
>> +    if (r) {
>> +        dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc 
>> failed\n", r);
>> +        return r;
>> +    }
>> +
>> +    r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
>> +    if (r) {
>> +        dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc 
>> failed\n", r);
>> +        return r;
>> +    }
>> +
>> +    ring->fence_gpu_addr =
>> +        amdgpu_ring_get_gpu_addr(ring, ring->fence_offs);
>> +    ring->fence_cpu_addr =
>> +        amdgpu_ring_get_cpu_addr(ring, ring->fence_offs);
>> +
>> +    ring->trail_fence_gpu_addr =
>> +        amdgpu_ring_get_gpu_addr(ring, ring->trail_fence_offs);
>> +    ring->trail_fence_cpu_addr =
>> +        amdgpu_ring_get_cpu_addr(ring, ring->trail_fence_offs);
>> +
>> +    ring->cond_exe_gpu_addr =
>> +        amdgpu_ring_get_gpu_addr(ring, ring->cond_exe_offs);
>> +    ring->cond_exe_cpu_addr =
>> +        amdgpu_ring_get_cpu_addr(ring, ring->cond_exe_offs);
>> +
>> +    /* always set cond_exec_polling to CONTINUE */
>> +    *ring->cond_exe_cpu_addr = 1;
>> +
>> +    r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
>> +    if (r) {
>> +        dev_err(adev->dev, "failed initializing fences (%d).\n", r);
>> +        return r;
>> +    }
>> +
>> +    ring->ring_size = roundup_pow_of_two(max_dw * 4 * 
>> sched_hw_submission);
>> +
>> +    ring->buf_mask = (ring->ring_size / 4) - 1;
>> +    ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
>> +        0xffffffffffffffff : ring->buf_mask;
>> +
>> +    /* Allocate ring buffer */
>> +    if (ring->ring == NULL) {
>> +        ring->ring = kzalloc(ring->ring_size + 
>> ring->funcs->extra_dw, GFP_KERNEL);
>> +        if (!ring->ring) {
>> +            dev_err(adev->dev, "(%d) swring create failed\n", r);
>> +            return r;
>> +        }
>> +
>> +        amdgpu_ring_clear_ring(ring);
>> +    }
>> +
>> +    ring->max_dw = max_dw;
>> +    ring->hw_prio = hw_prio;
>> +
>> +    if (!ring->no_scheduler) {
>> +        hw_ip = ring->funcs->type;
>> +        num_sched = &adev->gpu_sched[hw_ip][hw_prio].num_scheds;
>> + adev->gpu_sched[hw_ip][hw_prio].sched[(*num_sched)++] =
>> +            &ring->sched;
>> +    }
>> +
>> +    return 0;
>> +}
>
>
> In general i see this function is a big one to one subset of 
> amdgpu_ring_init.
> Could you maybe see a way to refactor such that this function is the base
> and for HW related code that different (like BO allocation for ring 
> buffer) you
> maybe can add if (!ring->sw_ring)... and add those code snippets ? To 
> avoid
> substantial code duplication.
>
> Andrey
>
>
>> +
>> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring)
>> +{
>> +    struct amdgpu_device *adev = ring->adev;
>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>> +
>> +    BUG_ON(!ring->is_sw_ring);
>> +    return amdgpu_ring_get_rptr_from_mux(mux, ring);
>> +}
>> +
>> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring)
>> +{
>> +    struct amdgpu_device *adev = ring->adev;
>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>> +
>> +    BUG_ON(!ring->is_sw_ring);
>> +    return amdgpu_ring_get_wptr_from_mux(mux, ring);
>> +}
>> +
>> +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring)
>> +{
>> +    BUG_ON(!ring->is_sw_ring);
>> +}
>> +
>> +void amdgpu_sw_ring_commit(struct amdgpu_ring *ring)
>> +{
>> +    struct amdgpu_device *adev = ring->adev;
>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>> +
>> +    BUG_ON(!ring->is_sw_ring);
>> +    amdgpu_ring_set_wptr_to_mux(mux, ring, ring->wptr);
>> +}
>> +
>> +void amdgpu_sw_ring_fini(struct amdgpu_ring *ring)
>> +{
>> +    BUG_ON(!ring->is_sw_ring);
>> +
>> +    /* Not to finish a ring which is not initialized */
>> +    if (!(ring->adev) ||
>> +        (!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
>> +        return;
>> +
>> +    ring->sched.ready = false;
>> +
>> +    amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
>> +    amdgpu_device_wb_free(ring->adev, ring->fence_offs);
>> +
>> +    kfree((void *)ring->ring);
>> +
>> +    dma_fence_put(ring->vmid_wait);
>> +    ring->vmid_wait = NULL;
>> +    ring->me = 0;
>> +
>> +    ring->adev->rings[ring->idx] = NULL;
>> +}
>> +
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>> new file mode 100644
>> index 000000000000..c05d8a94ad0c
>> --- /dev/null
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>> @@ -0,0 +1,48 @@
>> +/*
>> + * Copyright 2012 Advanced Micro Devices, Inc.
>> + *
>> + * Permission is hereby granted, free of charge, to any person 
>> obtaining a
>> + * copy of this software and associated documentation files (the 
>> "Software"),
>> + * to deal in the Software without restriction, including without 
>> limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, 
>> sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom 
>> the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be 
>> included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
>> EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>> MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO 
>> EVENT SHALL
>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
>> DAMAGES OR
>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>> OTHERWISE,
>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
>> USE OR
>> + * OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> + */
>> +
>> +#include <drm/amdgpu_drm.h>
>> +#include <drm/gpu_scheduler.h>
>> +#include <drm/drm_print.h>
>> +
>> +#include "amdgpu_irq.h"
>> +#include "amdgpu_ring.h"
>> +#include "amdgpu.h"
>> +
>> +#ifndef __AMDGPU_SWRING_H__
>> +#define __AMDGPU_SWRING_H__
>> +
>> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct 
>> amdgpu_ring *sw_ring,
>> +             unsigned int max_dw, struct amdgpu_irq_src *irq_src,
>> +             unsigned int irq_type, unsigned int hw_prio,
>> +             atomic_t *sched_score);
>> +void amdgpu_sw_ring_fini(struct amdgpu_ring *ring);
>> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring);
>> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring);
>> +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring);
>> +void amdgpu_sw_ring_commit(struct amdgpu_ring *ring);
>> +
>> +void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
>> +void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
>> +
>> +#endif


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3)
  2022-09-12 10:20   ` Christian König
@ 2022-09-12 13:22     ` Andrey Grodzovsky
  2022-09-12 13:27       ` Christian König
  0 siblings, 1 reply; 28+ messages in thread
From: Andrey Grodzovsky @ 2022-09-12 13:22 UTC (permalink / raw)
  To: Christian König, jiadong.zhu, amd-gfx; +Cc: Ray.Huang


On 2022-09-12 06:20, Christian König wrote:
> Am 09.09.22 um 18:45 schrieb Andrey Grodzovsky:
>>
>> On 2022-09-08 21:50, jiadong.zhu@amd.com wrote:
>>> From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>
>>>
>>> The software ring is created to support priority
>>> context while there is only one hardware queue
>>> for gfx.
>>>
>>> Every software rings has its fence driver and could
>>> be used as an ordinary ring for the gpu_scheduler.
>>> Multiple software rings are binded to a real ring
>>> with the ring muxer. The packages committed on the
>>> software ring are copied to the real ring.
>>>
>>> v2: use array to store software ring entry.
>>> v3: remove unnecessary prints.
>>>
>>> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/Makefile          |   3 +-
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h      |   3 +
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h     |   3 +
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 182 +++++++++++++++++
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  67 ++++++
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  | 204 
>>> +++++++++++++++++++
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h  |  48 +++++
>>>   7 files changed, 509 insertions(+), 1 deletion(-)
>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
>>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>>> index 3e0e2eb7e235..85224bc81ce5 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>>> @@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>>>       amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o 
>>> amdgpu_nbio.o \
>>>       amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
>>>       amdgpu_fw_attestation.o amdgpu_securedisplay.o \
>>> -    amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
>>> +    amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
>>> +    amdgpu_sw_ring.o amdgpu_ring_mux.o
>>>     amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>> index 53526ffb2ce1..0de8e3cd0f1c 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>> @@ -33,6 +33,7 @@
>>>   #include "amdgpu_imu.h"
>>>   #include "soc15.h"
>>>   #include "amdgpu_ras.h"
>>> +#include "amdgpu_ring_mux.h"
>>>     /* GFX current status */
>>>   #define AMDGPU_GFX_NORMAL_MODE            0x00000000L
>>> @@ -346,6 +347,8 @@ struct amdgpu_gfx {
>>>       struct amdgpu_gfx_ras        *ras;
>>>         bool                is_poweron;
>>> +
>>> +    struct amdgpu_ring_mux            muxer;
>>>   };
>>>     #define amdgpu_gfx_get_gpu_clock_counter(adev) 
>>> (adev)->gfx.funcs->get_gpu_clock_counter((adev))
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>> index 7d89a52091c0..fe33a683bfba 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>> @@ -278,6 +278,9 @@ struct amdgpu_ring {
>>>       bool            is_mes_queue;
>>>       uint32_t        hw_queue_id;
>>>       struct amdgpu_mes_ctx_data *mes_ctx;
>>> +
>>> +    bool            is_sw_ring;
>>> +
>>>   };
>>>     #define amdgpu_ring_parse_cs(r, p, job, ib) 
>>> ((r)->funcs->parse_cs((p), (job), (ib)))
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>> new file mode 100644
>>> index 000000000000..ea4a3c66119a
>>> --- /dev/null
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>> @@ -0,0 +1,182 @@
>>> +/*
>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>> + *
>>> + * Permission is hereby granted, free of charge, to any person 
>>> obtaining a
>>> + * copy of this software and associated documentation files (the 
>>> "Software"),
>>> + * to deal in the Software without restriction, including without 
>>> limitation
>>> + * the rights to use, copy, modify, merge, publish, distribute, 
>>> sublicense,
>>> + * and/or sell copies of the Software, and to permit persons to 
>>> whom the
>>> + * Software is furnished to do so, subject to the following 
>>> conditions:
>>> + *
>>> + * The above copyright notice and this permission notice shall be 
>>> included in
>>> + * all copies or substantial portions of the Software.
>>> + *
>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
>>> EXPRESS OR
>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>> MERCHANTABILITY,
>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO 
>>> EVENT SHALL
>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
>>> DAMAGES OR
>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>>> OTHERWISE,
>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
>>> USE OR
>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>> + *
>>> + */
>>> +
>>> +#include <drm/drm_print.h>
>>> +
>>> +#include "amdgpu_ring_mux.h"
>>> +#include "amdgpu_ring.h"
>>> +
>>> +#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ/2)
>>> +
>>> +static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, 
>>> struct amdgpu_ring *ring,
>>> +    u64 s_begin, u64 s_end);
>>> +
>>> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct 
>>> amdgpu_ring *ring)
>>> +{
>>> +    mux->real_ring = ring;
>>> +    memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
>>> +    mux->num_ring_entries = 0;
>>> +    spin_lock_init(&mux->lock);
>>> +    return 0;
>>> +}
>>> +
>>> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux)
>>> +{
>>> +    memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
>>> +    mux->num_ring_entries = 0;
>>> +}
>>> +
>>> +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct 
>>> amdgpu_ring *ring)
>>> +{
>>> +    struct amdgpu_mux_entry *e;
>>> +
>>> +    if (mux->num_ring_entries == AMDGPU_MAX_GFX_RINGS) {
>>> +        DRM_ERROR("adding sw ring exceeds max gfx num\n");
>>> +        return -ENOMEM;
>>> +    }
>>> +
>>> +    e = &mux->ring_entries[mux->num_ring_entries++];
>>> +
>>> +    e->ring = ring;
>>> +    e->start_ptr_in_hw_ring = 0;
>>> +    e->end_ptr_in_hw_ring = 0;
>>> +    e->sw_cptr = 0;
>>> +    e->sw_rptr = 0;
>>> +    e->sw_wptr = 0;
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static struct amdgpu_mux_entry *amdgpu_get_sw_entry(struct 
>>> amdgpu_ring_mux *mux,
>>> +                struct amdgpu_ring *ring)
>>> +{
>>> +    struct amdgpu_mux_entry *e;
>>> +    int i;
>>> +
>>> +    e = NULL;
>>> +    for (i = 0; i < mux->num_ring_entries; i++) {
>>> +        if (mux->ring_entries[i].ring == ring) {
>>> +            e = &mux->ring_entries[i];
>>> +            break;
>>> +        }
>>> +    }
>>> +
>>> +    return e;
>>> +}
>>> +
>>> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, 
>>> struct amdgpu_ring *ring, u64 wptr)
>>> +{
>>> +    struct amdgpu_mux_entry *e;
>>> +
>>> +    e = amdgpu_get_sw_entry(mux, ring);
>>> +    if (!e) {
>>> +        DRM_ERROR("cannot find entry for sw ring\n");
>>> +        return;
>>> +    }
>>> +
>>> +    spin_lock(&mux->lock);
>>
>>
>> A bit more generic question, I assume the spinlock here protects from 
>> concurrent runs
>> of amdgpu_ib_schedule. For them to be even theoretically concurrent 
>> it must be from
>> direct submissions to HW (because any scheduler mediated submission 
>> is serialized though
>> the dedicated scheduler worker thread). But in such case why we 
>> protect only here ? If i am
>> not missing something there is no total per HW ring lock when calling 
>> amdgpu_ib_schedule today
>> and we do a lot of HW accesses there to ring  which should probably 
>> be protected from
>> concurrent accesses.
>>
>> So if any one can answer this question ?
>
> Well what we have is in general two schedulers which push their work 
> into one hardware ring.
>
> So we need a lock to make sure that only one is modifying the hw ring 
> at the same time.
>
> From the implementation I think we first write the commands into a 
> shadow ring buffer and then copy them over to the real hw ring here.
>
> So this is the only place where we actually touch the hw ring buffer 
> and to need to grab the lock.
>
> Did I get this right?
>
> Thanks,
> Christian.


For the case of the sw ring yes, but I was asking in general, accesses 
to real HW rings, amdgpu_ib_schedule writes to HW rings,
we may be accessing the same HW ring from 2 different contexts when doing 
direct submissions (i.e. calling amdgpu_ib_schedule
directly from 2 threads concurrently); this opens the possibility of 
concurrent access to HW. Or am I missing something here?

Andrey


>
>>
>>
>>> +    e->sw_cptr = e->sw_wptr;
>>> +    e->sw_wptr = wptr;
>>> +    e->start_ptr_in_hw_ring = mux->real_ring->wptr;
>>> +
>>> +    if (copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr) == 0) {
>>> +        e->end_ptr_in_hw_ring = mux->real_ring->wptr;
>>> +        amdgpu_ring_commit(mux->real_ring);
>>> +    }
>>> +
>>> +    spin_unlock(&mux->lock);
>>> +}
>>> +
>>> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, 
>>> struct amdgpu_ring *ring)
>>> +{
>>> +    struct amdgpu_mux_entry *e;
>>> +
>>> +    e = amdgpu_get_sw_entry(mux, ring);
>>> +    if (!e) {
>>> +        DRM_ERROR("cannot find entry for sw ring\n");
>>> +        return 0;
>>> +    }
>>> +
>>> +    return e->sw_wptr;
>>> +}
>>> +
>>> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, 
>>> struct amdgpu_ring *ring)
>>> +{
>>> +    struct amdgpu_mux_entry *e;
>>> +    u64 r_rptr, r_wptr, offset, start, end;
>>> +
>>> +    e = amdgpu_get_sw_entry(mux, ring);
>>> +    if (!e) {
>>> +        DRM_ERROR("no sw entry found!\n");
>>> +        return 0;
>>> +    }
>>> +
>>> +    r_rptr = amdgpu_ring_get_rptr(mux->real_ring);
>>> +    r_wptr = amdgpu_ring_get_wptr(mux->real_ring);
>>> +
>>> +    if (r_wptr < r_rptr)
>>> +        r_wptr += mux->real_ring->ring_size >> 2;
>>> +
>>> +    start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask;
>>> +    end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask;
>>> +    if (start > end)
>>> +        end += mux->real_ring->ring_size >> 2;
>>> +    if (r_rptr <= end && r_rptr >= start) {
>>> +        offset = r_rptr - start;
>>> +        e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask;
>>> +    } else if (r_rptr < start) {
>>> +        e->sw_rptr = e->sw_cptr;
>>> +    } else {
>>> +        e->sw_rptr = e->sw_wptr;
>>> +    }
>>> +
>>> +    return e->sw_rptr;
>>> +}
>>> +
>>> +/*copy packages on sw ring range[begin, end) */
>>> +static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, 
>>> struct amdgpu_ring *ring,
>>> +    u64 s_begin, u64 s_end)
>>> +{
>>> +    u64 begin, end, r_begin, r_end;
>>> +    struct amdgpu_ring *real_ring = mux->real_ring;
>>> +
>>> +    begin = s_begin & ring->buf_mask;
>>> +    end = s_end & ring->buf_mask;
>>> +
>>> +    r_begin = real_ring->wptr & real_ring->buf_mask;
>>> +    if (begin == end)
>>> +        return -ERANGE;
>>> +    if (begin > end) {
>>> +        amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) + end - 
>>> begin);
>>> +        amdgpu_ring_write_multiple(real_ring, (void 
>>> *)&ring->ring[begin],
>>> +            (ring->ring_size >> 2) - begin);
>>> +        amdgpu_ring_write_multiple(real_ring, (void 
>>> *)&ring->ring[0], end);
>>> +    } else {
>>> +        amdgpu_ring_alloc(real_ring, end - begin);
>>> +        amdgpu_ring_write_multiple(real_ring, (void 
>>> *)&ring->ring[begin], end - begin);
>>> +    }
>>> +
>>> +    r_end = real_ring->wptr & real_ring->buf_mask;
>>> +
>>> +    return 0;
>>> +}
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>> new file mode 100644
>>> index 000000000000..d058c43bb063
>>> --- /dev/null
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>> @@ -0,0 +1,67 @@
>>> +/*
>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>> + *
>>> + * Permission is hereby granted, free of charge, to any person 
>>> obtaining a
>>> + * copy of this software and associated documentation files (the 
>>> "Software"),
>>> + * to deal in the Software without restriction, including without 
>>> limitation
>>> + * the rights to use, copy, modify, merge, publish, distribute, 
>>> sublicense,
>>> + * and/or sell copies of the Software, and to permit persons to 
>>> whom the
>>> + * Software is furnished to do so, subject to the following 
>>> conditions:
>>> + *
>>> + * The above copyright notice and this permission notice shall be 
>>> included in
>>> + * all copies or substantial portions of the Software.
>>> + *
>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
>>> EXPRESS OR
>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>> MERCHANTABILITY,
>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO 
>>> EVENT SHALL
>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
>>> DAMAGES OR
>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>>> OTHERWISE,
>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
>>> USE OR
>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>> + *
>>> + */
>>> +
>>> +#ifndef __AMDGPU_RING_MUX__
>>> +#define __AMDGPU_RING_MUX__
>>> +
>>> +#include <linux/timer.h>
>>> +#include <linux/spinlock.h>
>>> +#include "amdgpu_ring.h"
>>> +
>>> +struct amdgpu_ring;
>>> +/*
>>> + * start_ptr_in_hw_ring - last copied start loc on hw ring
>>> + * end_ptr_in_hw_ring - last copied end loc on hw ring
>>> + *sw_cptr -the begin of copy ptr in sw ring
>>> + *sw_rptr; the read ptr in sw ring
>>> + *sw_wptr; the write ptr in sw ring
>>> + */
>>> +struct amdgpu_mux_entry {
>>> +    struct amdgpu_ring    *ring;
>>> +    u64 start_ptr_in_hw_ring;
>>> +    u64 end_ptr_in_hw_ring;
>>> +
>>> +    u64 sw_cptr;
>>> +    u64 sw_rptr;
>>> +    u64 sw_wptr;
>>> +};
>>> +
>>> +struct amdgpu_ring_mux {
>>> +    struct amdgpu_ring *real_ring;
>>> +
>>> +    struct amdgpu_mux_entry ring_entries[AMDGPU_MAX_GFX_RINGS];
>>> +
>>> +    unsigned num_ring_entries;
>>> +
>>> +    spinlock_t            lock;
>>> +
>>> +};
>>> +
>>> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct 
>>> amdgpu_ring *ring);
>>> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux);
>>> +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct 
>>> amdgpu_ring *ring);
>>> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, 
>>> struct amdgpu_ring *ring, u64 wptr);
>>> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, 
>>> struct amdgpu_ring *ring);
>>> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, 
>>> struct amdgpu_ring *ring);
>>> +
>>> +#endif
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>> new file mode 100644
>>> index 000000000000..452d0ff37758
>>> --- /dev/null
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>> @@ -0,0 +1,204 @@
>>> +/*
>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>> + * All Rights Reserved.
>>> + *
>>> + * Permission is hereby granted, free of charge, to any person 
>>> obtaining a
>>> + * copy of this software and associated documentation files (the
>>> + * "Software"), to deal in the Software without restriction, including
>>> + * without limitation the rights to use, copy, modify, merge, publish,
>>> + * distribute, sub license, and/or sell copies of the Software, and to
>>> + * permit persons to whom the Software is furnished to do so, 
>>> subject to
>>> + * the following conditions:
>>> + *
>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
>>> EXPRESS OR
>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>> MERCHANTABILITY,
>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO 
>>> EVENT SHALL
>>> + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE 
>>> FOR ANY CLAIM,
>>> + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
>>> TORT OR
>>> + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
>>> SOFTWARE OR THE
>>> + * USE OR OTHER DEALINGS IN THE SOFTWARE.
>>> + *
>>> + * The above copyright notice and this permission notice (including 
>>> the
>>> + * next paragraph) shall be included in all copies or substantial 
>>> portions
>>> + * of the Software.
>>> + *
>>> + */
>>> +
>>> +#include "amdgpu_sw_ring.h"
>>> +#include "amdgpu_ring_mux.h"
>>> +
>>> +#define amdgpu_ring_get_gpu_addr(ring, offset) \
>>> +    (ring->is_mes_queue ?                        \
>>> +     (ring->mes_ctx->meta_data_gpu_addr + offset) :            \
>>> +     (ring->adev->wb.gpu_addr + offset * 4))
>>> +
>>> +#define amdgpu_ring_get_cpu_addr(ring, offset) \
>>> +    (ring->is_mes_queue ?                        \
>>> +     (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \
>>> +     (&ring->adev->wb.wb[offset]))
>>> +
>>> +
>>> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct 
>>> amdgpu_ring *ring,
>>> +             unsigned int max_dw, struct amdgpu_irq_src *irq_src,
>>> +             unsigned int irq_type, unsigned int hw_prio,
>>> +             atomic_t *sched_score)
>>> +{
>>> +    int r;
>>> +    int sched_hw_submission = amdgpu_sched_hw_submission;
>>> +    u32 *num_sched;
>>> +    u32 hw_ip;
>>> +
>>> +    BUG_ON(!ring->is_sw_ring);
>>> +
>>> +    if (ring->adev == NULL) {
>>> +        if (adev->num_rings >= AMDGPU_MAX_RINGS)
>>> +            return -EINVAL;
>>> +
>>> +        ring->adev = adev;
>>> +        ring->num_hw_submission = sched_hw_submission;
>>> +        ring->sched_score = sched_score;
>>> +        ring->vmid_wait = dma_fence_get_stub();
>>> +
>>> +        if (!ring->is_mes_queue) {
>>> +            ring->idx = adev->num_rings++;
>>> +            adev->rings[ring->idx] = ring;
>>> +        }
>>> +
>>> +        r = amdgpu_fence_driver_init_ring(ring);
>>> +        if (r)
>>> +            return r;
>>> +    }
>>> +
>>> +    r = amdgpu_device_wb_get(adev, &ring->fence_offs);
>>> +    if (r) {
>>> +        dev_err(adev->dev, "(%d) ring fence_offs wb alloc 
>>> failed\n", r);
>>> +        return r;
>>> +    }
>>> +
>>> +    r = amdgpu_device_wb_get(adev, &ring->fence_offs);
>>> +    if (r) {
>>> +        dev_err(adev->dev, "(%d) ring fence_offs wb alloc 
>>> failed\n", r);
>>> +        return r;
>>> +    }
>>
>>
>> Looks like a typo: a copy-paste duplicate of the above
>>
>>> +
>>> +    r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
>>> +    if (r) {
>>> +        dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc 
>>> failed\n", r);
>>> +        return r;
>>> +    }
>>> +
>>> +    r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
>>> +    if (r) {
>>> +        dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc 
>>> failed\n", r);
>>> +        return r;
>>> +    }
>>> +
>>> +    ring->fence_gpu_addr =
>>> +        amdgpu_ring_get_gpu_addr(ring, ring->fence_offs);
>>> +    ring->fence_cpu_addr =
>>> +        amdgpu_ring_get_cpu_addr(ring, ring->fence_offs);
>>> +
>>> +    ring->trail_fence_gpu_addr =
>>> +        amdgpu_ring_get_gpu_addr(ring, ring->trail_fence_offs);
>>> +    ring->trail_fence_cpu_addr =
>>> +        amdgpu_ring_get_cpu_addr(ring, ring->trail_fence_offs);
>>> +
>>> +    ring->cond_exe_gpu_addr =
>>> +        amdgpu_ring_get_gpu_addr(ring, ring->cond_exe_offs);
>>> +    ring->cond_exe_cpu_addr =
>>> +        amdgpu_ring_get_cpu_addr(ring, ring->cond_exe_offs);
>>> +
>>> +    /* always set cond_exec_polling to CONTINUE */
>>> +    *ring->cond_exe_cpu_addr = 1;
>>> +
>>> +    r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
>>> +    if (r) {
>>> +        dev_err(adev->dev, "failed initializing fences (%d).\n", r);
>>> +        return r;
>>> +    }
>>> +
>>> +    ring->ring_size = roundup_pow_of_two(max_dw * 4 * 
>>> sched_hw_submission);
>>> +
>>> +    ring->buf_mask = (ring->ring_size / 4) - 1;
>>> +    ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
>>> +        0xffffffffffffffff : ring->buf_mask;
>>> +
>>> +    /* Allocate ring buffer */
>>> +    if (ring->ring == NULL) {
>>> +        ring->ring = kzalloc(ring->ring_size + 
>>> ring->funcs->extra_dw, GFP_KERNEL);
>>> +        if (!ring->ring) {
>>> +            dev_err(adev->dev, "(%d) swring create failed\n", r);
>>> +            return r;
>>> +        }
>>> +
>>> +        amdgpu_ring_clear_ring(ring);
>>> +    }
>>> +
>>> +    ring->max_dw = max_dw;
>>> +    ring->hw_prio = hw_prio;
>>> +
>>> +    if (!ring->no_scheduler) {
>>> +        hw_ip = ring->funcs->type;
>>> +        num_sched = &adev->gpu_sched[hw_ip][hw_prio].num_scheds;
>>> + adev->gpu_sched[hw_ip][hw_prio].sched[(*num_sched)++] =
>>> +            &ring->sched;
>>> +    }
>>> +
>>> +    return 0;
>>> +}
>>
>>
>> In general i see this function is a big one to one subset of 
>> amdgpu_ring_init.
>> Could you maybe see a way to refactor such that this function is the 
>> base
>> and for HW related code that different (like BO allocation for ring 
>> buffer) you
>> maybe can add if (!ring->sw_ring)... and add those code snippets ? To 
>> avoid
>> substantial code duplication.
>>
>> Andrey
>>
>>
>>> +
>>> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring)
>>> +{
>>> +    struct amdgpu_device *adev = ring->adev;
>>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>>> +
>>> +    BUG_ON(!ring->is_sw_ring);
>>> +    return amdgpu_ring_get_rptr_from_mux(mux, ring);
>>> +}
>>> +
>>> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring)
>>> +{
>>> +    struct amdgpu_device *adev = ring->adev;
>>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>>> +
>>> +    BUG_ON(!ring->is_sw_ring);
>>> +    return amdgpu_ring_get_wptr_from_mux(mux, ring);
>>> +}
>>> +
>>> +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring)
>>> +{
>>> +    BUG_ON(!ring->is_sw_ring);
>>> +}
>>> +
>>> +void amdgpu_sw_ring_commit(struct amdgpu_ring *ring)
>>> +{
>>> +    struct amdgpu_device *adev = ring->adev;
>>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>>> +
>>> +    BUG_ON(!ring->is_sw_ring);
>>> +    amdgpu_ring_set_wptr_to_mux(mux, ring, ring->wptr);
>>> +}
>>> +
>>> +void amdgpu_sw_ring_fini(struct amdgpu_ring *ring)
>>> +{
>>> +    BUG_ON(!ring->is_sw_ring);
>>> +
>>> +    /* Not to finish a ring which is not initialized */
>>> +    if (!(ring->adev) ||
>>> +        (!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
>>> +        return;
>>> +
>>> +    ring->sched.ready = false;
>>> +
>>> +    amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
>>> +    amdgpu_device_wb_free(ring->adev, ring->fence_offs);
>>> +
>>> +    kfree((void *)ring->ring);
>>> +
>>> +    dma_fence_put(ring->vmid_wait);
>>> +    ring->vmid_wait = NULL;
>>> +    ring->me = 0;
>>> +
>>> +    ring->adev->rings[ring->idx] = NULL;
>>> +}
>>> +
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>> new file mode 100644
>>> index 000000000000..c05d8a94ad0c
>>> --- /dev/null
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>> @@ -0,0 +1,48 @@
>>> +/*
>>> + * Copyright 2012 Advanced Micro Devices, Inc.
>>> + *
>>> + * Permission is hereby granted, free of charge, to any person 
>>> obtaining a
>>> + * copy of this software and associated documentation files (the 
>>> "Software"),
>>> + * to deal in the Software without restriction, including without 
>>> limitation
>>> + * the rights to use, copy, modify, merge, publish, distribute, 
>>> sublicense,
>>> + * and/or sell copies of the Software, and to permit persons to 
>>> whom the
>>> + * Software is furnished to do so, subject to the following 
>>> conditions:
>>> + *
>>> + * The above copyright notice and this permission notice shall be 
>>> included in
>>> + * all copies or substantial portions of the Software.
>>> + *
>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
>>> EXPRESS OR
>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>> MERCHANTABILITY,
>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO 
>>> EVENT SHALL
>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
>>> DAMAGES OR
>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>>> OTHERWISE,
>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
>>> USE OR
>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>> + *
>>> + */
>>> +
>>> +#include <drm/amdgpu_drm.h>
>>> +#include <drm/gpu_scheduler.h>
>>> +#include <drm/drm_print.h>
>>> +
>>> +#include "amdgpu_irq.h"
>>> +#include "amdgpu_ring.h"
>>> +#include "amdgpu.h"
>>> +
>>> +#ifndef __AMDGPU_SWRING_H__
>>> +#define __AMDGPU_SWRING_H__
>>> +
>>> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct 
>>> amdgpu_ring *sw_ring,
>>> +             unsigned int max_dw, struct amdgpu_irq_src *irq_src,
>>> +             unsigned int irq_type, unsigned int hw_prio,
>>> +             atomic_t *sched_score);
>>> +void amdgpu_sw_ring_fini(struct amdgpu_ring *ring);
>>> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring);
>>> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring);
>>> +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring);
>>> +void amdgpu_sw_ring_commit(struct amdgpu_ring *ring);
>>> +
>>> +void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
>>> +void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
>>> +
>>> +#endif
>

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3)
  2022-09-12 13:22     ` Andrey Grodzovsky
@ 2022-09-12 13:27       ` Christian König
  2022-09-12 15:34         ` Andrey Grodzovsky
  0 siblings, 1 reply; 28+ messages in thread
From: Christian König @ 2022-09-12 13:27 UTC (permalink / raw)
  To: Andrey Grodzovsky, jiadong.zhu, amd-gfx; +Cc: Ray.Huang

Am 12.09.22 um 15:22 schrieb Andrey Grodzovsky:
>
> On 2022-09-12 06:20, Christian König wrote:
>> Am 09.09.22 um 18:45 schrieb Andrey Grodzovsky:
>>>
>>> On 2022-09-08 21:50, jiadong.zhu@amd.com wrote:
>>>> From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>
>>>>
>>>> The software ring is created to support priority
>>>> context while there is only one hardware queue
>>>> for gfx.
>>>>
>>>> Every software rings has its fence driver and could
>>>> be used as an ordinary ring for the gpu_scheduler.
>>>> Multiple software rings are binded to a real ring
>>>> with the ring muxer. The packages committed on the
>>>> software ring are copied to the real ring.
>>>>
>>>> v2: use array to store software ring entry.
>>>> v3: remove unnecessary prints.
>>>>
>>>> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
>>>> ---
>>>>   drivers/gpu/drm/amd/amdgpu/Makefile          |   3 +-
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h      |   3 +
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h     |   3 +
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 182 +++++++++++++++++
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  67 ++++++
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  | 204 
>>>> +++++++++++++++++++
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h  |  48 +++++
>>>>   7 files changed, 509 insertions(+), 1 deletion(-)
>>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
>>>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>>>> index 3e0e2eb7e235..85224bc81ce5 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>>>> @@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>>>>       amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o 
>>>> amdgpu_nbio.o \
>>>>       amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
>>>>       amdgpu_fw_attestation.o amdgpu_securedisplay.o \
>>>> -    amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
>>>> +    amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
>>>> +    amdgpu_sw_ring.o amdgpu_ring_mux.o
>>>>     amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>> index 53526ffb2ce1..0de8e3cd0f1c 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>> @@ -33,6 +33,7 @@
>>>>   #include "amdgpu_imu.h"
>>>>   #include "soc15.h"
>>>>   #include "amdgpu_ras.h"
>>>> +#include "amdgpu_ring_mux.h"
>>>>     /* GFX current status */
>>>>   #define AMDGPU_GFX_NORMAL_MODE            0x00000000L
>>>> @@ -346,6 +347,8 @@ struct amdgpu_gfx {
>>>>       struct amdgpu_gfx_ras        *ras;
>>>>         bool                is_poweron;
>>>> +
>>>> +    struct amdgpu_ring_mux            muxer;
>>>>   };
>>>>     #define amdgpu_gfx_get_gpu_clock_counter(adev) 
>>>> (adev)->gfx.funcs->get_gpu_clock_counter((adev))
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>> index 7d89a52091c0..fe33a683bfba 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>> @@ -278,6 +278,9 @@ struct amdgpu_ring {
>>>>       bool            is_mes_queue;
>>>>       uint32_t        hw_queue_id;
>>>>       struct amdgpu_mes_ctx_data *mes_ctx;
>>>> +
>>>> +    bool            is_sw_ring;
>>>> +
>>>>   };
>>>>     #define amdgpu_ring_parse_cs(r, p, job, ib) 
>>>> ((r)->funcs->parse_cs((p), (job), (ib)))
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>> new file mode 100644
>>>> index 000000000000..ea4a3c66119a
>>>> --- /dev/null
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>> @@ -0,0 +1,182 @@
>>>> +/*
>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>> + *
>>>> + * Permission is hereby granted, free of charge, to any person 
>>>> obtaining a
>>>> + * copy of this software and associated documentation files (the 
>>>> "Software"),
>>>> + * to deal in the Software without restriction, including without 
>>>> limitation
>>>> + * the rights to use, copy, modify, merge, publish, distribute, 
>>>> sublicense,
>>>> + * and/or sell copies of the Software, and to permit persons to 
>>>> whom the
>>>> + * Software is furnished to do so, subject to the following 
>>>> conditions:
>>>> + *
>>>> + * The above copyright notice and this permission notice shall be 
>>>> included in
>>>> + * all copies or substantial portions of the Software.
>>>> + *
>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
>>>> EXPRESS OR
>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>>> MERCHANTABILITY,
>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO 
>>>> EVENT SHALL
>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
>>>> DAMAGES OR
>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>>>> OTHERWISE,
>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
>>>> USE OR
>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>> + *
>>>> + */
>>>> +
>>>> +#include <drm/drm_print.h>
>>>> +
>>>> +#include "amdgpu_ring_mux.h"
>>>> +#include "amdgpu_ring.h"
>>>> +
>>>> +#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ/2)
>>>> +
>>>> +static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, 
>>>> struct amdgpu_ring *ring,
>>>> +    u64 s_begin, u64 s_end);
>>>> +
>>>> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct 
>>>> amdgpu_ring *ring)
>>>> +{
>>>> +    mux->real_ring = ring;
>>>> +    memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
>>>> +    mux->num_ring_entries = 0;
>>>> +    spin_lock_init(&mux->lock);
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux)
>>>> +{
>>>> +    memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
>>>> +    mux->num_ring_entries = 0;
>>>> +}
>>>> +
>>>> +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, 
>>>> struct amdgpu_ring *ring)
>>>> +{
>>>> +    struct amdgpu_mux_entry *e;
>>>> +
>>>> +    if (mux->num_ring_entries == AMDGPU_MAX_GFX_RINGS) {
>>>> +        DRM_ERROR("adding sw ring exceeds max gfx num\n");
>>>> +        return -ENOMEM;
>>>> +    }
>>>> +
>>>> +    e = &mux->ring_entries[mux->num_ring_entries++];
>>>> +
>>>> +    e->ring = ring;
>>>> +    e->start_ptr_in_hw_ring = 0;
>>>> +    e->end_ptr_in_hw_ring = 0;
>>>> +    e->sw_cptr = 0;
>>>> +    e->sw_rptr = 0;
>>>> +    e->sw_wptr = 0;
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static struct amdgpu_mux_entry *amdgpu_get_sw_entry(struct 
>>>> amdgpu_ring_mux *mux,
>>>> +                struct amdgpu_ring *ring)
>>>> +{
>>>> +    struct amdgpu_mux_entry *e;
>>>> +    int i;
>>>> +
>>>> +    e = NULL;
>>>> +    for (i = 0; i < mux->num_ring_entries; i++) {
>>>> +        if (mux->ring_entries[i].ring == ring) {
>>>> +            e = &mux->ring_entries[i];
>>>> +            break;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    return e;
>>>> +}
>>>> +
>>>> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, 
>>>> struct amdgpu_ring *ring, u64 wptr)
>>>> +{
>>>> +    struct amdgpu_mux_entry *e;
>>>> +
>>>> +    e = amdgpu_get_sw_entry(mux, ring);
>>>> +    if (!e) {
>>>> +        DRM_ERROR("cannot find entry for sw ring\n");
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    spin_lock(&mux->lock);
>>>
>>>
>>> A bit more generic question, I assume the spinlock here protects 
>>> from concurrent runs
>>> of amdgpu_ib_schedule. For them to be even theoretically concurrent 
>>> it must be from
>>> direct submissions to HW (because any scheduler mediated submission 
>>> is serialized though
>>> the dedicated scheduler worker thread). But in such case why we 
>>> protect only here ? If i am
>>> not missing something there is no total per HW ring lock when 
>>> calling amdgpu_ib_schedule today
>>> and we do a lot of HW accesses there to ring  which should probably 
>>> be protected from
>>> concurrent accesses.
>>>
>>> So if any one can answer this question ?
>>
>> Well what we have is in general two schedulers which push their work 
>> into one hardware ring.
>>
>> So we need a lock to make sure that only one is modifying the hw ring 
>> at the same time.
>>
>> From the implementation I think we first write the commands into a 
>> shadow ring buffer and then copy them over to the real hw ring here.
>>
>> So this is the only place where we actually touch the hw ring buffer 
>> and to need to grab the lock.
>>
>> Did I get this right?
>>
>> Thanks,
>> Christian.
>
>
> For the case of the sw ring yes, but I was asking in general, accesses 
> to real HW rings, amdgpu_ib_schedule writes to HW rings,
> we may be accessing same HW ring from 2 different contexts when doing 
> direct submissions (i.e. calling  amdgpu_ib_schedule
> directly from 2 threads concurrently) this opens possibility to 
> concurrent access to HW. Or am i missing something here ?

No, that's pretty much correct.

The general idea is that amdgpu_ib_schedule() first writes into a 
separate software ring buffer for each scheduler. So no locking needed 
for that.

Then, when the set_wptr callback is called, we grab the lock, copy the 
software ring content to the real hw ring and tell the hw to execute it.

The spin_lock is to protect from concurrent hw access.

Regards,
Christian.


>
> Andrey
>
>
>>
>>>
>>>
>>>> +    e->sw_cptr = e->sw_wptr;
>>>> +    e->sw_wptr = wptr;
>>>> +    e->start_ptr_in_hw_ring = mux->real_ring->wptr;
>>>> +
>>>> +    if (copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr) == 0) {
>>>> +        e->end_ptr_in_hw_ring = mux->real_ring->wptr;
>>>> +        amdgpu_ring_commit(mux->real_ring);
>>>> +    }
>>>> +
>>>> +    spin_unlock(&mux->lock);
>>>> +}
>>>> +
>>>> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, 
>>>> struct amdgpu_ring *ring)
>>>> +{
>>>> +    struct amdgpu_mux_entry *e;
>>>> +
>>>> +    e = amdgpu_get_sw_entry(mux, ring);
>>>> +    if (!e) {
>>>> +        DRM_ERROR("cannot find entry for sw ring\n");
>>>> +        return 0;
>>>> +    }
>>>> +
>>>> +    return e->sw_wptr;
>>>> +}
>>>> +
>>>> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, 
>>>> struct amdgpu_ring *ring)
>>>> +{
>>>> +    struct amdgpu_mux_entry *e;
>>>> +    u64 r_rptr, r_wptr, offset, start, end;
>>>> +
>>>> +    e = amdgpu_get_sw_entry(mux, ring);
>>>> +    if (!e) {
>>>> +        DRM_ERROR("no sw entry found!\n");
>>>> +        return 0;
>>>> +    }
>>>> +
>>>> +    r_rptr = amdgpu_ring_get_rptr(mux->real_ring);
>>>> +    r_wptr = amdgpu_ring_get_wptr(mux->real_ring);
>>>> +
>>>> +    if (r_wptr < r_rptr)
>>>> +        r_wptr += mux->real_ring->ring_size >> 2;
>>>> +
>>>> +    start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask;
>>>> +    end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask;
>>>> +    if (start > end)
>>>> +        end += mux->real_ring->ring_size >> 2;
>>>> +    if (r_rptr <= end && r_rptr >= start) {
>>>> +        offset = r_rptr - start;
>>>> +        e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask;
>>>> +    } else if (r_rptr < start) {
>>>> +        e->sw_rptr = e->sw_cptr;
>>>> +    } else {
>>>> +        e->sw_rptr = e->sw_wptr;
>>>> +    }
>>>> +
>>>> +    return e->sw_rptr;
>>>> +}
>>>> +
>>>> +/*copy packages on sw ring range[begin, end) */
>>>> +static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, 
>>>> struct amdgpu_ring *ring,
>>>> +    u64 s_begin, u64 s_end)
>>>> +{
>>>> +    u64 begin, end, r_begin, r_end;
>>>> +    struct amdgpu_ring *real_ring = mux->real_ring;
>>>> +
>>>> +    begin = s_begin & ring->buf_mask;
>>>> +    end = s_end & ring->buf_mask;
>>>> +
>>>> +    r_begin = real_ring->wptr & real_ring->buf_mask;
>>>> +    if (begin == end)
>>>> +        return -ERANGE;
>>>> +    if (begin > end) {
>>>> +        amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) + end 
>>>> - begin);
>>>> +        amdgpu_ring_write_multiple(real_ring, (void 
>>>> *)&ring->ring[begin],
>>>> +            (ring->ring_size >> 2) - begin);
>>>> +        amdgpu_ring_write_multiple(real_ring, (void 
>>>> *)&ring->ring[0], end);
>>>> +    } else {
>>>> +        amdgpu_ring_alloc(real_ring, end - begin);
>>>> +        amdgpu_ring_write_multiple(real_ring, (void 
>>>> *)&ring->ring[begin], end - begin);
>>>> +    }
>>>> +
>>>> +    r_end = real_ring->wptr & real_ring->buf_mask;
>>>> +
>>>> +    return 0;
>>>> +}
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>> new file mode 100644
>>>> index 000000000000..d058c43bb063
>>>> --- /dev/null
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>> @@ -0,0 +1,67 @@
>>>> +/*
>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>> + *
>>>> + * Permission is hereby granted, free of charge, to any person 
>>>> obtaining a
>>>> + * copy of this software and associated documentation files (the 
>>>> "Software"),
>>>> + * to deal in the Software without restriction, including without 
>>>> limitation
>>>> + * the rights to use, copy, modify, merge, publish, distribute, 
>>>> sublicense,
>>>> + * and/or sell copies of the Software, and to permit persons to 
>>>> whom the
>>>> + * Software is furnished to do so, subject to the following 
>>>> conditions:
>>>> + *
>>>> + * The above copyright notice and this permission notice shall be 
>>>> included in
>>>> + * all copies or substantial portions of the Software.
>>>> + *
>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
>>>> EXPRESS OR
>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>>> MERCHANTABILITY,
>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO 
>>>> EVENT SHALL
>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
>>>> DAMAGES OR
>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>>>> OTHERWISE,
>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
>>>> USE OR
>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>> + *
>>>> + */
>>>> +
>>>> +#ifndef __AMDGPU_RING_MUX__
>>>> +#define __AMDGPU_RING_MUX__
>>>> +
>>>> +#include <linux/timer.h>
>>>> +#include <linux/spinlock.h>
>>>> +#include "amdgpu_ring.h"
>>>> +
>>>> +struct amdgpu_ring;
>>>> +/*
>>>> + * start_ptr_in_hw_ring - last copied start loc on hw ring
>>>> + * end_ptr_in_hw_ring - last copied end loc on hw ring
>>>> + *sw_cptr -the begin of copy ptr in sw ring
>>>> + *sw_rptr; the read ptr in sw ring
>>>> + *sw_wptr; the write ptr in sw ring
>>>> + */
>>>> +struct amdgpu_mux_entry {
>>>> +    struct amdgpu_ring    *ring;
>>>> +    u64 start_ptr_in_hw_ring;
>>>> +    u64 end_ptr_in_hw_ring;
>>>> +
>>>> +    u64 sw_cptr;
>>>> +    u64 sw_rptr;
>>>> +    u64 sw_wptr;
>>>> +};
>>>> +
>>>> +struct amdgpu_ring_mux {
>>>> +    struct amdgpu_ring *real_ring;
>>>> +
>>>> +    struct amdgpu_mux_entry ring_entries[AMDGPU_MAX_GFX_RINGS];
>>>> +
>>>> +    unsigned num_ring_entries;
>>>> +
>>>> +    spinlock_t            lock;
>>>> +
>>>> +};
>>>> +
>>>> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct 
>>>> amdgpu_ring *ring);
>>>> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux);
>>>> +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, 
>>>> struct amdgpu_ring *ring);
>>>> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, 
>>>> struct amdgpu_ring *ring, u64 wptr);
>>>> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, 
>>>> struct amdgpu_ring *ring);
>>>> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, 
>>>> struct amdgpu_ring *ring);
>>>> +
>>>> +#endif
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>> new file mode 100644
>>>> index 000000000000..452d0ff37758
>>>> --- /dev/null
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>> @@ -0,0 +1,204 @@
>>>> +/*
>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>> + * All Rights Reserved.
>>>> + *
>>>> + * Permission is hereby granted, free of charge, to any person 
>>>> obtaining a
>>>> + * copy of this software and associated documentation files (the
>>>> + * "Software"), to deal in the Software without restriction, 
>>>> including
>>>> + * without limitation the rights to use, copy, modify, merge, 
>>>> publish,
>>>> + * distribute, sub license, and/or sell copies of the Software, 
>>>> and to
>>>> + * permit persons to whom the Software is furnished to do so, 
>>>> subject to
>>>> + * the following conditions:
>>>> + *
>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
>>>> EXPRESS OR
>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>>> MERCHANTABILITY,
>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO 
>>>> EVENT SHALL
>>>> + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE 
>>>> FOR ANY CLAIM,
>>>> + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
>>>> TORT OR
>>>> + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
>>>> SOFTWARE OR THE
>>>> + * USE OR OTHER DEALINGS IN THE SOFTWARE.
>>>> + *
>>>> + * The above copyright notice and this permission notice 
>>>> (including the
>>>> + * next paragraph) shall be included in all copies or substantial 
>>>> portions
>>>> + * of the Software.
>>>> + *
>>>> + */
>>>> +
>>>> +#include "amdgpu_sw_ring.h"
>>>> +#include "amdgpu_ring_mux.h"
>>>> +
>>>> +#define amdgpu_ring_get_gpu_addr(ring, offset) \
>>>> +    (ring->is_mes_queue ?                        \
>>>> +     (ring->mes_ctx->meta_data_gpu_addr + offset) :            \
>>>> +     (ring->adev->wb.gpu_addr + offset * 4))
>>>> +
>>>> +#define amdgpu_ring_get_cpu_addr(ring, offset) \
>>>> +    (ring->is_mes_queue ?                        \
>>>> +     (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \
>>>> +     (&ring->adev->wb.wb[offset]))
>>>> +
>>>> +
>>>> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct 
>>>> amdgpu_ring *ring,
>>>> +             unsigned int max_dw, struct amdgpu_irq_src *irq_src,
>>>> +             unsigned int irq_type, unsigned int hw_prio,
>>>> +             atomic_t *sched_score)
>>>> +{
>>>> +    int r;
>>>> +    int sched_hw_submission = amdgpu_sched_hw_submission;
>>>> +    u32 *num_sched;
>>>> +    u32 hw_ip;
>>>> +
>>>> +    BUG_ON(!ring->is_sw_ring);
>>>> +
>>>> +    if (ring->adev == NULL) {
>>>> +        if (adev->num_rings >= AMDGPU_MAX_RINGS)
>>>> +            return -EINVAL;
>>>> +
>>>> +        ring->adev = adev;
>>>> +        ring->num_hw_submission = sched_hw_submission;
>>>> +        ring->sched_score = sched_score;
>>>> +        ring->vmid_wait = dma_fence_get_stub();
>>>> +
>>>> +        if (!ring->is_mes_queue) {
>>>> +            ring->idx = adev->num_rings++;
>>>> +            adev->rings[ring->idx] = ring;
>>>> +        }
>>>> +
>>>> +        r = amdgpu_fence_driver_init_ring(ring);
>>>> +        if (r)
>>>> +            return r;
>>>> +    }
>>>> +
>>>> +    r = amdgpu_device_wb_get(adev, &ring->fence_offs);
>>>> +    if (r) {
>>>> +        dev_err(adev->dev, "(%d) ring fence_offs wb alloc 
>>>> failed\n", r);
>>>> +        return r;
>>>> +    }
>>>> +
>>>> +    r = amdgpu_device_wb_get(adev, &ring->fence_offs);
>>>> +    if (r) {
>>>> +        dev_err(adev->dev, "(%d) ring fence_offs wb alloc 
>>>> failed\n", r);
>>>> +        return r;
>>>> +    }
>>>
>>>
>>> Looks like a typo copy/paste duplicate of the above
>>>
>>>> +
>>>> +    r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
>>>> +    if (r) {
>>>> +        dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc 
>>>> failed\n", r);
>>>> +        return r;
>>>> +    }
>>>> +
>>>> +    r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
>>>> +    if (r) {
>>>> +        dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc 
>>>> failed\n", r);
>>>> +        return r;
>>>> +    }
>>>> +
>>>> +    ring->fence_gpu_addr =
>>>> +        amdgpu_ring_get_gpu_addr(ring, ring->fence_offs);
>>>> +    ring->fence_cpu_addr =
>>>> +        amdgpu_ring_get_cpu_addr(ring, ring->fence_offs);
>>>> +
>>>> +    ring->trail_fence_gpu_addr =
>>>> +        amdgpu_ring_get_gpu_addr(ring, ring->trail_fence_offs);
>>>> +    ring->trail_fence_cpu_addr =
>>>> +        amdgpu_ring_get_cpu_addr(ring, ring->trail_fence_offs);
>>>> +
>>>> +    ring->cond_exe_gpu_addr =
>>>> +        amdgpu_ring_get_gpu_addr(ring, ring->cond_exe_offs);
>>>> +    ring->cond_exe_cpu_addr =
>>>> +        amdgpu_ring_get_cpu_addr(ring, ring->cond_exe_offs);
>>>> +
>>>> +    /* always set cond_exec_polling to CONTINUE */
>>>> +    *ring->cond_exe_cpu_addr = 1;
>>>> +
>>>> +    r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
>>>> +    if (r) {
>>>> +        dev_err(adev->dev, "failed initializing fences (%d).\n", r);
>>>> +        return r;
>>>> +    }
>>>> +
>>>> +    ring->ring_size = roundup_pow_of_two(max_dw * 4 * 
>>>> sched_hw_submission);
>>>> +
>>>> +    ring->buf_mask = (ring->ring_size / 4) - 1;
>>>> +    ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
>>>> +        0xffffffffffffffff : ring->buf_mask;
>>>> +
>>>> +    /* Allocate ring buffer */
>>>> +    if (ring->ring == NULL) {
>>>> +        ring->ring = kzalloc(ring->ring_size + 
>>>> ring->funcs->extra_dw, GFP_KERNEL);
>>>> +        if (!ring->ring) {
>>>> +            dev_err(adev->dev, "(%d) swring create failed\n", r);
>>>> +            return r;
>>>> +        }
>>>> +
>>>> +        amdgpu_ring_clear_ring(ring);
>>>> +    }
>>>> +
>>>> +    ring->max_dw = max_dw;
>>>> +    ring->hw_prio = hw_prio;
>>>> +
>>>> +    if (!ring->no_scheduler) {
>>>> +        hw_ip = ring->funcs->type;
>>>> +        num_sched = &adev->gpu_sched[hw_ip][hw_prio].num_scheds;
>>>> + adev->gpu_sched[hw_ip][hw_prio].sched[(*num_sched)++] =
>>>> +            &ring->sched;
>>>> +    }
>>>> +
>>>> +    return 0;
>>>> +}
>>>
>>>
>>> In general I see this function is largely a one-to-one subset of 
>>> amdgpu_ring_init.
>>> Could you maybe see a way to refactor such that this function is the 
>>> base,
>>> and for HW-related code that differs (like BO allocation for the ring 
>>> buffer) you
>>> maybe can add if (!ring->sw_ring)... and add those code snippets ? 
>>> To avoid
>>> substantial code duplication.
>>>
>>> Andrey
>>>
>>>
>>>> +
>>>> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring)
>>>> +{
>>>> +    struct amdgpu_device *adev = ring->adev;
>>>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>>>> +
>>>> +    BUG_ON(!ring->is_sw_ring);
>>>> +    return amdgpu_ring_get_rptr_from_mux(mux, ring);
>>>> +}
>>>> +
>>>> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring)
>>>> +{
>>>> +    struct amdgpu_device *adev = ring->adev;
>>>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>>>> +
>>>> +    BUG_ON(!ring->is_sw_ring);
>>>> +    return amdgpu_ring_get_wptr_from_mux(mux, ring);
>>>> +}
>>>> +
>>>> +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring)
>>>> +{
>>>> +    BUG_ON(!ring->is_sw_ring);
>>>> +}
>>>> +
>>>> +void amdgpu_sw_ring_commit(struct amdgpu_ring *ring)
>>>> +{
>>>> +    struct amdgpu_device *adev = ring->adev;
>>>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>>>> +
>>>> +    BUG_ON(!ring->is_sw_ring);
>>>> +    amdgpu_ring_set_wptr_to_mux(mux, ring, ring->wptr);
>>>> +}
>>>> +
>>>> +void amdgpu_sw_ring_fini(struct amdgpu_ring *ring)
>>>> +{
>>>> +    BUG_ON(!ring->is_sw_ring);
>>>> +
>>>> +    /* Not to finish a ring which is not initialized */
>>>> +    if (!(ring->adev) ||
>>>> +        (!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
>>>> +        return;
>>>> +
>>>> +    ring->sched.ready = false;
>>>> +
>>>> +    amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
>>>> +    amdgpu_device_wb_free(ring->adev, ring->fence_offs);
>>>> +
>>>> +    kfree((void *)ring->ring);
>>>> +
>>>> +    dma_fence_put(ring->vmid_wait);
>>>> +    ring->vmid_wait = NULL;
>>>> +    ring->me = 0;
>>>> +
>>>> +    ring->adev->rings[ring->idx] = NULL;
>>>> +}
>>>> +
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>> new file mode 100644
>>>> index 000000000000..c05d8a94ad0c
>>>> --- /dev/null
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>> @@ -0,0 +1,48 @@
>>>> +/*
>>>> + * Copyright 2012 Advanced Micro Devices, Inc.
>>>> + *
>>>> + * Permission is hereby granted, free of charge, to any person 
>>>> obtaining a
>>>> + * copy of this software and associated documentation files (the 
>>>> "Software"),
>>>> + * to deal in the Software without restriction, including without 
>>>> limitation
>>>> + * the rights to use, copy, modify, merge, publish, distribute, 
>>>> sublicense,
>>>> + * and/or sell copies of the Software, and to permit persons to 
>>>> whom the
>>>> + * Software is furnished to do so, subject to the following 
>>>> conditions:
>>>> + *
>>>> + * The above copyright notice and this permission notice shall be 
>>>> included in
>>>> + * all copies or substantial portions of the Software.
>>>> + *
>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
>>>> EXPRESS OR
>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>>> MERCHANTABILITY,
>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO 
>>>> EVENT SHALL
>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
>>>> DAMAGES OR
>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>>>> OTHERWISE,
>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
>>>> USE OR
>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>> + *
>>>> + */
>>>> +
>>>> +#include <drm/amdgpu_drm.h>
>>>> +#include <drm/gpu_scheduler.h>
>>>> +#include <drm/drm_print.h>
>>>> +
>>>> +#include "amdgpu_irq.h"
>>>> +#include "amdgpu_ring.h"
>>>> +#include "amdgpu.h"
>>>> +
>>>> +#ifndef __AMDGPU_SWRING_H__
>>>> +#define __AMDGPU_SWRING_H__
>>>> +
>>>> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct 
>>>> amdgpu_ring *sw_ring,
>>>> +             unsigned int max_dw, struct amdgpu_irq_src *irq_src,
>>>> +             unsigned int irq_type, unsigned int hw_prio,
>>>> +             atomic_t *sched_score);
>>>> +void amdgpu_sw_ring_fini(struct amdgpu_ring *ring);
>>>> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring);
>>>> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring);
>>>> +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring);
>>>> +void amdgpu_sw_ring_commit(struct amdgpu_ring *ring);
>>>> +
>>>> +void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
>>>> +void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
>>>> +
>>>> +#endif
>>


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3)
  2022-09-09 14:24 ` [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3) Christian König
@ 2022-09-12 14:31   ` Luben Tuikov
  0 siblings, 0 replies; 28+ messages in thread
From: Luben Tuikov @ 2022-09-12 14:31 UTC (permalink / raw)
  To: Christian König, amd-gfx, Andrey Grodzovsky; +Cc: Ray.Huang, jiadong.zhu

No problem Christian--will do today. Thanks.

Regards,
Luben

On 2022-09-09 10:24, Christian König wrote:
> Andrey and Luben please take a look at this set here and help with 
> reviewing it.
> 
> Thanks,
> Christian.
> 
> Am 09.09.22 um 03:50 schrieb jiadong.zhu@amd.com:
>> From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>
>>
>> The software ring is created to support priority
>> context while there is only one hardware queue
>> for gfx.
>>
>> Every software ring has its own fence driver and can
>> be used as an ordinary ring for the gpu_scheduler.
>> Multiple software rings are bound to a real ring
>> with the ring muxer. The packets committed on the
>> software ring are copied to the real ring.
>>
>> v2: use array to store software ring entry.
>> v3: remove unnecessary prints.
>>
>> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/Makefile          |   3 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h      |   3 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h     |   3 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 182 +++++++++++++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  67 ++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  | 204 +++++++++++++++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h  |  48 +++++
>>   7 files changed, 509 insertions(+), 1 deletion(-)
>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
>> index 3e0e2eb7e235..85224bc81ce5 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>> @@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>>   	amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
>>   	amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
>>   	amdgpu_fw_attestation.o amdgpu_securedisplay.o \
>> -	amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
>> +	amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
>> +	amdgpu_sw_ring.o amdgpu_ring_mux.o
>>   
>>   amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>>   
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> index 53526ffb2ce1..0de8e3cd0f1c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> @@ -33,6 +33,7 @@
>>   #include "amdgpu_imu.h"
>>   #include "soc15.h"
>>   #include "amdgpu_ras.h"
>> +#include "amdgpu_ring_mux.h"
>>   
>>   /* GFX current status */
>>   #define AMDGPU_GFX_NORMAL_MODE			0x00000000L
>> @@ -346,6 +347,8 @@ struct amdgpu_gfx {
>>   	struct amdgpu_gfx_ras		*ras;
>>   
>>   	bool				is_poweron;
>> +
>> +	struct amdgpu_ring_mux			muxer;
>>   };
>>   
>>   #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> index 7d89a52091c0..fe33a683bfba 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> @@ -278,6 +278,9 @@ struct amdgpu_ring {
>>   	bool			is_mes_queue;
>>   	uint32_t		hw_queue_id;
>>   	struct amdgpu_mes_ctx_data *mes_ctx;
>> +
>> +	bool			is_sw_ring;
>> +
>>   };
>>   
>>   #define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib)))
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>> new file mode 100644
>> index 000000000000..ea4a3c66119a
>> --- /dev/null
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>> @@ -0,0 +1,182 @@
>> +/*
>> + * Copyright 2022 Advanced Micro Devices, Inc.
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>> + * OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> + */
>> +
>> +#include <drm/drm_print.h>
>> +
>> +#include "amdgpu_ring_mux.h"
>> +#include "amdgpu_ring.h"
>> +
>> +#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ/2)
>> +
>> +static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
>> +	u64 s_begin, u64 s_end);
>> +
>> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
>> +{
>> +	mux->real_ring = ring;
>> +	memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
>> +	mux->num_ring_entries = 0;
>> +	spin_lock_init(&mux->lock);
>> +	return 0;
>> +}
>> +
>> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux)
>> +{
>> +	memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
>> +	mux->num_ring_entries = 0;
>> +}
>> +
>> +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
>> +{
>> +	struct amdgpu_mux_entry *e;
>> +
>> +	if (mux->num_ring_entries == AMDGPU_MAX_GFX_RINGS) {
>> +		DRM_ERROR("adding sw ring exceeds max gfx num\n");
>> +		return -ENOMEM;
>> +	}
>> +
>> +	e = &mux->ring_entries[mux->num_ring_entries++];
>> +
>> +	e->ring = ring;
>> +	e->start_ptr_in_hw_ring = 0;
>> +	e->end_ptr_in_hw_ring = 0;
>> +	e->sw_cptr = 0;
>> +	e->sw_rptr = 0;
>> +	e->sw_wptr = 0;
>> +
>> +	return 0;
>> +}
>> +
>> +static struct amdgpu_mux_entry *amdgpu_get_sw_entry(struct amdgpu_ring_mux *mux,
>> +				struct amdgpu_ring *ring)
>> +{
>> +	struct amdgpu_mux_entry *e;
>> +	int i;
>> +
>> +	e = NULL;
>> +	for (i = 0; i < mux->num_ring_entries; i++) {
>> +		if (mux->ring_entries[i].ring == ring) {
>> +			e = &mux->ring_entries[i];
>> +			break;
>> +		}
>> +	}
>> +
>> +	return e;
>> +}
>> +
>> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr)
>> +{
>> +	struct amdgpu_mux_entry *e;
>> +
>> +	e = amdgpu_get_sw_entry(mux, ring);
>> +	if (!e) {
>> +		DRM_ERROR("cannot find entry for sw ring\n");
>> +		return;
>> +	}
>> +
>> +	spin_lock(&mux->lock);
>> +	e->sw_cptr = e->sw_wptr;
>> +	e->sw_wptr = wptr;
>> +	e->start_ptr_in_hw_ring = mux->real_ring->wptr;
>> +
>> +	if (copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr) == 0) {
>> +		e->end_ptr_in_hw_ring = mux->real_ring->wptr;
>> +		amdgpu_ring_commit(mux->real_ring);
>> +	}
>> +
>> +	spin_unlock(&mux->lock);
>> +}
>> +
>> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
>> +{
>> +	struct amdgpu_mux_entry *e;
>> +
>> +	e = amdgpu_get_sw_entry(mux, ring);
>> +	if (!e) {
>> +		DRM_ERROR("cannot find entry for sw ring\n");
>> +		return 0;
>> +	}
>> +
>> +	return e->sw_wptr;
>> +}
>> +
>> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
>> +{
>> +	struct amdgpu_mux_entry *e;
>> +	u64 r_rptr, r_wptr, offset, start, end;
>> +
>> +	e = amdgpu_get_sw_entry(mux, ring);
>> +	if (!e) {
>> +		DRM_ERROR("no sw entry found!\n");
>> +		return 0;
>> +	}
>> +
>> +	r_rptr = amdgpu_ring_get_rptr(mux->real_ring);
>> +	r_wptr = amdgpu_ring_get_wptr(mux->real_ring);
>> +
>> +	if (r_wptr < r_rptr)
>> +		r_wptr += mux->real_ring->ring_size >> 2;
>> +
>> +	start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask;
>> +	end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask;
>> +	if (start > end)
>> +		end += mux->real_ring->ring_size >> 2;
>> +	if (r_rptr <= end && r_rptr >= start) {
>> +		offset = r_rptr - start;
>> +		e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask;
>> +	} else if (r_rptr < start) {
>> +		e->sw_rptr = e->sw_cptr;
>> +	} else {
>> +		e->sw_rptr = e->sw_wptr;
>> +	}
>> +
>> +	return e->sw_rptr;
>> +}
>> +
>> +/*copy packages on sw ring range[begin, end) */
>> +static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
>> +	u64 s_begin, u64 s_end)
>> +{
>> +	u64 begin, end, r_begin, r_end;
>> +	struct amdgpu_ring *real_ring = mux->real_ring;
>> +
>> +	begin = s_begin & ring->buf_mask;
>> +	end = s_end & ring->buf_mask;
>> +
>> +	r_begin = real_ring->wptr & real_ring->buf_mask;
>> +	if (begin == end)
>> +		return -ERANGE;
>> +	if (begin > end) {
>> +		amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) + end - begin);
>> +		amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[begin],
>> +			(ring->ring_size >> 2) - begin);
>> +		amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[0], end);
>> +	} else {
>> +		amdgpu_ring_alloc(real_ring, end - begin);
>> +		amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[begin], end - begin);
>> +	}
>> +
>> +	r_end = real_ring->wptr & real_ring->buf_mask;
>> +
>> +	return 0;
>> +}
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>> new file mode 100644
>> index 000000000000..d058c43bb063
>> --- /dev/null
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>> @@ -0,0 +1,67 @@
>> +/*
>> + * Copyright 2022 Advanced Micro Devices, Inc.
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>> + * OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> + */
>> +
>> +#ifndef __AMDGPU_RING_MUX__
>> +#define __AMDGPU_RING_MUX__
>> +
>> +#include <linux/timer.h>
>> +#include <linux/spinlock.h>
>> +#include "amdgpu_ring.h"
>> +
>> +struct amdgpu_ring;
>> +/*
>> + * start_ptr_in_hw_ring - last copied start loc on hw ring
>> + * end_ptr_in_hw_ring - last copied end loc on hw ring
>> + *sw_cptr -the begin of copy ptr in sw ring
>> + *sw_rptr; the read ptr in sw ring
>> + *sw_wptr; the write ptr in sw ring
>> + */
>> +struct amdgpu_mux_entry {
>> +	struct amdgpu_ring	*ring;
>> +	u64 start_ptr_in_hw_ring;
>> +	u64 end_ptr_in_hw_ring;
>> +
>> +	u64 sw_cptr;
>> +	u64 sw_rptr;
>> +	u64 sw_wptr;
>> +};
>> +
>> +struct amdgpu_ring_mux {
>> +	struct amdgpu_ring *real_ring;
>> +
>> +	struct amdgpu_mux_entry ring_entries[AMDGPU_MAX_GFX_RINGS];
>> +
>> +	unsigned num_ring_entries;
>> +
>> +	spinlock_t			lock;
>> +
>> +};
>> +
>> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
>> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux);
>> +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
>> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr);
>> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
>> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
>> +
>> +#endif
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>> new file mode 100644
>> index 000000000000..452d0ff37758
>> --- /dev/null
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>> @@ -0,0 +1,204 @@
>> +/*
>> + * Copyright 2022 Advanced Micro Devices, Inc.
>> + * All Rights Reserved.
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the
>> + * "Software"), to deal in the Software without restriction, including
>> + * without limitation the rights to use, copy, modify, merge, publish,
>> + * distribute, sub license, and/or sell copies of the Software, and to
>> + * permit persons to whom the Software is furnished to do so, subject to
>> + * the following conditions:
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
>> + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
>> + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>> + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
>> + * USE OR OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> + * The above copyright notice and this permission notice (including the
>> + * next paragraph) shall be included in all copies or substantial portions
>> + * of the Software.
>> + *
>> + */
>> +
>> +#include "amdgpu_sw_ring.h"
>> +#include "amdgpu_ring_mux.h"
>> +
>> +#define amdgpu_ring_get_gpu_addr(ring, offset)				\
>> +	(ring->is_mes_queue ?						\
>> +	 (ring->mes_ctx->meta_data_gpu_addr + offset) :			\
>> +	 (ring->adev->wb.gpu_addr + offset * 4))
>> +
>> +#define amdgpu_ring_get_cpu_addr(ring, offset)				\
>> +	(ring->is_mes_queue ?						\
>> +	 (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \
>> +	 (&ring->adev->wb.wb[offset]))
>> +
>> +
>> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
>> +		     unsigned int max_dw, struct amdgpu_irq_src *irq_src,
>> +		     unsigned int irq_type, unsigned int hw_prio,
>> +		     atomic_t *sched_score)
>> +{
>> +	int r;
>> +	int sched_hw_submission = amdgpu_sched_hw_submission;
>> +	u32 *num_sched;
>> +	u32 hw_ip;
>> +
>> +	BUG_ON(!ring->is_sw_ring);
>> +
>> +	if (ring->adev == NULL) {
>> +		if (adev->num_rings >= AMDGPU_MAX_RINGS)
>> +			return -EINVAL;
>> +
>> +		ring->adev = adev;
>> +		ring->num_hw_submission = sched_hw_submission;
>> +		ring->sched_score = sched_score;
>> +		ring->vmid_wait = dma_fence_get_stub();
>> +
>> +		if (!ring->is_mes_queue) {
>> +			ring->idx = adev->num_rings++;
>> +			adev->rings[ring->idx] = ring;
>> +		}
>> +
>> +		r = amdgpu_fence_driver_init_ring(ring);
>> +		if (r)
>> +			return r;
>> +	}
>> +
>> +	r = amdgpu_device_wb_get(adev, &ring->fence_offs);
>> +	if (r) {
>> +		dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
>> +		return r;
>> +	}
>> +
>> +	r = amdgpu_device_wb_get(adev, &ring->fence_offs);
>> +	if (r) {
>> +		dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
>> +		return r;
>> +	}
>> +
>> +	r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
>> +	if (r) {
>> +		dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r);
>> +		return r;
>> +	}
>> +
>> +	r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
>> +	if (r) {
>> +		dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r);
>> +		return r;
>> +	}
>> +
>> +	ring->fence_gpu_addr =
>> +		amdgpu_ring_get_gpu_addr(ring, ring->fence_offs);
>> +	ring->fence_cpu_addr =
>> +		amdgpu_ring_get_cpu_addr(ring, ring->fence_offs);
>> +
>> +	ring->trail_fence_gpu_addr =
>> +		amdgpu_ring_get_gpu_addr(ring, ring->trail_fence_offs);
>> +	ring->trail_fence_cpu_addr =
>> +		amdgpu_ring_get_cpu_addr(ring, ring->trail_fence_offs);
>> +
>> +	ring->cond_exe_gpu_addr =
>> +		amdgpu_ring_get_gpu_addr(ring, ring->cond_exe_offs);
>> +	ring->cond_exe_cpu_addr =
>> +		amdgpu_ring_get_cpu_addr(ring, ring->cond_exe_offs);
>> +
>> +	/* always set cond_exec_polling to CONTINUE */
>> +	*ring->cond_exe_cpu_addr = 1;
>> +
>> +	r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
>> +	if (r) {
>> +		dev_err(adev->dev, "failed initializing fences (%d).\n", r);
>> +		return r;
>> +	}
>> +
>> +	ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission);
>> +
>> +	ring->buf_mask = (ring->ring_size / 4) - 1;
>> +	ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
>> +		0xffffffffffffffff : ring->buf_mask;
>> +
>> +	/* Allocate ring buffer */
>> +	if (ring->ring == NULL) {
>> +		ring->ring = kzalloc(ring->ring_size + ring->funcs->extra_dw, GFP_KERNEL);
>> +		if (!ring->ring) {
>> +			dev_err(adev->dev, "(%d) swring create failed\n", r);
>> +			return r;
>> +		}
>> +
>> +		amdgpu_ring_clear_ring(ring);
>> +	}
>> +
>> +	ring->max_dw = max_dw;
>> +	ring->hw_prio = hw_prio;
>> +
>> +	if (!ring->no_scheduler) {
>> +		hw_ip = ring->funcs->type;
>> +		num_sched = &adev->gpu_sched[hw_ip][hw_prio].num_scheds;
>> +		adev->gpu_sched[hw_ip][hw_prio].sched[(*num_sched)++] =
>> +			&ring->sched;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring)
>> +{
>> +	struct amdgpu_device *adev = ring->adev;
>> +	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>> +
>> +	BUG_ON(!ring->is_sw_ring);
>> +	return amdgpu_ring_get_rptr_from_mux(mux, ring);
>> +}
>> +
>> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring)
>> +{
>> +	struct amdgpu_device *adev = ring->adev;
>> +	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>> +
>> +	BUG_ON(!ring->is_sw_ring);
>> +	return amdgpu_ring_get_wptr_from_mux(mux, ring);
>> +}
>> +
>> +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring)
>> +{
>> +	BUG_ON(!ring->is_sw_ring);
>> +}
>> +
>> +void amdgpu_sw_ring_commit(struct amdgpu_ring *ring)
>> +{
>> +	struct amdgpu_device *adev = ring->adev;
>> +	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>> +
>> +	BUG_ON(!ring->is_sw_ring);
>> +	amdgpu_ring_set_wptr_to_mux(mux, ring, ring->wptr);
>> +}
>> +
>> +void amdgpu_sw_ring_fini(struct amdgpu_ring *ring)
>> +{
>> +	BUG_ON(!ring->is_sw_ring);
>> +
>> +	/* Not to finish a ring which is not initialized */
>> +	if (!(ring->adev) ||
>> +	    (!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
>> +		return;
>> +
>> +	ring->sched.ready = false;
>> +
>> +	amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
>> +	amdgpu_device_wb_free(ring->adev, ring->fence_offs);
>> +
>> +	kfree((void *)ring->ring);
>> +
>> +	dma_fence_put(ring->vmid_wait);
>> +	ring->vmid_wait = NULL;
>> +	ring->me = 0;
>> +
>> +	ring->adev->rings[ring->idx] = NULL;
>> +}
>> +
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>> new file mode 100644
>> index 000000000000..c05d8a94ad0c
>> --- /dev/null
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>> @@ -0,0 +1,48 @@
>> +/*
>> + * Copyright 2012 Advanced Micro Devices, Inc.
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>> + * OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> + */
>> +
>> +#include <drm/amdgpu_drm.h>
>> +#include <drm/gpu_scheduler.h>
>> +#include <drm/drm_print.h>
>> +
>> +#include "amdgpu_irq.h"
>> +#include "amdgpu_ring.h"
>> +#include "amdgpu.h"
>> +
>> +#ifndef __AMDGPU_SWRING_H__
>> +#define __AMDGPU_SWRING_H__
>> +
>> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *sw_ring,
>> +		     unsigned int max_dw, struct amdgpu_irq_src *irq_src,
>> +		     unsigned int irq_type, unsigned int hw_prio,
>> +		     atomic_t *sched_score);
>> +void amdgpu_sw_ring_fini(struct amdgpu_ring *ring);
>> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring);
>> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring);
>> +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring);
>> +void amdgpu_sw_ring_commit(struct amdgpu_ring *ring);
>> +
>> +void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
>> +void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
>> +
>> +#endif
> 

Regards,
-- 
Luben

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3)
  2022-09-12 13:27       ` Christian König
@ 2022-09-12 15:34         ` Andrey Grodzovsky
  2022-09-12 15:51           ` Liu, Shaoyun
  2022-09-12 16:22           ` Christian König
  0 siblings, 2 replies; 28+ messages in thread
From: Andrey Grodzovsky @ 2022-09-12 15:34 UTC (permalink / raw)
  To: Christian König, jiadong.zhu, amd-gfx; +Cc: Ray.Huang

On 2022-09-12 09:27, Christian König wrote:

> Am 12.09.22 um 15:22 schrieb Andrey Grodzovsky:
>>
>> On 2022-09-12 06:20, Christian König wrote:
>>> Am 09.09.22 um 18:45 schrieb Andrey Grodzovsky:
>>>>
>>>> On 2022-09-08 21:50, jiadong.zhu@amd.com wrote:
>>>>> From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>
>>>>>
>>>>> The software ring is created to support priority
>>>>> context while there is only one hardware queue
>>>>> for gfx.
>>>>>
>>>>> Every software rings has its fence driver and could
>>>>> be used as an ordinary ring for the gpu_scheduler.
>>>>> Multiple software rings are binded to a real ring
>>>>> with the ring muxer. The packages committed on the
>>>>> software ring are copied to the real ring.
>>>>>
>>>>> v2: use array to store software ring entry.
>>>>> v3: remove unnecessary prints.
>>>>>
>>>>> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
>>>>> ---
>>>>>   drivers/gpu/drm/amd/amdgpu/Makefile          |   3 +-
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h      |   3 +
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h     |   3 +
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 182 
>>>>> +++++++++++++++++
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  67 ++++++
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  | 204 
>>>>> +++++++++++++++++++
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h  |  48 +++++
>>>>>   7 files changed, 509 insertions(+), 1 deletion(-)
>>>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
>>>>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>> index 3e0e2eb7e235..85224bc81ce5 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>> @@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>>>>>       amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o 
>>>>> amdgpu_nbio.o \
>>>>>       amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
>>>>>       amdgpu_fw_attestation.o amdgpu_securedisplay.o \
>>>>> -    amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
>>>>> +    amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
>>>>> +    amdgpu_sw_ring.o amdgpu_ring_mux.o
>>>>>     amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>>> index 53526ffb2ce1..0de8e3cd0f1c 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>>> @@ -33,6 +33,7 @@
>>>>>   #include "amdgpu_imu.h"
>>>>>   #include "soc15.h"
>>>>>   #include "amdgpu_ras.h"
>>>>> +#include "amdgpu_ring_mux.h"
>>>>>     /* GFX current status */
>>>>>   #define AMDGPU_GFX_NORMAL_MODE            0x00000000L
>>>>> @@ -346,6 +347,8 @@ struct amdgpu_gfx {
>>>>>       struct amdgpu_gfx_ras        *ras;
>>>>>         bool                is_poweron;
>>>>> +
>>>>> +    struct amdgpu_ring_mux            muxer;
>>>>>   };
>>>>>     #define amdgpu_gfx_get_gpu_clock_counter(adev) 
>>>>> (adev)->gfx.funcs->get_gpu_clock_counter((adev))
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>>> index 7d89a52091c0..fe33a683bfba 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>>> @@ -278,6 +278,9 @@ struct amdgpu_ring {
>>>>>       bool            is_mes_queue;
>>>>>       uint32_t        hw_queue_id;
>>>>>       struct amdgpu_mes_ctx_data *mes_ctx;
>>>>> +
>>>>> +    bool            is_sw_ring;
>>>>> +
>>>>>   };
>>>>>     #define amdgpu_ring_parse_cs(r, p, job, ib) 
>>>>> ((r)->funcs->parse_cs((p), (job), (ib)))
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>>> new file mode 100644
>>>>> index 000000000000..ea4a3c66119a
>>>>> --- /dev/null
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>>> @@ -0,0 +1,182 @@
>>>>> +/*
>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>> + *
>>>>> + * Permission is hereby granted, free of charge, to any person 
>>>>> obtaining a
>>>>> + * copy of this software and associated documentation files (the 
>>>>> "Software"),
>>>>> + * to deal in the Software without restriction, including without 
>>>>> limitation
>>>>> + * the rights to use, copy, modify, merge, publish, distribute, 
>>>>> sublicense,
>>>>> + * and/or sell copies of the Software, and to permit persons to 
>>>>> whom the
>>>>> + * Software is furnished to do so, subject to the following 
>>>>> conditions:
>>>>> + *
>>>>> + * The above copyright notice and this permission notice shall be 
>>>>> included in
>>>>> + * all copies or substantial portions of the Software.
>>>>> + *
>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 
>>>>> KIND, EXPRESS OR
>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>>>> MERCHANTABILITY,
>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO 
>>>>> EVENT SHALL
>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
>>>>> DAMAGES OR
>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>>>>> OTHERWISE,
>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
>>>>> USE OR
>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>> + *
>>>>> + */
>>>>> +
>>>>> +#include <drm/drm_print.h>
>>>>> +
>>>>> +#include "amdgpu_ring_mux.h"
>>>>> +#include "amdgpu_ring.h"
>>>>> +
>>>>> +#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ/2)
>>>>> +
>>>>> +static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, 
>>>>> struct amdgpu_ring *ring,
>>>>> +    u64 s_begin, u64 s_end);
>>>>> +
>>>>> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct 
>>>>> amdgpu_ring *ring)
>>>>> +{
>>>>> +    mux->real_ring = ring;
>>>>> +    memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
>>>>> +    mux->num_ring_entries = 0;
>>>>> +    spin_lock_init(&mux->lock);
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux)
>>>>> +{
>>>>> +    memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
>>>>> +    mux->num_ring_entries = 0;
>>>>> +}
>>>>> +
>>>>> +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, 
>>>>> struct amdgpu_ring *ring)
>>>>> +{
>>>>> +    struct amdgpu_mux_entry *e;
>>>>> +
>>>>> +    if (mux->num_ring_entries == AMDGPU_MAX_GFX_RINGS) {
>>>>> +        DRM_ERROR("adding sw ring exceeds max gfx num\n");
>>>>> +        return -ENOMEM;
>>>>> +    }
>>>>> +
>>>>> +    e = &mux->ring_entries[mux->num_ring_entries++];
>>>>> +
>>>>> +    e->ring = ring;
>>>>> +    e->start_ptr_in_hw_ring = 0;
>>>>> +    e->end_ptr_in_hw_ring = 0;
>>>>> +    e->sw_cptr = 0;
>>>>> +    e->sw_rptr = 0;
>>>>> +    e->sw_wptr = 0;
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>> +static struct amdgpu_mux_entry *amdgpu_get_sw_entry(struct 
>>>>> amdgpu_ring_mux *mux,
>>>>> +                struct amdgpu_ring *ring)
>>>>> +{
>>>>> +    struct amdgpu_mux_entry *e;
>>>>> +    int i;
>>>>> +
>>>>> +    e = NULL;
>>>>> +    for (i = 0; i < mux->num_ring_entries; i++) {
>>>>> +        if (mux->ring_entries[i].ring == ring) {
>>>>> +            e = &mux->ring_entries[i];
>>>>> +            break;
>>>>> +        }
>>>>> +    }
>>>>> +
>>>>> +    return e;
>>>>> +}
>>>>> +
>>>>> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, 
>>>>> struct amdgpu_ring *ring, u64 wptr)
>>>>> +{
>>>>> +    struct amdgpu_mux_entry *e;
>>>>> +
>>>>> +    e = amdgpu_get_sw_entry(mux, ring);
>>>>> +    if (!e) {
>>>>> +        DRM_ERROR("cannot find entry for sw ring\n");
>>>>> +        return;
>>>>> +    }
>>>>> +
>>>>> +    spin_lock(&mux->lock);
>>>>
>>>>
>>>> A bit more generic question, I assume the spinlock here protects 
>>>> from concurrent runs
>>>> of amdgpu_ib_schedule. For them to be even theoretically concurrent 
>>>> it must be from
>>>> direct submissions to HW (because any scheduler mediated submission 
>>>> is serialized though
>>>> the dedicated scheduler worker thread). But in such case why we 
>>>> protect only here ? If i am
>>>> not missing something there is no total per HW ring lock when 
>>>> calling amdgpu_ib_schedule today
>>>> and we do a lot of HW accesses there to ring  which should probably 
>>>> be protected from
>>>> concurrent accesses.
>>>>
>>>> So if any one can answer this question ?
>>>
>>> Well what we have is in general two schedulers which push their work 
>>> into one hardware ring.
>>>
>>> So we need a lock to make sure that only one is modifying the hw 
>>> ring at the same time.
>>>
>>> From the implementation I think we first write the commands into a 
>>> shadow ring buffer and then copy them over to the real hw ring here.
>>>
>>> So this is the only place where we actually touch the hw ring buffer 
>>> and to need to grab the lock.
>>>
>>> Did I get this right?
>>>
>>> Thanks,
>>> Christian.
>>
>>
>> For the case of the sw ring yes, but I was asking in general, 
>> accesses to real HW rings, amdgpu_ib_schedule writes to HW rings,
>> we may be accessing same HW ring from 2 different contexts when doing 
>> direct submissions (i.e. calling  amdgpu_ib_schedule
>> directly from 2 threads concurrently) this opens possibility to 
>> concurrent access to HW. Or am i missing something here ?
>
> No, that's pretty much correct.
>
> The general idea is that amdgpu_ib_schedule() first writes into a 
> separate software ring buffer for each scheduler. So no locking needed 
> for that.
>
> Then when the set_wptr callback is called we grab the lock and copy 
> the software ring content to the real hw ring and telling the hw to 
> execute it.
>
> The spin_lock is to protect from concurrent hw access.
>
> Regards,
> Christian.


Look at 
amdgpu_copy_buffer->amdgpu_job_submit_direct->amdgpu_ib_schedule->amdgpu_ring_commit->amdgpu_ring_set_wptr:
at no point along that path is a lock taken. The only lock I see that resembles what 
you describe is amdgpu_kiq.ring_lock. So this applies only
to some of the code paths, not to all cases.

Andrey


>
>
>>
>> Andrey
>>
>>
>>>
>>>>
>>>>
>>>>> +    e->sw_cptr = e->sw_wptr;
>>>>> +    e->sw_wptr = wptr;
>>>>> +    e->start_ptr_in_hw_ring = mux->real_ring->wptr;
>>>>> +
>>>>> +    if (copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr) == 0) {
>>>>> +        e->end_ptr_in_hw_ring = mux->real_ring->wptr;
>>>>> +        amdgpu_ring_commit(mux->real_ring);
>>>>> +    }
>>>>> +
>>>>> +    spin_unlock(&mux->lock);
>>>>> +}
>>>>> +
>>>>> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, 
>>>>> struct amdgpu_ring *ring)
>>>>> +{
>>>>> +    struct amdgpu_mux_entry *e;
>>>>> +
>>>>> +    e = amdgpu_get_sw_entry(mux, ring);
>>>>> +    if (!e) {
>>>>> +        DRM_ERROR("cannot find entry for sw ring\n");
>>>>> +        return 0;
>>>>> +    }
>>>>> +
>>>>> +    return e->sw_wptr;
>>>>> +}
>>>>> +
>>>>> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, 
>>>>> struct amdgpu_ring *ring)
>>>>> +{
>>>>> +    struct amdgpu_mux_entry *e;
>>>>> +    u64 r_rptr, r_wptr, offset, start, end;
>>>>> +
>>>>> +    e = amdgpu_get_sw_entry(mux, ring);
>>>>> +    if (!e) {
>>>>> +        DRM_ERROR("no sw entry found!\n");
>>>>> +        return 0;
>>>>> +    }
>>>>> +
>>>>> +    r_rptr = amdgpu_ring_get_rptr(mux->real_ring);
>>>>> +    r_wptr = amdgpu_ring_get_wptr(mux->real_ring);
>>>>> +
>>>>> +    if (r_wptr < r_rptr)
>>>>> +        r_wptr += mux->real_ring->ring_size >> 2;
>>>>> +
>>>>> +    start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask;
>>>>> +    end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask;
>>>>> +    if (start > end)
>>>>> +        end += mux->real_ring->ring_size >> 2;
>>>>> +    if (r_rptr <= end && r_rptr >= start) {
>>>>> +        offset = r_rptr - start;
>>>>> +        e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask;
>>>>> +    } else if (r_rptr < start) {
>>>>> +        e->sw_rptr = e->sw_cptr;
>>>>> +    } else {
>>>>> +        e->sw_rptr = e->sw_wptr;
>>>>> +    }
>>>>> +
>>>>> +    return e->sw_rptr;
>>>>> +}
>>>>> +
>>>>> +/*copy packages on sw ring range[begin, end) */
>>>>> +static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, 
>>>>> struct amdgpu_ring *ring,
>>>>> +    u64 s_begin, u64 s_end)
>>>>> +{
>>>>> +    u64 begin, end, r_begin, r_end;
>>>>> +    struct amdgpu_ring *real_ring = mux->real_ring;
>>>>> +
>>>>> +    begin = s_begin & ring->buf_mask;
>>>>> +    end = s_end & ring->buf_mask;
>>>>> +
>>>>> +    r_begin = real_ring->wptr & real_ring->buf_mask;
>>>>> +    if (begin == end)
>>>>> +        return -ERANGE;
>>>>> +    if (begin > end) {
>>>>> +        amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) + end 
>>>>> - begin);
>>>>> +        amdgpu_ring_write_multiple(real_ring, (void 
>>>>> *)&ring->ring[begin],
>>>>> +            (ring->ring_size >> 2) - begin);
>>>>> +        amdgpu_ring_write_multiple(real_ring, (void 
>>>>> *)&ring->ring[0], end);
>>>>> +    } else {
>>>>> +        amdgpu_ring_alloc(real_ring, end - begin);
>>>>> +        amdgpu_ring_write_multiple(real_ring, (void 
>>>>> *)&ring->ring[begin], end - begin);
>>>>> +    }
>>>>> +
>>>>> +    r_end = real_ring->wptr & real_ring->buf_mask;
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h 
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>>> new file mode 100644
>>>>> index 000000000000..d058c43bb063
>>>>> --- /dev/null
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>>> @@ -0,0 +1,67 @@
>>>>> +/*
>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>> + *
>>>>> + * Permission is hereby granted, free of charge, to any person 
>>>>> obtaining a
>>>>> + * copy of this software and associated documentation files (the 
>>>>> "Software"),
>>>>> + * to deal in the Software without restriction, including without 
>>>>> limitation
>>>>> + * the rights to use, copy, modify, merge, publish, distribute, 
>>>>> sublicense,
>>>>> + * and/or sell copies of the Software, and to permit persons to 
>>>>> whom the
>>>>> + * Software is furnished to do so, subject to the following 
>>>>> conditions:
>>>>> + *
>>>>> + * The above copyright notice and this permission notice shall be 
>>>>> included in
>>>>> + * all copies or substantial portions of the Software.
>>>>> + *
>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 
>>>>> KIND, EXPRESS OR
>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>>>> MERCHANTABILITY,
>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO 
>>>>> EVENT SHALL
>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
>>>>> DAMAGES OR
>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>>>>> OTHERWISE,
>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
>>>>> USE OR
>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>> + *
>>>>> + */
>>>>> +
>>>>> +#ifndef __AMDGPU_RING_MUX__
>>>>> +#define __AMDGPU_RING_MUX__
>>>>> +
>>>>> +#include <linux/timer.h>
>>>>> +#include <linux/spinlock.h>
>>>>> +#include "amdgpu_ring.h"
>>>>> +
>>>>> +struct amdgpu_ring;
>>>>> +/*
>>>>> + * start_ptr_in_hw_ring - last copied start loc on hw ring
>>>>> + * end_ptr_in_hw_ring - last copied end loc on hw ring
>>>>> + *sw_cptr -the begin of copy ptr in sw ring
>>>>> + *sw_rptr; the read ptr in sw ring
>>>>> + *sw_wptr; the write ptr in sw ring
>>>>> + */
>>>>> +struct amdgpu_mux_entry {
>>>>> +    struct amdgpu_ring    *ring;
>>>>> +    u64 start_ptr_in_hw_ring;
>>>>> +    u64 end_ptr_in_hw_ring;
>>>>> +
>>>>> +    u64 sw_cptr;
>>>>> +    u64 sw_rptr;
>>>>> +    u64 sw_wptr;
>>>>> +};
>>>>> +
>>>>> +struct amdgpu_ring_mux {
>>>>> +    struct amdgpu_ring *real_ring;
>>>>> +
>>>>> +    struct amdgpu_mux_entry ring_entries[AMDGPU_MAX_GFX_RINGS];
>>>>> +
>>>>> +    unsigned num_ring_entries;
>>>>> +
>>>>> +    spinlock_t            lock;
>>>>> +
>>>>> +};
>>>>> +
>>>>> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct 
>>>>> amdgpu_ring *ring);
>>>>> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux);
>>>>> +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, 
>>>>> struct amdgpu_ring *ring);
>>>>> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, 
>>>>> struct amdgpu_ring *ring, u64 wptr);
>>>>> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, 
>>>>> struct amdgpu_ring *ring);
>>>>> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, 
>>>>> struct amdgpu_ring *ring);
>>>>> +
>>>>> +#endif
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c 
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>>> new file mode 100644
>>>>> index 000000000000..452d0ff37758
>>>>> --- /dev/null
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>>> @@ -0,0 +1,204 @@
>>>>> +/*
>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>> + * All Rights Reserved.
>>>>> + *
>>>>> + * Permission is hereby granted, free of charge, to any person 
>>>>> obtaining a
>>>>> + * copy of this software and associated documentation files (the
>>>>> + * "Software"), to deal in the Software without restriction, 
>>>>> including
>>>>> + * without limitation the rights to use, copy, modify, merge, 
>>>>> publish,
>>>>> + * distribute, sub license, and/or sell copies of the Software, 
>>>>> and to
>>>>> + * permit persons to whom the Software is furnished to do so, 
>>>>> subject to
>>>>> + * the following conditions:
>>>>> + *
>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 
>>>>> KIND, EXPRESS OR
>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>>>> MERCHANTABILITY,
>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO 
>>>>> EVENT SHALL
>>>>> + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE 
>>>>> FOR ANY CLAIM,
>>>>> + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
>>>>> TORT OR
>>>>> + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
>>>>> SOFTWARE OR THE
>>>>> + * USE OR OTHER DEALINGS IN THE SOFTWARE.
>>>>> + *
>>>>> + * The above copyright notice and this permission notice 
>>>>> (including the
>>>>> + * next paragraph) shall be included in all copies or substantial 
>>>>> portions
>>>>> + * of the Software.
>>>>> + *
>>>>> + */
>>>>> +
>>>>> +#include "amdgpu_sw_ring.h"
>>>>> +#include "amdgpu_ring_mux.h"
>>>>> +
>>>>> +#define amdgpu_ring_get_gpu_addr(ring, offset) \
>>>>> +    (ring->is_mes_queue ?                        \
>>>>> +     (ring->mes_ctx->meta_data_gpu_addr + offset) :            \
>>>>> +     (ring->adev->wb.gpu_addr + offset * 4))
>>>>> +
>>>>> +#define amdgpu_ring_get_cpu_addr(ring, offset) \
>>>>> +    (ring->is_mes_queue ?                        \
>>>>> +     (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) 
>>>>> : \
>>>>> +     (&ring->adev->wb.wb[offset]))
>>>>> +
>>>>> +
>>>>> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct 
>>>>> amdgpu_ring *ring,
>>>>> +             unsigned int max_dw, struct amdgpu_irq_src *irq_src,
>>>>> +             unsigned int irq_type, unsigned int hw_prio,
>>>>> +             atomic_t *sched_score)
>>>>> +{
>>>>> +    int r;
>>>>> +    int sched_hw_submission = amdgpu_sched_hw_submission;
>>>>> +    u32 *num_sched;
>>>>> +    u32 hw_ip;
>>>>> +
>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>> +
>>>>> +    if (ring->adev == NULL) {
>>>>> +        if (adev->num_rings >= AMDGPU_MAX_RINGS)
>>>>> +            return -EINVAL;
>>>>> +
>>>>> +        ring->adev = adev;
>>>>> +        ring->num_hw_submission = sched_hw_submission;
>>>>> +        ring->sched_score = sched_score;
>>>>> +        ring->vmid_wait = dma_fence_get_stub();
>>>>> +
>>>>> +        if (!ring->is_mes_queue) {
>>>>> +            ring->idx = adev->num_rings++;
>>>>> +            adev->rings[ring->idx] = ring;
>>>>> +        }
>>>>> +
>>>>> +        r = amdgpu_fence_driver_init_ring(ring);
>>>>> +        if (r)
>>>>> +            return r;
>>>>> +    }
>>>>> +
>>>>> +    r = amdgpu_device_wb_get(adev, &ring->fence_offs);
>>>>> +    if (r) {
>>>>> +        dev_err(adev->dev, "(%d) ring fence_offs wb alloc 
>>>>> failed\n", r);
>>>>> +        return r;
>>>>> +    }
>>>>> +
>>>>> +    r = amdgpu_device_wb_get(adev, &ring->fence_offs);
>>>>> +    if (r) {
>>>>> +        dev_err(adev->dev, "(%d) ring fence_offs wb alloc 
>>>>> failed\n", r);
>>>>> +        return r;
>>>>> +    }
>>>>
>>>>
>>>> Looks like a typo copy pase duplicate of the above
>>>>
>>>>> +
>>>>> +    r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
>>>>> +    if (r) {
>>>>> +        dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc 
>>>>> failed\n", r);
>>>>> +        return r;
>>>>> +    }
>>>>> +
>>>>> +    r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
>>>>> +    if (r) {
>>>>> +        dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc 
>>>>> failed\n", r);
>>>>> +        return r;
>>>>> +    }
>>>>> +
>>>>> +    ring->fence_gpu_addr =
>>>>> +        amdgpu_ring_get_gpu_addr(ring, ring->fence_offs);
>>>>> +    ring->fence_cpu_addr =
>>>>> +        amdgpu_ring_get_cpu_addr(ring, ring->fence_offs);
>>>>> +
>>>>> +    ring->trail_fence_gpu_addr =
>>>>> +        amdgpu_ring_get_gpu_addr(ring, ring->trail_fence_offs);
>>>>> +    ring->trail_fence_cpu_addr =
>>>>> +        amdgpu_ring_get_cpu_addr(ring, ring->trail_fence_offs);
>>>>> +
>>>>> +    ring->cond_exe_gpu_addr =
>>>>> +        amdgpu_ring_get_gpu_addr(ring, ring->cond_exe_offs);
>>>>> +    ring->cond_exe_cpu_addr =
>>>>> +        amdgpu_ring_get_cpu_addr(ring, ring->cond_exe_offs);
>>>>> +
>>>>> +    /* always set cond_exec_polling to CONTINUE */
>>>>> +    *ring->cond_exe_cpu_addr = 1;
>>>>> +
>>>>> +    r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
>>>>> +    if (r) {
>>>>> +        dev_err(adev->dev, "failed initializing fences (%d).\n", r);
>>>>> +        return r;
>>>>> +    }
>>>>> +
>>>>> +    ring->ring_size = roundup_pow_of_two(max_dw * 4 * 
>>>>> sched_hw_submission);
>>>>> +
>>>>> +    ring->buf_mask = (ring->ring_size / 4) - 1;
>>>>> +    ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
>>>>> +        0xffffffffffffffff : ring->buf_mask;
>>>>> +
>>>>> +    /* Allocate ring buffer */
>>>>> +    if (ring->ring == NULL) {
>>>>> +        ring->ring = kzalloc(ring->ring_size + 
>>>>> ring->funcs->extra_dw, GFP_KERNEL);
>>>>> +        if (!ring->ring) {
>>>>> +            dev_err(adev->dev, "(%d) swring create failed\n", r);
>>>>> +            return r;
>>>>> +        }
>>>>> +
>>>>> +        amdgpu_ring_clear_ring(ring);
>>>>> +    }
>>>>> +
>>>>> +    ring->max_dw = max_dw;
>>>>> +    ring->hw_prio = hw_prio;
>>>>> +
>>>>> +    if (!ring->no_scheduler) {
>>>>> +        hw_ip = ring->funcs->type;
>>>>> +        num_sched = &adev->gpu_sched[hw_ip][hw_prio].num_scheds;
>>>>> + adev->gpu_sched[hw_ip][hw_prio].sched[(*num_sched)++] =
>>>>> +            &ring->sched;
>>>>> +    }
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>
>>>>
>>>> In general i see this function is a big one to one subset of 
>>>> amdgpu_ring_init.
>>>> Could you maybe see a way to refactor such that this function is 
>>>> the base
>>>> and for HW related code that different (like BO allocation for ring 
>>>> buffer) you
>>>> maybe can add if (!ring->sw_ring)... and add those code snippets ? 
>>>> To avoid
>>>> substantial code duplication.
>>>>
>>>> Andrey
>>>>
>>>>
>>>>> +
>>>>> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring)
>>>>> +{
>>>>> +    struct amdgpu_device *adev = ring->adev;
>>>>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>>>>> +
>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>> +    return amdgpu_ring_get_rptr_from_mux(mux, ring);
>>>>> +}
>>>>> +
>>>>> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring)
>>>>> +{
>>>>> +    struct amdgpu_device *adev = ring->adev;
>>>>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>>>>> +
>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>> +    return amdgpu_ring_get_wptr_from_mux(mux, ring);
>>>>> +}
>>>>> +
>>>>> +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring)
>>>>> +{
>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>> +}
>>>>> +
>>>>> +void amdgpu_sw_ring_commit(struct amdgpu_ring *ring)
>>>>> +{
>>>>> +    struct amdgpu_device *adev = ring->adev;
>>>>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>>>>> +
>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>> +    amdgpu_ring_set_wptr_to_mux(mux, ring, ring->wptr);
>>>>> +}
>>>>> +
>>>>> +void amdgpu_sw_ring_fini(struct amdgpu_ring *ring)
>>>>> +{
>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>> +
>>>>> +    /* Not to finish a ring which is not initialized */
>>>>> +    if (!(ring->adev) ||
>>>>> +        (!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
>>>>> +        return;
>>>>> +
>>>>> +    ring->sched.ready = false;
>>>>> +
>>>>> +    amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
>>>>> +    amdgpu_device_wb_free(ring->adev, ring->fence_offs);
>>>>> +
>>>>> +    kfree((void *)ring->ring);
>>>>> +
>>>>> +    dma_fence_put(ring->vmid_wait);
>>>>> +    ring->vmid_wait = NULL;
>>>>> +    ring->me = 0;
>>>>> +
>>>>> +    ring->adev->rings[ring->idx] = NULL;
>>>>> +}
>>>>> +
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h 
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>>> new file mode 100644
>>>>> index 000000000000..c05d8a94ad0c
>>>>> --- /dev/null
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>>> @@ -0,0 +1,48 @@
>>>>> +/*
>>>>> + * Copyright 2012 Advanced Micro Devices, Inc.
>>>>> + *
>>>>> + * Permission is hereby granted, free of charge, to any person 
>>>>> obtaining a
>>>>> + * copy of this software and associated documentation files (the 
>>>>> "Software"),
>>>>> + * to deal in the Software without restriction, including without 
>>>>> limitation
>>>>> + * the rights to use, copy, modify, merge, publish, distribute, 
>>>>> sublicense,
>>>>> + * and/or sell copies of the Software, and to permit persons to 
>>>>> whom the
>>>>> + * Software is furnished to do so, subject to the following 
>>>>> conditions:
>>>>> + *
>>>>> + * The above copyright notice and this permission notice shall be 
>>>>> included in
>>>>> + * all copies or substantial portions of the Software.
>>>>> + *
>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 
>>>>> KIND, EXPRESS OR
>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>>>> MERCHANTABILITY,
>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO 
>>>>> EVENT SHALL
>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
>>>>> DAMAGES OR
>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>>>>> OTHERWISE,
>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
>>>>> USE OR
>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>> + *
>>>>> + */
>>>>> +
>>>>> +#include <drm/amdgpu_drm.h>
>>>>> +#include <drm/gpu_scheduler.h>
>>>>> +#include <drm/drm_print.h>
>>>>> +
>>>>> +#include "amdgpu_irq.h"
>>>>> +#include "amdgpu_ring.h"
>>>>> +#include "amdgpu.h"
>>>>> +
>>>>> +#ifndef __AMDGPU_SWRING_H__
>>>>> +#define __AMDGPU_SWRING_H__
>>>>> +
>>>>> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct 
>>>>> amdgpu_ring *sw_ring,
>>>>> +             unsigned int max_dw, struct amdgpu_irq_src *irq_src,
>>>>> +             unsigned int irq_type, unsigned int hw_prio,
>>>>> +             atomic_t *sched_score);
>>>>> +void amdgpu_sw_ring_fini(struct amdgpu_ring *ring);
>>>>> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring);
>>>>> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring);
>>>>> +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring);
>>>>> +void amdgpu_sw_ring_commit(struct amdgpu_ring *ring);
>>>>> +
>>>>> +void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
>>>>> +void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
>>>>> +
>>>>> +#endif
>>>
>

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3)
  2022-09-12 15:34         ` Andrey Grodzovsky
@ 2022-09-12 15:51           ` Liu, Shaoyun
  2022-09-12 16:23             ` Christian König
  2022-09-12 16:22           ` Christian König
  1 sibling, 1 reply; 28+ messages in thread
From: Liu, Shaoyun @ 2022-09-12 15:51 UTC (permalink / raw)
  To: Grodzovsky, Andrey, Christian König, Zhu, Jiadong, amd-gfx
  Cc: Huang, Ray

[AMD Official Use Only - General]

Just curious about what this gfx software ring is used for? Who decides the priority? Can a user request a higher priority, or is it predefined?

Thanks
Shaoyun.liu

-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Andrey Grodzovsky
Sent: Monday, September 12, 2022 11:34 AM
To: Christian König <ckoenig.leichtzumerken@gmail.com>; Zhu, Jiadong <Jiadong.Zhu@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Huang, Ray <Ray.Huang@amd.com>
Subject: Re: [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3)

On 2022-09-12 09:27, Christian König wrote:

> Am 12.09.22 um 15:22 schrieb Andrey Grodzovsky:
>>
>> On 2022-09-12 06:20, Christian König wrote:
>>> Am 09.09.22 um 18:45 schrieb Andrey Grodzovsky:
>>>>
>>>> On 2022-09-08 21:50, jiadong.zhu@amd.com wrote:
>>>>> From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>
>>>>>
>>>>> The software ring is created to support priority context while
>>>>> there is only one hardware queue for gfx.
>>>>>
>>>>> Every software ring has its own fence driver and could be used as an
>>>>> ordinary ring for the gpu_scheduler.
>>>>> Multiple software rings are bound to a real ring with the ring
>>>>> muxer. The packets committed on the software ring are copied to
>>>>> the real ring.
>>>>>
>>>>> v2: use array to store software ring entry.
>>>>> v3: remove unnecessary prints.
>>>>>
>>>>> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
>>>>> ---
>>>>>   drivers/gpu/drm/amd/amdgpu/Makefile          |   3 +-
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h      |   3 +
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h     |   3 +
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 182
>>>>> +++++++++++++++++
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  67 ++++++
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  | 204
>>>>> +++++++++++++++++++
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h  |  48 +++++
>>>>>   7 files changed, 509 insertions(+), 1 deletion(-)
>>>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>> index 3e0e2eb7e235..85224bc81ce5 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>> @@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>>>>>       amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o
>>>>> amdgpu_nbio.o \
>>>>>       amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o
>>>>> amdgpu_rap.o \
>>>>>       amdgpu_fw_attestation.o amdgpu_securedisplay.o \
>>>>> -    amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
>>>>> +    amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
>>>>> +    amdgpu_sw_ring.o amdgpu_ring_mux.o
>>>>>     amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>>> index 53526ffb2ce1..0de8e3cd0f1c 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>>> @@ -33,6 +33,7 @@
>>>>>   #include "amdgpu_imu.h"
>>>>>   #include "soc15.h"
>>>>>   #include "amdgpu_ras.h"
>>>>> +#include "amdgpu_ring_mux.h"
>>>>>     /* GFX current status */
>>>>>   #define AMDGPU_GFX_NORMAL_MODE            0x00000000L @@ -346,6
>>>>> +347,8 @@ struct amdgpu_gfx {
>>>>>       struct amdgpu_gfx_ras        *ras;
>>>>>         bool                is_poweron;
>>>>> +
>>>>> +    struct amdgpu_ring_mux            muxer;
>>>>>   };
>>>>>     #define amdgpu_gfx_get_gpu_clock_counter(adev)
>>>>> (adev)->gfx.funcs->get_gpu_clock_counter((adev))
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>>> index 7d89a52091c0..fe33a683bfba 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>>> @@ -278,6 +278,9 @@ struct amdgpu_ring {
>>>>>       bool            is_mes_queue;
>>>>>       uint32_t        hw_queue_id;
>>>>>       struct amdgpu_mes_ctx_data *mes_ctx;
>>>>> +
>>>>> +    bool            is_sw_ring;
>>>>> +
>>>>>   };
>>>>>     #define amdgpu_ring_parse_cs(r, p, job, ib)
>>>>> ((r)->funcs->parse_cs((p), (job), (ib))) diff --git
>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>>> new file mode 100644
>>>>> index 000000000000..ea4a3c66119a
>>>>> --- /dev/null
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>>> @@ -0,0 +1,182 @@
>>>>> +/*
>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>> + *
>>>>> + * Permission is hereby granted, free of charge, to any person
>>>>> obtaining a
>>>>> + * copy of this software and associated documentation files (the
>>>>> "Software"),
>>>>> + * to deal in the Software without restriction, including without
>>>>> limitation
>>>>> + * the rights to use, copy, modify, merge, publish, distribute,
>>>>> sublicense,
>>>>> + * and/or sell copies of the Software, and to permit persons to
>>>>> whom the
>>>>> + * Software is furnished to do so, subject to the following
>>>>> conditions:
>>>>> + *
>>>>> + * The above copyright notice and this permission notice shall be
>>>>> included in
>>>>> + * all copies or substantial portions of the Software.
>>>>> + *
>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
>>>>> KIND, EXPRESS OR
>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>>>> MERCHANTABILITY,
>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
>>>>> EVENT SHALL
>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
>>>>> DAMAGES OR
>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>>>>> OTHERWISE,
>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
>>>>> USE OR
>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>> + *
>>>>> + */
>>>>> +
>>>>> +#include <drm/drm_print.h>
>>>>> +
>>>>> +#include "amdgpu_ring_mux.h"
>>>>> +#include "amdgpu_ring.h"
>>>>> +
>>>>> +#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ/2)
>>>>> +
>>>>> +static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux,
>>>>> struct amdgpu_ring *ring,
>>>>> +    u64 s_begin, u64 s_end);
>>>>> +
>>>>> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct
>>>>> amdgpu_ring *ring)
>>>>> +{
>>>>> +    mux->real_ring = ring;
>>>>> +    memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
>>>>> +    mux->num_ring_entries = 0;
>>>>> +    spin_lock_init(&mux->lock);
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux) {
>>>>> +    memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
>>>>> +    mux->num_ring_entries = 0;
>>>>> +}
>>>>> +
>>>>> +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux,
>>>>> struct amdgpu_ring *ring)
>>>>> +{
>>>>> +    struct amdgpu_mux_entry *e;
>>>>> +
>>>>> +    if (mux->num_ring_entries == AMDGPU_MAX_GFX_RINGS) {
>>>>> +        DRM_ERROR("adding sw ring exceeds max gfx num\n");
>>>>> +        return -ENOMEM;
>>>>> +    }
>>>>> +
>>>>> +    e = &mux->ring_entries[mux->num_ring_entries++];
>>>>> +
>>>>> +    e->ring = ring;
>>>>> +    e->start_ptr_in_hw_ring = 0;
>>>>> +    e->end_ptr_in_hw_ring = 0;
>>>>> +    e->sw_cptr = 0;
>>>>> +    e->sw_rptr = 0;
>>>>> +    e->sw_wptr = 0;
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>> +static struct amdgpu_mux_entry *amdgpu_get_sw_entry(struct
>>>>> amdgpu_ring_mux *mux,
>>>>> +                struct amdgpu_ring *ring) {
>>>>> +    struct amdgpu_mux_entry *e;
>>>>> +    int i;
>>>>> +
>>>>> +    e = NULL;
>>>>> +    for (i = 0; i < mux->num_ring_entries; i++) {
>>>>> +        if (mux->ring_entries[i].ring == ring) {
>>>>> +            e = &mux->ring_entries[i];
>>>>> +            break;
>>>>> +        }
>>>>> +    }
>>>>> +
>>>>> +    return e;
>>>>> +}
>>>>> +
>>>>> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux,
>>>>> struct amdgpu_ring *ring, u64 wptr)
>>>>> +{
>>>>> +    struct amdgpu_mux_entry *e;
>>>>> +
>>>>> +    e = amdgpu_get_sw_entry(mux, ring);
>>>>> +    if (!e) {
>>>>> +        DRM_ERROR("cannot find entry for sw ring\n");
>>>>> +        return;
>>>>> +    }
>>>>> +
>>>>> +    spin_lock(&mux->lock);
>>>>
>>>>
>>>> A bit more generic question, I assume the spinlock here protects
>>>> from concurrent runs of amdgpu_ib_schedule. For them to be even
>>>> theoretically concurrent it must be from direct submissions to HW
>>>> (because any scheduler mediated submission is serialized though the
>>>> dedicated scheduler worker thread). But in such case why we protect
>>>> only here ? If i am not missing something there is no total per HW
>>>> ring lock when calling amdgpu_ib_schedule today and we do a lot of
>>>> HW accesses there to ring  which should probably be protected from
>>>> concurrent accesses.
>>>>
>>>> So if any one can answer this question ?
>>>
>>> Well what we have is in general two schedulers which push their work
>>> into one hardware ring.
>>>
>>> So we need a lock to make sure that only one is modifying the hw
>>> ring at the same time.
>>>
>>> From the implementation I think we first write the commands into a
>>> shadow ring buffer and then copy them over to the real hw ring here.
>>>
>>> So this is the only place where we actually touch the hw ring buffer
>>> and need to grab the lock.
>>>
>>> Did I get this right?
>>>
>>> Thanks,
>>> Christian.
>>
>>
>> For the case of the sw ring yes, but I was asking in general,
>> accesses to real HW rings, amdgpu_ib_schedule writes to HW rings, we
>> may be accessing same HW ring from 2 different contexts when doing
>> direct submissions (i.e. calling  amdgpu_ib_schedule directly from 2
>> threads concurrently) this opens possibility to concurrent access to
>> HW. Or am i missing something here ?
>
> No, that's pretty much correct.
>
> The general idea is that amdgpu_ib_schedule() first writes into a
> separate software ring buffer for each scheduler. So no locking needed
> for that.
>
> Then when the set_wptr callback is called we grab the lock and copy
> the software ring content to the real hw ring and telling the hw to
> execute it.
>
> The spin_lock is to protect from concurrent hw access.
>
> Regards,
> Christian.


Look at
amdgpu_copy_buffer->amdgpu_job_submit_direct->amdgpu_ib_schedule->amdgpu_ring_commit->amdgpu_ring_set_wptr,
at no point is a lock taken. The only lock I see that resembles what you describe is amdgpu_kiq.ring_lock. So this applies only to some of the code, but not to all cases.

Andrey


>
>
>>
>> Andrey
>>
>>
>>>
>>>>
>>>>
>>>>> +    e->sw_cptr = e->sw_wptr;
>>>>> +    e->sw_wptr = wptr;
>>>>> +    e->start_ptr_in_hw_ring = mux->real_ring->wptr;
>>>>> +
>>>>> +    if (copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr) == 0)
>>>>> +{
>>>>> +        e->end_ptr_in_hw_ring = mux->real_ring->wptr;
>>>>> +        amdgpu_ring_commit(mux->real_ring);
>>>>> +    }
>>>>> +
>>>>> +    spin_unlock(&mux->lock);
>>>>> +}
>>>>> +
>>>>> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux,
>>>>> struct amdgpu_ring *ring)
>>>>> +{
>>>>> +    struct amdgpu_mux_entry *e;
>>>>> +
>>>>> +    e = amdgpu_get_sw_entry(mux, ring);
>>>>> +    if (!e) {
>>>>> +        DRM_ERROR("cannot find entry for sw ring\n");
>>>>> +        return 0;
>>>>> +    }
>>>>> +
>>>>> +    return e->sw_wptr;
>>>>> +}
>>>>> +
>>>>> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux,
>>>>> struct amdgpu_ring *ring)
>>>>> +{
>>>>> +    struct amdgpu_mux_entry *e;
>>>>> +    u64 r_rptr, r_wptr, offset, start, end;
>>>>> +
>>>>> +    e = amdgpu_get_sw_entry(mux, ring);
>>>>> +    if (!e) {
>>>>> +        DRM_ERROR("no sw entry found!\n");
>>>>> +        return 0;
>>>>> +    }
>>>>> +
>>>>> +    r_rptr = amdgpu_ring_get_rptr(mux->real_ring);
>>>>> +    r_wptr = amdgpu_ring_get_wptr(mux->real_ring);
>>>>> +
>>>>> +    if (r_wptr < r_rptr)
>>>>> +        r_wptr += mux->real_ring->ring_size >> 2;
>>>>> +
>>>>> +    start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask;
>>>>> +    end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask;
>>>>> +    if (start > end)
>>>>> +        end += mux->real_ring->ring_size >> 2;
>>>>> +    if (r_rptr <= end && r_rptr >= start) {
>>>>> +        offset = r_rptr - start;
>>>>> +        e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask;
>>>>> +    } else if (r_rptr < start) {
>>>>> +        e->sw_rptr = e->sw_cptr;
>>>>> +    } else {
>>>>> +        e->sw_rptr = e->sw_wptr;
>>>>> +    }
>>>>> +
>>>>> +    return e->sw_rptr;
>>>>> +}
>>>>> +
>>>>> +/*copy packages on sw ring range[begin, end) */ static int
>>>>> +copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux,
>>>>> struct amdgpu_ring *ring,
>>>>> +    u64 s_begin, u64 s_end)
>>>>> +{
>>>>> +    u64 begin, end, r_begin, r_end;
>>>>> +    struct amdgpu_ring *real_ring = mux->real_ring;
>>>>> +
>>>>> +    begin = s_begin & ring->buf_mask;
>>>>> +    end = s_end & ring->buf_mask;
>>>>> +
>>>>> +    r_begin = real_ring->wptr & real_ring->buf_mask;
>>>>> +    if (begin == end)
>>>>> +        return -ERANGE;
>>>>> +    if (begin > end) {
>>>>> +        amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) + end
>>>>> - begin);
>>>>> +        amdgpu_ring_write_multiple(real_ring, (void
>>>>> *)&ring->ring[begin],
>>>>> +            (ring->ring_size >> 2) - begin);
>>>>> +        amdgpu_ring_write_multiple(real_ring, (void
>>>>> *)&ring->ring[0], end);
>>>>> +    } else {
>>>>> +        amdgpu_ring_alloc(real_ring, end - begin);
>>>>> +        amdgpu_ring_write_multiple(real_ring, (void
>>>>> *)&ring->ring[begin], end - begin);
>>>>> +    }
>>>>> +
>>>>> +    r_end = real_ring->wptr & real_ring->buf_mask;
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>>> new file mode 100644
>>>>> index 000000000000..d058c43bb063
>>>>> --- /dev/null
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>>> @@ -0,0 +1,67 @@
>>>>> +/*
>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>> + *
>>>>> + * Permission is hereby granted, free of charge, to any person
>>>>> obtaining a
>>>>> + * copy of this software and associated documentation files (the
>>>>> "Software"),
>>>>> + * to deal in the Software without restriction, including without
>>>>> limitation
>>>>> + * the rights to use, copy, modify, merge, publish, distribute,
>>>>> sublicense,
>>>>> + * and/or sell copies of the Software, and to permit persons to
>>>>> whom the
>>>>> + * Software is furnished to do so, subject to the following
>>>>> conditions:
>>>>> + *
>>>>> + * The above copyright notice and this permission notice shall be
>>>>> included in
>>>>> + * all copies or substantial portions of the Software.
>>>>> + *
>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
>>>>> KIND, EXPRESS OR
>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>>>> MERCHANTABILITY,
>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
>>>>> EVENT SHALL
>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
>>>>> DAMAGES OR
>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>>>>> OTHERWISE,
>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
>>>>> USE OR
>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>> + *
>>>>> + */
>>>>> +
>>>>> +#ifndef __AMDGPU_RING_MUX__
>>>>> +#define __AMDGPU_RING_MUX__
>>>>> +
>>>>> +#include <linux/timer.h>
>>>>> +#include <linux/spinlock.h>
>>>>> +#include "amdgpu_ring.h"
>>>>> +
>>>>> +struct amdgpu_ring;
>>>>> +/*
>>>>> + * start_ptr_in_hw_ring - last copied start loc on hw ring
>>>>> + * end_ptr_in_hw_ring - last copied end loc on hw ring  *sw_cptr
>>>>> +-the begin of copy ptr in sw ring  *sw_rptr; the read ptr in sw
>>>>> +ring  *sw_wptr; the write ptr in sw ring  */ struct
>>>>> +amdgpu_mux_entry {
>>>>> +    struct amdgpu_ring    *ring;
>>>>> +    u64 start_ptr_in_hw_ring;
>>>>> +    u64 end_ptr_in_hw_ring;
>>>>> +
>>>>> +    u64 sw_cptr;
>>>>> +    u64 sw_rptr;
>>>>> +    u64 sw_wptr;
>>>>> +};
>>>>> +
>>>>> +struct amdgpu_ring_mux {
>>>>> +    struct amdgpu_ring *real_ring;
>>>>> +
>>>>> +    struct amdgpu_mux_entry ring_entries[AMDGPU_MAX_GFX_RINGS];
>>>>> +
>>>>> +    unsigned num_ring_entries;
>>>>> +
>>>>> +    spinlock_t            lock;
>>>>> +
>>>>> +};
>>>>> +
>>>>> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct
>>>>> amdgpu_ring *ring);
>>>>> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux); int
>>>>> +amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux,
>>>>> struct amdgpu_ring *ring);
>>>>> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux,
>>>>> struct amdgpu_ring *ring, u64 wptr);
>>>>> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux,
>>>>> struct amdgpu_ring *ring);
>>>>> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux,
>>>>> struct amdgpu_ring *ring);
>>>>> +
>>>>> +#endif
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>>> new file mode 100644
>>>>> index 000000000000..452d0ff37758
>>>>> --- /dev/null
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>>> @@ -0,0 +1,204 @@
>>>>> +/*
>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>> + * All Rights Reserved.
>>>>> + *
>>>>> + * Permission is hereby granted, free of charge, to any person
>>>>> obtaining a
>>>>> + * copy of this software and associated documentation files (the
>>>>> + * "Software"), to deal in the Software without restriction,
>>>>> including
>>>>> + * without limitation the rights to use, copy, modify, merge,
>>>>> publish,
>>>>> + * distribute, sub license, and/or sell copies of the Software,
>>>>> and to
>>>>> + * permit persons to whom the Software is furnished to do so,
>>>>> subject to
>>>>> + * the following conditions:
>>>>> + *
>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
>>>>> KIND, EXPRESS OR
>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>>>> MERCHANTABILITY,
>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO
>>>>> EVENT SHALL
>>>>> + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE
>>>>> FOR ANY CLAIM,
>>>>> + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
>>>>> TORT OR
>>>>> + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
>>>>> SOFTWARE OR THE
>>>>> + * USE OR OTHER DEALINGS IN THE SOFTWARE.
>>>>> + *
>>>>> + * The above copyright notice and this permission notice
>>>>> (including the
>>>>> + * next paragraph) shall be included in all copies or substantial
>>>>> portions
>>>>> + * of the Software.
>>>>> + *
>>>>> + */
>>>>> +
>>>>> +#include "amdgpu_sw_ring.h"
>>>>> +#include "amdgpu_ring_mux.h"
>>>>> +
>>>>> +#define amdgpu_ring_get_gpu_addr(ring, offset) \
>>>>> +    (ring->is_mes_queue ?                        \
>>>>> +     (ring->mes_ctx->meta_data_gpu_addr + offset) :            \
>>>>> +     (ring->adev->wb.gpu_addr + offset * 4))
>>>>> +
>>>>> +#define amdgpu_ring_get_cpu_addr(ring, offset) \
>>>>> +    (ring->is_mes_queue ?                        \
>>>>> +     (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset)
>>>>> : \
>>>>> +     (&ring->adev->wb.wb[offset]))
>>>>> +
>>>>> +
>>>>> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct
>>>>> amdgpu_ring *ring,
>>>>> +             unsigned int max_dw, struct amdgpu_irq_src *irq_src,
>>>>> +             unsigned int irq_type, unsigned int hw_prio,
>>>>> +             atomic_t *sched_score) {
>>>>> +    int r;
>>>>> +    int sched_hw_submission = amdgpu_sched_hw_submission;
>>>>> +    u32 *num_sched;
>>>>> +    u32 hw_ip;
>>>>> +
>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>> +
>>>>> +    if (ring->adev == NULL) {
>>>>> +        if (adev->num_rings >= AMDGPU_MAX_RINGS)
>>>>> +            return -EINVAL;
>>>>> +
>>>>> +        ring->adev = adev;
>>>>> +        ring->num_hw_submission = sched_hw_submission;
>>>>> +        ring->sched_score = sched_score;
>>>>> +        ring->vmid_wait = dma_fence_get_stub();
>>>>> +
>>>>> +        if (!ring->is_mes_queue) {
>>>>> +            ring->idx = adev->num_rings++;
>>>>> +            adev->rings[ring->idx] = ring;
>>>>> +        }
>>>>> +
>>>>> +        r = amdgpu_fence_driver_init_ring(ring);
>>>>> +        if (r)
>>>>> +            return r;
>>>>> +    }
>>>>> +
>>>>> +    r = amdgpu_device_wb_get(adev, &ring->fence_offs);
>>>>> +    if (r) {
>>>>> +        dev_err(adev->dev, "(%d) ring fence_offs wb alloc
>>>>> failed\n", r);
>>>>> +        return r;
>>>>> +    }
>>>>> +
>>>>> +    r = amdgpu_device_wb_get(adev, &ring->fence_offs);
>>>>> +    if (r) {
>>>>> +        dev_err(adev->dev, "(%d) ring fence_offs wb alloc
>>>>> failed\n", r);
>>>>> +        return r;
>>>>> +    }
>>>>
>>>>
>>>> Looks like a typo copy pase duplicate of the above
>>>>
>>>>> +
>>>>> +    r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
>>>>> +    if (r) {
>>>>> +        dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc
>>>>> failed\n", r);
>>>>> +        return r;
>>>>> +    }
>>>>> +
>>>>> +    r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
>>>>> +    if (r) {
>>>>> +        dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc
>>>>> failed\n", r);
>>>>> +        return r;
>>>>> +    }
>>>>> +
>>>>> +    ring->fence_gpu_addr =
>>>>> +        amdgpu_ring_get_gpu_addr(ring, ring->fence_offs);
>>>>> +    ring->fence_cpu_addr =
>>>>> +        amdgpu_ring_get_cpu_addr(ring, ring->fence_offs);
>>>>> +
>>>>> +    ring->trail_fence_gpu_addr =
>>>>> +        amdgpu_ring_get_gpu_addr(ring, ring->trail_fence_offs);
>>>>> +    ring->trail_fence_cpu_addr =
>>>>> +        amdgpu_ring_get_cpu_addr(ring, ring->trail_fence_offs);
>>>>> +
>>>>> +    ring->cond_exe_gpu_addr =
>>>>> +        amdgpu_ring_get_gpu_addr(ring, ring->cond_exe_offs);
>>>>> +    ring->cond_exe_cpu_addr =
>>>>> +        amdgpu_ring_get_cpu_addr(ring, ring->cond_exe_offs);
>>>>> +
>>>>> +    /* always set cond_exec_polling to CONTINUE */
>>>>> +    *ring->cond_exe_cpu_addr = 1;
>>>>> +
>>>>> +    r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
>>>>> +    if (r) {
>>>>> +        dev_err(adev->dev, "failed initializing fences (%d).\n",
>>>>> +r);
>>>>> +        return r;
>>>>> +    }
>>>>> +
>>>>> +    ring->ring_size = roundup_pow_of_two(max_dw * 4 *
>>>>> sched_hw_submission);
>>>>> +
>>>>> +    ring->buf_mask = (ring->ring_size / 4) - 1;
>>>>> +    ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
>>>>> +        0xffffffffffffffff : ring->buf_mask;
>>>>> +
>>>>> +    /* Allocate ring buffer */
>>>>> +    if (ring->ring == NULL) {
>>>>> +        ring->ring = kzalloc(ring->ring_size +
>>>>> ring->funcs->extra_dw, GFP_KERNEL);
>>>>> +        if (!ring->ring) {
>>>>> +            dev_err(adev->dev, "(%d) swring create failed\n", r);
>>>>> +            return r;
>>>>> +        }
>>>>> +
>>>>> +        amdgpu_ring_clear_ring(ring);
>>>>> +    }
>>>>> +
>>>>> +    ring->max_dw = max_dw;
>>>>> +    ring->hw_prio = hw_prio;
>>>>> +
>>>>> +    if (!ring->no_scheduler) {
>>>>> +        hw_ip = ring->funcs->type;
>>>>> +        num_sched = &adev->gpu_sched[hw_ip][hw_prio].num_scheds;
>>>>> + adev->gpu_sched[hw_ip][hw_prio].sched[(*num_sched)++] =
>>>>> +            &ring->sched;
>>>>> +    }
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>
>>>>
>>>> In general i see this function is a big one to one subset of
>>>> amdgpu_ring_init.
>>>> Could you maybe see a way to refactor such that this function is
>>>> the base and for HW related code that different (like BO allocation
>>>> for ring
>>>> buffer) you
>>>> maybe can add if (!ring->sw_ring)... and add those code snippets ?
>>>> To avoid
>>>> substantial code duplication.
>>>>
>>>> Andrey
>>>>
>>>>
>>>>> +
>>>>> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring) {
>>>>> +    struct amdgpu_device *adev = ring->adev;
>>>>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>>>>> +
>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>> +    return amdgpu_ring_get_rptr_from_mux(mux, ring); }
>>>>> +
>>>>> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring) {
>>>>> +    struct amdgpu_device *adev = ring->adev;
>>>>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>>>>> +
>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>> +    return amdgpu_ring_get_wptr_from_mux(mux, ring); }
>>>>> +
>>>>> +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring) {
>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>> +}
>>>>> +
>>>>> +void amdgpu_sw_ring_commit(struct amdgpu_ring *ring) {
>>>>> +    struct amdgpu_device *adev = ring->adev;
>>>>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>>>>> +
>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>> +    amdgpu_ring_set_wptr_to_mux(mux, ring, ring->wptr); }
>>>>> +
>>>>> +void amdgpu_sw_ring_fini(struct amdgpu_ring *ring) {
>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>> +
>>>>> +    /* Not to finish a ring which is not initialized */
>>>>> +    if (!(ring->adev) ||
>>>>> +        (!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
>>>>> +        return;
>>>>> +
>>>>> +    ring->sched.ready = false;
>>>>> +
>>>>> +    amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
>>>>> +    amdgpu_device_wb_free(ring->adev, ring->fence_offs);
>>>>> +
>>>>> +    kfree((void *)ring->ring);
>>>>> +
>>>>> +    dma_fence_put(ring->vmid_wait);
>>>>> +    ring->vmid_wait = NULL;
>>>>> +    ring->me = 0;
>>>>> +
>>>>> +    ring->adev->rings[ring->idx] = NULL; }
>>>>> +
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>>> new file mode 100644
>>>>> index 000000000000..c05d8a94ad0c
>>>>> --- /dev/null
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>>> @@ -0,0 +1,48 @@
>>>>> +/*
>>>>> + * Copyright 2012 Advanced Micro Devices, Inc.
>>>>> + *
>>>>> + * Permission is hereby granted, free of charge, to any person
>>>>> obtaining a
>>>>> + * copy of this software and associated documentation files (the
>>>>> "Software"),
>>>>> + * to deal in the Software without restriction, including without
>>>>> limitation
>>>>> + * the rights to use, copy, modify, merge, publish, distribute,
>>>>> sublicense,
>>>>> + * and/or sell copies of the Software, and to permit persons to
>>>>> whom the
>>>>> + * Software is furnished to do so, subject to the following
>>>>> conditions:
>>>>> + *
>>>>> + * The above copyright notice and this permission notice shall be
>>>>> included in
>>>>> + * all copies or substantial portions of the Software.
>>>>> + *
>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
>>>>> KIND, EXPRESS OR
>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>>>> MERCHANTABILITY,
>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
>>>>> EVENT SHALL
>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
>>>>> DAMAGES OR
>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>>>>> OTHERWISE,
>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
>>>>> USE OR
>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>> + *
>>>>> + */
>>>>> +
>>>>> +#include <drm/amdgpu_drm.h>
>>>>> +#include <drm/gpu_scheduler.h>
>>>>> +#include <drm/drm_print.h>
>>>>> +
>>>>> +#include "amdgpu_irq.h"
>>>>> +#include "amdgpu_ring.h"
>>>>> +#include "amdgpu.h"
>>>>> +
>>>>> +#ifndef __AMDGPU_SWRING_H__
>>>>> +#define __AMDGPU_SWRING_H__
>>>>> +
>>>>> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct
>>>>> amdgpu_ring *sw_ring,
>>>>> +             unsigned int max_dw, struct amdgpu_irq_src *irq_src,
>>>>> +             unsigned int irq_type, unsigned int hw_prio,
>>>>> +             atomic_t *sched_score); void
>>>>> +amdgpu_sw_ring_fini(struct amdgpu_ring *ring);
>>>>> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring);
>>>>> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring); void
>>>>> +amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring); void
>>>>> +amdgpu_sw_ring_commit(struct amdgpu_ring *ring);
>>>>> +
>>>>> +void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring); void
>>>>> +amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
>>>>> +
>>>>> +#endif
>>>
>

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3)
  2022-09-12 15:34         ` Andrey Grodzovsky
  2022-09-12 15:51           ` Liu, Shaoyun
@ 2022-09-12 16:22           ` Christian König
  2022-09-12 16:45             ` Andrey Grodzovsky
  1 sibling, 1 reply; 28+ messages in thread
From: Christian König @ 2022-09-12 16:22 UTC (permalink / raw)
  To: Andrey Grodzovsky, jiadong.zhu, amd-gfx; +Cc: Ray.Huang

Am 12.09.22 um 17:34 schrieb Andrey Grodzovsky:
> On 2022-09-12 09:27, Christian König wrote:
>
>> Am 12.09.22 um 15:22 schrieb Andrey Grodzovsky:
>>>
>>> On 2022-09-12 06:20, Christian König wrote:
>>>> Am 09.09.22 um 18:45 schrieb Andrey Grodzovsky:
>>>>>
>>>>> On 2022-09-08 21:50, jiadong.zhu@amd.com wrote:
>>>>>> From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>
>>>>>>
>>>>>> The software ring is created to support priority
>>>>>> context while there is only one hardware queue
>>>>>> for gfx.
>>>>>>
>>>>>> Every software ring has its own fence driver and can
>>>>>> be used as an ordinary ring for the gpu_scheduler.
>>>>>> Multiple software rings are bound to a real ring
>>>>>> with the ring muxer. The packets committed on the
>>>>>> software ring are copied to the real ring.
>>>>>>
>>>>>> v2: use array to store software ring entry.
>>>>>> v3: remove unnecessary prints.
>>>>>>
>>>>>> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
>>>>>> ---
>>>>>>   drivers/gpu/drm/amd/amdgpu/Makefile          |   3 +-
>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h      |   3 +
>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h     |   3 +
>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 182 
>>>>>> +++++++++++++++++
>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  67 ++++++
>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  | 204 
>>>>>> +++++++++++++++++++
>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h  |  48 +++++
>>>>>>   7 files changed, 509 insertions(+), 1 deletion(-)
>>>>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
>>>>>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>>> index 3e0e2eb7e235..85224bc81ce5 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>>> @@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>>>>>>       amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o 
>>>>>> amdgpu_nbio.o \
>>>>>>       amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o 
>>>>>> amdgpu_rap.o \
>>>>>>       amdgpu_fw_attestation.o amdgpu_securedisplay.o \
>>>>>> -    amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
>>>>>> +    amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
>>>>>> +    amdgpu_sw_ring.o amdgpu_ring_mux.o
>>>>>>     amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>>>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>>>> index 53526ffb2ce1..0de8e3cd0f1c 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>>>> @@ -33,6 +33,7 @@
>>>>>>   #include "amdgpu_imu.h"
>>>>>>   #include "soc15.h"
>>>>>>   #include "amdgpu_ras.h"
>>>>>> +#include "amdgpu_ring_mux.h"
>>>>>>     /* GFX current status */
>>>>>>   #define AMDGPU_GFX_NORMAL_MODE            0x00000000L
>>>>>> @@ -346,6 +347,8 @@ struct amdgpu_gfx {
>>>>>>       struct amdgpu_gfx_ras        *ras;
>>>>>>         bool                is_poweron;
>>>>>> +
>>>>>> +    struct amdgpu_ring_mux            muxer;
>>>>>>   };
>>>>>>     #define amdgpu_gfx_get_gpu_clock_counter(adev) 
>>>>>> (adev)->gfx.funcs->get_gpu_clock_counter((adev))
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>>>> index 7d89a52091c0..fe33a683bfba 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>>>> @@ -278,6 +278,9 @@ struct amdgpu_ring {
>>>>>>       bool            is_mes_queue;
>>>>>>       uint32_t        hw_queue_id;
>>>>>>       struct amdgpu_mes_ctx_data *mes_ctx;
>>>>>> +
>>>>>> +    bool            is_sw_ring;
>>>>>> +
>>>>>>   };
>>>>>>     #define amdgpu_ring_parse_cs(r, p, job, ib) 
>>>>>> ((r)->funcs->parse_cs((p), (job), (ib)))
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>>>> new file mode 100644
>>>>>> index 000000000000..ea4a3c66119a
>>>>>> --- /dev/null
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>>>> @@ -0,0 +1,182 @@
>>>>>> +/*
>>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>>> + *
>>>>>> + * Permission is hereby granted, free of charge, to any person 
>>>>>> obtaining a
>>>>>> + * copy of this software and associated documentation files (the 
>>>>>> "Software"),
>>>>>> + * to deal in the Software without restriction, including 
>>>>>> without limitation
>>>>>> + * the rights to use, copy, modify, merge, publish, distribute, 
>>>>>> sublicense,
>>>>>> + * and/or sell copies of the Software, and to permit persons to 
>>>>>> whom the
>>>>>> + * Software is furnished to do so, subject to the following 
>>>>>> conditions:
>>>>>> + *
>>>>>> + * The above copyright notice and this permission notice shall 
>>>>>> be included in
>>>>>> + * all copies or substantial portions of the Software.
>>>>>> + *
>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 
>>>>>> KIND, EXPRESS OR
>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>>>>> MERCHANTABILITY,
>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO 
>>>>>> EVENT SHALL
>>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
>>>>>> DAMAGES OR
>>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>>>>>> OTHERWISE,
>>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR 
>>>>>> THE USE OR
>>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>>> + *
>>>>>> + */
>>>>>> +
>>>>>> +#include <drm/drm_print.h>
>>>>>> +
>>>>>> +#include "amdgpu_ring_mux.h"
>>>>>> +#include "amdgpu_ring.h"
>>>>>> +
>>>>>> +#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ/2)
>>>>>> +
>>>>>> +static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, 
>>>>>> struct amdgpu_ring *ring,
>>>>>> +    u64 s_begin, u64 s_end);
>>>>>> +
>>>>>> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct 
>>>>>> amdgpu_ring *ring)
>>>>>> +{
>>>>>> +    mux->real_ring = ring;
>>>>>> +    memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
>>>>>> +    mux->num_ring_entries = 0;
>>>>>> +    spin_lock_init(&mux->lock);
>>>>>> +    return 0;
>>>>>> +}
>>>>>> +
>>>>>> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux)
>>>>>> +{
>>>>>> +    memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
>>>>>> +    mux->num_ring_entries = 0;
>>>>>> +}
>>>>>> +
>>>>>> +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, 
>>>>>> struct amdgpu_ring *ring)
>>>>>> +{
>>>>>> +    struct amdgpu_mux_entry *e;
>>>>>> +
>>>>>> +    if (mux->num_ring_entries == AMDGPU_MAX_GFX_RINGS) {
>>>>>> +        DRM_ERROR("adding sw ring exceeds max gfx num\n");
>>>>>> +        return -ENOMEM;
>>>>>> +    }
>>>>>> +
>>>>>> +    e = &mux->ring_entries[mux->num_ring_entries++];
>>>>>> +
>>>>>> +    e->ring = ring;
>>>>>> +    e->start_ptr_in_hw_ring = 0;
>>>>>> +    e->end_ptr_in_hw_ring = 0;
>>>>>> +    e->sw_cptr = 0;
>>>>>> +    e->sw_rptr = 0;
>>>>>> +    e->sw_wptr = 0;
>>>>>> +
>>>>>> +    return 0;
>>>>>> +}
>>>>>> +
>>>>>> +static struct amdgpu_mux_entry *amdgpu_get_sw_entry(struct 
>>>>>> amdgpu_ring_mux *mux,
>>>>>> +                struct amdgpu_ring *ring)
>>>>>> +{
>>>>>> +    struct amdgpu_mux_entry *e;
>>>>>> +    int i;
>>>>>> +
>>>>>> +    e = NULL;
>>>>>> +    for (i = 0; i < mux->num_ring_entries; i++) {
>>>>>> +        if (mux->ring_entries[i].ring == ring) {
>>>>>> +            e = &mux->ring_entries[i];
>>>>>> +            break;
>>>>>> +        }
>>>>>> +    }
>>>>>> +
>>>>>> +    return e;
>>>>>> +}
>>>>>> +
>>>>>> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, 
>>>>>> struct amdgpu_ring *ring, u64 wptr)
>>>>>> +{
>>>>>> +    struct amdgpu_mux_entry *e;
>>>>>> +
>>>>>> +    e = amdgpu_get_sw_entry(mux, ring);
>>>>>> +    if (!e) {
>>>>>> +        DRM_ERROR("cannot find entry for sw ring\n");
>>>>>> +        return;
>>>>>> +    }
>>>>>> +
>>>>>> +    spin_lock(&mux->lock);
>>>>>
>>>>>
>>>>> A bit more generic question, I assume the spinlock here protects 
>>>>> from concurrent runs
>>>>> of amdgpu_ib_schedule. For them to be even theoretically 
>>>>> concurrent it must be from
>>>>> direct submissions to HW (because any scheduler mediated 
>>>>> submission is serialized though
>>>>> the dedicated scheduler worker thread). But in such case why we 
>>>>> protect only here ? If i am
>>>>> not missing something there is no total per HW ring lock when 
>>>>> calling amdgpu_ib_schedule today
>>>>> and we do a lot of HW accesses there to ring  which should 
>>>>> probably be protected from
>>>>> concurrent accesses.
>>>>>
>>>>> So if any one can answer this question ?
>>>>
>>>> Well what we have is in general two schedulers which push their 
>>>> work into one hardware ring.
>>>>
>>>> So we need a lock to make sure that only one is modifying the hw 
>>>> ring at the same time.
>>>>
>>>> From the implementation I think we first write the commands into a 
>>>> shadow ring buffer and then copy them over to the real hw ring here.
>>>>
>>>> So this is the only place where we actually touch the hw ring 
>>>> buffer and to need to grab the lock.
>>>>
>>>> Did I get this right?
>>>>
>>>> Thanks,
>>>> Christian.
>>>
>>>
>>> For the case of the sw ring yes, but I was asking in general, 
>>> accesses to real HW rings, amdgpu_ib_schedule writes to HW rings,
>>> we may be accessing same HW ring from 2 different contexts when 
>>> doing direct submissions (i.e. calling amdgpu_ib_schedule
>>> directly from 2 threads concurrently) this opens possibility to 
>>> concurrent access to HW. Or am i missing something here ?
>>
>> No, that's pretty much correct.
>>
>> The general idea is that amdgpu_ib_schedule() first writes into a 
>> separate software ring buffer for each scheduler. So no locking 
>> needed for that.
>>
>> Then when the set_wptr callback is called we grab the lock and copy 
>> the software ring content to the real hw ring and telling the hw to 
>> execute it.
>>
>> The spin_lock is to protect from concurrent hw access.
>>
>> Regards,
>> Christian.
>
>
> Look at 
> amdgpu_copy_buffer->amdgpu_job_submit_direct->amdgpu_ib_schedule->amdgpu_ring_commit->amdgpu_ring_set_wptr,
> at no point there lock is taken. The only lock i see that resembles 
> what you describe is for amdgpu_kiq.ring_lock. So this applies only
> to some of the code but not to all cases.

Sounds like we have a misunderstanding here.

The case we look at should be this:

amdgpu_job_run()->amdgpu_ib_schedule()->amdgpu_ring_commit()->amdgpu_ring_set_wptr()...amdgpu_ring_set_wptr_to_mux()

In amdgpu_ring_set_wptr_to_mux() we grab the lock, copy over the 
commands, commit them to the hw and then drop the lock.

Christian.

>
> Andrey
>
>
>>
>>
>>>
>>> Andrey
>>>
>>>
>>>>
>>>>>
>>>>>
>>>>>> +    e->sw_cptr = e->sw_wptr;
>>>>>> +    e->sw_wptr = wptr;
>>>>>> +    e->start_ptr_in_hw_ring = mux->real_ring->wptr;
>>>>>> +
>>>>>> +    if (copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr) == 0) {
>>>>>> +        e->end_ptr_in_hw_ring = mux->real_ring->wptr;
>>>>>> +        amdgpu_ring_commit(mux->real_ring);
>>>>>> +    }
>>>>>> +
>>>>>> +    spin_unlock(&mux->lock);
>>>>>> +}
>>>>>> +
>>>>>> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, 
>>>>>> struct amdgpu_ring *ring)
>>>>>> +{
>>>>>> +    struct amdgpu_mux_entry *e;
>>>>>> +
>>>>>> +    e = amdgpu_get_sw_entry(mux, ring);
>>>>>> +    if (!e) {
>>>>>> +        DRM_ERROR("cannot find entry for sw ring\n");
>>>>>> +        return 0;
>>>>>> +    }
>>>>>> +
>>>>>> +    return e->sw_wptr;
>>>>>> +}
>>>>>> +
>>>>>> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, 
>>>>>> struct amdgpu_ring *ring)
>>>>>> +{
>>>>>> +    struct amdgpu_mux_entry *e;
>>>>>> +    u64 r_rptr, r_wptr, offset, start, end;
>>>>>> +
>>>>>> +    e = amdgpu_get_sw_entry(mux, ring);
>>>>>> +    if (!e) {
>>>>>> +        DRM_ERROR("no sw entry found!\n");
>>>>>> +        return 0;
>>>>>> +    }
>>>>>> +
>>>>>> +    r_rptr = amdgpu_ring_get_rptr(mux->real_ring);
>>>>>> +    r_wptr = amdgpu_ring_get_wptr(mux->real_ring);
>>>>>> +
>>>>>> +    if (r_wptr < r_rptr)
>>>>>> +        r_wptr += mux->real_ring->ring_size >> 2;
>>>>>> +
>>>>>> +    start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask;
>>>>>> +    end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask;
>>>>>> +    if (start > end)
>>>>>> +        end += mux->real_ring->ring_size >> 2;
>>>>>> +    if (r_rptr <= end && r_rptr >= start) {
>>>>>> +        offset = r_rptr - start;
>>>>>> +        e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask;
>>>>>> +    } else if (r_rptr < start) {
>>>>>> +        e->sw_rptr = e->sw_cptr;
>>>>>> +    } else {
>>>>>> +        e->sw_rptr = e->sw_wptr;
>>>>>> +    }
>>>>>> +
>>>>>> +    return e->sw_rptr;
>>>>>> +}
>>>>>> +
>>>>>> +/*copy packages on sw ring range[begin, end) */
>>>>>> +static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, 
>>>>>> struct amdgpu_ring *ring,
>>>>>> +    u64 s_begin, u64 s_end)
>>>>>> +{
>>>>>> +    u64 begin, end, r_begin, r_end;
>>>>>> +    struct amdgpu_ring *real_ring = mux->real_ring;
>>>>>> +
>>>>>> +    begin = s_begin & ring->buf_mask;
>>>>>> +    end = s_end & ring->buf_mask;
>>>>>> +
>>>>>> +    r_begin = real_ring->wptr & real_ring->buf_mask;
>>>>>> +    if (begin == end)
>>>>>> +        return -ERANGE;
>>>>>> +    if (begin > end) {
>>>>>> +        amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) + 
>>>>>> end - begin);
>>>>>> +        amdgpu_ring_write_multiple(real_ring, (void 
>>>>>> *)&ring->ring[begin],
>>>>>> +            (ring->ring_size >> 2) - begin);
>>>>>> +        amdgpu_ring_write_multiple(real_ring, (void 
>>>>>> *)&ring->ring[0], end);
>>>>>> +    } else {
>>>>>> +        amdgpu_ring_alloc(real_ring, end - begin);
>>>>>> +        amdgpu_ring_write_multiple(real_ring, (void 
>>>>>> *)&ring->ring[begin], end - begin);
>>>>>> +    }
>>>>>> +
>>>>>> +    r_end = real_ring->wptr & real_ring->buf_mask;
>>>>>> +
>>>>>> +    return 0;
>>>>>> +}
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h 
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>>>> new file mode 100644
>>>>>> index 000000000000..d058c43bb063
>>>>>> --- /dev/null
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>>>> @@ -0,0 +1,67 @@
>>>>>> +/*
>>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>>> + *
>>>>>> + * Permission is hereby granted, free of charge, to any person 
>>>>>> obtaining a
>>>>>> + * copy of this software and associated documentation files (the 
>>>>>> "Software"),
>>>>>> + * to deal in the Software without restriction, including 
>>>>>> without limitation
>>>>>> + * the rights to use, copy, modify, merge, publish, distribute, 
>>>>>> sublicense,
>>>>>> + * and/or sell copies of the Software, and to permit persons to 
>>>>>> whom the
>>>>>> + * Software is furnished to do so, subject to the following 
>>>>>> conditions:
>>>>>> + *
>>>>>> + * The above copyright notice and this permission notice shall 
>>>>>> be included in
>>>>>> + * all copies or substantial portions of the Software.
>>>>>> + *
>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 
>>>>>> KIND, EXPRESS OR
>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>>>>> MERCHANTABILITY,
>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO 
>>>>>> EVENT SHALL
>>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
>>>>>> DAMAGES OR
>>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>>>>>> OTHERWISE,
>>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR 
>>>>>> THE USE OR
>>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>>> + *
>>>>>> + */
>>>>>> +
>>>>>> +#ifndef __AMDGPU_RING_MUX__
>>>>>> +#define __AMDGPU_RING_MUX__
>>>>>> +
>>>>>> +#include <linux/timer.h>
>>>>>> +#include <linux/spinlock.h>
>>>>>> +#include "amdgpu_ring.h"
>>>>>> +
>>>>>> +struct amdgpu_ring;
>>>>>> +/*
>>>>>> + * start_ptr_in_hw_ring - last copied start loc on hw ring
>>>>>> + * end_ptr_in_hw_ring - last copied end loc on hw ring
>>>>>> + * sw_cptr - the start of the copy pointer in the sw ring
>>>>>> + * sw_rptr - the read pointer in the sw ring
>>>>>> + * sw_wptr - the write pointer in the sw ring
>>>>>> + */
>>>>>> +struct amdgpu_mux_entry {
>>>>>> +    struct amdgpu_ring    *ring;
>>>>>> +    u64 start_ptr_in_hw_ring;
>>>>>> +    u64 end_ptr_in_hw_ring;
>>>>>> +
>>>>>> +    u64 sw_cptr;
>>>>>> +    u64 sw_rptr;
>>>>>> +    u64 sw_wptr;
>>>>>> +};
>>>>>> +
>>>>>> +struct amdgpu_ring_mux {
>>>>>> +    struct amdgpu_ring *real_ring;
>>>>>> +
>>>>>> +    struct amdgpu_mux_entry ring_entries[AMDGPU_MAX_GFX_RINGS];
>>>>>> +
>>>>>> +    unsigned num_ring_entries;
>>>>>> +
>>>>>> +    spinlock_t            lock;
>>>>>> +
>>>>>> +};
>>>>>> +
>>>>>> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct 
>>>>>> amdgpu_ring *ring);
>>>>>> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux);
>>>>>> +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, 
>>>>>> struct amdgpu_ring *ring);
>>>>>> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, 
>>>>>> struct amdgpu_ring *ring, u64 wptr);
>>>>>> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, 
>>>>>> struct amdgpu_ring *ring);
>>>>>> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, 
>>>>>> struct amdgpu_ring *ring);
>>>>>> +
>>>>>> +#endif
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c 
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>>>> new file mode 100644
>>>>>> index 000000000000..452d0ff37758
>>>>>> --- /dev/null
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>>>> @@ -0,0 +1,204 @@
>>>>>> +/*
>>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>>> + * All Rights Reserved.
>>>>>> + *
>>>>>> + * Permission is hereby granted, free of charge, to any person 
>>>>>> obtaining a
>>>>>> + * copy of this software and associated documentation files (the
>>>>>> + * "Software"), to deal in the Software without restriction, 
>>>>>> including
>>>>>> + * without limitation the rights to use, copy, modify, merge, 
>>>>>> publish,
>>>>>> + * distribute, sub license, and/or sell copies of the Software, 
>>>>>> and to
>>>>>> + * permit persons to whom the Software is furnished to do so, 
>>>>>> subject to
>>>>>> + * the following conditions:
>>>>>> + *
>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 
>>>>>> KIND, EXPRESS OR
>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>>>>> MERCHANTABILITY,
>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO 
>>>>>> EVENT SHALL
>>>>>> + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE 
>>>>>> FOR ANY CLAIM,
>>>>>> + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
>>>>>> TORT OR
>>>>>> + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
>>>>>> SOFTWARE OR THE
>>>>>> + * USE OR OTHER DEALINGS IN THE SOFTWARE.
>>>>>> + *
>>>>>> + * The above copyright notice and this permission notice 
>>>>>> (including the
>>>>>> + * next paragraph) shall be included in all copies or 
>>>>>> substantial portions
>>>>>> + * of the Software.
>>>>>> + *
>>>>>> + */
>>>>>> +
>>>>>> +#include "amdgpu_sw_ring.h"
>>>>>> +#include "amdgpu_ring_mux.h"
>>>>>> +
>>>>>> +#define amdgpu_ring_get_gpu_addr(ring, offset) \
>>>>>> +    (ring->is_mes_queue ?                        \
>>>>>> +     (ring->mes_ctx->meta_data_gpu_addr + offset) :            \
>>>>>> +     (ring->adev->wb.gpu_addr + offset * 4))
>>>>>> +
>>>>>> +#define amdgpu_ring_get_cpu_addr(ring, offset) \
>>>>>> +    (ring->is_mes_queue ?                        \
>>>>>> +     (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + 
>>>>>> offset) : \
>>>>>> +     (&ring->adev->wb.wb[offset]))
>>>>>> +
>>>>>> +
>>>>>> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct 
>>>>>> amdgpu_ring *ring,
>>>>>> +             unsigned int max_dw, struct amdgpu_irq_src *irq_src,
>>>>>> +             unsigned int irq_type, unsigned int hw_prio,
>>>>>> +             atomic_t *sched_score)
>>>>>> +{
>>>>>> +    int r;
>>>>>> +    int sched_hw_submission = amdgpu_sched_hw_submission;
>>>>>> +    u32 *num_sched;
>>>>>> +    u32 hw_ip;
>>>>>> +
>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>> +
>>>>>> +    if (ring->adev == NULL) {
>>>>>> +        if (adev->num_rings >= AMDGPU_MAX_RINGS)
>>>>>> +            return -EINVAL;
>>>>>> +
>>>>>> +        ring->adev = adev;
>>>>>> +        ring->num_hw_submission = sched_hw_submission;
>>>>>> +        ring->sched_score = sched_score;
>>>>>> +        ring->vmid_wait = dma_fence_get_stub();
>>>>>> +
>>>>>> +        if (!ring->is_mes_queue) {
>>>>>> +            ring->idx = adev->num_rings++;
>>>>>> +            adev->rings[ring->idx] = ring;
>>>>>> +        }
>>>>>> +
>>>>>> +        r = amdgpu_fence_driver_init_ring(ring);
>>>>>> +        if (r)
>>>>>> +            return r;
>>>>>> +    }
>>>>>> +
>>>>>> +    r = amdgpu_device_wb_get(adev, &ring->fence_offs);
>>>>>> +    if (r) {
>>>>>> +        dev_err(adev->dev, "(%d) ring fence_offs wb alloc 
>>>>>> failed\n", r);
>>>>>> +        return r;
>>>>>> +    }
>>>>>> +
>>>>>> +    r = amdgpu_device_wb_get(adev, &ring->fence_offs);
>>>>>> +    if (r) {
>>>>>> +        dev_err(adev->dev, "(%d) ring fence_offs wb alloc 
>>>>>> failed\n", r);
>>>>>> +        return r;
>>>>>> +    }
>>>>>
>>>>>
>>>>> Looks like a typo copy pase duplicate of the above
>>>>>
>>>>>> +
>>>>>> +    r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
>>>>>> +    if (r) {
>>>>>> +        dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc 
>>>>>> failed\n", r);
>>>>>> +        return r;
>>>>>> +    }
>>>>>> +
>>>>>> +    r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
>>>>>> +    if (r) {
>>>>>> +        dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc 
>>>>>> failed\n", r);
>>>>>> +        return r;
>>>>>> +    }
>>>>>> +
>>>>>> +    ring->fence_gpu_addr =
>>>>>> +        amdgpu_ring_get_gpu_addr(ring, ring->fence_offs);
>>>>>> +    ring->fence_cpu_addr =
>>>>>> +        amdgpu_ring_get_cpu_addr(ring, ring->fence_offs);
>>>>>> +
>>>>>> +    ring->trail_fence_gpu_addr =
>>>>>> +        amdgpu_ring_get_gpu_addr(ring, ring->trail_fence_offs);
>>>>>> +    ring->trail_fence_cpu_addr =
>>>>>> +        amdgpu_ring_get_cpu_addr(ring, ring->trail_fence_offs);
>>>>>> +
>>>>>> +    ring->cond_exe_gpu_addr =
>>>>>> +        amdgpu_ring_get_gpu_addr(ring, ring->cond_exe_offs);
>>>>>> +    ring->cond_exe_cpu_addr =
>>>>>> +        amdgpu_ring_get_cpu_addr(ring, ring->cond_exe_offs);
>>>>>> +
>>>>>> +    /* always set cond_exec_polling to CONTINUE */
>>>>>> +    *ring->cond_exe_cpu_addr = 1;
>>>>>> +
>>>>>> +    r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
>>>>>> +    if (r) {
>>>>>> +        dev_err(adev->dev, "failed initializing fences (%d).\n", 
>>>>>> r);
>>>>>> +        return r;
>>>>>> +    }
>>>>>> +
>>>>>> +    ring->ring_size = roundup_pow_of_two(max_dw * 4 * 
>>>>>> sched_hw_submission);
>>>>>> +
>>>>>> +    ring->buf_mask = (ring->ring_size / 4) - 1;
>>>>>> +    ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
>>>>>> +        0xffffffffffffffff : ring->buf_mask;
>>>>>> +
>>>>>> +    /* Allocate ring buffer */
>>>>>> +    if (ring->ring == NULL) {
>>>>>> +        ring->ring = kzalloc(ring->ring_size + 
>>>>>> ring->funcs->extra_dw, GFP_KERNEL);
>>>>>> +        if (!ring->ring) {
>>>>>> +            dev_err(adev->dev, "(%d) swring create failed\n", r);
>>>>>> +            return r;
>>>>>> +        }
>>>>>> +
>>>>>> +        amdgpu_ring_clear_ring(ring);
>>>>>> +    }
>>>>>> +
>>>>>> +    ring->max_dw = max_dw;
>>>>>> +    ring->hw_prio = hw_prio;
>>>>>> +
>>>>>> +    if (!ring->no_scheduler) {
>>>>>> +        hw_ip = ring->funcs->type;
>>>>>> +        num_sched = &adev->gpu_sched[hw_ip][hw_prio].num_scheds;
>>>>>> + adev->gpu_sched[hw_ip][hw_prio].sched[(*num_sched)++] =
>>>>>> +            &ring->sched;
>>>>>> +    }
>>>>>> +
>>>>>> +    return 0;
>>>>>> +}
>>>>>
>>>>>
>>>>> In general i see this function is a big one to one subset of 
>>>>> amdgpu_ring_init.
>>>>> Could you maybe see a way to refactor such that this function is 
>>>>> the base
>>>>> and for HW related code that different (like BO allocation for 
>>>>> ring buffer) you
>>>>> maybe can add if (!ring->sw_ring)... and add those code snippets ? 
>>>>> To avoid
>>>>> substantial code duplication.
>>>>>
>>>>> Andrey
>>>>>
>>>>>
>>>>>> +
>>>>>> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring)
>>>>>> +{
>>>>>> +    struct amdgpu_device *adev = ring->adev;
>>>>>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>>>>>> +
>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>> +    return amdgpu_ring_get_rptr_from_mux(mux, ring);
>>>>>> +}
>>>>>> +
>>>>>> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring)
>>>>>> +{
>>>>>> +    struct amdgpu_device *adev = ring->adev;
>>>>>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>>>>>> +
>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>> +    return amdgpu_ring_get_wptr_from_mux(mux, ring);
>>>>>> +}
>>>>>> +
>>>>>> +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring)
>>>>>> +{
>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>> +}
>>>>>> +
>>>>>> +void amdgpu_sw_ring_commit(struct amdgpu_ring *ring)
>>>>>> +{
>>>>>> +    struct amdgpu_device *adev = ring->adev;
>>>>>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>>>>>> +
>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>> +    amdgpu_ring_set_wptr_to_mux(mux, ring, ring->wptr);
>>>>>> +}
>>>>>> +
>>>>>> +void amdgpu_sw_ring_fini(struct amdgpu_ring *ring)
>>>>>> +{
>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>> +
>>>>>> +    /* Not to finish a ring which is not initialized */
>>>>>> +    if (!(ring->adev) ||
>>>>>> +        (!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
>>>>>> +        return;
>>>>>> +
>>>>>> +    ring->sched.ready = false;
>>>>>> +
>>>>>> +    amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
>>>>>> +    amdgpu_device_wb_free(ring->adev, ring->fence_offs);
>>>>>> +
>>>>>> +    kfree((void *)ring->ring);
>>>>>> +
>>>>>> +    dma_fence_put(ring->vmid_wait);
>>>>>> +    ring->vmid_wait = NULL;
>>>>>> +    ring->me = 0;
>>>>>> +
>>>>>> +    ring->adev->rings[ring->idx] = NULL;
>>>>>> +}
>>>>>> +
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h 
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>>>> new file mode 100644
>>>>>> index 000000000000..c05d8a94ad0c
>>>>>> --- /dev/null
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>>>> @@ -0,0 +1,48 @@
>>>>>> +/*
>>>>>> + * Copyright 2012 Advanced Micro Devices, Inc.
>>>>>> + *
>>>>>> + * Permission is hereby granted, free of charge, to any person 
>>>>>> obtaining a
>>>>>> + * copy of this software and associated documentation files (the 
>>>>>> "Software"),
>>>>>> + * to deal in the Software without restriction, including 
>>>>>> without limitation
>>>>>> + * the rights to use, copy, modify, merge, publish, distribute, 
>>>>>> sublicense,
>>>>>> + * and/or sell copies of the Software, and to permit persons to 
>>>>>> whom the
>>>>>> + * Software is furnished to do so, subject to the following 
>>>>>> conditions:
>>>>>> + *
>>>>>> + * The above copyright notice and this permission notice shall 
>>>>>> be included in
>>>>>> + * all copies or substantial portions of the Software.
>>>>>> + *
>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 
>>>>>> KIND, EXPRESS OR
>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>>>>> MERCHANTABILITY,
>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO 
>>>>>> EVENT SHALL
>>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
>>>>>> DAMAGES OR
>>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>>>>>> OTHERWISE,
>>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR 
>>>>>> THE USE OR
>>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>>> + *
>>>>>> + */
>>>>>> +
>>>>>> +#include <drm/amdgpu_drm.h>
>>>>>> +#include <drm/gpu_scheduler.h>
>>>>>> +#include <drm/drm_print.h>
>>>>>> +
>>>>>> +#include "amdgpu_irq.h"
>>>>>> +#include "amdgpu_ring.h"
>>>>>> +#include "amdgpu.h"
>>>>>> +
>>>>>> +#ifndef __AMDGPU_SWRING_H__
>>>>>> +#define __AMDGPU_SWRING_H__
>>>>>> +
>>>>>> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct 
>>>>>> amdgpu_ring *sw_ring,
>>>>>> +             unsigned int max_dw, struct amdgpu_irq_src *irq_src,
>>>>>> +             unsigned int irq_type, unsigned int hw_prio,
>>>>>> +             atomic_t *sched_score);
>>>>>> +void amdgpu_sw_ring_fini(struct amdgpu_ring *ring);
>>>>>> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring);
>>>>>> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring);
>>>>>> +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring);
>>>>>> +void amdgpu_sw_ring_commit(struct amdgpu_ring *ring);
>>>>>> +
>>>>>> +void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
>>>>>> +void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
>>>>>> +
>>>>>> +#endif
>>>>
>>


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3)
  2022-09-12 15:51           ` Liu, Shaoyun
@ 2022-09-12 16:23             ` Christian König
  0 siblings, 0 replies; 28+ messages in thread
From: Christian König @ 2022-09-12 16:23 UTC (permalink / raw)
  To: Liu, Shaoyun, Grodzovsky, Andrey, Zhu, Jiadong, amd-gfx; +Cc: Huang, Ray

We try to provide a high and low priority gfx ring for GFX9 (and maybe 
other) hw generations which don't support multiple gfx hw rings, but 
just MCBP.

Regards,
Christian.

Am 12.09.22 um 17:51 schrieb Liu, Shaoyun:
> [AMD Official Use Only - General]
>
> Just curious about what this gfx software ring is used for? Who decides the priority — can a user request a higher priority, or is it predefined?
>
> Thanks
> Shaoyun.liu
>
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Andrey Grodzovsky
> Sent: Monday, September 12, 2022 11:34 AM
> To: Christian König <ckoenig.leichtzumerken@gmail.com>; Zhu, Jiadong <Jiadong.Zhu@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Huang, Ray <Ray.Huang@amd.com>
> Subject: Re: [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3)
>
> On 2022-09-12 09:27, Christian König wrote:
>
>> Am 12.09.22 um 15:22 schrieb Andrey Grodzovsky:
>>> On 2022-09-12 06:20, Christian König wrote:
>>>> Am 09.09.22 um 18:45 schrieb Andrey Grodzovsky:
>>>>> On 2022-09-08 21:50, jiadong.zhu@amd.com wrote:
>>>>>> From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>
>>>>>>
>>>>>> The software ring is created to support priority context while
>>>>>> there is only one hardware queue for gfx.
>>>>>>
>>>>>> Every software ring has its own fence driver and can be used as an
>>>>>> ordinary ring for the gpu_scheduler.
>>>>>> Multiple software rings are bound to a real ring with the ring
>>>>>> muxer. The packets committed on the software ring are copied to
>>>>>> the real ring.
>>>>>>
>>>>>> v2: use array to store software ring entry.
>>>>>> v3: remove unnecessary prints.
>>>>>>
>>>>>> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
>>>>>> ---
>>>>>>    drivers/gpu/drm/amd/amdgpu/Makefile          |   3 +-
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h      |   3 +
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h     |   3 +
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 182
>>>>>> +++++++++++++++++
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  67 ++++++
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  | 204
>>>>>> +++++++++++++++++++
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h  |  48 +++++
>>>>>>    7 files changed, 509 insertions(+), 1 deletion(-)
>>>>>>    create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>>>>    create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>>>>    create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>>>>    create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>>> index 3e0e2eb7e235..85224bc81ce5 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>>> @@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>>>>>>        amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o
>>>>>> amdgpu_nbio.o \
>>>>>>        amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o
>>>>>> amdgpu_rap.o \
>>>>>>        amdgpu_fw_attestation.o amdgpu_securedisplay.o \
>>>>>> -    amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
>>>>>> +    amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
>>>>>> +    amdgpu_sw_ring.o amdgpu_ring_mux.o
>>>>>>      amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>>>>>>    diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>>>> index 53526ffb2ce1..0de8e3cd0f1c 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>>>> @@ -33,6 +33,7 @@
>>>>>>    #include "amdgpu_imu.h"
>>>>>>    #include "soc15.h"
>>>>>>    #include "amdgpu_ras.h"
>>>>>> +#include "amdgpu_ring_mux.h"
>>>>>>      /* GFX current status */
>>>>>>    #define AMDGPU_GFX_NORMAL_MODE            0x00000000L @@ -346,6
>>>>>> +347,8 @@ struct amdgpu_gfx {
>>>>>>        struct amdgpu_gfx_ras        *ras;
>>>>>>          bool                is_poweron;
>>>>>> +
>>>>>> +    struct amdgpu_ring_mux            muxer;
>>>>>>    };
>>>>>>      #define amdgpu_gfx_get_gpu_clock_counter(adev)
>>>>>> (adev)->gfx.funcs->get_gpu_clock_counter((adev))
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>>>> index 7d89a52091c0..fe33a683bfba 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>>>> @@ -278,6 +278,9 @@ struct amdgpu_ring {
>>>>>>        bool            is_mes_queue;
>>>>>>        uint32_t        hw_queue_id;
>>>>>>        struct amdgpu_mes_ctx_data *mes_ctx;
>>>>>> +
>>>>>> +    bool            is_sw_ring;
>>>>>> +
>>>>>>    };
>>>>>>      #define amdgpu_ring_parse_cs(r, p, job, ib)
>>>>>> ((r)->funcs->parse_cs((p), (job), (ib))) diff --git
>>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>>>> new file mode 100644
>>>>>> index 000000000000..ea4a3c66119a
>>>>>> --- /dev/null
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>>>> @@ -0,0 +1,182 @@
>>>>>> +/*
>>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>>> + *
>>>>>> + * Permission is hereby granted, free of charge, to any person
>>>>>> obtaining a
>>>>>> + * copy of this software and associated documentation files (the
>>>>>> "Software"),
>>>>>> + * to deal in the Software without restriction, including without
>>>>>> limitation
>>>>>> + * the rights to use, copy, modify, merge, publish, distribute,
>>>>>> sublicense,
>>>>>> + * and/or sell copies of the Software, and to permit persons to
>>>>>> whom the
>>>>>> + * Software is furnished to do so, subject to the following
>>>>>> conditions:
>>>>>> + *
>>>>>> + * The above copyright notice and this permission notice shall be
>>>>>> included in
>>>>>> + * all copies or substantial portions of the Software.
>>>>>> + *
>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
>>>>>> KIND, EXPRESS OR
>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>>>>> MERCHANTABILITY,
>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
>>>>>> EVENT SHALL
>>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
>>>>>> DAMAGES OR
>>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>>>>>> OTHERWISE,
>>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
>>>>>> USE OR
>>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>>> + *
>>>>>> + */
>>>>>> +
>>>>>> +#include <drm/drm_print.h>
>>>>>> +
>>>>>> +#include "amdgpu_ring_mux.h"
>>>>>> +#include "amdgpu_ring.h"
>>>>>> +
>>>>>> +#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ/2)
>>>>>> +
>>>>>> +static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux,
>>>>>> struct amdgpu_ring *ring,
>>>>>> +    u64 s_begin, u64 s_end);
>>>>>> +
>>>>>> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct
>>>>>> amdgpu_ring *ring)
>>>>>> +{
>>>>>> +    mux->real_ring = ring;
>>>>>> +    memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
>>>>>> +    mux->num_ring_entries = 0;
>>>>>> +    spin_lock_init(&mux->lock);
>>>>>> +    return 0;
>>>>>> +}
>>>>>> +
>>>>>> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux) {
>>>>>> +    memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
>>>>>> +    mux->num_ring_entries = 0;
>>>>>> +}
>>>>>> +
>>>>>> +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux,
>>>>>> struct amdgpu_ring *ring)
>>>>>> +{
>>>>>> +    struct amdgpu_mux_entry *e;
>>>>>> +
>>>>>> +    if (mux->num_ring_entries == AMDGPU_MAX_GFX_RINGS) {
>>>>>> +        DRM_ERROR("adding sw ring exceeds max gfx num\n");
>>>>>> +        return -ENOMEM;
>>>>>> +    }
>>>>>> +
>>>>>> +    e = &mux->ring_entries[mux->num_ring_entries++];
>>>>>> +
>>>>>> +    e->ring = ring;
>>>>>> +    e->start_ptr_in_hw_ring = 0;
>>>>>> +    e->end_ptr_in_hw_ring = 0;
>>>>>> +    e->sw_cptr = 0;
>>>>>> +    e->sw_rptr = 0;
>>>>>> +    e->sw_wptr = 0;
>>>>>> +
>>>>>> +    return 0;
>>>>>> +}
>>>>>> +
>>>>>> +static struct amdgpu_mux_entry *amdgpu_get_sw_entry(struct
>>>>>> amdgpu_ring_mux *mux,
>>>>>> +                struct amdgpu_ring *ring) {
>>>>>> +    struct amdgpu_mux_entry *e;
>>>>>> +    int i;
>>>>>> +
>>>>>> +    e = NULL;
>>>>>> +    for (i = 0; i < mux->num_ring_entries; i++) {
>>>>>> +        if (mux->ring_entries[i].ring == ring) {
>>>>>> +            e = &mux->ring_entries[i];
>>>>>> +            break;
>>>>>> +        }
>>>>>> +    }
>>>>>> +
>>>>>> +    return e;
>>>>>> +}
>>>>>> +
>>>>>> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux,
>>>>>> struct amdgpu_ring *ring, u64 wptr)
>>>>>> +{
>>>>>> +    struct amdgpu_mux_entry *e;
>>>>>> +
>>>>>> +    e = amdgpu_get_sw_entry(mux, ring);
>>>>>> +    if (!e) {
>>>>>> +        DRM_ERROR("cannot find entry for sw ring\n");
>>>>>> +        return;
>>>>>> +    }
>>>>>> +
>>>>>> +    spin_lock(&mux->lock);
>>>>>
>>>>> A bit more generic question, I assume the spinlock here protects
>>>>> from concurrent runs of amdgpu_ib_schedule. For them to be even
>>>>> theoretically concurrent it must be from direct submissions to HW
>>>>> (because any scheduler mediated submission is serialized though the
>>>>> dedicated scheduler worker thread). But in such case why we protect
>>>>> only here ? If i am not missing something there is no total per HW
>>>>> ring lock when calling amdgpu_ib_schedule today and we do a lot of
>>>>> HW accesses there to ring  which should probably be protected from
>>>>> concurrent accesses.
>>>>>
>>>>> So if any one can answer this question ?
>>>> Well what we have is in general two schedulers which push their work
>>>> into one hardware ring.
>>>>
>>>> So we need a lock to make sure that only one is modifying the hw
>>>> ring at the same time.
>>>>
>>>>  From the implementation I think we first write the commands into a
>>>> shadow ring buffer and then copy them over to the real hw ring here.
>>>>
>>>> So this is the only place where we actually touch the hw ring buffer
>>>> and to need to grab the lock.
>>>>
>>>> Did I get this right?
>>>>
>>>> Thanks,
>>>> Christian.
>>>
>>> For the case of the sw ring yes, but I was asking in general,
>>> accesses to real HW rings, amdgpu_ib_schedule writes to HW rings, we
>>> may be accessing same HW ring from 2 different contexts when doing
>>> direct submissions (i.e. calling  amdgpu_ib_schedule directly from 2
>>> threads concurrently) this opens possibility to concurrent access to
>>> HW. Or am i missing something here ?
>> No, that's pretty much correct.
>>
>> The general idea is that amdgpu_ib_schedule() first writes into a
>> separate software ring buffer for each scheduler. So no locking needed
>> for that.
>>
>> Then when the set_wptr callback is called we grab the lock and copy
>> the software ring content to the real hw ring and telling the hw to
>> execute it.
>>
>> The spin_lock is to protect from concurrent hw access.
>>
>> Regards,
>> Christian.
>
> Look at
> amdgpu_copy_buffer->amdgpu_job_submit_direct->amdgpu_ib_schedule->amdgpu_ring_commit->amdgpu_ring_set_wptr,
> at no point along that path is a lock taken. The only lock I see that resembles what you describe is amdgpu_kiq.ring_lock. So this applies only to some of the code paths, not to all cases.
>
> Andrey
>
>
>>
>>> Andrey
>>>
>>>
>>>>>
>>>>>> +    e->sw_cptr = e->sw_wptr;
>>>>>> +    e->sw_wptr = wptr;
>>>>>> +    e->start_ptr_in_hw_ring = mux->real_ring->wptr;
>>>>>> +
>>>>>> +    if (copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr) == 0)
>>>>>> +{
>>>>>> +        e->end_ptr_in_hw_ring = mux->real_ring->wptr;
>>>>>> +        amdgpu_ring_commit(mux->real_ring);
>>>>>> +    }
>>>>>> +
>>>>>> +    spin_unlock(&mux->lock);
>>>>>> +}
>>>>>> +
>>>>>> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux,
>>>>>> struct amdgpu_ring *ring)
>>>>>> +{
>>>>>> +    struct amdgpu_mux_entry *e;
>>>>>> +
>>>>>> +    e = amdgpu_get_sw_entry(mux, ring);
>>>>>> +    if (!e) {
>>>>>> +        DRM_ERROR("cannot find entry for sw ring\n");
>>>>>> +        return 0;
>>>>>> +    }
>>>>>> +
>>>>>> +    return e->sw_wptr;
>>>>>> +}
>>>>>> +
>>>>>> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux,
>>>>>> struct amdgpu_ring *ring)
>>>>>> +{
>>>>>> +    struct amdgpu_mux_entry *e;
>>>>>> +    u64 r_rptr, r_wptr, offset, start, end;
>>>>>> +
>>>>>> +    e = amdgpu_get_sw_entry(mux, ring);
>>>>>> +    if (!e) {
>>>>>> +        DRM_ERROR("no sw entry found!\n");
>>>>>> +        return 0;
>>>>>> +    }
>>>>>> +
>>>>>> +    r_rptr = amdgpu_ring_get_rptr(mux->real_ring);
>>>>>> +    r_wptr = amdgpu_ring_get_wptr(mux->real_ring);
>>>>>> +
>>>>>> +    if (r_wptr < r_rptr)
>>>>>> +        r_wptr += mux->real_ring->ring_size >> 2;
>>>>>> +
>>>>>> +    start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask;
>>>>>> +    end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask;
>>>>>> +    if (start > end)
>>>>>> +        end += mux->real_ring->ring_size >> 2;
>>>>>> +    if (r_rptr <= end && r_rptr >= start) {
>>>>>> +        offset = r_rptr - start;
>>>>>> +        e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask;
>>>>>> +    } else if (r_rptr < start) {
>>>>>> +        e->sw_rptr = e->sw_cptr;
>>>>>> +    } else {
>>>>>> +        e->sw_rptr = e->sw_wptr;
>>>>>> +    }
>>>>>> +
>>>>>> +    return e->sw_rptr;
>>>>>> +}
>>>>>> +
>>>>>> +/*copy packages on sw ring range[begin, end) */ static int
>>>>>> +copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux,
>>>>>> struct amdgpu_ring *ring,
>>>>>> +    u64 s_begin, u64 s_end)
>>>>>> +{
>>>>>> +    u64 begin, end, r_begin, r_end;
>>>>>> +    struct amdgpu_ring *real_ring = mux->real_ring;
>>>>>> +
>>>>>> +    begin = s_begin & ring->buf_mask;
>>>>>> +    end = s_end & ring->buf_mask;
>>>>>> +
>>>>>> +    r_begin = real_ring->wptr & real_ring->buf_mask;
>>>>>> +    if (begin == end)
>>>>>> +        return -ERANGE;
>>>>>> +    if (begin > end) {
>>>>>> +        amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) + end
>>>>>> - begin);
>>>>>> +        amdgpu_ring_write_multiple(real_ring, (void
>>>>>> *)&ring->ring[begin],
>>>>>> +            (ring->ring_size >> 2) - begin);
>>>>>> +        amdgpu_ring_write_multiple(real_ring, (void
>>>>>> *)&ring->ring[0], end);
>>>>>> +    } else {
>>>>>> +        amdgpu_ring_alloc(real_ring, end - begin);
>>>>>> +        amdgpu_ring_write_multiple(real_ring, (void
>>>>>> *)&ring->ring[begin], end - begin);
>>>>>> +    }
>>>>>> +
>>>>>> +    r_end = real_ring->wptr & real_ring->buf_mask;
>>>>>> +
>>>>>> +    return 0;
>>>>>> +}
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>>>> new file mode 100644
>>>>>> index 000000000000..d058c43bb063
>>>>>> --- /dev/null
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>>>> @@ -0,0 +1,67 @@
>>>>>> +/*
>>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>>> + *
>>>>>> + * Permission is hereby granted, free of charge, to any person
>>>>>> obtaining a
>>>>>> + * copy of this software and associated documentation files (the
>>>>>> "Software"),
>>>>>> + * to deal in the Software without restriction, including without
>>>>>> limitation
>>>>>> + * the rights to use, copy, modify, merge, publish, distribute,
>>>>>> sublicense,
>>>>>> + * and/or sell copies of the Software, and to permit persons to
>>>>>> whom the
>>>>>> + * Software is furnished to do so, subject to the following
>>>>>> conditions:
>>>>>> + *
>>>>>> + * The above copyright notice and this permission notice shall be
>>>>>> included in
>>>>>> + * all copies or substantial portions of the Software.
>>>>>> + *
>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
>>>>>> KIND, EXPRESS OR
>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>>>>> MERCHANTABILITY,
>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
>>>>>> EVENT SHALL
>>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
>>>>>> DAMAGES OR
>>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>>>>>> OTHERWISE,
>>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
>>>>>> USE OR
>>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>>> + *
>>>>>> + */
>>>>>> +
>>>>>> +#ifndef __AMDGPU_RING_MUX__
>>>>>> +#define __AMDGPU_RING_MUX__
>>>>>> +
>>>>>> +#include <linux/timer.h>
>>>>>> +#include <linux/spinlock.h>
>>>>>> +#include "amdgpu_ring.h"
>>>>>> +
>>>>>> +struct amdgpu_ring;
>>>>>> +/*
>>>>>> + * start_ptr_in_hw_ring - last copied start loc on hw ring
>>>>>> + * end_ptr_in_hw_ring - last copied end loc on hw ring  *sw_cptr
>>>>>> +-the begin of copy ptr in sw ring  *sw_rptr; the read ptr in sw
>>>>>> +ring  *sw_wptr; the write ptr in sw ring  */ struct
>>>>>> +amdgpu_mux_entry {
>>>>>> +    struct amdgpu_ring    *ring;
>>>>>> +    u64 start_ptr_in_hw_ring;
>>>>>> +    u64 end_ptr_in_hw_ring;
>>>>>> +
>>>>>> +    u64 sw_cptr;
>>>>>> +    u64 sw_rptr;
>>>>>> +    u64 sw_wptr;
>>>>>> +};
>>>>>> +
>>>>>> +struct amdgpu_ring_mux {
>>>>>> +    struct amdgpu_ring *real_ring;
>>>>>> +
>>>>>> +    struct amdgpu_mux_entry ring_entries[AMDGPU_MAX_GFX_RINGS];
>>>>>> +
>>>>>> +    unsigned num_ring_entries;
>>>>>> +
>>>>>> +    spinlock_t            lock;
>>>>>> +
>>>>>> +};
>>>>>> +
>>>>>> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct
>>>>>> amdgpu_ring *ring);
>>>>>> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux); int
>>>>>> +amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux,
>>>>>> struct amdgpu_ring *ring);
>>>>>> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux,
>>>>>> struct amdgpu_ring *ring, u64 wptr);
>>>>>> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux,
>>>>>> struct amdgpu_ring *ring);
>>>>>> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux,
>>>>>> struct amdgpu_ring *ring);
>>>>>> +
>>>>>> +#endif
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>>>> new file mode 100644
>>>>>> index 000000000000..452d0ff37758
>>>>>> --- /dev/null
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>>>> @@ -0,0 +1,204 @@
>>>>>> +/*
>>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>>> + * All Rights Reserved.
>>>>>> + *
>>>>>> + * Permission is hereby granted, free of charge, to any person
>>>>>> obtaining a
>>>>>> + * copy of this software and associated documentation files (the
>>>>>> + * "Software"), to deal in the Software without restriction,
>>>>>> including
>>>>>> + * without limitation the rights to use, copy, modify, merge,
>>>>>> publish,
>>>>>> + * distribute, sub license, and/or sell copies of the Software,
>>>>>> and to
>>>>>> + * permit persons to whom the Software is furnished to do so,
>>>>>> subject to
>>>>>> + * the following conditions:
>>>>>> + *
>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
>>>>>> KIND, EXPRESS OR
>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>>>>> MERCHANTABILITY,
>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO
>>>>>> EVENT SHALL
>>>>>> + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE
>>>>>> FOR ANY CLAIM,
>>>>>> + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
>>>>>> TORT OR
>>>>>> + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
>>>>>> SOFTWARE OR THE
>>>>>> + * USE OR OTHER DEALINGS IN THE SOFTWARE.
>>>>>> + *
>>>>>> + * The above copyright notice and this permission notice
>>>>>> (including the
>>>>>> + * next paragraph) shall be included in all copies or substantial
>>>>>> portions
>>>>>> + * of the Software.
>>>>>> + *
>>>>>> + */
>>>>>> +
>>>>>> +#include "amdgpu_sw_ring.h"
>>>>>> +#include "amdgpu_ring_mux.h"
>>>>>> +
>>>>>> +#define amdgpu_ring_get_gpu_addr(ring, offset) \
>>>>>> +    (ring->is_mes_queue ?                        \
>>>>>> +     (ring->mes_ctx->meta_data_gpu_addr + offset) :            \
>>>>>> +     (ring->adev->wb.gpu_addr + offset * 4))
>>>>>> +
>>>>>> +#define amdgpu_ring_get_cpu_addr(ring, offset) \
>>>>>> +    (ring->is_mes_queue ?                        \
>>>>>> +     (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset)
>>>>>> : \
>>>>>> +     (&ring->adev->wb.wb[offset]))
>>>>>> +
>>>>>> +
>>>>>> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct
>>>>>> amdgpu_ring *ring,
>>>>>> +             unsigned int max_dw, struct amdgpu_irq_src *irq_src,
>>>>>> +             unsigned int irq_type, unsigned int hw_prio,
>>>>>> +             atomic_t *sched_score) {
>>>>>> +    int r;
>>>>>> +    int sched_hw_submission = amdgpu_sched_hw_submission;
>>>>>> +    u32 *num_sched;
>>>>>> +    u32 hw_ip;
>>>>>> +
>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>> +
>>>>>> +    if (ring->adev == NULL) {
>>>>>> +        if (adev->num_rings >= AMDGPU_MAX_RINGS)
>>>>>> +            return -EINVAL;
>>>>>> +
>>>>>> +        ring->adev = adev;
>>>>>> +        ring->num_hw_submission = sched_hw_submission;
>>>>>> +        ring->sched_score = sched_score;
>>>>>> +        ring->vmid_wait = dma_fence_get_stub();
>>>>>> +
>>>>>> +        if (!ring->is_mes_queue) {
>>>>>> +            ring->idx = adev->num_rings++;
>>>>>> +            adev->rings[ring->idx] = ring;
>>>>>> +        }
>>>>>> +
>>>>>> +        r = amdgpu_fence_driver_init_ring(ring);
>>>>>> +        if (r)
>>>>>> +            return r;
>>>>>> +    }
>>>>>> +
>>>>>> +    r = amdgpu_device_wb_get(adev, &ring->fence_offs);
>>>>>> +    if (r) {
>>>>>> +        dev_err(adev->dev, "(%d) ring fence_offs wb alloc
>>>>>> failed\n", r);
>>>>>> +        return r;
>>>>>> +    }
>>>>>> +
>>>>>> +    r = amdgpu_device_wb_get(adev, &ring->fence_offs);
>>>>>> +    if (r) {
>>>>>> +        dev_err(adev->dev, "(%d) ring fence_offs wb alloc
>>>>>> failed\n", r);
>>>>>> +        return r;
>>>>>> +    }
>>>>>
>>>>> Looks like a typo: a copy-paste duplicate of the above
>>>>>
>>>>>> +
>>>>>> +    r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
>>>>>> +    if (r) {
>>>>>> +        dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc
>>>>>> failed\n", r);
>>>>>> +        return r;
>>>>>> +    }
>>>>>> +
>>>>>> +    r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
>>>>>> +    if (r) {
>>>>>> +        dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc
>>>>>> failed\n", r);
>>>>>> +        return r;
>>>>>> +    }
>>>>>> +
>>>>>> +    ring->fence_gpu_addr =
>>>>>> +        amdgpu_ring_get_gpu_addr(ring, ring->fence_offs);
>>>>>> +    ring->fence_cpu_addr =
>>>>>> +        amdgpu_ring_get_cpu_addr(ring, ring->fence_offs);
>>>>>> +
>>>>>> +    ring->trail_fence_gpu_addr =
>>>>>> +        amdgpu_ring_get_gpu_addr(ring, ring->trail_fence_offs);
>>>>>> +    ring->trail_fence_cpu_addr =
>>>>>> +        amdgpu_ring_get_cpu_addr(ring, ring->trail_fence_offs);
>>>>>> +
>>>>>> +    ring->cond_exe_gpu_addr =
>>>>>> +        amdgpu_ring_get_gpu_addr(ring, ring->cond_exe_offs);
>>>>>> +    ring->cond_exe_cpu_addr =
>>>>>> +        amdgpu_ring_get_cpu_addr(ring, ring->cond_exe_offs);
>>>>>> +
>>>>>> +    /* always set cond_exec_polling to CONTINUE */
>>>>>> +    *ring->cond_exe_cpu_addr = 1;
>>>>>> +
>>>>>> +    r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
>>>>>> +    if (r) {
>>>>>> +        dev_err(adev->dev, "failed initializing fences (%d).\n",
>>>>>> +r);
>>>>>> +        return r;
>>>>>> +    }
>>>>>> +
>>>>>> +    ring->ring_size = roundup_pow_of_two(max_dw * 4 *
>>>>>> sched_hw_submission);
>>>>>> +
>>>>>> +    ring->buf_mask = (ring->ring_size / 4) - 1;
>>>>>> +    ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
>>>>>> +        0xffffffffffffffff : ring->buf_mask;
>>>>>> +
>>>>>> +    /* Allocate ring buffer */
>>>>>> +    if (ring->ring == NULL) {
>>>>>> +        ring->ring = kzalloc(ring->ring_size +
>>>>>> ring->funcs->extra_dw, GFP_KERNEL);
>>>>>> +        if (!ring->ring) {
>>>>>> +            dev_err(adev->dev, "(%d) swring create failed\n", r);
>>>>>> +            return r;
>>>>>> +        }
>>>>>> +
>>>>>> +        amdgpu_ring_clear_ring(ring);
>>>>>> +    }
>>>>>> +
>>>>>> +    ring->max_dw = max_dw;
>>>>>> +    ring->hw_prio = hw_prio;
>>>>>> +
>>>>>> +    if (!ring->no_scheduler) {
>>>>>> +        hw_ip = ring->funcs->type;
>>>>>> +        num_sched = &adev->gpu_sched[hw_ip][hw_prio].num_scheds;
>>>>>> + adev->gpu_sched[hw_ip][hw_prio].sched[(*num_sched)++] =
>>>>>> +            &ring->sched;
>>>>>> +    }
>>>>>> +
>>>>>> +    return 0;
>>>>>> +}
>>>>>
>>>>> In general i see this function is a big one to one subset of
>>>>> amdgpu_ring_init.
>>>>> Could you maybe see a way to refactor such that this function is
>>>>> the base and for HW related code that different (like BO allocation
>>>>> for ring
>>>>> buffer) you
>>>>> maybe can add if (!ring->sw_ring)... and add those code snippets ?
>>>>> To avoid
>>>>> substantial code duplication.
>>>>>
>>>>> Andrey
>>>>>
>>>>>
>>>>>> +
>>>>>> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring) {
>>>>>> +    struct amdgpu_device *adev = ring->adev;
>>>>>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>>>>>> +
>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>> +    return amdgpu_ring_get_rptr_from_mux(mux, ring); }
>>>>>> +
>>>>>> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring) {
>>>>>> +    struct amdgpu_device *adev = ring->adev;
>>>>>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>>>>>> +
>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>> +    return amdgpu_ring_get_wptr_from_mux(mux, ring); }
>>>>>> +
>>>>>> +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring) {
>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>> +}
>>>>>> +
>>>>>> +void amdgpu_sw_ring_commit(struct amdgpu_ring *ring) {
>>>>>> +    struct amdgpu_device *adev = ring->adev;
>>>>>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>>>>>> +
>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>> +    amdgpu_ring_set_wptr_to_mux(mux, ring, ring->wptr); }
>>>>>> +
>>>>>> +void amdgpu_sw_ring_fini(struct amdgpu_ring *ring) {
>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>> +
>>>>>> +    /* Not to finish a ring which is not initialized */
>>>>>> +    if (!(ring->adev) ||
>>>>>> +        (!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
>>>>>> +        return;
>>>>>> +
>>>>>> +    ring->sched.ready = false;
>>>>>> +
>>>>>> +    amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
>>>>>> +    amdgpu_device_wb_free(ring->adev, ring->fence_offs);
>>>>>> +
>>>>>> +    kfree((void *)ring->ring);
>>>>>> +
>>>>>> +    dma_fence_put(ring->vmid_wait);
>>>>>> +    ring->vmid_wait = NULL;
>>>>>> +    ring->me = 0;
>>>>>> +
>>>>>> +    ring->adev->rings[ring->idx] = NULL; }
>>>>>> +
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>>>> new file mode 100644
>>>>>> index 000000000000..c05d8a94ad0c
>>>>>> --- /dev/null
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>>>> @@ -0,0 +1,48 @@
>>>>>> +/*
>>>>>> + * Copyright 2012 Advanced Micro Devices, Inc.
>>>>>> + *
>>>>>> + * Permission is hereby granted, free of charge, to any person
>>>>>> obtaining a
>>>>>> + * copy of this software and associated documentation files (the
>>>>>> "Software"),
>>>>>> + * to deal in the Software without restriction, including without
>>>>>> limitation
>>>>>> + * the rights to use, copy, modify, merge, publish, distribute,
>>>>>> sublicense,
>>>>>> + * and/or sell copies of the Software, and to permit persons to
>>>>>> whom the
>>>>>> + * Software is furnished to do so, subject to the following
>>>>>> conditions:
>>>>>> + *
>>>>>> + * The above copyright notice and this permission notice shall be
>>>>>> included in
>>>>>> + * all copies or substantial portions of the Software.
>>>>>> + *
>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
>>>>>> KIND, EXPRESS OR
>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>>>>> MERCHANTABILITY,
>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
>>>>>> EVENT SHALL
>>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
>>>>>> DAMAGES OR
>>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>>>>>> OTHERWISE,
>>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
>>>>>> USE OR
>>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>>> + *
>>>>>> + */
>>>>>> +
>>>>>> +#include <drm/amdgpu_drm.h>
>>>>>> +#include <drm/gpu_scheduler.h>
>>>>>> +#include <drm/drm_print.h>
>>>>>> +
>>>>>> +#include "amdgpu_irq.h"
>>>>>> +#include "amdgpu_ring.h"
>>>>>> +#include "amdgpu.h"
>>>>>> +
>>>>>> +#ifndef __AMDGPU_SWRING_H__
>>>>>> +#define __AMDGPU_SWRING_H__
>>>>>> +
>>>>>> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct
>>>>>> amdgpu_ring *sw_ring,
>>>>>> +             unsigned int max_dw, struct amdgpu_irq_src *irq_src,
>>>>>> +             unsigned int irq_type, unsigned int hw_prio,
>>>>>> +             atomic_t *sched_score); void
>>>>>> +amdgpu_sw_ring_fini(struct amdgpu_ring *ring);
>>>>>> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring);
>>>>>> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring); void
>>>>>> +amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring); void
>>>>>> +amdgpu_sw_ring_commit(struct amdgpu_ring *ring);
>>>>>> +
>>>>>> +void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring); void
>>>>>> +amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
>>>>>> +
>>>>>> +#endif


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3)
  2022-09-12 16:22           ` Christian König
@ 2022-09-12 16:45             ` Andrey Grodzovsky
  2022-09-13  1:44               ` Zhu, Jiadong
  0 siblings, 1 reply; 28+ messages in thread
From: Andrey Grodzovsky @ 2022-09-12 16:45 UTC (permalink / raw)
  To: Christian König, jiadong.zhu, amd-gfx; +Cc: Ray.Huang


On 2022-09-12 12:22, Christian König wrote:
> Am 12.09.22 um 17:34 schrieb Andrey Grodzovsky:
>> On 2022-09-12 09:27, Christian König wrote:
>>
>>> Am 12.09.22 um 15:22 schrieb Andrey Grodzovsky:
>>>>
>>>> On 2022-09-12 06:20, Christian König wrote:
>>>>> Am 09.09.22 um 18:45 schrieb Andrey Grodzovsky:
>>>>>>
>>>>>> On 2022-09-08 21:50, jiadong.zhu@amd.com wrote:
>>>>>>> From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>
>>>>>>>
>>>>>>> The software ring is created to support priority
>>>>>>> context while there is only one hardware queue
>>>>>>> for gfx.
>>>>>>>
>>>>>>> Every software ring has its fence driver and could
>>>>>>> be used as an ordinary ring for the gpu_scheduler.
>>>>>>> Multiple software rings are bound to a real ring
>>>>>>> with the ring muxer. The packages committed on the
>>>>>>> software ring are copied to the real ring.
>>>>>>>
>>>>>>> v2: use array to store software ring entry.
>>>>>>> v3: remove unnecessary prints.
>>>>>>>
>>>>>>> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
>>>>>>> ---
>>>>>>>   drivers/gpu/drm/amd/amdgpu/Makefile          |   3 +-
>>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h      |   3 +
>>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h     |   3 +
>>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 182 
>>>>>>> +++++++++++++++++
>>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  67 ++++++
>>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  | 204 
>>>>>>> +++++++++++++++++++
>>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h  |  48 +++++
>>>>>>>   7 files changed, 509 insertions(+), 1 deletion(-)
>>>>>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>>>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>>>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>>>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>>>>>
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>>>> index 3e0e2eb7e235..85224bc81ce5 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>>>> @@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>>>>>>>       amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o 
>>>>>>> amdgpu_nbio.o \
>>>>>>>       amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o 
>>>>>>> amdgpu_rap.o \
>>>>>>>       amdgpu_fw_attestation.o amdgpu_securedisplay.o \
>>>>>>> -    amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
>>>>>>> +    amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
>>>>>>> +    amdgpu_sw_ring.o amdgpu_ring_mux.o
>>>>>>>     amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>>>>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>>>>> index 53526ffb2ce1..0de8e3cd0f1c 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>>>>> @@ -33,6 +33,7 @@
>>>>>>>   #include "amdgpu_imu.h"
>>>>>>>   #include "soc15.h"
>>>>>>>   #include "amdgpu_ras.h"
>>>>>>> +#include "amdgpu_ring_mux.h"
>>>>>>>     /* GFX current status */
>>>>>>>   #define AMDGPU_GFX_NORMAL_MODE 0x00000000L
>>>>>>> @@ -346,6 +347,8 @@ struct amdgpu_gfx {
>>>>>>>       struct amdgpu_gfx_ras        *ras;
>>>>>>>         bool                is_poweron;
>>>>>>> +
>>>>>>> +    struct amdgpu_ring_mux            muxer;
>>>>>>>   };
>>>>>>>     #define amdgpu_gfx_get_gpu_clock_counter(adev) 
>>>>>>> (adev)->gfx.funcs->get_gpu_clock_counter((adev))
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>>>>> index 7d89a52091c0..fe33a683bfba 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>>>>> @@ -278,6 +278,9 @@ struct amdgpu_ring {
>>>>>>>       bool            is_mes_queue;
>>>>>>>       uint32_t        hw_queue_id;
>>>>>>>       struct amdgpu_mes_ctx_data *mes_ctx;
>>>>>>> +
>>>>>>> +    bool            is_sw_ring;
>>>>>>> +
>>>>>>>   };
>>>>>>>     #define amdgpu_ring_parse_cs(r, p, job, ib) 
>>>>>>> ((r)->funcs->parse_cs((p), (job), (ib)))
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>>>>> new file mode 100644
>>>>>>> index 000000000000..ea4a3c66119a
>>>>>>> --- /dev/null
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>>>>> @@ -0,0 +1,182 @@
>>>>>>> +/*
>>>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>>>> + *
>>>>>>> + * Permission is hereby granted, free of charge, to any person 
>>>>>>> obtaining a
>>>>>>> + * copy of this software and associated documentation files 
>>>>>>> (the "Software"),
>>>>>>> + * to deal in the Software without restriction, including 
>>>>>>> without limitation
>>>>>>> + * the rights to use, copy, modify, merge, publish, distribute, 
>>>>>>> sublicense,
>>>>>>> + * and/or sell copies of the Software, and to permit persons to 
>>>>>>> whom the
>>>>>>> + * Software is furnished to do so, subject to the following 
>>>>>>> conditions:
>>>>>>> + *
>>>>>>> + * The above copyright notice and this permission notice shall 
>>>>>>> be included in
>>>>>>> + * all copies or substantial portions of the Software.
>>>>>>> + *
>>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 
>>>>>>> KIND, EXPRESS OR
>>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>>>>>> MERCHANTABILITY,
>>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO 
>>>>>>> EVENT SHALL
>>>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY 
>>>>>>> CLAIM, DAMAGES OR
>>>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>>>>>>> OTHERWISE,
>>>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR 
>>>>>>> THE USE OR
>>>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>>>> + *
>>>>>>> + */
>>>>>>> +
>>>>>>> +#include <drm/drm_print.h>
>>>>>>> +
>>>>>>> +#include "amdgpu_ring_mux.h"
>>>>>>> +#include "amdgpu_ring.h"
>>>>>>> +
>>>>>>> +#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ/2)
>>>>>>> +
>>>>>>> +static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, 
>>>>>>> struct amdgpu_ring *ring,
>>>>>>> +    u64 s_begin, u64 s_end);
>>>>>>> +
>>>>>>> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct 
>>>>>>> amdgpu_ring *ring)
>>>>>>> +{
>>>>>>> +    mux->real_ring = ring;
>>>>>>> +    memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
>>>>>>> +    mux->num_ring_entries = 0;
>>>>>>> +    spin_lock_init(&mux->lock);
>>>>>>> +    return 0;
>>>>>>> +}
>>>>>>> +
>>>>>>> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux)
>>>>>>> +{
>>>>>>> +    memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
>>>>>>> +    mux->num_ring_entries = 0;
>>>>>>> +}
>>>>>>> +
>>>>>>> +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, 
>>>>>>> struct amdgpu_ring *ring)
>>>>>>> +{
>>>>>>> +    struct amdgpu_mux_entry *e;
>>>>>>> +
>>>>>>> +    if (mux->num_ring_entries == AMDGPU_MAX_GFX_RINGS) {
>>>>>>> +        DRM_ERROR("adding sw ring exceeds max gfx num\n");
>>>>>>> +        return -ENOMEM;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    e = &mux->ring_entries[mux->num_ring_entries++];
>>>>>>> +
>>>>>>> +    e->ring = ring;
>>>>>>> +    e->start_ptr_in_hw_ring = 0;
>>>>>>> +    e->end_ptr_in_hw_ring = 0;
>>>>>>> +    e->sw_cptr = 0;
>>>>>>> +    e->sw_rptr = 0;
>>>>>>> +    e->sw_wptr = 0;
>>>>>>> +
>>>>>>> +    return 0;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static struct amdgpu_mux_entry *amdgpu_get_sw_entry(struct 
>>>>>>> amdgpu_ring_mux *mux,
>>>>>>> +                struct amdgpu_ring *ring)
>>>>>>> +{
>>>>>>> +    struct amdgpu_mux_entry *e;
>>>>>>> +    int i;
>>>>>>> +
>>>>>>> +    e = NULL;
>>>>>>> +    for (i = 0; i < mux->num_ring_entries; i++) {
>>>>>>> +        if (mux->ring_entries[i].ring == ring) {
>>>>>>> +            e = &mux->ring_entries[i];
>>>>>>> +            break;
>>>>>>> +        }
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    return e;
>>>>>>> +}
>>>>>>> +
>>>>>>> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, 
>>>>>>> struct amdgpu_ring *ring, u64 wptr)
>>>>>>> +{
>>>>>>> +    struct amdgpu_mux_entry *e;
>>>>>>> +
>>>>>>> +    e = amdgpu_get_sw_entry(mux, ring);
>>>>>>> +    if (!e) {
>>>>>>> +        DRM_ERROR("cannot find entry for sw ring\n");
>>>>>>> +        return;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    spin_lock(&mux->lock);
>>>>>>
>>>>>>
>>>>>> A bit more generic question, I assume the spinlock here protects 
>>>>>> from concurrent runs
>>>>>> of amdgpu_ib_schedule. For them to be even theoretically 
>>>>>> concurrent it must be from
>>>>>> direct submissions to HW (because any scheduler mediated 
>>>>>> submission is serialized though
>>>>>> the dedicated scheduler worker thread). But in such case why we 
>>>>>> protect only here ? If i am
>>>>>> not missing something there is no total per HW ring lock when 
>>>>>> calling amdgpu_ib_schedule today
>>>>>> and we do a lot of HW accesses there to ring  which should 
>>>>>> probably be protected from
>>>>>> concurrent accesses.
>>>>>>
>>>>>> So if any one can answer this question ?
>>>>>
>>>>> Well what we have is in general two schedulers which push their 
>>>>> work into one hardware ring.
>>>>>
>>>>> So we need a lock to make sure that only one is modifying the hw 
>>>>> ring at the same time.
>>>>>
>>>>> From the implementation I think we first write the commands into a 
>>>>> shadow ring buffer and then copy them over to the real hw ring here.
>>>>>
>>>>> So this is the only place where we actually touch the hw ring 
>>>>> buffer and to need to grab the lock.
>>>>>
>>>>> Did I get this right?
>>>>>
>>>>> Thanks,
>>>>> Christian.
>>>>
>>>>
>>>> For the case of the sw ring yes, but I was asking in general, 
>>>> accesses to real HW rings, amdgpu_ib_schedule writes to HW rings,
>>>> we may be accessing same HW ring from 2 different contexts when 
>>>> doing direct submissions (i.e. calling amdgpu_ib_schedule
>>>> directly from 2 threads concurrently) this opens possibility to 
>>>> concurrent access to HW. Or am i missing something here ?
>>>
>>> No, that's pretty much correct.
>>>
>>> The general idea is that amdgpu_ib_schedule() first writes into a 
>>> separate software ring buffer for each scheduler. So no locking 
>>> needed for that.
>>>
>>> Then when the set_wptr callback is called we grab the lock and copy 
>>> the software ring content to the real hw ring and telling the hw to 
>>> execute it.
>>>
>>> The spin_lock is to protect from concurrent hw access.
>>>
>>> Regards,
>>> Christian.
>>
>>
>> Look at 
>> amdgpu_copy_buffer->amdgpu_job_submit_direct->amdgpu_ib_schedule->amdgpu_ring_commit->amdgpu_ring_set_wptr,
>> at no point there lock is taken. The only lock i see that resembles 
>> what you describe is for amdgpu_kiq.ring_lock. So this applies only
>> to some of the code but not to all cases.
>
> Sounds like we have a misunderstanding here.
>
> The case we look at should be this:
>
> amdgpu_job_run()->amdgpu_ib_schedule()->amdgpu_ring_commit()->amdgpu_ring_set_wptr()...amdgpu_ring_set_wptr_to_mux() 
>
>
> Then amdgpu_ring_set_wptr_to_mux() we then grab the lock, copy over 
> the commands, commit them to the hw and then drop the lock.


Yes, misunderstanding - I am asking for the general case not related to 
this patch-set. When we work with HW rings directly from direct submissions.
Nothing prevents in that case from 2 concurrent accesses to HW the way i 
showed above, or is there something ?

Andrey


>
> Christian.
>
>>
>> Andrey
>>
>>
>>>
>>>
>>>>
>>>> Andrey
>>>>
>>>>
>>>>>
>>>>>>
>>>>>>
>>>>>>> +    e->sw_cptr = e->sw_wptr;
>>>>>>> +    e->sw_wptr = wptr;
>>>>>>> +    e->start_ptr_in_hw_ring = mux->real_ring->wptr;
>>>>>>> +
>>>>>>> +    if (copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr) == 0) {
>>>>>>> +        e->end_ptr_in_hw_ring = mux->real_ring->wptr;
>>>>>>> +        amdgpu_ring_commit(mux->real_ring);
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    spin_unlock(&mux->lock);
>>>>>>> +}
>>>>>>> +
>>>>>>> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, 
>>>>>>> struct amdgpu_ring *ring)
>>>>>>> +{
>>>>>>> +    struct amdgpu_mux_entry *e;
>>>>>>> +
>>>>>>> +    e = amdgpu_get_sw_entry(mux, ring);
>>>>>>> +    if (!e) {
>>>>>>> +        DRM_ERROR("cannot find entry for sw ring\n");
>>>>>>> +        return 0;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    return e->sw_wptr;
>>>>>>> +}
>>>>>>> +
>>>>>>> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, 
>>>>>>> struct amdgpu_ring *ring)
>>>>>>> +{
>>>>>>> +    struct amdgpu_mux_entry *e;
>>>>>>> +    u64 r_rptr, r_wptr, offset, start, end;
>>>>>>> +
>>>>>>> +    e = amdgpu_get_sw_entry(mux, ring);
>>>>>>> +    if (!e) {
>>>>>>> +        DRM_ERROR("no sw entry found!\n");
>>>>>>> +        return 0;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    r_rptr = amdgpu_ring_get_rptr(mux->real_ring);
>>>>>>> +    r_wptr = amdgpu_ring_get_wptr(mux->real_ring);
>>>>>>> +
>>>>>>> +    if (r_wptr < r_rptr)
>>>>>>> +        r_wptr += mux->real_ring->ring_size >> 2;
>>>>>>> +
>>>>>>> +    start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask;
>>>>>>> +    end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask;
>>>>>>> +    if (start > end)
>>>>>>> +        end += mux->real_ring->ring_size >> 2;
>>>>>>> +    if (r_rptr <= end && r_rptr >= start) {
>>>>>>> +        offset = r_rptr - start;
>>>>>>> +        e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask;
>>>>>>> +    } else if (r_rptr < start) {
>>>>>>> +        e->sw_rptr = e->sw_cptr;
>>>>>>> +    } else {
>>>>>>> +        e->sw_rptr = e->sw_wptr;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    return e->sw_rptr;
>>>>>>> +}
>>>>>>> +
>>>>>>> +/*copy packages on sw ring range[begin, end) */
>>>>>>> +static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, 
>>>>>>> struct amdgpu_ring *ring,
>>>>>>> +    u64 s_begin, u64 s_end)
>>>>>>> +{
>>>>>>> +    u64 begin, end, r_begin, r_end;
>>>>>>> +    struct amdgpu_ring *real_ring = mux->real_ring;
>>>>>>> +
>>>>>>> +    begin = s_begin & ring->buf_mask;
>>>>>>> +    end = s_end & ring->buf_mask;
>>>>>>> +
>>>>>>> +    r_begin = real_ring->wptr & real_ring->buf_mask;
>>>>>>> +    if (begin == end)
>>>>>>> +        return -ERANGE;
>>>>>>> +    if (begin > end) {
>>>>>>> +        amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) + 
>>>>>>> end - begin);
>>>>>>> +        amdgpu_ring_write_multiple(real_ring, (void 
>>>>>>> *)&ring->ring[begin],
>>>>>>> +            (ring->ring_size >> 2) - begin);
>>>>>>> +        amdgpu_ring_write_multiple(real_ring, (void 
>>>>>>> *)&ring->ring[0], end);
>>>>>>> +    } else {
>>>>>>> +        amdgpu_ring_alloc(real_ring, end - begin);
>>>>>>> +        amdgpu_ring_write_multiple(real_ring, (void 
>>>>>>> *)&ring->ring[begin], end - begin);
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    r_end = real_ring->wptr & real_ring->buf_mask;
>>>>>>> +
>>>>>>> +    return 0;
>>>>>>> +}
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h 
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>>>>> new file mode 100644
>>>>>>> index 000000000000..d058c43bb063
>>>>>>> --- /dev/null
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>>>>> @@ -0,0 +1,67 @@
>>>>>>> +/*
>>>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>>>> + *
>>>>>>> + * Permission is hereby granted, free of charge, to any person 
>>>>>>> obtaining a
>>>>>>> + * copy of this software and associated documentation files 
>>>>>>> (the "Software"),
>>>>>>> + * to deal in the Software without restriction, including 
>>>>>>> without limitation
>>>>>>> + * the rights to use, copy, modify, merge, publish, distribute, 
>>>>>>> sublicense,
>>>>>>> + * and/or sell copies of the Software, and to permit persons to 
>>>>>>> whom the
>>>>>>> + * Software is furnished to do so, subject to the following 
>>>>>>> conditions:
>>>>>>> + *
>>>>>>> + * The above copyright notice and this permission notice shall 
>>>>>>> be included in
>>>>>>> + * all copies or substantial portions of the Software.
>>>>>>> + *
>>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 
>>>>>>> KIND, EXPRESS OR
>>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>>>>>> MERCHANTABILITY,
>>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO 
>>>>>>> EVENT SHALL
>>>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY 
>>>>>>> CLAIM, DAMAGES OR
>>>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>>>>>>> OTHERWISE,
>>>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR 
>>>>>>> THE USE OR
>>>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>>>> + *
>>>>>>> + */
>>>>>>> +
>>>>>>> +#ifndef __AMDGPU_RING_MUX__
>>>>>>> +#define __AMDGPU_RING_MUX__
>>>>>>> +
>>>>>>> +#include <linux/timer.h>
>>>>>>> +#include <linux/spinlock.h>
>>>>>>> +#include "amdgpu_ring.h"
>>>>>>> +
>>>>>>> +struct amdgpu_ring;
>>>>>>> +/*
>>>>>>> + * start_ptr_in_hw_ring - last copied start loc on hw ring
>>>>>>> + * end_ptr_in_hw_ring - last copied end loc on hw ring
>>>>>>> + *sw_cptr -the begin of copy ptr in sw ring
>>>>>>> + *sw_rptr; the read ptr in sw ring
>>>>>>> + *sw_wptr; the write ptr in sw ring
>>>>>>> + */
>>>>>>> +struct amdgpu_mux_entry {
>>>>>>> +    struct amdgpu_ring    *ring;
>>>>>>> +    u64 start_ptr_in_hw_ring;
>>>>>>> +    u64 end_ptr_in_hw_ring;
>>>>>>> +
>>>>>>> +    u64 sw_cptr;
>>>>>>> +    u64 sw_rptr;
>>>>>>> +    u64 sw_wptr;
>>>>>>> +};
>>>>>>> +
>>>>>>> +struct amdgpu_ring_mux {
>>>>>>> +    struct amdgpu_ring *real_ring;
>>>>>>> +
>>>>>>> +    struct amdgpu_mux_entry ring_entries[AMDGPU_MAX_GFX_RINGS];
>>>>>>> +
>>>>>>> +    unsigned num_ring_entries;
>>>>>>> +
>>>>>>> +    spinlock_t            lock;
>>>>>>> +
>>>>>>> +};
>>>>>>> +
>>>>>>> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct 
>>>>>>> amdgpu_ring *ring);
>>>>>>> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux);
>>>>>>> +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, 
>>>>>>> struct amdgpu_ring *ring);
>>>>>>> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, 
>>>>>>> struct amdgpu_ring *ring, u64 wptr);
>>>>>>> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, 
>>>>>>> struct amdgpu_ring *ring);
>>>>>>> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, 
>>>>>>> struct amdgpu_ring *ring);
>>>>>>> +
>>>>>>> +#endif
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c 
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>>>>> new file mode 100644
>>>>>>> index 000000000000..452d0ff37758
>>>>>>> --- /dev/null
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>>>>> @@ -0,0 +1,204 @@
>>>>>>> +/*
>>>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>>>> + * All Rights Reserved.
>>>>>>> + *
>>>>>>> + * Permission is hereby granted, free of charge, to any person 
>>>>>>> obtaining a
>>>>>>> + * copy of this software and associated documentation files (the
>>>>>>> + * "Software"), to deal in the Software without restriction, 
>>>>>>> including
>>>>>>> + * without limitation the rights to use, copy, modify, merge, 
>>>>>>> publish,
>>>>>>> + * distribute, sub license, and/or sell copies of the Software, 
>>>>>>> and to
>>>>>>> + * permit persons to whom the Software is furnished to do so, 
>>>>>>> subject to
>>>>>>> + * the following conditions:
>>>>>>> + *
>>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 
>>>>>>> KIND, EXPRESS OR
>>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>>>>>> MERCHANTABILITY,
>>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO 
>>>>>>> EVENT SHALL
>>>>>>> + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE 
>>>>>>> LIABLE FOR ANY CLAIM,
>>>>>>> + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 
>>>>>>> CONTRACT, TORT OR
>>>>>>> + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
>>>>>>> SOFTWARE OR THE
>>>>>>> + * USE OR OTHER DEALINGS IN THE SOFTWARE.
>>>>>>> + *
>>>>>>> + * The above copyright notice and this permission notice 
>>>>>>> (including the
>>>>>>> + * next paragraph) shall be included in all copies or 
>>>>>>> substantial portions
>>>>>>> + * of the Software.
>>>>>>> + *
>>>>>>> + */
>>>>>>> +
>>>>>>> +#include "amdgpu_sw_ring.h"
>>>>>>> +#include "amdgpu_ring_mux.h"
>>>>>>> +
>>>>>>> +#define amdgpu_ring_get_gpu_addr(ring, offset) \
>>>>>>> +    (ring->is_mes_queue ?                        \
>>>>>>> +     (ring->mes_ctx->meta_data_gpu_addr + offset) :            \
>>>>>>> +     (ring->adev->wb.gpu_addr + offset * 4))
>>>>>>> +
>>>>>>> +#define amdgpu_ring_get_cpu_addr(ring, offset) \
>>>>>>> +    (ring->is_mes_queue ?                        \
>>>>>>> +     (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + 
>>>>>>> offset) : \
>>>>>>> +     (&ring->adev->wb.wb[offset]))
>>>>>>> +
>>>>>>> +
>>>>>>> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct 
>>>>>>> amdgpu_ring *ring,
>>>>>>> +             unsigned int max_dw, struct amdgpu_irq_src *irq_src,
>>>>>>> +             unsigned int irq_type, unsigned int hw_prio,
>>>>>>> +             atomic_t *sched_score)
>>>>>>> +{
>>>>>>> +    int r;
>>>>>>> +    int sched_hw_submission = amdgpu_sched_hw_submission;
>>>>>>> +    u32 *num_sched;
>>>>>>> +    u32 hw_ip;
>>>>>>> +
>>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>>> +
>>>>>>> +    if (ring->adev == NULL) {
>>>>>>> +        if (adev->num_rings >= AMDGPU_MAX_RINGS)
>>>>>>> +            return -EINVAL;
>>>>>>> +
>>>>>>> +        ring->adev = adev;
>>>>>>> +        ring->num_hw_submission = sched_hw_submission;
>>>>>>> +        ring->sched_score = sched_score;
>>>>>>> +        ring->vmid_wait = dma_fence_get_stub();
>>>>>>> +
>>>>>>> +        if (!ring->is_mes_queue) {
>>>>>>> +            ring->idx = adev->num_rings++;
>>>>>>> +            adev->rings[ring->idx] = ring;
>>>>>>> +        }
>>>>>>> +
>>>>>>> +        r = amdgpu_fence_driver_init_ring(ring);
>>>>>>> +        if (r)
>>>>>>> +            return r;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    r = amdgpu_device_wb_get(adev, &ring->fence_offs);
>>>>>>> +    if (r) {
>>>>>>> +        dev_err(adev->dev, "(%d) ring fence_offs wb alloc 
>>>>>>> failed\n", r);
>>>>>>> +        return r;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    r = amdgpu_device_wb_get(adev, &ring->fence_offs);
>>>>>>> +    if (r) {
>>>>>>> +        dev_err(adev->dev, "(%d) ring fence_offs wb alloc 
>>>>>>> failed\n", r);
>>>>>>> +        return r;
>>>>>>> +    }
>>>>>>
>>>>>>
>>>>>> Looks like a typo copy-paste duplicate of the above
>>>>>>
>>>>>>> +
>>>>>>> +    r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
>>>>>>> +    if (r) {
>>>>>>> +        dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc 
>>>>>>> failed\n", r);
>>>>>>> +        return r;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
>>>>>>> +    if (r) {
>>>>>>> +        dev_err(adev->dev, "(%d) ring cond_exec_polling wb 
>>>>>>> alloc failed\n", r);
>>>>>>> +        return r;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    ring->fence_gpu_addr =
>>>>>>> +        amdgpu_ring_get_gpu_addr(ring, ring->fence_offs);
>>>>>>> +    ring->fence_cpu_addr =
>>>>>>> +        amdgpu_ring_get_cpu_addr(ring, ring->fence_offs);
>>>>>>> +
>>>>>>> +    ring->trail_fence_gpu_addr =
>>>>>>> +        amdgpu_ring_get_gpu_addr(ring, ring->trail_fence_offs);
>>>>>>> +    ring->trail_fence_cpu_addr =
>>>>>>> +        amdgpu_ring_get_cpu_addr(ring, ring->trail_fence_offs);
>>>>>>> +
>>>>>>> +    ring->cond_exe_gpu_addr =
>>>>>>> +        amdgpu_ring_get_gpu_addr(ring, ring->cond_exe_offs);
>>>>>>> +    ring->cond_exe_cpu_addr =
>>>>>>> +        amdgpu_ring_get_cpu_addr(ring, ring->cond_exe_offs);
>>>>>>> +
>>>>>>> +    /* always set cond_exec_polling to CONTINUE */
>>>>>>> +    *ring->cond_exe_cpu_addr = 1;
>>>>>>> +
>>>>>>> +    r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
>>>>>>> +    if (r) {
>>>>>>> +        dev_err(adev->dev, "failed initializing fences 
>>>>>>> (%d).\n", r);
>>>>>>> +        return r;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    ring->ring_size = roundup_pow_of_two(max_dw * 4 * 
>>>>>>> sched_hw_submission);
>>>>>>> +
>>>>>>> +    ring->buf_mask = (ring->ring_size / 4) - 1;
>>>>>>> +    ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
>>>>>>> +        0xffffffffffffffff : ring->buf_mask;
>>>>>>> +
>>>>>>> +    /* Allocate ring buffer */
>>>>>>> +    if (ring->ring == NULL) {
>>>>>>> +        ring->ring = kzalloc(ring->ring_size + 
>>>>>>> ring->funcs->extra_dw, GFP_KERNEL);
>>>>>>> +        if (!ring->ring) {
>>>>>>> +            dev_err(adev->dev, "(%d) swring create failed\n", r);
>>>>>>> +            return r;
>>>>>>> +        }
>>>>>>> +
>>>>>>> +        amdgpu_ring_clear_ring(ring);
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    ring->max_dw = max_dw;
>>>>>>> +    ring->hw_prio = hw_prio;
>>>>>>> +
>>>>>>> +    if (!ring->no_scheduler) {
>>>>>>> +        hw_ip = ring->funcs->type;
>>>>>>> +        num_sched = &adev->gpu_sched[hw_ip][hw_prio].num_scheds;
>>>>>>> + adev->gpu_sched[hw_ip][hw_prio].sched[(*num_sched)++] =
>>>>>>> +            &ring->sched;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    return 0;
>>>>>>> +}
>>>>>>
>>>>>>
>>>>>> In general i see this function is a big one to one subset of 
>>>>>> amdgpu_ring_init.
>>>>>> Could you maybe see a way to refactor such that this function is 
>>>>>> the base
>>>>>> and for HW related code that different (like BO allocation for 
>>>>>> ring buffer) you
>>>>>> maybe can add if (!ring->sw_ring)... and add those code snippets 
>>>>>> ? To avoid
>>>>>> substantial code duplication.
>>>>>>
>>>>>> Andrey
>>>>>>
>>>>>>
>>>>>>> +
>>>>>>> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring)
>>>>>>> +{
>>>>>>> +    struct amdgpu_device *adev = ring->adev;
>>>>>>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>>>>>>> +
>>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>>> +    return amdgpu_ring_get_rptr_from_mux(mux, ring);
>>>>>>> +}
>>>>>>> +
>>>>>>> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring)
>>>>>>> +{
>>>>>>> +    struct amdgpu_device *adev = ring->adev;
>>>>>>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>>>>>>> +
>>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>>> +    return amdgpu_ring_get_wptr_from_mux(mux, ring);
>>>>>>> +}
>>>>>>> +
>>>>>>> +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring)
>>>>>>> +{
>>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>>> +}
>>>>>>> +
>>>>>>> +void amdgpu_sw_ring_commit(struct amdgpu_ring *ring)
>>>>>>> +{
>>>>>>> +    struct amdgpu_device *adev = ring->adev;
>>>>>>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>>>>>>> +
>>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>>> +    amdgpu_ring_set_wptr_to_mux(mux, ring, ring->wptr);
>>>>>>> +}
>>>>>>> +
>>>>>>> +void amdgpu_sw_ring_fini(struct amdgpu_ring *ring)
>>>>>>> +{
>>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>>> +
>>>>>>> +    /* Not to finish a ring which is not initialized */
>>>>>>> +    if (!(ring->adev) ||
>>>>>>> +        (!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
>>>>>>> +        return;
>>>>>>> +
>>>>>>> +    ring->sched.ready = false;
>>>>>>> +
>>>>>>> +    amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
>>>>>>> +    amdgpu_device_wb_free(ring->adev, ring->fence_offs);
>>>>>>> +
>>>>>>> +    kfree((void *)ring->ring);
>>>>>>> +
>>>>>>> +    dma_fence_put(ring->vmid_wait);
>>>>>>> +    ring->vmid_wait = NULL;
>>>>>>> +    ring->me = 0;
>>>>>>> +
>>>>>>> +    ring->adev->rings[ring->idx] = NULL;
>>>>>>> +}
>>>>>>> +
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h 
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>>>>> new file mode 100644
>>>>>>> index 000000000000..c05d8a94ad0c
>>>>>>> --- /dev/null
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>>>>> @@ -0,0 +1,48 @@
>>>>>>> +/*
>>>>>>> + * Copyright 2012 Advanced Micro Devices, Inc.
>>>>>>> + *
>>>>>>> + * Permission is hereby granted, free of charge, to any person 
>>>>>>> obtaining a
>>>>>>> + * copy of this software and associated documentation files 
>>>>>>> (the "Software"),
>>>>>>> + * to deal in the Software without restriction, including 
>>>>>>> without limitation
>>>>>>> + * the rights to use, copy, modify, merge, publish, distribute, 
>>>>>>> sublicense,
>>>>>>> + * and/or sell copies of the Software, and to permit persons to 
>>>>>>> whom the
>>>>>>> + * Software is furnished to do so, subject to the following 
>>>>>>> conditions:
>>>>>>> + *
>>>>>>> + * The above copyright notice and this permission notice shall 
>>>>>>> be included in
>>>>>>> + * all copies or substantial portions of the Software.
>>>>>>> + *
>>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 
>>>>>>> KIND, EXPRESS OR
>>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>>>>>>> MERCHANTABILITY,
>>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO 
>>>>>>> EVENT SHALL
>>>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY 
>>>>>>> CLAIM, DAMAGES OR
>>>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
>>>>>>> OTHERWISE,
>>>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR 
>>>>>>> THE USE OR
>>>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>>>> + *
>>>>>>> + */
>>>>>>> +
>>>>>>> +#include <drm/amdgpu_drm.h>
>>>>>>> +#include <drm/gpu_scheduler.h>
>>>>>>> +#include <drm/drm_print.h>
>>>>>>> +
>>>>>>> +#include "amdgpu_irq.h"
>>>>>>> +#include "amdgpu_ring.h"
>>>>>>> +#include "amdgpu.h"
>>>>>>> +
>>>>>>> +#ifndef __AMDGPU_SWRING_H__
>>>>>>> +#define __AMDGPU_SWRING_H__
>>>>>>> +
>>>>>>> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct 
>>>>>>> amdgpu_ring *sw_ring,
>>>>>>> +             unsigned int max_dw, struct amdgpu_irq_src *irq_src,
>>>>>>> +             unsigned int irq_type, unsigned int hw_prio,
>>>>>>> +             atomic_t *sched_score);
>>>>>>> +void amdgpu_sw_ring_fini(struct amdgpu_ring *ring);
>>>>>>> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring);
>>>>>>> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring);
>>>>>>> +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring);
>>>>>>> +void amdgpu_sw_ring_commit(struct amdgpu_ring *ring);
>>>>>>> +
>>>>>>> +void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
>>>>>>> +void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
>>>>>>> +
>>>>>>> +#endif
>>>>>
>>>
>

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH 4/4] drm/amdgpu: Implement OS triggered MCBP(v2)
  2022-09-09 17:02   ` Andrey Grodzovsky
@ 2022-09-13  1:32     ` Zhu, Jiadong
  0 siblings, 0 replies; 28+ messages in thread
From: Zhu, Jiadong @ 2022-09-13  1:32 UTC (permalink / raw)
  To: Grodzovsky, Andrey, amd-gfx; +Cc: Huang, Ray

[AMD Official Use Only - General]

-----Original Message-----
From: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
Sent: Saturday, September 10, 2022 1:02 AM
To: Zhu, Jiadong <Jiadong.Zhu@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Huang, Ray <Ray.Huang@amd.com>
Subject: Re: [PATCH 4/4] drm/amdgpu: Implement OS triggered MCBP(v2)


On 2022-09-08 21:50, jiadong.zhu@amd.com wrote:
> From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>
>
> Trigger MCBP according to the priority of the software rings and the hw
> fence signaling condition.
>
> The muxer records the latest locations from the software ring which
> are used to resubmit packages in preemption scenarios.
>
> v2: update comment style
>
> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/Makefile          |   2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c       |   2 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c     | 101 ++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h     |  29 ++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c     |  12 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h     |   3 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 163 ++++++++++++++++++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  16 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  |  26 +++
>   9 files changed, 351 insertions(+), 3 deletions(-)
>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c
>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
> b/drivers/gpu/drm/amd/amdgpu/Makefile
> index 85224bc81ce5..24c5aa19bbf2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -59,7 +59,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>       amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
>       amdgpu_fw_attestation.o amdgpu_securedisplay.o \
>       amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
> -     amdgpu_sw_ring.o amdgpu_ring_mux.o
> +     amdgpu_sw_ring.o amdgpu_ring_mux.o amdgpu_mcbp.o
>
>   amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 258cffe3c06a..af86d87e2f3b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -211,6 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>               }
>       }
>
> +     amdgpu_ring_ib_begin(ring);
>       if (job && ring->funcs->init_cond_exec)
>               patch_offset = amdgpu_ring_init_cond_exec(ring);
>
> @@ -285,6 +286,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>           ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
>               ring->funcs->emit_wave_limit(ring, false);
>
> +     amdgpu_ring_ib_end(ring);
>       amdgpu_ring_commit(ring);
>       return 0;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c
> new file mode 100644
> index 000000000000..2a12101a7699
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c
> @@ -0,0 +1,101 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person
> +obtaining a
> + * copy of this software and associated documentation files (the
> +"Software"),
> + * to deal in the Software without restriction, including without
> +limitation
> + * the rights to use, copy, modify, merge, publish, distribute,
> +sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom
> +the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be
> +included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> +EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> +MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
> +SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
> +DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> +OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
> +OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#include <linux/delay.h>
> +#include <linux/kernel.h>
> +#include <linux/firmware.h>
> +#include <linux/module.h>
> +#include <linux/pci.h>
> +#include <drm/gpu_scheduler.h>
> +
> +#include "amdgpu.h"
> +#include "amdgpu_mcbp.h"
> +#include "amdgpu_ring.h"
> +
> +/* trigger mcbp and find if we need resubmit */ int
> +amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux) {
> +     struct amdgpu_mux_entry *e;
> +     struct amdgpu_ring *ring = NULL;
> +     int i;
> +
> +     DRM_INFO("%s in\n", __func__);
> +
> +     spin_lock(&mux->lock);


Same comment/question about locking as in patch 1


> +
> +     amdgpu_ring_preempt_ib(mux->real_ring);
> +
> +     ring = NULL;
> +     for (i = 0; i < mux->num_ring_entries; i++) {
> +             e = &mux->ring_entries[i];
> +             if (e->ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
> +                     ring = e->ring;
> +                     break;
> +             }
> +     }
> +
> +     if (!ring) {
> +             DRM_ERROR("cannot find low priority ring\n");
> +             return -ENOENT;
> +     }
> +
> +     amdgpu_fence_process(ring);


> What's the role of fence signaling here (sorry, I am not very knowledgeable about how exactly mcbp works) ?


When the MCBP package (unmap_queues) is sent to the CP, the fences following the currently running IB won't be executed any longer. But the current IB's fence may still be triggered, depending on the counters of CE/DE within the current IB when they receive the unmap_queues. amdgpu_fence_process is used here to find those unsignaled fences. If any are found, we will resubmit those IBs later, after the high-priority IBs.

> +
> +     DRM_INFO("after preempted ring_prio(%d) last_seq(%x) sync_seq(%x)\n",
> +             ring->hw_prio, atomic_read(&ring->fence_drv.last_seq), ring->fence_drv.sync_seq);
> +
> +     if (atomic_read(&ring->fence_drv.last_seq) !=
> +         ring->fence_drv.sync_seq) {
> +             DRM_INFO("schedule resubmit\n");
> +             mux->s_resubmit = true;
> +             amdgpu_ring_mux_schedule_resubmit(mux);
> +     }
> +
> +     spin_unlock(&mux->lock);
> +     return 0;
> +}
> +
> +
> +/*scan on low prio rings to have unsignaled fence and high ring has no fence.*/
> +int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux)
> +{
> +     struct amdgpu_ring *ring;
> +     uint32_t seq, last_seq;
> +     int i, need_preempt;
> +
> +     need_preempt = 0;
> +     for (i = 0; i < mux->num_ring_entries; i++) {
> +             ring = mux->ring_entries[i].ring;
> +             last_seq = atomic_read(&ring->fence_drv.last_seq);
> +             seq = READ_ONCE(ring->fence_drv.sync_seq);
> +             DRM_INFO("ring(%p) prio(%d) last_seq(%x) seq(%x)\n",
> +                     ring, ring->hw_prio, last_seq, seq);
> +             if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT && last_seq < seq)
> +                     return 0;
> +             if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && last_seq < seq)
> +                     need_preempt = 1;
> +     }
> +
> +     DRM_INFO("%s return %d\n", __func__, need_preempt && !mux->s_resubmit);
> +     return need_preempt && !mux->s_resubmit;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h
> new file mode 100644
> index 000000000000..0033bcba8d03
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h
> @@ -0,0 +1,29 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#ifndef __AMDGPU_MCBP_H__
> +#define __AMDGPU_MCBP_H__
> +
> +int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux);
> +int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux);
> +#endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index 5b70a2c36d81..6d7f8a40e308 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -583,3 +583,15 @@ int amdgpu_ring_init_mqd(struct amdgpu_ring *ring)
>
>       return mqd_mgr->init_mqd(adev, ring->mqd_ptr, &prop);
>   }
> +
> +void amdgpu_ring_ib_begin(struct amdgpu_ring *ring)
> +{
> +     if (ring->is_sw_ring)
> +             amdgpu_sw_ring_ib_begin(ring);
> +}
> +
> +void amdgpu_ring_ib_end(struct amdgpu_ring *ring)
> +{
> +     if (ring->is_sw_ring)
> +             amdgpu_sw_ring_ib_end(ring);
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index d3155dc86c07..399037b0d6e6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -311,6 +311,9 @@ struct amdgpu_ring {
>   #define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)
>
>   int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
> +void amdgpu_ring_ib_begin(struct amdgpu_ring *ring);
> +void amdgpu_ring_ib_end(struct amdgpu_ring *ring);
> +
>   void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
>   void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
>   void amdgpu_ring_commit(struct amdgpu_ring *ring);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
> index ea4a3c66119a..0c9b639b844e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
> @@ -20,28 +20,60 @@
>    * OTHER DEALINGS IN THE SOFTWARE.
>    *
>    */
> -
> +#include <linux/slab.h>
>   #include <drm/drm_print.h>
>
>   #include "amdgpu_ring_mux.h"
> +#include "amdgpu_mcbp.h"
>   #include "amdgpu_ring.h"
>
>   #define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ/2)
>
> +static struct kmem_cache *amdgpu_mux_chunk_slab;
> +
>   static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
>       u64 s_begin, u64 s_end);
> +static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux);
> +static void amdgpu_mux_resubmit_fallback(struct timer_list *t);
>
>   int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
>   {
>       mux->real_ring = ring;
> +
>       memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
>       mux->num_ring_entries = 0;
> +
> +     mux->s_resubmit = false;
> +
> +     amdgpu_mux_chunk_slab = kmem_cache_create(
> +             "amdgpu_mux_chunk", sizeof(struct amdgpu_mux_chunk), 0,
> +             SLAB_HWCACHE_ALIGN, NULL);
> +     if (!amdgpu_mux_chunk_slab) {
> +             DRM_ERROR("create amdgpu_mux_chunk cache failed\n");
> +             return -ENOMEM;
> +     }
> +
>       spin_lock_init(&mux->lock);
> +
> +     timer_setup(&mux->resubmit_timer, amdgpu_mux_resubmit_fallback, 0);
> +
>       return 0;
>   }
>
>   void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux)
>   {
> +     struct amdgpu_mux_entry *e;
> +     struct amdgpu_mux_chunk *chunk, *chunk2;
> +     int i;
> +
> +     for (i = 0; i < mux->num_ring_entries; i++) {
> +             e = &mux->ring_entries[i];
> +             list_for_each_entry_safe(chunk, chunk2, &e->list, entry) {
> +                     list_del(&chunk->entry);
> +                     kmem_cache_free(amdgpu_mux_chunk_slab, chunk);
> +             }
> +     }
> +     kmem_cache_destroy(amdgpu_mux_chunk_slab);
>       memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
>       mux->num_ring_entries = 0;
>   }
> @@ -64,6 +96,8 @@ int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring
>       e->sw_rptr = 0;
>       e->sw_wptr = 0;
>
> +     INIT_LIST_HEAD(&e->list);
> +
>       return 0;
>   }
>
> @@ -180,3 +214,130 @@ static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring
>
>       return 0;
>   }
> +
> +void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux)
> +{
> +     mod_timer(&mux->resubmit_timer, jiffies + AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT);
> +}
> +
> +void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
> +{
> +     struct amdgpu_mux_entry *e;
> +     struct amdgpu_mux_chunk *chunk;
> +
> +     if (mux->s_resubmit)
> +             amdgpu_mux_resubmit_chunks(mux);
> +
> +     e = amdgpu_get_sw_entry(mux, ring);
> +     if (!e) {
> +             DRM_ERROR("cannot find entry!\n");
> +             return;
> +     }
> +
> +     chunk = kmem_cache_alloc(amdgpu_mux_chunk_slab, GFP_KERNEL);
> +     if (!chunk) {
> +             DRM_ERROR("alloc amdgpu_mux_chunk_slab failed\n");
> +             return;
> +     }
> +
> +     chunk->start = ring->wptr;
> +     list_add_tail(&chunk->entry, &e->list);
> +}
> +
> +static void scan_and_remove_signaled_chunk(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
> +{
> +     uint32_t last_seq, size = 0;
> +     struct amdgpu_mux_entry *e;
> +     struct amdgpu_mux_chunk *chunk, *tmp;
> +
> +     e = amdgpu_get_sw_entry(mux, ring);
> +     if (!e) {
> +             DRM_ERROR("cannot find entry!\n");
> +             return;
> +     }
> +
> +     last_seq = atomic_read(&ring->fence_drv.last_seq);
> +
> +     list_for_each_entry_safe(chunk, tmp, &e->list, entry) {
> +             if (chunk->sync_seq <= last_seq) {
> +                     list_del(&chunk->entry);
> +                     kmem_cache_free(amdgpu_mux_chunk_slab, chunk);
> +             } else {
> +                     size++;


What's the role of size here ? Seems to have no impact.

Andrey


> +             }
> +     }
> +}
> +
> +void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
> +{
> +     struct amdgpu_mux_entry *e;
> +     struct amdgpu_mux_chunk *chunk;
> +
> +     e = amdgpu_get_sw_entry(mux, ring);
> +     if (!e) {
> +             DRM_ERROR("cannot find entry!\n");
> +             return;
> +     }
> +
> +     chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry);
> +     if (!chunk) {
> +             DRM_ERROR("cannot find chunk!\n");
> +             return;
> +     }
> +
> +     chunk->end = ring->wptr;
> +     chunk->sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
> +
> +     scan_and_remove_signaled_chunk(mux, ring);
> +}
> +
> +static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux)
> +{
> +     struct amdgpu_mux_entry *e = NULL;
> +     struct amdgpu_mux_chunk *chunk;
> +     uint32_t seq, last_seq;
> +     int i;
> +
> +     /*find low priority entries:*/
> +     spin_lock(&mux->lock);
> +
> +     for (i = 0; i < mux->num_ring_entries; i++) {
> +             if (mux->ring_entries[i].ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
> +                             e = &mux->ring_entries[i];
> +                     break;
> +             }
> +     }
> +
> +     if (!e) {
> +             DRM_ERROR("%s no low priority ring found\n", __func__);
> +             return;
> +     }
> +
> +     last_seq = atomic_read(&e->ring->fence_drv.last_seq);
> +     seq = READ_ONCE(e->ring->fence_drv.sync_seq);
> +     if (seq == last_seq) {
> +             DRM_INFO("skip as fence signaled seq=%x\n", seq);
> +             return;
> +     }
> +     DRM_INFO("begin to copy resubmit chunks\n");
> +
> +     /*resubmit all the fences between (last_seq, seq]*/
> +     list_for_each_entry(chunk, &e->list, entry) {
> +             if (chunk->sync_seq > last_seq) {
> +                     copy_pkt_from_sw_ring(mux, e->ring, chunk->start, chunk->end);
> +                     amdgpu_ring_commit(mux->real_ring);
> +             }
> +     }
> +     spin_unlock(&mux->lock);
> +
> +     del_timer(&mux->resubmit_timer);
> +     mux->s_resubmit = false;
> +}
> +
> +static void amdgpu_mux_resubmit_fallback(struct timer_list *t)
> +{
> +     struct amdgpu_ring_mux *mux = from_timer(mux, t, resubmit_timer);
> +
> +     DRM_INFO("calling %s\n", __func__);
> +     amdgpu_mux_resubmit_chunks(mux);
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> index d058c43bb063..1d91c235061a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> @@ -44,17 +44,27 @@ struct amdgpu_mux_entry {
>       u64 sw_cptr;
>       u64 sw_rptr;
>       u64 sw_wptr;
> +
> +     struct list_head list;
>   };
>
>   struct amdgpu_ring_mux {
>       struct amdgpu_ring *real_ring;
>
>       struct amdgpu_mux_entry ring_entries[AMDGPU_MAX_GFX_RINGS];
> -
>       unsigned num_ring_entries;
>
>       spinlock_t                      lock;
>
> +     bool s_resubmit;
> +     struct timer_list               resubmit_timer;
> +};
> +
> +struct amdgpu_mux_chunk {
> +     struct list_head entry;
> +     uint32_t sync_seq;
> +     u64 start;
> +     u64 end;
>   };
>
>   int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
> @@ -64,4 +74,8 @@ void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring
>   u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
>   u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
>
> +void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
> +void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
> +void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux);
> +
>   #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
> index 452d0ff37758..143a84c18534 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
> @@ -26,6 +26,7 @@
>
>   #include "amdgpu_sw_ring.h"
>   #include "amdgpu_ring_mux.h"
> +#include "amdgpu_mcbp.h"
>
>   #define amdgpu_ring_get_gpu_addr(ring, offset)                              \
>       (ring->is_mes_queue ?                                           \
> @@ -202,3 +203,28 @@ void amdgpu_sw_ring_fini(struct amdgpu_ring *ring)
>       ring->adev->rings[ring->idx] = NULL;
>   }
>
> +void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring)
> +{
> +     struct amdgpu_device *adev = ring->adev;
> +     struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
> +
> +     BUG_ON(!ring->is_sw_ring);
> +     if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) {
> +             if (amdgpu_mcbp_scan(mux) > 0)
> +                     amdgpu_mcbp_trigger_preempt(mux);
> +             return;
> +     }
> +
> +     amdgpu_ring_mux_start_ib(mux, ring);
> +}
> +
> +void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring)
> +{
> +     struct amdgpu_device *adev = ring->adev;
> +     struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
> +
> +     BUG_ON(!ring->is_sw_ring);
> +     if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT)
> +             return;
> +     amdgpu_ring_mux_end_ib(mux, ring);
> +}

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3)
  2022-09-12 16:45             ` Andrey Grodzovsky
@ 2022-09-13  1:44               ` Zhu, Jiadong
  2022-09-13  2:00                 ` Andrey Grodzovsky
  0 siblings, 1 reply; 28+ messages in thread
From: Zhu, Jiadong @ 2022-09-13  1:44 UTC (permalink / raw)
  To: Grodzovsky, Andrey, Christian König, amd-gfx; +Cc: Huang, Ray

[AMD Official Use Only - General]

-----Original Message-----
From: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
Sent: Tuesday, September 13, 2022 12:45 AM
To: Christian König <ckoenig.leichtzumerken@gmail.com>; Zhu, Jiadong <Jiadong.Zhu@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Huang, Ray <Ray.Huang@amd.com>
Subject: Re: [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3)


On 2022-09-12 12:22, Christian König wrote:
> Am 12.09.22 um 17:34 schrieb Andrey Grodzovsky:
>> On 2022-09-12 09:27, Christian König wrote:
>>
>>> Am 12.09.22 um 15:22 schrieb Andrey Grodzovsky:
>>>>
>>>> On 2022-09-12 06:20, Christian König wrote:
>>>>> Am 09.09.22 um 18:45 schrieb Andrey Grodzovsky:
>>>>>>
>>>>>> On 2022-09-08 21:50, jiadong.zhu@amd.com wrote:
>>>>>>> From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>
>>>>>>>
>>>>>>> The software ring is created to support priority context while
>>>>>>> there is only one hardware queue for gfx.
>>>>>>>
>>>>>>> Every software ring has its fence driver and could be used as
>>>>>>> an ordinary ring for the gpu_scheduler.
>>>>>>> Multiple software rings are bound to a real ring with the ring
>>>>>>> muxer. The packages committed on the software ring are copied to
>>>>>>> the real ring.
>>>>>>>
>>>>>>> v2: use array to store software ring entry.
>>>>>>> v3: remove unnecessary prints.
>>>>>>>
>>>>>>> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
>>>>>>> ---
>>>>>>>   drivers/gpu/drm/amd/amdgpu/Makefile          |   3 +-
>>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h      |   3 +
>>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h     |   3 +
>>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 182
>>>>>>> +++++++++++++++++
>>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  67 ++++++
>>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  | 204
>>>>>>> +++++++++++++++++++
>>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h  |  48 +++++
>>>>>>>   7 files changed, 509 insertions(+), 1 deletion(-)
>>>>>>>   create mode 100644
>>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>>>>>   create mode 100644
>>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>>>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>>>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>>>>>
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>>>> index 3e0e2eb7e235..85224bc81ce5 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>>>> @@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>>>>>>>       amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o
>>>>>>> amdgpu_nbio.o \
>>>>>>>       amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o
>>>>>>> amdgpu_rap.o \
>>>>>>>       amdgpu_fw_attestation.o amdgpu_securedisplay.o \
>>>>>>> -    amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
>>>>>>> +    amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
>>>>>>> +\
>>>>>>> +    amdgpu_sw_ring.o amdgpu_ring_mux.o
>>>>>>>     amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>>>>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>>>>> index 53526ffb2ce1..0de8e3cd0f1c 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>>>>> @@ -33,6 +33,7 @@
>>>>>>>   #include "amdgpu_imu.h"
>>>>>>>   #include "soc15.h"
>>>>>>>   #include "amdgpu_ras.h"
>>>>>>> +#include "amdgpu_ring_mux.h"
>>>>>>>     /* GFX current status */
>>>>>>>   #define AMDGPU_GFX_NORMAL_MODE 0x00000000L @@ -346,6 +347,8 @@
>>>>>>> struct amdgpu_gfx {
>>>>>>>       struct amdgpu_gfx_ras        *ras;
>>>>>>>         bool                is_poweron;
>>>>>>> +
>>>>>>> +    struct amdgpu_ring_mux            muxer;
>>>>>>>   };
>>>>>>>     #define amdgpu_gfx_get_gpu_clock_counter(adev)
>>>>>>> (adev)->gfx.funcs->get_gpu_clock_counter((adev))
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>>>>> index 7d89a52091c0..fe33a683bfba 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>>>>> @@ -278,6 +278,9 @@ struct amdgpu_ring {
>>>>>>>       bool            is_mes_queue;
>>>>>>>       uint32_t        hw_queue_id;
>>>>>>>       struct amdgpu_mes_ctx_data *mes_ctx;
>>>>>>> +
>>>>>>> +    bool            is_sw_ring;
>>>>>>> +
>>>>>>>   };
>>>>>>>     #define amdgpu_ring_parse_cs(r, p, job, ib)
>>>>>>> ((r)->funcs->parse_cs((p), (job), (ib))) diff --git
>>>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>>>>> new file mode 100644
>>>>>>> index 000000000000..ea4a3c66119a
>>>>>>> --- /dev/null
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>>>>> @@ -0,0 +1,182 @@
>>>>>>> +/*
>>>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>>>> + *
>>>>>>> + * Permission is hereby granted, free of charge, to any person
>>>>>>> obtaining a
>>>>>>> + * copy of this software and associated documentation files
>>>>>>> (the "Software"),
>>>>>>> + * to deal in the Software without restriction, including
>>>>>>> without limitation
>>>>>>> + * the rights to use, copy, modify, merge, publish, distribute,
>>>>>>> sublicense,
>>>>>>> + * and/or sell copies of the Software, and to permit persons to
>>>>>>> whom the
>>>>>>> + * Software is furnished to do so, subject to the following
>>>>>>> conditions:
>>>>>>> + *
>>>>>>> + * The above copyright notice and this permission notice shall
>>>>>>> be included in
>>>>>>> + * all copies or substantial portions of the Software.
>>>>>>> + *
>>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
>>>>>>> KIND, EXPRESS OR
>>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>>>>>> MERCHANTABILITY,
>>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
>>>>>>> EVENT SHALL
>>>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY
>>>>>>> CLAIM, DAMAGES OR
>>>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>>>>>>> OTHERWISE,
>>>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
>>>>>>> THE USE OR
>>>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>>>> + *
>>>>>>> + */
>>>>>>> +
>>>>>>> +#include <drm/drm_print.h>
>>>>>>> +
>>>>>>> +#include "amdgpu_ring_mux.h"
>>>>>>> +#include "amdgpu_ring.h"
>>>>>>> +
>>>>>>> +#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ/2)
>>>>>>> +
>>>>>>> +static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux,
>>>>>>> struct amdgpu_ring *ring,
>>>>>>> +    u64 s_begin, u64 s_end);
>>>>>>> +
>>>>>>> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct
>>>>>>> amdgpu_ring *ring)
>>>>>>> +{
>>>>>>> +    mux->real_ring = ring;
>>>>>>> +    memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
>>>>>>> +    mux->num_ring_entries = 0;
>>>>>>> +    spin_lock_init(&mux->lock);
>>>>>>> +    return 0;
>>>>>>> +}
>>>>>>> +
>>>>>>> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux) {
>>>>>>> +    memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
>>>>>>> +    mux->num_ring_entries = 0;
>>>>>>> +}
>>>>>>> +
>>>>>>> +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux,
>>>>>>> struct amdgpu_ring *ring)
>>>>>>> +{
>>>>>>> +    struct amdgpu_mux_entry *e;
>>>>>>> +
>>>>>>> +    if (mux->num_ring_entries == AMDGPU_MAX_GFX_RINGS) {
>>>>>>> +        DRM_ERROR("adding sw ring exceeds max gfx num\n");
>>>>>>> +        return -ENOMEM;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    e = &mux->ring_entries[mux->num_ring_entries++];
>>>>>>> +
>>>>>>> +    e->ring = ring;
>>>>>>> +    e->start_ptr_in_hw_ring = 0;
>>>>>>> +    e->end_ptr_in_hw_ring = 0;
>>>>>>> +    e->sw_cptr = 0;
>>>>>>> +    e->sw_rptr = 0;
>>>>>>> +    e->sw_wptr = 0;
>>>>>>> +
>>>>>>> +    return 0;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static struct amdgpu_mux_entry *amdgpu_get_sw_entry(struct
>>>>>>> amdgpu_ring_mux *mux,
>>>>>>> +                struct amdgpu_ring *ring) {
>>>>>>> +    struct amdgpu_mux_entry *e;
>>>>>>> +    int i;
>>>>>>> +
>>>>>>> +    e = NULL;
>>>>>>> +    for (i = 0; i < mux->num_ring_entries; i++) {
>>>>>>> +        if (mux->ring_entries[i].ring == ring) {
>>>>>>> +            e = &mux->ring_entries[i];
>>>>>>> +            break;
>>>>>>> +        }
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    return e;
>>>>>>> +}
>>>>>>> +
>>>>>>> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux,
>>>>>>> struct amdgpu_ring *ring, u64 wptr)
>>>>>>> +{
>>>>>>> +    struct amdgpu_mux_entry *e;
>>>>>>> +
>>>>>>> +    e = amdgpu_get_sw_entry(mux, ring);
>>>>>>> +    if (!e) {
>>>>>>> +        DRM_ERROR("cannot find entry for sw ring\n");
>>>>>>> +        return;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    spin_lock(&mux->lock);
>>>>>>
>>>>>>
>>>>>> A bit more generic question, I assume the spinlock here protects
>>>>>> from concurrent runs of amdgpu_ib_schedule. For them to be even
>>>>>> theoretically concurrent it must be from direct submissions to HW
>>>>>> (because any scheduler mediated submission is serialized though
>>>>>> the dedicated scheduler worker thread). But in such case why we
>>>>>> protect only here ? If i am not missing something there is no
>>>>>> total per HW ring lock when calling amdgpu_ib_schedule today and
>>>>>> we do a lot of HW accesses there to ring  which should probably
>>>>>> be protected from concurrent accesses.
>>>>>>
>>>>>> So if any one can answer this question ?
>>>>>
>>>>> Well what we have is in general two schedulers which push their
>>>>> work into one hardware ring.
>>>>>
>>>>> So we need a lock to make sure that only one is modifying the hw
>>>>> ring at the same time.
>>>>>
>>>>> From the implementation I think we first write the commands into a
>>>>> shadow ring buffer and then copy them over to the real hw ring here.
>>>>>
>>>>> So this is the only place where we actually touch the hw ring
>>>>> buffer and to need to grab the lock.
>>>>>
>>>>> Did I get this right?
>>>>>
>>>>> Thanks,
>>>>> Christian.
>>>>
>>>>
>>>> For the case of the sw ring yes, but I was asking in general,
>>>> accesses to real HW rings, amdgpu_ib_schedule writes to HW rings,
>>>> we may be accessing same HW ring from 2 different contexts when
>>>> doing direct submissions (i.e. calling amdgpu_ib_schedule directly
>>>> from 2 threads concurrently) this opens possibility to concurrent
>>>> access to HW. Or am i missing something here ?
>>>
>>> No, that's pretty much correct.
>>>
>>> The general idea is that amdgpu_ib_schedule() first writes into a
>>> separate software ring buffer for each scheduler. So no locking
>>> needed for that.
>>>
>>> Then when the set_wptr callback is called we grab the lock and copy
>>> the software ring content to the real hw ring and telling the hw to
>>> execute it.
>>>
>>> The spin_lock is to protect from concurrent hw access.
>>>
>>> Regards,
>>> Christian.
>>
>>
>> Look at
>> amdgpu_copy_buffer->amdgpu_job_submit_direct->amdgpu_ib_schedule->amd
>> gpu_ring_commit->amdgpu_ring_set_wptr,
>> at no point there lock is taken. The only lock i see that resembles
>> what you describe is for amdgpu_kiq.ring_lock. So this applies only
>> to some of the code but not to all cases.
>
> Sounds like we have a misunderstanding here.
>
> The case we look at should be this:
>
> amdgpu_job_run()->amdgpu_ib_schedule()->amdgpu_ring_commit()->amdgpu_r
> ing_set_wptr()...amdgpu_ring_set_wptr_to_mux()
>
>
> Then amdgpu_ring_set_wptr_to_mux() we then grab the lock, copy over
> the commands, commit them to the hw and then drop the lock.


> Yes, misunderstanding - I am asking for the general case not related to this patch-set. When we work with HW rings directly from direct submissions.
> Nothing prevents in that case from 2 concurrent accesses to HW the way i showed above, or is there something ?

> Andrey

drm_sched_init creates the thread of drm_sched_main once per ring, thus every amdgpu_ib_schedule on a certain ring should be executed in the same thread. Please correct me if I am wrong.

Thanks,
Jiadong
>
> Christian.
>
>>
>> Andrey
>>
>>
>>>
>>>
>>>>
>>>> Andrey
>>>>
>>>>
>>>>>
>>>>>>
>>>>>>
>>>>>>> +    e->sw_cptr = e->sw_wptr;
>>>>>>> +    e->sw_wptr = wptr;
>>>>>>> +    e->start_ptr_in_hw_ring = mux->real_ring->wptr;
>>>>>>> +
>>>>>>> +    if (copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr) ==
>>>>>>> +0) {
>>>>>>> +        e->end_ptr_in_hw_ring = mux->real_ring->wptr;
>>>>>>> +        amdgpu_ring_commit(mux->real_ring);
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    spin_unlock(&mux->lock);
>>>>>>> +}
>>>>>>> +
>>>>>>> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux,
>>>>>>> struct amdgpu_ring *ring)
>>>>>>> +{
>>>>>>> +    struct amdgpu_mux_entry *e;
>>>>>>> +
>>>>>>> +    e = amdgpu_get_sw_entry(mux, ring);
>>>>>>> +    if (!e) {
>>>>>>> +        DRM_ERROR("cannot find entry for sw ring\n");
>>>>>>> +        return 0;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    return e->sw_wptr;
>>>>>>> +}
>>>>>>> +
>>>>>>> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux,
>>>>>>> struct amdgpu_ring *ring)
>>>>>>> +{
>>>>>>> +    struct amdgpu_mux_entry *e;
>>>>>>> +    u64 r_rptr, r_wptr, offset, start, end;
>>>>>>> +
>>>>>>> +    e = amdgpu_get_sw_entry(mux, ring);
>>>>>>> +    if (!e) {
>>>>>>> +        DRM_ERROR("no sw entry found!\n");
>>>>>>> +        return 0;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    r_rptr = amdgpu_ring_get_rptr(mux->real_ring);
>>>>>>> +    r_wptr = amdgpu_ring_get_wptr(mux->real_ring);
>>>>>>> +
>>>>>>> +    if (r_wptr < r_rptr)
>>>>>>> +        r_wptr += mux->real_ring->ring_size >> 2;
>>>>>>> +
>>>>>>> +    start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask;
>>>>>>> +    end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask;
>>>>>>> +    if (start > end)
>>>>>>> +        end += mux->real_ring->ring_size >> 2;
>>>>>>> +    if (r_rptr <= end && r_rptr >= start) {
>>>>>>> +        offset = r_rptr - start;
>>>>>>> +        e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask;
>>>>>>> +    } else if (r_rptr < start) {
>>>>>>> +        e->sw_rptr = e->sw_cptr;
>>>>>>> +    } else {
>>>>>>> +        e->sw_rptr = e->sw_wptr;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    return e->sw_rptr;
>>>>>>> +}
>>>>>>> +
>>>>>>> +/*copy packages on sw ring range[begin, end) */ static int
>>>>>>> +copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux,
>>>>>>> struct amdgpu_ring *ring,
>>>>>>> +    u64 s_begin, u64 s_end)
>>>>>>> +{
>>>>>>> +    u64 begin, end, r_begin, r_end;
>>>>>>> +    struct amdgpu_ring *real_ring = mux->real_ring;
>>>>>>> +
>>>>>>> +    begin = s_begin & ring->buf_mask;
>>>>>>> +    end = s_end & ring->buf_mask;
>>>>>>> +
>>>>>>> +    r_begin = real_ring->wptr & real_ring->buf_mask;
>>>>>>> +    if (begin == end)
>>>>>>> +        return -ERANGE;
>>>>>>> +    if (begin > end) {
>>>>>>> +        amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) +
>>>>>>> end - begin);
>>>>>>> +        amdgpu_ring_write_multiple(real_ring, (void
>>>>>>> *)&ring->ring[begin],
>>>>>>> +            (ring->ring_size >> 2) - begin);
>>>>>>> +        amdgpu_ring_write_multiple(real_ring, (void
>>>>>>> *)&ring->ring[0], end);
>>>>>>> +    } else {
>>>>>>> +        amdgpu_ring_alloc(real_ring, end - begin);
>>>>>>> +        amdgpu_ring_write_multiple(real_ring, (void
>>>>>>> *)&ring->ring[begin], end - begin);
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    r_end = real_ring->wptr & real_ring->buf_mask;
>>>>>>> +
>>>>>>> +    return 0;
>>>>>>> +}
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>>>>> new file mode 100644
>>>>>>> index 000000000000..d058c43bb063
>>>>>>> --- /dev/null
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>>>>> @@ -0,0 +1,67 @@
>>>>>>> +/*
>>>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>>>> + *
>>>>>>> + * Permission is hereby granted, free of charge, to any person
>>>>>>> obtaining a
>>>>>>> + * copy of this software and associated documentation files
>>>>>>> (the "Software"),
>>>>>>> + * to deal in the Software without restriction, including
>>>>>>> without limitation
>>>>>>> + * the rights to use, copy, modify, merge, publish, distribute,
>>>>>>> sublicense,
>>>>>>> + * and/or sell copies of the Software, and to permit persons to
>>>>>>> whom the
>>>>>>> + * Software is furnished to do so, subject to the following
>>>>>>> conditions:
>>>>>>> + *
>>>>>>> + * The above copyright notice and this permission notice shall
>>>>>>> be included in
>>>>>>> + * all copies or substantial portions of the Software.
>>>>>>> + *
>>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
>>>>>>> KIND, EXPRESS OR
>>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>>>>>> MERCHANTABILITY,
>>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
>>>>>>> EVENT SHALL
>>>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY
>>>>>>> CLAIM, DAMAGES OR
>>>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>>>>>>> OTHERWISE,
>>>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
>>>>>>> THE USE OR
>>>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>>>> + *
>>>>>>> + */
>>>>>>> +
>>>>>>> +#ifndef __AMDGPU_RING_MUX__
>>>>>>> +#define __AMDGPU_RING_MUX__
>>>>>>> +
>>>>>>> +#include <linux/timer.h>
>>>>>>> +#include <linux/spinlock.h>
>>>>>>> +#include "amdgpu_ring.h"
>>>>>>> +
>>>>>>> +struct amdgpu_ring;
>>>>>>> +/*
>>>>>>> + * start_ptr_in_hw_ring - last copied start loc on hw ring
>>>>>>> + * end_ptr_in_hw_ring - last copied end loc on hw ring
>>>>>>> +*sw_cptr -the begin of copy ptr in sw ring  *sw_rptr; the read
>>>>>>> +ptr in sw ring  *sw_wptr; the write ptr in sw ring  */ struct
>>>>>>> +amdgpu_mux_entry {
>>>>>>> +    struct amdgpu_ring    *ring;
>>>>>>> +    u64 start_ptr_in_hw_ring;
>>>>>>> +    u64 end_ptr_in_hw_ring;
>>>>>>> +
>>>>>>> +    u64 sw_cptr;
>>>>>>> +    u64 sw_rptr;
>>>>>>> +    u64 sw_wptr;
>>>>>>> +};
>>>>>>> +
>>>>>>> +struct amdgpu_ring_mux {
>>>>>>> +    struct amdgpu_ring *real_ring;
>>>>>>> +
>>>>>>> +    struct amdgpu_mux_entry ring_entries[AMDGPU_MAX_GFX_RINGS];
>>>>>>> +
>>>>>>> +    unsigned num_ring_entries;
>>>>>>> +
>>>>>>> +    spinlock_t            lock;
>>>>>>> +
>>>>>>> +};
>>>>>>> +
>>>>>>> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct
>>>>>>> amdgpu_ring *ring);
>>>>>>> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux); int
>>>>>>> +amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux,
>>>>>>> struct amdgpu_ring *ring);
>>>>>>> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux,
>>>>>>> struct amdgpu_ring *ring, u64 wptr);
>>>>>>> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux,
>>>>>>> struct amdgpu_ring *ring);
>>>>>>> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux,
>>>>>>> struct amdgpu_ring *ring);
>>>>>>> +
>>>>>>> +#endif
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>>>>> new file mode 100644
>>>>>>> index 000000000000..452d0ff37758
>>>>>>> --- /dev/null
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>>>>> @@ -0,0 +1,204 @@
>>>>>>> +/*
>>>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>>>> + * All Rights Reserved.
>>>>>>> + *
>>>>>>> + * Permission is hereby granted, free of charge, to any person
>>>>>>> obtaining a
>>>>>>> + * copy of this software and associated documentation files
>>>>>>> + (the
>>>>>>> + * "Software"), to deal in the Software without restriction,
>>>>>>> including
>>>>>>> + * without limitation the rights to use, copy, modify, merge,
>>>>>>> publish,
>>>>>>> + * distribute, sub license, and/or sell copies of the Software,
>>>>>>> and to
>>>>>>> + * permit persons to whom the Software is furnished to do so,
>>>>>>> subject to
>>>>>>> + * the following conditions:
>>>>>>> + *
>>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
>>>>>>> KIND, EXPRESS OR
>>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>>>>>> MERCHANTABILITY,
>>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO
>>>>>>> EVENT SHALL
>>>>>>> + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE
>>>>>>> LIABLE FOR ANY CLAIM,
>>>>>>> + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
>>>>>>> CONTRACT, TORT OR
>>>>>>> + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
>>>>>>> SOFTWARE OR THE
>>>>>>> + * USE OR OTHER DEALINGS IN THE SOFTWARE.
>>>>>>> + *
>>>>>>> + * The above copyright notice and this permission notice
>>>>>>> (including the
>>>>>>> + * next paragraph) shall be included in all copies or
>>>>>>> substantial portions
>>>>>>> + * of the Software.
>>>>>>> + *
>>>>>>> + */
>>>>>>> +
>>>>>>> +#include "amdgpu_sw_ring.h"
>>>>>>> +#include "amdgpu_ring_mux.h"
>>>>>>> +
>>>>>>> +#define amdgpu_ring_get_gpu_addr(ring, offset) \
>>>>>>> +    (ring->is_mes_queue ?                        \
>>>>>>> +     (ring->mes_ctx->meta_data_gpu_addr + offset) :
>>>>>>> +\
>>>>>>> +     (ring->adev->wb.gpu_addr + offset * 4))
>>>>>>> +
>>>>>>> +#define amdgpu_ring_get_cpu_addr(ring, offset) \
>>>>>>> +    (ring->is_mes_queue ?                        \
>>>>>>> +     (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) +
>>>>>>> offset) : \
>>>>>>> +     (&ring->adev->wb.wb[offset]))
>>>>>>> +
>>>>>>> +
>>>>>>> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct
>>>>>>> amdgpu_ring *ring,
>>>>>>> +             unsigned int max_dw, struct amdgpu_irq_src
>>>>>>> +*irq_src,
>>>>>>> +             unsigned int irq_type, unsigned int hw_prio,
>>>>>>> +             atomic_t *sched_score) {
>>>>>>> +    int r;
>>>>>>> +    int sched_hw_submission = amdgpu_sched_hw_submission;
>>>>>>> +    u32 *num_sched;
>>>>>>> +    u32 hw_ip;
>>>>>>> +
>>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>>> +
>>>>>>> +    if (ring->adev == NULL) {
>>>>>>> +        if (adev->num_rings >= AMDGPU_MAX_RINGS)
>>>>>>> +            return -EINVAL;
>>>>>>> +
>>>>>>> +        ring->adev = adev;
>>>>>>> +        ring->num_hw_submission = sched_hw_submission;
>>>>>>> +        ring->sched_score = sched_score;
>>>>>>> +        ring->vmid_wait = dma_fence_get_stub();
>>>>>>> +
>>>>>>> +        if (!ring->is_mes_queue) {
>>>>>>> +            ring->idx = adev->num_rings++;
>>>>>>> +            adev->rings[ring->idx] = ring;
>>>>>>> +        }
>>>>>>> +
>>>>>>> +        r = amdgpu_fence_driver_init_ring(ring);
>>>>>>> +        if (r)
>>>>>>> +            return r;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    r = amdgpu_device_wb_get(adev, &ring->fence_offs);
>>>>>>> +    if (r) {
>>>>>>> +        dev_err(adev->dev, "(%d) ring fence_offs wb alloc
>>>>>>> failed\n", r);
>>>>>>> +        return r;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    r = amdgpu_device_wb_get(adev, &ring->fence_offs);
>>>>>>> +    if (r) {
>>>>>>> +        dev_err(adev->dev, "(%d) ring fence_offs wb alloc
>>>>>>> failed\n", r);
>>>>>>> +        return r;
>>>>>>> +    }
>>>>>>
>>>>>>
>>>>>> Looks like a typo copy-paste duplicate of the above
>>>>>>
>>>>>>> +
>>>>>>> +    r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
>>>>>>> +    if (r) {
>>>>>>> +        dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc
>>>>>>> failed\n", r);
>>>>>>> +        return r;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
>>>>>>> +    if (r) {
>>>>>>> +        dev_err(adev->dev, "(%d) ring cond_exec_polling wb
>>>>>>> alloc failed\n", r);
>>>>>>> +        return r;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    ring->fence_gpu_addr =
>>>>>>> +        amdgpu_ring_get_gpu_addr(ring, ring->fence_offs);
>>>>>>> +    ring->fence_cpu_addr =
>>>>>>> +        amdgpu_ring_get_cpu_addr(ring, ring->fence_offs);
>>>>>>> +
>>>>>>> +    ring->trail_fence_gpu_addr =
>>>>>>> +        amdgpu_ring_get_gpu_addr(ring, ring->trail_fence_offs);
>>>>>>> +    ring->trail_fence_cpu_addr =
>>>>>>> +        amdgpu_ring_get_cpu_addr(ring, ring->trail_fence_offs);
>>>>>>> +
>>>>>>> +    ring->cond_exe_gpu_addr =
>>>>>>> +        amdgpu_ring_get_gpu_addr(ring, ring->cond_exe_offs);
>>>>>>> +    ring->cond_exe_cpu_addr =
>>>>>>> +        amdgpu_ring_get_cpu_addr(ring, ring->cond_exe_offs);
>>>>>>> +
>>>>>>> +    /* always set cond_exec_polling to CONTINUE */
>>>>>>> +    *ring->cond_exe_cpu_addr = 1;
>>>>>>> +
>>>>>>> +    r = amdgpu_fence_driver_start_ring(ring, irq_src,
>>>>>>> +irq_type);
>>>>>>> +    if (r) {
>>>>>>> +        dev_err(adev->dev, "failed initializing fences
>>>>>>> (%d).\n", r);
>>>>>>> +        return r;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    ring->ring_size = roundup_pow_of_two(max_dw * 4 *
>>>>>>> sched_hw_submission);
>>>>>>> +
>>>>>>> +    ring->buf_mask = (ring->ring_size / 4) - 1;
>>>>>>> +    ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
>>>>>>> +        0xffffffffffffffff : ring->buf_mask;
>>>>>>> +
>>>>>>> +    /* Allocate ring buffer */
>>>>>>> +    if (ring->ring == NULL) {
>>>>>>> +        ring->ring = kzalloc(ring->ring_size +
>>>>>>> ring->funcs->extra_dw, GFP_KERNEL);
>>>>>>> +        if (!ring->ring) {
>>>>>>> +            dev_err(adev->dev, "(%d) swring create failed\n",
>>>>>>> +r);
>>>>>>> +            return r;
>>>>>>> +        }
>>>>>>> +
>>>>>>> +        amdgpu_ring_clear_ring(ring);
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    ring->max_dw = max_dw;
>>>>>>> +    ring->hw_prio = hw_prio;
>>>>>>> +
>>>>>>> +    if (!ring->no_scheduler) {
>>>>>>> +        hw_ip = ring->funcs->type;
>>>>>>> +        num_sched =
>>>>>>> +&adev->gpu_sched[hw_ip][hw_prio].num_scheds;
>>>>>>> + adev->gpu_sched[hw_ip][hw_prio].sched[(*num_sched)++] =
>>>>>>> +            &ring->sched;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    return 0;
>>>>>>> +}
>>>>>>
>>>>>>
>>>>>> In general i see this function is a big one to one subset of
>>>>>> amdgpu_ring_init.
>>>>>> Could you maybe see a way to refactor such that this function is
>>>>>> the base and for HW related code that different (like BO
>>>>>> allocation for ring buffer) you maybe can add if
>>>>>> (!ring->sw_ring)... and add those code snippets ? To avoid
>>>>>> substantial code duplication.
>>>>>>
>>>>>> Andrey
>>>>>>
>>>>>>
>>>>>>> +
>>>>>>> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring) {
>>>>>>> +    struct amdgpu_device *adev = ring->adev;
>>>>>>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>>>>>>> +
>>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>>> +    return amdgpu_ring_get_rptr_from_mux(mux, ring); }
>>>>>>> +
>>>>>>> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring) {
>>>>>>> +    struct amdgpu_device *adev = ring->adev;
>>>>>>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>>>>>>> +
>>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>>> +    return amdgpu_ring_get_wptr_from_mux(mux, ring); }
>>>>>>> +
>>>>>>> +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring) {
>>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>>> +}
>>>>>>> +
>>>>>>> +void amdgpu_sw_ring_commit(struct amdgpu_ring *ring) {
>>>>>>> +    struct amdgpu_device *adev = ring->adev;
>>>>>>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>>>>>>> +
>>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>>> +    amdgpu_ring_set_wptr_to_mux(mux, ring, ring->wptr); }
>>>>>>> +
>>>>>>> +void amdgpu_sw_ring_fini(struct amdgpu_ring *ring) {
>>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>>> +
>>>>>>> +    /* Not to finish a ring which is not initialized */
>>>>>>> +    if (!(ring->adev) ||
>>>>>>> +        (!ring->is_mes_queue &&
>>>>>>> +!(ring->adev->rings[ring->idx])))
>>>>>>> +        return;
>>>>>>> +
>>>>>>> +    ring->sched.ready = false;
>>>>>>> +
>>>>>>> +    amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
>>>>>>> +    amdgpu_device_wb_free(ring->adev, ring->fence_offs);
>>>>>>> +
>>>>>>> +    kfree((void *)ring->ring);
>>>>>>> +
>>>>>>> +    dma_fence_put(ring->vmid_wait);
>>>>>>> +    ring->vmid_wait = NULL;
>>>>>>> +    ring->me = 0;
>>>>>>> +
>>>>>>> +    ring->adev->rings[ring->idx] = NULL; }
>>>>>>> +
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>>>>> new file mode 100644
>>>>>>> index 000000000000..c05d8a94ad0c
>>>>>>> --- /dev/null
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>>>>> @@ -0,0 +1,48 @@
>>>>>>> +/*
>>>>>>> + * Copyright 2012 Advanced Micro Devices, Inc.
>>>>>>> + *
>>>>>>> + * Permission is hereby granted, free of charge, to any person
>>>>>>> obtaining a
>>>>>>> + * copy of this software and associated documentation files
>>>>>>> (the "Software"),
>>>>>>> + * to deal in the Software without restriction, including
>>>>>>> without limitation
>>>>>>> + * the rights to use, copy, modify, merge, publish, distribute,
>>>>>>> sublicense,
>>>>>>> + * and/or sell copies of the Software, and to permit persons to
>>>>>>> whom the
>>>>>>> + * Software is furnished to do so, subject to the following
>>>>>>> conditions:
>>>>>>> + *
>>>>>>> + * The above copyright notice and this permission notice shall
>>>>>>> be included in
>>>>>>> + * all copies or substantial portions of the Software.
>>>>>>> + *
>>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
>>>>>>> KIND, EXPRESS OR
>>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>>>>>> MERCHANTABILITY,
>>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
>>>>>>> EVENT SHALL
>>>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY
>>>>>>> CLAIM, DAMAGES OR
>>>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>>>>>>> OTHERWISE,
>>>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
>>>>>>> THE USE OR
>>>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>>>> + *
>>>>>>> + */
>>>>>>> +
>>>>>>> +#include <drm/amdgpu_drm.h>
>>>>>>> +#include <drm/gpu_scheduler.h>
>>>>>>> +#include <drm/drm_print.h>
>>>>>>> +
>>>>>>> +#include "amdgpu_irq.h"
>>>>>>> +#include "amdgpu_ring.h"
>>>>>>> +#include "amdgpu.h"
>>>>>>> +
>>>>>>> +#ifndef __AMDGPU_SWRING_H__
>>>>>>> +#define __AMDGPU_SWRING_H__
>>>>>>> +
>>>>>>> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct
>>>>>>> amdgpu_ring *sw_ring,
>>>>>>> +             unsigned int max_dw, struct amdgpu_irq_src
>>>>>>> +*irq_src,
>>>>>>> +             unsigned int irq_type, unsigned int hw_prio,
>>>>>>> +             atomic_t *sched_score); void
>>>>>>> +amdgpu_sw_ring_fini(struct amdgpu_ring *ring);
>>>>>>> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring);
>>>>>>> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring); void
>>>>>>> +amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring); void
>>>>>>> +amdgpu_sw_ring_commit(struct amdgpu_ring *ring);
>>>>>>> +
>>>>>>> +void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring); void
>>>>>>> +amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
>>>>>>> +
>>>>>>> +#endif
>>>>>
>>>
>

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3)
  2022-09-13  1:44               ` Zhu, Jiadong
@ 2022-09-13  2:00                 ` Andrey Grodzovsky
  2022-09-13  7:25                   ` Christian König
  0 siblings, 1 reply; 28+ messages in thread
From: Andrey Grodzovsky @ 2022-09-13  2:00 UTC (permalink / raw)
  To: Zhu, Jiadong, Christian König, amd-gfx; +Cc: Huang, Ray


On 2022-09-12 21:44, Zhu, Jiadong wrote:
> [AMD Official Use Only - General]
>
> -----Original Message-----
> From: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
> Sent: Tuesday, September 13, 2022 12:45 AM
> To: Christian König <ckoenig.leichtzumerken@gmail.com>; Zhu, Jiadong <Jiadong.Zhu@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Huang, Ray <Ray.Huang@amd.com>
> Subject: Re: [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3)
>
>
> On 2022-09-12 12:22, Christian König wrote:
>> Am 12.09.22 um 17:34 schrieb Andrey Grodzovsky:
>>> On 2022-09-12 09:27, Christian König wrote:
>>>
>>>> Am 12.09.22 um 15:22 schrieb Andrey Grodzovsky:
>>>>> On 2022-09-12 06:20, Christian König wrote:
>>>>>> Am 09.09.22 um 18:45 schrieb Andrey Grodzovsky:
>>>>>>> On 2022-09-08 21:50, jiadong.zhu@amd.com wrote:
>>>>>>>> From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>
>>>>>>>>
>>>>>>>> The software ring is created to support priority context while
>>>>>>>> there is only one hardware queue for gfx.
>>>>>>>>
>>>>>>>> Every software ring has its fence driver and could be used as
>>>>>>>> an ordinary ring for the gpu_scheduler.
>>>>>>>> Multiple software rings are bound to a real ring with the ring
>>>>>>>> muxer. The packages committed on the software ring are copied to
>>>>>>>> the real ring.
>>>>>>>>
>>>>>>>> v2: use array to store software ring entry.
>>>>>>>> v3: remove unnecessary prints.
>>>>>>>>
>>>>>>>> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
>>>>>>>> ---
>>>>>>>>    drivers/gpu/drm/amd/amdgpu/Makefile          |   3 +-
>>>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h      |   3 +
>>>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h     |   3 +
>>>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 182
>>>>>>>> +++++++++++++++++
>>>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  67 ++++++
>>>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  | 204
>>>>>>>> +++++++++++++++++++
>>>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h  |  48 +++++
>>>>>>>>    7 files changed, 509 insertions(+), 1 deletion(-)
>>>>>>>>    create mode 100644
>>>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>>>>>>    create mode 100644
>>>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>>>>>>    create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>>>>>>    create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>>>>>>
>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>>>>> index 3e0e2eb7e235..85224bc81ce5 100644
>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>>>>> @@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>>>>>>>>        amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o
>>>>>>>> amdgpu_nbio.o \
>>>>>>>>        amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o
>>>>>>>> amdgpu_rap.o \
>>>>>>>>        amdgpu_fw_attestation.o amdgpu_securedisplay.o \
>>>>>>>> -    amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
>>>>>>>> +    amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
>>>>>>>> +\
>>>>>>>> +    amdgpu_sw_ring.o amdgpu_ring_mux.o
>>>>>>>>      amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>>>>>>>>    diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>>>>>> index 53526ffb2ce1..0de8e3cd0f1c 100644
>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>>>>>> @@ -33,6 +33,7 @@
>>>>>>>>    #include "amdgpu_imu.h"
>>>>>>>>    #include "soc15.h"
>>>>>>>>    #include "amdgpu_ras.h"
>>>>>>>> +#include "amdgpu_ring_mux.h"
>>>>>>>>      /* GFX current status */
>>>>>>>>    #define AMDGPU_GFX_NORMAL_MODE 0x00000000L @@ -346,6 +347,8 @@
>>>>>>>> struct amdgpu_gfx {
>>>>>>>>        struct amdgpu_gfx_ras        *ras;
>>>>>>>>          bool                is_poweron;
>>>>>>>> +
>>>>>>>> +    struct amdgpu_ring_mux            muxer;
>>>>>>>>    };
>>>>>>>>      #define amdgpu_gfx_get_gpu_clock_counter(adev)
>>>>>>>> (adev)->gfx.funcs->get_gpu_clock_counter((adev))
>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>>>>>> index 7d89a52091c0..fe33a683bfba 100644
>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>>>>>> @@ -278,6 +278,9 @@ struct amdgpu_ring {
>>>>>>>>        bool            is_mes_queue;
>>>>>>>>        uint32_t        hw_queue_id;
>>>>>>>>        struct amdgpu_mes_ctx_data *mes_ctx;
>>>>>>>> +
>>>>>>>> +    bool            is_sw_ring;
>>>>>>>> +
>>>>>>>>    };
>>>>>>>>      #define amdgpu_ring_parse_cs(r, p, job, ib)
>>>>>>>> ((r)->funcs->parse_cs((p), (job), (ib))) diff --git
>>>>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>>>>>> new file mode 100644
>>>>>>>> index 000000000000..ea4a3c66119a
>>>>>>>> --- /dev/null
>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>>>>>>>> @@ -0,0 +1,182 @@
>>>>>>>> +/*
>>>>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>>>>> + *
>>>>>>>> + * Permission is hereby granted, free of charge, to any person
>>>>>>>> obtaining a
>>>>>>>> + * copy of this software and associated documentation files
>>>>>>>> (the "Software"),
>>>>>>>> + * to deal in the Software without restriction, including
>>>>>>>> without limitation
>>>>>>>> + * the rights to use, copy, modify, merge, publish, distribute,
>>>>>>>> sublicense,
>>>>>>>> + * and/or sell copies of the Software, and to permit persons to
>>>>>>>> whom the
>>>>>>>> + * Software is furnished to do so, subject to the following
>>>>>>>> conditions:
>>>>>>>> + *
>>>>>>>> + * The above copyright notice and this permission notice shall
>>>>>>>> be included in
>>>>>>>> + * all copies or substantial portions of the Software.
>>>>>>>> + *
>>>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
>>>>>>>> KIND, EXPRESS OR
>>>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>>>>>>> MERCHANTABILITY,
>>>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
>>>>>>>> EVENT SHALL
>>>>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY
>>>>>>>> CLAIM, DAMAGES OR
>>>>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>>>>>>>> OTHERWISE,
>>>>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
>>>>>>>> THE USE OR
>>>>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>>>>> + *
>>>>>>>> + */
>>>>>>>> +
>>>>>>>> +#include <drm/drm_print.h>
>>>>>>>> +
>>>>>>>> +#include "amdgpu_ring_mux.h"
>>>>>>>> +#include "amdgpu_ring.h"
>>>>>>>> +
>>>>>>>> +#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ/2)
>>>>>>>> +
>>>>>>>> +static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux,
>>>>>>>> struct amdgpu_ring *ring,
>>>>>>>> +    u64 s_begin, u64 s_end);
>>>>>>>> +
>>>>>>>> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct
>>>>>>>> amdgpu_ring *ring)
>>>>>>>> +{
>>>>>>>> +    mux->real_ring = ring;
>>>>>>>> +    memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
>>>>>>>> +    mux->num_ring_entries = 0;
>>>>>>>> +    spin_lock_init(&mux->lock);
>>>>>>>> +    return 0;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux) {
>>>>>>>> +    memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
>>>>>>>> +    mux->num_ring_entries = 0;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux,
>>>>>>>> struct amdgpu_ring *ring)
>>>>>>>> +{
>>>>>>>> +    struct amdgpu_mux_entry *e;
>>>>>>>> +
>>>>>>>> +    if (mux->num_ring_entries == AMDGPU_MAX_GFX_RINGS) {
>>>>>>>> +        DRM_ERROR("adding sw ring exceeds max gfx num\n");
>>>>>>>> +        return -ENOMEM;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    e = &mux->ring_entries[mux->num_ring_entries++];
>>>>>>>> +
>>>>>>>> +    e->ring = ring;
>>>>>>>> +    e->start_ptr_in_hw_ring = 0;
>>>>>>>> +    e->end_ptr_in_hw_ring = 0;
>>>>>>>> +    e->sw_cptr = 0;
>>>>>>>> +    e->sw_rptr = 0;
>>>>>>>> +    e->sw_wptr = 0;
>>>>>>>> +
>>>>>>>> +    return 0;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static struct amdgpu_mux_entry *amdgpu_get_sw_entry(struct
>>>>>>>> amdgpu_ring_mux *mux,
>>>>>>>> +                struct amdgpu_ring *ring) {
>>>>>>>> +    struct amdgpu_mux_entry *e;
>>>>>>>> +    int i;
>>>>>>>> +
>>>>>>>> +    e = NULL;
>>>>>>>> +    for (i = 0; i < mux->num_ring_entries; i++) {
>>>>>>>> +        if (mux->ring_entries[i].ring == ring) {
>>>>>>>> +            e = &mux->ring_entries[i];
>>>>>>>> +            break;
>>>>>>>> +        }
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    return e;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux,
>>>>>>>> struct amdgpu_ring *ring, u64 wptr)
>>>>>>>> +{
>>>>>>>> +    struct amdgpu_mux_entry *e;
>>>>>>>> +
>>>>>>>> +    e = amdgpu_get_sw_entry(mux, ring);
>>>>>>>> +    if (!e) {
>>>>>>>> +        DRM_ERROR("cannot find entry for sw ring\n");
>>>>>>>> +        return;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    spin_lock(&mux->lock);
>>>>>>>
>>>>>>> A bit more generic question, I assume the spinlock here protects
>>>>>>> from concurrent runs of amdgpu_ib_schedule. For them to be even
>>>>>>> theoretically concurrent it must be from direct submissions to HW
>>>>>>> (because any scheduler mediated submission is serialized though
>>>>>>> the dedicated scheduler worker thread). But in such case why we
>>>>>>> protect only here ? If i am not missing something there is no
>>>>>>> total per HW ring lock when calling amdgpu_ib_schedule today and
>>>>>>> we do a lot of HW accesses there to ring  which should probably
>>>>>>> be protected from concurrent accesses.
>>>>>>>
>>>>>>> So if any one can answer this question ?
>>>>>> Well what we have is in general two schedulers which push their
>>>>>> work into one hardware ring.
>>>>>>
>>>>>> So we need a lock to make sure that only one is modifying the hw
>>>>>> ring at the same time.
>>>>>>
>>>>>>  From the implementation I think we first write the commands into a
>>>>>> shadow ring buffer and then copy them over to the real hw ring here.
>>>>>>
>>>>>> So this is the only place where we actually touch the hw ring
>>>>>> buffer and need to grab the lock.
>>>>>>
>>>>>> Did I get this right?
>>>>>>
>>>>>> Thanks,
>>>>>> Christian.
>>>>>
>>>>> For the case of the sw ring yes, but I was asking in general,
>>>>> accesses to real HW rings, amdgpu_ib_schedule writes to HW rings,
>>>>> we may be accessing same HW ring from 2 different contexts when
>>>>> doing direct submissions (i.e. calling amdgpu_ib_schedule directly
>>>>> from 2 threads concurrently) this opens possibility to concurrent
>>>>> access to HW. Or am i missing something here ?
>>>> No, that's pretty much correct.
>>>>
>>>> The general idea is that amdgpu_ib_schedule() first writes into a
>>>> separate software ring buffer for each scheduler. So no locking
>>>> needed for that.
>>>>
>>>> Then when the set_wptr callback is called we grab the lock and copy
>>>> the software ring content to the real hw ring and telling the hw to
>>>> execute it.
>>>>
>>>> The spin_lock is to protect from concurrent hw access.
>>>>
>>>> Regards,
>>>> Christian.
>>>
>>> Look at
>>> amdgpu_copy_buffer->amdgpu_job_submit_direct->amdgpu_ib_schedule->amd
>>> gpu_ring_commit->amdgpu_ring_set_wptr,
>>> at no point there lock is taken. The only lock i see that resembles
>>> what you describe is for amdgpu_kiq.ring_lock. So this applies only
>>> to some of the code but not to all cases.
>> Sounds like we have a misunderstanding here.
>>
>> The case we look at should be this:
>>
>> amdgpu_job_run()->amdgpu_ib_schedule()->amdgpu_ring_commit()->amdgpu_r
>> ing_set_wptr()...amdgpu_ring_set_wptr_to_mux()
>>
>>
>> Then amdgpu_ring_set_wptr_to_mux() we then grab the lock, copy over
>> the commands, commit them to the hw and then drop the lock.
>
>> Yes, misunderstanding - I am asking for the general case not related to this patch-set. When we work with HW rings directly from direct submissions.
>> Nothing prevents in that case from 2 concurrent accesses to HW the way i showed above, or is there something ?
>> Andrey
> drm_sched_init creates one drm_sched_main thread per ring, thus every amdgpu_ib_schedule call on a given ring should be executed in the same thread. Please correct me if I am wrong.


You are right for scheduler mediated submissions (executing through 
drm_sched_backend_ops.run_job hook) , I am talking about direct 
submissions without gpu scheduler (using amdgpu_job_submit_direct)

Andrey


>
> Thanks,
> Jiadong
>> Christian.
>>
>>> Andrey
>>>
>>>
>>>>
>>>>> Andrey
>>>>>
>>>>>
>>>>>>>
>>>>>>>> +    e->sw_cptr = e->sw_wptr;
>>>>>>>> +    e->sw_wptr = wptr;
>>>>>>>> +    e->start_ptr_in_hw_ring = mux->real_ring->wptr;
>>>>>>>> +
>>>>>>>> +    if (copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr) ==
>>>>>>>> +0) {
>>>>>>>> +        e->end_ptr_in_hw_ring = mux->real_ring->wptr;
>>>>>>>> +        amdgpu_ring_commit(mux->real_ring);
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    spin_unlock(&mux->lock);
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux,
>>>>>>>> struct amdgpu_ring *ring)
>>>>>>>> +{
>>>>>>>> +    struct amdgpu_mux_entry *e;
>>>>>>>> +
>>>>>>>> +    e = amdgpu_get_sw_entry(mux, ring);
>>>>>>>> +    if (!e) {
>>>>>>>> +        DRM_ERROR("cannot find entry for sw ring\n");
>>>>>>>> +        return 0;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    return e->sw_wptr;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux,
>>>>>>>> struct amdgpu_ring *ring)
>>>>>>>> +{
>>>>>>>> +    struct amdgpu_mux_entry *e;
>>>>>>>> +    u64 r_rptr, r_wptr, offset, start, end;
>>>>>>>> +
>>>>>>>> +    e = amdgpu_get_sw_entry(mux, ring);
>>>>>>>> +    if (!e) {
>>>>>>>> +        DRM_ERROR("no sw entry found!\n");
>>>>>>>> +        return 0;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    r_rptr = amdgpu_ring_get_rptr(mux->real_ring);
>>>>>>>> +    r_wptr = amdgpu_ring_get_wptr(mux->real_ring);
>>>>>>>> +
>>>>>>>> +    if (r_wptr < r_rptr)
>>>>>>>> +        r_wptr += mux->real_ring->ring_size >> 2;
>>>>>>>> +
>>>>>>>> +    start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask;
>>>>>>>> +    end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask;
>>>>>>>> +    if (start > end)
>>>>>>>> +        end += mux->real_ring->ring_size >> 2;
>>>>>>>> +    if (r_rptr <= end && r_rptr >= start) {
>>>>>>>> +        offset = r_rptr - start;
>>>>>>>> +        e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask;
>>>>>>>> +    } else if (r_rptr < start) {
>>>>>>>> +        e->sw_rptr = e->sw_cptr;
>>>>>>>> +    } else {
>>>>>>>> +        e->sw_rptr = e->sw_wptr;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    return e->sw_rptr;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +/*copy packages on sw ring range[begin, end) */ static int
>>>>>>>> +copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux,
>>>>>>>> struct amdgpu_ring *ring,
>>>>>>>> +    u64 s_begin, u64 s_end)
>>>>>>>> +{
>>>>>>>> +    u64 begin, end, r_begin, r_end;
>>>>>>>> +    struct amdgpu_ring *real_ring = mux->real_ring;
>>>>>>>> +
>>>>>>>> +    begin = s_begin & ring->buf_mask;
>>>>>>>> +    end = s_end & ring->buf_mask;
>>>>>>>> +
>>>>>>>> +    r_begin = real_ring->wptr & real_ring->buf_mask;
>>>>>>>> +    if (begin == end)
>>>>>>>> +        return -ERANGE;
>>>>>>>> +    if (begin > end) {
>>>>>>>> +        amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) +
>>>>>>>> end - begin);
>>>>>>>> +        amdgpu_ring_write_multiple(real_ring, (void
>>>>>>>> *)&ring->ring[begin],
>>>>>>>> +            (ring->ring_size >> 2) - begin);
>>>>>>>> +        amdgpu_ring_write_multiple(real_ring, (void
>>>>>>>> *)&ring->ring[0], end);
>>>>>>>> +    } else {
>>>>>>>> +        amdgpu_ring_alloc(real_ring, end - begin);
>>>>>>>> +        amdgpu_ring_write_multiple(real_ring, (void
>>>>>>>> *)&ring->ring[begin], end - begin);
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    r_end = real_ring->wptr & real_ring->buf_mask;
>>>>>>>> +
>>>>>>>> +    return 0;
>>>>>>>> +}
>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>>>>>> new file mode 100644
>>>>>>>> index 000000000000..d058c43bb063
>>>>>>>> --- /dev/null
>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>>>>>>>> @@ -0,0 +1,67 @@
>>>>>>>> +/*
>>>>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>>>>> + *
>>>>>>>> + * Permission is hereby granted, free of charge, to any person
>>>>>>>> obtaining a
>>>>>>>> + * copy of this software and associated documentation files
>>>>>>>> (the "Software"),
>>>>>>>> + * to deal in the Software without restriction, including
>>>>>>>> without limitation
>>>>>>>> + * the rights to use, copy, modify, merge, publish, distribute,
>>>>>>>> sublicense,
>>>>>>>> + * and/or sell copies of the Software, and to permit persons to
>>>>>>>> whom the
>>>>>>>> + * Software is furnished to do so, subject to the following
>>>>>>>> conditions:
>>>>>>>> + *
>>>>>>>> + * The above copyright notice and this permission notice shall
>>>>>>>> be included in
>>>>>>>> + * all copies or substantial portions of the Software.
>>>>>>>> + *
>>>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
>>>>>>>> KIND, EXPRESS OR
>>>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>>>>>>> MERCHANTABILITY,
>>>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
>>>>>>>> EVENT SHALL
>>>>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY
>>>>>>>> CLAIM, DAMAGES OR
>>>>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>>>>>>>> OTHERWISE,
>>>>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
>>>>>>>> THE USE OR
>>>>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>>>>> + *
>>>>>>>> + */
>>>>>>>> +
>>>>>>>> +#ifndef __AMDGPU_RING_MUX__
>>>>>>>> +#define __AMDGPU_RING_MUX__
>>>>>>>> +
>>>>>>>> +#include <linux/timer.h>
>>>>>>>> +#include <linux/spinlock.h>
>>>>>>>> +#include "amdgpu_ring.h"
>>>>>>>> +
>>>>>>>> +struct amdgpu_ring;
>>>>>>>> +/*
>>>>>>>> + * start_ptr_in_hw_ring - last copied start loc on hw ring
>>>>>>>> + * end_ptr_in_hw_ring - last copied end loc on hw ring
>>>>>>>> +*sw_cptr -the begin of copy ptr in sw ring  *sw_rptr; the read
>>>>>>>> +ptr in sw ring  *sw_wptr; the write ptr in sw ring  */ struct
>>>>>>>> +amdgpu_mux_entry {
>>>>>>>> +    struct amdgpu_ring    *ring;
>>>>>>>> +    u64 start_ptr_in_hw_ring;
>>>>>>>> +    u64 end_ptr_in_hw_ring;
>>>>>>>> +
>>>>>>>> +    u64 sw_cptr;
>>>>>>>> +    u64 sw_rptr;
>>>>>>>> +    u64 sw_wptr;
>>>>>>>> +};
>>>>>>>> +
>>>>>>>> +struct amdgpu_ring_mux {
>>>>>>>> +    struct amdgpu_ring *real_ring;
>>>>>>>> +
>>>>>>>> +    struct amdgpu_mux_entry ring_entries[AMDGPU_MAX_GFX_RINGS];
>>>>>>>> +
>>>>>>>> +    unsigned num_ring_entries;
>>>>>>>> +
>>>>>>>> +    spinlock_t            lock;
>>>>>>>> +
>>>>>>>> +};
>>>>>>>> +
>>>>>>>> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct
>>>>>>>> amdgpu_ring *ring);
>>>>>>>> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux); int
>>>>>>>> +amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux,
>>>>>>>> struct amdgpu_ring *ring);
>>>>>>>> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux,
>>>>>>>> struct amdgpu_ring *ring, u64 wptr);
>>>>>>>> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux,
>>>>>>>> struct amdgpu_ring *ring);
>>>>>>>> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux,
>>>>>>>> struct amdgpu_ring *ring);
>>>>>>>> +
>>>>>>>> +#endif
>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>>>>>> new file mode 100644
>>>>>>>> index 000000000000..452d0ff37758
>>>>>>>> --- /dev/null
>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>>>>>>>> @@ -0,0 +1,204 @@
>>>>>>>> +/*
>>>>>>>> + * Copyright 2022 Advanced Micro Devices, Inc.
>>>>>>>> + * All Rights Reserved.
>>>>>>>> + *
>>>>>>>> + * Permission is hereby granted, free of charge, to any person
>>>>>>>> obtaining a
>>>>>>>> + * copy of this software and associated documentation files
>>>>>>>> + (the
>>>>>>>> + * "Software"), to deal in the Software without restriction,
>>>>>>>> including
>>>>>>>> + * without limitation the rights to use, copy, modify, merge,
>>>>>>>> publish,
>>>>>>>> + * distribute, sub license, and/or sell copies of the Software,
>>>>>>>> and to
>>>>>>>> + * permit persons to whom the Software is furnished to do so,
>>>>>>>> subject to
>>>>>>>> + * the following conditions:
>>>>>>>> + *
>>>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
>>>>>>>> KIND, EXPRESS OR
>>>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>>>>>>> MERCHANTABILITY,
>>>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO
>>>>>>>> EVENT SHALL
>>>>>>>> + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE
>>>>>>>> LIABLE FOR ANY CLAIM,
>>>>>>>> + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
>>>>>>>> CONTRACT, TORT OR
>>>>>>>> + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
>>>>>>>> SOFTWARE OR THE
>>>>>>>> + * USE OR OTHER DEALINGS IN THE SOFTWARE.
>>>>>>>> + *
>>>>>>>> + * The above copyright notice and this permission notice
>>>>>>>> (including the
>>>>>>>> + * next paragraph) shall be included in all copies or
>>>>>>>> substantial portions
>>>>>>>> + * of the Software.
>>>>>>>> + *
>>>>>>>> + */
>>>>>>>> +
>>>>>>>> +#include "amdgpu_sw_ring.h"
>>>>>>>> +#include "amdgpu_ring_mux.h"
>>>>>>>> +
>>>>>>>> +#define amdgpu_ring_get_gpu_addr(ring, offset) \
>>>>>>>> +    (ring->is_mes_queue ?                        \
>>>>>>>> +     (ring->mes_ctx->meta_data_gpu_addr + offset) :
>>>>>>>> +\
>>>>>>>> +     (ring->adev->wb.gpu_addr + offset * 4))
>>>>>>>> +
>>>>>>>> +#define amdgpu_ring_get_cpu_addr(ring, offset) \
>>>>>>>> +    (ring->is_mes_queue ?                        \
>>>>>>>> +     (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) +
>>>>>>>> offset) : \
>>>>>>>> +     (&ring->adev->wb.wb[offset]))
>>>>>>>> +
>>>>>>>> +
>>>>>>>> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct
>>>>>>>> amdgpu_ring *ring,
>>>>>>>> +             unsigned int max_dw, struct amdgpu_irq_src
>>>>>>>> +*irq_src,
>>>>>>>> +             unsigned int irq_type, unsigned int hw_prio,
>>>>>>>> +             atomic_t *sched_score) {
>>>>>>>> +    int r;
>>>>>>>> +    int sched_hw_submission = amdgpu_sched_hw_submission;
>>>>>>>> +    u32 *num_sched;
>>>>>>>> +    u32 hw_ip;
>>>>>>>> +
>>>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>>>> +
>>>>>>>> +    if (ring->adev == NULL) {
>>>>>>>> +        if (adev->num_rings >= AMDGPU_MAX_RINGS)
>>>>>>>> +            return -EINVAL;
>>>>>>>> +
>>>>>>>> +        ring->adev = adev;
>>>>>>>> +        ring->num_hw_submission = sched_hw_submission;
>>>>>>>> +        ring->sched_score = sched_score;
>>>>>>>> +        ring->vmid_wait = dma_fence_get_stub();
>>>>>>>> +
>>>>>>>> +        if (!ring->is_mes_queue) {
>>>>>>>> +            ring->idx = adev->num_rings++;
>>>>>>>> +            adev->rings[ring->idx] = ring;
>>>>>>>> +        }
>>>>>>>> +
>>>>>>>> +        r = amdgpu_fence_driver_init_ring(ring);
>>>>>>>> +        if (r)
>>>>>>>> +            return r;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    r = amdgpu_device_wb_get(adev, &ring->fence_offs);
>>>>>>>> +    if (r) {
>>>>>>>> +        dev_err(adev->dev, "(%d) ring fence_offs wb alloc
>>>>>>>> failed\n", r);
>>>>>>>> +        return r;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    r = amdgpu_device_wb_get(adev, &ring->fence_offs);
>>>>>>>> +    if (r) {
>>>>>>>> +        dev_err(adev->dev, "(%d) ring fence_offs wb alloc
>>>>>>>> failed\n", r);
>>>>>>>> +        return r;
>>>>>>>> +    }
>>>>>>>
>>>>>>> Looks like a typo copy pase duplicate of the above
>>>>>>>
>>>>>>>> +
>>>>>>>> +    r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
>>>>>>>> +    if (r) {
>>>>>>>> +        dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc
>>>>>>>> failed\n", r);
>>>>>>>> +        return r;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
>>>>>>>> +    if (r) {
>>>>>>>> +        dev_err(adev->dev, "(%d) ring cond_exec_polling wb
>>>>>>>> alloc failed\n", r);
>>>>>>>> +        return r;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    ring->fence_gpu_addr =
>>>>>>>> +        amdgpu_ring_get_gpu_addr(ring, ring->fence_offs);
>>>>>>>> +    ring->fence_cpu_addr =
>>>>>>>> +        amdgpu_ring_get_cpu_addr(ring, ring->fence_offs);
>>>>>>>> +
>>>>>>>> +    ring->trail_fence_gpu_addr =
>>>>>>>> +        amdgpu_ring_get_gpu_addr(ring, ring->trail_fence_offs);
>>>>>>>> +    ring->trail_fence_cpu_addr =
>>>>>>>> +        amdgpu_ring_get_cpu_addr(ring, ring->trail_fence_offs);
>>>>>>>> +
>>>>>>>> +    ring->cond_exe_gpu_addr =
>>>>>>>> +        amdgpu_ring_get_gpu_addr(ring, ring->cond_exe_offs);
>>>>>>>> +    ring->cond_exe_cpu_addr =
>>>>>>>> +        amdgpu_ring_get_cpu_addr(ring, ring->cond_exe_offs);
>>>>>>>> +
>>>>>>>> +    /* always set cond_exec_polling to CONTINUE */
>>>>>>>> +    *ring->cond_exe_cpu_addr = 1;
>>>>>>>> +
>>>>>>>> +    r = amdgpu_fence_driver_start_ring(ring, irq_src,
>>>>>>>> +irq_type);
>>>>>>>> +    if (r) {
>>>>>>>> +        dev_err(adev->dev, "failed initializing fences
>>>>>>>> (%d).\n", r);
>>>>>>>> +        return r;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    ring->ring_size = roundup_pow_of_two(max_dw * 4 *
>>>>>>>> sched_hw_submission);
>>>>>>>> +
>>>>>>>> +    ring->buf_mask = (ring->ring_size / 4) - 1;
>>>>>>>> +    ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
>>>>>>>> +        0xffffffffffffffff : ring->buf_mask;
>>>>>>>> +
>>>>>>>> +    /* Allocate ring buffer */
>>>>>>>> +    if (ring->ring == NULL) {
>>>>>>>> +        ring->ring = kzalloc(ring->ring_size +
>>>>>>>> ring->funcs->extra_dw, GFP_KERNEL);
>>>>>>>> +        if (!ring->ring) {
>>>>>>>> +            dev_err(adev->dev, "(%d) swring create failed\n",
>>>>>>>> +r);
>>>>>>>> +            return r;
>>>>>>>> +        }
>>>>>>>> +
>>>>>>>> +        amdgpu_ring_clear_ring(ring);
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    ring->max_dw = max_dw;
>>>>>>>> +    ring->hw_prio = hw_prio;
>>>>>>>> +
>>>>>>>> +    if (!ring->no_scheduler) {
>>>>>>>> +        hw_ip = ring->funcs->type;
>>>>>>>> +        num_sched =
>>>>>>>> +&adev->gpu_sched[hw_ip][hw_prio].num_scheds;
>>>>>>>> + adev->gpu_sched[hw_ip][hw_prio].sched[(*num_sched)++] =
>>>>>>>> +            &ring->sched;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    return 0;
>>>>>>>> +}
>>>>>>>
>>>>>>> In general i see this function is a big one to one subset of
>>>>>>> amdgpu_ring_init.
>>>>>>> Could you maybe see a way to refactor such that this function is
>>>>>>> the base and for HW related code that different (like BO
>>>>>>> allocation for ring buffer) you maybe can add if
>>>>>>> (!ring->sw_ring)... and add those code snippets ? To avoid
>>>>>>> substantial code duplication.
>>>>>>>
>>>>>>> Andrey
>>>>>>>
>>>>>>>
>>>>>>>> +
>>>>>>>> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring) {
>>>>>>>> +    struct amdgpu_device *adev = ring->adev;
>>>>>>>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>>>>>>>> +
>>>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>>>> +    return amdgpu_ring_get_rptr_from_mux(mux, ring); }
>>>>>>>> +
>>>>>>>> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring) {
>>>>>>>> +    struct amdgpu_device *adev = ring->adev;
>>>>>>>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>>>>>>>> +
>>>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>>>> +    return amdgpu_ring_get_wptr_from_mux(mux, ring); }
>>>>>>>> +
>>>>>>>> +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring) {
>>>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +void amdgpu_sw_ring_commit(struct amdgpu_ring *ring) {
>>>>>>>> +    struct amdgpu_device *adev = ring->adev;
>>>>>>>> +    struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
>>>>>>>> +
>>>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>>>> +    amdgpu_ring_set_wptr_to_mux(mux, ring, ring->wptr); }
>>>>>>>> +
>>>>>>>> +void amdgpu_sw_ring_fini(struct amdgpu_ring *ring) {
>>>>>>>> +    BUG_ON(!ring->is_sw_ring);
>>>>>>>> +
>>>>>>>> +    /* Not to finish a ring which is not initialized */
>>>>>>>> +    if (!(ring->adev) ||
>>>>>>>> +        (!ring->is_mes_queue &&
>>>>>>>> +!(ring->adev->rings[ring->idx])))
>>>>>>>> +        return;
>>>>>>>> +
>>>>>>>> +    ring->sched.ready = false;
>>>>>>>> +
>>>>>>>> +    amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
>>>>>>>> +    amdgpu_device_wb_free(ring->adev, ring->fence_offs);
>>>>>>>> +
>>>>>>>> +    kfree((void *)ring->ring);
>>>>>>>> +
>>>>>>>> +    dma_fence_put(ring->vmid_wait);
>>>>>>>> +    ring->vmid_wait = NULL;
>>>>>>>> +    ring->me = 0;
>>>>>>>> +
>>>>>>>> +    ring->adev->rings[ring->idx] = NULL; }
>>>>>>>> +
>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>>>>>> new file mode 100644
>>>>>>>> index 000000000000..c05d8a94ad0c
>>>>>>>> --- /dev/null
>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>>>>>>>> @@ -0,0 +1,48 @@
>>>>>>>> +/*
>>>>>>>> + * Copyright 2012 Advanced Micro Devices, Inc.
>>>>>>>> + *
>>>>>>>> + * Permission is hereby granted, free of charge, to any person
>>>>>>>> obtaining a
>>>>>>>> + * copy of this software and associated documentation files
>>>>>>>> (the "Software"),
>>>>>>>> + * to deal in the Software without restriction, including
>>>>>>>> without limitation
>>>>>>>> + * the rights to use, copy, modify, merge, publish, distribute,
>>>>>>>> sublicense,
>>>>>>>> + * and/or sell copies of the Software, and to permit persons to
>>>>>>>> whom the
>>>>>>>> + * Software is furnished to do so, subject to the following
>>>>>>>> conditions:
>>>>>>>> + *
>>>>>>>> + * The above copyright notice and this permission notice shall
>>>>>>>> be included in
>>>>>>>> + * all copies or substantial portions of the Software.
>>>>>>>> + *
>>>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
>>>>>>>> KIND, EXPRESS OR
>>>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>>>>>>> MERCHANTABILITY,
>>>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
>>>>>>>> EVENT SHALL
>>>>>>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY
>>>>>>>> CLAIM, DAMAGES OR
>>>>>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>>>>>>>> OTHERWISE,
>>>>>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
>>>>>>>> THE USE OR
>>>>>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>>>>>> + *
>>>>>>>> + */
>>>>>>>> +
>>>>>>>> +#include <drm/amdgpu_drm.h>
>>>>>>>> +#include <drm/gpu_scheduler.h>
>>>>>>>> +#include <drm/drm_print.h>
>>>>>>>> +
>>>>>>>> +#include "amdgpu_irq.h"
>>>>>>>> +#include "amdgpu_ring.h"
>>>>>>>> +#include "amdgpu.h"
>>>>>>>> +
>>>>>>>> +#ifndef __AMDGPU_SWRING_H__
>>>>>>>> +#define __AMDGPU_SWRING_H__
>>>>>>>> +
>>>>>>>> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct
>>>>>>>> amdgpu_ring *sw_ring,
>>>>>>>> +             unsigned int max_dw, struct amdgpu_irq_src
>>>>>>>> +*irq_src,
>>>>>>>> +             unsigned int irq_type, unsigned int hw_prio,
>>>>>>>> +             atomic_t *sched_score); void
>>>>>>>> +amdgpu_sw_ring_fini(struct amdgpu_ring *ring);
>>>>>>>> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring);
>>>>>>>> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring); void
>>>>>>>> +amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring); void
>>>>>>>> +amdgpu_sw_ring_commit(struct amdgpu_ring *ring);
>>>>>>>> +
>>>>>>>> +void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring); void
>>>>>>>> +amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
>>>>>>>> +
>>>>>>>> +#endif

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3)
  2022-09-13  2:00                 ` Andrey Grodzovsky
@ 2022-09-13  7:25                   ` Christian König
  2022-09-13 15:07                     ` Andrey Grodzovsky
  0 siblings, 1 reply; 28+ messages in thread
From: Christian König @ 2022-09-13  7:25 UTC (permalink / raw)
  To: Andrey Grodzovsky, Zhu, Jiadong, amd-gfx; +Cc: Huang, Ray

Am 13.09.22 um 04:00 schrieb Andrey Grodzovsky:
>
> [SNIP]
>
> You are right for scheduler mediated submissions (executing through 
> drm_sched_backend_ops.run_job hook) , I am talking about direct 
> submissions without gpu scheduler (using amdgpu_job_submit_direct)
>
> Andrey

Direct submission is only used while initially testing the hardware, 
during a GPU reset/recovery or for handling page faults with the SDMA.

In other words when we know that we have exclusive access to the hardware.

Christian.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3)
  2022-09-13  7:25                   ` Christian König
@ 2022-09-13 15:07                     ` Andrey Grodzovsky
  0 siblings, 0 replies; 28+ messages in thread
From: Andrey Grodzovsky @ 2022-09-13 15:07 UTC (permalink / raw)
  To: Christian König, Zhu, Jiadong, amd-gfx; +Cc: Huang, Ray

I guess, but this is kind of an implicit assumption which is not really 
documented and is easily overlooked.
Anyway - for this code it's not directly relevant.

Andrey


On 2022-09-13 03:25, Christian König wrote:
> Am 13.09.22 um 04:00 schrieb Andrey Grodzovsky:
>>
>> [SNIP]
>>
>> You are right for scheduler mediated submissions (executing through 
>> drm_sched_backend_ops.run_job hook) , I am talking about direct 
>> submissions without gpu scheduler (using amdgpu_job_submit_direct)
>>
>> Andrey
>
> Direct submission is only used while initially testing the hardware, 
> during a GPU reset/recovery or for handling page faults with the SDMA.
>
> In other words when we know that we have exclusive access to the 
> hardware.
>
> Christian.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3)
  2022-09-09  1:50 [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3) jiadong.zhu
                   ` (4 preceding siblings ...)
  2022-09-09 16:45 ` Andrey Grodzovsky
@ 2022-09-13 15:12 ` Luben Tuikov
  2022-09-14  2:34   ` Zhu, Jiadong
  5 siblings, 1 reply; 28+ messages in thread
From: Luben Tuikov @ 2022-09-13 15:12 UTC (permalink / raw)
  To: jiadong.zhu, amd-gfx; +Cc: Ray.Huang

Inlined:

On 2022-09-08 21:50, jiadong.zhu@amd.com wrote:
> From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>
> 
> The software ring is created to support priority
> context while there is only one hardware queue
> for gfx.
> 
> Every software ring has its own fence driver and can
> be used as an ordinary ring for the gpu_scheduler.
> Multiple software rings are bound to a real ring
> with the ring muxer. The packets committed on the
> software ring are copied to the real ring.
> 
> v2: use array to store software ring entry.
> v3: remove unnecessary prints.
> 
> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/Makefile          |   3 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h      |   3 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h     |   3 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 182 +++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  67 ++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  | 204 +++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h  |  48 +++++
>  7 files changed, 509 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
> index 3e0e2eb7e235..85224bc81ce5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>  	amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
>  	amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
>  	amdgpu_fw_attestation.o amdgpu_securedisplay.o \
> -	amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
> +	amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
> +	amdgpu_sw_ring.o amdgpu_ring_mux.o
>  
>  amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 53526ffb2ce1..0de8e3cd0f1c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -33,6 +33,7 @@
>  #include "amdgpu_imu.h"
>  #include "soc15.h"
>  #include "amdgpu_ras.h"
> +#include "amdgpu_ring_mux.h"
>  
>  /* GFX current status */
>  #define AMDGPU_GFX_NORMAL_MODE			0x00000000L
> @@ -346,6 +347,8 @@ struct amdgpu_gfx {
>  	struct amdgpu_gfx_ras		*ras;
>  
>  	bool				is_poweron;
> +
> +	struct amdgpu_ring_mux			muxer;
>  };
>  
>  #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 7d89a52091c0..fe33a683bfba 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -278,6 +278,9 @@ struct amdgpu_ring {
>  	bool			is_mes_queue;
>  	uint32_t		hw_queue_id;
>  	struct amdgpu_mes_ctx_data *mes_ctx;
> +
> +	bool			is_sw_ring;
> +
>  };
>  
>  #define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib)))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
> new file mode 100644
> index 000000000000..ea4a3c66119a
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
> @@ -0,0 +1,182 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#include <drm/drm_print.h>
> +
> +#include "amdgpu_ring_mux.h"
> +#include "amdgpu_ring.h"
> +
> +#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ/2)
> +
> +static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
> +	u64 s_begin, u64 s_end);
> +
> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
> +{
> +	mux->real_ring = ring;
> +	memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
> +	mux->num_ring_entries = 0;
> +	spin_lock_init(&mux->lock);
> +	return 0;
> +}
> +
> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux)
> +{
> +	memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
> +	mux->num_ring_entries = 0;
> +}
> +
> +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_mux_entry *e;
> +
> +	if (mux->num_ring_entries == AMDGPU_MAX_GFX_RINGS) {
> +		DRM_ERROR("adding sw ring exceeds max gfx num\n");
> +		return -ENOMEM;
> +	}

You can't return here -ENOMEM, as it is not a real out of memory condition.
Maybe EINVAL or something like that, but not ENOMEM.

Also, under what circumstances would we get to this condition here?
Are such circumstances valid?

And if so, then when this is returned, what happens?
Does the driver die?

I feel we shouldn't ever have this here--it should've been
calculated correctly to never have fallen in this/such a circumstance like that here.

> +
> +	e = &mux->ring_entries[mux->num_ring_entries++];
> +
> +	e->ring = ring;
> +	e->start_ptr_in_hw_ring = 0;
> +	e->end_ptr_in_hw_ring = 0;
> +	e->sw_cptr = 0;
> +	e->sw_rptr = 0;
> +	e->sw_wptr = 0;
> +
> +	return 0;
> +}
> +
> +static struct amdgpu_mux_entry *amdgpu_get_sw_entry(struct amdgpu_ring_mux *mux,
> +				struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_mux_entry *e;
> +	int i;
> +
> +	e = NULL;
> +	for (i = 0; i < mux->num_ring_entries; i++) {
> +		if (mux->ring_entries[i].ring == ring) {
> +			e = &mux->ring_entries[i];
> +			break;
> +		}
> +	}
> +
> +	return e;
> +}
> +
> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr)
> +{
> +	struct amdgpu_mux_entry *e;
> +
> +	e = amdgpu_get_sw_entry(mux, ring);
> +	if (!e) {
> +		DRM_ERROR("cannot find entry for sw ring\n");
> +		return;
> +	}

How and why would we get in this situation here like that?
Also, DRM_ERROR() may overflow the kernel log. Perhaps DRM_DEBUG is better,
or not print anything at all.

> +
> +	spin_lock(&mux->lock);
> +	e->sw_cptr = e->sw_wptr;
> +	e->sw_wptr = wptr;
> +	e->start_ptr_in_hw_ring = mux->real_ring->wptr;
> +
> +	if (copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr) == 0) {
> +		e->end_ptr_in_hw_ring = mux->real_ring->wptr;
> +		amdgpu_ring_commit(mux->real_ring);
> +	}
> +
> +	spin_unlock(&mux->lock);
> +}
> +
> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_mux_entry *e;
> +
> +	e = amdgpu_get_sw_entry(mux, ring);
> +	if (!e) {
> +		DRM_ERROR("cannot find entry for sw ring\n");
> +		return 0;
> +	}
> +
> +	return e->sw_wptr;
> +}
> +
> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_mux_entry *e;
> +	u64 r_rptr, r_wptr, offset, start, end;
> +
> +	e = amdgpu_get_sw_entry(mux, ring);
> +	if (!e) {
> +		DRM_ERROR("no sw entry found!\n");
> +		return 0;
> +	}
> +
> +	r_rptr = amdgpu_ring_get_rptr(mux->real_ring);
> +	r_wptr = amdgpu_ring_get_wptr(mux->real_ring);

These names are very much the same to a human. How about
writep and readp?

> +
> +	if (r_wptr < r_rptr)
> +		r_wptr += mux->real_ring->ring_size >> 2;
> +
> +	start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask;
> +	end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask;
> +	if (start > end)
> +		end += mux->real_ring->ring_size >> 2;
> +	if (r_rptr <= end && r_rptr >= start) {
> +		offset = r_rptr - start;
> +		e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask;
> +	} else if (r_rptr < start) {
> +		e->sw_rptr = e->sw_cptr;
> +	} else {
> +		e->sw_rptr = e->sw_wptr;
> +	}
> +
> +	return e->sw_rptr;
> +}
> +
> +/*copy packages on sw ring range[begin, end) */
> +static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
> +	u64 s_begin, u64 s_end)
> +{
> +	u64 begin, end, r_begin, r_end;
> +	struct amdgpu_ring *real_ring = mux->real_ring;
> +
> +	begin = s_begin & ring->buf_mask;
> +	end = s_end & ring->buf_mask;
> +
> +	r_begin = real_ring->wptr & real_ring->buf_mask;
> +	if (begin == end)
> +		return -ERANGE;
> +	if (begin > end) {
> +		amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) + end - begin);
> +		amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[begin],
> +			(ring->ring_size >> 2) - begin);
> +		amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[0], end);
> +	} else {
> +		amdgpu_ring_alloc(real_ring, end - begin);
> +		amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[begin], end - begin);
> +	}
> +
> +	r_end = real_ring->wptr & real_ring->buf_mask;
> +
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> new file mode 100644
> index 000000000000..d058c43bb063
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> @@ -0,0 +1,67 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#ifndef __AMDGPU_RING_MUX__
> +#define __AMDGPU_RING_MUX__
> +
> +#include <linux/timer.h>
> +#include <linux/spinlock.h>
> +#include "amdgpu_ring.h"
> +
> +struct amdgpu_ring;
> +/*
> + * start_ptr_in_hw_ring - last copied start loc on hw ring

Use double dash for visibility "--".
Please spell out "location".
Perhaps you want to say something like:
"last start location where we copied to in the hardware ring".

Don't you just need an "index pointer"--pointing to the next
location to copy to?

I don't understand what "last copied end loc on hw ring" is,
as well as "last copied start loc on hw ring". 

> + * end_ptr_in_hw_ring - last copied end loc on hw ring
> + *sw_cptr -the begin of copy ptr in sw ring

Double-dash and space around it.

Don't you just mean "the copy pointer in sw ring"?

> + *sw_rptr; the read ptr in sw ring
> + *sw_wptr; the write ptr in sw ring

Double-dash instead of semicolon.

> + */
> +struct amdgpu_mux_entry {
> +	struct amdgpu_ring	*ring;
> +	u64 start_ptr_in_hw_ring;
> +	u64 end_ptr_in_hw_ring;
> +
> +	u64 sw_cptr;
> +	u64 sw_rptr;
> +	u64 sw_wptr;
> +};
> +
> +struct amdgpu_ring_mux {
> +	struct amdgpu_ring *real_ring;
> +
> +	struct amdgpu_mux_entry ring_entries[AMDGPU_MAX_GFX_RINGS];

Shouldn't the size of this array be dynamic depending on the ASIC used?
Maybe call it "ring_entry", so that "ring_entry[i]" means "ring entry at index i."

> +
> +	unsigned num_ring_entries;
> +
> +	spinlock_t			lock;
> +
> +};
> +
> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux);
> +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr);
> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
> +
> +#endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
> new file mode 100644
> index 000000000000..452d0ff37758
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
> @@ -0,0 +1,204 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + * All Rights Reserved.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the
> + * "Software"), to deal in the Software without restriction, including
> + * without limitation the rights to use, copy, modify, merge, publish,
> + * distribute, sub license, and/or sell copies of the Software, and to
> + * permit persons to whom the Software is furnished to do so, subject to
> + * the following conditions:
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
> + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
> + * USE OR OTHER DEALINGS IN THE SOFTWARE.
> + *
> + * The above copyright notice and this permission notice (including the
> + * next paragraph) shall be included in all copies or substantial portions
> + * of the Software.
> + *
> + */
> +
> +#include "amdgpu_sw_ring.h"
> +#include "amdgpu_ring_mux.h"
> +
> +#define amdgpu_ring_get_gpu_addr(ring, offset)				\
> +	(ring->is_mes_queue ?						\
> +	 (ring->mes_ctx->meta_data_gpu_addr + offset) :			\
> +	 (ring->adev->wb.gpu_addr + offset * 4))

I don't know how you were able to actually insert TAB chars after the text and before
the backslash. Pressing the TAB key in my editor only aligns the line according to
the mode, and that's it--I can't insert a TAB char.

Don't insert TAB chars to align the backslash. Instead use the space bar--insert spaces.

> +
> +#define amdgpu_ring_get_cpu_addr(ring, offset)				\
> +	(ring->is_mes_queue ?						\
> +	 (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \
> +	 (&ring->adev->wb.wb[offset]))
> +
> +
> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
> +		     unsigned int max_dw, struct amdgpu_irq_src *irq_src,
> +		     unsigned int irq_type, unsigned int hw_prio,
> +		     atomic_t *sched_score)
> +{
> +	int r;
> +	int sched_hw_submission = amdgpu_sched_hw_submission;
> +	u32 *num_sched;
> +	u32 hw_ip;
> +
> +	BUG_ON(!ring->is_sw_ring);
> +
> +	if (ring->adev == NULL) {
> +		if (adev->num_rings >= AMDGPU_MAX_RINGS)
> +			return -EINVAL;

I don't think we should have this here. Make it dynamic instead,
so that it would work with any adev in the future.

> +
> +		ring->adev = adev;
> +		ring->num_hw_submission = sched_hw_submission;
> +		ring->sched_score = sched_score;
> +		ring->vmid_wait = dma_fence_get_stub();
> +
> +		if (!ring->is_mes_queue) {
> +			ring->idx = adev->num_rings++;
> +			adev->rings[ring->idx] = ring;
> +		}
> +
> +		r = amdgpu_fence_driver_init_ring(ring);
> +		if (r)
> +			return r;
> +	}
> +
> +	r = amdgpu_device_wb_get(adev, &ring->fence_offs);
> +	if (r) {
> +		dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
> +		return r;
> +	}
> +
> +	r = amdgpu_device_wb_get(adev, &ring->fence_offs);
> +	if (r) {
> +		dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
> +		return r;
> +	}
> +
> +	r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
> +	if (r) {
> +		dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r);
> +		return r;
> +	}
> +
> +	r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
> +	if (r) {
> +		dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r);
> +		return r;
> +	}
> +
> +	ring->fence_gpu_addr =
> +		amdgpu_ring_get_gpu_addr(ring, ring->fence_offs);
> +	ring->fence_cpu_addr =
> +		amdgpu_ring_get_cpu_addr(ring, ring->fence_offs);
> +
> +	ring->trail_fence_gpu_addr =
> +		amdgpu_ring_get_gpu_addr(ring, ring->trail_fence_offs);
> +	ring->trail_fence_cpu_addr =
> +		amdgpu_ring_get_cpu_addr(ring, ring->trail_fence_offs);
> +
> +	ring->cond_exe_gpu_addr =
> +		amdgpu_ring_get_gpu_addr(ring, ring->cond_exe_offs);
> +	ring->cond_exe_cpu_addr =
> +		amdgpu_ring_get_cpu_addr(ring, ring->cond_exe_offs);
> +
> +	/* always set cond_exec_polling to CONTINUE */
> +	*ring->cond_exe_cpu_addr = 1;
> +
> +	r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
> +	if (r) {
> +		dev_err(adev->dev, "failed initializing fences (%d).\n", r);
> +		return r;
> +	}
> +
> +	ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission);
> +
> +	ring->buf_mask = (ring->ring_size / 4) - 1;
> +	ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
> +		0xffffffffffffffff : ring->buf_mask;
> +
> +	/* Allocate ring buffer */
> +	if (ring->ring == NULL) {
> +		ring->ring = kzalloc(ring->ring_size + ring->funcs->extra_dw, GFP_KERNEL);
> +		if (!ring->ring) {
> +			dev_err(adev->dev, "(%d) swring create failed\n", r);
> +			return r;
> +		}
> +
> +		amdgpu_ring_clear_ring(ring);
> +	}
> +
> +	ring->max_dw = max_dw;
> +	ring->hw_prio = hw_prio;
> +
> +	if (!ring->no_scheduler) {
> +		hw_ip = ring->funcs->type;
> +		num_sched = &adev->gpu_sched[hw_ip][hw_prio].num_scheds;
> +		adev->gpu_sched[hw_ip][hw_prio].sched[(*num_sched)++] =
> +			&ring->sched;
> +	}
> +
> +	return 0;
> +}
> +
> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_device *adev = ring->adev;
> +	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
> +
> +	BUG_ON(!ring->is_sw_ring);
> +	return amdgpu_ring_get_rptr_from_mux(mux, ring);
> +}
> +
> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_device *adev = ring->adev;
> +	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
> +
> +	BUG_ON(!ring->is_sw_ring);
> +	return amdgpu_ring_get_wptr_from_mux(mux, ring);
> +}
> +
> +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring)
> +{
> +	BUG_ON(!ring->is_sw_ring);
> +}
> +
> +void amdgpu_sw_ring_commit(struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_device *adev = ring->adev;
> +	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
> +
> +	BUG_ON(!ring->is_sw_ring);
> +	amdgpu_ring_set_wptr_to_mux(mux, ring, ring->wptr);
> +}
> +
> +void amdgpu_sw_ring_fini(struct amdgpu_ring *ring)
> +{
> +	BUG_ON(!ring->is_sw_ring);
> +
> +	/* Not to finish a ring which is not initialized */
> +	if (!(ring->adev) ||
> +	    (!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
> +		return;

You don't need parenthesis around "ring->adev" and around
ring->adev->rings[ring->idx], drop them.

> +
> +	ring->sched.ready = false;
> +
> +	amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
> +	amdgpu_device_wb_free(ring->adev, ring->fence_offs);
> +
> +	kfree((void *)ring->ring);

No need to cast to (void *).

> +
> +	dma_fence_put(ring->vmid_wait);
> +	ring->vmid_wait = NULL;
> +	ring->me = 0;
> +
> +	ring->adev->rings[ring->idx] = NULL;
> +}
> +
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
> new file mode 100644
> index 000000000000..c05d8a94ad0c
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
> @@ -0,0 +1,48 @@
> +/*
> + * Copyright 2012 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#include <drm/amdgpu_drm.h>
> +#include <drm/gpu_scheduler.h>
> +#include <drm/drm_print.h>
> +
> +#include "amdgpu_irq.h"
> +#include "amdgpu_ring.h"
> +#include "amdgpu.h"
> +
> +#ifndef __AMDGPU_SWRING_H__
> +#define __AMDGPU_SWRING_H__
> +
> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *sw_ring,
> +		     unsigned int max_dw, struct amdgpu_irq_src *irq_src,
> +		     unsigned int irq_type, unsigned int hw_prio,
> +		     atomic_t *sched_score);
> +void amdgpu_sw_ring_fini(struct amdgpu_ring *ring);
> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring);
> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring);
> +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring);
> +void amdgpu_sw_ring_commit(struct amdgpu_ring *ring);
> +
> +void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
> +void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
> +
> +#endif

Make sure to run your patches through scripts/checkpatch.pl.

Regards,
-- 
Luben

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 2/4] drm/amdgpu: Add software ring callbacks for gfx9(v3)
  2022-09-09  1:50 ` [PATCH 2/4] drm/amdgpu: Add software ring callbacks for gfx9(v3) jiadong.zhu
  2022-09-09 16:46   ` Andrey Grodzovsky
@ 2022-09-13 15:23   ` Luben Tuikov
  1 sibling, 0 replies; 28+ messages in thread
From: Luben Tuikov @ 2022-09-13 15:23 UTC (permalink / raw)
  To: jiadong.zhu, amd-gfx; +Cc: Ray.Huang

Inlined:

On 2022-09-08 21:50, jiadong.zhu@amd.com wrote:
> From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>
> 
> Set ring functions with software ring callbacks
> on gfx9.
> 
> The software ring could be tested by debugfs_test_ib
> case.
> 
> v2: set sw_ring 2 to enable software ring by default.
> v3: remove the parameter for software ring enablement.
> 
> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h      |   1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |   2 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |  16 +++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   3 +-
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 116 +++++++++++++++++++++--
>  5 files changed, 128 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 96d058c4cd4b..525df0b4d55f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -207,6 +207,7 @@ extern bool amdgpu_ignore_bad_page_threshold;
>  extern struct amdgpu_watchdog_timer amdgpu_watchdog_timer;
>  extern int amdgpu_async_gfx_ring;
>  extern int amdgpu_mcbp;
> +extern int amdgpu_sw_ring;
>  extern int amdgpu_discovery;
>  extern int amdgpu_mes;
>  extern int amdgpu_mes_kiq;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 0de8e3cd0f1c..5eec82014f0a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -348,6 +348,8 @@ struct amdgpu_gfx {
>  
>  	bool				is_poweron;
>  
> +	/*software ring*/

Isn't it more aesthetic to put spaces around it? Like this:
/* software ring */
?

Please run your patches through scripts/checkpatch.pl.

> +	unsigned						num_sw_gfx_rings;
>  	struct amdgpu_ring_mux			muxer;
>  };
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index 13db99d653bd..5b70a2c36d81 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -33,6 +33,7 @@
>  
>  #include <drm/amdgpu_drm.h>
>  #include "amdgpu.h"
> +#include "amdgpu_sw_ring.h"
>  #include "atom.h"
>  
>  /*
> @@ -121,6 +122,11 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
>  {
>  	uint32_t count;
>  
> +	if (ring->is_sw_ring) {
> +		amdgpu_sw_ring_commit(ring);
> +		return;
> +	}
> +
>  	/* We pad to match fetch size */
>  	count = ring->funcs->align_mask + 1 -
>  		(ring->wptr & ring->funcs->align_mask);
> @@ -183,6 +189,11 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
>  	u32 *num_sched;
>  	u32 hw_ip;
>  
> +	if (adev->gfx.num_sw_gfx_rings > 0 && ring->is_sw_ring) {
> +		return amdgpu_sw_ring_init(adev, ring, max_dw, irq_src, irq_type,
> +			hw_prio, sched_score);
> +	}
> +
>  	/* Set the hw submission limit higher for KIQ because
>  	 * it's used for a number of gfx/compute tasks by both
>  	 * KFD and KGD which may have outstanding fences and
> @@ -343,7 +354,10 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
>   */
>  void amdgpu_ring_fini(struct amdgpu_ring *ring)
>  {
> -
> +	if (ring->is_sw_ring) {
> +		amdgpu_sw_ring_fini(ring);
> +		return;
> +	}
>  	/* Not to finish a ring which is not initialized */
>  	if (!(ring->adev) ||
>  	    (!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index fe33a683bfba..ba6d8c753f7e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -38,7 +38,8 @@ struct amdgpu_vm;
>  /* max number of rings */
>  #define AMDGPU_MAX_RINGS		28
>  #define AMDGPU_MAX_HWIP_RINGS		8
> -#define AMDGPU_MAX_GFX_RINGS		2
> +/*2 software ring and 1 real ring*/
> +#define AMDGPU_MAX_GFX_RINGS		3
>  #define AMDGPU_MAX_COMPUTE_RINGS	8
>  #define AMDGPU_MAX_VCE_RINGS		3
>  #define AMDGPU_MAX_UVD_ENC_RINGS	2
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 5349ca4d19e3..774e44e1074a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -47,6 +47,7 @@
>  
>  #include "amdgpu_ras.h"
>  
> +#include "amdgpu_sw_ring.h"
>  #include "gfx_v9_4.h"
>  #include "gfx_v9_0.h"
>  #include "gfx_v9_4_2.h"
> @@ -55,7 +56,8 @@
>  #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
>  #include "asic_reg/gc/gc_9_0_default.h"
>  
> -#define GFX9_NUM_GFX_RINGS     1
> +#define GFX9_NUM_GFX_RINGS     3
> +#define GFX9_NUM_SW_GFX_RINGS  2
>  #define GFX9_MEC_HPD_SIZE 4096
>  #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
>  #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
> @@ -2270,6 +2272,7 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
>  static int gfx_v9_0_sw_init(void *handle)
>  {
>  	int i, j, k, r, ring_id;
> +	unsigned int hw_prio;
>  	struct amdgpu_ring *ring;
>  	struct amdgpu_kiq *kiq;
>  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> @@ -2356,13 +2359,40 @@ static int gfx_v9_0_sw_init(void *handle)
>  			sprintf(ring->name, "gfx_%d", i);
>  		ring->use_doorbell = true;
>  		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
> +		ring->is_sw_ring = (adev->gfx.num_sw_gfx_rings > 1) && (i > 0);
> +
> +		if (adev->gfx.num_sw_gfx_rings > 1 && i == 2)
> +			hw_prio = AMDGPU_RING_PRIO_2;
> +		else
> +			hw_prio = AMDGPU_RING_PRIO_DEFAULT;
> +		if (adev->gfx.num_sw_gfx_rings > 0 && i == 0)
> +			ring->no_scheduler = true;
> +
>  		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
>  				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
> -				     AMDGPU_RING_PRIO_DEFAULT, NULL);
> +				     hw_prio, NULL);
>  		if (r)
>  			return r;
> +
> +		if (ring->is_sw_ring)
> +			ring->wptr = 0;
>  	}
>  
> +	/*init the muxer and add sw rings */
> +	if (adev->gfx.num_sw_gfx_rings > 0) {
> +		r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0]);
> +		if (r) {
> +			DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
> +			return r;
> +		}
> +		for (i = 1; i < adev->gfx.num_gfx_rings; i++) {
> +			r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer, &adev->gfx.gfx_ring[i]);
> +			if (r) {
> +				DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
> +				return r;
> +			}
> +		}
> +	}
>  	/* set up the compute queues - allocate horizontally across pipes */
>  	ring_id = 0;
>  	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
> @@ -2413,6 +2443,9 @@ static int gfx_v9_0_sw_fini(void *handle)
>  	int i;
>  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>  
> +	if (adev->gfx.num_sw_gfx_rings > 0)
> +		amdgpu_ring_mux_fini(&adev->gfx.muxer);
> +
>  	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
>  		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
>  	for (i = 0; i < adev->gfx.num_compute_rings; i++)
> @@ -4709,8 +4742,9 @@ static int gfx_v9_0_early_init(void *handle)
>  	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
>  	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
>  		adev->gfx.num_gfx_rings = 0;
> -	else
> -		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
> +
> +	adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
> +	adev->gfx.num_sw_gfx_rings = GFX9_NUM_SW_GFX_RINGS;
>  	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
>  					  AMDGPU_MAX_COMPUTE_RINGS);
>  	gfx_v9_0_set_kiq_pm4_funcs(adev);
> @@ -5877,7 +5911,11 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
>  
>  	switch (me_id) {
>  	case 0:
> -		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
> +		if (adev->gfx.num_sw_gfx_rings > 1) {
> +			for (i = 1; i <= adev->gfx.num_sw_gfx_rings; i++)
> +				amdgpu_fence_process(&adev->gfx.gfx_ring[i]);
> +		} else
> +			amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
>  		break;
>  	case 1:
>  	case 2:
> @@ -6882,6 +6920,62 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
>  	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
>  };
>  
> +
> +static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
> +	.type = AMDGPU_RING_TYPE_GFX,
> +	.align_mask = 0xff,
> +	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
> +	.support_64bit_ptrs = true,
> +	.secure_submission_supported = true,
> +	.vmhub = AMDGPU_GFXHUB_0,
> +	.get_rptr = amdgpu_sw_ring_get_rptr_gfx,
> +	.get_wptr = amdgpu_sw_ring_get_wptr_gfx,
> +	.set_wptr = amdgpu_sw_ring_set_wptr_gfx,
> +	.emit_frame_size = /* totally 242 maximum if 16 IBs */
> +		5 +  /* COND_EXEC */
> +		7 +  /* PIPELINE_SYNC */
> +		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> +		2 + /* VM_FLUSH */
> +		8 +  /* FENCE for VM_FLUSH */
> +		20 + /* GDS switch */
> +		4 + /* double SWITCH_BUFFER,
> +		     * the first COND_EXEC jump to the place just
> +		     * prior to this double SWITCH_BUFFER
> +		     */
> +		5 + /* COND_EXEC */
> +		7 +	 /*	HDP_flush */
> +		4 +	 /*	VGT_flush */
> +		14 + /*	CE_META */
> +		31 + /*	DE_META */
> +		3 + /* CNTX_CTRL */
> +		5 + /* HDP_INVL */
> +		8 + 8 + /* FENCE x2 */
> +		2 + /* SWITCH_BUFFER */
> +		7, /* gfx_v9_0_emit_mem_sync */
> +	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
> +	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
> +	.emit_fence = gfx_v9_0_ring_emit_fence,
> +	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
> +	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
> +	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
> +	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
> +	.test_ring = gfx_v9_0_ring_test_ring,
> +	.test_ib = gfx_v9_0_ring_test_ib,
> +	.insert_nop = amdgpu_ring_insert_nop,
> +	.pad_ib = amdgpu_ring_generic_pad_ib,
> +	.emit_switch_buffer = gfx_v9_ring_emit_sb,
> +	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
> +	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
> +	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
> +	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
> +	.emit_wreg = gfx_v9_0_ring_emit_wreg,
> +	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
> +	.soft_recovery = gfx_v9_0_ring_soft_recovery,
> +	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
> +};
> +
>  static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
>  	.type = AMDGPU_RING_TYPE_COMPUTE,
>  	.align_mask = 0xff,
> @@ -6956,9 +7050,15 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
>  
>  	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
>  
> -	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
> -		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
> -
> +	if (adev->gfx.num_sw_gfx_rings > 0) {
> +		//first one is the real ring
> +		adev->gfx.gfx_ring[0].funcs = &gfx_v9_0_ring_funcs_gfx;
> +		for (i = 1; i <= adev->gfx.num_sw_gfx_rings; i++)
> +			adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
> +	} else {
> +		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
> +			adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
> +	}
>  	for (i = 0; i < adev->gfx.num_compute_rings; i++)
>  		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
>  }

Regards,
-- 
Luben

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 4/4] drm/amdgpu: Implement OS triggered MCBP(v2)
  2022-09-09  1:50 ` [PATCH 4/4] drm/amdgpu: Implement OS triggered MCBP(v2) jiadong.zhu
  2022-09-09 17:02   ` Andrey Grodzovsky
@ 2022-09-13 15:47   ` Luben Tuikov
  1 sibling, 0 replies; 28+ messages in thread
From: Luben Tuikov @ 2022-09-13 15:47 UTC (permalink / raw)
  To: jiadong.zhu, amd-gfx; +Cc: Ray.Huang

Inlined:

On 2022-09-08 21:50, jiadong.zhu@amd.com wrote:
> From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>
> 
> Trigger MCBP according to the priroty of the

"priority"

Spell out MCBP here, "Mid-Command Buffer Preemption."

> software rings and the hw fence signaling
> condition.

"signalling"

> 
> The muxer records some lastest locations from the

"lastest"? ENOENT
Please use an actual word.

Run your patches through scripts/checkpatch.pl.

> software ring which is used to resubmit packages
> in preemption scenarios.
> 
> v2: update comment style
> 
> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/Makefile          |   2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c       |   2 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c     | 101 ++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h     |  29 ++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c     |  12 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h     |   3 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 163 ++++++++++++++++++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  16 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  |  26 +++
>  9 files changed, 351 insertions(+), 3 deletions(-)
>  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c
>  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
> index 85224bc81ce5..24c5aa19bbf2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -59,7 +59,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>  	amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
>  	amdgpu_fw_attestation.o amdgpu_securedisplay.o \
>  	amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
> -	amdgpu_sw_ring.o amdgpu_ring_mux.o
> +	amdgpu_sw_ring.o amdgpu_ring_mux.o amdgpu_mcbp.o
>  
>  amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 258cffe3c06a..af86d87e2f3b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -211,6 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>  		}
>  	}
>  
> +	amdgpu_ring_ib_begin(ring);
>  	if (job && ring->funcs->init_cond_exec)
>  		patch_offset = amdgpu_ring_init_cond_exec(ring);
>  
> @@ -285,6 +286,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>  	    ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
>  		ring->funcs->emit_wave_limit(ring, false);
>  
> +	amdgpu_ring_ib_end(ring);
>  	amdgpu_ring_commit(ring);
>  	return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c
> new file mode 100644
> index 000000000000..2a12101a7699
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c
> @@ -0,0 +1,101 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#include <linux/delay.h>
> +#include <linux/kernel.h>
> +#include <linux/firmware.h>
> +#include <linux/module.h>
> +#include <linux/pci.h>
> +#include <drm/gpu_scheduler.h>
> +
> +#include "amdgpu.h"
> +#include "amdgpu_mcbp.h"
> +#include "amdgpu_ring.h"
> +
> +/* trigger mcbp and find if we need resubmit */
> +int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux)
> +{
> +	struct amdgpu_mux_entry *e;
> +	struct amdgpu_ring *ring = NULL;
> +	int i;
> +
> +	DRM_INFO("%s in\n", __func__);
> +
> +	spin_lock(&mux->lock);
> +
> +	amdgpu_ring_preempt_ib(mux->real_ring);
> +
> +	ring = NULL;
> +	for (i = 0; i < mux->num_ring_entries; i++) {
> +		e = &mux->ring_entries[i];
> +		if (e->ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
> +			ring = e->ring;
> +			break;
> +		}
> +	}
> +
> +	if (!ring) {
> +		DRM_ERROR("cannot find low priority ring\n");
> +		return -ENOENT;
> +	}
> +
> +	amdgpu_fence_process(ring);
> +
> +	DRM_INFO("after preempted ring_prio(%d) last_seq(%x) sync_seq(%x)\n",
> +		ring->hw_prio, atomic_read(&ring->fence_drv.last_seq), ring->fence_drv.sync_seq);
> +
> +	if (atomic_read(&ring->fence_drv.last_seq) !=
> +	    ring->fence_drv.sync_seq) {
> +		DRM_INFO("schedule resubmit\n");
> +		mux->s_resubmit = true;
> +		amdgpu_ring_mux_schedule_resubmit(mux);
> +	}
> +
> +	spin_unlock(&mux->lock);
> +	return 0;
> +}
> +
> +
> +/*scan on low prio rings to have unsignaled fence and high ring has no fence.*/
> +int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux)
> +{
> +	struct amdgpu_ring *ring;
> +	uint32_t seq, last_seq;
> +	int i, need_preempt;
> +
> +	need_preempt = 0;
> +	for (i = 0; i < mux->num_ring_entries; i++) {
> +		ring = mux->ring_entries[i].ring;
> +		last_seq = atomic_read(&ring->fence_drv.last_seq);
> +		seq = READ_ONCE(ring->fence_drv.sync_seq);
> +		DRM_INFO("ring(%p) prio(%d) last_seq(%x) seq(%x)\n",
> +			ring, ring->hw_prio, last_seq, seq);
> +		if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT && last_seq < seq)
> +			return 0;
> +		if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && last_seq < seq)
> +			need_preempt = 1;
> +	}
> +
> +	DRM_INFO("%s return %d\n", __func__, need_preempt && !mux->s_resubmit);
> +	return need_preempt && !mux->s_resubmit;
> +}

DRM_INFO() seem here to be debug messages and I feel they should be removed.
Sometimes we enable INFO level messages and this will overflow the log.

> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h
> new file mode 100644
> index 000000000000..0033bcba8d03
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h
> @@ -0,0 +1,29 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#ifndef __AMDGPU_MCBP_H__
> +#define __AMDGPU_MCBP_H__
> +
> +int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux);
> +int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux);
> +#endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index 5b70a2c36d81..6d7f8a40e308 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -583,3 +583,15 @@ int amdgpu_ring_init_mqd(struct amdgpu_ring *ring)
>  
>  	return mqd_mgr->init_mqd(adev, ring->mqd_ptr, &prop);
>  }
> +
> +void amdgpu_ring_ib_begin(struct amdgpu_ring *ring)
> +{
> +	if (ring->is_sw_ring)
> +		amdgpu_sw_ring_ib_begin(ring);
> +}
> +
> +void amdgpu_ring_ib_end(struct amdgpu_ring *ring)
> +{
> +	if (ring->is_sw_ring)
> +		amdgpu_sw_ring_ib_end(ring);
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index d3155dc86c07..399037b0d6e6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -311,6 +311,9 @@ struct amdgpu_ring {
>  #define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)
>  
>  int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
> +void amdgpu_ring_ib_begin(struct amdgpu_ring *ring);
> +void amdgpu_ring_ib_end(struct amdgpu_ring *ring);
> +
>  void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
>  void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
>  void amdgpu_ring_commit(struct amdgpu_ring *ring);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
> index ea4a3c66119a..0c9b639b844e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
> @@ -20,28 +20,60 @@
>   * OTHER DEALINGS IN THE SOFTWARE.
>   *
>   */
> -
> +#include <linux/slab.h>
>  #include <drm/drm_print.h>
>  
>  #include "amdgpu_ring_mux.h"
> +#include "amdgpu_mcbp.h"
>  #include "amdgpu_ring.h"
>  
>  #define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ/2)
>  
> +static struct kmem_cache *amdgpu_mux_chunk_slab;
> +
>  static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
>  	u64 s_begin, u64 s_end);
> +static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux);
> +static void amdgpu_mux_resubmit_fallback(struct timer_list *t);
>  
>  int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
>  {
>  	mux->real_ring = ring;
> +
>  	memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
>  	mux->num_ring_entries = 0;
> +
> +	mux->s_resubmit = false;
> +
> +	amdgpu_mux_chunk_slab = kmem_cache_create(
> +		"amdgpu_mux_chunk", sizeof(struct amdgpu_mux_chunk), 0,
> +		SLAB_HWCACHE_ALIGN, NULL);
> +	if (!amdgpu_mux_chunk_slab) {
> +		DRM_ERROR("create amdgpu_mux_chunk cache failed\n");
> +		return -ENOMEM;
> +	}
> +
>  	spin_lock_init(&mux->lock);
> +
> +	timer_setup(&mux->resubmit_timer, amdgpu_mux_resubmit_fallback, 0);
> +
>  	return 0;
>  }
>  
>  void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux)
>  {
> +	struct amdgpu_mux_entry *e;
> +	struct amdgpu_mux_chunk *chunk, *chunk2;
> +	int i;
> +
> +	for (i = 0; i < mux->num_ring_entries; i++) {
> +		e = &mux->ring_entries[i];
> +		list_for_each_entry_safe(chunk, chunk2, &e->list, entry) {
> +			list_del(&chunk->entry);
> +			kmem_cache_free(amdgpu_mux_chunk_slab, chunk);
> +		}
> +	}
> +	kmem_cache_destroy(amdgpu_mux_chunk_slab);
>  	memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
>  	mux->num_ring_entries = 0;
>  }
> @@ -64,6 +96,8 @@ int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring
>  	e->sw_rptr = 0;
>  	e->sw_wptr = 0;
>  
> +	INIT_LIST_HEAD(&e->list);
> +
>  	return 0;
>  }
>  
> @@ -180,3 +214,130 @@ static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring
>  
>  	return 0;
>  }
> +
> +void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux)
> +{
> +	mod_timer(&mux->resubmit_timer, jiffies + AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT);
> +}
> +
> +void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_mux_entry *e;
> +	struct amdgpu_mux_chunk *chunk;
> +
> +	if (mux->s_resubmit)
> +		amdgpu_mux_resubmit_chunks(mux);
> +
> +	e = amdgpu_get_sw_entry(mux, ring);
> +	if (!e) {
> +		DRM_ERROR("cannot find entry!\n");
> +		return;
> +	}
> +
> +	chunk = kmem_cache_alloc(amdgpu_mux_chunk_slab, GFP_KERNEL);
> +	if (!chunk) {
> +		DRM_ERROR("alloc amdgpu_mux_chunk_slab failed\n");
> +		return;
> +	}
> +
> +	chunk->start = ring->wptr;
> +	list_add_tail(&chunk->entry, &e->list);
> +}
> +
> +static void scan_and_remove_signaled_chunk(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
> +{
> +	uint32_t last_seq, size = 0;
> +	struct amdgpu_mux_entry *e;
> +	struct amdgpu_mux_chunk *chunk, *tmp;
> +
> +	e = amdgpu_get_sw_entry(mux, ring);
> +	if (!e) {
> +		DRM_ERROR("cannot find entry!\n");
> +		return;
> +	}
> +
> +	last_seq = atomic_read(&ring->fence_drv.last_seq);
> +
> +	list_for_each_entry_safe(chunk, tmp, &e->list, entry) {
> +		if (chunk->sync_seq <= last_seq) {
> +			list_del(&chunk->entry);
> +			kmem_cache_free(amdgpu_mux_chunk_slab, chunk);
> +		} else {
> +			size++;
> +		}
> +	}
> +}
> +
> +void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_mux_entry *e;
> +	struct amdgpu_mux_chunk *chunk;
> +
> +	e = amdgpu_get_sw_entry(mux, ring);
> +	if (!e) {
> +		DRM_ERROR("cannot find entry!\n");
> +		return;
> +	}
> +
> +	chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry);
> +	if (!chunk) {
> +		DRM_ERROR("cannot find chunk!\n");
> +		return;
> +	}
> +
> +	chunk->end = ring->wptr;
> +	chunk->sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
> +
> +	scan_and_remove_signaled_chunk(mux, ring);
> +}
> +
> +static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux)
> +{
> +	struct amdgpu_mux_entry *e = NULL;
> +	struct amdgpu_mux_chunk *chunk;
> +	uint32_t seq, last_seq;
> +	int i;
> +
> +	/*find low priority entries:*/
> +	spin_lock(&mux->lock);
> +
> +	for (i = 0; i < mux->num_ring_entries; i++) {
> +		if (mux->ring_entries[i].ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
> +				e = &mux->ring_entries[i];
> +			break;
> +		}
> +	}
> +
> +	if (!e) {
> +		DRM_ERROR("%s no low priority ring found\n", __func__);
> +		return;
> +	}
> +
> +	last_seq = atomic_read(&e->ring->fence_drv.last_seq);
> +	seq = READ_ONCE(e->ring->fence_drv.sync_seq);
> +	if (seq == last_seq) {
> +		DRM_INFO("skip as fence signaled seq=%x\n", seq);
> +		return;
> +	}
> +	DRM_INFO("begin to copy resubmit chunks\n");

Those two DRM_INFO() should be removed.

> +
> +	/*resubmit all the fences between (last_seq, seq]*/
> +	list_for_each_entry(chunk, &e->list, entry) {
> +		if (chunk->sync_seq > last_seq) {
> +			copy_pkt_from_sw_ring(mux, e->ring, chunk->start, chunk->end);
> +			amdgpu_ring_commit(mux->real_ring);
> +		}
> +	}
> +	spin_unlock(&mux->lock);
> +
> +	del_timer(&mux->resubmit_timer);
> +	mux->s_resubmit = false;
> +}
> +
> +static void amdgpu_mux_resubmit_fallback(struct timer_list *t)
> +{
> +	struct amdgpu_ring_mux *mux = from_timer(mux, t, resubmit_timer);
> +
> +	DRM_INFO("calling %s\n", __func__);
> +	amdgpu_mux_resubmit_chunks(mux);
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> index d058c43bb063..1d91c235061a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> @@ -44,17 +44,27 @@ struct amdgpu_mux_entry {
>  	u64 sw_cptr;
>  	u64 sw_rptr;
>  	u64 sw_wptr;
> +
> +	struct list_head list;
>  };
>  
>  struct amdgpu_ring_mux {
>  	struct amdgpu_ring *real_ring;
>  
>  	struct amdgpu_mux_entry ring_entries[AMDGPU_MAX_GFX_RINGS];
> -
>  	unsigned num_ring_entries;
>  
>  	spinlock_t			lock;
>  
> +	bool s_resubmit;
> +	struct timer_list		resubmit_timer;
> +};
> +
> +struct amdgpu_mux_chunk {
> +	struct list_head entry;
> +	uint32_t sync_seq;
> +	u64 start;
> +	u64 end;
>  };

I'd generally include a comment on struct members--makes it clear what the members
are.

>  
>  int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
> @@ -64,4 +74,8 @@ void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring
>  u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
>  u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
>  
> +void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
> +void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
> +void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux);
> +
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
> index 452d0ff37758..143a84c18534 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
> @@ -26,6 +26,7 @@
>  
>  #include "amdgpu_sw_ring.h"
>  #include "amdgpu_ring_mux.h"
> +#include "amdgpu_mcbp.h"
>  
>  #define amdgpu_ring_get_gpu_addr(ring, offset)				\
>  	(ring->is_mes_queue ?						\
> @@ -202,3 +203,28 @@ void amdgpu_sw_ring_fini(struct amdgpu_ring *ring)
>  	ring->adev->rings[ring->idx] = NULL;
>  }
>  
> +void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_device *adev = ring->adev;
> +	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
> +
> +	BUG_ON(!ring->is_sw_ring);
> +	if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) {
> +		if (amdgpu_mcbp_scan(mux) > 0)
> +			amdgpu_mcbp_trigger_preempt(mux);
> +		return;
> +	}
> +
> +	amdgpu_ring_mux_start_ib(mux, ring);
> +}
> +
> +void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_device *adev = ring->adev;
> +	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
> +
> +	BUG_ON(!ring->is_sw_ring);
> +	if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT)
> +		return;
> +	amdgpu_ring_mux_end_ib(mux, ring);
> +}

Regards,
-- 
Luben

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3)
  2022-09-13 15:12 ` Luben Tuikov
@ 2022-09-14  2:34   ` Zhu, Jiadong
  2022-09-14  4:30     ` Luben Tuikov
  0 siblings, 1 reply; 28+ messages in thread
From: Zhu, Jiadong @ 2022-09-14  2:34 UTC (permalink / raw)
  To: Tuikov, Luben, amd-gfx; +Cc: Huang, Ray

[AMD Official Use Only - General]

Thanks, Luben, for the review. I have replied inline and will update the patch.

Thanks,
Jiadong

-----Original Message-----
From: Tuikov, Luben <Luben.Tuikov@amd.com>
Sent: Tuesday, September 13, 2022 11:12 PM
To: Zhu, Jiadong <Jiadong.Zhu@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Huang, Ray <Ray.Huang@amd.com>
Subject: Re: [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3)

Inlined:

On 2022-09-08 21:50, jiadong.zhu@amd.com wrote:
> From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>
>
> The software ring is created to support priority context while there
> is only one hardware queue for gfx.
>
> Every software rings has its fence driver and could be used as an
> ordinary ring for the gpu_scheduler.
> Multiple software rings are binded to a real ring with the ring muxer.
> The packages committed on the software ring are copied to the real
> ring.
>
> v2: use array to store software ring entry.
> v3: remove unnecessary prints.
>
> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/Makefile          |   3 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h      |   3 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h     |   3 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 182 +++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  67 ++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  | 204 +++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h  |  48 +++++
>  7 files changed, 509 insertions(+), 1 deletion(-)  create mode 100644
> drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
>  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
>  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
>  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
> b/drivers/gpu/drm/amd/amdgpu/Makefile
> index 3e0e2eb7e235..85224bc81ce5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>       amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
>       amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
>       amdgpu_fw_attestation.o amdgpu_securedisplay.o \
> -     amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
> +     amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
> +     amdgpu_sw_ring.o amdgpu_ring_mux.o
>
>  amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 53526ffb2ce1..0de8e3cd0f1c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -33,6 +33,7 @@
>  #include "amdgpu_imu.h"
>  #include "soc15.h"
>  #include "amdgpu_ras.h"
> +#include "amdgpu_ring_mux.h"
>
>  /* GFX current status */
>  #define AMDGPU_GFX_NORMAL_MODE                       0x00000000L
> @@ -346,6 +347,8 @@ struct amdgpu_gfx {
>       struct amdgpu_gfx_ras           *ras;
>
>       bool                            is_poweron;
> +
> +     struct amdgpu_ring_mux                  muxer;
>  };
>
>  #define amdgpu_gfx_get_gpu_clock_counter(adev)
> (adev)->gfx.funcs->get_gpu_clock_counter((adev))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 7d89a52091c0..fe33a683bfba 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -278,6 +278,9 @@ struct amdgpu_ring {
>       bool                    is_mes_queue;
>       uint32_t                hw_queue_id;
>       struct amdgpu_mes_ctx_data *mes_ctx;
> +
> +     bool                    is_sw_ring;
> +
>  };
>
>  #define amdgpu_ring_parse_cs(r, p, job, ib)
> ((r)->funcs->parse_cs((p), (job), (ib))) diff --git
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
> new file mode 100644
> index 000000000000..ea4a3c66119a
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
> @@ -0,0 +1,182 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person
> +obtaining a
> + * copy of this software and associated documentation files (the
> +"Software"),
> + * to deal in the Software without restriction, including without
> +limitation
> + * the rights to use, copy, modify, merge, publish, distribute,
> +sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom
> +the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be
> +included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> +EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> +MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
> +SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
> +DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> +OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
> +OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#include <drm/drm_print.h>
> +
> +#include "amdgpu_ring_mux.h"
> +#include "amdgpu_ring.h"
> +
> +#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ/2)
> +
> +static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
> +     u64 s_begin, u64 s_end);
> +
> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct
> +amdgpu_ring *ring) {
> +     mux->real_ring = ring;
> +     memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
> +     mux->num_ring_entries = 0;
> +     spin_lock_init(&mux->lock);
> +     return 0;
> +}
> +
> +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux) {
> +     memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
> +     mux->num_ring_entries = 0;
> +}
> +
> +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct
> +amdgpu_ring *ring) {
> +     struct amdgpu_mux_entry *e;
> +
> +     if (mux->num_ring_entries == AMDGPU_MAX_GFX_RINGS) {
> +             DRM_ERROR("adding sw ring exceeds max gfx num\n");
> +             return -ENOMEM;
> +     }

>You can't return here -ENOMEM, as it is not a real out of memory condition.
>Maybe EINVAL or something like that, but not ENOMEM.

>Also, under what circumstances would we get to this condition here?
>Are such circumstances valid?

>And if so, then when this is returned, what happens?
>Does the driver die?

>I feel we shouldn't ever have this here--it should've been calculated correctly to never have fallen in this/such a circumstance like that here.

Sure, I will remove these checks.
The number of gfx rings should be calculated carefully before calling this function, as is done at the other call sites.

> +
> +     e = &mux->ring_entries[mux->num_ring_entries++];
> +
> +     e->ring = ring;
> +     e->start_ptr_in_hw_ring = 0;
> +     e->end_ptr_in_hw_ring = 0;
> +     e->sw_cptr = 0;
> +     e->sw_rptr = 0;
> +     e->sw_wptr = 0;
> +
> +     return 0;
> +}
> +
> +static struct amdgpu_mux_entry *amdgpu_get_sw_entry(struct amdgpu_ring_mux *mux,
> +                             struct amdgpu_ring *ring)
> +{
> +     struct amdgpu_mux_entry *e;
> +     int i;
> +
> +     e = NULL;
> +     for (i = 0; i < mux->num_ring_entries; i++) {
> +             if (mux->ring_entries[i].ring == ring) {
> +                     e = &mux->ring_entries[i];
> +                     break;
> +             }
> +     }
> +
> +     return e;
> +}
> +
> +void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, struct
> +amdgpu_ring *ring, u64 wptr) {
> +     struct amdgpu_mux_entry *e;
> +
> +     e = amdgpu_get_sw_entry(mux, ring);
> +     if (!e) {
> +             DRM_ERROR("cannot find entry for sw ring\n");
> +             return;
> +     }

> How and why would we get in this situation here like that?
> Also, DRM_ERROR() may overflow the kernel log. Perhaps DRM_DEBUG is better, or not print anything at all.

This should not happen once the driver is fully debugged. If it does happen, the software ring does not work and we need to fix it immediately.
We enhanced the message like this: " DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n"); "

> +
> +     spin_lock(&mux->lock);
> +     e->sw_cptr = e->sw_wptr;
> +     e->sw_wptr = wptr;
> +     e->start_ptr_in_hw_ring = mux->real_ring->wptr;
> +
> +     if (copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr) == 0) {
> +             e->end_ptr_in_hw_ring = mux->real_ring->wptr;
> +             amdgpu_ring_commit(mux->real_ring);
> +     }
> +
> +     spin_unlock(&mux->lock);
> +}
> +
> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, struct
> +amdgpu_ring *ring) {
> +     struct amdgpu_mux_entry *e;
> +
> +     e = amdgpu_get_sw_entry(mux, ring);
> +     if (!e) {
> +             DRM_ERROR("cannot find entry for sw ring\n");
> +             return 0;
> +     }
> +
> +     return e->sw_wptr;
> +}
> +
> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, struct
> +amdgpu_ring *ring) {
> +     struct amdgpu_mux_entry *e;
> +     u64 r_rptr, r_wptr, offset, start, end;
> +
> +     e = amdgpu_get_sw_entry(mux, ring);
> +     if (!e) {
> +             DRM_ERROR("no sw entry found!\n");
> +             return 0;
> +     }
> +
> +     r_rptr = amdgpu_ring_get_rptr(mux->real_ring);
> +     r_wptr = amdgpu_ring_get_wptr(mux->real_ring);

> These names are very much the same to a human. How about writep and readp?

r_rptr stands for the real ring's read pointer, as distinct from sw_rptr. Maybe we should change them to real_rptr/real_wptr?


> +
> +     if (r_wptr < r_rptr)
> +             r_wptr += mux->real_ring->ring_size >> 2;
> +
> +     start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask;
> +     end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask;
> +     if (start > end)
> +             end += mux->real_ring->ring_size >> 2;
> +     if (r_rptr <= end && r_rptr >= start) {
> +             offset = r_rptr - start;
> +             e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask;
> +     } else if (r_rptr < start) {
> +             e->sw_rptr = e->sw_cptr;
> +     } else {
> +             e->sw_rptr = e->sw_wptr;
> +     }
> +
> +     return e->sw_rptr;
> +}
> +
> +/*copy packages on sw ring range[begin, end) */ static int
> +copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
> +     u64 s_begin, u64 s_end)
> +{
> +     u64 begin, end, r_begin, r_end;
> +     struct amdgpu_ring *real_ring = mux->real_ring;
> +
> +     begin = s_begin & ring->buf_mask;
> +     end = s_end & ring->buf_mask;
> +
> +     r_begin = real_ring->wptr & real_ring->buf_mask;
> +     if (begin == end)
> +             return -ERANGE;
> +     if (begin > end) {
> +             amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) + end - begin);
> +             amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[begin],
> +                     (ring->ring_size >> 2) - begin);
> +             amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[0], end);
> +     } else {
> +             amdgpu_ring_alloc(real_ring, end - begin);
> +             amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[begin], end - begin);
> +     }
> +
> +     r_end = real_ring->wptr & real_ring->buf_mask;
> +
> +     return 0;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> new file mode 100644
> index 000000000000..d058c43bb063
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> @@ -0,0 +1,67 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person
> +obtaining a
> + * copy of this software and associated documentation files (the
> +"Software"),
> + * to deal in the Software without restriction, including without
> +limitation
> + * the rights to use, copy, modify, merge, publish, distribute,
> +sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom
> +the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be
> +included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> +EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> +MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
> +SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
> +DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> +OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
> +OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#ifndef __AMDGPU_RING_MUX__
> +#define __AMDGPU_RING_MUX__
> +
> +#include <linux/timer.h>
> +#include <linux/spinlock.h>
> +#include "amdgpu_ring.h"
> +
> +struct amdgpu_ring;
> +/*
> + * start_ptr_in_hw_ring - last copied start loc on hw ring

>Use double dash for visibility "--".
>Please spell out "location".
>Perhaps you want to say something like:
>"last start location where we copied to in the hardware ring".
>
>Don't you just need an "index pointer"--pointing to the next location to copy to?
>
>I don't understand what "last copied end loc on hw ring" is, as we ll as "last copied start loc on hw ring".

> + * end_ptr_in_hw_ring - last copied end loc on hw ring *sw_cptr -the
> + begin of copy ptr in sw ring

>Double-dash and space around it.

>Don't you just mean "the copy pointer in sw ring"?


> + *sw_rptr; the read ptr in sw ring
> + *sw_wptr; the write ptr in sw ring

>Double-dash instead of semicolon.

Sure, I will update the patch.

> + */
> +struct amdgpu_mux_entry {
> +     struct amdgpu_ring      *ring;
> +     u64 start_ptr_in_hw_ring;
> +     u64 end_ptr_in_hw_ring;
> +
> +     u64 sw_cptr;
> +     u64 sw_rptr;
> +     u64 sw_wptr;
> +};
> +
> +struct amdgpu_ring_mux {
> +     struct amdgpu_ring *real_ring;
> +
> +     struct amdgpu_mux_entry ring_entries[AMDGPU_MAX_GFX_RINGS];

>Shouldn't the size of this array be dynamic depending on the ASIC used?
  This aligns with gfx_ring in amdgpu_gfx.h
        struct amdgpu_ring              gfx_ring[AMDGPU_MAX_GFX_RINGS];
>Maybe call it "ring_entry", so that "ring_entry[i]" means "ring entry at index i."
        will update.
> +
> +     unsigned num_ring_entries;
> +
> +     spinlock_t                      lock;
> +
> +};
> +
> +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct
> +amdgpu_ring *ring); void amdgpu_ring_mux_fini(struct amdgpu_ring_mux
> +*mux); int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux,
> +struct amdgpu_ring *ring); void amdgpu_ring_set_wptr_to_mux(struct
> +amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr);
> +u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, struct
> +amdgpu_ring *ring);
> +u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, struct
> +amdgpu_ring *ring);
> +
> +#endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
> new file mode 100644
> index 000000000000..452d0ff37758
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
> @@ -0,0 +1,204 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + * All Rights Reserved.
> + *
> + * Permission is hereby granted, free of charge, to any person
> +obtaining a
> + * copy of this software and associated documentation files (the
> + * "Software"), to deal in the Software without restriction,
> +including
> + * without limitation the rights to use, copy, modify, merge,
> +publish,
> + * distribute, sub license, and/or sell copies of the Software, and
> +to
> + * permit persons to whom the Software is furnished to do so, subject
> +to
> + * the following conditions:
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> +EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> +MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT
> +SHALL
> + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
> +ANY CLAIM,
> + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
> +OR
> + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
> +OR THE
> + * USE OR OTHER DEALINGS IN THE SOFTWARE.
> + *
> + * The above copyright notice and this permission notice (including
> +the
> + * next paragraph) shall be included in all copies or substantial
> +portions
> + * of the Software.
> + *
> + */
> +
> +#include "amdgpu_sw_ring.h"
> +#include "amdgpu_ring_mux.h"
> +
> +#define amdgpu_ring_get_gpu_addr(ring, offset)                               \
> +     (ring->is_mes_queue ?                                           \
> +      (ring->mes_ctx->meta_data_gpu_addr + offset) :                 \
> +      (ring->adev->wb.gpu_addr + offset * 4))

>I don't know how you were able to actually insert TAB chars after the text and before the backslash. Pressing the TAB key in my editor only aligns the line according to the mode, and that's it--I can't insert a TAB char.

>Don't insert TAB chars to align the backslash. Instead use the space bar--insert spaces.
acknowledged.

> +
> +#define amdgpu_ring_get_cpu_addr(ring, offset)                               \
> +     (ring->is_mes_queue ?                                           \
> +      (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \
> +      (&ring->adev->wb.wb[offset]))
> +
> +
> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
> +                  unsigned int max_dw, struct amdgpu_irq_src *irq_src,
> +                  unsigned int irq_type, unsigned int hw_prio,
> +                  atomic_t *sched_score)
> +{
> +     int r;
> +     int sched_hw_submission = amdgpu_sched_hw_submission;
> +     u32 *num_sched;
> +     u32 hw_ip;
> +
> +     BUG_ON(!ring->is_sw_ring);
> +
> +     if (ring->adev == NULL) {
> +             if (adev->num_rings >= AMDGPU_MAX_RINGS)
> +                     return -EINVAL;

> I don't think we should have this here. Make it dynamic instead, so that it would work with any adev in the future.
acknowledged
> +
> +             ring->adev = adev;
> +             ring->num_hw_submission = sched_hw_submission;
> +             ring->sched_score = sched_score;
> +             ring->vmid_wait = dma_fence_get_stub();
> +
> +             if (!ring->is_mes_queue) {
> +                     ring->idx = adev->num_rings++;
> +                     adev->rings[ring->idx] = ring;
> +             }
> +
> +             r = amdgpu_fence_driver_init_ring(ring);
> +             if (r)
> +                     return r;
> +     }
> +
> +     r = amdgpu_device_wb_get(adev, &ring->fence_offs);
> +     if (r) {
> +             dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
> +             return r;
> +     }
> +
> +     r = amdgpu_device_wb_get(adev, &ring->fence_offs);
> +     if (r) {
> +             dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
> +             return r;
> +     }
> +
> +     r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
> +     if (r) {
> +             dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r);
> +             return r;
> +     }
> +
> +     r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
> +     if (r) {
> +             dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r);
> +             return r;
> +     }
> +
> +     ring->fence_gpu_addr =
> +             amdgpu_ring_get_gpu_addr(ring, ring->fence_offs);
> +     ring->fence_cpu_addr =
> +             amdgpu_ring_get_cpu_addr(ring, ring->fence_offs);
> +
> +     ring->trail_fence_gpu_addr =
> +             amdgpu_ring_get_gpu_addr(ring, ring->trail_fence_offs);
> +     ring->trail_fence_cpu_addr =
> +             amdgpu_ring_get_cpu_addr(ring, ring->trail_fence_offs);
> +
> +     ring->cond_exe_gpu_addr =
> +             amdgpu_ring_get_gpu_addr(ring, ring->cond_exe_offs);
> +     ring->cond_exe_cpu_addr =
> +             amdgpu_ring_get_cpu_addr(ring, ring->cond_exe_offs);
> +
> +     /* always set cond_exec_polling to CONTINUE */
> +     *ring->cond_exe_cpu_addr = 1;
> +
> +     r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
> +     if (r) {
> +             dev_err(adev->dev, "failed initializing fences (%d).\n", r);
> +             return r;
> +     }
> +
> +     ring->ring_size = roundup_pow_of_two(max_dw * 4 *
> +sched_hw_submission);
> +
> +     ring->buf_mask = (ring->ring_size / 4) - 1;
> +     ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
> +             0xffffffffffffffff : ring->buf_mask;
> +
> +     /* Allocate ring buffer */
> +     if (ring->ring == NULL) {
> +             ring->ring = kzalloc(ring->ring_size + ring->funcs->extra_dw, GFP_KERNEL);
> +             if (!ring->ring) {
> +                     dev_err(adev->dev, "(%d) swring create failed\n", r);
> +                     return r;
> +             }
> +
> +             amdgpu_ring_clear_ring(ring);
> +     }
> +
> +     ring->max_dw = max_dw;
> +     ring->hw_prio = hw_prio;
> +
> +     if (!ring->no_scheduler) {
> +             hw_ip = ring->funcs->type;
> +             num_sched = &adev->gpu_sched[hw_ip][hw_prio].num_scheds;
> +             adev->gpu_sched[hw_ip][hw_prio].sched[(*num_sched)++] =
> +                     &ring->sched;
> +     }
> +
> +     return 0;
> +}
> +
> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring) {
> +     struct amdgpu_device *adev = ring->adev;
> +     struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
> +
> +     BUG_ON(!ring->is_sw_ring);
> +     return amdgpu_ring_get_rptr_from_mux(mux, ring); }
> +
> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring) {
> +     struct amdgpu_device *adev = ring->adev;
> +     struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
> +
> +     BUG_ON(!ring->is_sw_ring);
> +     return amdgpu_ring_get_wptr_from_mux(mux, ring); }
> +
> +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring) {
> +     BUG_ON(!ring->is_sw_ring);
> +}
> +
> +void amdgpu_sw_ring_commit(struct amdgpu_ring *ring) {
> +     struct amdgpu_device *adev = ring->adev;
> +     struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
> +
> +     BUG_ON(!ring->is_sw_ring);
> +     amdgpu_ring_set_wptr_to_mux(mux, ring, ring->wptr); }
> +
> +void amdgpu_sw_ring_fini(struct amdgpu_ring *ring) {
> +     BUG_ON(!ring->is_sw_ring);
> +
> +     /* Not to finish a ring which is not initialized */
> +     if (!(ring->adev) ||
> +         (!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
> +             return;

You don't need parentheses around "ring->adev" and around
ring->adev->rings[ring->idx], drop them.

> +
> +     ring->sched.ready = false;
> +
> +     amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
> +     amdgpu_device_wb_free(ring->adev, ring->fence_offs);
> +
> +     kfree((void *)ring->ring);

> No need to cast to (void *).
 Acknowledged.
> +
> +     dma_fence_put(ring->vmid_wait);
> +     ring->vmid_wait = NULL;
> +     ring->me = 0;
> +
> +     ring->adev->rings[ring->idx] = NULL; }
> +
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
> new file mode 100644
> index 000000000000..c05d8a94ad0c
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h
> @@ -0,0 +1,48 @@
> +/*
> + * Copyright 2012 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person
> +obtaining a
> + * copy of this software and associated documentation files (the
> +"Software"),
> + * to deal in the Software without restriction, including without
> +limitation
> + * the rights to use, copy, modify, merge, publish, distribute,
> +sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom
> +the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be
> +included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> +EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> +MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
> +SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
> +DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> +OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
> +OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#include <drm/amdgpu_drm.h>
> +#include <drm/gpu_scheduler.h>
> +#include <drm/drm_print.h>
> +
> +#include "amdgpu_irq.h"
> +#include "amdgpu_ring.h"
> +#include "amdgpu.h"
> +
> +#ifndef __AMDGPU_SWRING_H__
> +#define __AMDGPU_SWRING_H__
> +
> +int amdgpu_sw_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *sw_ring,
> +                  unsigned int max_dw, struct amdgpu_irq_src *irq_src,
> +                  unsigned int irq_type, unsigned int hw_prio,
> +                  atomic_t *sched_score);
> +void amdgpu_sw_ring_fini(struct amdgpu_ring *ring);
> +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring);
> +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring); void
> +amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring); void
> +amdgpu_sw_ring_commit(struct amdgpu_ring *ring);
> +
> +void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring); void
> +amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
> +
> +#endif

> Make sure to run your patches through scripts/checkpatch.pl.
Sure.

Regards,
--
Luben

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3)
  2022-09-14  2:34   ` Zhu, Jiadong
@ 2022-09-14  4:30     ` Luben Tuikov
  0 siblings, 0 replies; 28+ messages in thread
From: Luben Tuikov @ 2022-09-14  4:30 UTC (permalink / raw)
  To: Zhu, Jiadong, amd-gfx; +Cc: Huang, Ray


On 2022-09-13 22:34, Zhu, Jiadong wrote:
>> +
>> +     r_rptr = amdgpu_ring_get_rptr(mux->real_ring);
>> +     r_wptr = amdgpu_ring_get_wptr(mux->real_ring);
>> These names are very much the same to a human. How about writep and readp?
> r_rptr for real ring's read ptr differed from sw_rptr. Maybe we change to real_rptr/real_wptr?
> 

"real_rptr" and "real_wptr" is the same as "r_rptr" and "r_wptr", and actually worse.
The problem is that there's too little entropy on those names and as such to a human
they look the same. In the current version of the patch, you have only one out of six
characters different and that's in the middle of the word--very hard for a human to see,
note and distinguish. The situation is even worse with "real_rptr" and "real_wptr",
as that's one out of nine characters different and still very hard for a human to
notice the difference.

For this reason I suggested using "writep" and "readp", which are immediately
distinguishable from each other as they have high entropy. Now, what they mean,
you can put that in a comment, but please use names with high entropy in them,
i.e. names that are different from each other and easily distinguishable by a human.

Regards,
-- 
Luben

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2022-09-14  4:30 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-09  1:50 [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3) jiadong.zhu
2022-09-09  1:50 ` [PATCH 2/4] drm/amdgpu: Add software ring callbacks for gfx9(v3) jiadong.zhu
2022-09-09 16:46   ` Andrey Grodzovsky
2022-09-13 15:23   ` Luben Tuikov
2022-09-09  1:50 ` [PATCH 3/4] drm/amdgpu: Modify unmap_queue format for gfx9(v2) jiadong.zhu
2022-09-09 16:48   ` Andrey Grodzovsky
2022-09-09  1:50 ` [PATCH 4/4] drm/amdgpu: Implement OS triggered MCBP(v2) jiadong.zhu
2022-09-09 17:02   ` Andrey Grodzovsky
2022-09-13  1:32     ` Zhu, Jiadong
2022-09-13 15:47   ` Luben Tuikov
2022-09-09 14:24 ` [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3) Christian König
2022-09-12 14:31   ` Luben Tuikov
2022-09-09 16:45 ` Andrey Grodzovsky
2022-09-12 10:20   ` Christian König
2022-09-12 13:22     ` Andrey Grodzovsky
2022-09-12 13:27       ` Christian König
2022-09-12 15:34         ` Andrey Grodzovsky
2022-09-12 15:51           ` Liu, Shaoyun
2022-09-12 16:23             ` Christian König
2022-09-12 16:22           ` Christian König
2022-09-12 16:45             ` Andrey Grodzovsky
2022-09-13  1:44               ` Zhu, Jiadong
2022-09-13  2:00                 ` Andrey Grodzovsky
2022-09-13  7:25                   ` Christian König
2022-09-13 15:07                     ` Andrey Grodzovsky
2022-09-13 15:12 ` Luben Tuikov
2022-09-14  2:34   ` Zhu, Jiadong
2022-09-14  4:30     ` Luben Tuikov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.