* [PATCH v3 1/7] drm/amdgpu/vcn: Add MMSCH v4_0_3 support for SRIOV
From: Samir Dhume @ 2023-07-28 19:15 UTC (permalink / raw)
  To: amd-gfx
  Cc: zhigang.luo, guchun.chen, gavin.wan, lijo.lazar, Samir Dhume,
	frank.min, hawking.zhang

The structures are the same as v4_0 except for the init header.

Signed-off-by: Samir Dhume <samir.dhume@amd.com>
---
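Note on the header layout: total_size and the per-engine
table_offset/table_size fields are all expressed in dwords, not
bytes, so callers shift byte sizes right by 2 when filling them in.
A minimal sketch of the fill pattern, taken from the SRIOV start
code later in this series (table is &adev->virt.mm_table, allocated
in patch 3):

	struct mmsch_v4_0_3_init_header header;
	uint32_t *table_loc = (uint32_t *)table->cpu_addr;

	memset(&header, 0, sizeof(header));
	header.version = MMSCH_VERSION;
	/* header size in dwords; per-engine tables are appended after it */
	header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2;
	table_loc += header.total_size;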
 drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h | 37 +++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h

diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h
new file mode 100644
index 000000000000..db7eb5260295
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MMSCH_V4_0_3_H__
+#define __MMSCH_V4_0_3_H__
+
+#include "amdgpu_vcn.h"
+#include "mmsch_v4_0.h"
+
+struct mmsch_v4_0_3_init_header {
+	uint32_t version;
+	uint32_t total_size;
+	struct mmsch_v4_0_table_info vcn0;
+	struct mmsch_v4_0_table_info mjpegdec0[4];
+	struct mmsch_v4_0_table_info mjpegdec1[4];
+};
+#endif
-- 
2.34.1



* [PATCH v3 2/7] drm/amdgpu/vcn: Skip VCN power-gating change for SRIOV
From: Samir Dhume @ 2023-07-28 19:15 UTC (permalink / raw)
  To: amd-gfx
  Cc: zhigang.luo, guchun.chen, gavin.wan, lijo.lazar, Samir Dhume,
	frank.min, hawking.zhang

Signed-off-by: Samir Dhume <samir.dhume@amd.com>
---
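Note: setting cur_state to UNGATE instead of only returning early
keeps the rest of the driver consistent, since callers only request
a gating change when cur_state differs. A sketch of the typical
caller pattern (the exact hw_fini body may differ slightly):

	/* e.g. in vcn_v4_0_3_hw_fini() */
	if (adev->vcn.cur_state != AMD_PG_STATE_GATE)
		vcn_v4_0_3_set_powergating_state(handle, AMD_PG_STATE_GATE);
	/* under SRIOV the call returns 0 without touching PG registers */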
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 550ac040b4be..411c1d802823 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -1317,6 +1317,15 @@ static int vcn_v4_0_3_set_powergating_state(void *handle,
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int ret;
 
+	/* For SRIOV, the guest should not control VCN power-gating.
+	 * The MMSCH firmware controls power-gating and clock-gating,
+	 * so the guest should avoid touching the CGC and PG registers.
+	 */
+	if (amdgpu_sriov_vf(adev)) {
+		adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
+		return 0;
+	}
+
 	if (state == adev->vcn.cur_state)
 		return 0;
 
-- 
2.34.1



* [PATCH v3 3/7] drm/amdgpu/vcn: SRIOV support for vcn_v4_0_3
From: Samir Dhume @ 2023-07-28 19:15 UTC (permalink / raw)
  To: amd-gfx
  Cc: zhigang.luo, guchun.chen, gavin.wan, lijo.lazar, Samir Dhume,
	frank.min, hawking.zhang

Perform the initialization-table handshake with the MMSCH.

Signed-off-by: Samir Dhume <samir.dhume@amd.com>
---
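The handshake follows the same MMSCH v4.0 mailbox protocol as other
IP versions: publish the table address and size, clear the response
mailbox, ring the host mailbox, then poll for completion. Condensed
sketch of the sequence implemented below:

	WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
	WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
	WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_SIZE, header.total_size);
	WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP, 0);
	WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_HOST, 0x00000001);
	/* then poll regMMSCH_VF_MAILBOX_RESP for MMSCH_VF_MAILBOX_RESP__OK,
	 * with a ~10 ms (1000 x 10 us) timeout budget
	 */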
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 257 +++++++++++++++++++++---
 1 file changed, 233 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 411c1d802823..b978265b2d77 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -31,6 +31,7 @@
 #include "soc15d.h"
 #include "soc15_hw_ip.h"
 #include "vcn_v2_0.h"
+#include "mmsch_v4_0_3.h"
 
 #include "vcn/vcn_4_0_3_offset.h"
 #include "vcn/vcn_4_0_3_sh_mask.h"
@@ -44,6 +45,7 @@
 #define VCN_VID_SOC_ADDRESS_2_0		0x1fb00
 #define VCN1_VID_SOC_ADDRESS_3_0	0x48300
 
+static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev);
 static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev);
 static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
 static int vcn_v4_0_3_set_powergating_state(void *handle,
@@ -130,6 +132,10 @@ static int vcn_v4_0_3_sw_init(void *handle)
 			amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
 	}
 
+	r = amdgpu_virt_alloc_mm_table(adev);
+	if (r)
+		return r;
+
 	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
 		adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;
 
@@ -167,6 +173,8 @@ static int vcn_v4_0_3_sw_fini(void *handle)
 		drm_dev_exit(idx);
 	}
 
+	amdgpu_virt_free_mm_table(adev);
+
 	r = amdgpu_vcn_suspend(adev);
 	if (r)
 		return r;
@@ -189,33 +197,47 @@ static int vcn_v4_0_3_hw_init(void *handle)
 	struct amdgpu_ring *ring;
 	int i, r, vcn_inst;
 
-	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-		vcn_inst = GET_INST(VCN, i);
-		ring = &adev->vcn.inst[i].ring_enc[0];
+	if (amdgpu_sriov_vf(adev)) {
+		r = vcn_v4_0_3_start_sriov(adev);
+		if (r)
+			goto done;
 
-		if (ring->use_doorbell) {
-			adev->nbio.funcs->vcn_doorbell_range(
-				adev, ring->use_doorbell,
-				(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
-					9 * vcn_inst,
-				adev->vcn.inst[i].aid_id);
-
-			WREG32_SOC15(
-				VCN, GET_INST(VCN, ring->me),
-				regVCN_RB1_DB_CTRL,
-				ring->doorbell_index
-						<< VCN_RB1_DB_CTRL__OFFSET__SHIFT |
-					VCN_RB1_DB_CTRL__EN_MASK);
-
-			/* Read DB_CTRL to flush the write DB_CTRL command. */
-			RREG32_SOC15(
-				VCN, GET_INST(VCN, ring->me),
-				regVCN_RB1_DB_CTRL);
+		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+			ring = &adev->vcn.inst[i].ring_enc[0];
+			ring->wptr = 0;
+			ring->wptr_old = 0;
+			vcn_v4_0_3_unified_ring_set_wptr(ring);
+			ring->sched.ready = true;
 		}
+	} else {
+		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+			vcn_inst = GET_INST(VCN, i);
+			ring = &adev->vcn.inst[i].ring_enc[0];
+
+			if (ring->use_doorbell) {
+				adev->nbio.funcs->vcn_doorbell_range(
+					adev, ring->use_doorbell,
+					(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+						9 * vcn_inst,
+					adev->vcn.inst[i].aid_id);
+
+				WREG32_SOC15(
+					VCN, GET_INST(VCN, ring->me),
+					regVCN_RB1_DB_CTRL,
+					ring->doorbell_index
+							<< VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+						VCN_RB1_DB_CTRL__EN_MASK);
+
+				/* Read DB_CTRL to flush the write DB_CTRL command. */
+				RREG32_SOC15(
+					VCN, GET_INST(VCN, ring->me),
+					regVCN_RB1_DB_CTRL);
+			}
 
-		r = amdgpu_ring_test_helper(ring);
-		if (r)
-			goto done;
+			r = amdgpu_ring_test_helper(ring);
+			if (r)
+				goto done;
+		}
 	}
 
 done:
@@ -813,6 +835,193 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b
 	return 0;
 }
 
+static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev)
+{
+	int i, vcn_inst;
+	struct amdgpu_ring *ring_enc;
+	uint64_t cache_addr;
+	uint64_t rb_enc_addr;
+	uint64_t ctx_addr;
+	uint32_t param, resp, expected;
+	uint32_t offset, cache_size;
+	uint32_t tmp, timeout;
+
+	struct amdgpu_mm_table *table = &adev->virt.mm_table;
+	uint32_t *table_loc;
+	uint32_t table_size;
+	uint32_t size, size_dw;
+	uint32_t init_status;
+	uint32_t enabled_vcn;
+
+	struct mmsch_v4_0_cmd_direct_write
+		direct_wt = { {0} };
+	struct mmsch_v4_0_cmd_direct_read_modify_write
+		direct_rd_mod_wt = { {0} };
+	struct mmsch_v4_0_cmd_end end = { {0} };
+	struct mmsch_v4_0_3_init_header header;
+
+	volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+	volatile struct amdgpu_fw_shared_rb_setup *rb_setup;
+
+	direct_wt.cmd_header.command_type =
+		MMSCH_COMMAND__DIRECT_REG_WRITE;
+	direct_rd_mod_wt.cmd_header.command_type =
+		MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
+	end.cmd_header.command_type = MMSCH_COMMAND__END;
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+		vcn_inst = GET_INST(VCN, i);
+
+		memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header));
+		header.version = MMSCH_VERSION;
+		header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2;
+
+		table_loc = (uint32_t *)table->cpu_addr;
+		table_loc += header.total_size;
+
+		table_size = 0;
+
+		MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS),
+			~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
+
+		cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+
+		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+				regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
+
+			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+				regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
+
+			offset = 0;
+			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+				regUVD_VCPU_CACHE_OFFSET0), 0);
+		} else {
+			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+				regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+				lower_32_bits(adev->vcn.inst[i].gpu_addr));
+			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+				regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+				upper_32_bits(adev->vcn.inst[i].gpu_addr));
+			offset = cache_size;
+			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+				regUVD_VCPU_CACHE_OFFSET0),
+				AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+		}
+
+		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+			regUVD_VCPU_CACHE_SIZE0),
+			cache_size);
+
+		cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset;
+		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+			regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(cache_addr));
+		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+			regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
+		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+			regUVD_VCPU_CACHE_OFFSET1), 0);
+		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+			regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE);
+
+		cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset +
+			AMDGPU_VCN_STACK_SIZE;
+
+		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+			regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(cache_addr));
+
+		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+			regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
+
+		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+			regUVD_VCPU_CACHE_OFFSET2), 0);
+
+		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+			regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE);
+
+		fw_shared = adev->vcn.inst[vcn_inst].fw_shared.cpu_addr;
+		rb_setup = &fw_shared->rb_setup;
+
+		ring_enc = &adev->vcn.inst[vcn_inst].ring_enc[0];
+		ring_enc->wptr = 0;
+		rb_enc_addr = ring_enc->gpu_addr;
+
+		rb_setup->is_rb_enabled_flags |= RB_ENABLED;
+		rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
+		rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
+		rb_setup->rb_size = ring_enc->ring_size / 4;
+		fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
+
+		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+			regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+			lower_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
+		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+			regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+			upper_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
+		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+			regUVD_VCPU_NONCACHE_SIZE0),
+			AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
+		MMSCH_V4_0_INSERT_END();
+
+		header.vcn0.init_status = 0;
+		header.vcn0.table_offset = header.total_size;
+		header.vcn0.table_size = table_size;
+		header.total_size += table_size;
+
+		/* Send init table to mmsch */
+		size = sizeof(struct mmsch_v4_0_3_init_header);
+		table_loc = (uint32_t *)table->cpu_addr;
+		memcpy((void *)table_loc, &header, size);
+
+		ctx_addr = table->gpu_addr;
+		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
+		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
+
+		tmp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID);
+		tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+		tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID, tmp);
+
+		size = header.total_size;
+		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_SIZE, size);
+
+		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP, 0);
+
+		param = 0x00000001;
+		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_HOST, param);
+		tmp = 0;
+		timeout = 1000;
+		resp = 0;
+		expected = MMSCH_VF_MAILBOX_RESP__OK;
+		while (resp != expected) {
+			resp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP);
+			if (resp != 0)
+				break;
+
+			udelay(10);
+			tmp = tmp + 10;
+			if (tmp >= timeout) {
+				DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+					" waiting for regMMSCH_VF_MAILBOX_RESP "\
+					"(expected=0x%08x, readback=0x%08x)\n",
+					tmp, expected, resp);
+				return -EBUSY;
+			}
+		}
+
+		enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0;
+		init_status = ((struct mmsch_v4_0_3_init_header *)(table_loc))->vcn0.init_status;
+		if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
+					&& init_status != MMSCH_VF_ENGINE_STATUS__PASS) {
+			DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\
+				"status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status);
+		}
+	}
+
+	return 0;
+}
+
 /**
  * vcn_v4_0_3_start - VCN start
  *
-- 
2.34.1



* [PATCH v3 4/7] drm/amdgpu/vcn: mmsch_v4_0_3 requires doorbell on 32-byte boundary
From: Samir Dhume @ 2023-07-28 19:15 UTC (permalink / raw)
  To: amd-gfx
  Cc: zhigang.luo, guchun.chen, gavin.wan, lijo.lazar, Samir Dhume,
	frank.min, hawking.zhang

Signed-off-by: Samir Dhume <samir.dhume@amd.com>
---
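The bare-metal stride of 9 doorbell indices per VCN instance puts
instance blocks at base, base+9, base+18, ...; the MMSCH expects
each VF instance block to start on a 32-index boundary instead.
Worked example, assuming the layout-1 VCN base of 0x1B0 for
(adev->doorbell_index.vcn.vcn_ring0_1 << 1); the ternary below is
equivalent to the if/else in the diff:

	/* bare metal:  9 * vcn_inst -> 0x1B0, 0x1B9, 0x1C2, ... */
	/* SRIOV:      32 * vcn_inst -> 0x1B0, 0x1D0, 0x1F0, ... */
	ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
			       (amdgpu_sriov_vf(adev) ? 32 : 9) * vcn_inst;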
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index b978265b2d77..7cd5ca204317 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -113,9 +113,16 @@ static int vcn_v4_0_3_sw_init(void *handle)
 
 		ring = &adev->vcn.inst[i].ring_enc[0];
 		ring->use_doorbell = true;
-		ring->doorbell_index =
-			(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
-			9 * vcn_inst;
+
+		if (!amdgpu_sriov_vf(adev))
+			ring->doorbell_index =
+				(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+				9 * vcn_inst;
+		else
+			ring->doorbell_index =
+				(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+				32 * vcn_inst;
+
 		ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id);
 		sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id);
 		r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
-- 
2.34.1



* [PATCH v3 5/7] drm/amdgpu/jpeg: SRIOV support for jpeg_v4_0_3
From: Samir Dhume @ 2023-07-28 19:15 UTC (permalink / raw)
  To: amd-gfx
  Cc: zhigang.luo, guchun.chen, gavin.wan, lijo.lazar, Samir Dhume,
	frank.min, hawking.zhang

Perform the initialization-table handshake with the MMSCH.

Signed-off-by: Samir Dhume <samir.dhume@amd.com>
---
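Each of the eight JPEG decode rings gets its own register-write
table: rings 0-3 are recorded in mjpegdec0[] and rings 4-7 in
mjpegdec1[], with item_offset tracking each table's starting dword.
Resulting layout of the MMSCH context buffer (dword offsets):

	/*
	 * 0 .. total_size-1           struct mmsch_v4_0_3_init_header
	 * mjpegdec0[0].table_offset   register writes for ring_dec[0]
	 * mjpegdec0[1].table_offset   register writes for ring_dec[1]
	 *     ...
	 * mjpegdec1[3].table_offset   register writes for ring_dec[7]
	 * (the last table ends with an MMSCH_COMMAND__END marker)
	 */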
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 174 ++++++++++++++++++++---
 1 file changed, 153 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index ce2b22f7e4e4..33f04ea8549f 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -26,6 +26,7 @@
 #include "soc15.h"
 #include "soc15d.h"
 #include "jpeg_v4_0_3.h"
+#include "mmsch_v4_0_3.h"
 
 #include "vcn/vcn_4_0_3_offset.h"
 #include "vcn/vcn_4_0_3_sh_mask.h"
@@ -41,6 +42,7 @@ static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
 static int jpeg_v4_0_3_set_powergating_state(void *handle,
 				enum amd_powergating_state state);
 static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
+static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring);
 
 static int amdgpu_ih_srcid_jpeg[] = {
 	VCN_4_0__SRCID__JPEG_DECODE,
@@ -160,6 +162,120 @@ static int jpeg_v4_0_3_sw_fini(void *handle)
 	return r;
 }
 
+static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	uint64_t ctx_addr;
+	uint32_t param, resp, expected;
+	uint32_t tmp, timeout;
+
+	struct amdgpu_mm_table *table = &adev->virt.mm_table;
+	uint32_t *table_loc;
+	uint32_t table_size;
+	uint32_t size, size_dw, item_offset;
+	uint32_t init_status;
+	int i, j, jpeg_inst;
+
+	struct mmsch_v4_0_cmd_direct_write
+		direct_wt = { {0} };
+	struct mmsch_v4_0_cmd_end end = { {0} };
+	struct mmsch_v4_0_3_init_header header;
+
+	direct_wt.cmd_header.command_type =
+		MMSCH_COMMAND__DIRECT_REG_WRITE;
+	end.cmd_header.command_type =
+		MMSCH_COMMAND__END;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+		jpeg_inst = GET_INST(JPEG, i);
+
+		memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header));
+		header.version = MMSCH_VERSION;
+		header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2;
+
+		table_loc = (uint32_t *)table->cpu_addr;
+		table_loc += header.total_size;
+
+		item_offset = header.total_size;
+
+		for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) {
+			ring = &adev->jpeg.inst[i].ring_dec[j];
+			table_size = 0;
+
+			tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW);
+			MMSCH_V4_0_INSERT_DIRECT_WT(tmp, lower_32_bits(ring->gpu_addr));
+			tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH);
+			MMSCH_V4_0_INSERT_DIRECT_WT(tmp, upper_32_bits(ring->gpu_addr));
+			tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_SIZE);
+			MMSCH_V4_0_INSERT_DIRECT_WT(tmp, ring->ring_size / 4);
+
+			if (j <= 3) {
+				header.mjpegdec0[j].table_offset = item_offset;
+				header.mjpegdec0[j].init_status = 0;
+				header.mjpegdec0[j].table_size = table_size;
+			} else {
+				header.mjpegdec1[j-4].table_offset = item_offset;
+				header.mjpegdec1[j-4].init_status = 0;
+				header.mjpegdec1[j-4].table_size = table_size;
+			}
+			header.total_size += table_size;
+			item_offset += table_size;
+		}
+
+		MMSCH_V4_0_INSERT_END();
+
+		/* send init table to MMSCH */
+		size = sizeof(struct mmsch_v4_0_3_init_header);
+		table_loc = (uint32_t *)table->cpu_addr;
+		memcpy((void *)table_loc, &header, size);
+
+		ctx_addr = table->gpu_addr;
+		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
+		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
+
+		tmp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID);
+		tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+		tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID, tmp);
+
+		size = header.total_size;
+		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_SIZE, size);
+
+		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP, 0);
+
+		param = 0x00000001;
+		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_HOST, param);
+		tmp = 0;
+		timeout = 1000;
+		resp = 0;
+		expected = MMSCH_VF_MAILBOX_RESP__OK;
+		while (resp != expected) {
+			resp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP);
+
+			if (resp != 0)
+				break;
+			udelay(10);
+			tmp = tmp + 10;
+			if (tmp >= timeout) {
+				DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+					" waiting for regMMSCH_VF_MAILBOX_RESP "\
+					"(expected=0x%08x, readback=0x%08x)\n",
+					tmp, expected, resp);
+				return -EBUSY;
+			}
+		}
+		init_status = ((struct mmsch_v4_0_3_init_header *)(table_loc))->mjpegdec0[i].init_status;
+		if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE &&
+		    init_status != MMSCH_VF_ENGINE_STATUS__PASS)
+			DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init status for jpeg: %x\n",
+				resp, init_status);
+	}
+	return 0;
+}
+
 /**
  * jpeg_v4_0_3_hw_init - start and test JPEG block
  *
@@ -172,31 +288,47 @@ static int jpeg_v4_0_3_hw_init(void *handle)
 	struct amdgpu_ring *ring;
 	int i, j, r, jpeg_inst;
 
-	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
-		jpeg_inst = GET_INST(JPEG, i);
+	if (amdgpu_sriov_vf(adev)) {
+		r = jpeg_v4_0_3_start_sriov(adev);
+		if (r)
+			return r;
 
-		ring = adev->jpeg.inst[i].ring_dec;
+		for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+			for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+				ring = &adev->jpeg.inst[i].ring_dec[j];
+				ring->wptr = 0;
+				ring->wptr_old = 0;
+				jpeg_v4_0_3_dec_ring_set_wptr(ring);
+				ring->sched.ready = true;
+			}
+		}
+	} else {
+		for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+			jpeg_inst = GET_INST(JPEG, i);
 
-		if (ring->use_doorbell)
-			adev->nbio.funcs->vcn_doorbell_range(
-				adev, ring->use_doorbell,
-				(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
-					9 * jpeg_inst,
-				adev->jpeg.inst[i].aid_id);
+			ring = adev->jpeg.inst[i].ring_dec;
 
-		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
-			ring = &adev->jpeg.inst[i].ring_dec[j];
 			if (ring->use_doorbell)
-				WREG32_SOC15_OFFSET(
-					VCN, GET_INST(VCN, i),
-					regVCN_JPEG_DB_CTRL,
-					(ring->pipe ? (ring->pipe - 0x15) : 0),
-					ring->doorbell_index
-							<< VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
-						VCN_JPEG_DB_CTRL__EN_MASK);
-			r = amdgpu_ring_test_helper(ring);
-			if (r)
-				return r;
+				adev->nbio.funcs->vcn_doorbell_range(
+					adev, ring->use_doorbell,
+					(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+						9 * jpeg_inst,
+					adev->jpeg.inst[i].aid_id);
+
+			for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+				ring = &adev->jpeg.inst[i].ring_dec[j];
+				if (ring->use_doorbell)
+					WREG32_SOC15_OFFSET(
+						VCN, GET_INST(VCN, i),
+						regVCN_JPEG_DB_CTRL,
+						(ring->pipe ? (ring->pipe - 0x15) : 0),
+						ring->doorbell_index
+								<< VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+							VCN_JPEG_DB_CTRL__EN_MASK);
+				r = amdgpu_ring_test_helper(ring);
+				if (r)
+					return r;
+			}
 		}
 	}
 	DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n");
-- 
2.34.1



* [PATCH v3 6/7] drm/amdgpu/jpeg: mmsch_v4_0_3 requires doorbell on 32-byte boundary
From: Samir Dhume @ 2023-07-28 19:15 UTC (permalink / raw)
  To: amd-gfx
  Cc: zhigang.luo, guchun.chen, gavin.wan, lijo.lazar, Samir Dhume,
	frank.min, hawking.zhang

For SRIOV, the doorbell indices within each AID's 32-index block are
laid out as follows:

BASE: VCN0 unified (32-byte boundary)
BASE+4: MJPEG0
BASE+5: MJPEG1
BASE+6: MJPEG2
BASE+7: MJPEG3
BASE+12: MJPEG4
BASE+13: MJPEG5
BASE+14: MJPEG6
BASE+15: MJPEG7

Signed-off-by: Samir Dhume <samir.dhume@amd.com>
---
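Worked example for the first AID (jpeg_inst == 0), with BASE being
(adev->doorbell_index.vcn.vcn_ring0_1 << 1):

	/* j = 0..3 -> BASE + 4 + j = BASE+4  .. BASE+7   (MJPEG0..3) */
	/* j = 4..7 -> BASE + 8 + j = BASE+12 .. BASE+15  (MJPEG4..7) */
	/* jpeg_inst == 1 adds 32, so its block starts at BASE+32     */

This leaves BASE for the VCN unified ring (patch 4) and keeps each
AID's doorbell block 32 indices wide.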
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index 33f04ea8549f..f745eeef442f 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -111,9 +111,20 @@ static int jpeg_v4_0_3_sw_init(void *handle)
 			ring = &adev->jpeg.inst[i].ring_dec[j];
 			ring->use_doorbell = true;
 			ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id);
-			ring->doorbell_index =
-				(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
-				1 + j + 9 * jpeg_inst;
+			if (!amdgpu_sriov_vf(adev)) {
+				ring->doorbell_index =
+					(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+					1 + j + 9 * jpeg_inst;
+			} else {
+				if (j < 4)
+					ring->doorbell_index =
+						(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+						4 + j + 32 * jpeg_inst;
+				else
+					ring->doorbell_index =
+						(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+						8 + j + 32 * jpeg_inst;
+			}
 			sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j);
 			r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
 						AMDGPU_RING_PRIO_DEFAULT, NULL);
-- 
2.34.1



* [PATCH v3 7/7] drm/amdgpu/vcn: change end doorbell index for vcn_v4_0_3
From: Samir Dhume @ 2023-07-28 19:15 UTC (permalink / raw)
  To: amd-gfx
  Cc: zhigang.luo, guchun.chen, gavin.wan, lijo.lazar, Samir Dhume,
	frank.min, hawking.zhang

For SRIOV, the doorbell index for VCN0 on each AID needs to be on a
32-byte boundary, so the VCN end doorbell index must be moved.

Signed-off-by: Samir Dhume <samir.dhume@amd.com>
---
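Arithmetic check: the old end gave 0x1D4 - 0x1B0 = 0x24 (36) doorbell
indices, i.e. four instances at the bare-metal stride of 9. The new
end gives 0x1E8 - 0x1B0 = 0x38 (56) indices, enough for the 32-index
per-AID blocks used under SRIOV (patches 4 and 6) to fit inside the
VCN range.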
 drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
index f637574644c0..4a279960cd21 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
@@ -330,14 +330,14 @@ typedef enum _AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 {
 	AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_END		= 0x19F,
 	/* IH: 0x1A0 ~ 0x1AF */
 	AMDGPU_DOORBELL_LAYOUT1_IH                      = 0x1A0,
-	/* VCN: 0x1B0 ~ 0x1D4 */
+	/* VCN: 0x1B0 ~ 0x1E8 */
 	AMDGPU_DOORBELL_LAYOUT1_VCN_START               = 0x1B0,
-	AMDGPU_DOORBELL_LAYOUT1_VCN_END                 = 0x1D4,
+	AMDGPU_DOORBELL_LAYOUT1_VCN_END                 = 0x1E8,
 
 	AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP		= AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START,
 	AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP		= AMDGPU_DOORBELL_LAYOUT1_VCN_END,
 
-	AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT          = 0x1D4,
+	AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT          = 0x1E8,
 	AMDGPU_DOORBELL_LAYOUT1_INVALID                 = 0xFFFF
 } AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1;
 
-- 
2.34.1



* Re: [PATCH v3 3/7] drm/amdgpu/vcn: SRIOV support for vcn_v4_0_3
From: Leo Liu @ 2023-08-08 12:28 UTC (permalink / raw)
  To: Samir Dhume, amd-gfx
  Cc: zhigang.luo, guchun.chen, gavin.wan, lijo.lazar, frank.min,
	hawking.zhang


On 2023-07-28 15:15, Samir Dhume wrote:
> Perform the initialization-table handshake with the MMSCH.
>
> Signed-off-by: Samir Dhume <samir.dhume@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 257 +++++++++++++++++++++---
>   1 file changed, 233 insertions(+), 24 deletions(-)
>
> [...]
> @@ -130,6 +132,10 @@ static int vcn_v4_0_3_sw_init(void *handle)
>   			amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
>   	}
>   
> +	r = amdgpu_virt_alloc_mm_table(adev);

Since this function is not for bare metal, please move the
amdgpu_sriov_vf() check from inside the function to here, to avoid
confusion.
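
Something like this (untested sketch of the suggested shape):

	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_virt_alloc_mm_table(adev);
		if (r)
			return r;
	}

with a matching amdgpu_sriov_vf() guard around
amdgpu_virt_free_mm_table() in sw_fini.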


> +	if (r)
> +		return r;
> +
>   	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
>   		adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;
>   
> @@ -167,6 +173,8 @@ static int vcn_v4_0_3_sw_fini(void *handle)
>   		drm_dev_exit(idx);
>   	}
>   
> +	amdgpu_virt_free_mm_table(adev);

Same as above.


Regards,

Leo




* RE: [PATCH v3 3/7] drm/amdgpu/vcn: SRIOV support for vcn_v4_0_3
From: Dhume, Samir @ 2023-08-08 16:31 UTC (permalink / raw)
  To: Liu, Leo, amd-gfx
  Cc: Luo, Zhigang, Chen, Guchun, Wan, Gavin, Lazar, Lijo, Min, Frank,
	Zhang, Hawking


Thanks Leo. I'll restore the SRIOV check before calling amdgpu_virt_alloc_mm_table(), which makes it consistent with the other VCN IP versions. I'll also retain the check inside amdgpu_virt_alloc_mm_table() as a conservative, defensive measure.
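
For reference, the internal check keeps the helper a no-op on bare
metal. The shape of amdgpu_virt_alloc_mm_table() in amdgpu_virt.c is
roughly the following (from memory; the exact body may differ):

	int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev)
	{
		int r;

		/* no-op on bare metal, or if the table already exists */
		if (!amdgpu_sriov_vf(adev) || adev->virt.mm_table.gpu_addr)
			return 0;

		r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_VRAM,
					    &adev->virt.mm_table.bo,
					    &adev->virt.mm_table.gpu_addr,
					    (void *)&adev->virt.mm_table.cpu_addr);
		if (r)
			return r;

		memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
		return 0;
	}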

Thanks,
Samir


-----Original Message-----
From: Liu, Leo <Leo.Liu@amd.com>
Sent: Tuesday, August 8, 2023 8:29 AM
To: Dhume, Samir <Samir.Dhume@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Luo, Zhigang <Zhigang.Luo@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Wan, Gavin <Gavin.Wan@amd.com>; Lazar, Lijo <Lijo.Lazar@amd.com>; Min, Frank <Frank.Min@amd.com>; Zhang, Hawking <Hawking.Zhang@amd.com>
Subject: Re: [PATCH v3 3/7] drm/amdgpu/vcn: SRIOV support for vcn_v4_0_3


On 2023-07-28 15:15, Samir Dhume wrote:
> Perform the initialization-table handshake with the MMSCH.
>
> Signed-off-by: Samir Dhume <samir.dhume@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 257 +++++++++++++++++++++---
>   1 file changed, 233 insertions(+), 24 deletions(-)
>
> [...]
>                       amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
>       }
>
> +     r = amdgpu_virt_alloc_mm_table(adev);

Since this function is not for bare metal, please move the amdgpu_sriov_vf() check from inside the function to here, to avoid confusion.

> +     if (r)
> +             return r;
> +
>       if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
>               adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;
>
> @@ -167,6 +173,8 @@ static int vcn_v4_0_3_sw_fini(void *handle)
>               drm_dev_exit(idx);
>       }
>
> +     amdgpu_virt_free_mm_table(adev);

Same as above.


Regards,

Leo



