* [PATCH v3 1/7] drm/amdgpu/vcn: Add MMSCH v4_0_3 support for sriov
@ 2023-07-28 19:15 Samir Dhume
2023-07-28 19:15 ` [PATCH v3 2/7] drm/amdgpu/vcn : Skip vcn power-gating change " Samir Dhume
` (5 more replies)
0 siblings, 6 replies; 9+ messages in thread
From: Samir Dhume @ 2023-07-28 19:15 UTC (permalink / raw)
To: amd-gfx
Cc: zhigang.luo, guchun.chen, gavin.wan, lijo.lazar, Samir Dhume,
frank.min, hawking.zhang
The structures are the same as v4_0 except for the
init header
Signed-off-by: Samir Dhume <samir.dhume@amd.com>
---
drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h | 37 +++++++++++++++++++++++
1 file changed, 37 insertions(+)
create mode 100644 drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h
diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h
new file mode 100644
index 000000000000..db7eb5260295
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MMSCH_V4_0_3_H__
+#define __MMSCH_V4_0_3_H__
+
+#include "amdgpu_vcn.h"
+#include "mmsch_v4_0.h"
+
+struct mmsch_v4_0_3_init_header {
+ uint32_t version;
+ uint32_t total_size;
+ struct mmsch_v4_0_table_info vcn0;
+ struct mmsch_v4_0_table_info mjpegdec0[4];
+ struct mmsch_v4_0_table_info mjpegdec1[4];
+};
+#endif
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH v3 2/7] drm/amdgpu/vcn : Skip vcn power-gating change for sriov
2023-07-28 19:15 [PATCH v3 1/7] drm/amdgpu/vcn: Add MMSCH v4_0_3 support for sriov Samir Dhume
@ 2023-07-28 19:15 ` Samir Dhume
2023-07-28 19:15 ` [PATCH v3 3/7] drm/amdgpu/vcn: sriov support for vcn_v4_0_3 Samir Dhume
` (4 subsequent siblings)
5 siblings, 0 replies; 9+ messages in thread
From: Samir Dhume @ 2023-07-28 19:15 UTC (permalink / raw)
To: amd-gfx
Cc: zhigang.luo, guchun.chen, gavin.wan, lijo.lazar, Samir Dhume,
frank.min, hawking.zhang
Signed-off-by: Samir Dhume <samir.dhume@amd.com>
---
drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 550ac040b4be..411c1d802823 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -1317,6 +1317,15 @@ static int vcn_v4_0_3_set_powergating_state(void *handle,
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int ret;
+ /* for SRIOV, guest should not control VCN Power-gating
+ * MMSCH FW should control Power-gating and clock-gating
+ * guest should avoid touching CGC and PG
+ */
+ if (amdgpu_sriov_vf(adev)) {
+ adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
+ return 0;
+ }
+
if (state == adev->vcn.cur_state)
return 0;
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH v3 3/7] drm/amdgpu/vcn: sriov support for vcn_v4_0_3
2023-07-28 19:15 [PATCH v3 1/7] drm/amdgpu/vcn: Add MMSCH v4_0_3 support for sriov Samir Dhume
2023-07-28 19:15 ` [PATCH v3 2/7] drm/amdgpu/vcn : Skip vcn power-gating change " Samir Dhume
@ 2023-07-28 19:15 ` Samir Dhume
2023-08-08 12:28 ` Leo Liu
2023-07-28 19:15 ` [PATCH v3 4/7] drm/amdgpu/vcn: mmsch_v4_0_3 requires doorbell on 32 byte boundary Samir Dhume
` (3 subsequent siblings)
5 siblings, 1 reply; 9+ messages in thread
From: Samir Dhume @ 2023-07-28 19:15 UTC (permalink / raw)
To: amd-gfx
Cc: zhigang.luo, guchun.chen, gavin.wan, lijo.lazar, Samir Dhume,
frank.min, hawking.zhang
initialization table handshake with mmsch
Signed-off-by: Samir Dhume <samir.dhume@amd.com>
---
drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 257 +++++++++++++++++++++---
1 file changed, 233 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 411c1d802823..b978265b2d77 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -31,6 +31,7 @@
#include "soc15d.h"
#include "soc15_hw_ip.h"
#include "vcn_v2_0.h"
+#include "mmsch_v4_0_3.h"
#include "vcn/vcn_4_0_3_offset.h"
#include "vcn/vcn_4_0_3_sh_mask.h"
@@ -44,6 +45,7 @@
#define VCN_VID_SOC_ADDRESS_2_0 0x1fb00
#define VCN1_VID_SOC_ADDRESS_3_0 0x48300
+static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev);
static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
static int vcn_v4_0_3_set_powergating_state(void *handle,
@@ -130,6 +132,10 @@ static int vcn_v4_0_3_sw_init(void *handle)
amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
}
+ r = amdgpu_virt_alloc_mm_table(adev);
+ if (r)
+ return r;
+
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;
@@ -167,6 +173,8 @@ static int vcn_v4_0_3_sw_fini(void *handle)
drm_dev_exit(idx);
}
+ amdgpu_virt_free_mm_table(adev);
+
r = amdgpu_vcn_suspend(adev);
if (r)
return r;
@@ -189,33 +197,47 @@ static int vcn_v4_0_3_hw_init(void *handle)
struct amdgpu_ring *ring;
int i, r, vcn_inst;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- vcn_inst = GET_INST(VCN, i);
- ring = &adev->vcn.inst[i].ring_enc[0];
+ if (amdgpu_sriov_vf(adev)) {
+ r = vcn_v4_0_3_start_sriov(adev);
+ if (r)
+ goto done;
- if (ring->use_doorbell) {
- adev->nbio.funcs->vcn_doorbell_range(
- adev, ring->use_doorbell,
- (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
- 9 * vcn_inst,
- adev->vcn.inst[i].aid_id);
-
- WREG32_SOC15(
- VCN, GET_INST(VCN, ring->me),
- regVCN_RB1_DB_CTRL,
- ring->doorbell_index
- << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
- VCN_RB1_DB_CTRL__EN_MASK);
-
- /* Read DB_CTRL to flush the write DB_CTRL command. */
- RREG32_SOC15(
- VCN, GET_INST(VCN, ring->me),
- regVCN_RB1_DB_CTRL);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ ring->wptr = 0;
+ ring->wptr_old = 0;
+ vcn_v4_0_3_unified_ring_set_wptr(ring);
+ ring->sched.ready = true;
}
+ } else {
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ vcn_inst = GET_INST(VCN, i);
+ ring = &adev->vcn.inst[i].ring_enc[0];
+
+ if (ring->use_doorbell) {
+ adev->nbio.funcs->vcn_doorbell_range(
+ adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 9 * vcn_inst,
+ adev->vcn.inst[i].aid_id);
+
+ WREG32_SOC15(
+ VCN, GET_INST(VCN, ring->me),
+ regVCN_RB1_DB_CTRL,
+ ring->doorbell_index
+ << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+
+ /* Read DB_CTRL to flush the write DB_CTRL command. */
+ RREG32_SOC15(
+ VCN, GET_INST(VCN, ring->me),
+ regVCN_RB1_DB_CTRL);
+ }
- r = amdgpu_ring_test_helper(ring);
- if (r)
- goto done;
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ goto done;
+ }
}
done:
@@ -813,6 +835,193 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b
return 0;
}
+static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev)
+{
+ int i, vcn_inst;
+ struct amdgpu_ring *ring_enc;
+ uint64_t cache_addr;
+ uint64_t rb_enc_addr;
+ uint64_t ctx_addr;
+ uint32_t param, resp, expected;
+ uint32_t offset, cache_size;
+ uint32_t tmp, timeout;
+
+ struct amdgpu_mm_table *table = &adev->virt.mm_table;
+ uint32_t *table_loc;
+ uint32_t table_size;
+ uint32_t size, size_dw;
+ uint32_t init_status;
+ uint32_t enabled_vcn;
+
+ struct mmsch_v4_0_cmd_direct_write
+ direct_wt = { {0} };
+ struct mmsch_v4_0_cmd_direct_read_modify_write
+ direct_rd_mod_wt = { {0} };
+ struct mmsch_v4_0_cmd_end end = { {0} };
+ struct mmsch_v4_0_3_init_header header;
+
+ volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ volatile struct amdgpu_fw_shared_rb_setup *rb_setup;
+
+ direct_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_WRITE;
+ direct_rd_mod_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
+ end.cmd_header.command_type = MMSCH_COMMAND__END;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ vcn_inst = GET_INST(VCN, i);
+
+ memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header));
+ header.version = MMSCH_VERSION;
+ header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2;
+
+ table_loc = (uint32_t *)table->cpu_addr;
+ table_loc += header.total_size;
+
+ table_size = 0;
+
+ MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS),
+ ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
+
+ cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
+
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
+
+ offset = 0;
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_OFFSET0), 0);
+ } else {
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[i].gpu_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[i].gpu_addr));
+ offset = cache_size;
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_SIZE0),
+ cache_size);
+
+ cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset;
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(cache_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_OFFSET1), 0);
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE);
+
+ cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE;
+
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(cache_addr));
+
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
+
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_OFFSET2), 0);
+
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE);
+
+ fw_shared = adev->vcn.inst[vcn_inst].fw_shared.cpu_addr;
+ rb_setup = &fw_shared->rb_setup;
+
+ ring_enc = &adev->vcn.inst[vcn_inst].ring_enc[0];
+ ring_enc->wptr = 0;
+ rb_enc_addr = ring_enc->gpu_addr;
+
+ rb_setup->is_rb_enabled_flags |= RB_ENABLED;
+ rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
+ rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
+ rb_setup->rb_size = ring_enc->ring_size / 4;
+ fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
+
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
+ MMSCH_V4_0_INSERT_END();
+
+ header.vcn0.init_status = 0;
+ header.vcn0.table_offset = header.total_size;
+ header.vcn0.table_size = table_size;
+ header.total_size += table_size;
+
+ /* Send init table to mmsch */
+ size = sizeof(struct mmsch_v4_0_3_init_header);
+ table_loc = (uint32_t *)table->cpu_addr;
+ memcpy((void *)table_loc, &header, size);
+
+ ctx_addr = table->gpu_addr;
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID);
+ tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+ tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID, tmp);
+
+ size = header.total_size;
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_SIZE, size);
+
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP, 0);
+
+ param = 0x00000001;
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_HOST, param);
+ tmp = 0;
+ timeout = 1000;
+ resp = 0;
+ expected = MMSCH_VF_MAILBOX_RESP__OK;
+ while (resp != expected) {
+ resp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP);
+ if (resp != 0)
+ break;
+
+ udelay(10);
+ tmp = tmp + 10;
+ if (tmp >= timeout) {
+ DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+ " waiting for regMMSCH_VF_MAILBOX_RESP "\
+ "(expected=0x%08x, readback=0x%08x)\n",
+ tmp, expected, resp);
+ return -EBUSY;
+ }
+ }
+
+ enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0;
+ init_status = ((struct mmsch_v4_0_3_init_header *)(table_loc))->vcn0.init_status;
+ if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
+ && init_status != MMSCH_VF_ENGINE_STATUS__PASS) {
+ DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\
+ "status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status);
+ }
+ }
+
+ return 0;
+}
+
/**
* vcn_v4_0_3_start - VCN start
*
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH v3 4/7] drm/amdgpu/vcn: mmsch_v4_0_3 requires doorbell on 32 byte boundary
2023-07-28 19:15 [PATCH v3 1/7] drm/amdgpu/vcn: Add MMSCH v4_0_3 support for sriov Samir Dhume
2023-07-28 19:15 ` [PATCH v3 2/7] drm/amdgpu/vcn : Skip vcn power-gating change " Samir Dhume
2023-07-28 19:15 ` [PATCH v3 3/7] drm/amdgpu/vcn: sriov support for vcn_v4_0_3 Samir Dhume
@ 2023-07-28 19:15 ` Samir Dhume
2023-07-28 19:15 ` [PATCH v3 5/7] drm/amdgpu/jpeg: sriov support for jpeg_v4_0_3 Samir Dhume
` (2 subsequent siblings)
5 siblings, 0 replies; 9+ messages in thread
From: Samir Dhume @ 2023-07-28 19:15 UTC (permalink / raw)
To: amd-gfx
Cc: zhigang.luo, guchun.chen, gavin.wan, lijo.lazar, Samir Dhume,
frank.min, hawking.zhang
Signed-off-by: Samir Dhume <samir.dhume@amd.com>
---
drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index b978265b2d77..7cd5ca204317 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -113,9 +113,16 @@ static int vcn_v4_0_3_sw_init(void *handle)
ring = &adev->vcn.inst[i].ring_enc[0];
ring->use_doorbell = true;
- ring->doorbell_index =
- (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
- 9 * vcn_inst;
+
+ if (!amdgpu_sriov_vf(adev))
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 9 * vcn_inst;
+ else
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 32 * vcn_inst;
+
ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id);
sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id);
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH v3 5/7] drm/amdgpu/jpeg: sriov support for jpeg_v4_0_3
2023-07-28 19:15 [PATCH v3 1/7] drm/amdgpu/vcn: Add MMSCH v4_0_3 support for sriov Samir Dhume
` (2 preceding siblings ...)
2023-07-28 19:15 ` [PATCH v3 4/7] drm/amdgpu/vcn: mmsch_v4_0_3 requires doorbell on 32 byte boundary Samir Dhume
@ 2023-07-28 19:15 ` Samir Dhume
2023-07-28 19:15 ` [PATCH v3 6/7] drm/amdgpu/jpeg: mmsch_v4_0_3 requires doorbell on 32 byte boundary Samir Dhume
2023-07-28 19:15 ` [PATCH v3 7/7] drm/amdgpu/vcn: change end doorbell index for vcn_v4_0_3 Samir Dhume
5 siblings, 0 replies; 9+ messages in thread
From: Samir Dhume @ 2023-07-28 19:15 UTC (permalink / raw)
To: amd-gfx
Cc: zhigang.luo, guchun.chen, gavin.wan, lijo.lazar, Samir Dhume,
frank.min, hawking.zhang
initialization table handshake with mmsch
Signed-off-by: Samir Dhume <samir.dhume@amd.com>
---
drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 174 ++++++++++++++++++++---
1 file changed, 153 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index ce2b22f7e4e4..33f04ea8549f 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -26,6 +26,7 @@
#include "soc15.h"
#include "soc15d.h"
#include "jpeg_v4_0_3.h"
+#include "mmsch_v4_0_3.h"
#include "vcn/vcn_4_0_3_offset.h"
#include "vcn/vcn_4_0_3_sh_mask.h"
@@ -41,6 +42,7 @@ static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
static int jpeg_v4_0_3_set_powergating_state(void *handle,
enum amd_powergating_state state);
static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
+static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring);
static int amdgpu_ih_srcid_jpeg[] = {
VCN_4_0__SRCID__JPEG_DECODE,
@@ -160,6 +162,120 @@ static int jpeg_v4_0_3_sw_fini(void *handle)
return r;
}
+static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ uint64_t ctx_addr;
+ uint32_t param, resp, expected;
+ uint32_t tmp, timeout;
+
+ struct amdgpu_mm_table *table = &adev->virt.mm_table;
+ uint32_t *table_loc;
+ uint32_t table_size;
+ uint32_t size, size_dw, item_offset;
+ uint32_t init_status;
+ int i, j, jpeg_inst;
+
+ struct mmsch_v4_0_cmd_direct_write
+ direct_wt = { {0} };
+ struct mmsch_v4_0_cmd_end end = { {0} };
+ struct mmsch_v4_0_3_init_header header;
+
+ direct_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_WRITE;
+ end.cmd_header.command_type =
+ MMSCH_COMMAND__END;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ jpeg_inst = GET_INST(JPEG, i);
+
+ memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header));
+ header.version = MMSCH_VERSION;
+ header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2;
+
+ table_loc = (uint32_t *)table->cpu_addr;
+ table_loc += header.total_size;
+
+ item_offset = header.total_size;
+
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ table_size = 0;
+
+ tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW);
+ MMSCH_V4_0_INSERT_DIRECT_WT(tmp, lower_32_bits(ring->gpu_addr));
+ tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH);
+ MMSCH_V4_0_INSERT_DIRECT_WT(tmp, upper_32_bits(ring->gpu_addr));
+ tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_SIZE);
+ MMSCH_V4_0_INSERT_DIRECT_WT(tmp, ring->ring_size / 4);
+
+ if (j <= 3) {
+ header.mjpegdec0[j].table_offset = item_offset;
+ header.mjpegdec0[j].init_status = 0;
+ header.mjpegdec0[j].table_size = table_size;
+ } else {
+ header.mjpegdec1[j-4].table_offset = item_offset;
+ header.mjpegdec1[j-4].init_status = 0;
+ header.mjpegdec1[j-4].table_size = table_size;
+ }
+ header.total_size += table_size;
+ item_offset+= table_size;
+ }
+
+ MMSCH_V4_0_INSERT_END();
+
+ /* send init table to MMSCH */
+ size = sizeof(struct mmsch_v4_0_3_init_header);
+ table_loc = (uint32_t *)table->cpu_addr;
+ memcpy((void *)table_loc, &header, size);
+
+ ctx_addr = table->gpu_addr;
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
+
+ tmp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID);
+ tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+ tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID, tmp);
+
+ size = header.total_size;
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_SIZE, size);
+
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP, 0);
+
+ param = 0x00000001;
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_HOST, param);
+ tmp = 0;
+ timeout = 1000;
+ resp = 0;
+ expected = MMSCH_VF_MAILBOX_RESP__OK;
+ init_status = ((struct mmsch_v4_0_3_init_header *)(table_loc))->mjpegdec0[i].init_status;
+ while (resp != expected) {
+ resp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP);
+
+ if (resp != 0)
+ break;
+ udelay(10);
+ tmp = tmp + 10;
+ if (tmp >= timeout) {
+ DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+ " waiting for regMMSCH_VF_MAILBOX_RESP "\
+ "(expected=0x%08x, readback=0x%08x)\n",
+ tmp, expected, resp);
+ return -EBUSY;
+ }
+ }
+ if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE && init_status != MMSCH_VF_ENGINE_STATUS__PASS) {
+ DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init status for jpeg: %x\n", resp, init_status);
+ }
+
+
+ }
+ return 0;
+}
+
+
/**
* jpeg_v4_0_3_hw_init - start and test JPEG block
*
@@ -172,31 +288,47 @@ static int jpeg_v4_0_3_hw_init(void *handle)
struct amdgpu_ring *ring;
int i, j, r, jpeg_inst;
- for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
- jpeg_inst = GET_INST(JPEG, i);
+ if (amdgpu_sriov_vf(adev)) {
+ r = jpeg_v4_0_3_start_sriov(adev);
+ if (r)
+ return r;
- ring = adev->jpeg.inst[i].ring_dec;
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ ring->wptr = 0;
+ ring->wptr_old = 0;
+ jpeg_v4_0_3_dec_ring_set_wptr(ring);
+ ring->sched.ready = true;
+ }
+ }
+ } else {
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ jpeg_inst = GET_INST(JPEG, i);
- if (ring->use_doorbell)
- adev->nbio.funcs->vcn_doorbell_range(
- adev, ring->use_doorbell,
- (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
- 9 * jpeg_inst,
- adev->jpeg.inst[i].aid_id);
+ ring = adev->jpeg.inst[i].ring_dec;
- for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
- ring = &adev->jpeg.inst[i].ring_dec[j];
if (ring->use_doorbell)
- WREG32_SOC15_OFFSET(
- VCN, GET_INST(VCN, i),
- regVCN_JPEG_DB_CTRL,
- (ring->pipe ? (ring->pipe - 0x15) : 0),
- ring->doorbell_index
- << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
- VCN_JPEG_DB_CTRL__EN_MASK);
- r = amdgpu_ring_test_helper(ring);
- if (r)
- return r;
+ adev->nbio.funcs->vcn_doorbell_range(
+ adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 9 * jpeg_inst,
+ adev->jpeg.inst[i].aid_id);
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ if (ring->use_doorbell)
+ WREG32_SOC15_OFFSET(
+ VCN, GET_INST(VCN, i),
+ regVCN_JPEG_DB_CTRL,
+ (ring->pipe ? (ring->pipe - 0x15) : 0),
+ ring->doorbell_index
+ << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+ VCN_JPEG_DB_CTRL__EN_MASK);
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
}
}
DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n");
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH v3 6/7] drm/amdgpu/jpeg: mmsch_v4_0_3 requires doorbell on 32 byte boundary
2023-07-28 19:15 [PATCH v3 1/7] drm/amdgpu/vcn: Add MMSCH v4_0_3 support for sriov Samir Dhume
` (3 preceding siblings ...)
2023-07-28 19:15 ` [PATCH v3 5/7] drm/amdgpu/jpeg: sriov support for jpeg_v4_0_3 Samir Dhume
@ 2023-07-28 19:15 ` Samir Dhume
2023-07-28 19:15 ` [PATCH v3 7/7] drm/amdgpu/vcn: change end doorbell index for vcn_v4_0_3 Samir Dhume
5 siblings, 0 replies; 9+ messages in thread
From: Samir Dhume @ 2023-07-28 19:15 UTC (permalink / raw)
To: amd-gfx
Cc: zhigang.luo, guchun.chen, gavin.wan, lijo.lazar, Samir Dhume,
frank.min, hawking.zhang
BASE: VCN0 unified (32 byte boundary)
BASE+4: MJPEG0
BASE+5: MJPEG1
BASE+6: MJPEG2
BASE+7: MJPEG3
BASE+12: MJPEG4
BASE+13: MJPEG5
BASE+14: MJPEG6
BASE+15: MJPEG7
Signed-off-by: Samir Dhume <samir.dhume@amd.com>
---
drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index 33f04ea8549f..f745eeef442f 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -111,9 +111,20 @@ static int jpeg_v4_0_3_sw_init(void *handle)
ring = &adev->jpeg.inst[i].ring_dec[j];
ring->use_doorbell = true;
ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id);
- ring->doorbell_index =
- (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
- 1 + j + 9 * jpeg_inst;
+ if (!amdgpu_sriov_vf(adev)) {
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 1 + j + 9 * jpeg_inst;
+ } else {
+ if (j < 4)
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 4 + j + 32 * jpeg_inst;
+ else
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 8 + j + 32 * jpeg_inst;
+ }
sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j);
r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
AMDGPU_RING_PRIO_DEFAULT, NULL);
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH v3 7/7] drm/amdgpu/vcn: change end doorbell index for vcn_v4_0_3
2023-07-28 19:15 [PATCH v3 1/7] drm/amdgpu/vcn: Add MMSCH v4_0_3 support for sriov Samir Dhume
` (4 preceding siblings ...)
2023-07-28 19:15 ` [PATCH v3 6/7] drm/amdgpu/jpeg: mmsch_v4_0_3 requires doorbell on 32 byte boundary Samir Dhume
@ 2023-07-28 19:15 ` Samir Dhume
5 siblings, 0 replies; 9+ messages in thread
From: Samir Dhume @ 2023-07-28 19:15 UTC (permalink / raw)
To: amd-gfx
Cc: zhigang.luo, guchun.chen, gavin.wan, lijo.lazar, Samir Dhume,
frank.min, hawking.zhang
For SRIOV, the doorbell index for VCN0 of each AID needs to be on
a 32-byte boundary, so we need to move the VCN end doorbell index
Signed-off-by: Samir Dhume <samir.dhume@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
index f637574644c0..4a279960cd21 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
@@ -330,14 +330,14 @@ typedef enum _AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 {
AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_END = 0x19F,
/* IH: 0x1A0 ~ 0x1AF */
AMDGPU_DOORBELL_LAYOUT1_IH = 0x1A0,
- /* VCN: 0x1B0 ~ 0x1D4 */
+ /* VCN: 0x1B0 ~ 0x1E8 */
AMDGPU_DOORBELL_LAYOUT1_VCN_START = 0x1B0,
- AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1D4,
+ AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1E8,
AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START,
AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_VCN_END,
- AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT = 0x1D4,
+ AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT = 0x1E8,
AMDGPU_DOORBELL_LAYOUT1_INVALID = 0xFFFF
} AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1;
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH v3 3/7] drm/amdgpu/vcn: sriov support for vcn_v4_0_3
2023-07-28 19:15 ` [PATCH v3 3/7] drm/amdgpu/vcn: sriov support for vcn_v4_0_3 Samir Dhume
@ 2023-08-08 12:28 ` Leo Liu
2023-08-08 16:31 ` Dhume, Samir
0 siblings, 1 reply; 9+ messages in thread
From: Leo Liu @ 2023-08-08 12:28 UTC (permalink / raw)
To: Samir Dhume, amd-gfx
Cc: zhigang.luo, guchun.chen, gavin.wan, lijo.lazar, frank.min,
hawking.zhang
On 2023-07-28 15:15, Samir Dhume wrote:
> initialization table handshake with mmsch
>
> Signed-off-by: Samir Dhume <samir.dhume@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 257 +++++++++++++++++++++---
> 1 file changed, 233 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> index 411c1d802823..b978265b2d77 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> @@ -31,6 +31,7 @@
> #include "soc15d.h"
> #include "soc15_hw_ip.h"
> #include "vcn_v2_0.h"
> +#include "mmsch_v4_0_3.h"
>
> #include "vcn/vcn_4_0_3_offset.h"
> #include "vcn/vcn_4_0_3_sh_mask.h"
> @@ -44,6 +45,7 @@
> #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00
> #define VCN1_VID_SOC_ADDRESS_3_0 0x48300
>
> +static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev);
> static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev);
> static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
> static int vcn_v4_0_3_set_powergating_state(void *handle,
> @@ -130,6 +132,10 @@ static int vcn_v4_0_3_sw_init(void *handle)
> amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
> }
>
> + r = amdgpu_virt_alloc_mm_table(adev);
Since this function is not for bare-metal, please move amdgpu_sriov_vf()
check from inside of the function to here, to avoid confusion.
> + if (r)
> + return r;
> +
> if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
> adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;
>
> @@ -167,6 +173,8 @@ static int vcn_v4_0_3_sw_fini(void *handle)
> drm_dev_exit(idx);
> }
>
> + amdgpu_virt_free_mm_table(adev);
Same as above.
Regards,
Leo
> +
> r = amdgpu_vcn_suspend(adev);
> if (r)
> return r;
> @@ -189,33 +197,47 @@ static int vcn_v4_0_3_hw_init(void *handle)
> struct amdgpu_ring *ring;
> int i, r, vcn_inst;
>
> - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
> - vcn_inst = GET_INST(VCN, i);
> - ring = &adev->vcn.inst[i].ring_enc[0];
> + if (amdgpu_sriov_vf(adev)) {
> + r = vcn_v4_0_3_start_sriov(adev);
> + if (r)
> + goto done;
>
> - if (ring->use_doorbell) {
> - adev->nbio.funcs->vcn_doorbell_range(
> - adev, ring->use_doorbell,
> - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
> - 9 * vcn_inst,
> - adev->vcn.inst[i].aid_id);
> -
> - WREG32_SOC15(
> - VCN, GET_INST(VCN, ring->me),
> - regVCN_RB1_DB_CTRL,
> - ring->doorbell_index
> - << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
> - VCN_RB1_DB_CTRL__EN_MASK);
> -
> - /* Read DB_CTRL to flush the write DB_CTRL command. */
> - RREG32_SOC15(
> - VCN, GET_INST(VCN, ring->me),
> - regVCN_RB1_DB_CTRL);
> + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
> + ring = &adev->vcn.inst[i].ring_enc[0];
> + ring->wptr = 0;
> + ring->wptr_old = 0;
> + vcn_v4_0_3_unified_ring_set_wptr(ring);
> + ring->sched.ready = true;
> }
> + } else {
> + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
> + vcn_inst = GET_INST(VCN, i);
> + ring = &adev->vcn.inst[i].ring_enc[0];
> +
> + if (ring->use_doorbell) {
> + adev->nbio.funcs->vcn_doorbell_range(
> + adev, ring->use_doorbell,
> + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
> + 9 * vcn_inst,
> + adev->vcn.inst[i].aid_id);
> +
> + WREG32_SOC15(
> + VCN, GET_INST(VCN, ring->me),
> + regVCN_RB1_DB_CTRL,
> + ring->doorbell_index
> + << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
> + VCN_RB1_DB_CTRL__EN_MASK);
> +
> + /* Read DB_CTRL to flush the write DB_CTRL command. */
> + RREG32_SOC15(
> + VCN, GET_INST(VCN, ring->me),
> + regVCN_RB1_DB_CTRL);
> + }
>
> - r = amdgpu_ring_test_helper(ring);
> - if (r)
> - goto done;
> + r = amdgpu_ring_test_helper(ring);
> + if (r)
> + goto done;
> + }
> }
>
> done:
> @@ -813,6 +835,193 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b
> return 0;
> }
>
> +static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev)
> +{
> + int i, vcn_inst;
> + struct amdgpu_ring *ring_enc;
> + uint64_t cache_addr;
> + uint64_t rb_enc_addr;
> + uint64_t ctx_addr;
> + uint32_t param, resp, expected;
> + uint32_t offset, cache_size;
> + uint32_t tmp, timeout;
> +
> + struct amdgpu_mm_table *table = &adev->virt.mm_table;
> + uint32_t *table_loc;
> + uint32_t table_size;
> + uint32_t size, size_dw;
> + uint32_t init_status;
> + uint32_t enabled_vcn;
> +
> + struct mmsch_v4_0_cmd_direct_write
> + direct_wt = { {0} };
> + struct mmsch_v4_0_cmd_direct_read_modify_write
> + direct_rd_mod_wt = { {0} };
> + struct mmsch_v4_0_cmd_end end = { {0} };
> + struct mmsch_v4_0_3_init_header header;
> +
> + volatile struct amdgpu_vcn4_fw_shared *fw_shared;
> + volatile struct amdgpu_fw_shared_rb_setup *rb_setup;
> +
> + direct_wt.cmd_header.command_type =
> + MMSCH_COMMAND__DIRECT_REG_WRITE;
> + direct_rd_mod_wt.cmd_header.command_type =
> + MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
> + end.cmd_header.command_type = MMSCH_COMMAND__END;
> +
> + for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
> + vcn_inst = GET_INST(VCN, i);
> +
> + memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header));
> + header.version = MMSCH_VERSION;
> + header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2;
> +
> + table_loc = (uint32_t *)table->cpu_addr;
> + table_loc += header.total_size;
> +
> + table_size = 0;
> +
> + MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS),
> + ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
> +
> + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
> +
> + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
> + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
> +
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
> + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
> +
> + offset = 0;
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_VCPU_CACHE_OFFSET0), 0);
> + } else {
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
> + lower_32_bits(adev->vcn.inst[i].gpu_addr));
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
> + upper_32_bits(adev->vcn.inst[i].gpu_addr));
> + offset = cache_size;
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_VCPU_CACHE_OFFSET0),
> + AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
> + }
> +
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_VCPU_CACHE_SIZE0),
> + cache_size);
> +
> + cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset;
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(cache_addr));
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_VCPU_CACHE_OFFSET1), 0);
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE);
> +
> + cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset +
> + AMDGPU_VCN_STACK_SIZE;
> +
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(cache_addr));
> +
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
> +
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_VCPU_CACHE_OFFSET2), 0);
> +
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE);
> +
> + fw_shared = adev->vcn.inst[vcn_inst].fw_shared.cpu_addr;
> + rb_setup = &fw_shared->rb_setup;
> +
> + ring_enc = &adev->vcn.inst[vcn_inst].ring_enc[0];
> + ring_enc->wptr = 0;
> + rb_enc_addr = ring_enc->gpu_addr;
> +
> + rb_setup->is_rb_enabled_flags |= RB_ENABLED;
> + rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
> + rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
> + rb_setup->rb_size = ring_enc->ring_size / 4;
> + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
> +
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
> + lower_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
> + upper_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_VCPU_NONCACHE_SIZE0),
> + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
> + MMSCH_V4_0_INSERT_END();
> +
> + header.vcn0.init_status = 0;
> + header.vcn0.table_offset = header.total_size;
> + header.vcn0.table_size = table_size;
> + header.total_size += table_size;
> +
> + /* Send init table to mmsch */
> + size = sizeof(struct mmsch_v4_0_3_init_header);
> + table_loc = (uint32_t *)table->cpu_addr;
> + memcpy((void *)table_loc, &header, size);
> +
> + ctx_addr = table->gpu_addr;
> + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
> + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
> +
> + tmp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID);
> + tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
> + tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
> + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID, tmp);
> +
> + size = header.total_size;
> + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_SIZE, size);
> +
> + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP, 0);
> +
> + param = 0x00000001;
> + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_HOST, param);
> + tmp = 0;
> + timeout = 1000;
> + resp = 0;
> + expected = MMSCH_VF_MAILBOX_RESP__OK;
> + while (resp != expected) {
> + resp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP);
> + if (resp != 0)
> + break;
> +
> + udelay(10);
> + tmp = tmp + 10;
> + if (tmp >= timeout) {
> + DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
> + " waiting for regMMSCH_VF_MAILBOX_RESP "\
> + "(expected=0x%08x, readback=0x%08x)\n",
> + tmp, expected, resp);
> + return -EBUSY;
> + }
> + }
> +
> + enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0;
> + init_status = ((struct mmsch_v4_0_3_init_header *)(table_loc))->vcn0.init_status;
> + if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
> + && init_status != MMSCH_VF_ENGINE_STATUS__PASS) {
> + DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\
> + "status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status);
> + }
> + }
> +
> + return 0;
> +}
> +
> /**
> * vcn_v4_0_3_start - VCN start
> *
^ permalink raw reply [flat|nested] 9+ messages in thread
* RE: [PATCH v3 3/7] drm/amdgpu/vcn: sriov support for vcn_v4_0_3
2023-08-08 12:28 ` Leo Liu
@ 2023-08-08 16:31 ` Dhume, Samir
0 siblings, 0 replies; 9+ messages in thread
From: Dhume, Samir @ 2023-08-08 16:31 UTC (permalink / raw)
To: Liu, Leo, amd-gfx
Cc: Luo, Zhigang, Chen, Guchun, Wan, Gavin, Lazar, Lijo, Min, Frank,
Zhang, Hawking
[AMD Official Use Only - General]
Thanks Leo. I'll restore the check for sriov before calling amdgpu_virt_alloc_mm_table(). That will make it consistent with other vcn ip versions. I'll retain the check for sriov inside amdgpu_virt_alloc_mm_table() as well, as a conservative check.
Thanks,
Samir
-----Original Message-----
From: Liu, Leo <Leo.Liu@amd.com>
Sent: Tuesday, August 8, 2023 8:29 AM
To: Dhume, Samir <Samir.Dhume@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Luo, Zhigang <Zhigang.Luo@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Wan, Gavin <Gavin.Wan@amd.com>; Lazar, Lijo <Lijo.Lazar@amd.com>; Min, Frank <Frank.Min@amd.com>; Zhang, Hawking <Hawking.Zhang@amd.com>
Subject: Re: [PATCH v3 3/7] drm/amdgpu/vcn: sriov support for vcn_v4_0_3
On 2023-07-28 15:15, Samir Dhume wrote:
> initialization table handshake with mmsch
>
> Signed-off-by: Samir Dhume <samir.dhume@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 257 +++++++++++++++++++++---
> 1 file changed, 233 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> index 411c1d802823..b978265b2d77 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> @@ -31,6 +31,7 @@
> #include "soc15d.h"
> #include "soc15_hw_ip.h"
> #include "vcn_v2_0.h"
> +#include "mmsch_v4_0_3.h"
>
> #include "vcn/vcn_4_0_3_offset.h"
> #include "vcn/vcn_4_0_3_sh_mask.h"
> @@ -44,6 +45,7 @@
> #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00
> #define VCN1_VID_SOC_ADDRESS_3_0 0x48300
>
> +static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev);
> static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev);
> static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
> static int vcn_v4_0_3_set_powergating_state(void *handle, @@ -130,6
> +132,10 @@ static int vcn_v4_0_3_sw_init(void *handle)
> amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
> }
>
> + r = amdgpu_virt_alloc_mm_table(adev);
Since this function is not for bare-metal, please move amdgpu_sriov_vf() check from inside of the function to here, to avoid confusion.
> + if (r)
> + return r;
> +
> if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
> adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;
>
> @@ -167,6 +173,8 @@ static int vcn_v4_0_3_sw_fini(void *handle)
> drm_dev_exit(idx);
> }
>
> + amdgpu_virt_free_mm_table(adev);
Same as above.
Regards,
Leo
> +
> r = amdgpu_vcn_suspend(adev);
> if (r)
> return r;
> @@ -189,33 +197,47 @@ static int vcn_v4_0_3_hw_init(void *handle)
> struct amdgpu_ring *ring;
> int i, r, vcn_inst;
>
> - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
> - vcn_inst = GET_INST(VCN, i);
> - ring = &adev->vcn.inst[i].ring_enc[0];
> + if (amdgpu_sriov_vf(adev)) {
> + r = vcn_v4_0_3_start_sriov(adev);
> + if (r)
> + goto done;
>
> - if (ring->use_doorbell) {
> - adev->nbio.funcs->vcn_doorbell_range(
> - adev, ring->use_doorbell,
> - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
> - 9 * vcn_inst,
> - adev->vcn.inst[i].aid_id);
> -
> - WREG32_SOC15(
> - VCN, GET_INST(VCN, ring->me),
> - regVCN_RB1_DB_CTRL,
> - ring->doorbell_index
> - << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
> - VCN_RB1_DB_CTRL__EN_MASK);
> -
> - /* Read DB_CTRL to flush the write DB_CTRL command. */
> - RREG32_SOC15(
> - VCN, GET_INST(VCN, ring->me),
> - regVCN_RB1_DB_CTRL);
> + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
> + ring = &adev->vcn.inst[i].ring_enc[0];
> + ring->wptr = 0;
> + ring->wptr_old = 0;
> + vcn_v4_0_3_unified_ring_set_wptr(ring);
> + ring->sched.ready = true;
> }
> + } else {
> + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
> + vcn_inst = GET_INST(VCN, i);
> + ring = &adev->vcn.inst[i].ring_enc[0];
> +
> + if (ring->use_doorbell) {
> + adev->nbio.funcs->vcn_doorbell_range(
> + adev, ring->use_doorbell,
> + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
> + 9 * vcn_inst,
> + adev->vcn.inst[i].aid_id);
> +
> + WREG32_SOC15(
> + VCN, GET_INST(VCN, ring->me),
> + regVCN_RB1_DB_CTRL,
> + ring->doorbell_index
> + << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
> + VCN_RB1_DB_CTRL__EN_MASK);
> +
> + /* Read DB_CTRL to flush the write DB_CTRL command. */
> + RREG32_SOC15(
> + VCN, GET_INST(VCN, ring->me),
> + regVCN_RB1_DB_CTRL);
> + }
>
> - r = amdgpu_ring_test_helper(ring);
> - if (r)
> - goto done;
> + r = amdgpu_ring_test_helper(ring);
> + if (r)
> + goto done;
> + }
> }
>
> done:
> @@ -813,6 +835,193 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b
> return 0;
> }
>
> +static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev)
> +{
> + int i, vcn_inst;
> + struct amdgpu_ring *ring_enc;
> + uint64_t cache_addr;
> + uint64_t rb_enc_addr;
> + uint64_t ctx_addr;
> + uint32_t param, resp, expected;
> + uint32_t offset, cache_size;
> + uint32_t tmp, timeout;
> +
> + struct amdgpu_mm_table *table = &adev->virt.mm_table;
> + uint32_t *table_loc;
> + uint32_t table_size;
> + uint32_t size, size_dw;
> + uint32_t init_status;
> + uint32_t enabled_vcn;
> +
> + struct mmsch_v4_0_cmd_direct_write
> + direct_wt = { {0} };
> + struct mmsch_v4_0_cmd_direct_read_modify_write
> + direct_rd_mod_wt = { {0} };
> + struct mmsch_v4_0_cmd_end end = { {0} };
> + struct mmsch_v4_0_3_init_header header;
> +
> + volatile struct amdgpu_vcn4_fw_shared *fw_shared;
> + volatile struct amdgpu_fw_shared_rb_setup *rb_setup;
> +
> + direct_wt.cmd_header.command_type =
> + MMSCH_COMMAND__DIRECT_REG_WRITE;
> + direct_rd_mod_wt.cmd_header.command_type =
> + MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
> + end.cmd_header.command_type = MMSCH_COMMAND__END;
> +
> + for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
> + vcn_inst = GET_INST(VCN, i);
> +
> + memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header));
> + header.version = MMSCH_VERSION;
> + header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2;
> +
> + table_loc = (uint32_t *)table->cpu_addr;
> + table_loc += header.total_size;
> +
> + table_size = 0;
> +
> + MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS),
> + ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
> +
> + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
> +
> + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
> + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
> +
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
> + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
> +
> + offset = 0;
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_VCPU_CACHE_OFFSET0), 0);
> + } else {
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
> + lower_32_bits(adev->vcn.inst[i].gpu_addr));
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
> + upper_32_bits(adev->vcn.inst[i].gpu_addr));
> + offset = cache_size;
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_VCPU_CACHE_OFFSET0),
> + AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
> + }
> +
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_VCPU_CACHE_SIZE0),
> + cache_size);
> +
> + cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset;
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(cache_addr));
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_VCPU_CACHE_OFFSET1), 0);
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE);
> +
> + cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset +
> + AMDGPU_VCN_STACK_SIZE;
> +
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(cache_addr));
> +
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
> +
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_VCPU_CACHE_OFFSET2), 0);
> +
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE);
> +
> + fw_shared = adev->vcn.inst[vcn_inst].fw_shared.cpu_addr;
> + rb_setup = &fw_shared->rb_setup;
> +
> + ring_enc = &adev->vcn.inst[vcn_inst].ring_enc[0];
> + ring_enc->wptr = 0;
> + rb_enc_addr = ring_enc->gpu_addr;
> +
> + rb_setup->is_rb_enabled_flags |= RB_ENABLED;
> + rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
> + rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
> + rb_setup->rb_size = ring_enc->ring_size / 4;
> + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
> +
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
> + lower_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
> + upper_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_VCPU_NONCACHE_SIZE0),
> + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
> + MMSCH_V4_0_INSERT_END();
> +
> + header.vcn0.init_status = 0;
> + header.vcn0.table_offset = header.total_size;
> + header.vcn0.table_size = table_size;
> + header.total_size += table_size;
> +
> + /* Send init table to mmsch */
> + size = sizeof(struct mmsch_v4_0_3_init_header);
> + table_loc = (uint32_t *)table->cpu_addr;
> + memcpy((void *)table_loc, &header, size);
> +
> + ctx_addr = table->gpu_addr;
> + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
> + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
> +
> + tmp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID);
> + tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
> + tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
> + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID, tmp);
> +
> + size = header.total_size;
> + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_SIZE, size);
> +
> + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP, 0);
> +
> + param = 0x00000001;
> + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_HOST, param);
> + tmp = 0;
> + timeout = 1000;
> + resp = 0;
> + expected = MMSCH_VF_MAILBOX_RESP__OK;
> + while (resp != expected) {
> + resp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP);
> + if (resp != 0)
> + break;
> +
> + udelay(10);
> + tmp = tmp + 10;
> + if (tmp >= timeout) {
> + DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
> + " waiting for regMMSCH_VF_MAILBOX_RESP "\
> + "(expected=0x%08x, readback=0x%08x)\n",
> + tmp, expected, resp);
> + return -EBUSY;
> + }
> + }
> +
> + enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0;
> + init_status = ((struct mmsch_v4_0_3_init_header *)(table_loc))->vcn0.init_status;
> + if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
> + && init_status != MMSCH_VF_ENGINE_STATUS__PASS) {
> + DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\
> + "status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status);
> + }
> + }
> +
> + return 0;
> +}
> +
> /**
> * vcn_v4_0_3_start - VCN start
> *
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2023-08-08 16:31 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-28 19:15 [PATCH v3 1/7] drm/amdgpu/vcn: Add MMSCH v4_0_3 support for sriov Samir Dhume
2023-07-28 19:15 ` [PATCH v3 2/7] drm/amdgpu/vcn : Skip vcn power-gating change " Samir Dhume
2023-07-28 19:15 ` [PATCH v3 3/7] drm/amdgpu/vcn: sriov support for vcn_v4_0_3 Samir Dhume
2023-08-08 12:28 ` Leo Liu
2023-08-08 16:31 ` Dhume, Samir
2023-07-28 19:15 ` [PATCH v3 4/7] drm/amdgpu/vcn: mmsch_v3_0_4 requires doorbell on 32 byte boundary Samir Dhume
2023-07-28 19:15 ` [PATCH v3 5/7] drm/amdgpu/jpeg: sriov support for jpeg_v4_0_3 Samir Dhume
2023-07-28 19:15 ` [PATCH v3 6/7] drm/amdgpu/jpeg: mmsch_v3_0_4 requires doorbell on 32 byte boundary Samir Dhume
2023-07-28 19:15 ` [PATCH v3 7/7] drm/amdgpu/vcn: change end doorbell index for vcn_v4_0_3 Samir Dhume
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.