* [PATCH v2 2/3] drm/amdgpu: sdma support for sriov cpx mode
2024-03-15 19:50 [PATCH v2 1/3] drm/amdgpu: function to read physical xcc_id Samir Dhume
@ 2024-03-15 19:50 ` Samir Dhume
2024-03-15 19:50 ` [PATCH v2 3/3] drm/amdgpu/jpeg: " Samir Dhume
1 sibling, 0 replies; 4+ messages in thread
From: Samir Dhume @ 2024-03-15 19:50 UTC (permalink / raw)
To: amd-gfx; +Cc: samir.dhume, lijo.lazar, gavin.wan, leo.liu, Alexander.Deucher
SDMA has 2 instances in SRIOV CPX mode. Odd-numbered VFs have the
sdma0/sdma1 instances. Even-numbered VFs have sdma2/sdma3.
Changes involve
1. identifying odd/even numbered VF
2. registering correct number of instances with irq handler
3. mapping instance number with IH client-id depending upon
whether vf is odd/even numbered.
v2:
1. fix for correct number of instances registered with irq
2. remove mmio access from interrupt handler. Use xcc_mask to
detect cpx mode.
Signed-off-by: Samir Dhume <samir.dhume@amd.com>
---
drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 63 ++++++++++++++++--------
1 file changed, 43 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index eaa4f5f49949..117a7c692c0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -66,13 +66,28 @@ static u32 sdma_v4_4_2_get_reg_offset(struct amdgpu_device *adev,
return (adev->reg_offset[SDMA0_HWIP][dev_inst][0] + offset);
}
-static unsigned sdma_v4_4_2_seq_to_irq_id(int seq_num)
+static unsigned sdma_v4_4_2_seq_to_irq_id(struct amdgpu_device *adev, int seq_num)
{
+ bool sriov_cpx_odd = false;
+
+ /* check for sriov cpx mode odd/even vf */
+ if (amdgpu_sriov_vf(adev)) {
+ if (adev->gfx.xcc_mask == 0x1)
+ if (adev->gfx.funcs->get_xcc_id(adev, 0) & 0x1)
+ sriov_cpx_odd = true;
+ }
+
switch (seq_num) {
case 0:
- return SOC15_IH_CLIENTID_SDMA0;
+ if (sriov_cpx_odd)
+ return SOC15_IH_CLIENTID_SDMA2;
+ else
+ return SOC15_IH_CLIENTID_SDMA0;
case 1:
- return SOC15_IH_CLIENTID_SDMA1;
+ if (sriov_cpx_odd)
+ return SOC15_IH_CLIENTID_SDMA3;
+ else
+ return SOC15_IH_CLIENTID_SDMA1;
case 2:
return SOC15_IH_CLIENTID_SDMA2;
case 3:
@@ -82,7 +97,7 @@ static unsigned sdma_v4_4_2_seq_to_irq_id(int seq_num)
}
}
-static int sdma_v4_4_2_irq_id_to_seq(unsigned client_id)
+static int sdma_v4_4_2_irq_id_to_seq(struct amdgpu_device *adev, unsigned client_id)
{
switch (client_id) {
case SOC15_IH_CLIENTID_SDMA0:
@@ -90,9 +105,15 @@ static int sdma_v4_4_2_irq_id_to_seq(unsigned client_id)
case SOC15_IH_CLIENTID_SDMA1:
return 1;
case SOC15_IH_CLIENTID_SDMA2:
- return 2;
+ if (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1))
+ return 0;
+ else
+ return 2;
case SOC15_IH_CLIENTID_SDMA3:
- return 3;
+ if (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1))
+ return 1;
+ else
+ return 3;
default:
return -EINVAL;
}
@@ -1300,13 +1321,15 @@ static int sdma_v4_4_2_late_init(void *handle)
static int sdma_v4_4_2_sw_init(void *handle)
{
struct amdgpu_ring *ring;
- int r, i;
+ int r, i, num_irq_inst;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
u32 aid_id;
+ num_irq_inst = min(adev->sdma.num_instances, adev->sdma.num_inst_per_aid);
+
/* SDMA trap event */
- for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
- r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+ for (i = 0; i < num_irq_inst; i++) {
+ r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(adev, i),
SDMA0_4_0__SRCID__SDMA_TRAP,
&adev->sdma.trap_irq);
if (r)
@@ -1314,8 +1337,8 @@ static int sdma_v4_4_2_sw_init(void *handle)
}
/* SDMA SRAM ECC event */
- for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
- r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+ for (i = 0; i < num_irq_inst; i++) {
+ r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(adev, i),
SDMA0_4_0__SRCID__SDMA_SRAM_ECC,
&adev->sdma.ecc_irq);
if (r)
@@ -1323,26 +1346,26 @@ static int sdma_v4_4_2_sw_init(void *handle)
}
/* SDMA VM_HOLE/DOORBELL_INV/POLL_TIMEOUT/SRBM_WRITE_PROTECTION event*/
- for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
- r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+ for (i = 0; i < num_irq_inst; i++) {
+ r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(adev, i),
SDMA0_4_0__SRCID__SDMA_VM_HOLE,
&adev->sdma.vm_hole_irq);
if (r)
return r;
- r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+ r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(adev, i),
SDMA0_4_0__SRCID__SDMA_DOORBELL_INVALID,
&adev->sdma.doorbell_invalid_irq);
if (r)
return r;
- r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+ r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(adev, i),
SDMA0_4_0__SRCID__SDMA_POLL_TIMEOUT,
&adev->sdma.pool_timeout_irq);
if (r)
return r;
- r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+ r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(adev, i),
SDMA0_4_0__SRCID__SDMA_SRBMWRITE,
&adev->sdma.srbm_write_irq);
if (r)
@@ -1541,7 +1564,7 @@ static int sdma_v4_4_2_process_trap_irq(struct amdgpu_device *adev,
uint32_t instance, i;
DRM_DEBUG("IH: SDMA trap\n");
- instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
+ instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
/* Client id gives the SDMA instance in AID. To know the exact SDMA
* instance, interrupt entry gives the node id which corresponds to the AID instance.
@@ -1584,7 +1607,7 @@ static int sdma_v4_4_2_process_ras_data_cb(struct amdgpu_device *adev,
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA))
goto out;
- instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
+ instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
if (instance < 0)
goto out;
@@ -1603,7 +1626,7 @@ static int sdma_v4_4_2_process_illegal_inst_irq(struct amdgpu_device *adev,
DRM_ERROR("Illegal instruction in SDMA command stream\n");
- instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
+ instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
if (instance < 0)
return 0;
@@ -1647,7 +1670,7 @@ static int sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev,
struct amdgpu_task_info *task_info;
u64 addr;
- instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
+ instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
if (instance < 0 || instance >= adev->sdma.num_instances) {
dev_err(adev->dev, "sdma instance invalid %d\n", instance);
return -EINVAL;
--
2.34.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH v2 3/3] drm/amdgpu/jpeg: support for sriov cpx mode
2024-03-15 19:50 [PATCH v2 1/3] drm/amdgpu: function to read physical xcc_id Samir Dhume
2024-03-15 19:50 ` [PATCH v2 2/3] drm/amdgpu: sdma support for sriov cpx mode Samir Dhume
@ 2024-03-15 19:50 ` Samir Dhume
2024-03-18 13:19 ` Liu, Leo
1 sibling, 1 reply; 4+ messages in thread
From: Samir Dhume @ 2024-03-15 19:50 UTC (permalink / raw)
To: amd-gfx; +Cc: samir.dhume, lijo.lazar, gavin.wan, leo.liu, Alexander.Deucher
In SRIOV CPX mode, each VF has 4 JPEG engines. The even-numbered
VFs point to the JPEG0 block of the AID and the odd-numbered VFs
point to the JPEG1 block.
                     Even-numbered VFs    Odd-numbered VFs
VCN doorbell 0       VCN Decode ring      VCN Decode ring
VCN doorbell 1-3     Reserved             Reserved
VCN doorbell 4       JPEG0-0 ring
VCN doorbell 5       JPEG0-1 ring
VCN doorbell 6       JPEG0-2 ring
VCN doorbell 7       JPEG0-3 ring
VCN doorbell 8                            JPEG1-0 ring
VCN doorbell 9                            JPEG1-1 ring
VCN doorbell 10                           JPEG1-2 ring
VCN doorbell 11                           JPEG1-3 ring
Changes involve
1. sriov cpx mode - 4 rings
2. sriov cpx mode for odd-numbered VFs - register correct src-ids
(starting with JPEG4). Map src-id to correct instance in the
interrupt handler.
v2:
1. removed mmio access from interrupt handler. Use xcc_mask to detect
cpx mode.
2. remove unnecessary sriov variables
Signed-off-by: Samir Dhume <samir.dhume@amd.com>
---
drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 60 +++++++++++++++++++++---
1 file changed, 53 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index 32caeb37cef9..d95ca797412c 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -68,6 +68,11 @@ static int jpeg_v4_0_3_early_init(void *handle)
adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS;
+ /* check for sriov cpx mode */
+ if (amdgpu_sriov_vf(adev))
+ if (adev->gfx.xcc_mask == 0x1)
+ adev->jpeg.num_jpeg_rings = 4;
+
jpeg_v4_0_3_set_dec_ring_funcs(adev);
jpeg_v4_0_3_set_irq_funcs(adev);
jpeg_v4_0_3_set_ras_funcs(adev);
@@ -87,11 +92,25 @@ static int jpeg_v4_0_3_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_ring *ring;
int i, j, r, jpeg_inst;
+ bool sriov_cpx_odd = false;
+
+ /* check for sriov cpx mode odd/even numbered vfs */
+ if (amdgpu_sriov_vf(adev)) {
+ if (adev->gfx.xcc_mask == 0x1) {
+ if (adev->gfx.funcs->get_xcc_id(adev, 0) & 0x1)
+ sriov_cpx_odd = true;
+ }
+ }
for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
/* JPEG TRAP */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ if (!sriov_cpx_odd)
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
amdgpu_ih_srcid_jpeg[j], &adev->jpeg.inst->irq);
+ else
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ amdgpu_ih_srcid_jpeg[j+4], &adev->jpeg.inst->irq);
+
if (r)
return r;
}
@@ -116,10 +135,14 @@ static int jpeg_v4_0_3_sw_init(void *handle)
(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
1 + j + 9 * jpeg_inst;
} else {
- if (j < 4)
+ if ((j < 4) && (!sriov_cpx_odd))
ring->doorbell_index =
(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
4 + j + 32 * jpeg_inst;
+ else if (sriov_cpx_odd)
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 12 + j + 32 * jpeg_inst;
else
ring->doorbell_index =
(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
@@ -186,6 +209,7 @@ static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev)
uint32_t size, size_dw, item_offset;
uint32_t init_status;
int i, j, jpeg_inst;
+ bool cpx_odd = false;
struct mmsch_v4_0_cmd_direct_write
direct_wt = { {0} };
@@ -197,6 +221,12 @@ static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev)
end.cmd_header.command_type =
MMSCH_COMMAND__END;
+ /* check for cpx mode odd/even numbered vf */
+ if (adev->gfx.xcc_mask == 0x1) {
+ if (adev->gfx.funcs->get_xcc_id(adev, 0) & 0x1)
+ cpx_odd = true;
+ }
+
for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
jpeg_inst = GET_INST(JPEG, i);
@@ -220,10 +250,14 @@ static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev)
tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_SIZE);
MMSCH_V4_0_INSERT_DIRECT_WT(tmp, ring->ring_size / 4);
- if (j <= 3) {
+ if ((j <= 3) && (!cpx_odd)) {
header.mjpegdec0[j].table_offset = item_offset;
header.mjpegdec0[j].init_status = 0;
header.mjpegdec0[j].table_size = table_size;
+ } else if (cpx_odd) {
+ header.mjpegdec1[j].table_offset = item_offset;
+ header.mjpegdec1[j].init_status = 0;
+ header.mjpegdec1[j].table_size = table_size;
} else {
header.mjpegdec1[j - 4].table_offset = item_offset;
header.mjpegdec1[j - 4].init_status = 0;
@@ -1015,16 +1049,28 @@ static int jpeg_v4_0_3_process_interrupt(struct amdgpu_device *adev,
amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[3]);
break;
case VCN_4_0__SRCID__JPEG4_DECODE:
- amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[4]);
+ if (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1))
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[0]);
+ else
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[4]);
break;
case VCN_4_0__SRCID__JPEG5_DECODE:
- amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[5]);
+ if (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1))
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[1]);
+ else
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[5]);
break;
case VCN_4_0__SRCID__JPEG6_DECODE:
- amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[6]);
+ if (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1))
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[2]);
+ else
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[6]);
break;
case VCN_4_0__SRCID__JPEG7_DECODE:
- amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[7]);
+ if (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1))
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[3]);
+ else
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[7]);
break;
default:
DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
--
2.34.1
^ permalink raw reply related [flat|nested] 4+ messages in thread