All of lore.kernel.org
 help / color / mirror / Atom feed
From: Nicolas Chautru <nicolas.chautru@intel.com>
To: dev@dpdk.org, thomas@monjalon.net
Cc: maxime.coquelin@redhat.com, trix@redhat.com, mdr@ashroe.eu,
	bruce.richardson@intel.com, hemant.agrawal@nxp.com,
	david.marchand@redhat.com, stephen@networkplumber.org,
	hernan.vargas@intel.com, Nic Chautru <nicolas.chautru@intel.com>
Subject: [PATCH v7 09/14] baseband/acc: add LTE processing functions
Date: Thu, 29 Sep 2022 14:11:23 -0700	[thread overview]
Message-ID: <20220929211128.36381-10-nicolas.chautru@intel.com> (raw)
In-Reply-To: <20220929211128.36381-1-nicolas.chautru@intel.com>

From: Nic Chautru <nicolas.chautru@intel.com>

Add functions and capability for 4G FEC

Signed-off-by: Nic Chautru <nicolas.chautru@intel.com>
---
 drivers/baseband/acc/rte_acc200_pmd.c | 849 +++++++++++++++++++++++++-
 1 file changed, 845 insertions(+), 4 deletions(-)

diff --git a/drivers/baseband/acc/rte_acc200_pmd.c b/drivers/baseband/acc/rte_acc200_pmd.c
index 229e5fba86..0c7311f4cd 100644
--- a/drivers/baseband/acc/rte_acc200_pmd.c
+++ b/drivers/baseband/acc/rte_acc200_pmd.c
@@ -658,6 +658,46 @@ acc200_dev_info_get(struct rte_bbdev *dev,
 	struct acc_device *d = dev->data->dev_private;
 	int i;
 	static const struct rte_bbdev_op_cap bbdev_capabilities[] = {
+		{
+			.type = RTE_BBDEV_OP_TURBO_DEC,
+			.cap.turbo_dec = {
+				.capability_flags =
+					RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
+					RTE_BBDEV_TURBO_CRC_TYPE_24B |
+					RTE_BBDEV_TURBO_EQUALIZER |
+					RTE_BBDEV_TURBO_SOFT_OUT_SATURATE |
+					RTE_BBDEV_TURBO_HALF_ITERATION_EVEN |
+					RTE_BBDEV_TURBO_CONTINUE_CRC_MATCH |
+					RTE_BBDEV_TURBO_SOFT_OUTPUT |
+					RTE_BBDEV_TURBO_EARLY_TERMINATION |
+					RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
+					RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT |
+					RTE_BBDEV_TURBO_MAP_DEC |
+					RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
+					RTE_BBDEV_TURBO_DEC_SCATTER_GATHER,
+				.max_llr_modulus = INT8_MAX,
+				.num_buffers_src =
+						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+				.num_buffers_hard_out =
+						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+				.num_buffers_soft_out =
+						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+			}
+		},
+		{
+			.type = RTE_BBDEV_OP_TURBO_ENC,
+			.cap.turbo_enc = {
+				.capability_flags =
+					RTE_BBDEV_TURBO_CRC_24B_ATTACH |
+					RTE_BBDEV_TURBO_RV_INDEX_BYPASS |
+					RTE_BBDEV_TURBO_RATE_MATCH |
+					RTE_BBDEV_TURBO_ENC_SCATTER_GATHER,
+				.num_buffers_src =
+						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+				.num_buffers_dst =
+						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+			}
+		},
 		{
 			.type   = RTE_BBDEV_OP_LDPC_ENC,
 			.cap.ldpc_enc = {
@@ -709,15 +749,17 @@ acc200_dev_info_get(struct rte_bbdev *dev,
 
 	/* Exposed number of queues */
 	dev_info->num_queues[RTE_BBDEV_OP_NONE] = 0;
-	dev_info->num_queues[RTE_BBDEV_OP_TURBO_DEC] = 0;
-	dev_info->num_queues[RTE_BBDEV_OP_TURBO_ENC] = 0;
+	dev_info->num_queues[RTE_BBDEV_OP_TURBO_DEC] = d->acc_conf.q_ul_4g.num_aqs_per_groups *
+			d->acc_conf.q_ul_4g.num_qgroups;
+	dev_info->num_queues[RTE_BBDEV_OP_TURBO_ENC] = d->acc_conf.q_dl_4g.num_aqs_per_groups *
+			d->acc_conf.q_dl_4g.num_qgroups;
 	dev_info->num_queues[RTE_BBDEV_OP_LDPC_DEC] = d->acc_conf.q_ul_5g.num_aqs_per_groups *
 			d->acc_conf.q_ul_5g.num_qgroups;
 	dev_info->num_queues[RTE_BBDEV_OP_LDPC_ENC] = d->acc_conf.q_dl_5g.num_aqs_per_groups *
 			d->acc_conf.q_dl_5g.num_qgroups;
 	dev_info->num_queues[RTE_BBDEV_OP_FFT] = 0;
-	dev_info->queue_priority[RTE_BBDEV_OP_TURBO_DEC] = 0;
-	dev_info->queue_priority[RTE_BBDEV_OP_TURBO_ENC] = 0;
+	dev_info->queue_priority[RTE_BBDEV_OP_TURBO_DEC] = d->acc_conf.q_ul_4g.num_qgroups;
+	dev_info->queue_priority[RTE_BBDEV_OP_TURBO_ENC] = d->acc_conf.q_dl_4g.num_qgroups;
 	dev_info->queue_priority[RTE_BBDEV_OP_LDPC_DEC] = d->acc_conf.q_ul_5g.num_qgroups;
 	dev_info->queue_priority[RTE_BBDEV_OP_LDPC_ENC] = d->acc_conf.q_dl_5g.num_qgroups;
 	dev_info->queue_priority[RTE_BBDEV_OP_FFT] = 0;
@@ -762,6 +804,70 @@ static struct rte_pci_id pci_id_acc200_vf_map[] = {
 	{.device_id = 0},
 };
 
+/* Fill in a frame control word for turbo decoding. */
+static inline void
+acc200_fcw_td_fill(const struct rte_bbdev_dec_op *op, struct acc_fcw_td *fcw)
+{
+	fcw->fcw_ver = 1;
+	fcw->num_maps = ACC_FCW_TD_AUTOMAP;
+	fcw->bypass_sb_deint = !check_bit(op->turbo_dec.op_flags,
+			RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE);
+	if (op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
+		/* FIXME for TB block */
+		fcw->k_pos = op->turbo_dec.tb_params.k_pos;
+		fcw->k_neg = op->turbo_dec.tb_params.k_neg;
+	} else {
+		fcw->k_pos = op->turbo_dec.cb_params.k;
+		fcw->k_neg = op->turbo_dec.cb_params.k;
+	}
+	fcw->c = 1;
+	fcw->c_neg = 1;
+	if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
+		fcw->soft_output_en = 1;
+		fcw->sw_soft_out_dis = 0;
+		fcw->sw_et_cont = check_bit(op->turbo_dec.op_flags,
+				RTE_BBDEV_TURBO_CONTINUE_CRC_MATCH);
+		fcw->sw_soft_out_saturation = check_bit(op->turbo_dec.op_flags,
+				RTE_BBDEV_TURBO_SOFT_OUT_SATURATE);
+		if (check_bit(op->turbo_dec.op_flags,
+				RTE_BBDEV_TURBO_EQUALIZER)) {
+			fcw->bypass_teq = 0;
+			fcw->ea = op->turbo_dec.cb_params.e;
+			fcw->eb = op->turbo_dec.cb_params.e;
+			if (op->turbo_dec.rv_index == 0)
+				fcw->k0_start_col = ACC_FCW_TD_RVIDX_0;
+			else if (op->turbo_dec.rv_index == 1)
+				fcw->k0_start_col = ACC_FCW_TD_RVIDX_1;
+			else if (op->turbo_dec.rv_index == 2)
+				fcw->k0_start_col = ACC_FCW_TD_RVIDX_2;
+			else
+				fcw->k0_start_col = ACC_FCW_TD_RVIDX_3;
+		} else {
+			fcw->bypass_teq = 1;
+			fcw->eb = 64; /* avoid undefined value */
+		}
+	} else {
+		fcw->soft_output_en = 0;
+		fcw->sw_soft_out_dis = 1;
+		fcw->bypass_teq = 0;
+	}
+
+	fcw->code_block_mode = 1; /* FIXME */
+	fcw->turbo_crc_type = check_bit(op->turbo_dec.op_flags,
+			RTE_BBDEV_TURBO_CRC_TYPE_24B);
+
+	fcw->ext_td_cold_reg_en = 1;
+	fcw->raw_decoder_input_on = 0;
+	fcw->max_iter = RTE_MAX((uint8_t) op->turbo_dec.iter_max, 2);
+	fcw->min_iter = 2;
+	fcw->half_iter_on = !check_bit(op->turbo_dec.op_flags,
+			RTE_BBDEV_TURBO_HALF_ITERATION_EVEN);
+
+	fcw->early_stop_en = check_bit(op->turbo_dec.op_flags,
+			RTE_BBDEV_TURBO_EARLY_TERMINATION) & !fcw->soft_output_en;
+	fcw->ext_scale = 0xF;
+}
+
 /* Fill in a frame control word for LDPC decoding. */
 static inline void
 acc200_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc_fcw_ld *fcw,
@@ -887,6 +993,205 @@ acc200_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc_fcw_ld *fcw,
 		fcw->tb_crc_select = 1;
 }
 
+static inline int
+acc200_dma_desc_te_fill(struct rte_bbdev_enc_op *op,
+		struct acc_dma_req_desc *desc, struct rte_mbuf **input,
+		struct rte_mbuf *output, uint32_t *in_offset,
+		uint32_t *out_offset, uint32_t *out_length,
+		uint32_t *mbuf_total_left, uint32_t *seg_total_left, uint8_t r)
+{
+	int next_triplet = 1; /* FCW already done */
+	uint32_t e, ea, eb, length;
+	uint16_t k, k_neg, k_pos;
+	uint8_t cab, c_neg;
+
+	desc->word0 = ACC_DMA_DESC_TYPE;
+	desc->word1 = 0; /**< Timestamp could be disabled */
+	desc->word2 = 0;
+	desc->word3 = 0;
+	desc->numCBs = 1;
+
+	if (op->turbo_enc.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
+		ea = op->turbo_enc.tb_params.ea;
+		eb = op->turbo_enc.tb_params.eb;
+		cab = op->turbo_enc.tb_params.cab;
+		k_neg = op->turbo_enc.tb_params.k_neg;
+		k_pos = op->turbo_enc.tb_params.k_pos;
+		c_neg = op->turbo_enc.tb_params.c_neg;
+		e = (r < cab) ? ea : eb;
+		k = (r < c_neg) ? k_neg : k_pos;
+	} else {
+		e = op->turbo_enc.cb_params.e;
+		k = op->turbo_enc.cb_params.k;
+	}
+
+	if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_CRC_24B_ATTACH))
+		length = (k - 24) >> 3;
+	else
+		length = k >> 3;
+
+	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < length))) {
+		rte_bbdev_log(ERR,
+				"Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
+				*mbuf_total_left, length);
+		return -1;
+	}
+
+	next_triplet = acc_dma_fill_blk_type_in(desc, input, in_offset,
+			length, seg_total_left, next_triplet,
+			check_bit(op->turbo_enc.op_flags,
+			RTE_BBDEV_TURBO_ENC_SCATTER_GATHER));
+	if (unlikely(next_triplet < 0)) {
+		rte_bbdev_log(ERR,
+				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
+				op);
+		return -1;
+	}
+	desc->data_ptrs[next_triplet - 1].last = 1;
+	desc->m2dlen = next_triplet;
+	*mbuf_total_left -= length;
+
+	/* Set output length */
+	if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_RATE_MATCH))
+		/* Integer round up division by 8 */
+		*out_length = (e + 7) >> 3;
+	else
+		*out_length = (k >> 3) * 3 + 2;
+
+	next_triplet = acc_dma_fill_blk_type(desc, output, *out_offset,
+			*out_length, next_triplet, ACC_DMA_BLKID_OUT_ENC);
+	if (unlikely(next_triplet < 0)) {
+		rte_bbdev_log(ERR,
+				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
+				op);
+		return -1;
+	}
+	op->turbo_enc.output.length += *out_length;
+	*out_offset += *out_length;
+	desc->data_ptrs[next_triplet - 1].last = 1;
+	desc->d2mlen = next_triplet - desc->m2dlen;
+
+	desc->op_addr = op;
+
+	return 0;
+}
+
+static inline int
+acc200_dma_desc_td_fill(struct rte_bbdev_dec_op *op,
+		struct acc_dma_req_desc *desc, struct rte_mbuf **input,
+		struct rte_mbuf *h_output, struct rte_mbuf *s_output,
+		uint32_t *in_offset, uint32_t *h_out_offset,
+		uint32_t *s_out_offset, uint32_t *h_out_length,
+		uint32_t *s_out_length, uint32_t *mbuf_total_left,
+		uint32_t *seg_total_left, uint8_t r)
+{
+	int next_triplet = 1; /* FCW already done */
+	uint16_t k;
+	uint16_t crc24_overlap = 0;
+	uint32_t e, kw;
+
+	desc->word0 = ACC_DMA_DESC_TYPE;
+	desc->word1 = 0; /**< Timestamp could be disabled */
+	desc->word2 = 0;
+	desc->word3 = 0;
+	desc->numCBs = 1;
+
+	if (op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
+		k = (r < op->turbo_dec.tb_params.c_neg)
+			? op->turbo_dec.tb_params.k_neg
+			: op->turbo_dec.tb_params.k_pos;
+		e = (r < op->turbo_dec.tb_params.cab)
+			? op->turbo_dec.tb_params.ea
+			: op->turbo_dec.tb_params.eb;
+	} else {
+		k = op->turbo_dec.cb_params.k;
+		e = op->turbo_dec.cb_params.e;
+	}
+
+	if ((op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
+		&& !check_bit(op->turbo_dec.op_flags,
+		RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP))
+		crc24_overlap = 24;
+
+	/* Calculates circular buffer size.
+	 * According to 3gpp 36.212 section 5.1.4.2
+	 *   Kw = 3 * Kpi,
+	 * where:
+	 *   Kpi = nCol * nRow
+	 * where nCol is 32 and nRow can be calculated from:
+	 *   D =< nCol * nRow
+	 * where D is the size of each output from turbo encoder block (k + 4).
+	 */
+	kw = RTE_ALIGN_CEIL(k + 4, 32) * 3;
+
+	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < kw))) {
+		rte_bbdev_log(ERR,
+				"Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
+				*mbuf_total_left, kw);
+		return -1;
+	}
+
+	next_triplet = acc_dma_fill_blk_type_in(desc, input, in_offset, kw,
+			seg_total_left, next_triplet,
+			check_bit(op->turbo_dec.op_flags,
+			RTE_BBDEV_TURBO_DEC_SCATTER_GATHER));
+	if (unlikely(next_triplet < 0)) {
+		rte_bbdev_log(ERR,
+				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
+				op);
+		return -1;
+	}
+	desc->data_ptrs[next_triplet - 1].last = 1;
+	desc->m2dlen = next_triplet;
+	*mbuf_total_left -= kw;
+	*h_out_length = ((k - crc24_overlap) >> 3);
+	next_triplet = acc_dma_fill_blk_type(
+			desc, h_output, *h_out_offset,
+			*h_out_length, next_triplet, ACC_DMA_BLKID_OUT_HARD);
+	if (unlikely(next_triplet < 0)) {
+		rte_bbdev_log(ERR,
+				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
+				op);
+		return -1;
+	}
+
+	op->turbo_dec.hard_output.length += *h_out_length;
+	*h_out_offset += *h_out_length;
+
+	/* Soft output */
+	if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
+		if (op->turbo_dec.soft_output.data == 0) {
+			rte_bbdev_log(ERR, "Soft output is not defined");
+			return -1;
+		}
+		if (check_bit(op->turbo_dec.op_flags,
+				RTE_BBDEV_TURBO_EQUALIZER))
+			*s_out_length = e;
+		else
+			*s_out_length = (k * 3) + 12;
+
+		next_triplet = acc_dma_fill_blk_type(desc, s_output,
+				*s_out_offset, *s_out_length, next_triplet,
+				ACC_DMA_BLKID_OUT_SOFT);
+		if (unlikely(next_triplet < 0)) {
+			rte_bbdev_log(ERR,
+					"Mismatch between data to process and mbuf data length in bbdev_op: %p",
+					op);
+			return -1;
+		}
+
+		op->turbo_dec.soft_output.length += *s_out_length;
+		*s_out_offset += *s_out_length;
+	}
+
+	desc->data_ptrs[next_triplet - 1].last = 1;
+	desc->d2mlen = next_triplet - desc->m2dlen;
+
+	desc->op_addr = op;
+
+	return 0;
+}
+
 static inline int
 acc200_dma_desc_ld_fill(struct rte_bbdev_dec_op *op,
 		struct acc_dma_req_desc *desc,
@@ -1059,6 +1364,49 @@ acc200_dma_desc_ld_update(struct rte_bbdev_dec_op *op,
 	desc->op_addr = op;
 }
 
+/* Enqueue one encode operations for ACC200 device in CB mode */
+static inline int
+enqueue_enc_one_op_cb(struct acc_queue *q, struct rte_bbdev_enc_op *op,
+		uint16_t total_enqueued_cbs)
+{
+	union acc_dma_desc *desc = NULL;
+	int ret;
+	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
+		seg_total_left;
+	struct rte_mbuf *input, *output_head, *output;
+
+	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
+			& q->sw_ring_wrap_mask);
+	desc = q->ring_addr + desc_idx;
+	acc_fcw_te_fill(op, &desc->req.fcw_te);
+
+	input = op->turbo_enc.input.data;
+	output_head = output = op->turbo_enc.output.data;
+	in_offset = op->turbo_enc.input.offset;
+	out_offset = op->turbo_enc.output.offset;
+	out_length = 0;
+	mbuf_total_left = op->turbo_enc.input.length;
+	seg_total_left = rte_pktmbuf_data_len(op->turbo_enc.input.data)
+			- in_offset;
+
+	ret = acc200_dma_desc_te_fill(op, &desc->req, &input, output,
+			&in_offset, &out_offset, &out_length, &mbuf_total_left,
+			&seg_total_left, 0);
+
+	if (unlikely(ret < 0))
+		return ret;
+
+	mbuf_append(output_head, output, out_length);
+
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+	rte_memdump(stderr, "FCW", &desc->req.fcw_te,
+			sizeof(desc->req.fcw_te) - 8);
+	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
+#endif
+	/* One CB (one op) was successfully prepared to enqueue */
+	return 1;
+}
+
 /* Enqueue one encode operations for ACC200 device in CB mode
  * multiplexed on the same descriptor
  */
@@ -1175,6 +1523,78 @@ enqueue_ldpc_enc_part_tb(struct acc_queue *q, struct rte_bbdev_enc_op *op,
 
 }
 
+/* Enqueue one encode operations for ACC200 device in TB mode. */
+static inline int
+enqueue_enc_one_op_tb(struct acc_queue *q, struct rte_bbdev_enc_op *op,
+		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb)
+{
+	union acc_dma_desc *desc = NULL;
+	int ret;
+	uint8_t r, c;
+	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
+		seg_total_left;
+	struct rte_mbuf *input, *output_head, *output;
+	uint16_t current_enqueued_cbs = 0;
+
+	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
+			& q->sw_ring_wrap_mask);
+	desc = q->ring_addr + desc_idx;
+	uint64_t fcw_offset = (desc_idx << 8) + ACC_DESC_FCW_OFFSET;
+	acc_fcw_te_fill(op, &desc->req.fcw_te);
+
+	input = op->turbo_enc.input.data;
+	output_head = output = op->turbo_enc.output.data;
+	in_offset = op->turbo_enc.input.offset;
+	out_offset = op->turbo_enc.output.offset;
+	out_length = 0;
+	mbuf_total_left = op->turbo_enc.input.length;
+
+	c = op->turbo_enc.tb_params.c;
+	r = op->turbo_enc.tb_params.r;
+
+	while (mbuf_total_left > 0 && r < c) {
+		seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
+		/* Set up DMA descriptor */
+		desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
+				& q->sw_ring_wrap_mask);
+		desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset;
+		desc->req.data_ptrs[0].blen = ACC_FCW_TE_BLEN;
+
+		ret = acc200_dma_desc_te_fill(op, &desc->req, &input, output,
+				&in_offset, &out_offset, &out_length,
+				&mbuf_total_left, &seg_total_left, r);
+		if (unlikely(ret < 0))
+			return ret;
+		mbuf_append(output_head, output, out_length);
+
+		/* Set total number of CBs in TB */
+		desc->req.cbs_in_tb = cbs_in_tb;
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+		rte_memdump(stderr, "FCW", &desc->req.fcw_te,
+				sizeof(desc->req.fcw_te) - 8);
+		rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
+#endif
+
+		if (seg_total_left == 0) {
+			/* Go to the next mbuf */
+			input = input->next;
+			in_offset = 0;
+			output = output->next;
+			out_offset = 0;
+		}
+
+		total_enqueued_cbs++;
+		current_enqueued_cbs++;
+		r++;
+	}
+
+	/* Set SDone on last CB descriptor for TB mode. */
+	desc->req.sdone_enable = 1;
+	desc->req.irq_enable = q->irq_enable;
+
+	return current_enqueued_cbs;
+}
+
 /* Enqueue one encode operations for ACC200 device in TB mode.
  * returns the number of descs used
  */
@@ -1240,6 +1660,62 @@ enqueue_ldpc_enc_one_op_tb(struct acc_queue *q, struct rte_bbdev_enc_op *op,
 	return return_descs;
 }
 
+/** Enqueue one decode operations for ACC200 device in CB mode */
+static inline int
+enqueue_dec_one_op_cb(struct acc_queue *q, struct rte_bbdev_dec_op *op,
+		uint16_t total_enqueued_cbs)
+{
+	union acc_dma_desc *desc = NULL;
+	int ret;
+	uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
+		h_out_length, mbuf_total_left, seg_total_left;
+	struct rte_mbuf *input, *h_output_head, *h_output,
+		*s_output_head, *s_output;
+
+	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
+			& q->sw_ring_wrap_mask);
+	desc = q->ring_addr + desc_idx;
+	acc200_fcw_td_fill(op, &desc->req.fcw_td);
+
+	input = op->turbo_dec.input.data;
+	h_output_head = h_output = op->turbo_dec.hard_output.data;
+	s_output_head = s_output = op->turbo_dec.soft_output.data;
+	in_offset = op->turbo_dec.input.offset;
+	h_out_offset = op->turbo_dec.hard_output.offset;
+	s_out_offset = op->turbo_dec.soft_output.offset;
+	h_out_length = s_out_length = 0;
+	mbuf_total_left = op->turbo_dec.input.length;
+	seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
+
+	/* Set up DMA descriptor */
+	desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
+			& q->sw_ring_wrap_mask);
+
+	ret = acc200_dma_desc_td_fill(op, &desc->req, &input, h_output,
+			s_output, &in_offset, &h_out_offset, &s_out_offset,
+			&h_out_length, &s_out_length, &mbuf_total_left,
+			&seg_total_left, 0);
+
+	if (unlikely(ret < 0))
+		return ret;
+
+	/* Hard output */
+	mbuf_append(h_output_head, h_output, h_out_length);
+
+	/* Soft output */
+	if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT))
+		mbuf_append(s_output_head, s_output, s_out_length);
+
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+	rte_memdump(stderr, "FCW", &desc->req.fcw_td,
+			sizeof(desc->req.fcw_td));
+	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
+#endif
+
+	/* One CB (one op) was successfully prepared to enqueue */
+	return 1;
+}
+
 /** Enqueue one decode operations for ACC200 device in CB mode */
 static inline int
 enqueue_ldpc_dec_one_op_cb(struct acc_queue *q, struct rte_bbdev_dec_op *op,
@@ -1437,6 +1913,139 @@ enqueue_ldpc_dec_one_op_tb(struct acc_queue *q, struct rte_bbdev_dec_op *op,
 	return current_enqueued_cbs;
 }
 
+/* Enqueue one decode operations for ACC200 device in TB mode */
+static inline int
+enqueue_dec_one_op_tb(struct acc_queue *q, struct rte_bbdev_dec_op *op,
+		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb)
+{
+	union acc_dma_desc *desc = NULL;
+	int ret;
+	uint8_t r, c;
+	uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
+		h_out_length, mbuf_total_left, seg_total_left;
+	struct rte_mbuf *input, *h_output_head, *h_output,
+		*s_output_head, *s_output;
+	uint16_t current_enqueued_cbs = 0;
+
+	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
+			& q->sw_ring_wrap_mask);
+	desc = q->ring_addr + desc_idx;
+	uint64_t fcw_offset = (desc_idx << 8) + ACC_DESC_FCW_OFFSET;
+	acc200_fcw_td_fill(op, &desc->req.fcw_td);
+
+	input = op->turbo_dec.input.data;
+	h_output_head = h_output = op->turbo_dec.hard_output.data;
+	s_output_head = s_output = op->turbo_dec.soft_output.data;
+	in_offset = op->turbo_dec.input.offset;
+	h_out_offset = op->turbo_dec.hard_output.offset;
+	s_out_offset = op->turbo_dec.soft_output.offset;
+	h_out_length = s_out_length = 0;
+	mbuf_total_left = op->turbo_dec.input.length;
+	c = op->turbo_dec.tb_params.c;
+	r = op->turbo_dec.tb_params.r;
+
+	while (mbuf_total_left > 0 && r < c) {
+
+		seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
+
+		/* Set up DMA descriptor */
+		desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
+				& q->sw_ring_wrap_mask);
+		desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset;
+		desc->req.data_ptrs[0].blen = ACC_FCW_TD_BLEN;
+		ret = acc200_dma_desc_td_fill(op, &desc->req, &input,
+				h_output, s_output, &in_offset, &h_out_offset,
+				&s_out_offset, &h_out_length, &s_out_length,
+				&mbuf_total_left, &seg_total_left, r);
+
+		if (unlikely(ret < 0))
+			return ret;
+
+		/* Hard output */
+		mbuf_append(h_output_head, h_output, h_out_length);
+
+		/* Soft output */
+		if (check_bit(op->turbo_dec.op_flags,
+				RTE_BBDEV_TURBO_SOFT_OUTPUT))
+			mbuf_append(s_output_head, s_output, s_out_length);
+
+		/* Set total number of CBs in TB */
+		desc->req.cbs_in_tb = cbs_in_tb;
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+		rte_memdump(stderr, "FCW", &desc->req.fcw_td,
+				sizeof(desc->req.fcw_td) - 8);
+		rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
+#endif
+
+		if (seg_total_left == 0) {
+			/* Go to the next mbuf */
+			input = input->next;
+			in_offset = 0;
+			h_output = h_output->next;
+			h_out_offset = 0;
+
+			if (check_bit(op->turbo_dec.op_flags,
+					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
+				s_output = s_output->next;
+				s_out_offset = 0;
+			}
+		}
+
+		total_enqueued_cbs++;
+		current_enqueued_cbs++;
+		r++;
+	}
+
+	/* Set SDone on last CB descriptor for TB mode */
+	desc->req.sdone_enable = 1;
+	desc->req.irq_enable = q->irq_enable;
+
+	return current_enqueued_cbs;
+}
+
+/* Enqueue encode operations for ACC200 device in CB mode. */
+static uint16_t
+acc200_enqueue_enc_cb(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_enc_op **ops, uint16_t num)
+{
+	struct acc_queue *q = q_data->queue_private;
+	int32_t avail = acc_ring_avail_enq(q);
+	uint16_t i;
+	union acc_dma_desc *desc;
+	int ret;
+
+	for (i = 0; i < num; ++i) {
+		/* Check if there are available space for further processing */
+		if (unlikely(avail - 1 < 0)) {
+			acc_enqueue_ring_full(q_data);
+			break;
+		}
+		avail -= 1;
+
+		ret = enqueue_enc_one_op_cb(q, ops[i], i);
+		if (ret < 0) {
+			acc_enqueue_invalid(q_data);
+			break;
+		}
+	}
+
+	if (unlikely(i == 0))
+		return 0; /* Nothing to enqueue */
+
+	/* Set SDone in last CB in enqueued ops for CB mode*/
+	desc = q->ring_addr + ((q->sw_ring_head + i - 1)
+			& q->sw_ring_wrap_mask);
+	desc->req.sdone_enable = 1;
+	desc->req.irq_enable = q->irq_enable;
+
+	acc_dma_enqueue(q, i, &q_data->queue_stats);
+
+	/* Update stats */
+	q_data->queue_stats.enqueued_count += i;
+	q_data->queue_stats.enqueue_err_count += num - i;
+	return i;
+}
+
 /** Enqueue encode operations for ACC200 device in CB mode. */
 static inline uint16_t
 acc200_enqueue_ldpc_enc_cb(struct rte_bbdev_queue_data *q_data,
@@ -1485,6 +2094,45 @@ acc200_enqueue_ldpc_enc_cb(struct rte_bbdev_queue_data *q_data,
 	return i;
 }
 
+/* Enqueue encode operations for ACC200 device in TB mode. */
+static uint16_t
+acc200_enqueue_enc_tb(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_enc_op **ops, uint16_t num)
+{
+	struct acc_queue *q = q_data->queue_private;
+	int32_t avail = acc_ring_avail_enq(q);
+	uint16_t i, enqueued_cbs = 0;
+	uint8_t cbs_in_tb;
+	int ret;
+
+	for (i = 0; i < num; ++i) {
+		cbs_in_tb = get_num_cbs_in_tb_enc(&ops[i]->turbo_enc);
+		/* Check if there are available space for further processing */
+		if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) {
+			acc_enqueue_ring_full(q_data);
+			break;
+		}
+		avail -= cbs_in_tb;
+
+		ret = enqueue_enc_one_op_tb(q, ops[i], enqueued_cbs, cbs_in_tb);
+		if (ret <= 0) {
+			acc_enqueue_invalid(q_data);
+			break;
+		}
+		enqueued_cbs += ret;
+	}
+	if (unlikely(enqueued_cbs == 0))
+		return 0; /* Nothing to enqueue */
+
+	acc_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
+
+	/* Update stats */
+	q_data->queue_stats.enqueued_count += i;
+	q_data->queue_stats.enqueue_err_count += num - i;
+
+	return i;
+}
+
 /* Enqueue LDPC encode operations for ACC200 device in TB mode. */
 static uint16_t
 acc200_enqueue_ldpc_enc_tb(struct rte_bbdev_queue_data *q_data,
@@ -1525,6 +2173,20 @@ acc200_enqueue_ldpc_enc_tb(struct rte_bbdev_queue_data *q_data,
 	return i;
 }
 
+/* Enqueue encode operations for ACC200 device. */
+static uint16_t
+acc200_enqueue_enc(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_enc_op **ops, uint16_t num)
+{
+	int32_t aq_avail = acc_aq_avail(q_data, num);
+	if (unlikely((aq_avail <= 0) || (num == 0)))
+		return 0;
+	if (ops[0]->turbo_enc.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
+		return acc200_enqueue_enc_tb(q_data, ops, num);
+	else
+		return acc200_enqueue_enc_cb(q_data, ops, num);
+}
+
 /* Enqueue encode operations for ACC200 device. */
 static uint16_t
 acc200_enqueue_ldpc_enc(struct rte_bbdev_queue_data *q_data,
@@ -1539,6 +2201,47 @@ acc200_enqueue_ldpc_enc(struct rte_bbdev_queue_data *q_data,
 		return acc200_enqueue_ldpc_enc_cb(q_data, ops, num);
 }
 
+
+/* Enqueue decode operations for ACC200 device in CB mode */
+static uint16_t
+acc200_enqueue_dec_cb(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_dec_op **ops, uint16_t num)
+{
+	struct acc_queue *q = q_data->queue_private;
+	int32_t avail = acc_ring_avail_enq(q);
+	uint16_t i;
+	union acc_dma_desc *desc;
+	int ret;
+
+	for (i = 0; i < num; ++i) {
+		/* Check if there are available space for further processing */
+		if (unlikely(avail - 1 < 0))
+			break;
+		avail -= 1;
+
+		ret = enqueue_dec_one_op_cb(q, ops[i], i);
+		if (ret < 0)
+			break;
+	}
+
+	if (unlikely(i == 0))
+		return 0; /* Nothing to enqueue */
+
+	/* Set SDone in last CB in enqueued ops for CB mode*/
+	desc = q->ring_addr + ((q->sw_ring_head + i - 1)
+			& q->sw_ring_wrap_mask);
+	desc->req.sdone_enable = 1;
+	desc->req.irq_enable = q->irq_enable;
+
+	acc_dma_enqueue(q, i, &q_data->queue_stats);
+
+	/* Update stats */
+	q_data->queue_stats.enqueued_count += i;
+	q_data->queue_stats.enqueue_err_count += num - i;
+
+	return i;
+}
+
 /* Enqueue decode operations for ACC200 device in TB mode */
 static uint16_t
 acc200_enqueue_ldpc_dec_tb(struct rte_bbdev_queue_data *q_data,
@@ -1623,6 +2326,58 @@ acc200_enqueue_ldpc_dec_cb(struct rte_bbdev_queue_data *q_data,
 	return i;
 }
 
+
+/* Enqueue decode operations for ACC200 device in TB mode */
+static uint16_t
+acc200_enqueue_dec_tb(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_dec_op **ops, uint16_t num)
+{
+	struct acc_queue *q = q_data->queue_private;
+	int32_t avail = acc_ring_avail_enq(q);
+	uint16_t i, enqueued_cbs = 0;
+	uint8_t cbs_in_tb;
+	int ret;
+
+	for (i = 0; i < num; ++i) {
+		cbs_in_tb = get_num_cbs_in_tb_dec(&ops[i]->turbo_dec);
+		/* Check if there are available space for further processing */
+		if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) {
+			acc_enqueue_ring_full(q_data);
+			break;
+		}
+		avail -= cbs_in_tb;
+
+		ret = enqueue_dec_one_op_tb(q, ops[i], enqueued_cbs, cbs_in_tb);
+		if (ret <= 0) {
+			acc_enqueue_invalid(q_data);
+			break;
+		}
+		enqueued_cbs += ret;
+	}
+
+	acc_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
+
+	/* Update stats */
+	q_data->queue_stats.enqueued_count += i;
+	q_data->queue_stats.enqueue_err_count += num - i;
+
+	return i;
+}
+
+/* Enqueue decode operations for ACC200 device. */
+static uint16_t
+acc200_enqueue_dec(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_dec_op **ops, uint16_t num)
+{
+	int32_t aq_avail = acc_aq_avail(q_data, num);
+	if (unlikely((aq_avail <= 0) || (num == 0)))
+		return 0;
+	if (ops[0]->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
+		return acc200_enqueue_dec_tb(q_data, ops, num);
+	else
+		return acc200_enqueue_dec_cb(q_data, ops, num);
+}
+
 /* Enqueue decode operations for ACC200 device. */
 static uint16_t
 acc200_enqueue_ldpc_dec(struct rte_bbdev_queue_data *q_data,
@@ -1978,6 +2733,48 @@ dequeue_dec_one_op_tb(struct acc_queue *q, struct rte_bbdev_dec_op **ref_op,
 	return cb_idx;
 }
 
+/* Dequeue encode operations from ACC200 device. */
+static uint16_t
+acc200_dequeue_enc(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_enc_op **ops, uint16_t num)
+{
+	struct acc_queue *q = q_data->queue_private;
+	uint32_t avail = acc_ring_avail_deq(q);
+	uint32_t aq_dequeued = 0;
+	uint16_t i, dequeued_ops = 0, dequeued_descs = 0;
+	int ret, cbm;
+	struct rte_bbdev_enc_op *op;
+	if (avail == 0)
+		return 0;
+	op = (q->ring_addr + (q->sw_ring_tail &
+			q->sw_ring_wrap_mask))->req.op_addr;
+
+	cbm = op->turbo_enc.code_block_mode;
+
+	for (i = 0; i < num; i++) {
+		if (cbm == RTE_BBDEV_TRANSPORT_BLOCK)
+			ret = dequeue_enc_one_op_tb(q, &ops[dequeued_ops],
+					&dequeued_ops, &aq_dequeued,
+					&dequeued_descs);
+		else
+			ret = dequeue_enc_one_op_cb(q, &ops[dequeued_ops],
+					&dequeued_ops, &aq_dequeued,
+					&dequeued_descs);
+		if (ret < 0)
+			break;
+		if (dequeued_ops >= num)
+			break;
+	}
+
+	q->aq_dequeued += aq_dequeued;
+	q->sw_ring_tail += dequeued_descs;
+
+	/* Update enqueue stats */
+	q_data->queue_stats.dequeued_count += dequeued_ops;
+
+	return dequeued_ops;
+}
+
 /* Dequeue LDPC encode operations from ACC200 device. */
 static uint16_t
 acc200_dequeue_ldpc_enc(struct rte_bbdev_queue_data *q_data,
@@ -2019,6 +2816,46 @@ acc200_dequeue_ldpc_enc(struct rte_bbdev_queue_data *q_data,
 	return dequeued_ops;
 }
 
+/* Dequeue decode operations from ACC200 device. */
+static uint16_t
+acc200_dequeue_dec(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_dec_op **ops, uint16_t num)
+{
+	struct acc_queue *q = q_data->queue_private;
+	uint16_t dequeue_num;
+	uint32_t avail = acc_ring_avail_deq(q);
+	uint32_t aq_dequeued = 0;
+	uint16_t i;
+	uint16_t dequeued_cbs = 0;
+	struct rte_bbdev_dec_op *op;
+	int ret;
+
+	dequeue_num = (avail < num) ? avail : num;
+
+	for (i = 0; i < dequeue_num; ++i) {
+		op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
+			& q->sw_ring_wrap_mask))->req.op_addr;
+		if (op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
+			ret = dequeue_dec_one_op_tb(q, &ops[i], dequeued_cbs,
+					&aq_dequeued);
+		else
+			ret = dequeue_dec_one_op_cb(q_data, q, &ops[i],
+					dequeued_cbs, &aq_dequeued);
+
+		if (ret <= 0)
+			break;
+		dequeued_cbs += ret;
+	}
+
+	q->aq_dequeued += aq_dequeued;
+	q->sw_ring_tail += dequeued_cbs;
+
+	/* Update enqueue stats */
+	q_data->queue_stats.dequeued_count += i;
+
+	return i;
+}
+
 /* Dequeue decode operations from ACC200 device. */
 static uint16_t
 acc200_dequeue_ldpc_dec(struct rte_bbdev_queue_data *q_data,
@@ -2067,6 +2904,10 @@ acc200_bbdev_init(struct rte_bbdev *dev, struct rte_pci_driver *drv)
 	struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device);
 
 	dev->dev_ops = &acc200_bbdev_ops;
+	dev->enqueue_enc_ops = acc200_enqueue_enc;
+	dev->enqueue_dec_ops = acc200_enqueue_dec;
+	dev->dequeue_enc_ops = acc200_dequeue_enc;
+	dev->dequeue_dec_ops = acc200_dequeue_dec;
 	dev->enqueue_ldpc_enc_ops = acc200_enqueue_ldpc_enc;
 	dev->enqueue_ldpc_dec_ops = acc200_enqueue_ldpc_dec;
 	dev->dequeue_ldpc_enc_ops = acc200_dequeue_ldpc_enc;
-- 
2.37.1


  parent reply	other threads:[~2022-09-29 21:13 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-09-29 21:11 From: Nicolas Chautru <nicolas.chautru@intel.com> Nicolas Chautru
2022-09-29 21:11 ` [PATCH v7 01/14] baseband/acc100: remove unused registers Nicolas Chautru
2022-09-29 21:11 ` [PATCH v7 02/14] baseband/acc100: refactor to segregate common code Nicolas Chautru
2022-09-29 21:11 ` [PATCH v7 03/14] baseband/acc: rename directory from acc100 to acc Nicolas Chautru
2022-09-29 21:11 ` [PATCH v7 04/14] baseband/acc: introduce PMD for ACC200 Nicolas Chautru
2022-10-07 18:15   ` [EXT] " Akhil Goyal
2022-10-07 18:24     ` Chautru, Nicolas
2022-09-29 21:11 ` [PATCH v7 05/14] baseband/acc: add HW register definitions " Nicolas Chautru
2022-09-29 21:11 ` [PATCH v7 06/14] baseband/acc: add info get function " Nicolas Chautru
2022-09-29 21:11 ` [PATCH v7 07/14] baseband/acc: add queue configuration " Nicolas Chautru
2022-09-29 21:11 ` [PATCH v7 08/14] baseband/acc: add LDPC processing functions Nicolas Chautru
2022-09-29 21:11 ` Nicolas Chautru [this message]
2022-09-29 21:11 ` [PATCH v7 10/14] baseband/acc: add support for FFT operations Nicolas Chautru
2022-09-29 21:11 ` [PATCH v7 11/14] baseband/acc: support interrupt Nicolas Chautru
2022-09-29 21:11 ` [PATCH v7 12/14] baseband/acc: add device status and vf2pf comms Nicolas Chautru
2022-09-29 21:11 ` [PATCH v7 13/14] baseband/acc: add PF configure companion function Nicolas Chautru
2022-10-05 20:56   ` Vargas, Hernan
2022-09-29 21:11 ` [PATCH v7 14/14] baseband/acc: simplify meson dependency Nicolas Chautru

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220929211128.36381-10-nicolas.chautru@intel.com \
    --to=nicolas.chautru@intel.com \
    --cc=bruce.richardson@intel.com \
    --cc=david.marchand@redhat.com \
    --cc=dev@dpdk.org \
    --cc=hemant.agrawal@nxp.com \
    --cc=hernan.vargas@intel.com \
    --cc=maxime.coquelin@redhat.com \
    --cc=mdr@ashroe.eu \
    --cc=stephen@networkplumber.org \
    --cc=thomas@monjalon.net \
    --cc=trix@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.