From mboxrd@z Thu Jan 1 00:00:00 1970 From: Anoob Joseph Subject: [PATCH v3 24/32] common/cpt: add support for kasumi Date: Fri, 5 Oct 2018 18:29:15 +0530 Message-ID: <1538744363-30340-25-git-send-email-anoob.joseph@caviumnetworks.com> References: <1536033560-21541-1-git-send-email-ajoseph@caviumnetworks.com> <1538744363-30340-1-git-send-email-anoob.joseph@caviumnetworks.com> Mime-Version: 1.0 Content-Type: text/plain Cc: Srisivasubramanian S , Jerin Jacob , Narayana Prasad , dev@dpdk.org, Ankur Dwivedi , Anoob Joseph , Murthy NSSR , Nithin Dabilpuram , Ragothaman Jayaraman , Tejasree Kondoj To: Akhil Goyal , Pablo de Lara , Thomas Monjalon Return-path: Received: from NAM01-BN3-obe.outbound.protection.outlook.com (mail-bn3nam01on0042.outbound.protection.outlook.com [104.47.33.42]) by dpdk.org (Postfix) with ESMTP id D6DF51B1CF for ; Fri, 5 Oct 2018 15:02:11 +0200 (CEST) In-Reply-To: <1538744363-30340-1-git-send-email-anoob.joseph@caviumnetworks.com> List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" From: Srisivasubramanian S Adding microcode interface for supporting kasumi. Signed-off-by: Ankur Dwivedi Signed-off-by: Anoob Joseph Signed-off-by: Murthy NSSR Signed-off-by: Nithin Dabilpuram Signed-off-by: Ragothaman Jayaraman Signed-off-by: Srisivasubramanian S Signed-off-by: Tejasree Kondoj --- drivers/common/cpt/cpt_ucode.h | 450 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 450 insertions(+) diff --git a/drivers/common/cpt/cpt_ucode.h b/drivers/common/cpt/cpt_ucode.h index 5d7743c..05cf95c 100644 --- a/drivers/common/cpt/cpt_ucode.h +++ b/drivers/common/cpt/cpt_ucode.h @@ -1816,6 +1816,450 @@ cpt_zuc_snow3g_dec_prep(uint32_t req_flags, return 0; } +static __rte_always_inline int +cpt_kasumi_enc_prep(uint32_t req_flags, + uint64_t d_offs, + uint64_t d_lens, + fc_params_t *params, + void *op, + void **prep_req) +{ + uint32_t size; + int32_t inputlen = 0, outputlen = 0; + struct cpt_ctx *cpt_ctx; + uint32_t mac_len = 0; + uint8_t i = 0; + struct cpt_request_info *req; + buf_ptr_t *buf_p; + uint32_t encr_offset, auth_offset; + uint32_t encr_data_len, auth_data_len; + int flags, m_size; + uint8_t *iv_s, *iv_d, iv_len = 8; + uint8_t dir = 0; + void *m_vaddr, *c_vaddr; + uint64_t m_dma, c_dma; + uint64_t *offset_vaddr, offset_dma; + vq_cmd_word0_t vq_cmd_w0; + vq_cmd_word3_t vq_cmd_w3; + opcode_info_t opcode; + uint8_t *in_buffer; + uint32_t g_size_bytes, s_size_bytes; + uint64_t dptr_dma, rptr_dma; + sg_comp_t *gather_comp; + sg_comp_t *scatter_comp; + + buf_p = ¶ms->meta_buf; + m_vaddr = buf_p->vaddr; + m_dma = buf_p->dma_addr; + m_size = buf_p->size; + + encr_offset = ENCR_OFFSET(d_offs) / 8; + auth_offset = AUTH_OFFSET(d_offs) / 8; + encr_data_len = ENCR_DLEN(d_lens); + auth_data_len = AUTH_DLEN(d_lens); + + cpt_ctx = params->ctx_buf.vaddr; + flags = cpt_ctx->zsk_flags; + mac_len = cpt_ctx->mac_len; + + if (flags == 0x0) + iv_s = params->iv_buf; + else + iv_s = params->auth_iv_buf; + + dir = iv_s[8] & 0x1; + + /* + * Save initial space that followed app data for completion code & + * alternate completion code to fall in same cache line as app data + */ + m_vaddr = (uint8_t *)m_vaddr + COMPLETION_CODE_SIZE; + m_dma += COMPLETION_CODE_SIZE; + size = (uint8_t *)RTE_PTR_ALIGN((uint8_t *)m_vaddr, 16) - + (uint8_t *)m_vaddr; + + c_vaddr = (uint8_t *)m_vaddr + size; + c_dma = m_dma + size; + size += sizeof(cpt_res_s_t); + + m_vaddr = (uint8_t *)m_vaddr + size; + m_dma += size; + m_size -= size; + + /* Reserve memory for cpt request info */ + req = m_vaddr; + + size = sizeof(struct cpt_request_info); + m_vaddr = (uint8_t *)m_vaddr + size; + m_dma += size; + m_size -= size; + + opcode.s.major = CPT_MAJOR_OP_KASUMI | CPT_DMA_MODE; + + /* indicates ECB/CBC, direction, ctx from cptr, iv from dptr */ + opcode.s.minor = ((1 << 6) | (cpt_ctx->k_ecb << 5) | + (dir << 4) | (0 << 3) | (flags & 0x7)); + + /* + * GP op header, lengths are expected in bits. + */ + vq_cmd_w0.u64 = 0; + vq_cmd_w0.s.param1 = rte_cpu_to_be_16(encr_data_len); + vq_cmd_w0.s.param2 = rte_cpu_to_be_16(auth_data_len); + vq_cmd_w0.s.opcode = rte_cpu_to_be_16(opcode.flags); + + /* consider iv len */ + if (flags == 0x0) { + encr_offset += iv_len; + auth_offset += iv_len; + } + + /* save space for offset ctrl and iv */ + offset_vaddr = m_vaddr; + offset_dma = m_dma; + + m_vaddr = (uint8_t *)m_vaddr + OFF_CTRL_LEN + iv_len; + m_dma += OFF_CTRL_LEN + iv_len; + m_size -= OFF_CTRL_LEN + iv_len; + + /* DPTR has SG list */ + in_buffer = m_vaddr; + dptr_dma = m_dma; + + ((uint16_t *)in_buffer)[0] = 0; + ((uint16_t *)in_buffer)[1] = 0; + + /* TODO Add error check if space will be sufficient */ + gather_comp = (sg_comp_t *)((uint8_t *)m_vaddr + 8); + + /* + * Input Gather List + */ + i = 0; + + /* Offset control word followed by iv */ + + if (flags == 0x0) { + inputlen = encr_offset + (RTE_ALIGN(encr_data_len, 8) / 8); + outputlen = inputlen; + /* iv offset is 0 */ + *offset_vaddr = rte_cpu_to_be_64((uint64_t)encr_offset << 16); + } else { + inputlen = auth_offset + (RTE_ALIGN(auth_data_len, 8) / 8); + outputlen = mac_len; + /* iv offset is 0 */ + *offset_vaddr = rte_cpu_to_be_64((uint64_t)auth_offset); + } + + i = fill_sg_comp(gather_comp, i, offset_dma, OFF_CTRL_LEN + iv_len); + + /* IV */ + iv_d = (uint8_t *)offset_vaddr + OFF_CTRL_LEN; + memcpy(iv_d, iv_s, iv_len); + + /* input data */ + size = inputlen - iv_len; + if (size) { + i = fill_sg_comp_from_iov(gather_comp, i, + params->src_iov, 0, + &size, NULL, 0); + + if (size) + return ERR_BAD_INPUT_ARG; + } + ((uint16_t *)in_buffer)[2] = rte_cpu_to_be_16(i); + g_size_bytes = ((i + 3) / 4) * sizeof(sg_comp_t); + + /* + * Output Scatter List + */ + + i = 0; + scatter_comp = (sg_comp_t *)((uint8_t *)gather_comp + g_size_bytes); + + if (flags == 0x1) { + /* IV in SLIST only for F8 */ + iv_len = 0; + } + + /* IV */ + if (iv_len) { + i = fill_sg_comp(scatter_comp, i, + offset_dma + OFF_CTRL_LEN, + iv_len); + } + + /* Add output data */ + if (req_flags & VALID_MAC_BUF) { + size = outputlen - iv_len - mac_len; + if (size) { + i = fill_sg_comp_from_iov(scatter_comp, i, + params->dst_iov, 0, + &size, NULL, 0); + + if (size) + return ERR_BAD_INPUT_ARG; + } + + /* mac data */ + if (mac_len) { + i = fill_sg_comp_from_buf(scatter_comp, i, + ¶ms->mac_buf); + } + } else { + /* Output including mac */ + size = outputlen - iv_len; + if (size) { + i = fill_sg_comp_from_iov(scatter_comp, i, + params->dst_iov, 0, + &size, NULL, 0); + + if (size) + return ERR_BAD_INPUT_ARG; + } + } + ((uint16_t *)in_buffer)[3] = rte_cpu_to_be_16(i); + s_size_bytes = ((i + 3) / 4) * sizeof(sg_comp_t); + + size = g_size_bytes + s_size_bytes + SG_LIST_HDR_SIZE; + + /* This is DPTR len incase of SG mode */ + vq_cmd_w0.s.dlen = rte_cpu_to_be_16(size); + + m_vaddr = (uint8_t *)m_vaddr + size; + m_dma += size; + m_size -= size; + + /* cpt alternate completion address saved earlier */ + req->alternate_caddr = (uint64_t *)((uint8_t *)c_vaddr - 8); + *req->alternate_caddr = ~((uint64_t)COMPLETION_CODE_INIT); + rptr_dma = c_dma - 8; + + req->ist.ei1 = dptr_dma; + req->ist.ei2 = rptr_dma; + + /* First 16-bit swap then 64-bit swap */ + /* TODO: HACK: Reverse the vq_cmd and cpt_req bit field definitions + * to eliminate all the swapping + */ + vq_cmd_w0.u64 = rte_cpu_to_be_64(vq_cmd_w0.u64); + + /* vq command w3 */ + vq_cmd_w3.u64 = 0; + vq_cmd_w3.s.grp = 0; + vq_cmd_w3.s.cptr = params->ctx_buf.dma_addr + + offsetof(struct cpt_ctx, k_ctx); + + /* 16 byte aligned cpt res address */ + req->completion_addr = (uint64_t *)((uint8_t *)c_vaddr); + *req->completion_addr = COMPLETION_CODE_INIT; + req->comp_baddr = c_dma; + + /* Fill microcode part of instruction */ + req->ist.ei0 = vq_cmd_w0.u64; + req->ist.ei3 = vq_cmd_w3.u64; + + req->op = op; + + *prep_req = req; + return 0; +} + +static __rte_always_inline int +cpt_kasumi_dec_prep(uint64_t d_offs, + uint64_t d_lens, + fc_params_t *params, + void *op, + void **prep_req) +{ + uint32_t size; + int32_t inputlen = 0, outputlen; + struct cpt_ctx *cpt_ctx; + uint8_t i = 0, iv_len = 8; + struct cpt_request_info *req; + buf_ptr_t *buf_p; + uint32_t encr_offset; + uint32_t encr_data_len; + int flags, m_size; + uint8_t dir = 0; + void *m_vaddr, *c_vaddr; + uint64_t m_dma, c_dma; + uint64_t *offset_vaddr, offset_dma; + vq_cmd_word0_t vq_cmd_w0; + vq_cmd_word3_t vq_cmd_w3; + opcode_info_t opcode; + uint8_t *in_buffer; + uint32_t g_size_bytes, s_size_bytes; + uint64_t dptr_dma, rptr_dma; + sg_comp_t *gather_comp; + sg_comp_t *scatter_comp; + + buf_p = ¶ms->meta_buf; + m_vaddr = buf_p->vaddr; + m_dma = buf_p->dma_addr; + m_size = buf_p->size; + + encr_offset = ENCR_OFFSET(d_offs) / 8; + encr_data_len = ENCR_DLEN(d_lens); + + cpt_ctx = params->ctx_buf.vaddr; + flags = cpt_ctx->zsk_flags; + /* + * Save initial space that followed app data for completion code & + * alternate completion code to fall in same cache line as app data + */ + m_vaddr = (uint8_t *)m_vaddr + COMPLETION_CODE_SIZE; + m_dma += COMPLETION_CODE_SIZE; + size = (uint8_t *)RTE_PTR_ALIGN((uint8_t *)m_vaddr, 16) - + (uint8_t *)m_vaddr; + + c_vaddr = (uint8_t *)m_vaddr + size; + c_dma = m_dma + size; + size += sizeof(cpt_res_s_t); + + m_vaddr = (uint8_t *)m_vaddr + size; + m_dma += size; + m_size -= size; + + /* Reserve memory for cpt request info */ + req = m_vaddr; + + size = sizeof(struct cpt_request_info); + m_vaddr = (uint8_t *)m_vaddr + size; + m_dma += size; + m_size -= size; + + opcode.s.major = CPT_MAJOR_OP_KASUMI | CPT_DMA_MODE; + + /* indicates ECB/CBC, direction, ctx from cptr, iv from dptr */ + opcode.s.minor = ((1 << 6) | (cpt_ctx->k_ecb << 5) | + (dir << 4) | (0 << 3) | (flags & 0x7)); + + /* + * GP op header, lengths are expected in bits. + */ + vq_cmd_w0.u64 = 0; + vq_cmd_w0.s.param1 = rte_cpu_to_be_16(encr_data_len); + vq_cmd_w0.s.opcode = rte_cpu_to_be_16(opcode.flags); + + /* consider iv len */ + encr_offset += iv_len; + + inputlen = iv_len + (RTE_ALIGN(encr_data_len, 8) / 8); + outputlen = inputlen; + + /* save space for offset ctrl & iv */ + offset_vaddr = m_vaddr; + offset_dma = m_dma; + + m_vaddr = (uint8_t *)m_vaddr + OFF_CTRL_LEN + iv_len; + m_dma += OFF_CTRL_LEN + iv_len; + m_size -= OFF_CTRL_LEN + iv_len; + + /* DPTR has SG list */ + in_buffer = m_vaddr; + dptr_dma = m_dma; + + ((uint16_t *)in_buffer)[0] = 0; + ((uint16_t *)in_buffer)[1] = 0; + + /* TODO Add error check if space will be sufficient */ + gather_comp = (sg_comp_t *)((uint8_t *)m_vaddr + 8); + + /* + * Input Gather List + */ + i = 0; + + /* Offset control word followed by iv */ + *offset_vaddr = rte_cpu_to_be_64((uint64_t)encr_offset << 16); + + i = fill_sg_comp(gather_comp, i, offset_dma, OFF_CTRL_LEN + iv_len); + + /* IV */ + memcpy((uint8_t *)offset_vaddr + OFF_CTRL_LEN, + params->iv_buf, iv_len); + + /* Add input data */ + size = inputlen - iv_len; + if (size) { + i = fill_sg_comp_from_iov(gather_comp, i, + params->src_iov, + 0, &size, NULL, 0); + if (size) + return ERR_BAD_INPUT_ARG; + } + ((uint16_t *)in_buffer)[2] = rte_cpu_to_be_16(i); + g_size_bytes = ((i + 3) / 4) * sizeof(sg_comp_t); + + /* + * Output Scatter List + */ + + i = 0; + scatter_comp = (sg_comp_t *)((uint8_t *)gather_comp + g_size_bytes); + + /* IV */ + i = fill_sg_comp(scatter_comp, i, + offset_dma + OFF_CTRL_LEN, + iv_len); + + /* Add output data */ + size = outputlen - iv_len; + if (size) { + i = fill_sg_comp_from_iov(scatter_comp, i, + params->dst_iov, 0, + &size, NULL, 0); + if (size) + return ERR_BAD_INPUT_ARG; + } + ((uint16_t *)in_buffer)[3] = rte_cpu_to_be_16(i); + s_size_bytes = ((i + 3) / 4) * sizeof(sg_comp_t); + + size = g_size_bytes + s_size_bytes + SG_LIST_HDR_SIZE; + + /* This is DPTR len incase of SG mode */ + vq_cmd_w0.s.dlen = rte_cpu_to_be_16(size); + + m_vaddr = (uint8_t *)m_vaddr + size; + m_dma += size; + m_size -= size; + + /* cpt alternate completion address saved earlier */ + req->alternate_caddr = (uint64_t *)((uint8_t *)c_vaddr - 8); + *req->alternate_caddr = ~((uint64_t)COMPLETION_CODE_INIT); + rptr_dma = c_dma - 8; + + req->ist.ei1 = dptr_dma; + req->ist.ei2 = rptr_dma; + + /* First 16-bit swap then 64-bit swap */ + /* TODO: HACK: Reverse the vq_cmd and cpt_req bit field definitions + * to eliminate all the swapping + */ + vq_cmd_w0.u64 = rte_cpu_to_be_64(vq_cmd_w0.u64); + + /* vq command w3 */ + vq_cmd_w3.u64 = 0; + vq_cmd_w3.s.grp = 0; + vq_cmd_w3.s.cptr = params->ctx_buf.dma_addr + + offsetof(struct cpt_ctx, k_ctx); + + /* 16 byte aligned cpt res address */ + req->completion_addr = (uint64_t *)((uint8_t *)c_vaddr); + *req->completion_addr = COMPLETION_CODE_INIT; + req->comp_baddr = c_dma; + + /* Fill microcode part of instruction */ + req->ist.ei0 = vq_cmd_w0.u64; + req->ist.ei3 = vq_cmd_w3.u64; + + req->op = op; + + *prep_req = req; + return 0; +} + static __rte_always_inline void * cpt_fc_dec_hmac_prep(uint32_t flags, uint64_t d_offs, @@ -1836,6 +2280,9 @@ cpt_fc_dec_hmac_prep(uint32_t flags, } else if (fc_type == ZUC_SNOW3G) { ret = cpt_zuc_snow3g_dec_prep(flags, d_offs, d_lens, fc_params, op, &prep_req); + } else if (fc_type == KASUMI) { + ret = cpt_kasumi_dec_prep(d_offs, d_lens, fc_params, op, + &prep_req); } else { /* * For AUTH_ONLY case, @@ -1869,6 +2316,9 @@ cpt_fc_enc_hmac_prep(uint32_t flags, uint64_t d_offs, uint64_t d_lens, } else if (fc_type == ZUC_SNOW3G) { ret = cpt_zuc_snow3g_enc_prep(flags, d_offs, d_lens, fc_params, op, &prep_req); + } else if (fc_type == KASUMI) { + ret = cpt_kasumi_enc_prep(flags, d_offs, d_lens, + fc_params, op, &prep_req); } else { ret = ERR_EIO; } -- 2.7.4