From ca7b6374d22efa8592a35d9d191672e7cbf84a44 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 24 Mar 2020 11:14:44 -0700 Subject: [PATCH] IB/isert: fix unaligned immediate-data handling Currently we allocate rx buffers in a single contiguous buffers for headers (iser and iscsi) and data trailer. This means that most likely the data starting offset is aligned to 76 bytes (size of both headers). This worked fine for years, but at some point this broke. To fix this, we should avoid passing unaligned buffers for I/O. The fix is rather simple (although a bit invasive). Simply allocate a buffer for the headers and a buffer for the data and post a 2-entry recv work request. Reported-by: Stephen Rust Signed-off-by: Sagi Grimberg --- drivers/infiniband/ulp/isert/ib_isert.c | 207 +++++++++++++----------- drivers/infiniband/ulp/isert/ib_isert.h | 18 +-- 2 files changed, 123 insertions(+), 102 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index a1a035270cab..bd27679d2a1b 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -129,7 +129,7 @@ isert_create_qp(struct isert_conn *isert_conn, attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1; attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX; attr.cap.max_send_sge = device->ib_device->attrs.max_send_sge; - attr.cap.max_recv_sge = 1; + attr.cap.max_recv_sge = 2; attr.sq_sig_type = IB_SIGNAL_REQ_WR; attr.qp_type = IB_QPT_RC; if (device->pi_capable) @@ -163,15 +163,74 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id) return ret; } +static int num_pages(int len) +{ + return 1 + (((len - 1) & PAGE_MASK) >> PAGE_SHIFT); +} + +static void +isert_free_rx_descriptor(struct iser_rx_desc *rx_desc, struct ib_device *ib_dev) +{ + struct ib_sge *rx_sg; + + rx_sg = &rx_desc->rx_sg[1]; + ib_dma_unmap_single(ib_dev, rx_sg->addr, + ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_FROM_DEVICE); + free_pages((unsigned 
long)rx_desc->data, + num_pages(ISCSI_DEF_MAX_RECV_SEG_LEN)); + rx_sg = &rx_desc->rx_sg[0]; + ib_dma_unmap_single(ib_dev, rx_sg->addr, + ISER_HEADERS_LEN, DMA_FROM_DEVICE); +} + +static int +isert_alloc_rx_descriptor(struct iser_rx_desc *rx_desc, struct isert_device *device) +{ + struct ib_device *ib_dev = device->ib_device; + struct ib_sge *rx_sg; + + /* headers */ + rx_sg = &rx_desc->rx_sg[0]; + rx_sg->addr = ib_dma_map_single(ib_dev, (void *)rx_desc, + ISER_HEADERS_LEN, DMA_FROM_DEVICE); + if (ib_dma_mapping_error(ib_dev, rx_sg->addr)) + return -ENOMEM; + + rx_sg->length = ISER_HEADERS_LEN; + rx_sg->lkey = device->pd->local_dma_lkey; + + /* data */ + rx_sg = &rx_desc->rx_sg[1]; + rx_desc->data = (char *)__get_free_pages(GFP_KERNEL, + num_pages(ISCSI_DEF_MAX_RECV_SEG_LEN)); + if (!rx_desc->data) + goto alloc_fail; + + rx_sg->addr = ib_dma_map_single(ib_dev, (void *)rx_desc->data, + ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_FROM_DEVICE); + if (ib_dma_mapping_error(ib_dev, rx_sg->addr)) + goto data_map_fail; + + rx_sg->length = ISCSI_DEF_MAX_RECV_SEG_LEN; + rx_sg->lkey = device->pd->local_dma_lkey; + + return 0; + +data_map_fail: + free_pages((unsigned long)rx_desc->data, num_pages(ISCSI_DEF_MAX_RECV_SEG_LEN)); +alloc_fail: + rx_sg = &rx_desc->rx_sg[0]; + ib_dma_unmap_single(ib_dev, rx_sg->addr, + ISER_HEADERS_LEN, DMA_FROM_DEVICE); + return -ENOMEM; +} + static int isert_alloc_rx_descriptors(struct isert_conn *isert_conn) { struct isert_device *device = isert_conn->device; struct ib_device *ib_dev = device->ib_device; - struct iser_rx_desc *rx_desc; - struct ib_sge *rx_sg; - u64 dma_addr; - int i, j; + int i; isert_conn->rx_descs = kcalloc(ISERT_QP_MAX_RECV_DTOS, sizeof(struct iser_rx_desc), @@ -179,32 +238,16 @@ isert_alloc_rx_descriptors(struct isert_conn *isert_conn) if (!isert_conn->rx_descs) return -ENOMEM; - rx_desc = isert_conn->rx_descs; - - for (i = 0; i < ISERT_QP_MAX_RECV_DTOS; i++, rx_desc++) { - dma_addr = ib_dma_map_single(ib_dev, (void *)rx_desc, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); - if
(ib_dma_mapping_error(ib_dev, dma_addr)) - goto dma_map_fail; - - rx_desc->dma_addr = dma_addr; - - rx_sg = &rx_desc->rx_sg; - rx_sg->addr = rx_desc->dma_addr; - rx_sg->length = ISER_RX_PAYLOAD_SIZE; - rx_sg->lkey = device->pd->local_dma_lkey; - rx_desc->rx_cqe.done = isert_recv_done; + for (i = 0; i < ISERT_QP_MAX_RECV_DTOS; i++) { + if (isert_alloc_rx_descriptor(&isert_conn->rx_descs[i], device)) + goto alloc_fail; } return 0; -dma_map_fail: - rx_desc = isert_conn->rx_descs; - for (j = 0; j < i; j++, rx_desc++) { - ib_dma_unmap_single(ib_dev, rx_desc->dma_addr, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); - } - kfree(isert_conn->rx_descs); +alloc_fail: + while (i--) isert_free_rx_descriptor(&isert_conn->rx_descs[i], ib_dev); + kfree(isert_conn->rx_descs); isert_conn->rx_descs = NULL; isert_err("conn %p failed to allocate rx descriptors\n", isert_conn); return -ENOMEM; @@ -214,17 +257,13 @@ static void isert_free_rx_descriptors(struct isert_conn *isert_conn) { struct ib_device *ib_dev = isert_conn->device->ib_device; - struct iser_rx_desc *rx_desc; int i; if (!isert_conn->rx_descs) return; - rx_desc = isert_conn->rx_descs; - for (i = 0; i < ISERT_QP_MAX_RECV_DTOS; i++, rx_desc++) { - ib_dma_unmap_single(ib_dev, rx_desc->dma_addr, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); - } + for (i = 0; i < ISERT_QP_MAX_RECV_DTOS; i++) + isert_free_rx_descriptor(&isert_conn->rx_descs[i], ib_dev); kfree(isert_conn->rx_descs); isert_conn->rx_descs = NULL; @@ -407,11 +446,7 @@ isert_free_login_buf(struct isert_conn *isert_conn) ib_dma_unmap_single(ib_dev, isert_conn->login_rsp_dma, ISER_RX_PAYLOAD_SIZE, DMA_TO_DEVICE); kfree(isert_conn->login_rsp_buf); - - ib_dma_unmap_single(ib_dev, isert_conn->login_req_dma, - ISER_RX_PAYLOAD_SIZE, - DMA_FROM_DEVICE); - kfree(isert_conn->login_req_buf); + isert_free_rx_descriptor(&isert_conn->login_desc, ib_dev); } static int @@ -420,25 +455,15 @@ isert_alloc_login_buf(struct isert_conn *isert_conn, { int ret; - isert_conn->login_req_buf =
kzalloc(sizeof(*isert_conn->login_req_buf), - GFP_KERNEL); - if (!isert_conn->login_req_buf) - return -ENOMEM; - - isert_conn->login_req_dma = ib_dma_map_single(ib_dev, - isert_conn->login_req_buf, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); - ret = ib_dma_mapping_error(ib_dev, isert_conn->login_req_dma); - if (ret) { - isert_err("login_req_dma mapping error: %d\n", ret); - isert_conn->login_req_dma = 0; - goto out_free_login_req_buf; - } + ret = isert_alloc_rx_descriptor(&isert_conn->login_desc, + isert_conn->device); + if (ret) + return ret; isert_conn->login_rsp_buf = kzalloc(ISER_RX_PAYLOAD_SIZE, GFP_KERNEL); if (!isert_conn->login_rsp_buf) { ret = -ENOMEM; - goto out_unmap_login_req_buf; + goto out_free_login_desc; } isert_conn->login_rsp_dma = ib_dma_map_single(ib_dev, @@ -455,11 +480,8 @@ isert_alloc_login_buf(struct isert_conn *isert_conn, out_free_login_rsp_buf: kfree(isert_conn->login_rsp_buf); -out_unmap_login_req_buf: - ib_dma_unmap_single(ib_dev, isert_conn->login_req_dma, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); -out_free_login_req_buf: - kfree(isert_conn->login_req_buf); +out_free_login_desc: + isert_free_rx_descriptor(&isert_conn->login_desc, ib_dev); return ret; } @@ -516,10 +538,6 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) isert_init_conn(isert_conn); isert_conn->cm_id = cma_id; - ret = isert_alloc_login_buf(isert_conn, cma_id->device); - if (ret) - goto out; - device = isert_device_get(cma_id); if (IS_ERR(device)) { ret = PTR_ERR(device); @@ -527,6 +545,10 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) } isert_conn->device = device; + ret = isert_alloc_login_buf(isert_conn, cma_id->device); + if (ret) + goto out; + isert_set_nego_params(isert_conn, &event->param.conn); ret = isert_conn_setup_qp(isert_conn, cma_id); @@ -578,7 +600,7 @@ isert_connect_release(struct isert_conn *isert_conn) ib_destroy_qp(isert_conn->qp); } - if (isert_conn->login_req_buf) + if 
(isert_conn->login_rsp_buf) isert_free_login_buf(isert_conn); isert_device_put(device); @@ -809,9 +831,10 @@ isert_post_recvm(struct isert_conn *isert_conn, u32 count) for (rx_wr = isert_conn->rx_wr, i = 0; i < count; i++, rx_wr++) { rx_desc = &isert_conn->rx_descs[i]; + rx_desc->rx_cqe.done = isert_recv_done; rx_wr->wr_cqe = &rx_desc->rx_cqe; - rx_wr->sg_list = &rx_desc->rx_sg; - rx_wr->num_sge = 1; + rx_wr->sg_list = rx_desc->rx_sg; + rx_wr->num_sge = 2; rx_wr->next = rx_wr + 1; rx_desc->in_use = false; } @@ -840,9 +863,10 @@ isert_post_recv(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc) } rx_desc->in_use = false; + rx_desc->rx_cqe.done = isert_recv_done; rx_wr.wr_cqe = &rx_desc->rx_cqe; - rx_wr.sg_list = &rx_desc->rx_sg; - rx_wr.num_sge = 1; + rx_wr.sg_list = rx_desc->rx_sg; + rx_wr.num_sge = 2; rx_wr.next = NULL; ret = ib_post_recv(isert_conn->qp, &rx_wr, NULL); @@ -960,23 +984,13 @@ static int isert_login_post_recv(struct isert_conn *isert_conn) { struct ib_recv_wr rx_wr; - struct ib_sge sge; int ret; - memset(&sge, 0, sizeof(struct ib_sge)); - sge.addr = isert_conn->login_req_dma; - sge.length = ISER_RX_PAYLOAD_SIZE; - sge.lkey = isert_conn->device->pd->local_dma_lkey; - - isert_dbg("Setup sge: addr: %llx length: %d 0x%08x\n", - sge.addr, sge.length, sge.lkey); - - isert_conn->login_req_buf->rx_cqe.done = isert_login_recv_done; - + isert_conn->login_desc.rx_cqe.done = isert_login_recv_done; memset(&rx_wr, 0, sizeof(struct ib_recv_wr)); - rx_wr.wr_cqe = &isert_conn->login_req_buf->rx_cqe; - rx_wr.sg_list = &sge; - rx_wr.num_sge = 1; + rx_wr.wr_cqe = &isert_conn->login_desc.rx_cqe; + rx_wr.sg_list = isert_conn->login_desc.rx_sg; + rx_wr.num_sge = 2; ret = ib_post_recv(isert_conn->qp, &rx_wr, NULL); if (ret) @@ -1051,7 +1065,7 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login, static void isert_rx_login_req(struct isert_conn *isert_conn) { - struct iser_rx_desc *rx_desc = isert_conn->login_req_buf; + struct iser_rx_desc 
*rx_desc = &isert_conn->login_desc; int rx_buflen = isert_conn->login_req_len; struct iscsi_conn *conn = isert_conn->conn; struct iscsi_login *login = conn->conn_login; @@ -1412,11 +1426,13 @@ isert_recv_done(struct ib_cq *cq, struct ib_wc *wc) rx_desc->in_use = true; - ib_dma_sync_single_for_cpu(ib_dev, rx_desc->dma_addr, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); + ib_dma_sync_single_for_cpu(ib_dev, rx_desc->rx_sg[0].addr, + ISER_HEADERS_LEN, DMA_FROM_DEVICE); + ib_dma_sync_single_for_cpu(ib_dev, rx_desc->rx_sg[1].addr, + ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_FROM_DEVICE); - isert_dbg("DMA: 0x%llx, iSCSI opcode: 0x%02x, ITT: 0x%08x, flags: 0x%02x dlen: %d\n", - rx_desc->dma_addr, hdr->opcode, hdr->itt, hdr->flags, + isert_dbg("iSCSI opcode: 0x%02x, ITT: 0x%08x, flags: 0x%02x dlen: %d\n", + hdr->opcode, hdr->itt, hdr->flags, (int)(wc->byte_len - ISER_HEADERS_LEN)); switch (iser_ctrl->flags & 0xF0) { @@ -1447,8 +1463,10 @@ isert_recv_done(struct ib_cq *cq, struct ib_wc *wc) isert_rx_opcode(isert_conn, rx_desc, read_stag, read_va, write_stag, write_va); - ib_dma_sync_single_for_device(ib_dev, rx_desc->dma_addr, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); + ib_dma_sync_single_for_device(ib_dev, rx_desc->rx_sg[0].addr, + ISER_HEADERS_LEN, DMA_FROM_DEVICE); + ib_dma_sync_single_for_device(ib_dev, rx_desc->rx_sg[1].addr, + ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_FROM_DEVICE); } static void @@ -1456,14 +1474,17 @@ isert_login_recv_done(struct ib_cq *cq, struct ib_wc *wc) { struct isert_conn *isert_conn = wc->qp->qp_context; struct ib_device *ib_dev = isert_conn->device->ib_device; + struct iser_rx_desc *rx_desc = &isert_conn->login_desc; if (unlikely(wc->status != IB_WC_SUCCESS)) { isert_print_wc(wc, "login recv"); return; } - ib_dma_sync_single_for_cpu(ib_dev, isert_conn->login_req_dma, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); + ib_dma_sync_single_for_cpu(ib_dev, rx_desc->rx_sg[0].addr, + ISER_HEADERS_LEN, DMA_FROM_DEVICE); + ib_dma_sync_single_for_cpu(ib_dev, rx_desc->rx_sg[1].addr, + 
ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_FROM_DEVICE); isert_conn->login_req_len = wc->byte_len - ISER_HEADERS_LEN; @@ -1478,8 +1499,10 @@ isert_login_recv_done(struct ib_cq *cq, struct ib_wc *wc) complete(&isert_conn->login_req_comp); mutex_unlock(&isert_conn->mutex); - ib_dma_sync_single_for_device(ib_dev, isert_conn->login_req_dma, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); + ib_dma_sync_single_for_device(ib_dev, rx_desc->rx_sg[0].addr, + ISER_HEADERS_LEN, DMA_FROM_DEVICE); + ib_dma_sync_single_for_device(ib_dev, rx_desc->rx_sg[1].addr, + ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_FROM_DEVICE); } static void diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 3b296bac4f60..407bd6c9eb1f 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -80,14 +80,13 @@ enum iser_conn_state { }; struct iser_rx_desc { - struct iser_ctrl iser_header; - struct iscsi_hdr iscsi_header; - char data[ISCSI_DEF_MAX_RECV_SEG_LEN]; - u64 dma_addr; - struct ib_sge rx_sg; - struct ib_cqe rx_cqe; - bool in_use; - char pad[ISER_RX_PAD_SIZE]; + struct iser_ctrl iser_header; + struct iscsi_hdr iscsi_header; + char *data; + u64 dma_addr[2]; + struct ib_sge rx_sg[2]; + struct ib_cqe rx_cqe; + bool in_use; } __packed; static inline struct iser_rx_desc *cqe_to_rx_desc(struct ib_cqe *cqe) @@ -141,9 +140,8 @@ struct isert_conn { u32 responder_resources; u32 initiator_depth; bool pi_support; - struct iser_rx_desc *login_req_buf; + struct iser_rx_desc login_desc; char *login_rsp_buf; - u64 login_req_dma; int login_req_len; u64 login_rsp_dma; struct iser_rx_desc *rx_descs; -- 2.20.1