* [PATCH] nvmet-rdma: fix bonding failover possible NULL deref @ 2020-04-02 15:29 Sagi Grimberg 2020-04-02 15:35 ` Alex Lyakas 0 siblings, 1 reply; 3+ messages in thread From: Sagi Grimberg @ 2020-04-02 15:29 UTC (permalink / raw) To: linux-nvme, Christoph Hellwig, Keith Busch; +Cc: Max Gurtovoy, Alex Lyakas RDMA_CM_EVENT_ADDR_CHANGE event occur in the case of bonding failover on normal as well as on listening cm_ids. Hence this event will immediately trigger a NULL dereference trying to disconnect a queue for a cm_id that actually belongs to the port. To fix this we provide a different handler for the listener cm_ids that will defer a work to disable+(re)enable the port which essentially destroys and setups another listener cm_id Reported-by: Alex Lyakas <alex@zadara.com> Tested-by: Alex Lyakas <alex@zadara.com> Signed-off-by: Sagi Grimberg <sagi@grimberg.me> --- drivers/nvme/target/rdma.c | 164 +++++++++++++++++++++++++------------ 1 file changed, 113 insertions(+), 51 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 9e1b8c61f54e..8dac89b7aa12 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -105,6 +105,13 @@ struct nvmet_rdma_queue { struct list_head queue_list; }; +struct nvmet_rdma_port { + struct nvmet_port *nport; + struct sockaddr_storage addr; + struct rdma_cm_id *cm_id; + struct delayed_work repair_work; +}; + struct nvmet_rdma_device { struct ib_device *device; struct ib_pd *pd; @@ -1272,6 +1279,7 @@ static int nvmet_rdma_cm_accept(struct rdma_cm_id *cm_id, static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, struct rdma_cm_event *event) { + struct nvmet_rdma_port *port = cm_id->context; struct nvmet_rdma_device *ndev; struct nvmet_rdma_queue *queue; int ret = -EINVAL; @@ -1287,7 +1295,7 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, ret = -ENOMEM; goto put_device; } - queue->port = cm_id->context; + queue->port = port->nport; if (queue->host_qid == 0) { /* Let 
inflight controller teardown complete */ @@ -1412,7 +1420,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id, static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id, struct nvmet_rdma_queue *queue) { - struct nvmet_port *port; + struct nvmet_rdma_port *port; if (queue) { /* @@ -1431,7 +1439,7 @@ static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id, * cm_id destroy. use atomic xchg to make sure * we don't compete with remove_port. */ - if (xchg(&port->priv, NULL) != cm_id) + if (xchg(&port->cm_id, NULL) != cm_id) return 0; /* @@ -1462,6 +1470,13 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id, nvmet_rdma_queue_established(queue); break; case RDMA_CM_EVENT_ADDR_CHANGE: + if (!queue) { + struct nvmet_rdma_port *port = cm_id->context; + + schedule_delayed_work(&port->repair_work, 0); + break; + } + /* FALLTHROUGH */ case RDMA_CM_EVENT_DISCONNECTED: case RDMA_CM_EVENT_TIMEWAIT_EXIT: nvmet_rdma_queue_disconnect(queue); @@ -1504,42 +1519,19 @@ static void nvmet_rdma_delete_ctrl(struct nvmet_ctrl *ctrl) mutex_unlock(&nvmet_rdma_queue_mutex); } -static int nvmet_rdma_add_port(struct nvmet_port *port) +static void nvmet_rdma_disable_port(struct nvmet_rdma_port *port) { - struct rdma_cm_id *cm_id; - struct sockaddr_storage addr = { }; - __kernel_sa_family_t af; - int ret; + struct rdma_cm_id *cm_id = xchg(&port->cm_id, NULL); - switch (port->disc_addr.adrfam) { - case NVMF_ADDR_FAMILY_IP4: - af = AF_INET; - break; - case NVMF_ADDR_FAMILY_IP6: - af = AF_INET6; - break; - default: - pr_err("address family %d not supported\n", - port->disc_addr.adrfam); - return -EINVAL; - } - - if (port->inline_data_size < 0) { - port->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE; - } else if (port->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) { - pr_warn("inline_data_size %u is too large, reducing to %u\n", - port->inline_data_size, - NVMET_RDMA_MAX_INLINE_DATA_SIZE); - port->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE; 
- } + if (cm_id) + rdma_destroy_id(cm_id); +} - ret = inet_pton_with_scope(&init_net, af, port->disc_addr.traddr, - port->disc_addr.trsvcid, &addr); - if (ret) { - pr_err("malformed ip/port passed: %s:%s\n", - port->disc_addr.traddr, port->disc_addr.trsvcid); - return ret; - } +static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port) +{ + struct sockaddr *addr = (struct sockaddr *)&port->addr; + struct rdma_cm_id *cm_id; + int ret; cm_id = rdma_create_id(&init_net, nvmet_rdma_cm_handler, port, RDMA_PS_TCP, IB_QPT_RC); @@ -1558,23 +1550,19 @@ static int nvmet_rdma_add_port(struct nvmet_port *port) goto out_destroy_id; } - ret = rdma_bind_addr(cm_id, (struct sockaddr *)&addr); + ret = rdma_bind_addr(cm_id, addr); if (ret) { - pr_err("binding CM ID to %pISpcs failed (%d)\n", - (struct sockaddr *)&addr, ret); + pr_err("binding CM ID to %pISpcs failed (%d)\n", addr, ret); goto out_destroy_id; } ret = rdma_listen(cm_id, 128); if (ret) { - pr_err("listening to %pISpcs failed (%d)\n", - (struct sockaddr *)&addr, ret); + pr_err("listening to %pISpcs failed (%d)\n", addr, ret); goto out_destroy_id; } - pr_info("enabling port %d (%pISpcs)\n", - le16_to_cpu(port->disc_addr.portid), (struct sockaddr *)&addr); - port->priv = cm_id; + port->cm_id = cm_id; return 0; out_destroy_id: @@ -1582,18 +1570,92 @@ static int nvmet_rdma_add_port(struct nvmet_port *port) return ret; } -static void nvmet_rdma_remove_port(struct nvmet_port *port) +static void nvmet_rdma_repair_port_work(struct work_struct *w) { - struct rdma_cm_id *cm_id = xchg(&port->priv, NULL); + struct nvmet_rdma_port *port = container_of(to_delayed_work(w), + struct nvmet_rdma_port, repair_work); + int ret; - if (cm_id) - rdma_destroy_id(cm_id); + nvmet_rdma_disable_port(port); + ret = nvmet_rdma_enable_port(port); + if (ret) + schedule_delayed_work(&port->repair_work, 5 * HZ); +} + +static int nvmet_rdma_add_port(struct nvmet_port *nport) +{ + struct nvmet_rdma_port *port; + __kernel_sa_family_t af; + int ret; + + 
port = kzalloc(sizeof(*port), GFP_KERNEL); + if (!port) + return -ENOMEM; + + nport->priv = port; + port->nport = nport; + INIT_DELAYED_WORK(&port->repair_work, nvmet_rdma_repair_port_work); + + switch (nport->disc_addr.adrfam) { + case NVMF_ADDR_FAMILY_IP4: + af = AF_INET; + break; + case NVMF_ADDR_FAMILY_IP6: + af = AF_INET6; + break; + default: + pr_err("address family %d not supported\n", + nport->disc_addr.adrfam); + ret = -EINVAL; + goto out_free_port; + } + + if (nport->inline_data_size < 0) { + nport->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE; + } else if (nport->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) { + pr_warn("inline_data_size %u is too large, reducing to %u\n", + nport->inline_data_size, + NVMET_RDMA_MAX_INLINE_DATA_SIZE); + nport->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE; + } + + ret = inet_pton_with_scope(&init_net, af, nport->disc_addr.traddr, + nport->disc_addr.trsvcid, &port->addr); + if (ret) { + pr_err("malformed ip/port passed: %s:%s\n", + nport->disc_addr.traddr, nport->disc_addr.trsvcid); + goto out_free_port; + } + + ret = nvmet_rdma_enable_port(port); + if(ret) + goto out_free_port; + + pr_info("enabling port %d (%pISpcs)\n", + le16_to_cpu(nport->disc_addr.portid), + (struct sockaddr *)&port->addr); + + return 0; + +out_free_port: + kfree(port); + return ret; +} + +static void nvmet_rdma_remove_port(struct nvmet_port *nport) +{ + struct nvmet_rdma_port *port = nport->priv; + + cancel_delayed_work_sync(&port->repair_work); + nvmet_rdma_disable_port(port); + kfree(port); } static void nvmet_rdma_disc_port_addr(struct nvmet_req *req, - struct nvmet_port *port, char *traddr) + struct nvmet_port *nport, char *traddr) { - struct rdma_cm_id *cm_id = port->priv; + struct nvmet_rdma_port *port = nport->priv; + struct rdma_cm_id *cm_id = port->cm_id; if (inet_addr_is_any((struct sockaddr *)&cm_id->route.addr.src_addr)) { struct nvmet_rdma_rsp *rsp = @@ -1603,7 +1665,7 @@ static void nvmet_rdma_disc_port_addr(struct 
nvmet_req *req, sprintf(traddr, "%pISc", addr); } else { - memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE); + memcpy(traddr, nport->disc_addr.traddr, NVMF_TRADDR_SIZE); } } -- 2.20.1 _______________________________________________ linux-nvme mailing list linux-nvme@lists.infradead.org http://lists.infradead.org/mailman/listinfo/linux-nvme ^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH] nvmet-rdma: fix bonding failover possible NULL deref 2020-04-02 15:29 [PATCH] nvmet-rdma: fix bonding failover possible NULL deref Sagi Grimberg @ 2020-04-02 15:35 ` Alex Lyakas 2020-04-02 15:38 ` Alex Lyakas 0 siblings, 1 reply; 3+ messages in thread From: Alex Lyakas @ 2020-04-02 15:35 UTC (permalink / raw) To: Sagi Grimberg; +Cc: Keith Busch, Max Gurtovoy, Christoph Hellwig, linux-nvme [-- Attachment #1: Type: text/plain, Size: 11228 bytes --] Hi Sagi, Why did you ignore the following hunk in the patch that I attached? @@ -914,7 +921,8 @@ static void nvmet_rdma_free_dev(struct kref *ref) static struct nvmet_rdma_device * nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id) { - struct nvmet_port *port = cm_id->context; + struct nvmet_rdma_port *port = cm_id->context; + struct nvmet_port *nport = port->nport; struct nvmet_rdma_device *ndev; int inline_page_count; int inline_sge_count; Without this hunk, nvmet_rdma_find_get_device will not work properly, or am I missing something? I also mentioned this in my review of your initial patch from the summer. Attaching the full patch again... Thanks, Alex. I mentioned this twice, and also added it in my attached patch. On Thu, Apr 2, 2020 at 6:30 PM Sagi Grimberg <sagi@grimberg.me> wrote: > > RDMA_CM_EVENT_ADDR_CHANGE event occur in the case of bonding failover > on normal as well as on listening cm_ids. Hence this event will > immediately trigger a NULL dereference trying to disconnect a queue > for a cm_id that actually belongs to the port. 
> > To fix this we provide a different handler for the listener cm_ids > that will defer a work to disable+(re)enable the port which essentially > destroys and setups another listener cm_id > > Reported-by: Alex Lyakas <alex@zadara.com> > Tested-by: Alex Lyakas <alex@zadara.com> > Signed-off-by: Sagi Grimberg <sagi@grimberg.me> > --- > drivers/nvme/target/rdma.c | 164 +++++++++++++++++++++++++------------ > 1 file changed, 113 insertions(+), 51 deletions(-) > > diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c > index 9e1b8c61f54e..8dac89b7aa12 100644 > --- a/drivers/nvme/target/rdma.c > +++ b/drivers/nvme/target/rdma.c > @@ -105,6 +105,13 @@ struct nvmet_rdma_queue { > struct list_head queue_list; > }; > > +struct nvmet_rdma_port { > + struct nvmet_port *nport; > + struct sockaddr_storage addr; > + struct rdma_cm_id *cm_id; > + struct delayed_work repair_work; > +}; > + > struct nvmet_rdma_device { > struct ib_device *device; > struct ib_pd *pd; > @@ -1272,6 +1279,7 @@ static int nvmet_rdma_cm_accept(struct rdma_cm_id *cm_id, > static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, > struct rdma_cm_event *event) > { > + struct nvmet_rdma_port *port = cm_id->context; > struct nvmet_rdma_device *ndev; > struct nvmet_rdma_queue *queue; > int ret = -EINVAL; > @@ -1287,7 +1295,7 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, > ret = -ENOMEM; > goto put_device; > } > - queue->port = cm_id->context; > + queue->port = port->nport; > > if (queue->host_qid == 0) { > /* Let inflight controller teardown complete */ > @@ -1412,7 +1420,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id, > static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id, > struct nvmet_rdma_queue *queue) > { > - struct nvmet_port *port; > + struct nvmet_rdma_port *port; > > if (queue) { > /* > @@ -1431,7 +1439,7 @@ static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id, > * cm_id destroy. 
use atomic xchg to make sure > * we don't compete with remove_port. > */ > - if (xchg(&port->priv, NULL) != cm_id) > + if (xchg(&port->cm_id, NULL) != cm_id) > return 0; > > /* > @@ -1462,6 +1470,13 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id, > nvmet_rdma_queue_established(queue); > break; > case RDMA_CM_EVENT_ADDR_CHANGE: > + if (!queue) { > + struct nvmet_rdma_port *port = cm_id->context; > + > + schedule_delayed_work(&port->repair_work, 0); > + break; > + } > + /* FALLTHROUGH */ > case RDMA_CM_EVENT_DISCONNECTED: > case RDMA_CM_EVENT_TIMEWAIT_EXIT: > nvmet_rdma_queue_disconnect(queue); > @@ -1504,42 +1519,19 @@ static void nvmet_rdma_delete_ctrl(struct nvmet_ctrl *ctrl) > mutex_unlock(&nvmet_rdma_queue_mutex); > } > > -static int nvmet_rdma_add_port(struct nvmet_port *port) > +static void nvmet_rdma_disable_port(struct nvmet_rdma_port *port) > { > - struct rdma_cm_id *cm_id; > - struct sockaddr_storage addr = { }; > - __kernel_sa_family_t af; > - int ret; > + struct rdma_cm_id *cm_id = xchg(&port->cm_id, NULL); > > - switch (port->disc_addr.adrfam) { > - case NVMF_ADDR_FAMILY_IP4: > - af = AF_INET; > - break; > - case NVMF_ADDR_FAMILY_IP6: > - af = AF_INET6; > - break; > - default: > - pr_err("address family %d not supported\n", > - port->disc_addr.adrfam); > - return -EINVAL; > - } > - > - if (port->inline_data_size < 0) { > - port->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE; > - } else if (port->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) { > - pr_warn("inline_data_size %u is too large, reducing to %u\n", > - port->inline_data_size, > - NVMET_RDMA_MAX_INLINE_DATA_SIZE); > - port->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE; > - } > + if (cm_id) > + rdma_destroy_id(cm_id); > +} > > - ret = inet_pton_with_scope(&init_net, af, port->disc_addr.traddr, > - port->disc_addr.trsvcid, &addr); > - if (ret) { > - pr_err("malformed ip/port passed: %s:%s\n", > - port->disc_addr.traddr, port->disc_addr.trsvcid); > - return ret; > 
- } > +static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port) > +{ > + struct sockaddr *addr = (struct sockaddr *)&port->addr; > + struct rdma_cm_id *cm_id; > + int ret; > > cm_id = rdma_create_id(&init_net, nvmet_rdma_cm_handler, port, > RDMA_PS_TCP, IB_QPT_RC); > @@ -1558,23 +1550,19 @@ static int nvmet_rdma_add_port(struct nvmet_port *port) > goto out_destroy_id; > } > > - ret = rdma_bind_addr(cm_id, (struct sockaddr *)&addr); > + ret = rdma_bind_addr(cm_id, addr); > if (ret) { > - pr_err("binding CM ID to %pISpcs failed (%d)\n", > - (struct sockaddr *)&addr, ret); > + pr_err("binding CM ID to %pISpcs failed (%d)\n", addr, ret); > goto out_destroy_id; > } > > ret = rdma_listen(cm_id, 128); > if (ret) { > - pr_err("listening to %pISpcs failed (%d)\n", > - (struct sockaddr *)&addr, ret); > + pr_err("listening to %pISpcs failed (%d)\n", addr, ret); > goto out_destroy_id; > } > > - pr_info("enabling port %d (%pISpcs)\n", > - le16_to_cpu(port->disc_addr.portid), (struct sockaddr *)&addr); > - port->priv = cm_id; > + port->cm_id = cm_id; > return 0; > > out_destroy_id: > @@ -1582,18 +1570,92 @@ static int nvmet_rdma_add_port(struct nvmet_port *port) > return ret; > } > > -static void nvmet_rdma_remove_port(struct nvmet_port *port) > +static void nvmet_rdma_repair_port_work(struct work_struct *w) > { > - struct rdma_cm_id *cm_id = xchg(&port->priv, NULL); > + struct nvmet_rdma_port *port = container_of(to_delayed_work(w), > + struct nvmet_rdma_port, repair_work); > + int ret; > > - if (cm_id) > - rdma_destroy_id(cm_id); > + nvmet_rdma_disable_port(port); > + ret = nvmet_rdma_enable_port(port); > + if (ret) > + schedule_delayed_work(&port->repair_work, 5 * HZ); > +} > + > +static int nvmet_rdma_add_port(struct nvmet_port *nport) > +{ > + struct nvmet_rdma_port *port; > + __kernel_sa_family_t af; > + int ret; > + > + port = kzalloc(sizeof(*port), GFP_KERNEL); > + if (!port) > + return -ENOMEM; > + > + nport->priv = port; > + port->nport = nport; > + 
INIT_DELAYED_WORK(&port->repair_work, nvmet_rdma_repair_port_work); > + > + switch (nport->disc_addr.adrfam) { > + case NVMF_ADDR_FAMILY_IP4: > + af = AF_INET; > + break; > + case NVMF_ADDR_FAMILY_IP6: > + af = AF_INET6; > + break; > + default: > + pr_err("address family %d not supported\n", > + nport->disc_addr.adrfam); > + ret = -EINVAL; > + goto out_free_port; > + } > + > + if (nport->inline_data_size < 0) { > + nport->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE; > + } else if (nport->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) { > + pr_warn("inline_data_size %u is too large, reducing to %u\n", > + nport->inline_data_size, > + NVMET_RDMA_MAX_INLINE_DATA_SIZE); > + nport->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE; > + } > + > + ret = inet_pton_with_scope(&init_net, af, nport->disc_addr.traddr, > + nport->disc_addr.trsvcid, &port->addr); > + if (ret) { > + pr_err("malformed ip/port passed: %s:%s\n", > + nport->disc_addr.traddr, nport->disc_addr.trsvcid); > + goto out_free_port; > + } > + > + ret = nvmet_rdma_enable_port(port); > + if(ret) > + goto out_free_port; > + > + pr_info("enabling port %d (%pISpcs)\n", > + le16_to_cpu(nport->disc_addr.portid), > + (struct sockaddr *)&port->addr); > + > + return 0; > + > +out_free_port: > + kfree(port); > + return ret; > +} > + > +static void nvmet_rdma_remove_port(struct nvmet_port *nport) > +{ > + struct nvmet_rdma_port *port = nport->priv; > + > + cancel_delayed_work_sync(&port->repair_work); > + nvmet_rdma_disable_port(port); > + kfree(port); > } > > static void nvmet_rdma_disc_port_addr(struct nvmet_req *req, > - struct nvmet_port *port, char *traddr) > + struct nvmet_port *nport, char *traddr) > { > - struct rdma_cm_id *cm_id = port->priv; > + struct nvmet_rdma_port *port = nport->priv; > + struct rdma_cm_id *cm_id = port->cm_id; > > if (inet_addr_is_any((struct sockaddr *)&cm_id->route.addr.src_addr)) { > struct nvmet_rdma_rsp *rsp = > @@ -1603,7 +1665,7 @@ static void 
nvmet_rdma_disc_port_addr(struct nvmet_req *req, > > sprintf(traddr, "%pISc", addr); > } else { > - memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE); > + memcpy(traddr, nport->disc_addr.traddr, NVMF_TRADDR_SIZE); > } > } > > -- > 2.20.1 > [-- Attachment #2: 0001-nvmet-rdma-fix-bonding-failover-possible-NULL-deref.5.2.patch --] [-- Type: application/octet-stream, Size: 8809 bytes --] diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 36d906a..ebe7c43 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -102,6 +102,13 @@ struct nvmet_rdma_queue { struct list_head queue_list; }; +struct nvmet_rdma_port { + struct nvmet_port *nport; + struct sockaddr_storage addr; + struct rdma_cm_id *cm_id; + struct delayed_work repair_work; +}; + struct nvmet_rdma_device { struct ib_device *device; struct ib_pd *pd; @@ -914,7 +921,8 @@ static void nvmet_rdma_free_dev(struct kref *ref) static struct nvmet_rdma_device * nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id) { - struct nvmet_port *port = cm_id->context; + struct nvmet_rdma_port *port = cm_id->context; + struct nvmet_port *nport = port->nport; struct nvmet_rdma_device *ndev; int inline_page_count; int inline_sge_count; @@ -931,17 +939,17 @@ static void nvmet_rdma_free_dev(struct kref *ref) if (!ndev) goto out_err; - inline_page_count = num_pages(port->inline_data_size); + inline_page_count = num_pages(nport->inline_data_size); inline_sge_count = max(cm_id->device->attrs.max_sge_rd, cm_id->device->attrs.max_recv_sge) - 1; if (inline_page_count > inline_sge_count) { pr_warn("inline_data_size %d cannot be supported by device %s. 
Reducing to %lu.\n", - port->inline_data_size, cm_id->device->name, + nport->inline_data_size, cm_id->device->name, inline_sge_count * PAGE_SIZE); - port->inline_data_size = inline_sge_count * PAGE_SIZE; + nport->inline_data_size = inline_sge_count * PAGE_SIZE; inline_page_count = inline_sge_count; } - ndev->inline_data_size = port->inline_data_size; + ndev->inline_data_size = nport->inline_data_size; ndev->inline_page_count = inline_page_count; ndev->device = cm_id->device; kref_init(&ndev->ref); @@ -1267,6 +1275,7 @@ static int nvmet_rdma_cm_accept(struct rdma_cm_id *cm_id, static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, struct rdma_cm_event *event) { + struct nvmet_rdma_port *port = cm_id->context; struct nvmet_rdma_device *ndev; struct nvmet_rdma_queue *queue; int ret = -EINVAL; @@ -1282,7 +1291,7 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, ret = -ENOMEM; goto put_device; } - queue->port = cm_id->context; + queue->port = port->nport; if (queue->host_qid == 0) { /* Let inflight controller teardown complete */ @@ -1407,7 +1416,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id, static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id, struct nvmet_rdma_queue *queue) { - struct nvmet_port *port; + struct nvmet_rdma_port *port; if (queue) { /* @@ -1426,7 +1435,7 @@ static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id, * cm_id destroy. use atomic xchg to make sure * we don't compete with remove_port. 
*/ - if (xchg(&port->priv, NULL) != cm_id) + if (xchg(&port->cm_id, NULL) != cm_id) return 0; /* @@ -1457,6 +1466,13 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id, nvmet_rdma_queue_established(queue); break; case RDMA_CM_EVENT_ADDR_CHANGE: + if (!queue) { + struct nvmet_rdma_port *port = cm_id->context; + + schedule_delayed_work(&port->repair_work, 0); + break; + } + /* FALLTHROUGH */ case RDMA_CM_EVENT_DISCONNECTED: case RDMA_CM_EVENT_TIMEWAIT_EXIT: nvmet_rdma_queue_disconnect(queue); @@ -1499,42 +1515,19 @@ static void nvmet_rdma_delete_ctrl(struct nvmet_ctrl *ctrl) mutex_unlock(&nvmet_rdma_queue_mutex); } -static int nvmet_rdma_add_port(struct nvmet_port *port) +static void nvmet_rdma_disable_port(struct nvmet_rdma_port *port) { - struct rdma_cm_id *cm_id; - struct sockaddr_storage addr = { }; - __kernel_sa_family_t af; - int ret; - - switch (port->disc_addr.adrfam) { - case NVMF_ADDR_FAMILY_IP4: - af = AF_INET; - break; - case NVMF_ADDR_FAMILY_IP6: - af = AF_INET6; - break; - default: - pr_err("address family %d not supported\n", - port->disc_addr.adrfam); - return -EINVAL; - } + struct rdma_cm_id *cm_id = xchg(&port->cm_id, NULL); - if (port->inline_data_size < 0) { - port->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE; - } else if (port->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) { - pr_warn("inline_data_size %u is too large, reducing to %u\n", - port->inline_data_size, - NVMET_RDMA_MAX_INLINE_DATA_SIZE); - port->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE; - } + if (cm_id) + rdma_destroy_id(cm_id); +} - ret = inet_pton_with_scope(&init_net, af, port->disc_addr.traddr, - port->disc_addr.trsvcid, &addr); - if (ret) { - pr_err("malformed ip/port passed: %s:%s\n", - port->disc_addr.traddr, port->disc_addr.trsvcid); - return ret; - } +static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port) +{ + struct sockaddr *addr = (struct sockaddr *)&port->addr; + struct rdma_cm_id *cm_id; + int ret; cm_id = 
rdma_create_id(&init_net, nvmet_rdma_cm_handler, port, RDMA_PS_TCP, IB_QPT_RC); @@ -1553,23 +1546,19 @@ static int nvmet_rdma_add_port(struct nvmet_port *port) goto out_destroy_id; } - ret = rdma_bind_addr(cm_id, (struct sockaddr *)&addr); + ret = rdma_bind_addr(cm_id, addr); if (ret) { - pr_err("binding CM ID to %pISpcs failed (%d)\n", - (struct sockaddr *)&addr, ret); + pr_err("binding CM ID to %pISpcs failed (%d)\n", addr, ret); goto out_destroy_id; } ret = rdma_listen(cm_id, 128); if (ret) { - pr_err("listening to %pISpcs failed (%d)\n", - (struct sockaddr *)&addr, ret); + pr_err("listening to %pISpcs failed (%d)\n", addr, ret); goto out_destroy_id; } - pr_info("enabling port %d (%pISpcs)\n", - le16_to_cpu(port->disc_addr.portid), (struct sockaddr *)&addr); - port->priv = cm_id; + port->cm_id = cm_id; return 0; out_destroy_id: @@ -1577,18 +1566,92 @@ static int nvmet_rdma_add_port(struct nvmet_port *port) return ret; } -static void nvmet_rdma_remove_port(struct nvmet_port *port) +static void nvmet_rdma_repair_port_work(struct work_struct *w) { - struct rdma_cm_id *cm_id = xchg(&port->priv, NULL); + struct nvmet_rdma_port *port = container_of(to_delayed_work(w), + struct nvmet_rdma_port, repair_work); + int ret; - if (cm_id) - rdma_destroy_id(cm_id); + nvmet_rdma_disable_port(port); + ret = nvmet_rdma_enable_port(port); + if (ret) + schedule_delayed_work(&port->repair_work, 5 * HZ); +} + +static int nvmet_rdma_add_port(struct nvmet_port *nport) +{ + struct nvmet_rdma_port *port; + __kernel_sa_family_t af; + int ret; + + port = kzalloc(sizeof(*port), GFP_KERNEL); + if (!port) + return -ENOMEM; + + nport->priv = port; + port->nport = nport; + INIT_DELAYED_WORK(&port->repair_work, nvmet_rdma_repair_port_work); + + switch (nport->disc_addr.adrfam) { + case NVMF_ADDR_FAMILY_IP4: + af = AF_INET; + break; + case NVMF_ADDR_FAMILY_IP6: + af = AF_INET6; + break; + default: + pr_err("address family %d not supported\n", + nport->disc_addr.adrfam); + ret = -EINVAL; + goto 
out_free_port; + } + + if (nport->inline_data_size < 0) { + nport->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE; + } else if (nport->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) { + pr_warn("inline_data_size %u is too large, reducing to %u\n", + nport->inline_data_size, + NVMET_RDMA_MAX_INLINE_DATA_SIZE); + nport->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE; + } + + ret = inet_pton_with_scope(&init_net, af, nport->disc_addr.traddr, + nport->disc_addr.trsvcid, &port->addr); + if (ret) { + pr_err("malformed ip/port passed: %s:%s\n", + nport->disc_addr.traddr, nport->disc_addr.trsvcid); + goto out_free_port; + } + + ret = nvmet_rdma_enable_port(port); + if(ret) + goto out_free_port; + + pr_info("enabling port %d (%pISpcs)\n", + le16_to_cpu(nport->disc_addr.portid), + (struct sockaddr *)&port->addr); + + return 0; + +out_free_port: + kfree(port); + return ret; +} + +static void nvmet_rdma_remove_port(struct nvmet_port *nport) +{ + struct nvmet_rdma_port *port = nport->priv; + + cancel_delayed_work_sync(&port->repair_work); + nvmet_rdma_disable_port(port); + kfree(port); } static void nvmet_rdma_disc_port_addr(struct nvmet_req *req, - struct nvmet_port *port, char *traddr) + struct nvmet_port *nport, char *traddr) { - struct rdma_cm_id *cm_id = port->priv; + struct nvmet_rdma_port *port = nport->priv; + struct rdma_cm_id *cm_id = port->cm_id; if (inet_addr_is_any((struct sockaddr *)&cm_id->route.addr.src_addr)) { struct nvmet_rdma_rsp *rsp = @@ -1598,7 +1661,7 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req, sprintf(traddr, "%pISc", addr); } else { - memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE); + memcpy(traddr, nport->disc_addr.traddr, NVMF_TRADDR_SIZE); } } [-- Attachment #3: Type: text/plain, Size: 158 bytes --] _______________________________________________ linux-nvme mailing list linux-nvme@lists.infradead.org http://lists.infradead.org/mailman/listinfo/linux-nvme ^ permalink raw reply related [flat|nested] 3+ 
messages in thread
* Re: [PATCH] nvmet-rdma: fix bonding failover possible NULL deref 2020-04-02 15:35 ` Alex Lyakas @ 2020-04-02 15:38 ` Alex Lyakas 0 siblings, 0 replies; 3+ messages in thread From: Alex Lyakas @ 2020-04-02 15:38 UTC (permalink / raw) To: Sagi Grimberg; +Cc: Keith Busch, Max Gurtovoy, Christoph Hellwig, linux-nvme And the next hunk is needed as well: @@ -931,17 +939,17 @@ static void nvmet_rdma_free_dev(struct kref *ref) if (!ndev) goto out_err; - inline_page_count = num_pages(port->inline_data_size); + inline_page_count = num_pages(nport->inline_data_size); inline_sge_count = max(cm_id->device->attrs.max_sge_rd, cm_id->device->attrs.max_recv_sge) - 1; if (inline_page_count > inline_sge_count) { pr_warn("inline_data_size %d cannot be supported by device %s. Reducing to %lu.\n", - port->inline_data_size, cm_id->device->name, + nport->inline_data_size, cm_id->device->name, inline_sge_count * PAGE_SIZE); - port->inline_data_size = inline_sge_count * PAGE_SIZE; + nport->inline_data_size = inline_sge_count * PAGE_SIZE; inline_page_count = inline_sge_count; } - ndev->inline_data_size = port->inline_data_size; + ndev->inline_data_size = nport->inline_data_size; ndev->inline_page_count = inline_page_count; ndev->device = cm_id->device; kref_init(&ndev->ref); On Thu, Apr 2, 2020 at 6:35 PM Alex Lyakas <alex@zadara.com> wrote: > > Hi Sagi, > > Why did you ignore the following hunk in the patch that I attached? > > @@ -914,7 +921,8 @@ static void nvmet_rdma_free_dev(struct kref *ref) > static struct nvmet_rdma_device * > nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id) > { > - struct nvmet_port *port = cm_id->context; > + struct nvmet_rdma_port *port = cm_id->context; > + struct nvmet_port *nport = port->nport; > struct nvmet_rdma_device *ndev; > int inline_page_count; > int inline_sge_count; > > Without this hunk, nvmet_rdma_find_get_device will not work properly, > or am I missing something? > > I mentioned this also in review of your initial patch from the summer. 
> Attaching the full patch again... > > Thanks, > Alex. > > > > I mentioned this twice, and also added it in my attached path. > > On Thu, Apr 2, 2020 at 6:30 PM Sagi Grimberg <sagi@grimberg.me> wrote: > > > > RDMA_CM_EVENT_ADDR_CHANGE event occur in the case of bonding failover > > on normal as well as on listening cm_ids. Hence this event will > > immediately trigger a NULL dereference trying to disconnect a queue > > for a cm_id that actually belongs to the port. > > > > To fix this we provide a different handler for the listener cm_ids > > that will defer a work to disable+(re)enable the port which essentially > > destroys and setups another listener cm_id > > > > Reported-by: Alex Lyakas <alex@zadara.com> > > Tested-by: Alex Lyakas <alex@zadara.com> > > Signed-off-by: Sagi Grimberg <sagi@grimberg.me> > > --- > > drivers/nvme/target/rdma.c | 164 +++++++++++++++++++++++++------------ > > 1 file changed, 113 insertions(+), 51 deletions(-) > > > > diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c > > index 9e1b8c61f54e..8dac89b7aa12 100644 > > --- a/drivers/nvme/target/rdma.c > > +++ b/drivers/nvme/target/rdma.c > > @@ -105,6 +105,13 @@ struct nvmet_rdma_queue { > > struct list_head queue_list; > > }; > > > > +struct nvmet_rdma_port { > > + struct nvmet_port *nport; > > + struct sockaddr_storage addr; > > + struct rdma_cm_id *cm_id; > > + struct delayed_work repair_work; > > +}; > > + > > struct nvmet_rdma_device { > > struct ib_device *device; > > struct ib_pd *pd; > > @@ -1272,6 +1279,7 @@ static int nvmet_rdma_cm_accept(struct rdma_cm_id *cm_id, > > static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, > > struct rdma_cm_event *event) > > { > > + struct nvmet_rdma_port *port = cm_id->context; > > struct nvmet_rdma_device *ndev; > > struct nvmet_rdma_queue *queue; > > int ret = -EINVAL; > > @@ -1287,7 +1295,7 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, > > ret = -ENOMEM; > > goto put_device; > > } > > - queue->port 
= cm_id->context; > > + queue->port = port->nport; > > > > if (queue->host_qid == 0) { > > /* Let inflight controller teardown complete */ > > @@ -1412,7 +1420,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id, > > static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id, > > struct nvmet_rdma_queue *queue) > > { > > - struct nvmet_port *port; > > + struct nvmet_rdma_port *port; > > > > if (queue) { > > /* > > @@ -1431,7 +1439,7 @@ static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id, > > * cm_id destroy. use atomic xchg to make sure > > * we don't compete with remove_port. > > */ > > - if (xchg(&port->priv, NULL) != cm_id) > > + if (xchg(&port->cm_id, NULL) != cm_id) > > return 0; > > > > /* > > @@ -1462,6 +1470,13 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id, > > nvmet_rdma_queue_established(queue); > > break; > > case RDMA_CM_EVENT_ADDR_CHANGE: > > + if (!queue) { > > + struct nvmet_rdma_port *port = cm_id->context; > > + > > + schedule_delayed_work(&port->repair_work, 0); > > + break; > > + } > > + /* FALLTHROUGH */ > > case RDMA_CM_EVENT_DISCONNECTED: > > case RDMA_CM_EVENT_TIMEWAIT_EXIT: > > nvmet_rdma_queue_disconnect(queue); > > @@ -1504,42 +1519,19 @@ static void nvmet_rdma_delete_ctrl(struct nvmet_ctrl *ctrl) > > mutex_unlock(&nvmet_rdma_queue_mutex); > > } > > > > -static int nvmet_rdma_add_port(struct nvmet_port *port) > > +static void nvmet_rdma_disable_port(struct nvmet_rdma_port *port) > > { > > - struct rdma_cm_id *cm_id; > > - struct sockaddr_storage addr = { }; > > - __kernel_sa_family_t af; > > - int ret; > > + struct rdma_cm_id *cm_id = xchg(&port->cm_id, NULL); > > > > - switch (port->disc_addr.adrfam) { > > - case NVMF_ADDR_FAMILY_IP4: > > - af = AF_INET; > > - break; > > - case NVMF_ADDR_FAMILY_IP6: > > - af = AF_INET6; > > - break; > > - default: > > - pr_err("address family %d not supported\n", > > - port->disc_addr.adrfam); > > - return -EINVAL; > > - } > > - > > - if 
(port->inline_data_size < 0) { > > - port->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE; > > - } else if (port->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) { > > - pr_warn("inline_data_size %u is too large, reducing to %u\n", > > - port->inline_data_size, > > - NVMET_RDMA_MAX_INLINE_DATA_SIZE); > > - port->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE; > > - } > > + if (cm_id) > > + rdma_destroy_id(cm_id); > > +} > > > > - ret = inet_pton_with_scope(&init_net, af, port->disc_addr.traddr, > > - port->disc_addr.trsvcid, &addr); > > - if (ret) { > > - pr_err("malformed ip/port passed: %s:%s\n", > > - port->disc_addr.traddr, port->disc_addr.trsvcid); > > - return ret; > > - } > > +static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port) > > +{ > > + struct sockaddr *addr = (struct sockaddr *)&port->addr; > > + struct rdma_cm_id *cm_id; > > + int ret; > > > > cm_id = rdma_create_id(&init_net, nvmet_rdma_cm_handler, port, > > RDMA_PS_TCP, IB_QPT_RC); > > @@ -1558,23 +1550,19 @@ static int nvmet_rdma_add_port(struct nvmet_port *port) > > goto out_destroy_id; > > } > > > > - ret = rdma_bind_addr(cm_id, (struct sockaddr *)&addr); > > + ret = rdma_bind_addr(cm_id, addr); > > if (ret) { > > - pr_err("binding CM ID to %pISpcs failed (%d)\n", > > - (struct sockaddr *)&addr, ret); > > + pr_err("binding CM ID to %pISpcs failed (%d)\n", addr, ret); > > goto out_destroy_id; > > } > > > > ret = rdma_listen(cm_id, 128); > > if (ret) { > > - pr_err("listening to %pISpcs failed (%d)\n", > > - (struct sockaddr *)&addr, ret); > > + pr_err("listening to %pISpcs failed (%d)\n", addr, ret); > > goto out_destroy_id; > > } > > > > - pr_info("enabling port %d (%pISpcs)\n", > > - le16_to_cpu(port->disc_addr.portid), (struct sockaddr *)&addr); > > - port->priv = cm_id; > > + port->cm_id = cm_id; > > return 0; > > > > out_destroy_id: > > @@ -1582,18 +1570,92 @@ static int nvmet_rdma_add_port(struct nvmet_port *port) > > return ret; > > } > > > > -static void 
nvmet_rdma_remove_port(struct nvmet_port *port) > > +static void nvmet_rdma_repair_port_work(struct work_struct *w) > > { > > - struct rdma_cm_id *cm_id = xchg(&port->priv, NULL); > > + struct nvmet_rdma_port *port = container_of(to_delayed_work(w), > > + struct nvmet_rdma_port, repair_work); > > + int ret; > > > > - if (cm_id) > > - rdma_destroy_id(cm_id); > > + nvmet_rdma_disable_port(port); > > + ret = nvmet_rdma_enable_port(port); > > + if (ret) > > + schedule_delayed_work(&port->repair_work, 5 * HZ); > > +} > > + > > +static int nvmet_rdma_add_port(struct nvmet_port *nport) > > +{ > > + struct nvmet_rdma_port *port; > > + __kernel_sa_family_t af; > > + int ret; > > + > > + port = kzalloc(sizeof(*port), GFP_KERNEL); > > + if (!port) > > + return -ENOMEM; > > + > > + nport->priv = port; > > + port->nport = nport; > > + INIT_DELAYED_WORK(&port->repair_work, nvmet_rdma_repair_port_work); > > + > > + switch (nport->disc_addr.adrfam) { > > + case NVMF_ADDR_FAMILY_IP4: > > + af = AF_INET; > > + break; > > + case NVMF_ADDR_FAMILY_IP6: > > + af = AF_INET6; > > + break; > > + default: > > + pr_err("address family %d not supported\n", > > + nport->disc_addr.adrfam); > > + ret = -EINVAL; > > + goto out_free_port; > > + } > > + > > + if (nport->inline_data_size < 0) { > > + nport->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE; > > + } else if (nport->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) { > > + pr_warn("inline_data_size %u is too large, reducing to %u\n", > > + nport->inline_data_size, > > + NVMET_RDMA_MAX_INLINE_DATA_SIZE); > > + nport->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE; > > + } > > + > > + ret = inet_pton_with_scope(&init_net, af, nport->disc_addr.traddr, > > + nport->disc_addr.trsvcid, &port->addr); > > + if (ret) { > > + pr_err("malformed ip/port passed: %s:%s\n", > > + nport->disc_addr.traddr, nport->disc_addr.trsvcid); > > + goto out_free_port; > > + } > > + > > + ret = nvmet_rdma_enable_port(port); > > + if(ret) > > + goto 
out_free_port; > > + > > + pr_info("enabling port %d (%pISpcs)\n", > > + le16_to_cpu(nport->disc_addr.portid), > > + (struct sockaddr *)&port->addr); > > + > > + return 0; > > + > > +out_free_port: > > + kfree(port); > > + return ret; > > +} > > + > > +static void nvmet_rdma_remove_port(struct nvmet_port *nport) > > +{ > > + struct nvmet_rdma_port *port = nport->priv; > > + > > + cancel_delayed_work_sync(&port->repair_work); > > + nvmet_rdma_disable_port(port); > > + kfree(port); > > } > > > > static void nvmet_rdma_disc_port_addr(struct nvmet_req *req, > > - struct nvmet_port *port, char *traddr) > > + struct nvmet_port *nport, char *traddr) > > { > > - struct rdma_cm_id *cm_id = port->priv; > > + struct nvmet_rdma_port *port = nport->priv; > > + struct rdma_cm_id *cm_id = port->cm_id; > > > > if (inet_addr_is_any((struct sockaddr *)&cm_id->route.addr.src_addr)) { > > struct nvmet_rdma_rsp *rsp = > > @@ -1603,7 +1665,7 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req, > > > > sprintf(traddr, "%pISc", addr); > > } else { > > - memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE); > > + memcpy(traddr, nport->disc_addr.traddr, NVMF_TRADDR_SIZE); > > } > > } > > > > -- > > 2.20.1 > > _______________________________________________ linux-nvme mailing list linux-nvme@lists.infradead.org http://lists.infradead.org/mailman/listinfo/linux-nvme ^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2020-04-02 15:38 UTC | newest] Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2020-04-02 15:29 [PATCH] nvmet-rdma: fix bonding failover possible NULL deref Sagi Grimberg 2020-04-02 15:35 ` Alex Lyakas 2020-04-02 15:38 ` Alex Lyakas
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for NNTP newsgroup(s).