Linux-NVME Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH 1/4] block: centrelize PI remapping logic to the block layer
@ 2019-09-03 15:14 Max Gurtovoy
  2019-09-03 15:14 ` [PATCH 2/4] nvme-rdma: simplify error flow in nvme_rdma_queue_rq Max Gurtovoy
                   ` (3 more replies)
  0 siblings, 4 replies; 14+ messages in thread
From: Max Gurtovoy @ 2019-09-03 15:14 UTC (permalink / raw)
  To: linux-block, axboe, martin.petersen, linux-nvme, keith.busch, hch, sagi
  Cc: Max Gurtovoy, shlomin, israelr

Currently dif_prepare/dif_complete functions are called during the
NVMe and SCSi layers command preparetion/completion, but their actual
place should be the block layer since T10-PI is a general data integrity
feature that is used by block storage protocols.

Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
---
 block/blk-core.c         |  6 ++++++
 block/blk-mq.c           |  4 ++++
 block/t10-pi.c           | 11 ++++-------
 drivers/nvme/host/core.c | 28 +++++++++++++++++++---------
 drivers/scsi/sd.c        | 28 ++++++++++------------------
 drivers/scsi/sd.h        |  1 -
 drivers/scsi/sd_dif.c    |  2 +-
 include/linux/genhd.h    |  1 +
 include/linux/t10-pi.h   | 10 ++++------
 9 files changed, 49 insertions(+), 42 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index d0cc6e1..58ecfd3 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -34,6 +34,7 @@
 #include <linux/ratelimit.h>
 #include <linux/pm_runtime.h>
 #include <linux/blk-cgroup.h>
+#include <linux/t10-pi.h>
 #include <linux/debugfs.h>
 #include <linux/bpf.h>
 
@@ -1405,6 +1406,11 @@ bool blk_update_request(struct request *req, blk_status_t error,
 	if (!req->bio)
 		return false;
 
+	if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
+	    error == BLK_STS_OK)
+		t10_pi_complete(req,
+				nr_bytes / queue_logical_block_size(req->q));
+
 	if (unlikely(error && !blk_rq_is_passthrough(req) &&
 		     !(req->rq_flags & RQF_QUIET)))
 		print_req_error(req, error, __func__);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 0835f4d..30ec078 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -30,6 +30,7 @@
 #include <trace/events/block.h>
 
 #include <linux/blk-mq.h>
+#include <linux/t10-pi.h>
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-debugfs.h"
@@ -693,6 +694,9 @@ void blk_mq_start_request(struct request *rq)
 		 */
 		rq->nr_phys_segments++;
 	}
+
+	if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE)
+		t10_pi_prepare(rq);
 }
 EXPORT_SYMBOL(blk_mq_start_request);
 
diff --git a/block/t10-pi.c b/block/t10-pi.c
index 0c00946..a6d9722 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -171,7 +171,6 @@ static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
 /**
  * t10_pi_prepare - prepare PI prior submitting request to device
  * @rq:              request with PI that should be prepared
- * @protection_type: PI type (Type 1/Type 2/Type 3)
  *
  * For Type 1/Type 2, the virtual start sector is the one that was
  * originally submitted by the block layer for the ref_tag usage. Due to
@@ -181,8 +180,9 @@ static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
  *
  * Type 3 does not have a reference tag so no remapping is required.
  */
-void t10_pi_prepare(struct request *rq, u8 protection_type)
+void t10_pi_prepare(struct request *rq)
 {
+	u8 protection_type = rq->rq_disk->protection_type;
 	const int tuple_sz = rq->q->integrity.tuple_size;
 	u32 ref_tag = t10_pi_ref_tag(rq);
 	struct bio *bio;
@@ -222,12 +222,10 @@ void t10_pi_prepare(struct request *rq, u8 protection_type)
 		bip->bip_flags |= BIP_MAPPED_INTEGRITY;
 	}
 }
-EXPORT_SYMBOL(t10_pi_prepare);
 
 /**
  * t10_pi_complete - prepare PI prior returning request to the block layer
  * @rq:              request with PI that should be prepared
- * @protection_type: PI type (Type 1/Type 2/Type 3)
  * @intervals:       total elements to prepare
  *
  * For Type 1/Type 2, the virtual start sector is the one that was
@@ -239,9 +237,9 @@ void t10_pi_prepare(struct request *rq, u8 protection_type)
  *
  * Type 3 does not have a reference tag so no remapping is required.
  */
-void t10_pi_complete(struct request *rq, u8 protection_type,
-		     unsigned int intervals)
+void t10_pi_complete(struct request *rq, unsigned int intervals)
 {
+	u8 protection_type = rq->rq_disk->protection_type;
 	const int tuple_sz = rq->q->integrity.tuple_size;
 	u32 ref_tag = t10_pi_ref_tag(rq);
 	struct bio *bio;
@@ -276,4 +274,3 @@ void t10_pi_complete(struct request *rq, u8 protection_type,
 		}
 	}
 }
-EXPORT_SYMBOL(t10_pi_complete);
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index d3d6b7b..b91ea60 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -659,8 +659,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 			if (WARN_ON_ONCE(!nvme_ns_has_pi(ns)))
 				return BLK_STS_NOTSUPP;
 			control |= NVME_RW_PRINFO_PRACT;
-		} else if (req_op(req) == REQ_OP_WRITE) {
-			t10_pi_prepare(req, ns->pi_type);
 		}
 
 		switch (ns->pi_type) {
@@ -683,13 +681,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 
 void nvme_cleanup_cmd(struct request *req)
 {
-	if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
-	    nvme_req(req)->status == 0) {
-		struct nvme_ns *ns = req->rq_disk->private_data;
-
-		t10_pi_complete(req, ns->pi_type,
-				blk_rq_bytes(req) >> ns->lba_shift);
-	}
 	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
 		struct nvme_ns *ns = req->rq_disk->private_data;
 		struct page *page = req->special_vec.bv_page;
@@ -1693,6 +1684,24 @@ static void nvme_update_disk_info(struct gendisk *disk,
 	blk_mq_unfreeze_queue(disk->queue);
 }
 
+static void nvme_set_disk_prot_type(struct nvme_ns *ns, struct gendisk *disk)
+{
+	switch (ns->pi_type) {
+	case NVME_NS_DPS_PI_TYPE1:
+		disk->protection_type = T10_PI_TYPE1_PROTECTION;
+		break;
+	case NVME_NS_DPS_PI_TYPE2:
+		disk->protection_type = T10_PI_TYPE2_PROTECTION;
+		break;
+	case NVME_NS_DPS_PI_TYPE3:
+		disk->protection_type = T10_PI_TYPE3_PROTECTION;
+		break;
+	default:
+		disk->protection_type = T10_PI_TYPE0_PROTECTION;
+		break;
+	}
+}
+
 static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 {
 	struct nvme_ns *ns = disk->private_data;
@@ -1712,6 +1721,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 		ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
 	else
 		ns->pi_type = 0;
+	nvme_set_disk_prot_type(ns, disk);
 
 	if (ns->noiob)
 		nvme_set_chunk_size(ns);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 149d406..fa7e7d4 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -55,7 +55,6 @@
 #include <linux/sed-opal.h>
 #include <linux/pm_runtime.h>
 #include <linux/pr.h>
-#include <linux/t10-pi.h>
 #include <linux/uaccess.h>
 #include <asm/unaligned.h>
 
@@ -309,7 +308,7 @@ static void sd_set_flush_flag(struct scsi_disk *sdkp)
 {
 	struct scsi_disk *sdkp = to_scsi_disk(dev);
 
-	return sprintf(buf, "%u\n", sdkp->protection_type);
+	return sprintf(buf, "%u\n", sdkp->disk->protection_type);
 }
 
 static ssize_t
@@ -329,7 +328,7 @@ static void sd_set_flush_flag(struct scsi_disk *sdkp)
 		return err;
 
 	if (val <= T10_PI_TYPE3_PROTECTION)
-		sdkp->protection_type = val;
+		sdkp->disk->protection_type = val;
 
 	return count;
 }
@@ -343,8 +342,8 @@ static void sd_set_flush_flag(struct scsi_disk *sdkp)
 	struct scsi_device *sdp = sdkp->device;
 	unsigned int dif, dix;
 
-	dif = scsi_host_dif_capable(sdp->host, sdkp->protection_type);
-	dix = scsi_host_dix_capable(sdp->host, sdkp->protection_type);
+	dif = scsi_host_dif_capable(sdp->host, sdkp->disk->protection_type);
+	dix = scsi_host_dix_capable(sdp->host, sdkp->disk->protection_type);
 
 	if (!dix && scsi_host_dix_capable(sdp->host, T10_PI_TYPE0_PROTECTION)) {
 		dif = 0;
@@ -1209,17 +1208,15 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
 
 	fua = rq->cmd_flags & REQ_FUA ? 0x8 : 0;
 	dix = scsi_prot_sg_count(cmd);
-	dif = scsi_host_dif_capable(cmd->device->host, sdkp->protection_type);
-
-	if (write && dix)
-		t10_pi_prepare(cmd->request, sdkp->protection_type);
+	dif = scsi_host_dif_capable(cmd->device->host,
+				    sdkp->disk->protection_type);
 
 	if (dif || dix)
 		protect = sd_setup_protect_cmnd(cmd, dix, dif);
 	else
 		protect = 0;
 
-	if (protect && sdkp->protection_type == T10_PI_TYPE2_PROTECTION) {
+	if (protect && sdkp->disk->protection_type == T10_PI_TYPE2_PROTECTION) {
 		ret = sd_setup_rw32_cmnd(cmd, write, lba, nr_blocks,
 					 protect | fua);
 	} else if (sdp->use_16_for_rw || (nr_blocks > 0xffff)) {
@@ -2051,11 +2048,6 @@ static int sd_done(struct scsi_cmnd *SCpnt)
 					   "sd_done: completed %d of %d bytes\n",
 					   good_bytes, scsi_bufflen(SCpnt)));
 
-	if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt) &&
-	    good_bytes)
-		t10_pi_complete(SCpnt->request, sdkp->protection_type,
-				good_bytes / scsi_prot_interval(SCpnt));
-
 	return good_bytes;
 }
 
@@ -2204,7 +2196,7 @@ static int sd_read_protection_type(struct scsi_disk *sdkp, unsigned char *buffer
 	else if (scsi_host_dif_capable(sdp->host, type))
 		ret = 1;
 
-	if (sdkp->first_scan || type != sdkp->protection_type)
+	if (sdkp->first_scan || type != sdkp->disk->protection_type)
 		switch (ret) {
 		case -ENODEV:
 			sd_printk(KERN_ERR, sdkp, "formatted with unsupported" \
@@ -2221,7 +2213,7 @@ static int sd_read_protection_type(struct scsi_disk *sdkp, unsigned char *buffer
 			break;
 		}
 
-	sdkp->protection_type = type;
+	sdkp->disk->protection_type = type;
 
 	return ret;
 }
@@ -2813,7 +2805,7 @@ static void sd_read_app_tag_own(struct scsi_disk *sdkp, unsigned char *buffer)
 	if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC)
 		return;
 
-	if (sdkp->protection_type == 0)
+	if (sdkp->disk->protection_type == 0)
 		return;
 
 	res = scsi_mode_sense(sdp, 1, 0x0a, buffer, 36, SD_TIMEOUT,
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 38c5094..770b6b0f 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -94,7 +94,6 @@ struct scsi_disk {
 	unsigned int	medium_access_timed_out;
 	u8		media_present;
 	u8		write_prot;
-	u8		protection_type;/* Data Integrity Field */
 	u8		provisioning_mode;
 	u8		zeroing_mode;
 	unsigned	ATO : 1;	/* state of disk ATO bit */
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index 4cadb26..cbd0cce 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -28,7 +28,7 @@ void sd_dif_config_host(struct scsi_disk *sdkp)
 {
 	struct scsi_device *sdp = sdkp->device;
 	struct gendisk *disk = sdkp->disk;
-	u8 type = sdkp->protection_type;
+	u8 type = sdkp->disk->protection_type;
 	struct blk_integrity bi;
 	int dif, dix;
 
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 8b5330d..5f58736 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -219,6 +219,7 @@ struct gendisk {
 	int node_id;
 	struct badblocks *bb;
 	struct lockdep_map lockdep_map;
+	u8 protection_type;/* Data Integrity Field */
 };
 
 static inline struct gendisk *part_to_disk(struct hd_struct *part)
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index 3e2a80c..108008e 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -54,15 +54,13 @@ static inline u32 t10_pi_ref_tag(struct request *rq)
 extern const struct blk_integrity_profile t10_pi_type3_ip;
 
 #ifdef CONFIG_BLK_DEV_INTEGRITY
-extern void t10_pi_prepare(struct request *rq, u8 protection_type);
-extern void t10_pi_complete(struct request *rq, u8 protection_type,
-			    unsigned int intervals);
+extern void t10_pi_prepare(struct request *rq);
+extern void t10_pi_complete(struct request *rq, unsigned int intervals);
 #else
-static inline void t10_pi_complete(struct request *rq, u8 protection_type,
-				   unsigned int intervals)
+static inline void t10_pi_complete(struct request *rq, unsigned int intervals)
 {
 }
-static inline void t10_pi_prepare(struct request *rq, u8 protection_type)
+static inline void t10_pi_prepare(struct request *rq)
 {
 }
 #endif
-- 
1.8.3.1


_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 2/4] nvme-rdma: simplify error flow in nvme_rdma_queue_rq
  2019-09-03 15:14 [PATCH 1/4] block: centrelize PI remapping logic to the block layer Max Gurtovoy
@ 2019-09-03 15:14 ` Max Gurtovoy
  2019-09-04  5:51   ` Christoph Hellwig
  2019-09-03 15:14 ` [PATCH 3/4] nvme-tcp: introduce nvme_tcp_complete_rq callback Max Gurtovoy
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 14+ messages in thread
From: Max Gurtovoy @ 2019-09-03 15:14 UTC (permalink / raw)
  To: linux-block, axboe, martin.petersen, linux-nvme, keith.busch, hch, sagi
  Cc: Max Gurtovoy, shlomin, israelr

Make the error flow symmetric to the good flow by moving the call to
nvme_cleanup_cmd from nvme_rdma_unmap_data function.

Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
---
 drivers/nvme/host/rdma.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 1a6449b..db4a60f 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1149,7 +1149,6 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
 			req->nents, rq_data_dir(rq) ==
 				    WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
 
-	nvme_cleanup_cmd(rq);
 	sg_free_table_chained(&req->sg_table, SG_CHUNK_SIZE);
 }
 
@@ -1748,7 +1747,6 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 	if (unlikely(err < 0)) {
 		dev_err(queue->ctrl->ctrl.device,
 			     "Failed to map data (%d)\n", err);
-		nvme_cleanup_cmd(rq);
 		goto err;
 	}
 
@@ -1759,18 +1757,19 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
 			req->mr ? &req->reg_wr.wr : NULL);
-	if (unlikely(err)) {
-		nvme_rdma_unmap_data(queue, rq);
-		goto err;
-	}
+	if (unlikely(err))
+		goto err_unmap;
 
 	return BLK_STS_OK;
 
+err_unmap:
+	nvme_rdma_unmap_data(queue, rq);
 err:
 	if (err == -ENOMEM || err == -EAGAIN)
 		ret = BLK_STS_RESOURCE;
 	else
 		ret = BLK_STS_IOERR;
+	nvme_cleanup_cmd(rq);
 unmap_qe:
 	ib_dma_unmap_single(dev, req->sqe.dma, sizeof(struct nvme_command),
 			    DMA_TO_DEVICE);
@@ -1791,6 +1790,7 @@ static void nvme_rdma_complete_rq(struct request *rq)
 	struct ib_device *ibdev = queue->device->dev;
 
 	nvme_rdma_unmap_data(queue, rq);
+	nvme_cleanup_cmd(rq);
 	ib_dma_unmap_single(ibdev, req->sqe.dma, sizeof(struct nvme_command),
 			    DMA_TO_DEVICE);
 	nvme_complete_rq(rq);
-- 
1.8.3.1


_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 3/4] nvme-tcp: introduce nvme_tcp_complete_rq callback
  2019-09-03 15:14 [PATCH 1/4] block: centrelize PI remapping logic to the block layer Max Gurtovoy
  2019-09-03 15:14 ` [PATCH 2/4] nvme-rdma: simplify error flow in nvme_rdma_queue_rq Max Gurtovoy
@ 2019-09-03 15:14 ` Max Gurtovoy
  2019-09-03 19:15   ` Sagi Grimberg
  2019-09-03 15:14 ` [PATCH 4/4] nvmet-loop: fix possible leakage during error flow Max Gurtovoy
  2019-09-03 19:11 ` [PATCH 1/4] block: centrelize PI remapping logic to the block layer Sagi Grimberg
  3 siblings, 1 reply; 14+ messages in thread
From: Max Gurtovoy @ 2019-09-03 15:14 UTC (permalink / raw)
  To: linux-block, axboe, martin.petersen, linux-nvme, keith.busch, hch, sagi
  Cc: Max Gurtovoy, shlomin, israelr

The nvme_cleanup_cmd function should be called to avoid resource leakage
(it's the opposite to nvme_setup_cmd). Fix the error flow during command
submission and also fix the missing call in command completion.

Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
---
 drivers/nvme/host/tcp.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 606b13d..91b500a 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2093,6 +2093,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
 
 	ret = nvme_tcp_map_data(queue, rq);
 	if (unlikely(ret)) {
+		nvme_cleanup_cmd(rq);
 		dev_err(queue->ctrl->ctrl.device,
 			"Failed to map data (%d)\n", ret);
 		return ret;
@@ -2101,6 +2102,12 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
 	return 0;
 }
 
+static void nvme_tcp_complete_rq(struct request *rq)
+{
+	nvme_cleanup_cmd(rq);
+	nvme_complete_rq(rq);
+}
+
 static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
 		const struct blk_mq_queue_data *bd)
 {
@@ -2161,7 +2168,7 @@ static int nvme_tcp_map_queues(struct blk_mq_tag_set *set)
 
 static struct blk_mq_ops nvme_tcp_mq_ops = {
 	.queue_rq	= nvme_tcp_queue_rq,
-	.complete	= nvme_complete_rq,
+	.complete	= nvme_tcp_complete_rq,
 	.init_request	= nvme_tcp_init_request,
 	.exit_request	= nvme_tcp_exit_request,
 	.init_hctx	= nvme_tcp_init_hctx,
@@ -2171,7 +2178,7 @@ static int nvme_tcp_map_queues(struct blk_mq_tag_set *set)
 
 static struct blk_mq_ops nvme_tcp_admin_mq_ops = {
 	.queue_rq	= nvme_tcp_queue_rq,
-	.complete	= nvme_complete_rq,
+	.complete	= nvme_tcp_complete_rq,
 	.init_request	= nvme_tcp_init_request,
 	.exit_request	= nvme_tcp_exit_request,
 	.init_hctx	= nvme_tcp_init_admin_hctx,
-- 
1.8.3.1


_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 4/4] nvmet-loop: fix possible leakage during error flow
  2019-09-03 15:14 [PATCH 1/4] block: centrelize PI remapping logic to the block layer Max Gurtovoy
  2019-09-03 15:14 ` [PATCH 2/4] nvme-rdma: simplify error flow in nvme_rdma_queue_rq Max Gurtovoy
  2019-09-03 15:14 ` [PATCH 3/4] nvme-tcp: introduce nvme_tcp_complete_rq callback Max Gurtovoy
@ 2019-09-03 15:14 ` Max Gurtovoy
  2019-09-04  5:50   ` Christoph Hellwig
  2019-09-03 19:11 ` [PATCH 1/4] block: centrelize PI remapping logic to the block layer Sagi Grimberg
  3 siblings, 1 reply; 14+ messages in thread
From: Max Gurtovoy @ 2019-09-03 15:14 UTC (permalink / raw)
  To: linux-block, axboe, martin.petersen, linux-nvme, keith.busch, hch, sagi
  Cc: Max Gurtovoy, shlomin, israelr

During nvme_loop_queue_rq error flow, one must call nvme_cleanup_cmd since
it's symmetric to nvme_setup_cmd.

Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
---
 drivers/nvme/target/loop.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 0940c50..7b857c3 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -157,8 +157,10 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 		iod->sg_table.sgl = iod->first_sgl;
 		if (sg_alloc_table_chained(&iod->sg_table,
 				blk_rq_nr_phys_segments(req),
-				iod->sg_table.sgl, SG_CHUNK_SIZE))
+				iod->sg_table.sgl, SG_CHUNK_SIZE)) {
+			nvme_cleanup_cmd(req);
 			return BLK_STS_RESOURCE;
+		}
 
 		iod->req.sg = iod->sg_table.sgl;
 		iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl);
-- 
1.8.3.1


_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/4] block: centrelize PI remapping logic to the block layer
  2019-09-03 15:14 [PATCH 1/4] block: centrelize PI remapping logic to the block layer Max Gurtovoy
                   ` (2 preceding siblings ...)
  2019-09-03 15:14 ` [PATCH 4/4] nvmet-loop: fix possible leakage during error flow Max Gurtovoy
@ 2019-09-03 19:11 ` Sagi Grimberg
  2019-09-03 19:21   ` Jens Axboe
  3 siblings, 1 reply; 14+ messages in thread
From: Sagi Grimberg @ 2019-09-03 19:11 UTC (permalink / raw)
  To: Max Gurtovoy, linux-block, axboe, martin.petersen, linux-nvme,
	keith.busch, hch
  Cc: shlomin, israelr


> +	if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
> +	    error == BLK_STS_OK)
> +		t10_pi_complete(req,
> +				nr_bytes / queue_logical_block_size(req->q));
> +

div in this path? better to use  >> ilog2(block_size).

Also, would be better to have a wrapper in place like:

static inline unsigned short blk_integrity_interval(struct request *rq)
{
	return queue_logical_block_size(rq->q);
}

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/4] nvme-tcp: introduce nvme_tcp_complete_rq callback
  2019-09-03 15:14 ` [PATCH 3/4] nvme-tcp: introduce nvme_tcp_complete_rq callback Max Gurtovoy
@ 2019-09-03 19:15   ` Sagi Grimberg
  2019-09-04  5:54     ` Christoph Hellwig
  0 siblings, 1 reply; 14+ messages in thread
From: Sagi Grimberg @ 2019-09-03 19:15 UTC (permalink / raw)
  To: Max Gurtovoy, linux-block, axboe, martin.petersen, linux-nvme,
	keith.busch, hch
  Cc: shlomin, israelr


> The nvme_cleanup_cmd function should be called to avoid resource leakage
> (it's the opposite to nvme_setup_cmd). Fix the error flow during command
> submission and also fix the missing call in command completion.

Is it always called with nvme_complete_rq? Why not just put it there?

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/4] block: centrelize PI remapping logic to the block layer
  2019-09-03 19:11 ` [PATCH 1/4] block: centrelize PI remapping logic to the block layer Sagi Grimberg
@ 2019-09-03 19:21   ` Jens Axboe
  2019-09-04  5:49     ` Christoph Hellwig
  0 siblings, 1 reply; 14+ messages in thread
From: Jens Axboe @ 2019-09-03 19:21 UTC (permalink / raw)
  To: Sagi Grimberg, Max Gurtovoy, linux-block, martin.petersen,
	linux-nvme, keith.busch, hch
  Cc: shlomin, israelr

On 9/3/19 1:11 PM, Sagi Grimberg wrote:
> 
>> +	if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
>> +	    error == BLK_STS_OK)
>> +		t10_pi_complete(req,
>> +				nr_bytes / queue_logical_block_size(req->q));
>> +
> 
> div in this path? better to use  >> ilog2(block_size).
> 
> Also, would be better to have a wrapper in place like:
> 
> static inline unsigned short blk_integrity_interval(struct request *rq)
> {
> 	return queue_logical_block_size(rq->q);
> }

If it's a hot path thing that matters, I'd strongly suggest to add
a queue block size shift instead.

-- 
Jens Axboe


_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/4] block: centrelize PI remapping logic to the block layer
  2019-09-03 19:21   ` Jens Axboe
@ 2019-09-04  5:49     ` Christoph Hellwig
  2019-09-04  8:32       ` Max Gurtovoy
  0 siblings, 1 reply; 14+ messages in thread
From: Christoph Hellwig @ 2019-09-04  5:49 UTC (permalink / raw)
  To: Jens Axboe
  Cc: keith.busch, Sagi Grimberg, martin.petersen, israelr, linux-nvme,
	linux-block, shlomin, Max Gurtovoy, hch

On Tue, Sep 03, 2019 at 01:21:59PM -0600, Jens Axboe wrote:
> On 9/3/19 1:11 PM, Sagi Grimberg wrote:
> > 
> >> +	if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
> >> +	    error == BLK_STS_OK)
> >> +		t10_pi_complete(req,
> >> +				nr_bytes / queue_logical_block_size(req->q));
> >> +
> > 
> > div in this path? better to use  >> ilog2(block_size).
> > 
> > Also, would be better to have a wrapper in place like:
> > 
> > static inline unsigned short blk_integrity_interval(struct request *rq)
> > {
> > 	return queue_logical_block_size(rq->q);
> > }
> 
> If it's a hot path thing that matters, I'd strongly suggest to add
> a queue block size shift instead.

Make that a protection_interval_shift, please.  While that currently
is the same as the logical block size the concepts are a little
different, and that makes it clear.  Except for that this patch looks
very nice to me, it is great to avoid having drivers to deal with the
PI remapping.

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 4/4] nvmet-loop: fix possible leakage during error flow
  2019-09-03 15:14 ` [PATCH 4/4] nvmet-loop: fix possible leakage during error flow Max Gurtovoy
@ 2019-09-04  5:50   ` Christoph Hellwig
  0 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2019-09-04  5:50 UTC (permalink / raw)
  To: Max Gurtovoy
  Cc: axboe, keith.busch, sagi, martin.petersen, israelr, linux-nvme,
	linux-block, shlomin, hch

On Tue, Sep 03, 2019 at 06:14:15PM +0300, Max Gurtovoy wrote:
> During nvme_loop_queue_rq error flow, one must call nvme_cleanup_cmd since
> it's symmetric to nvme_setup_cmd.
> 
> Signed-off-by: Max Gurtovoy <maxg@mellanox.com>

This seems independent from the rest of the series.  Looks good,
and we should queue it up ASAP:

Reviewed-by: Christoph Hellwig <hch@lst.de>

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/4] nvme-rdma: simplify error flow in nvme_rdma_queue_rq
  2019-09-03 15:14 ` [PATCH 2/4] nvme-rdma: simplify error flow in nvme_rdma_queue_rq Max Gurtovoy
@ 2019-09-04  5:51   ` Christoph Hellwig
  0 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2019-09-04  5:51 UTC (permalink / raw)
  To: Max Gurtovoy
  Cc: axboe, keith.busch, sagi, martin.petersen, israelr, linux-nvme,
	linux-block, shlomin, hch

On Tue, Sep 03, 2019 at 06:14:13PM +0300, Max Gurtovoy wrote:
> Make the error flow symmetric to the good flow by moving the call to
> nvme_cleanup_cmd from nvme_rdma_unmap_data function.
> 
> Signed-off-by: Max Gurtovoy <maxg@mellanox.com>

Looks good, and also independent from the PI changes:

Reviewed-by: Christoph Hellwig <hch@lst.de>

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/4] nvme-tcp: introduce nvme_tcp_complete_rq callback
  2019-09-03 19:15   ` Sagi Grimberg
@ 2019-09-04  5:54     ` Christoph Hellwig
  2019-09-04  9:02       ` Max Gurtovoy
  0 siblings, 1 reply; 14+ messages in thread
From: Christoph Hellwig @ 2019-09-04  5:54 UTC (permalink / raw)
  To: Sagi Grimberg
  Cc: axboe, keith.busch, martin.petersen, israelr, linux-nvme,
	linux-block, shlomin, Max Gurtovoy, hch

On Tue, Sep 03, 2019 at 12:15:48PM -0700, Sagi Grimberg wrote:
>
>> The nvme_cleanup_cmd function should be called to avoid resource leakage
>> (it's the opposite to nvme_setup_cmd). Fix the error flow during command
>> submission and also fix the missing call in command completion.
>
> Is it always called with nvme_complete_rq? Why not just put it there?

Yes, unless I am missing something we could call nvme_cleanup_cmd
at the beginning of nvme_complete_rq.

Max, can you send one series for all the nvme_cleanup_cmd fixes and
cleanups and split that from the PI work?  That might be a little
less confusing.

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/4] block: centrelize PI remapping logic to the block layer
  2019-09-04  5:49     ` Christoph Hellwig
@ 2019-09-04  8:32       ` Max Gurtovoy
  2019-09-04 12:49         ` Christoph Hellwig
  0 siblings, 1 reply; 14+ messages in thread
From: Max Gurtovoy @ 2019-09-04  8:32 UTC (permalink / raw)
  To: Christoph Hellwig, Jens Axboe
  Cc: keith.busch, Sagi Grimberg, martin.petersen, israelr, linux-nvme,
	linux-block, shlomin


On 9/4/2019 8:49 AM, Christoph Hellwig wrote:
> On Tue, Sep 03, 2019 at 01:21:59PM -0600, Jens Axboe wrote:
>> On 9/3/19 1:11 PM, Sagi Grimberg wrote:
>>>> +	if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
>>>> +	    error == BLK_STS_OK)
>>>> +		t10_pi_complete(req,
>>>> +				nr_bytes / queue_logical_block_size(req->q));
>>>> +
>>> div in this path? better to use  >> ilog2(block_size).
>>>
>>> Also, would be better to have a wrapper in place like:
>>>
>>> static inline unsigned short blk_integrity_interval(struct request *rq)
>>> {
>>> 	return queue_logical_block_size(rq->q);
>>> }
>> If it's a hot path thing that matters, I'd strongly suggest to add
>> a queue block size shift instead.
> Make that a protection_interval_shift, please.  While that currently
> is the same as the logical block size the concepts are a little
> different, and that makes it clear.  Except for that this patch looks
> very nice to me, it is great to avoid having drivers to deal with the
> PI remapping.

Christoph,

I was thinking about the following addition to the code (combination of 
all the suggestions):


diff --git a/block/blk-core.c b/block/blk-core.c
index 58ecfd3..cef604c 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1409,7 +1409,7 @@ bool blk_update_request(struct request *req, 
blk_status_t error,
         if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
             error == BLK_STS_OK)
                 t10_pi_complete(req,
-                               nr_bytes / 
queue_logical_block_size(req->q));
+                               nr_bytes >> 
queue_logical_block_shift(req->q));

         if (unlikely(error && !blk_rq_is_passthrough(req) &&
                      !(req->rq_flags & RQF_QUIET)))
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 2c18312..8183ffc 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -330,6 +330,7 @@ void blk_queue_max_segment_size(struct request_queue 
*q, unsigned int max_size)
  void blk_queue_logical_block_size(struct request_queue *q, unsigned 
short size)
  {
         q->limits.logical_block_size = size;
+       q->limits.logical_block_shift = ilog2(size);

         if (q->limits.physical_block_size < size)
                 q->limits.physical_block_size = size;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1ef375d..4a0115e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -332,6 +332,7 @@ struct queue_limits {
         unsigned int            discard_alignment;

         unsigned short          logical_block_size;
+       unsigned short          logical_block_shift;
         unsigned short          max_segments;
         unsigned short          max_integrity_segments;
         unsigned short          max_discard_segments;
@@ -1267,6 +1268,16 @@ static inline unsigned int 
queue_max_segment_size(struct request_queue *q)
         return q->limits.max_segment_size;
  }

+static inline unsigned short queue_logical_block_shift(struct 
request_queue *q)
+{
+       unsigned short retval = 9;
+
+       if (q && q->limits.logical_block_shift)
+               retval = q->limits.logical_block_shift;
+
+       return retval;
+}
+
  static inline unsigned short queue_logical_block_size(struct 
request_queue *q)



_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/4] nvme-tcp: introduce nvme_tcp_complete_rq callback
  2019-09-04  5:54     ` Christoph Hellwig
@ 2019-09-04  9:02       ` Max Gurtovoy
  0 siblings, 0 replies; 14+ messages in thread
From: Max Gurtovoy @ 2019-09-04  9:02 UTC (permalink / raw)
  To: Christoph Hellwig, Sagi Grimberg
  Cc: axboe, keith.busch, martin.petersen, israelr, linux-nvme,
	linux-block, shlomin


On 9/4/2019 8:54 AM, Christoph Hellwig wrote:
> On Tue, Sep 03, 2019 at 12:15:48PM -0700, Sagi Grimberg wrote:
>>> The nvme_cleanup_cmd function should be called to avoid resource leakage
>>> (it's the opposite to nvme_setup_cmd). Fix the error flow during command
>>> submission and also fix the missing call in command completion.
>> Is it always called with nvme_complete_rq? Why not just put it there?
> Yes, unless I am missing something we could call nvme_cleanup_cmd
> at the beginning of nvme_complete_rq.

This will cause change is error flow in nvme_fc but I can check this.

>
> Max, can you send one series for all the nvme_cleanup_cmd fixes and
> cleanups and split that from the PI work?  That might be a little
> less confusing.

Yes I will. There is a connection between the patches but for now only 
the nvme-pci supports T10 in the nvme subsystem, we can separate them.

There will be still a small gap in the error flow of the pci driver that 
will call nvme_cleanup_cmd and do the t10 remap that he shouldn't (but 
that's the behavior today)


_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/4] block: centrelize PI remapping logic to the block layer
  2019-09-04  8:32       ` Max Gurtovoy
@ 2019-09-04 12:49         ` Christoph Hellwig
  0 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2019-09-04 12:49 UTC (permalink / raw)
  To: Max Gurtovoy
  Cc: Jens Axboe, keith.busch, Sagi Grimberg, martin.petersen, israelr,
	linux-nvme, linux-block, shlomin, Christoph Hellwig

On Wed, Sep 04, 2019 at 11:32:04AM +0300, Max Gurtovoy wrote:
>
> On 9/4/2019 8:49 AM, Christoph Hellwig wrote:
>> On Tue, Sep 03, 2019 at 01:21:59PM -0600, Jens Axboe wrote:
>>> On 9/3/19 1:11 PM, Sagi Grimberg wrote:
>>>>> +	if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
>>>>> +	    error == BLK_STS_OK)
>>>>> +		t10_pi_complete(req,
>>>>> +				nr_bytes / queue_logical_block_size(req->q));
>>>>> +
>>>> div in this path? better to use  >> ilog2(block_size).
>>>>
>>>> Also, would be better to have a wrapper in place like:
>>>>
>>>> static inline unsigned short blk_integrity_interval(struct request *rq)
>>>> {
>>>> 	return queue_logical_block_size(rq->q);
>>>> }
>>> If it's a hot path thing that matters, I'd strongly suggest to add
>>> a queue block size shift instead.
>> Make that a protection_interval_shift, please.  While that currently
>> is the same as the logical block size the concepts are a little
>> different, and that makes it clear.  Except for that this patch looks
>> very nice to me, it is great to avoid having drivers to deal with the
>> PI remapping.
>
> Christoph,
>
> I was thinking about the following addition to the code (combination of all 
> the suggestions):

I'll defer to Martin, but I think we still need the integrity_interval
naming in some form.

static inline unsigned short queue_logical_block_shift(struct  request_queue *q)
> +{
> +       unsigned short retval = 9;
> +
> +       if (q && q->limits.logical_block_shift)
> +               retval = q->limits.logical_block_shift;
> +
> +       return retval;

I don't think a NULL queue makes any sense here.  And I'd rather
ensure the field is always set rather than adding a conditional here.

And btw, centrelize in the Subject should be centralize.

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, back to index

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-03 15:14 [PATCH 1/4] block: centrelize PI remapping logic to the block layer Max Gurtovoy
2019-09-03 15:14 ` [PATCH 2/4] nvme-rdma: simplify error flow in nvme_rdma_queue_rq Max Gurtovoy
2019-09-04  5:51   ` Christoph Hellwig
2019-09-03 15:14 ` [PATCH 3/4] nvme-tcp: introduce nvme_tcp_complete_rq callback Max Gurtovoy
2019-09-03 19:15   ` Sagi Grimberg
2019-09-04  5:54     ` Christoph Hellwig
2019-09-04  9:02       ` Max Gurtovoy
2019-09-03 15:14 ` [PATCH 4/4] nvmet-loop: fix possible leakage during error flow Max Gurtovoy
2019-09-04  5:50   ` Christoph Hellwig
2019-09-03 19:11 ` [PATCH 1/4] block: centrelize PI remapping logic to the block layer Sagi Grimberg
2019-09-03 19:21   ` Jens Axboe
2019-09-04  5:49     ` Christoph Hellwig
2019-09-04  8:32       ` Max Gurtovoy
2019-09-04 12:49         ` Christoph Hellwig

Linux-NVME Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-nvme/0 linux-nvme/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-nvme linux-nvme/ https://lore.kernel.org/linux-nvme \
		linux-nvme@lists.infradead.org linux-nvme@archiver.kernel.org
	public-inbox-index linux-nvme


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.infradead.lists.linux-nvme


AGPL code for this site: git clone https://public-inbox.org/ public-inbox