Linux-Block Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH v4 1/3] block: centralize PI remapping logic to the block layer
@ 2019-09-08 15:26 Max Gurtovoy
  2019-09-08 15:26 ` [PATCH v4 2/3] block: don't remap ref tag for T10 PI type 0 Max Gurtovoy
                   ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Max Gurtovoy @ 2019-09-08 15:26 UTC (permalink / raw)
  To: linux-block, axboe, martin.petersen, linux-nvme, keith.busch, hch, sagi
  Cc: shlomin, israelr, Max Gurtovoy

Currently t10_pi_prepare/t10_pi_complete functions are called during the
NVMe and SCSi layers command preparetion/completion, but their actual
place should be the block layer since T10-PI is a general data integrity
feature that is used by block storage protocols.

Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
---

changes from v3:
 - fix > 80 liner
 - move the protection_type assignment into nvme_update_disk_info
 - added a comment regarding dps and DIF type values
 - drop redundant externs from t10-pi.h

changes from v2:
 - remove local variable for protection_type
 - remove remapping between NVMe T10 definition to blk definition
 - added patches 2/3 and 3/3
 - remove pi_type from ns structure

changes from v1:
 - seperate from nvme_cleanup command patches
 - introduce blk_integrity_interval_shift to avoid div in fast path
---
 block/blk-core.c         |  5 +++++
 block/blk-mq.c           |  4 ++++
 block/blk-settings.c     |  1 +
 block/t10-pi.c           | 16 ++++++----------
 drivers/nvme/host/core.c | 38 ++++++++++++++++++--------------------
 drivers/nvme/host/nvme.h |  1 -
 drivers/scsi/sd.c        | 28 ++++++++++------------------
 drivers/scsi/sd.h        |  1 -
 drivers/scsi/sd_dif.c    |  2 +-
 include/linux/blkdev.h   | 12 ++++++++++++
 include/linux/genhd.h    |  1 +
 include/linux/t10-pi.h   | 10 ++++------
 12 files changed, 62 insertions(+), 57 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index d0cc6e1..eda33f9 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -34,6 +34,7 @@
 #include <linux/ratelimit.h>
 #include <linux/pm_runtime.h>
 #include <linux/blk-cgroup.h>
+#include <linux/t10-pi.h>
 #include <linux/debugfs.h>
 #include <linux/bpf.h>
 
@@ -1405,6 +1406,10 @@ bool blk_update_request(struct request *req, blk_status_t error,
 	if (!req->bio)
 		return false;
 
+	if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
+	    error == BLK_STS_OK)
+		t10_pi_complete(req, nr_bytes);
+
 	if (unlikely(error && !blk_rq_is_passthrough(req) &&
 		     !(req->rq_flags & RQF_QUIET)))
 		print_req_error(req, error, __func__);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 0835f4d..30ec078 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -30,6 +30,7 @@
 #include <trace/events/block.h>
 
 #include <linux/blk-mq.h>
+#include <linux/t10-pi.h>
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-debugfs.h"
@@ -693,6 +694,9 @@ void blk_mq_start_request(struct request *rq)
 		 */
 		rq->nr_phys_segments++;
 	}
+
+	if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE)
+		t10_pi_prepare(rq);
 }
 EXPORT_SYMBOL(blk_mq_start_request);
 
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 2c18312..8183ffc 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -330,6 +330,7 @@ void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size)
 void blk_queue_logical_block_size(struct request_queue *q, unsigned short size)
 {
 	q->limits.logical_block_size = size;
+	q->limits.logical_block_shift = ilog2(size);
 
 	if (q->limits.physical_block_size < size)
 		q->limits.physical_block_size = size;
diff --git a/block/t10-pi.c b/block/t10-pi.c
index 0c00946..a33eac4 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -171,7 +171,6 @@ static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
 /**
  * t10_pi_prepare - prepare PI prior submitting request to device
  * @rq:              request with PI that should be prepared
- * @protection_type: PI type (Type 1/Type 2/Type 3)
  *
  * For Type 1/Type 2, the virtual start sector is the one that was
  * originally submitted by the block layer for the ref_tag usage. Due to
@@ -181,13 +180,13 @@ static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
  *
  * Type 3 does not have a reference tag so no remapping is required.
  */
-void t10_pi_prepare(struct request *rq, u8 protection_type)
+void t10_pi_prepare(struct request *rq)
 {
 	const int tuple_sz = rq->q->integrity.tuple_size;
 	u32 ref_tag = t10_pi_ref_tag(rq);
 	struct bio *bio;
 
-	if (protection_type == T10_PI_TYPE3_PROTECTION)
+	if (rq->rq_disk->protection_type == T10_PI_TYPE3_PROTECTION)
 		return;
 
 	__rq_for_each_bio(bio, rq) {
@@ -222,13 +221,11 @@ void t10_pi_prepare(struct request *rq, u8 protection_type)
 		bip->bip_flags |= BIP_MAPPED_INTEGRITY;
 	}
 }
-EXPORT_SYMBOL(t10_pi_prepare);
 
 /**
  * t10_pi_complete - prepare PI prior returning request to the block layer
  * @rq:              request with PI that should be prepared
- * @protection_type: PI type (Type 1/Type 2/Type 3)
- * @intervals:       total elements to prepare
+ * @nr_bytes:        total bytes to prepare
  *
  * For Type 1/Type 2, the virtual start sector is the one that was
  * originally submitted by the block layer for the ref_tag usage. Due to
@@ -239,14 +236,14 @@ void t10_pi_prepare(struct request *rq, u8 protection_type)
  *
  * Type 3 does not have a reference tag so no remapping is required.
  */
-void t10_pi_complete(struct request *rq, u8 protection_type,
-		     unsigned int intervals)
+void t10_pi_complete(struct request *rq, unsigned int nr_bytes)
 {
+	unsigned intervals = nr_bytes >> blk_integrity_interval_shift(rq->q);
 	const int tuple_sz = rq->q->integrity.tuple_size;
 	u32 ref_tag = t10_pi_ref_tag(rq);
 	struct bio *bio;
 
-	if (protection_type == T10_PI_TYPE3_PROTECTION)
+	if (rq->rq_disk->protection_type == T10_PI_TYPE3_PROTECTION)
 		return;
 
 	__rq_for_each_bio(bio, rq) {
@@ -276,4 +273,3 @@ void t10_pi_complete(struct request *rq, u8 protection_type,
 		}
 	}
 }
-EXPORT_SYMBOL(t10_pi_complete);
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index d3d6b7b..bdc0a64 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -194,7 +194,8 @@ static int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl)
 
 static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
 {
-	return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple);
+	return ns->disk->protection_type &&
+		ns->ms == sizeof(struct t10_pi_tuple);
 }
 
 static blk_status_t nvme_error_status(struct request *req)
@@ -659,11 +660,9 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 			if (WARN_ON_ONCE(!nvme_ns_has_pi(ns)))
 				return BLK_STS_NOTSUPP;
 			control |= NVME_RW_PRINFO_PRACT;
-		} else if (req_op(req) == REQ_OP_WRITE) {
-			t10_pi_prepare(req, ns->pi_type);
 		}
 
-		switch (ns->pi_type) {
+		switch (req->rq_disk->protection_type) {
 		case NVME_NS_DPS_PI_TYPE3:
 			control |= NVME_RW_PRINFO_PRCHK_GUARD;
 			break;
@@ -683,13 +682,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 
 void nvme_cleanup_cmd(struct request *req)
 {
-	if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
-	    nvme_req(req)->status == 0) {
-		struct nvme_ns *ns = req->rq_disk->private_data;
-
-		t10_pi_complete(req, ns->pi_type,
-				blk_rq_bytes(req) >> ns->lba_shift);
-	}
 	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
 		struct nvme_ns *ns = req->rq_disk->private_data;
 		struct page *page = req->special_vec.bv_page;
@@ -1500,12 +1492,12 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 }
 
 #ifdef CONFIG_BLK_DEV_INTEGRITY
-static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
+static void nvme_init_integrity(struct gendisk *disk, u16 ms)
 {
 	struct blk_integrity integrity;
 
 	memset(&integrity, 0, sizeof(integrity));
-	switch (pi_type) {
+	switch (disk->protection_type) {
 	case NVME_NS_DPS_PI_TYPE3:
 		integrity.profile = &t10_pi_type3_crc;
 		integrity.tag_size = sizeof(u16) + sizeof(u32);
@@ -1526,7 +1518,7 @@ static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
 	blk_queue_max_integrity_segments(disk->queue, 1);
 }
 #else
-static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
+static void nvme_init_integrity(struct gendisk *disk, u16 ms)
 {
 }
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
@@ -1673,9 +1665,20 @@ static void nvme_update_disk_info(struct gendisk *disk,
 	blk_queue_io_min(disk->queue, phys_bs);
 	blk_queue_io_opt(disk->queue, io_opt);
 
+	/*
+	 * Linux PI implementation requires metadata equal to t10_pi tuple size.
+	 * The NVMe dps value matches the general disk protection type because
+	 * according to the NVMe spec it supports the same protection types as
+	 * DIF (Type 1/2/3 of end-to-end data protection information).
+	 */
+	if (ns->ms == sizeof(struct t10_pi_tuple))
+		disk->protection_type = id->dps & NVME_NS_DPS_PI_MASK;
+	else
+		disk->protection_type = 0;
+
 	if (ns->ms && !ns->ext &&
 	    (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
-		nvme_init_integrity(disk, ns->ms, ns->pi_type);
+		nvme_init_integrity(disk, ns->ms);
 	if ((ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk)) ||
 	    ns->lba_shift > PAGE_SHIFT)
 		capacity = 0;
@@ -1707,11 +1710,6 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 	ns->noiob = le16_to_cpu(id->noiob);
 	ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms);
 	ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
-	/* the PI implementation requires metadata equal t10 pi tuple size */
-	if (ns->ms == sizeof(struct t10_pi_tuple))
-		ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
-	else
-		ns->pi_type = 0;
 
 	if (ns->noiob)
 		nvme_set_chunk_size(ns);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 2d678fb..15a85a5 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -350,7 +350,6 @@ struct nvme_ns {
 	u16 sgs;
 	u32 sws;
 	bool ext;
-	u8 pi_type;
 	unsigned long flags;
 #define NVME_NS_REMOVING	0
 #define NVME_NS_DEAD     	1
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 149d406..fa7e7d4 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -55,7 +55,6 @@
 #include <linux/sed-opal.h>
 #include <linux/pm_runtime.h>
 #include <linux/pr.h>
-#include <linux/t10-pi.h>
 #include <linux/uaccess.h>
 #include <asm/unaligned.h>
 
@@ -309,7 +308,7 @@ static void sd_set_flush_flag(struct scsi_disk *sdkp)
 {
 	struct scsi_disk *sdkp = to_scsi_disk(dev);
 
-	return sprintf(buf, "%u\n", sdkp->protection_type);
+	return sprintf(buf, "%u\n", sdkp->disk->protection_type);
 }
 
 static ssize_t
@@ -329,7 +328,7 @@ static void sd_set_flush_flag(struct scsi_disk *sdkp)
 		return err;
 
 	if (val <= T10_PI_TYPE3_PROTECTION)
-		sdkp->protection_type = val;
+		sdkp->disk->protection_type = val;
 
 	return count;
 }
@@ -343,8 +342,8 @@ static void sd_set_flush_flag(struct scsi_disk *sdkp)
 	struct scsi_device *sdp = sdkp->device;
 	unsigned int dif, dix;
 
-	dif = scsi_host_dif_capable(sdp->host, sdkp->protection_type);
-	dix = scsi_host_dix_capable(sdp->host, sdkp->protection_type);
+	dif = scsi_host_dif_capable(sdp->host, sdkp->disk->protection_type);
+	dix = scsi_host_dix_capable(sdp->host, sdkp->disk->protection_type);
 
 	if (!dix && scsi_host_dix_capable(sdp->host, T10_PI_TYPE0_PROTECTION)) {
 		dif = 0;
@@ -1209,17 +1208,15 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
 
 	fua = rq->cmd_flags & REQ_FUA ? 0x8 : 0;
 	dix = scsi_prot_sg_count(cmd);
-	dif = scsi_host_dif_capable(cmd->device->host, sdkp->protection_type);
-
-	if (write && dix)
-		t10_pi_prepare(cmd->request, sdkp->protection_type);
+	dif = scsi_host_dif_capable(cmd->device->host,
+				    sdkp->disk->protection_type);
 
 	if (dif || dix)
 		protect = sd_setup_protect_cmnd(cmd, dix, dif);
 	else
 		protect = 0;
 
-	if (protect && sdkp->protection_type == T10_PI_TYPE2_PROTECTION) {
+	if (protect && sdkp->disk->protection_type == T10_PI_TYPE2_PROTECTION) {
 		ret = sd_setup_rw32_cmnd(cmd, write, lba, nr_blocks,
 					 protect | fua);
 	} else if (sdp->use_16_for_rw || (nr_blocks > 0xffff)) {
@@ -2051,11 +2048,6 @@ static int sd_done(struct scsi_cmnd *SCpnt)
 					   "sd_done: completed %d of %d bytes\n",
 					   good_bytes, scsi_bufflen(SCpnt)));
 
-	if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt) &&
-	    good_bytes)
-		t10_pi_complete(SCpnt->request, sdkp->protection_type,
-				good_bytes / scsi_prot_interval(SCpnt));
-
 	return good_bytes;
 }
 
@@ -2204,7 +2196,7 @@ static int sd_read_protection_type(struct scsi_disk *sdkp, unsigned char *buffer
 	else if (scsi_host_dif_capable(sdp->host, type))
 		ret = 1;
 
-	if (sdkp->first_scan || type != sdkp->protection_type)
+	if (sdkp->first_scan || type != sdkp->disk->protection_type)
 		switch (ret) {
 		case -ENODEV:
 			sd_printk(KERN_ERR, sdkp, "formatted with unsupported" \
@@ -2221,7 +2213,7 @@ static int sd_read_protection_type(struct scsi_disk *sdkp, unsigned char *buffer
 			break;
 		}
 
-	sdkp->protection_type = type;
+	sdkp->disk->protection_type = type;
 
 	return ret;
 }
@@ -2813,7 +2805,7 @@ static void sd_read_app_tag_own(struct scsi_disk *sdkp, unsigned char *buffer)
 	if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC)
 		return;
 
-	if (sdkp->protection_type == 0)
+	if (sdkp->disk->protection_type == 0)
 		return;
 
 	res = scsi_mode_sense(sdp, 1, 0x0a, buffer, 36, SD_TIMEOUT,
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 38c5094..770b6b0f 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -94,7 +94,6 @@ struct scsi_disk {
 	unsigned int	medium_access_timed_out;
 	u8		media_present;
 	u8		write_prot;
-	u8		protection_type;/* Data Integrity Field */
 	u8		provisioning_mode;
 	u8		zeroing_mode;
 	unsigned	ATO : 1;	/* state of disk ATO bit */
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index 4cadb26..cbd0cce 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -28,7 +28,7 @@ void sd_dif_config_host(struct scsi_disk *sdkp)
 {
 	struct scsi_device *sdp = sdkp->device;
 	struct gendisk *disk = sdkp->disk;
-	u8 type = sdkp->protection_type;
+	u8 type = sdkp->disk->protection_type;
 	struct blk_integrity bi;
 	int dif, dix;
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1ef375d..5901a53 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -332,6 +332,7 @@ struct queue_limits {
 	unsigned int		discard_alignment;
 
 	unsigned short		logical_block_size;
+	unsigned short		logical_block_shift;
 	unsigned short		max_segments;
 	unsigned short		max_integrity_segments;
 	unsigned short		max_discard_segments;
@@ -1543,6 +1544,12 @@ static inline void blk_queue_max_integrity_segments(struct request_queue *q,
 }
 
 static inline unsigned short
+blk_integrity_interval_shift(struct request_queue *q)
+{
+	return q->limits.logical_block_shift;
+}
+
+static inline unsigned short
 queue_max_integrity_segments(struct request_queue *q)
 {
 	return q->limits.max_integrity_segments;
@@ -1626,6 +1633,11 @@ static inline void blk_queue_max_integrity_segments(struct request_queue *q,
 						    unsigned int segs)
 {
 }
+static inline unsigned short
+blk_integrity_interval_shift(struct request_queue *q)
+{
+	return 0;
+}
 static inline unsigned short queue_max_integrity_segments(struct request_queue *q)
 {
 	return 0;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 8b5330d..5f58736 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -219,6 +219,7 @@ struct gendisk {
 	int node_id;
 	struct badblocks *bb;
 	struct lockdep_map lockdep_map;
+	u8 protection_type;/* Data Integrity Field */
 };
 
 static inline struct gendisk *part_to_disk(struct hd_struct *part)
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index 3e2a80c..c161824 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -54,15 +54,13 @@ static inline u32 t10_pi_ref_tag(struct request *rq)
 extern const struct blk_integrity_profile t10_pi_type3_ip;
 
 #ifdef CONFIG_BLK_DEV_INTEGRITY
-extern void t10_pi_prepare(struct request *rq, u8 protection_type);
-extern void t10_pi_complete(struct request *rq, u8 protection_type,
-			    unsigned int intervals);
+void t10_pi_prepare(struct request *rq);
+void t10_pi_complete(struct request *rq, unsigned int nr_bytes);
 #else
-static inline void t10_pi_complete(struct request *rq, u8 protection_type,
-				   unsigned int intervals)
+static inline void t10_pi_complete(struct request *rq, unsigned int nr_bytes)
 {
 }
-static inline void t10_pi_prepare(struct request *rq, u8 protection_type)
+static inline void t10_pi_prepare(struct request *rq)
 {
 }
 #endif
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v4 2/3] block: don't remap ref tag for T10 PI type 0
  2019-09-08 15:26 [PATCH v4 1/3] block: centralize PI remapping logic to the block layer Max Gurtovoy
@ 2019-09-08 15:26 ` Max Gurtovoy
  2019-09-09  2:22   ` Martin K. Petersen
  2019-09-08 15:26 ` [PATCH v4 3/3] nvme: remove PI values definition from NVMe subsystem Max Gurtovoy
  2019-09-09  2:21 ` [PATCH v4 1/3] block: centralize PI remapping logic to the block layer Martin K. Petersen
  2 siblings, 1 reply; 16+ messages in thread
From: Max Gurtovoy @ 2019-09-08 15:26 UTC (permalink / raw)
  To: linux-block, axboe, martin.petersen, linux-nvme, keith.busch, hch, sagi
  Cc: shlomin, israelr, Max Gurtovoy

Only type 1 and type 2 have a reference tag by definition.

Suggested-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
---

changes from v3:
 - added blk_integrity_need_remap helper

---
 block/t10-pi.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/block/t10-pi.c b/block/t10-pi.c
index a33eac4..753b5a8 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -168,6 +168,12 @@ static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
 };
 EXPORT_SYMBOL(t10_pi_type3_ip);
 
+static inline bool blk_integrity_need_remap(struct gendisk *disk)
+{
+	return disk->protection_type == T10_PI_TYPE1_PROTECTION ||
+		disk->protection_type == T10_PI_TYPE2_PROTECTION;
+}
+
 /**
  * t10_pi_prepare - prepare PI prior submitting request to device
  * @rq:              request with PI that should be prepared
@@ -178,7 +184,7 @@ static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
  * likely to be different. Remap protection information to match the
  * physical LBA.
  *
- * Type 3 does not have a reference tag so no remapping is required.
+ * Types 0 and 3 don't have a reference tag so no remapping is required.
  */
 void t10_pi_prepare(struct request *rq)
 {
@@ -186,7 +192,7 @@ void t10_pi_prepare(struct request *rq)
 	u32 ref_tag = t10_pi_ref_tag(rq);
 	struct bio *bio;
 
-	if (rq->rq_disk->protection_type == T10_PI_TYPE3_PROTECTION)
+	if (!blk_integrity_need_remap(rq->rq_disk))
 		return;
 
 	__rq_for_each_bio(bio, rq) {
@@ -234,7 +240,7 @@ void t10_pi_prepare(struct request *rq)
  * to the device, we should remap it back to virtual values expected by the
  * block layer.
  *
- * Type 3 does not have a reference tag so no remapping is required.
+ * Types 0 and 3 don't have a reference tag so no remapping is required.
  */
 void t10_pi_complete(struct request *rq, unsigned int nr_bytes)
 {
@@ -243,7 +249,7 @@ void t10_pi_complete(struct request *rq, unsigned int nr_bytes)
 	u32 ref_tag = t10_pi_ref_tag(rq);
 	struct bio *bio;
 
-	if (rq->rq_disk->protection_type == T10_PI_TYPE3_PROTECTION)
+	if (!blk_integrity_need_remap(rq->rq_disk))
 		return;
 
 	__rq_for_each_bio(bio, rq) {
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v4 3/3] nvme: remove PI values definition from NVMe subsystem
  2019-09-08 15:26 [PATCH v4 1/3] block: centralize PI remapping logic to the block layer Max Gurtovoy
  2019-09-08 15:26 ` [PATCH v4 2/3] block: don't remap ref tag for T10 PI type 0 Max Gurtovoy
@ 2019-09-08 15:26 ` Max Gurtovoy
  2019-09-09  2:21 ` [PATCH v4 1/3] block: centralize PI remapping logic to the block layer Martin K. Petersen
  2 siblings, 0 replies; 16+ messages in thread
From: Max Gurtovoy @ 2019-09-08 15:26 UTC (permalink / raw)
  To: linux-block, axboe, martin.petersen, linux-nvme, keith.busch, hch, sagi
  Cc: shlomin, israelr, Max Gurtovoy

Use block layer definition instead of re-defining it with the same
values.

Suggested-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
---

changes from v3:
 - added Reviewed-by signature

---
 drivers/nvme/host/core.c | 12 ++++++------
 include/linux/nvme.h     |  3 ---
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index bdc0a64..a1c0ce0 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -663,11 +663,11 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 		}
 
 		switch (req->rq_disk->protection_type) {
-		case NVME_NS_DPS_PI_TYPE3:
+		case T10_PI_TYPE3_PROTECTION:
 			control |= NVME_RW_PRINFO_PRCHK_GUARD;
 			break;
-		case NVME_NS_DPS_PI_TYPE1:
-		case NVME_NS_DPS_PI_TYPE2:
+		case T10_PI_TYPE1_PROTECTION:
+		case T10_PI_TYPE2_PROTECTION:
 			control |= NVME_RW_PRINFO_PRCHK_GUARD |
 					NVME_RW_PRINFO_PRCHK_REF;
 			cmnd->rw.reftag = cpu_to_le32(t10_pi_ref_tag(req));
@@ -1498,13 +1498,13 @@ static void nvme_init_integrity(struct gendisk *disk, u16 ms)
 
 	memset(&integrity, 0, sizeof(integrity));
 	switch (disk->protection_type) {
-	case NVME_NS_DPS_PI_TYPE3:
+	case T10_PI_TYPE3_PROTECTION:
 		integrity.profile = &t10_pi_type3_crc;
 		integrity.tag_size = sizeof(u16) + sizeof(u32);
 		integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
 		break;
-	case NVME_NS_DPS_PI_TYPE1:
-	case NVME_NS_DPS_PI_TYPE2:
+	case T10_PI_TYPE1_PROTECTION:
+	case T10_PI_TYPE2_PROTECTION:
 		integrity.profile = &t10_pi_type1_crc;
 		integrity.tag_size = sizeof(u16);
 		integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 01aa6a6..8d45c3e 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -381,9 +381,6 @@ enum {
 	NVME_NS_DPC_PI_TYPE1	= 1 << 0,
 	NVME_NS_DPS_PI_FIRST	= 1 << 3,
 	NVME_NS_DPS_PI_MASK	= 0x7,
-	NVME_NS_DPS_PI_TYPE1	= 1,
-	NVME_NS_DPS_PI_TYPE2	= 2,
-	NVME_NS_DPS_PI_TYPE3	= 3,
 };
 
 struct nvme_ns_id_desc {
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 1/3] block: centralize PI remapping logic to the block layer
  2019-09-08 15:26 [PATCH v4 1/3] block: centralize PI remapping logic to the block layer Max Gurtovoy
  2019-09-08 15:26 ` [PATCH v4 2/3] block: don't remap ref tag for T10 PI type 0 Max Gurtovoy
  2019-09-08 15:26 ` [PATCH v4 3/3] nvme: remove PI values definition from NVMe subsystem Max Gurtovoy
@ 2019-09-09  2:21 ` Martin K. Petersen
  2019-09-09 13:55   ` Max Gurtovoy
  2 siblings, 1 reply; 16+ messages in thread
From: Martin K. Petersen @ 2019-09-09  2:21 UTC (permalink / raw)
  To: Max Gurtovoy
  Cc: linux-block, axboe, martin.petersen, linux-nvme, keith.busch,
	hch, sagi, shlomin, israelr


Hi Max!

> @@ -309,7 +308,7 @@ static void sd_set_flush_flag(struct scsi_disk *sdkp)
>  {
>  	struct scsi_disk *sdkp = to_scsi_disk(dev);
>  
> -	return sprintf(buf, "%u\n", sdkp->protection_type);
> +	return sprintf(buf, "%u\n", sdkp->disk->protection_type);

I'm fine with moving the prepare/complete logic to the block layer. But
the block layer should always be using information from the integrity
profile. sdkp->protection_type is a SCSI disk property which is used to
pick the right integrity profile when a device is discovered and
registered.

 - sdkp->protection_type is the type the disk is formatted with. This
   may or may not be the same as the metadata format used by DIX and the
   block layer.

 - The DIX protection type (which is what matters for protection
   information preparation) is encapsulated in the integrity profile
   registered for the block device. The profile describes an abstract
   protection format and can (at least in theory) carry non-T10 PI
   protection information.

Linux currently uses the Type 1 block layer integrity profile for
devices formatted with T10 PI Types 0, 1, and 2. And the Type 3 block
layer integrity profile for devices formatted with T10 PI Type 3. This
profile is what we should be keying off of in t10-pi.c, not the
protection_type (the fact that protection_type is even there is because
the code was lifted out from sd.c).

I would prefer to introduce .prepare_fn and .complete_fn for the Type 1
profile to match the existing .generate_fn and verify_fn. And then adapt
t10_pi_prepare() / t10_pi_complete() to plug into these new
callbacks. The need for protection_type and Type 3 matching goes away in
that case since the callbacks would only be set for the Type 1 profile.

>  static inline unsigned short
> +blk_integrity_interval_shift(struct request_queue *q)
> +{
> +	return q->limits.logical_block_shift;
> +}
> +

Why not use bio_integrity_intervals() or bi->interval_exp?

Note that for T10 PI Type 2, the protection interval is not necessarily
the logical block size.

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 2/3] block: don't remap ref tag for T10 PI type 0
  2019-09-08 15:26 ` [PATCH v4 2/3] block: don't remap ref tag for T10 PI type 0 Max Gurtovoy
@ 2019-09-09  2:22   ` Martin K. Petersen
  2019-09-09  2:36     ` Keith Busch
  0 siblings, 1 reply; 16+ messages in thread
From: Martin K. Petersen @ 2019-09-09  2:22 UTC (permalink / raw)
  To: Max Gurtovoy
  Cc: linux-block, axboe, martin.petersen, linux-nvme, keith.busch,
	hch, sagi, shlomin, israelr


Max,

> Only type 1 and type 2 have a reference tag by definition.

DIX Type 0 needs remapping so this assertion is not correct.

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 2/3] block: don't remap ref tag for T10 PI type 0
  2019-09-09  2:22   ` Martin K. Petersen
@ 2019-09-09  2:36     ` Keith Busch
  2019-09-09  2:49       ` Martin K. Petersen
  0 siblings, 1 reply; 16+ messages in thread
From: Keith Busch @ 2019-09-09  2:36 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: Max Gurtovoy, axboe, keith.busch, sagi, israelr, linux-nvme,
	linux-block, shlomin, hch

On Sun, Sep 08, 2019 at 10:22:50PM -0400, Martin K. Petersen wrote:
> 
> Max,
> 
> > Only type 1 and type 2 have a reference tag by definition.
> 
> DIX Type 0 needs remapping so this assertion is not correct.

At least for nvme, type 0 means you have meta data but not for protection
information, so remapping the place the where reference tag exists for
other PI types corrupts the metadata.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 2/3] block: don't remap ref tag for T10 PI type 0
  2019-09-09  2:36     ` Keith Busch
@ 2019-09-09  2:49       ` Martin K. Petersen
  2019-09-09 13:31         ` Max Gurtovoy
  0 siblings, 1 reply; 16+ messages in thread
From: Martin K. Petersen @ 2019-09-09  2:49 UTC (permalink / raw)
  To: Keith Busch
  Cc: Martin K. Petersen, Max Gurtovoy, axboe, keith.busch, sagi,
	israelr, linux-nvme, linux-block, shlomin, hch


Keith,

> At least for nvme, type 0 means you have meta data but not for
> protection information,

Yeah, NVMe does not support DIX Type 0.

> so remapping the place the where reference tag exists for other PI
> types corrupts the metadata.

But the device shouldn't have an integrity profile in that case (see
previous mail about why keying off of the protection_type is a problem).

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 2/3] block: don't remap ref tag for T10 PI type 0
  2019-09-09  2:49       ` Martin K. Petersen
@ 2019-09-09 13:31         ` Max Gurtovoy
  0 siblings, 0 replies; 16+ messages in thread
From: Max Gurtovoy @ 2019-09-09 13:31 UTC (permalink / raw)
  To: Martin K. Petersen, Keith Busch
  Cc: axboe, keith.busch, sagi, israelr, linux-nvme, linux-block, shlomin, hch


On 9/9/2019 5:49 AM, Martin K. Petersen wrote:
> Keith,
>
>> At least for nvme, type 0 means you have meta data but not for
>> protection information,
> Yeah, NVMe does not support DIX Type 0.
>
>> so remapping the place the where reference tag exists for other PI
>> types corrupts the metadata.
> But the device shouldn't have an integrity profile in that case (see
> previous mail about why keying off of the protection_type is a problem).


I see. Ok I'll not handle NVMe type 0 in this patchset.

There is no kernel implementation for that so I don't see a good reason 
doing so after Martin's proposal.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 1/3] block: centralize PI remapping logic to the block layer
  2019-09-09  2:21 ` [PATCH v4 1/3] block: centralize PI remapping logic to the block layer Martin K. Petersen
@ 2019-09-09 13:55   ` Max Gurtovoy
  2019-09-10  2:29     ` Martin K. Petersen
  0 siblings, 1 reply; 16+ messages in thread
From: Max Gurtovoy @ 2019-09-09 13:55 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: linux-block, axboe, linux-nvme, keith.busch, hch, sagi, shlomin, israelr


On 9/9/2019 5:21 AM, Martin K. Petersen wrote:
> Hi Max!

Hi Martin,


>
>> @@ -309,7 +308,7 @@ static void sd_set_flush_flag(struct scsi_disk *sdkp)
>>   {
>>   	struct scsi_disk *sdkp = to_scsi_disk(dev);
>>   
>> -	return sprintf(buf, "%u\n", sdkp->protection_type);
>> +	return sprintf(buf, "%u\n", sdkp->disk->protection_type);
> I'm fine with moving the prepare/complete logic to the block layer. But
> the block layer should always be using information from the integrity
> profile. sdkp->protection_type is a SCSI disk property which is used to
> pick the right integrity profile when a device is discovered and
> registered.
>
>   - sdkp->protection_type is the type the disk is formatted with. This
>     may or may not be the same as the metadata format used by DIX and the
>     block layer.

ok i'll leave it as is.

>
>   - The DIX protection type (which is what matters for protection
>     information preparation) is encapsulated in the integrity profile
>     registered for the block device. The profile describes an abstract
>     protection format and can (at least in theory) carry non-T10 PI
>     protection information.
>
> Linux currently uses the Type 1 block layer integrity profile for
> devices formatted with T10 PI Types 0, 1, and 2. And the Type 3 block
> layer integrity profile for devices formatted with T10 PI Type 3. This
> profile is what we should be keying off of in t10-pi.c, not the
> protection_type (the fact that protection_type is even there is because
> the code was lifted out from sd.c).

maybe we can add profiles to type0 and type2 in the future and have more 
readable code.

>
> I would prefer to introduce .prepare_fn and .complete_fn for the Type 1
> profile to match the existing .generate_fn and verify_fn. And then adapt
> t10_pi_prepare() / t10_pi_complete() to plug into these new
> callbacks. The need for protection_type and Type 3 matching goes away in
> that case since the callbacks would only be set for the Type 1 profile.

Sounds good and simple.

I think I'll prepare dummy/empty callbacks for type3 and for nop 
profiles instead of setting it to NULL.

agreed ?

>
>>   static inline unsigned short
>> +blk_integrity_interval_shift(struct request_queue *q)
>> +{
>> +	return q->limits.logical_block_shift;
>> +}
>> +
> Why not use bio_integrity_intervals() or bi->interval_exp?

I'll use bi->interval_ext.


>
> Note that for T10 PI Type 2, the protection interval is not necessarily
> the logical block size.

thanks for the review. It will make my patchset easier and it will 
contain 1 patch now.

>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 1/3] block: centralize PI remapping logic to the block layer
  2019-09-09 13:55   ` Max Gurtovoy
@ 2019-09-10  2:29     ` Martin K. Petersen
  2019-09-10 22:27       ` Max Gurtovoy
  0 siblings, 1 reply; 16+ messages in thread
From: Martin K. Petersen @ 2019-09-10  2:29 UTC (permalink / raw)
  To: Max Gurtovoy
  Cc: Martin K. Petersen, linux-block, axboe, linux-nvme, keith.busch,
	hch, sagi, shlomin, israelr


Max,

> maybe we can add profiles to type0 and type2 in the future and have
> more readable code.

It's a deliberate feature that we treat DIX Type 0, 1, and 2 the
same. It's very common to mix and match legacy drives, T10 PI Type 1,
and T10 PI Type 2 devices in a system. In order for MD/DM stacking,
multipathing, etc. to work, it is important that all devices share the
same protection format, interpretation of the tags, etc.

Type 2, where the ref tag can be different from the LBA, was designed
exclusively for use inside disk arrays where the array firmware is the
sole entity accessing blocks on media. And thus always knows what the
expected ref tag should be for a given LBA (typically the LUN LBA as
seen by the host interface and not the target LBA on the back-end
drive).

For Linux, however, where we need to support dd'ing from the device node
without any knowledge an application or filesystem may have about the
written PI, it's imperative that the reference tag is something
predictable. Therefore it is deliberate that we always use the LBA (or
a derivative thereof for the smaller intervals) for the reference tag.
Even if T10 PI Type 2 in theory allows for the tag to be an arbitrary
number. But Linux is a general purpose OS and not an array controller
firmware. So we can't really leverage that capability.

Also. Take MD, for instance. The same I/O could be going to a mirror of
Type 1 and Type 2 devices. We obviously can't have two different types
of PI hanging off a bio. Nor do we have the capability to handle
arbitrary MD/DM stacking with PI format properties potentially changing
many times within the block range constituting a single I/O.

That's why we have the integrity profile which describes a common block
layer PI format that's somewhat orthogonal to how the underlying device
is formatted.

There are a couple of warts in that department. One is the IP checksum
which is now mostly a legacy thing and not implemented/relevant for
NVMe. The other is Type 3 devices that need special care and
feeding. But Type 3 does not appear to be actively used by anyone
anymore. We recently discovered that it's completely broken in the NVMe
spec and nobody ever noticed. And I don't think it was ever used
as-written in SCSI (Type 3 was an attempt to standardize a particular
vendor's existing, proprietary format).

Anyway. So my take on all this is that the T10-DIF-TYPE1-CRC profile is
"it" and everything else is legacy.

> I think I'll prepare dummy/empty callbacks for type3 and for nop
> profiles instead of setting it to NULL.
>
> agreed ?

Sure. Whatever works.

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 1/3] block: centralize PI remapping logic to the block layer
  2019-09-10  2:29     ` Martin K. Petersen
@ 2019-09-10 22:27       ` Max Gurtovoy
  2019-09-11  1:16         ` Martin K. Petersen
  0 siblings, 1 reply; 16+ messages in thread
From: Max Gurtovoy @ 2019-09-10 22:27 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: linux-block, axboe, linux-nvme, keith.busch, hch, sagi, shlomin, israelr


On 9/10/2019 5:29 AM, Martin K. Petersen wrote:
> Max,

Hi Martin,

thanks for the great explanation !

>
>> maybe we can add profiles to type0 and type2 in the future and have
>> more readable code.
> It's a deliberate feature that we treat DIX Type 0, 1, and 2 the
> same. It's very common to mix and match legacy drives, T10 PI Type 1,
> and T10 PI Type 2 devices in a system. In order for MD/DM stacking,
> multipathing, etc. to work, it is important that all devices share the
> same protection format, interpretation of the tags, etc.
>
> Type 2, where the ref tag can be different from the LBA, was designed
> exclusively for use inside disk arrays where the array firmware is the
> sole entity accessing blocks on media. And thus always knows what the
> expected ref tag should be for a given LBA (typically the LUN LBA as
> seen by the host interface and not the target LBA on the back-end
> drive).
>
> For Linux, however, where we need to support dd'ing from the device node
> without any knowledge an application or filesystem may have about the
> written PI, it's imperative that the reference tag is something
> predictable. Therefore it is deliberate that we always use the LBA (or
> a derivative thereof for the smaller intervals) for the reference tag.
> Even if T10 PI Type 2 in theory allows for the tag to be an arbitrary
> number. But Linux is a general purpose OS and not an array controller
> firmware. So we can't really leverage that capability.
>
> Also. Take MD, for instance. The same I/O could be going to a mirror of
> Type 1 and Type 2 devices. We obviously can't have two different types
> of PI hanging off a bio. Nor do we have the capability to handle
> arbitrary MD/DM stacking with PI format properties potentially changing
> many times within the block range constituting a single I/O.

I guess Type 1 and Type 3 mirrors can work because Type 3 doesn't have a 
ref tag, right ?

> That's why we have the integrity profile which describes a common block
> layer PI format that's somewhat orthogonal to how the underlying device
> is formatted.
>
> There are a couple of warts in that department. One is the IP checksum
> which is now mostly a legacy thing and not implemented/relevant for
> NVMe. The other is Type 3 devices that need special care and
> feeding. But Type 3 does not appear to be actively used by anyone
> anymore. We recently discovered that it's completely broken in the NVMe
> spec and nobody ever noticed. And I don't think it was ever used
> as-written in SCSI (Type 3 was an attempt to standardize a particular
> vendor's existing, proprietary format).
>
> Anyway. So my take on all this is that the T10-DIF-TYPE1-CRC profile is
> "it" and everything else is legacy.

do you see any reason to support the broken type 3 ?


>
>> I think I'll prepare dummy/empty callbacks for type3 and for nop
>> profiles instead of setting it to NULL.
>>
>> agreed ?
> Sure. Whatever works.

I'll send the patch tomorrow for review.

-Max.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 1/3] block: centralize PI remapping logic to the block layer
  2019-09-10 22:27       ` Max Gurtovoy
@ 2019-09-11  1:16         ` Martin K. Petersen
  2019-09-11  9:12           ` Max Gurtovoy
  0 siblings, 1 reply; 16+ messages in thread
From: Martin K. Petersen @ 2019-09-11  1:16 UTC (permalink / raw)
  To: Max Gurtovoy
  Cc: Martin K. Petersen, linux-block, axboe, linux-nvme, keith.busch,
	hch, sagi, shlomin, israelr


Max,

> I guess Type 1 and Type 3 mirrors can work because Type 3 doesn't have
> a ref tag, right ?

It will work but you'll lose ref tag checking on the Type 3 side of the
mirror. So not exactly desirable. And in our experience, the ref tag is
hugely important.

Also, there are probably some headaches lurking in the slightly
different app/ref tag escape handling if you mix the two
formats. Whereas Type 1 and 2 are 100% identical in behavior if you use
the LBA as the ref tag.

>> Anyway. So my take on all this is that the T10-DIF-TYPE1-CRC profile is
>> "it" and everything else is legacy.
>
> do you see any reason to support the broken type 3 ?

Only to support existing installations. We can't really remove it
without the risk of breaking something for somebody out there.

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 1/3] block: centralize PI remapping logic to the block layer
  2019-09-11  1:16         ` Martin K. Petersen
@ 2019-09-11  9:12           ` Max Gurtovoy
  2019-09-13 22:20             ` Martin K. Petersen
  0 siblings, 1 reply; 16+ messages in thread
From: Max Gurtovoy @ 2019-09-11  9:12 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: axboe, keith.busch, sagi, israelr, linux-nvme, linux-block, shlomin, hch


On 9/11/2019 4:16 AM, Martin K. Petersen wrote:
> Max,
>
>> I guess Type 1 and Type 3 mirrors can work because Type 3 doesn't have
>> a ref tag, right ?
> It will work but you'll lose ref tag checking on the Type 3 side of the
> mirror. So not exactly desirable. And in our experience, the ref tag is
> hugely important.
>
> Also, there are probably some headaches lurking in the slightly
> different app/ref tag escape handling if you mix the two
> formats. Whereas Type 1 and 2 are 100% identical in behavior if you use
> the LBA as the ref tag.
>
>>> Anyway. So my take on all this is that the T10-DIF-TYPE1-CRC profile is
>>> "it" and everything else is legacy.
>> do you see any reason to support the broken type 3 ?
> Only to support existing installations. We can't really remove it
> without the risk of breaking something for somebody out there.

what about broken type 3 in the NVMe spec ?

I don't really know what is broken there but maybe we can avoid 
supporting it for NVMe until it's fixed.



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 1/3] block: centralize PI remapping logic to the block layer
  2019-09-11  9:12           ` Max Gurtovoy
@ 2019-09-13 22:20             ` Martin K. Petersen
  2019-09-16  8:03               ` Christoph Hellwig
  0 siblings, 1 reply; 16+ messages in thread
From: Martin K. Petersen @ 2019-09-13 22:20 UTC (permalink / raw)
  To: Max Gurtovoy
  Cc: Martin K. Petersen, axboe, keith.busch, sagi, israelr,
	linux-nvme, linux-block, shlomin, hch


Max,

> what about broken type 3 in the NVMe spec ?
>
> I don't really know what is broken there but maybe we can avoid
> supporting it for NVMe until it's fixed.

The intent in NVMe was for Type 3 to work exactly like it does in
SCSI. But the way the spec is worded it does not. So it is unclear
whether implementors (if any) went with the SCSI compatible route or
with what the NVMe spec actually says.

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 1/3] block: centralize PI remapping logic to the block layer
  2019-09-13 22:20             ` Martin K. Petersen
@ 2019-09-16  8:03               ` Christoph Hellwig
  2019-09-16 17:19                 ` Martin K. Petersen
  0 siblings, 1 reply; 16+ messages in thread
From: Christoph Hellwig @ 2019-09-16  8:03 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: Max Gurtovoy, axboe, keith.busch, sagi, israelr, linux-nvme,
	linux-block, shlomin, hch

On Fri, Sep 13, 2019 at 06:20:23PM -0400, Martin K. Petersen wrote:
> 
> Max,
> 
> > what about broken type 3 in the NVMe spec ?
> >
> > I don't really know what is broken there but maybe we can avoid
> > supporting it for NVMe until it's fixed.
> 
> The intent in NVMe was for Type 3 to work exactly like it does in
> SCSI. But the way the spec is worded it does not. So it is unclear
> whether implementors (if any) went with the SCSI compatible route or
> with what the NVMe spec actually says.

Do we actually have Linux users of Type 3 at all?  I think for NVMe
we could just trivially disable Linux support, and I suspect for SCSI
as well, but I'll have to defer to you on that.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 1/3] block: centralize PI remapping logic to the block layer
  2019-09-16  8:03               ` Christoph Hellwig
@ 2019-09-16 17:19                 ` Martin K. Petersen
  0 siblings, 0 replies; 16+ messages in thread
From: Martin K. Petersen @ 2019-09-16 17:19 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Martin K. Petersen, Max Gurtovoy, axboe, keith.busch, sagi,
	israelr, linux-nvme, linux-block, shlomin


Christoph,

> Do we actually have Linux users of Type 3 at all?  I think for NVMe
> we could just trivially disable Linux support, and I suspect for SCSI
> as well, but I'll have to defer to you on that.

There were several companies looking into building Linux things using
Type 3 SCSI devices. No idea whether this happened. I definitely still
get lots of mail from people using Type 2. Something which also really
shouldn't exist outside of a disk array.

For NVMe, I assume nobody has tried Type 3 given the discrepancy between
how SCSI works and how the NVMe spec is currently written.

In any case. Since Type 3 is a pretty trivial subset of Type 1, I don't
see much benefit in actively removing it. One could argue we could
reduce the plumbing by removing a level of indirection. However, we'll
need the infrastructure to support the impending Type 4 as well. So my
preference is to just leave things as-is for now.

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, back to index

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-08 15:26 [PATCH v4 1/3] block: centralize PI remapping logic to the block layer Max Gurtovoy
2019-09-08 15:26 ` [PATCH v4 2/3] block: don't remap ref tag for T10 PI type 0 Max Gurtovoy
2019-09-09  2:22   ` Martin K. Petersen
2019-09-09  2:36     ` Keith Busch
2019-09-09  2:49       ` Martin K. Petersen
2019-09-09 13:31         ` Max Gurtovoy
2019-09-08 15:26 ` [PATCH v4 3/3] nvme: remove PI values definition from NVMe subsystem Max Gurtovoy
2019-09-09  2:21 ` [PATCH v4 1/3] block: centralize PI remapping logic to the block layer Martin K. Petersen
2019-09-09 13:55   ` Max Gurtovoy
2019-09-10  2:29     ` Martin K. Petersen
2019-09-10 22:27       ` Max Gurtovoy
2019-09-11  1:16         ` Martin K. Petersen
2019-09-11  9:12           ` Max Gurtovoy
2019-09-13 22:20             ` Martin K. Petersen
2019-09-16  8:03               ` Christoph Hellwig
2019-09-16 17:19                 ` Martin K. Petersen

Linux-Block Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-block/0 linux-block/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-block linux-block/ https://lore.kernel.org/linux-block \
		linux-block@vger.kernel.org linux-block@archiver.kernel.org
	public-inbox-index linux-block


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-block


AGPL code for this site: git clone https://public-inbox.org/ public-inbox