All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 1/3] block: move ref_tag calculation func to the block layer
@ 2018-07-25  8:46 ` Max Gurtovoy
  0 siblings, 0 replies; 12+ messages in thread
From: Max Gurtovoy @ 2018-07-25  8:46 UTC (permalink / raw)
  To: keith.busch, linux-nvme, sagi, hch, martin.petersen, linux-block, axboe
  Cc: vladimirk, Max Gurtovoy

Currently this function is implemented in the scsi layer, but it's
actual place should be the block layer since T10-PI is a general
data integrity feature that is used in the nvme protocol as well.

Suggested-by: Christoph Hellwig <hch@lst.de>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
---
 drivers/infiniband/ulp/iser/iser_memory.c | 2 +-
 drivers/nvme/host/core.c                  | 3 +--
 drivers/scsi/mpt3sas/mpt3sas_scsih.c      | 2 +-
 drivers/scsi/sd_dif.c                     | 4 ++--
 include/linux/t10-pi.h                    | 6 ++++++
 include/scsi/scsi_cmnd.h                  | 7 +------
 6 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index ca844a9..130bf16 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -311,7 +311,7 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
 {
 	domain->sig_type = IB_SIG_TYPE_T10_DIF;
 	domain->sig.dif.pi_interval = scsi_prot_interval(sc);
-	domain->sig.dif.ref_tag = scsi_prot_ref_tag(sc);
+	domain->sig.dif.ref_tag = t10_pi_ref_tag(sc->request);
 	/*
 	 * At the moment we hard code those, but in the future
 	 * we will take them from sc.
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index bf65501..191177b 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -627,8 +627,7 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 		case NVME_NS_DPS_PI_TYPE2:
 			control |= NVME_RW_PRINFO_PRCHK_GUARD |
 					NVME_RW_PRINFO_PRCHK_REF;
-			cmnd->rw.reftag = cpu_to_le32(
-					nvme_block_nr(ns, blk_rq_pos(req)));
+			cmnd->rw.reftag = cpu_to_le32(t10_pi_ref_tag(req));
 			break;
 		}
 	}
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index b8d131a..dd738ae 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -4568,7 +4568,7 @@ static int _scsih_set_satl_pending(struct scsi_cmnd *scmd, bool pending)
 		    MPI2_SCSIIO_EEDPFLAGS_CHECK_REFTAG |
 		    MPI2_SCSIIO_EEDPFLAGS_CHECK_GUARD;
 		mpi_request->CDB.EEDP32.PrimaryReferenceTag =
-		    cpu_to_be32(scsi_prot_ref_tag(scmd));
+		    cpu_to_be32(t10_pi_ref_tag(scmd->request));
 		break;
 
 	case SCSI_PROT_DIF_TYPE3:
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index 9035380..d8de43d 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -124,7 +124,7 @@ void sd_dif_prepare(struct scsi_cmnd *scmd)
 	if (sdkp->protection_type == T10_PI_TYPE3_PROTECTION)
 		return;
 
-	phys = scsi_prot_ref_tag(scmd);
+	phys = t10_pi_ref_tag(scmd->request);
 
 	__rq_for_each_bio(bio, scmd->request) {
 		struct bio_integrity_payload *bip = bio_integrity(bio);
@@ -176,7 +176,7 @@ void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
 		return;
 
 	intervals = good_bytes / scsi_prot_interval(scmd);
-	phys = scsi_prot_ref_tag(scmd);
+	phys = t10_pi_ref_tag(scmd->request);
 
 	__rq_for_each_bio(bio, scmd->request) {
 		struct bio_integrity_payload *bip = bio_integrity(bio);
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index c6aa8a3..635855e 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -37,6 +37,12 @@ struct t10_pi_tuple {
 #define T10_PI_APP_ESCAPE cpu_to_be16(0xffff)
 #define T10_PI_REF_ESCAPE cpu_to_be32(0xffffffff)
 
+static inline u32 t10_pi_ref_tag(struct request *rq)
+{
+	return blk_rq_pos(rq) >>
+		(rq->q->integrity.interval_exp - 9) & 0xffffffff;
+}
+
 extern const struct blk_integrity_profile t10_pi_type1_crc;
 extern const struct blk_integrity_profile t10_pi_type1_ip;
 extern const struct blk_integrity_profile t10_pi_type3_crc;
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index aaf1e97..cae229b 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -4,6 +4,7 @@
 
 #include <linux/dma-mapping.h>
 #include <linux/blkdev.h>
+#include <linux/t10-pi.h>
 #include <linux/list.h>
 #include <linux/types.h>
 #include <linux/timer.h>
@@ -313,12 +314,6 @@ static inline unsigned int scsi_prot_interval(struct scsi_cmnd *scmd)
 	return scmd->device->sector_size;
 }
 
-static inline u32 scsi_prot_ref_tag(struct scsi_cmnd *scmd)
-{
-	return blk_rq_pos(scmd->request) >>
-		(ilog2(scsi_prot_interval(scmd)) - 9) & 0xffffffff;
-}
-
 static inline unsigned scsi_prot_sg_count(struct scsi_cmnd *cmd)
 {
 	return cmd->prot_sdb ? cmd->prot_sdb->table.nents : 0;
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v3 1/3] block: move ref_tag calculation func to the block layer
@ 2018-07-25  8:46 ` Max Gurtovoy
  0 siblings, 0 replies; 12+ messages in thread
From: Max Gurtovoy @ 2018-07-25  8:46 UTC (permalink / raw)


Currently this function is implemented in the scsi layer, but it's
actual place should be the block layer since T10-PI is a general
data integrity feature that is used in the nvme protocol as well.

Suggested-by: Christoph Hellwig <hch at lst.de>
Cc: Jens Axboe <axboe at kernel.dk>
Cc: Martin K. Petersen <martin.petersen at oracle.com>
Signed-off-by: Max Gurtovoy <maxg at mellanox.com>
---
 drivers/infiniband/ulp/iser/iser_memory.c | 2 +-
 drivers/nvme/host/core.c                  | 3 +--
 drivers/scsi/mpt3sas/mpt3sas_scsih.c      | 2 +-
 drivers/scsi/sd_dif.c                     | 4 ++--
 include/linux/t10-pi.h                    | 6 ++++++
 include/scsi/scsi_cmnd.h                  | 7 +------
 6 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index ca844a9..130bf16 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -311,7 +311,7 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
 {
 	domain->sig_type = IB_SIG_TYPE_T10_DIF;
 	domain->sig.dif.pi_interval = scsi_prot_interval(sc);
-	domain->sig.dif.ref_tag = scsi_prot_ref_tag(sc);
+	domain->sig.dif.ref_tag = t10_pi_ref_tag(sc->request);
 	/*
 	 * At the moment we hard code those, but in the future
 	 * we will take them from sc.
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index bf65501..191177b 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -627,8 +627,7 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 		case NVME_NS_DPS_PI_TYPE2:
 			control |= NVME_RW_PRINFO_PRCHK_GUARD |
 					NVME_RW_PRINFO_PRCHK_REF;
-			cmnd->rw.reftag = cpu_to_le32(
-					nvme_block_nr(ns, blk_rq_pos(req)));
+			cmnd->rw.reftag = cpu_to_le32(t10_pi_ref_tag(req));
 			break;
 		}
 	}
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index b8d131a..dd738ae 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -4568,7 +4568,7 @@ static int _scsih_set_satl_pending(struct scsi_cmnd *scmd, bool pending)
 		    MPI2_SCSIIO_EEDPFLAGS_CHECK_REFTAG |
 		    MPI2_SCSIIO_EEDPFLAGS_CHECK_GUARD;
 		mpi_request->CDB.EEDP32.PrimaryReferenceTag =
-		    cpu_to_be32(scsi_prot_ref_tag(scmd));
+		    cpu_to_be32(t10_pi_ref_tag(scmd->request));
 		break;
 
 	case SCSI_PROT_DIF_TYPE3:
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index 9035380..d8de43d 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -124,7 +124,7 @@ void sd_dif_prepare(struct scsi_cmnd *scmd)
 	if (sdkp->protection_type == T10_PI_TYPE3_PROTECTION)
 		return;
 
-	phys = scsi_prot_ref_tag(scmd);
+	phys = t10_pi_ref_tag(scmd->request);
 
 	__rq_for_each_bio(bio, scmd->request) {
 		struct bio_integrity_payload *bip = bio_integrity(bio);
@@ -176,7 +176,7 @@ void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
 		return;
 
 	intervals = good_bytes / scsi_prot_interval(scmd);
-	phys = scsi_prot_ref_tag(scmd);
+	phys = t10_pi_ref_tag(scmd->request);
 
 	__rq_for_each_bio(bio, scmd->request) {
 		struct bio_integrity_payload *bip = bio_integrity(bio);
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index c6aa8a3..635855e 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -37,6 +37,12 @@ struct t10_pi_tuple {
 #define T10_PI_APP_ESCAPE cpu_to_be16(0xffff)
 #define T10_PI_REF_ESCAPE cpu_to_be32(0xffffffff)
 
+static inline u32 t10_pi_ref_tag(struct request *rq)
+{
+	return blk_rq_pos(rq) >>
+		(rq->q->integrity.interval_exp - 9) & 0xffffffff;
+}
+
 extern const struct blk_integrity_profile t10_pi_type1_crc;
 extern const struct blk_integrity_profile t10_pi_type1_ip;
 extern const struct blk_integrity_profile t10_pi_type3_crc;
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index aaf1e97..cae229b 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -4,6 +4,7 @@
 
 #include <linux/dma-mapping.h>
 #include <linux/blkdev.h>
+#include <linux/t10-pi.h>
 #include <linux/list.h>
 #include <linux/types.h>
 #include <linux/timer.h>
@@ -313,12 +314,6 @@ static inline unsigned int scsi_prot_interval(struct scsi_cmnd *scmd)
 	return scmd->device->sector_size;
 }
 
-static inline u32 scsi_prot_ref_tag(struct scsi_cmnd *scmd)
-{
-	return blk_rq_pos(scmd->request) >>
-		(ilog2(scsi_prot_interval(scmd)) - 9) & 0xffffffff;
-}
-
 static inline unsigned scsi_prot_sg_count(struct scsi_cmnd *cmd)
 {
 	return cmd->prot_sdb ? cmd->prot_sdb->table.nents : 0;
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v3 2/3] block: move dif_prepare/dif_complete functions to block layer
  2018-07-25  8:46 ` Max Gurtovoy
@ 2018-07-25  8:47   ` Max Gurtovoy
  -1 siblings, 0 replies; 12+ messages in thread
From: Max Gurtovoy @ 2018-07-25  8:47 UTC (permalink / raw)
  To: keith.busch, linux-nvme, sagi, hch, martin.petersen, linux-block, axboe
  Cc: vladimirk, Max Gurtovoy

Currently these functions are implemented in the scsi layer, but their
actual place should be the block layer since T10-PI is a general data
integrity feature that is used in the nvme protocol as well. Also, use
the tuple size from the integrity profile since it may vary between
integrity types.

Suggested-by: Christoph Hellwig <hch@lst.de>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
---

changes from v2:
 - convert comments to kerneldoc format
 - removed SCSI specific comment
 - fix kmap_atomic/kunmap_atomic addresses
 - fix iteration over t10_pi_tuple's

changes from v1 (Christoph, Martin and Keith comments):
 - moved the functions to t10-pi.c
 - updated tuple size
 - changed local variables scope
 - remove/add new lines

---
 block/t10-pi.c         | 114 +++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/scsi/sd.c      |   8 ++--
 drivers/scsi/sd.h      |   9 ----
 drivers/scsi/sd_dif.c  | 113 ------------------------------------------------
 include/linux/t10-pi.h |   3 ++
 5 files changed, 122 insertions(+), 125 deletions(-)

diff --git a/block/t10-pi.c b/block/t10-pi.c
index a98db38..6d2acfe 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -184,3 +184,117 @@ static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
 	.verify_fn		= t10_pi_type3_verify_ip,
 };
 EXPORT_SYMBOL(t10_pi_type3_ip);
+
+/**
+ * t10_pi_prepare - prepare PI prior submitting request to device
+ * @rq:              request with PI that should be prepared
+ * @protection_type: PI type (Type 1/Type 2/Type 3)
+ *
+ * For Type 1/Type 2, the virtual start sector is the one that was
+ * originally submitted by the block layer for the ref_tag usage. Due to
+ * partitioning, MD/DM cloning, etc. the actual physical start sector is
+ * likely to be different. Remap protection information to match the
+ * physical LBA.
+ *
+ * Type 3 does not have a reference tag so no remapping is required.
+ */
+void t10_pi_prepare(struct request *rq, u8 protection_type)
+{
+	const int tuple_sz = rq->q->integrity.tuple_size;
+	u32 ref_tag = t10_pi_ref_tag(rq);
+	struct bio *bio;
+
+	if (protection_type == T10_PI_TYPE3_PROTECTION)
+		return;
+
+	__rq_for_each_bio(bio, rq) {
+		struct bio_integrity_payload *bip = bio_integrity(bio);
+		u32 virt = bip_get_seed(bip) & 0xffffffff;
+		struct bio_vec iv;
+		struct bvec_iter iter;
+
+		/* Already remapped? */
+		if (bip->bip_flags & BIP_MAPPED_INTEGRITY)
+			break;
+
+		bip_for_each_vec(iv, bip, iter) {
+			struct t10_pi_tuple *pi;
+			void *p, *pmap;
+			unsigned int j;
+
+			pmap = kmap_atomic(iv.bv_page) + iv.bv_offset;
+			p = pmap;
+			for (j = 0; j < iv.bv_len; j += tuple_sz) {
+				pi = (struct t10_pi_tuple *)p;
+				if (be32_to_cpu(pi->ref_tag) == virt)
+					pi->ref_tag = cpu_to_be32(ref_tag);
+				virt++;
+				ref_tag++;
+				p += tuple_sz;
+			}
+
+			kunmap_atomic(pmap);
+		}
+
+		bip->bip_flags |= BIP_MAPPED_INTEGRITY;
+	}
+}
+EXPORT_SYMBOL(t10_pi_prepare);
+
+/**
+ * t10_pi_complete - prepare PI prior returning request to the block layer
+ * @rq:              request with PI that should be prepared
+ * @protection_type: PI type (Type 1/Type 2/Type 3)
+ * @intervals:       total elements to prepare
+ *
+ * For Type 1/Type 2, the virtual start sector is the one that was
+ * originally submitted by the block layer for the ref_tag usage. Due to
+ * partitioning, MD/DM cloning, etc. the actual physical start sector is
+ * likely to be different. Since the physical start sector was submitted
+ * to the device, we should remap it back to virtual values expected by the
+ * block layer.
+ *
+ * Type 3 does not have a reference tag so no remapping is required.
+ */
+void t10_pi_complete(struct request *rq, u8 protection_type,
+		     unsigned int intervals)
+{
+	const int tuple_sz = rq->q->integrity.tuple_size;
+	u32 ref_tag = t10_pi_ref_tag(rq);
+	struct bio *bio;
+
+	if (protection_type == T10_PI_TYPE3_PROTECTION)
+		return;
+
+	__rq_for_each_bio(bio, rq) {
+		struct bio_integrity_payload *bip = bio_integrity(bio);
+		u32 virt = bip_get_seed(bip) & 0xffffffff;
+		struct bio_vec iv;
+		struct bvec_iter iter;
+
+		bip_for_each_vec(iv, bip, iter) {
+			struct t10_pi_tuple *pi;
+			void *p, *pmap;
+			unsigned int j;
+
+			pmap = kmap_atomic(iv.bv_page) + iv.bv_offset;
+			p = pmap;
+			for (j = 0; j < iv.bv_len; j += tuple_sz) {
+				if (intervals == 0) {
+					kunmap_atomic(pmap);
+					return;
+				}
+				pi = (struct t10_pi_tuple *)p;
+				if (be32_to_cpu(pi->ref_tag) == ref_tag)
+					pi->ref_tag = cpu_to_be32(virt);
+				virt++;
+				ref_tag++;
+				intervals--;
+				p += tuple_sz;
+			}
+
+			kunmap_atomic(pmap);
+		}
+	}
+}
+EXPORT_SYMBOL(t10_pi_complete);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 9421d98..bbebdc3 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1119,7 +1119,7 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
 		SCpnt->cmnd[0] = WRITE_6;
 
 		if (blk_integrity_rq(rq))
-			sd_dif_prepare(SCpnt);
+			t10_pi_prepare(SCpnt->request, sdkp->protection_type);
 
 	} else if (rq_data_dir(rq) == READ) {
 		SCpnt->cmnd[0] = READ_6;
@@ -2047,8 +2047,10 @@ static int sd_done(struct scsi_cmnd *SCpnt)
 					   "sd_done: completed %d of %d bytes\n",
 					   good_bytes, scsi_bufflen(SCpnt)));
 
-	if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt))
-		sd_dif_complete(SCpnt, good_bytes);
+	if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt) &&
+	    good_bytes)
+		t10_pi_complete(SCpnt->request, sdkp->protection_type,
+				good_bytes / scsi_prot_interval(SCpnt));
 
 	return good_bytes;
 }
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 392c7d0..a7d4f50 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -254,21 +254,12 @@ static inline unsigned int sd_prot_flag_mask(unsigned int prot_op)
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 
 extern void sd_dif_config_host(struct scsi_disk *);
-extern void sd_dif_prepare(struct scsi_cmnd *scmd);
-extern void sd_dif_complete(struct scsi_cmnd *, unsigned int);
 
 #else /* CONFIG_BLK_DEV_INTEGRITY */
 
 static inline void sd_dif_config_host(struct scsi_disk *disk)
 {
 }
-static inline int sd_dif_prepare(struct scsi_cmnd *scmd)
-{
-	return 0;
-}
-static inline void sd_dif_complete(struct scsi_cmnd *cmd, unsigned int a)
-{
-}
 
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index d8de43d..db72c82 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -95,116 +95,3 @@ void sd_dif_config_host(struct scsi_disk *sdkp)
 	blk_integrity_register(disk, &bi);
 }
 
-/*
- * The virtual start sector is the one that was originally submitted
- * by the block layer.	Due to partitioning, MD/DM cloning, etc. the
- * actual physical start sector is likely to be different.  Remap
- * protection information to match the physical LBA.
- *
- * From a protocol perspective there's a slight difference between
- * Type 1 and 2.  The latter uses 32-byte CDBs exclusively, and the
- * reference tag is seeded in the CDB.  This gives us the potential to
- * avoid virt->phys remapping during write.  However, at read time we
- * don't know whether the virt sector is the same as when we wrote it
- * (we could be reading from real disk as opposed to MD/DM device.  So
- * we always remap Type 2 making it identical to Type 1.
- *
- * Type 3 does not have a reference tag so no remapping is required.
- */
-void sd_dif_prepare(struct scsi_cmnd *scmd)
-{
-	const int tuple_sz = sizeof(struct t10_pi_tuple);
-	struct bio *bio;
-	struct scsi_disk *sdkp;
-	struct t10_pi_tuple *pi;
-	u32 phys, virt;
-
-	sdkp = scsi_disk(scmd->request->rq_disk);
-
-	if (sdkp->protection_type == T10_PI_TYPE3_PROTECTION)
-		return;
-
-	phys = t10_pi_ref_tag(scmd->request);
-
-	__rq_for_each_bio(bio, scmd->request) {
-		struct bio_integrity_payload *bip = bio_integrity(bio);
-		struct bio_vec iv;
-		struct bvec_iter iter;
-		unsigned int j;
-
-		/* Already remapped? */
-		if (bip->bip_flags & BIP_MAPPED_INTEGRITY)
-			break;
-
-		virt = bip_get_seed(bip) & 0xffffffff;
-
-		bip_for_each_vec(iv, bip, iter) {
-			pi = kmap_atomic(iv.bv_page) + iv.bv_offset;
-
-			for (j = 0; j < iv.bv_len; j += tuple_sz, pi++) {
-
-				if (be32_to_cpu(pi->ref_tag) == virt)
-					pi->ref_tag = cpu_to_be32(phys);
-
-				virt++;
-				phys++;
-			}
-
-			kunmap_atomic(pi);
-		}
-
-		bip->bip_flags |= BIP_MAPPED_INTEGRITY;
-	}
-}
-
-/*
- * Remap physical sector values in the reference tag to the virtual
- * values expected by the block layer.
- */
-void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
-{
-	const int tuple_sz = sizeof(struct t10_pi_tuple);
-	struct scsi_disk *sdkp;
-	struct bio *bio;
-	struct t10_pi_tuple *pi;
-	unsigned int j, intervals;
-	u32 phys, virt;
-
-	sdkp = scsi_disk(scmd->request->rq_disk);
-
-	if (sdkp->protection_type == T10_PI_TYPE3_PROTECTION || good_bytes == 0)
-		return;
-
-	intervals = good_bytes / scsi_prot_interval(scmd);
-	phys = t10_pi_ref_tag(scmd->request);
-
-	__rq_for_each_bio(bio, scmd->request) {
-		struct bio_integrity_payload *bip = bio_integrity(bio);
-		struct bio_vec iv;
-		struct bvec_iter iter;
-
-		virt = bip_get_seed(bip) & 0xffffffff;
-
-		bip_for_each_vec(iv, bip, iter) {
-			pi = kmap_atomic(iv.bv_page) + iv.bv_offset;
-
-			for (j = 0; j < iv.bv_len; j += tuple_sz, pi++) {
-
-				if (intervals == 0) {
-					kunmap_atomic(pi);
-					return;
-				}
-
-				if (be32_to_cpu(pi->ref_tag) == phys)
-					pi->ref_tag = cpu_to_be32(virt);
-
-				virt++;
-				phys++;
-				intervals--;
-			}
-
-			kunmap_atomic(pi);
-		}
-	}
-}
-
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index 635855e..81ae4c4 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -47,5 +47,8 @@ static inline u32 t10_pi_ref_tag(struct request *rq)
 extern const struct blk_integrity_profile t10_pi_type1_ip;
 extern const struct blk_integrity_profile t10_pi_type3_crc;
 extern const struct blk_integrity_profile t10_pi_type3_ip;
+extern void t10_pi_prepare(struct request *rq, u8 protection_type);
+extern void t10_pi_complete(struct request *rq, u8 protection_type,
+			    unsigned int intervals);
 
 #endif
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v3 2/3] block: move dif_prepare/dif_complete functions to block layer
@ 2018-07-25  8:47   ` Max Gurtovoy
  0 siblings, 0 replies; 12+ messages in thread
From: Max Gurtovoy @ 2018-07-25  8:47 UTC (permalink / raw)


Currently these functions are implemented in the scsi layer, but their
actual place should be the block layer since T10-PI is a general data
integrity feature that is used in the nvme protocol as well. Also, use
the tuple size from the integrity profile since it may vary between
integrity types.

Suggested-by: Christoph Hellwig <hch at lst.de>
Cc: Jens Axboe <axboe at kernel.dk>
Cc: Martin K. Petersen <martin.petersen at oracle.com>
Signed-off-by: Max Gurtovoy <maxg at mellanox.com>
---

changes from v2:
 - convert comments to kerneldoc format
 - removed SCSI specific comment
 - fix kmap_atomic/kunmap_atomic addresses
 - fix iteration over t10_pi_tuple's

changes from v1 (Christoph, Martin and Keith comments):
 - moved the functions to t10-pi.c
 - updated tuple size
 - changed local variables scope
 - remove/add new lines

---
 block/t10-pi.c         | 114 +++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/scsi/sd.c      |   8 ++--
 drivers/scsi/sd.h      |   9 ----
 drivers/scsi/sd_dif.c  | 113 ------------------------------------------------
 include/linux/t10-pi.h |   3 ++
 5 files changed, 122 insertions(+), 125 deletions(-)

diff --git a/block/t10-pi.c b/block/t10-pi.c
index a98db38..6d2acfe 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -184,3 +184,117 @@ static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
 	.verify_fn		= t10_pi_type3_verify_ip,
 };
 EXPORT_SYMBOL(t10_pi_type3_ip);
+
+/**
+ * t10_pi_prepare - prepare PI prior submitting request to device
+ * @rq:              request with PI that should be prepared
+ * @protection_type: PI type (Type 1/Type 2/Type 3)
+ *
+ * For Type 1/Type 2, the virtual start sector is the one that was
+ * originally submitted by the block layer for the ref_tag usage. Due to
+ * partitioning, MD/DM cloning, etc. the actual physical start sector is
+ * likely to be different. Remap protection information to match the
+ * physical LBA.
+ *
+ * Type 3 does not have a reference tag so no remapping is required.
+ */
+void t10_pi_prepare(struct request *rq, u8 protection_type)
+{
+	const int tuple_sz = rq->q->integrity.tuple_size;
+	u32 ref_tag = t10_pi_ref_tag(rq);
+	struct bio *bio;
+
+	if (protection_type == T10_PI_TYPE3_PROTECTION)
+		return;
+
+	__rq_for_each_bio(bio, rq) {
+		struct bio_integrity_payload *bip = bio_integrity(bio);
+		u32 virt = bip_get_seed(bip) & 0xffffffff;
+		struct bio_vec iv;
+		struct bvec_iter iter;
+
+		/* Already remapped? */
+		if (bip->bip_flags & BIP_MAPPED_INTEGRITY)
+			break;
+
+		bip_for_each_vec(iv, bip, iter) {
+			struct t10_pi_tuple *pi;
+			void *p, *pmap;
+			unsigned int j;
+
+			pmap = kmap_atomic(iv.bv_page) + iv.bv_offset;
+			p = pmap;
+			for (j = 0; j < iv.bv_len; j += tuple_sz) {
+				pi = (struct t10_pi_tuple *)p;
+				if (be32_to_cpu(pi->ref_tag) == virt)
+					pi->ref_tag = cpu_to_be32(ref_tag);
+				virt++;
+				ref_tag++;
+				p += tuple_sz;
+			}
+
+			kunmap_atomic(pmap);
+		}
+
+		bip->bip_flags |= BIP_MAPPED_INTEGRITY;
+	}
+}
+EXPORT_SYMBOL(t10_pi_prepare);
+
+/**
+ * t10_pi_complete - prepare PI prior returning request to the block layer
+ * @rq:              request with PI that should be prepared
+ * @protection_type: PI type (Type 1/Type 2/Type 3)
+ * @intervals:       total elements to prepare
+ *
+ * For Type 1/Type 2, the virtual start sector is the one that was
+ * originally submitted by the block layer for the ref_tag usage. Due to
+ * partitioning, MD/DM cloning, etc. the actual physical start sector is
+ * likely to be different. Since the physical start sector was submitted
+ * to the device, we should remap it back to virtual values expected by the
+ * block layer.
+ *
+ * Type 3 does not have a reference tag so no remapping is required.
+ */
+void t10_pi_complete(struct request *rq, u8 protection_type,
+		     unsigned int intervals)
+{
+	const int tuple_sz = rq->q->integrity.tuple_size;
+	u32 ref_tag = t10_pi_ref_tag(rq);
+	struct bio *bio;
+
+	if (protection_type == T10_PI_TYPE3_PROTECTION)
+		return;
+
+	__rq_for_each_bio(bio, rq) {
+		struct bio_integrity_payload *bip = bio_integrity(bio);
+		u32 virt = bip_get_seed(bip) & 0xffffffff;
+		struct bio_vec iv;
+		struct bvec_iter iter;
+
+		bip_for_each_vec(iv, bip, iter) {
+			struct t10_pi_tuple *pi;
+			void *p, *pmap;
+			unsigned int j;
+
+			pmap = kmap_atomic(iv.bv_page) + iv.bv_offset;
+			p = pmap;
+			for (j = 0; j < iv.bv_len; j += tuple_sz) {
+				if (intervals == 0) {
+					kunmap_atomic(pmap);
+					return;
+				}
+				pi = (struct t10_pi_tuple *)p;
+				if (be32_to_cpu(pi->ref_tag) == ref_tag)
+					pi->ref_tag = cpu_to_be32(virt);
+				virt++;
+				ref_tag++;
+				intervals--;
+				p += tuple_sz;
+			}
+
+			kunmap_atomic(pmap);
+		}
+	}
+}
+EXPORT_SYMBOL(t10_pi_complete);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 9421d98..bbebdc3 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1119,7 +1119,7 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
 		SCpnt->cmnd[0] = WRITE_6;
 
 		if (blk_integrity_rq(rq))
-			sd_dif_prepare(SCpnt);
+			t10_pi_prepare(SCpnt->request, sdkp->protection_type);
 
 	} else if (rq_data_dir(rq) == READ) {
 		SCpnt->cmnd[0] = READ_6;
@@ -2047,8 +2047,10 @@ static int sd_done(struct scsi_cmnd *SCpnt)
 					   "sd_done: completed %d of %d bytes\n",
 					   good_bytes, scsi_bufflen(SCpnt)));
 
-	if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt))
-		sd_dif_complete(SCpnt, good_bytes);
+	if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt) &&
+	    good_bytes)
+		t10_pi_complete(SCpnt->request, sdkp->protection_type,
+				good_bytes / scsi_prot_interval(SCpnt));
 
 	return good_bytes;
 }
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 392c7d0..a7d4f50 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -254,21 +254,12 @@ static inline unsigned int sd_prot_flag_mask(unsigned int prot_op)
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 
 extern void sd_dif_config_host(struct scsi_disk *);
-extern void sd_dif_prepare(struct scsi_cmnd *scmd);
-extern void sd_dif_complete(struct scsi_cmnd *, unsigned int);
 
 #else /* CONFIG_BLK_DEV_INTEGRITY */
 
 static inline void sd_dif_config_host(struct scsi_disk *disk)
 {
 }
-static inline int sd_dif_prepare(struct scsi_cmnd *scmd)
-{
-	return 0;
-}
-static inline void sd_dif_complete(struct scsi_cmnd *cmd, unsigned int a)
-{
-}
 
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index d8de43d..db72c82 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -95,116 +95,3 @@ void sd_dif_config_host(struct scsi_disk *sdkp)
 	blk_integrity_register(disk, &bi);
 }
 
-/*
- * The virtual start sector is the one that was originally submitted
- * by the block layer.	Due to partitioning, MD/DM cloning, etc. the
- * actual physical start sector is likely to be different.  Remap
- * protection information to match the physical LBA.
- *
- * From a protocol perspective there's a slight difference between
- * Type 1 and 2.  The latter uses 32-byte CDBs exclusively, and the
- * reference tag is seeded in the CDB.  This gives us the potential to
- * avoid virt->phys remapping during write.  However, at read time we
- * don't know whether the virt sector is the same as when we wrote it
- * (we could be reading from real disk as opposed to MD/DM device.  So
- * we always remap Type 2 making it identical to Type 1.
- *
- * Type 3 does not have a reference tag so no remapping is required.
- */
-void sd_dif_prepare(struct scsi_cmnd *scmd)
-{
-	const int tuple_sz = sizeof(struct t10_pi_tuple);
-	struct bio *bio;
-	struct scsi_disk *sdkp;
-	struct t10_pi_tuple *pi;
-	u32 phys, virt;
-
-	sdkp = scsi_disk(scmd->request->rq_disk);
-
-	if (sdkp->protection_type == T10_PI_TYPE3_PROTECTION)
-		return;
-
-	phys = t10_pi_ref_tag(scmd->request);
-
-	__rq_for_each_bio(bio, scmd->request) {
-		struct bio_integrity_payload *bip = bio_integrity(bio);
-		struct bio_vec iv;
-		struct bvec_iter iter;
-		unsigned int j;
-
-		/* Already remapped? */
-		if (bip->bip_flags & BIP_MAPPED_INTEGRITY)
-			break;
-
-		virt = bip_get_seed(bip) & 0xffffffff;
-
-		bip_for_each_vec(iv, bip, iter) {
-			pi = kmap_atomic(iv.bv_page) + iv.bv_offset;
-
-			for (j = 0; j < iv.bv_len; j += tuple_sz, pi++) {
-
-				if (be32_to_cpu(pi->ref_tag) == virt)
-					pi->ref_tag = cpu_to_be32(phys);
-
-				virt++;
-				phys++;
-			}
-
-			kunmap_atomic(pi);
-		}
-
-		bip->bip_flags |= BIP_MAPPED_INTEGRITY;
-	}
-}
-
-/*
- * Remap physical sector values in the reference tag to the virtual
- * values expected by the block layer.
- */
-void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
-{
-	const int tuple_sz = sizeof(struct t10_pi_tuple);
-	struct scsi_disk *sdkp;
-	struct bio *bio;
-	struct t10_pi_tuple *pi;
-	unsigned int j, intervals;
-	u32 phys, virt;
-
-	sdkp = scsi_disk(scmd->request->rq_disk);
-
-	if (sdkp->protection_type == T10_PI_TYPE3_PROTECTION || good_bytes == 0)
-		return;
-
-	intervals = good_bytes / scsi_prot_interval(scmd);
-	phys = t10_pi_ref_tag(scmd->request);
-
-	__rq_for_each_bio(bio, scmd->request) {
-		struct bio_integrity_payload *bip = bio_integrity(bio);
-		struct bio_vec iv;
-		struct bvec_iter iter;
-
-		virt = bip_get_seed(bip) & 0xffffffff;
-
-		bip_for_each_vec(iv, bip, iter) {
-			pi = kmap_atomic(iv.bv_page) + iv.bv_offset;
-
-			for (j = 0; j < iv.bv_len; j += tuple_sz, pi++) {
-
-				if (intervals == 0) {
-					kunmap_atomic(pi);
-					return;
-				}
-
-				if (be32_to_cpu(pi->ref_tag) == phys)
-					pi->ref_tag = cpu_to_be32(virt);
-
-				virt++;
-				phys++;
-				intervals--;
-			}
-
-			kunmap_atomic(pi);
-		}
-	}
-}
-
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index 635855e..81ae4c4 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -47,5 +47,8 @@ static inline u32 t10_pi_ref_tag(struct request *rq)
 extern const struct blk_integrity_profile t10_pi_type1_ip;
 extern const struct blk_integrity_profile t10_pi_type3_crc;
 extern const struct blk_integrity_profile t10_pi_type3_ip;
+extern void t10_pi_prepare(struct request *rq, u8 protection_type);
+extern void t10_pi_complete(struct request *rq, u8 protection_type,
+			    unsigned int intervals);
 
 #endif
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v3 3/3] nvme: use blk API to remap ref tags for IOs with metadata
  2018-07-25  8:46 ` Max Gurtovoy
@ 2018-07-25  8:47   ` Max Gurtovoy
  -1 siblings, 0 replies; 12+ messages in thread
From: Max Gurtovoy @ 2018-07-25  8:47 UTC (permalink / raw)
  To: keith.busch, linux-nvme, sagi, hch, martin.petersen, linux-block, axboe
  Cc: vladimirk, Max Gurtovoy

Also moved the logic of the remapping to the nvme core driver instead
of implementing it in the nvme pci driver. This way all the other nvme
transport drivers will benefit from it (in case they'll implement metadata
support).

Suggested-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
---

changes from v2:
 - add Reviewed-by tag
changes from v1:
 - check status of nvme_req instead of converting to BLK_STS

---
 drivers/nvme/host/core.c | 18 ++++++++++++
 drivers/nvme/host/nvme.h |  9 +-----
 drivers/nvme/host/pci.c  | 75 +-----------------------------------------------
 3 files changed, 20 insertions(+), 82 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 191177b..b57abe5 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -617,6 +617,8 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 			if (WARN_ON_ONCE(!nvme_ns_has_pi(ns)))
 				return BLK_STS_NOTSUPP;
 			control |= NVME_RW_PRINFO_PRACT;
+		} else if (req_op(req) == REQ_OP_WRITE) {
+			t10_pi_prepare(req, ns->pi_type);
 		}
 
 		switch (ns->pi_type) {
@@ -637,6 +639,22 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 	return 0;
 }
 
+void nvme_cleanup_cmd(struct request *req)
+{
+	if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
+	    nvme_req(req)->status == 0) {
+		struct nvme_ns *ns = req->rq_disk->private_data;
+
+		t10_pi_complete(req, ns->pi_type,
+				blk_rq_bytes(req) >> ns->lba_shift);
+	}
+	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
+		kfree(page_address(req->special_vec.bv_page) +
+		      req->special_vec.bv_offset);
+	}
+}
+EXPORT_SYMBOL_GPL(nvme_cleanup_cmd);
+
 blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 		struct nvme_command *cmd)
 {
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 0c4a33d..dfc01ff 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -356,14 +356,6 @@ static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
 	return (sector >> (ns->lba_shift - 9));
 }
 
-static inline void nvme_cleanup_cmd(struct request *req)
-{
-	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
-		kfree(page_address(req->special_vec.bv_page) +
-		      req->special_vec.bv_offset);
-	}
-}
-
 static inline void nvme_end_request(struct request *req, __le16 status,
 		union nvme_result result)
 {
@@ -420,6 +412,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 #define NVME_QID_ANY -1
 struct request *nvme_alloc_request(struct request_queue *q,
 		struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid);
+void nvme_cleanup_cmd(struct request *req);
 blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 		struct nvme_command *cmd);
 int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index ddd441b..03beac5 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -535,73 +535,6 @@ static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
 		mempool_free(iod->sg, dev->iod_mempool);
 }
 
-#ifdef CONFIG_BLK_DEV_INTEGRITY
-static void nvme_dif_prep(u32 p, u32 v, struct t10_pi_tuple *pi)
-{
-	if (be32_to_cpu(pi->ref_tag) == v)
-		pi->ref_tag = cpu_to_be32(p);
-}
-
-static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
-{
-	if (be32_to_cpu(pi->ref_tag) == p)
-		pi->ref_tag = cpu_to_be32(v);
-}
-
-/**
- * nvme_dif_remap - remaps ref tags to bip seed and physical lba
- *
- * The virtual start sector is the one that was originally submitted by the
- * block layer.	Due to partitioning, MD/DM cloning, etc. the actual physical
- * start sector may be different. Remap protection information to match the
- * physical LBA on writes, and back to the original seed on reads.
- *
- * Type 0 and 3 do not have a ref tag, so no remapping required.
- */
-static void nvme_dif_remap(struct request *req,
-			void (*dif_swap)(u32 p, u32 v, struct t10_pi_tuple *pi))
-{
-	struct nvme_ns *ns = req->rq_disk->private_data;
-	struct bio_integrity_payload *bip;
-	struct t10_pi_tuple *pi;
-	void *p, *pmap;
-	u32 i, nlb, ts, phys, virt;
-
-	if (!ns->pi_type || ns->pi_type == NVME_NS_DPS_PI_TYPE3)
-		return;
-
-	bip = bio_integrity(req->bio);
-	if (!bip)
-		return;
-
-	pmap = kmap_atomic(bip->bip_vec->bv_page) + bip->bip_vec->bv_offset;
-
-	p = pmap;
-	virt = bip_get_seed(bip);
-	phys = nvme_block_nr(ns, blk_rq_pos(req));
-	nlb = (blk_rq_bytes(req) >> ns->lba_shift);
-	ts = ns->disk->queue->integrity.tuple_size;
-
-	for (i = 0; i < nlb; i++, virt++, phys++) {
-		pi = (struct t10_pi_tuple *)p;
-		dif_swap(phys, virt, pi);
-		p += ts;
-	}
-	kunmap_atomic(pmap);
-}
-#else /* CONFIG_BLK_DEV_INTEGRITY */
-static void nvme_dif_remap(struct request *req,
-			void (*dif_swap)(u32 p, u32 v, struct t10_pi_tuple *pi))
-{
-}
-static void nvme_dif_prep(u32 p, u32 v, struct t10_pi_tuple *pi)
-{
-}
-static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
-{
-}
-#endif
-
 static void nvme_print_sgl(struct scatterlist *sgl, int nents)
 {
 	int i;
@@ -827,9 +760,6 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 		if (blk_rq_map_integrity_sg(q, req->bio, &iod->meta_sg) != 1)
 			goto out_unmap;
 
-		if (req_op(req) == REQ_OP_WRITE)
-			nvme_dif_remap(req, nvme_dif_prep);
-
 		if (!dma_map_sg(dev->dev, &iod->meta_sg, 1, dma_dir))
 			goto out_unmap;
 	}
@@ -852,11 +782,8 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
 
 	if (iod->nents) {
 		dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
-		if (blk_integrity_rq(req)) {
-			if (req_op(req) == REQ_OP_READ)
-				nvme_dif_remap(req, nvme_dif_complete);
+		if (blk_integrity_rq(req))
 			dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir);
-		}
 	}
 
 	nvme_cleanup_cmd(req);
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v3 3/3] nvme: use blk API to remap ref tags for IOs with metadata
@ 2018-07-25  8:47   ` Max Gurtovoy
  0 siblings, 0 replies; 12+ messages in thread
From: Max Gurtovoy @ 2018-07-25  8:47 UTC (permalink / raw)


Also moved the logic of the remapping to the nvme core driver instead
of implementing it in the nvme pci driver. This way all the other nvme
transport drivers will benefit from it (in case they'll implement metadata
support).

Suggested-by: Christoph Hellwig <hch at lst.de>
Reviewed-by: Christoph Hellwig <hch at lst.de>
Cc: Jens Axboe <axboe at kernel.dk>
Cc: Martin K. Petersen <martin.petersen at oracle.com>
Signed-off-by: Max Gurtovoy <maxg at mellanox.com>
---

changes from v2:
 - add Reviewed-by tag
changes from v1:
 - check status of nvme_req instead of converting to BLK_STS

---
 drivers/nvme/host/core.c | 18 ++++++++++++
 drivers/nvme/host/nvme.h |  9 +-----
 drivers/nvme/host/pci.c  | 75 +-----------------------------------------------
 3 files changed, 20 insertions(+), 82 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 191177b..b57abe5 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -617,6 +617,8 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 			if (WARN_ON_ONCE(!nvme_ns_has_pi(ns)))
 				return BLK_STS_NOTSUPP;
 			control |= NVME_RW_PRINFO_PRACT;
+		} else if (req_op(req) == REQ_OP_WRITE) {
+			t10_pi_prepare(req, ns->pi_type);
 		}
 
 		switch (ns->pi_type) {
@@ -637,6 +639,22 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 	return 0;
 }
 
+void nvme_cleanup_cmd(struct request *req)
+{
+	if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
+	    nvme_req(req)->status == 0) {
+		struct nvme_ns *ns = req->rq_disk->private_data;
+
+		t10_pi_complete(req, ns->pi_type,
+				blk_rq_bytes(req) >> ns->lba_shift);
+	}
+	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
+		kfree(page_address(req->special_vec.bv_page) +
+		      req->special_vec.bv_offset);
+	}
+}
+EXPORT_SYMBOL_GPL(nvme_cleanup_cmd);
+
 blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 		struct nvme_command *cmd)
 {
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 0c4a33d..dfc01ff 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -356,14 +356,6 @@ static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
 	return (sector >> (ns->lba_shift - 9));
 }
 
-static inline void nvme_cleanup_cmd(struct request *req)
-{
-	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
-		kfree(page_address(req->special_vec.bv_page) +
-		      req->special_vec.bv_offset);
-	}
-}
-
 static inline void nvme_end_request(struct request *req, __le16 status,
 		union nvme_result result)
 {
@@ -420,6 +412,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 #define NVME_QID_ANY -1
 struct request *nvme_alloc_request(struct request_queue *q,
 		struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid);
+void nvme_cleanup_cmd(struct request *req);
 blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 		struct nvme_command *cmd);
 int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index ddd441b..03beac5 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -535,73 +535,6 @@ static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
 		mempool_free(iod->sg, dev->iod_mempool);
 }
 
-#ifdef CONFIG_BLK_DEV_INTEGRITY
-static void nvme_dif_prep(u32 p, u32 v, struct t10_pi_tuple *pi)
-{
-	if (be32_to_cpu(pi->ref_tag) == v)
-		pi->ref_tag = cpu_to_be32(p);
-}
-
-static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
-{
-	if (be32_to_cpu(pi->ref_tag) == p)
-		pi->ref_tag = cpu_to_be32(v);
-}
-
-/**
- * nvme_dif_remap - remaps ref tags to bip seed and physical lba
- *
- * The virtual start sector is the one that was originally submitted by the
- * block layer.	Due to partitioning, MD/DM cloning, etc. the actual physical
- * start sector may be different. Remap protection information to match the
- * physical LBA on writes, and back to the original seed on reads.
- *
- * Type 0 and 3 do not have a ref tag, so no remapping required.
- */
-static void nvme_dif_remap(struct request *req,
-			void (*dif_swap)(u32 p, u32 v, struct t10_pi_tuple *pi))
-{
-	struct nvme_ns *ns = req->rq_disk->private_data;
-	struct bio_integrity_payload *bip;
-	struct t10_pi_tuple *pi;
-	void *p, *pmap;
-	u32 i, nlb, ts, phys, virt;
-
-	if (!ns->pi_type || ns->pi_type == NVME_NS_DPS_PI_TYPE3)
-		return;
-
-	bip = bio_integrity(req->bio);
-	if (!bip)
-		return;
-
-	pmap = kmap_atomic(bip->bip_vec->bv_page) + bip->bip_vec->bv_offset;
-
-	p = pmap;
-	virt = bip_get_seed(bip);
-	phys = nvme_block_nr(ns, blk_rq_pos(req));
-	nlb = (blk_rq_bytes(req) >> ns->lba_shift);
-	ts = ns->disk->queue->integrity.tuple_size;
-
-	for (i = 0; i < nlb; i++, virt++, phys++) {
-		pi = (struct t10_pi_tuple *)p;
-		dif_swap(phys, virt, pi);
-		p += ts;
-	}
-	kunmap_atomic(pmap);
-}
-#else /* CONFIG_BLK_DEV_INTEGRITY */
-static void nvme_dif_remap(struct request *req,
-			void (*dif_swap)(u32 p, u32 v, struct t10_pi_tuple *pi))
-{
-}
-static void nvme_dif_prep(u32 p, u32 v, struct t10_pi_tuple *pi)
-{
-}
-static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
-{
-}
-#endif
-
 static void nvme_print_sgl(struct scatterlist *sgl, int nents)
 {
 	int i;
@@ -827,9 +760,6 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 		if (blk_rq_map_integrity_sg(q, req->bio, &iod->meta_sg) != 1)
 			goto out_unmap;
 
-		if (req_op(req) == REQ_OP_WRITE)
-			nvme_dif_remap(req, nvme_dif_prep);
-
 		if (!dma_map_sg(dev->dev, &iod->meta_sg, 1, dma_dir))
 			goto out_unmap;
 	}
@@ -852,11 +782,8 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
 
 	if (iod->nents) {
 		dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
-		if (blk_integrity_rq(req)) {
-			if (req_op(req) == REQ_OP_READ)
-				nvme_dif_remap(req, nvme_dif_complete);
+		if (blk_integrity_rq(req))
 			dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir);
-		}
 	}
 
 	nvme_cleanup_cmd(req);
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 2/3] block: move dif_prepare/dif_complete functions to block layer
  2018-07-25  8:47   ` Max Gurtovoy
@ 2018-07-25 11:22     ` Christoph Hellwig
  -1 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2018-07-25 11:22 UTC (permalink / raw)
  To: Max Gurtovoy
  Cc: keith.busch, linux-nvme, sagi, hch, martin.petersen, linux-block,
	axboe, vladimirk

> +			pmap = kmap_atomic(iv.bv_page) + iv.bv_offset;
> +			p = pmap;

Maybe:

			pmap = p = kmap_atomic(iv.bv_page) + iv.bv_offset;

> +			for (j = 0; j < iv.bv_len; j += tuple_sz) {
> +				pi = (struct t10_pi_tuple *)p;

No need for the cast, also the pi declaration can be moved into the
inner scope:

				struct t10_pi_tuple *pi = p;

> +			pmap = kmap_atomic(iv.bv_page) + iv.bv_offset;
> +			p = pmap;
> +			for (j = 0; j < iv.bv_len; j += tuple_sz) {
> +				if (intervals == 0) {
> +					kunmap_atomic(pmap);
> +					return;
> +				}
> +				pi = (struct t10_pi_tuple *)p;

Same here.

Also the intervals check would make sense in the for loop I think, e.g.:

			pmap = p = kmap_atomic(iv.bv_page) + iv.bv_offset;
			for (j = 0; j < iv.bv_len && intervals; j += tuple_sz) {
				struct t10_pi_tuple *pi = p;

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v3 2/3] block: move dif_prepare/dif_complete functions to block layer
@ 2018-07-25 11:22     ` Christoph Hellwig
  0 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2018-07-25 11:22 UTC (permalink / raw)


> +			pmap = kmap_atomic(iv.bv_page) + iv.bv_offset;
> +			p = pmap;

Maybe:

			pmap = p = kmap_atomic(iv.bv_page) + iv.bv_offset;

> +			for (j = 0; j < iv.bv_len; j += tuple_sz) {
> +				pi = (struct t10_pi_tuple *)p;

No need for the cast, also the pi declaration can be moved into the
inner scope:

				struct t10_pi_tuple *pi = p;

> +			pmap = kmap_atomic(iv.bv_page) + iv.bv_offset;
> +			p = pmap;
> +			for (j = 0; j < iv.bv_len; j += tuple_sz) {
> +				if (intervals == 0) {
> +					kunmap_atomic(pmap);
> +					return;
> +				}
> +				pi = (struct t10_pi_tuple *)p;

Same here.

Also the intervals check would make sense in the for loop I think, e.g.:

			pmap = p = kmap_atomic(iv.bv_page) + iv.bv_offset;
			for (j = 0; j < iv.bv_len && intervals; j += tuple_sz) {
				struct t10_pi_tuple *pi = p;

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 2/3] block: move dif_prepare/dif_complete functions to block layer
  2018-07-25 11:22     ` Christoph Hellwig
@ 2018-07-25 11:47       ` Max Gurtovoy
  -1 siblings, 0 replies; 12+ messages in thread
From: Max Gurtovoy @ 2018-07-25 11:47 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: keith.busch, linux-nvme, sagi, martin.petersen, linux-block,
	axboe, vladimirk



On 7/25/2018 2:22 PM, Christoph Hellwig wrote:
>> +			pmap = kmap_atomic(iv.bv_page) + iv.bv_offset;
>> +			p = pmap;
> 
> Maybe:
> 
> 			pmap = p = kmap_atomic(iv.bv_page) + iv.bv_offset;
> 
>> +			for (j = 0; j < iv.bv_len; j += tuple_sz) {
>> +				pi = (struct t10_pi_tuple *)p;
> 
> No need for the cast, also the pi declaration can be moved into the
> inner scope:
> 
> 				struct t10_pi_tuple *pi = p;
> 
>> +			pmap = kmap_atomic(iv.bv_page) + iv.bv_offset;
>> +			p = pmap;
>> +			for (j = 0; j < iv.bv_len; j += tuple_sz) {
>> +				if (intervals == 0) {
>> +					kunmap_atomic(pmap);
>> +					return;
>> +				}
>> +				pi = (struct t10_pi_tuple *)p;
> 
> Same here.
> 
> Also the intervals check would make sense in the for loop I think, e.g.:
> 
> 			pmap = p = kmap_atomic(iv.bv_page) + iv.bv_offset;
> 			for (j = 0; j < iv.bv_len && intervals; j += tuple_sz) {
> 				struct t10_pi_tuple *pi = p;
> 


Yes it make sense, but we can do more iterations before we return but I 
guess it's rare and it's worth to have less lines of code.
I'll update it for the next version.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v3 2/3] block: move dif_prepare/dif_complete functions to block layer
@ 2018-07-25 11:47       ` Max Gurtovoy
  0 siblings, 0 replies; 12+ messages in thread
From: Max Gurtovoy @ 2018-07-25 11:47 UTC (permalink / raw)




On 7/25/2018 2:22 PM, Christoph Hellwig wrote:
>> +			pmap = kmap_atomic(iv.bv_page) + iv.bv_offset;
>> +			p = pmap;
> 
> Maybe:
> 
> 			pmap = p = kmap_atomic(iv.bv_page) + iv.bv_offset;
> 
>> +			for (j = 0; j < iv.bv_len; j += tuple_sz) {
>> +				pi = (struct t10_pi_tuple *)p;
> 
> No need for the cast, also the pi declaration can be moved into the
> inner scope:
> 
> 				struct t10_pi_tuple *pi = p;
> 
>> +			pmap = kmap_atomic(iv.bv_page) + iv.bv_offset;
>> +			p = pmap;
>> +			for (j = 0; j < iv.bv_len; j += tuple_sz) {
>> +				if (intervals == 0) {
>> +					kunmap_atomic(pmap);
>> +					return;
>> +				}
>> +				pi = (struct t10_pi_tuple *)p;
> 
> Same here.
> 
> Also the intervals check would make sense in the for loop I think, e.g.:
> 
> 			pmap = p = kmap_atomic(iv.bv_page) + iv.bv_offset;
> 			for (j = 0; j < iv.bv_len && intervals; j += tuple_sz) {
> 				struct t10_pi_tuple *pi = p;
> 


Yes it make sense, but we can do more iterations before we return but I 
guess it's rare and it's worth to have less lines of code.
I'll update it for the next version.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 2/3] block: move dif_prepare/dif_complete functions to block layer
  2018-07-25 11:22     ` Christoph Hellwig
@ 2018-07-25 14:53       ` Keith Busch
  -1 siblings, 0 replies; 12+ messages in thread
From: Keith Busch @ 2018-07-25 14:53 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Max Gurtovoy, axboe, linux-block, sagi, martin.petersen,
	vladimirk, linux-nvme, keith.busch

On Wed, Jul 25, 2018 at 01:22:47PM +0200, Christoph Hellwig wrote:
> > +			pmap = kmap_atomic(iv.bv_page) + iv.bv_offset;
> > +			p = pmap;
> 
> Maybe:
> 
> 			pmap = p = kmap_atomic(iv.bv_page) + iv.bv_offset;

Max pointed out that even with this, we're still calling kunmap_atomic()
with an address potentially at an offset from the page that was kmap'ed.
While currently harmless, perhaps for correctness:

 			pmap = kmap_atomic(iv.bv_page);
			p = pmap + iv.bv_offset;

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v3 2/3] block: move dif_prepare/dif_complete functions to block layer
@ 2018-07-25 14:53       ` Keith Busch
  0 siblings, 0 replies; 12+ messages in thread
From: Keith Busch @ 2018-07-25 14:53 UTC (permalink / raw)


On Wed, Jul 25, 2018@01:22:47PM +0200, Christoph Hellwig wrote:
> > +			pmap = kmap_atomic(iv.bv_page) + iv.bv_offset;
> > +			p = pmap;
> 
> Maybe:
> 
> 			pmap = p = kmap_atomic(iv.bv_page) + iv.bv_offset;

Max pointed out that even with this, we're still calling kunmap_atomic()
with an address potentially at an offset from the page that was kmap'ed.
While currently harmless, perhaps for correctness:

 			pmap = kmap_atomic(iv.bv_page);
			p = pmap + iv.bv_offset;

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2018-07-25 14:53 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-07-25  8:46 [PATCH v3 1/3] block: move ref_tag calculation func to the block layer Max Gurtovoy
2018-07-25  8:46 ` Max Gurtovoy
2018-07-25  8:47 ` [PATCH v3 2/3] block: move dif_prepare/dif_complete functions to " Max Gurtovoy
2018-07-25  8:47   ` Max Gurtovoy
2018-07-25 11:22   ` Christoph Hellwig
2018-07-25 11:22     ` Christoph Hellwig
2018-07-25 11:47     ` Max Gurtovoy
2018-07-25 11:47       ` Max Gurtovoy
2018-07-25 14:53     ` Keith Busch
2018-07-25 14:53       ` Keith Busch
2018-07-25  8:47 ` [PATCH v3 3/3] nvme: use blk API to remap ref tags for IOs with metadata Max Gurtovoy
2018-07-25  8:47   ` Max Gurtovoy

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.