All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/7] NVMe Data Integrity Extensions
@ 2013-03-21 17:52 Keith Busch
  2013-03-21 17:52 ` [PATCH v2 1/7] sd: remove invalid ref tag check Keith Busch
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: Keith Busch @ 2013-03-21 17:52 UTC (permalink / raw)


This patch set adds data integrity extensions to NVMe. In contrast with
the previous patch attempt, this makes some modifications to sd_dif
to export the template so a block driver can make use of it instead of
copying it. There are more namespace block formats possible than what is
available in SCSI, so I had to make some modifications to the integrity
code to take this into account. I hope that's okay.

Keith Busch (7):
  sd: remove invalid ref tag check
  sd: skip verifying unwritten sectors
  sd: hw sector size calculation
  sd: arbitrary dif meta-data sizes
  sd: export dif integrity template
  NVMe: Split non-mergeable bio requests
  NVMe: End-to-end data protection

 drivers/block/nvme.c   |  277 ++++++++++++++++++++++++++++++++++++++++++------
 drivers/scsi/sd_dif.c  |  144 ++++++++++++++++---------
 fs/bio-integrity.c     |   19 ++--
 include/linux/blkdev.h |   10 ++-
 include/linux/nvme.h   |   28 ++++-
 5 files changed, 378 insertions(+), 100 deletions(-)

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v2 1/7] sd: remove invalid ref tag check
  2013-03-21 17:52 [PATCH v2 0/7] NVMe Data Integrity Extensions Keith Busch
@ 2013-03-21 17:52 ` Keith Busch
  2013-03-21 17:52 ` [PATCH v2 2/7] sd: skip verifying unwritten sectors Keith Busch
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Keith Busch @ 2013-03-21 17:52 UTC (permalink / raw)


Removes incorrect check for a ref tag of 0xffffffff on verify, which is
potentially a valid ref.

Cc: Martin K. Petersen <martin.petersen at oracle.com>
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/scsi/sd_dif.c |    8 --------
 1 files changed, 0 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index 04998f3..6174ca4 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -93,14 +93,6 @@ static int sd_dif_type1_verify(struct blk_integrity_exchg *bix, csum_fn *fn)
 		if (sdt->app_tag == 0xffff)
 			return 0;
 
-		/* Bad ref tag received from disk */
-		if (sdt->ref_tag == 0xffffffff) {
-			printk(KERN_ERR
-			       "%s: bad phys ref tag on sector %lu\n",
-			       bix->disk_name, (unsigned long)sector);
-			return -EIO;
-		}
-
 		if (be32_to_cpu(sdt->ref_tag) != (sector & 0xffffffff)) {
 			printk(KERN_ERR
 			       "%s: ref tag error on sector %lu (rcvd %u)\n",
-- 
1.7.0.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2 2/7] sd: skip verifying unwritten sectors
  2013-03-21 17:52 [PATCH v2 0/7] NVMe Data Integrity Extensions Keith Busch
  2013-03-21 17:52 ` [PATCH v2 1/7] sd: remove invalid ref tag check Keith Busch
@ 2013-03-21 17:52 ` Keith Busch
  2013-03-21 17:52 ` [PATCH v2 3/7] sd: hw sector size calculation Keith Busch
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Keith Busch @ 2013-03-21 17:52 UTC (permalink / raw)


This skips verifying unwritten sectors and continues on to the next
sector. Previously, the code would return early if an unwritten sector
was encountered, whether the next sector was written or not.

Cc: Martin K. Petersen <martin.petersen at oracle.com>
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/scsi/sd_dif.c |   18 ++++++------------
 1 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index 6174ca4..7cf0a39 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -83,15 +83,15 @@ static void sd_dif_type1_generate_ip(struct blk_integrity_exchg *bix)
 static int sd_dif_type1_verify(struct blk_integrity_exchg *bix, csum_fn *fn)
 {
 	void *buf = bix->data_buf;
+	void *end = buf + bix->data_size;
 	struct sd_dif_tuple *sdt = bix->prot_buf;
 	sector_t sector = bix->sector;
-	unsigned int i;
 	__u16 csum;
 
-	for (i = 0 ; i < bix->data_size ; i += bix->sector_size, sdt++) {
+	for (; buf < end; buf += bix->sector_size, sdt++, sector++) {
 		/* Unwritten sectors */
 		if (sdt->app_tag == 0xffff)
-			return 0;
+			continue;
 
 		if (be32_to_cpu(sdt->ref_tag) != (sector & 0xffffffff)) {
 			printk(KERN_ERR
@@ -110,9 +110,6 @@ static int sd_dif_type1_verify(struct blk_integrity_exchg *bix, csum_fn *fn)
 			       be16_to_cpu(sdt->guard_tag), be16_to_cpu(csum));
 			return -EIO;
 		}
-
-		buf += bix->sector_size;
-		sector++;
 	}
 
 	return 0;
@@ -208,15 +205,15 @@ static void sd_dif_type3_generate_ip(struct blk_integrity_exchg *bix)
 static int sd_dif_type3_verify(struct blk_integrity_exchg *bix, csum_fn *fn)
 {
 	void *buf = bix->data_buf;
+	void *end = buf + bix->data_size;
 	struct sd_dif_tuple *sdt = bix->prot_buf;
 	sector_t sector = bix->sector;
-	unsigned int i;
 	__u16 csum;
 
-	for (i = 0 ; i < bix->data_size ; i += bix->sector_size, sdt++) {
+	for (; buf < end; buf += bix->sector_size, sdt++, sector++) {
 		/* Unwritten sectors */
 		if (sdt->app_tag == 0xffff && sdt->ref_tag == 0xffffffff)
-			return 0;
+			continue;
 
 		csum = fn(buf, bix->sector_size);
 
@@ -227,9 +224,6 @@ static int sd_dif_type3_verify(struct blk_integrity_exchg *bix, csum_fn *fn)
 			       be16_to_cpu(sdt->guard_tag), be16_to_cpu(csum));
 			return -EIO;
 		}
-
-		buf += bix->sector_size;
-		sector++;
 	}
 
 	return 0;
-- 
1.7.0.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2 3/7] sd: hw sector size calculation
  2013-03-21 17:52 [PATCH v2 0/7] NVMe Data Integrity Extensions Keith Busch
  2013-03-21 17:52 ` [PATCH v2 1/7] sd: remove invalid ref tag check Keith Busch
  2013-03-21 17:52 ` [PATCH v2 2/7] sd: skip verifying unwritten sectors Keith Busch
@ 2013-03-21 17:52 ` Keith Busch
  2013-03-21 17:52 ` [PATCH v2 4/7] sd: arbitrary dif meta-data sizes Keith Busch
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Keith Busch @ 2013-03-21 17:52 UTC (permalink / raw)


Fixes the calculation of the number of sectors and the starting sector
related to protection information. The calculation uses shifting and
requires the sector sizes be a power of 2 >= 512, which I think is true
for all drives.

Cc: Martin K. Petersen <martin.petersen at oracle.com>
Signed-off-by: Keith Busch <keith.busch at intel.com>

I try to avoid full divides in an IO path if possible hence the shift
operations, but if this is too obscure or there are valid block formats
where this wouldn't work, happy to change it to something else.
---
 drivers/scsi/sd_dif.c |   12 ++++++------
 fs/bio-integrity.c    |   11 ++++-------
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index 7cf0a39..aae5507 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -376,7 +376,8 @@ void sd_dif_prepare(struct request *rq, sector_t hw_sector,
 		if (bio_flagged(bio, BIO_MAPPED_INTEGRITY))
 			break;
 
-		virt = bio->bi_integrity->bip_sector & 0xffffffff;
+		virt = (bio->bi_integrity->bip_sector >>
+				(__ffs(sector_sz) - 9)) & 0xffffffff;
 
 		bip_for_each_vec(iv, bio->bi_integrity, i) {
 			sdt = kmap_atomic(iv->bv_page)
@@ -419,14 +420,13 @@ void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
 	sector_sz = scmd->device->sector_size;
 	sectors = good_bytes / sector_sz;
 
-	phys = blk_rq_pos(scmd->request) & 0xffffffff;
-	if (sector_sz == 4096)
-		phys >>= 3;
-
+	phys = (blk_rq_pos(scmd->request) >> (__ffs(sector_sz) - 9)) &
+								0xffffffff;
 	__rq_for_each_bio(bio, scmd->request) {
 		struct bio_vec *iv;
 
-		virt = bio->bi_integrity->bip_sector & 0xffffffff;
+		virt = (bio->bi_integrity->bip_sector >>
+					(__ffs(sector_sz) - 9)) & 0xffffffff;
 
 		bip_for_each_vec(iv, bio->bi_integrity, i) {
 			sdt = kmap_atomic(iv->bv_page)
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index a3f28f3..b4ab7da 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -231,11 +231,7 @@ EXPORT_SYMBOL(bio_integrity_enabled);
 static inline unsigned int bio_integrity_hw_sectors(struct blk_integrity *bi,
 						    unsigned int sectors)
 {
-	/* At this point there are only 512b or 4096b DIF/EPP devices */
-	if (bi->sector_size == 4096)
-		return sectors >>= 3;
-
-	return sectors;
+	return sectors >> (__ffs(bi->sector_size) - 9);
 }
 
 /**
@@ -335,7 +331,7 @@ static void bio_integrity_generate(struct bio *bio)
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
 	struct blk_integrity_exchg bix;
 	struct bio_vec *bv;
-	sector_t sector = bio->bi_sector;
+	sector_t sector = bio->bi_sector >> (__ffs(bi->sector_size) - 9);
 	unsigned int i, sectors, total;
 	void *prot_buf = bio->bi_integrity->bip_buf;
 
@@ -476,7 +472,8 @@ static int bio_integrity_verify(struct bio *bio)
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
 	struct blk_integrity_exchg bix;
 	struct bio_vec *bv;
-	sector_t sector = bio->bi_integrity->bip_sector;
+	sector_t sector = bio->bi_integrity->bip_sector >>
+						(__ffs(bi->sector_size) - 9);
 	unsigned int i, sectors, total, ret;
 	void *prot_buf = bio->bi_integrity->bip_buf;
 
-- 
1.7.0.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2 4/7] sd: arbitrary dif meta-data sizes
  2013-03-21 17:52 [PATCH v2 0/7] NVMe Data Integrity Extensions Keith Busch
                   ` (2 preceding siblings ...)
  2013-03-21 17:52 ` [PATCH v2 3/7] sd: hw sector size calculation Keith Busch
@ 2013-03-21 17:52 ` Keith Busch
  2013-03-21 17:52 ` [PATCH v2 5/7] sd: export dif integrity template Keith Busch
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Keith Busch @ 2013-03-21 17:52 UTC (permalink / raw)


Possible block formats with protection information may have larger
meta-data per physical block than an 8byte DIF. The first 8 bytes of
the meta-data region are for DIF data, while the remaining meta-data
is application specific. The size of the integrity buffer is already
allocated large enough for the meta-data size per sector, but accessing
the integrity buffer assumed meta-data region per sector was the size
of the DIF tuple. This patch updates the DIF tuple accesses to take into
account size of the block meta-data.

Cc: Martin K. Petersen <martin.petersen at oracle.com>
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/scsi/sd_dif.c  |   86 ++++++++++++++++++++++++++++++++---------------
 fs/bio-integrity.c     |    8 +++-
 include/linux/blkdev.h |    5 ++-
 3 files changed, 67 insertions(+), 32 deletions(-)

diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index aae5507..1735513 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -56,16 +56,19 @@ static __u16 sd_dif_ip_fn(void *data, unsigned int len)
 static void sd_dif_type1_generate(struct blk_integrity_exchg *bix, csum_fn *fn)
 {
 	void *buf = bix->data_buf;
-	struct sd_dif_tuple *sdt = bix->prot_buf;
+	void *tuple = bix->prot_buf;
 	sector_t sector = bix->sector;
+	struct sd_dif_tuple *sdt;
 	unsigned int i;
 
-	for (i = 0 ; i < bix->data_size ; i += bix->sector_size, sdt++) {
+	for (i = 0 ; i < bix->data_size ; i += bix->sector_size) {
+		sdt = tuple;
 		sdt->guard_tag = fn(buf, bix->sector_size);
 		sdt->ref_tag = cpu_to_be32(sector & 0xffffffff);
 		sdt->app_tag = 0;
 
 		buf += bix->sector_size;
+		tuple += bix->tuple_size;
 		sector++;
 	}
 }
@@ -84,11 +87,15 @@ static int sd_dif_type1_verify(struct blk_integrity_exchg *bix, csum_fn *fn)
 {
 	void *buf = bix->data_buf;
 	void *end = buf + bix->data_size;
-	struct sd_dif_tuple *sdt = bix->prot_buf;
+	void *tuple = bix->prot_buf;
+	struct sd_dif_tuple *sdt;
 	sector_t sector = bix->sector;
 	__u16 csum;
 
-	for (; buf < end; buf += bix->sector_size, sdt++, sector++) {
+	for (; buf < end; buf += bix->sector_size, tuple += bix->tuple_size,
+								sector++) {
+		sdt = tuple;
+
 		/* Unwritten sectors */
 		if (sdt->app_tag == 0xffff)
 			continue;
@@ -128,25 +135,31 @@ static int sd_dif_type1_verify_ip(struct blk_integrity_exchg *bix)
 /*
  * Functions for interleaving and deinterleaving application tags
  */
-static void sd_dif_type1_set_tag(void *prot, void *tag_buf, unsigned int sectors)
+static void sd_dif_type1_set_tag(void *prot, void *tag_buf, unsigned int sectors,
+							unsigned short tuple_size)
 {
-	struct sd_dif_tuple *sdt = prot;
+	void *tuple = prot;
 	u8 *tag = tag_buf;
+	struct sd_dif_tuple *sdt;
 	unsigned int i, j;
 
-	for (i = 0, j = 0 ; i < sectors ; i++, j += 2, sdt++) {
+	for (i = 0, j = 0 ; i < sectors ; i++, j += 2, tuple += tuple_size) {
+		sdt = tuple;
 		sdt->app_tag = tag[j] << 8 | tag[j+1];
 		BUG_ON(sdt->app_tag == 0xffff);
 	}
 }
 
-static void sd_dif_type1_get_tag(void *prot, void *tag_buf, unsigned int sectors)
+static void sd_dif_type1_get_tag(void *prot, void *tag_buf, unsigned int sectors,
+							unsigned short tuple_size)
 {
-	struct sd_dif_tuple *sdt = prot;
+	void *tuple = prot;
 	u8 *tag = tag_buf;
+	struct sd_dif_tuple *sdt;
 	unsigned int i, j;
 
-	for (i = 0, j = 0 ; i < sectors ; i++, j += 2, sdt++) {
+	for (i = 0, j = 0 ; i < sectors ; i++, j += 2, tuple += tuple_size) {
+		sdt = tuple;
 		tag[j] = (sdt->app_tag & 0xff00) >> 8;
 		tag[j+1] = sdt->app_tag & 0xff;
 	}
@@ -180,15 +193,18 @@ static struct blk_integrity dif_type1_integrity_ip = {
 static void sd_dif_type3_generate(struct blk_integrity_exchg *bix, csum_fn *fn)
 {
 	void *buf = bix->data_buf;
-	struct sd_dif_tuple *sdt = bix->prot_buf;
+	void *tuple = bix->prot_buf;
+	struct sd_dif_tuple *sdt;
 	unsigned int i;
 
-	for (i = 0 ; i < bix->data_size ; i += bix->sector_size, sdt++) {
+	for (i = 0 ; i < bix->data_size ; i += bix->sector_size) {
+		sdt = tuple;
 		sdt->guard_tag = fn(buf, bix->sector_size);
 		sdt->ref_tag = 0;
 		sdt->app_tag = 0;
 
 		buf += bix->sector_size;
+		tuple += bix->tuple_size;
 	}
 }
 
@@ -206,11 +222,15 @@ static int sd_dif_type3_verify(struct blk_integrity_exchg *bix, csum_fn *fn)
 {
 	void *buf = bix->data_buf;
 	void *end = buf + bix->data_size;
-	struct sd_dif_tuple *sdt = bix->prot_buf;
+	void *tuple = bix->prot_buf;
 	sector_t sector = bix->sector;
+	struct sd_dif_tuple *sdt;
 	__u16 csum;
 
-	for (; buf < end; buf += bix->sector_size, sdt++, sector++) {
+	for (; buf < end; buf += bix->sector_size, tuple += bix->tuple_size,
+								sector++) {
+		sdt = tuple;
+
 		/* Unwritten sectors */
 		if (sdt->app_tag == 0xffff && sdt->ref_tag == 0xffffffff)
 			continue;
@@ -239,26 +259,32 @@ static int sd_dif_type3_verify_ip(struct blk_integrity_exchg *bix)
 	return sd_dif_type3_verify(bix, sd_dif_ip_fn);
 }
 
-static void sd_dif_type3_set_tag(void *prot, void *tag_buf, unsigned int sectors)
+static void sd_dif_type3_set_tag(void *prot, void *tag_buf, unsigned int sectors,
+							unsigned short tuple_size)
 {
-	struct sd_dif_tuple *sdt = prot;
+	void *tuple = prot;
 	u8 *tag = tag_buf;
+	struct sd_dif_tuple *sdt;
 	unsigned int i, j;
 
-	for (i = 0, j = 0 ; i < sectors ; i++, j += 6, sdt++) {
+	for (i = 0, j = 0 ; i < sectors ; i++, j += 6, tuple += tuple_size) {
+		sdt = tuple;
 		sdt->app_tag = tag[j] << 8 | tag[j+1];
 		sdt->ref_tag = tag[j+2] << 24 | tag[j+3] << 16 |
 			tag[j+4] << 8 | tag[j+5];
 	}
 }
 
-static void sd_dif_type3_get_tag(void *prot, void *tag_buf, unsigned int sectors)
+static void sd_dif_type3_get_tag(void *prot, void *tag_buf, unsigned int sectors,
+							unsigned short tuple_size)
 {
-	struct sd_dif_tuple *sdt = prot;
+	void *tuple = prot;
 	u8 *tag = tag_buf;
+	struct sd_dif_tuple *sdt;
 	unsigned int i, j;
 
-	for (i = 0, j = 0 ; i < sectors ; i++, j += 2, sdt++) {
+	for (i = 0, j = 0 ; i < sectors ; i++, j += 2, tuple += tuple_size) {
+		sdt = tuple;
 		tag[j] = (sdt->app_tag & 0xff00) >> 8;
 		tag[j+1] = sdt->app_tag & 0xff;
 		tag[j+2] = (sdt->ref_tag & 0xff000000) >> 24;
@@ -355,14 +381,15 @@ void sd_dif_config_host(struct scsi_disk *sdkp)
 void sd_dif_prepare(struct request *rq, sector_t hw_sector,
 		    unsigned int sector_sz)
 {
-	const int tuple_sz = sizeof(struct sd_dif_tuple);
 	struct bio *bio;
 	struct scsi_disk *sdkp;
 	struct sd_dif_tuple *sdt;
+	struct blk_integrity *bi;
 	unsigned int i, j;
 	u32 phys, virt;
 
 	sdkp = rq->bio->bi_bdev->bd_disk->private_data;
+	bi = bdev_get_integrity(rq->bio->bi_bdev);
 
 	if (sdkp->protection_type == SD_DIF_TYPE3_PROTECTION)
 		return;
@@ -380,14 +407,15 @@ void sd_dif_prepare(struct request *rq, sector_t hw_sector,
 				(__ffs(sector_sz) - 9)) & 0xffffffff;
 
 		bip_for_each_vec(iv, bio->bi_integrity, i) {
-			sdt = kmap_atomic(iv->bv_page)
+			void *tuple = kmap_atomic(iv->bv_page)
 				+ iv->bv_offset;
 
-			for (j = 0 ; j < iv->bv_len ; j += tuple_sz, sdt++) {
-
+			for (j = 0 ; j < iv->bv_len ; j += bi->tuple_size) {
+				sdt = tuple;
 				if (be32_to_cpu(sdt->ref_tag) == virt)
 					sdt->ref_tag = cpu_to_be32(phys);
 
+				tuple += bi->tuple_size;
 				virt++;
 				phys++;
 			}
@@ -405,14 +433,15 @@ void sd_dif_prepare(struct request *rq, sector_t hw_sector,
  */
 void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
 {
-	const int tuple_sz = sizeof(struct sd_dif_tuple);
 	struct scsi_disk *sdkp;
 	struct bio *bio;
 	struct sd_dif_tuple *sdt;
+	struct blk_integrity *bi;
 	unsigned int i, j, sectors, sector_sz;
 	u32 phys, virt;
 
 	sdkp = scsi_disk(scmd->request->rq_disk);
+	bi = bdev_get_integrity(scmd->request->bio->bi_bdev);
 
 	if (sdkp->protection_type == SD_DIF_TYPE3_PROTECTION || good_bytes == 0)
 		return;
@@ -429,11 +458,11 @@ void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
 					(__ffs(sector_sz) - 9)) & 0xffffffff;
 
 		bip_for_each_vec(iv, bio->bi_integrity, i) {
-			sdt = kmap_atomic(iv->bv_page)
+			void *tuple = kmap_atomic(iv->bv_page)
 				+ iv->bv_offset;
 
-			for (j = 0 ; j < iv->bv_len ; j += tuple_sz, sdt++) {
-
+			for (j = 0 ; j < iv->bv_len ; j += bi->tuple_size) {
+				sdt = tuple;
 				if (sectors == 0) {
 					kunmap_atomic(sdt);
 					return;
@@ -442,6 +471,7 @@ void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
 				if (be32_to_cpu(sdt->ref_tag) == phys)
 					sdt->ref_tag = cpu_to_be32(virt);
 
+				tuple += bi->tuple_size;
 				virt++;
 				phys++;
 				sectors--;
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index b4ab7da..81b0ddf 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -273,9 +273,11 @@ int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len, int set)
 	}
 
 	if (set)
-		bi->set_tag_fn(bip->bip_buf, tag_buf, nr_sectors);
+		bi->set_tag_fn(bip->bip_buf, tag_buf, nr_sectors,
+							bi->tuple_size);
 	else
-		bi->get_tag_fn(bip->bip_buf, tag_buf, nr_sectors);
+		bi->get_tag_fn(bip->bip_buf, tag_buf, nr_sectors,
+							bi->tuple_size);
 
 	return 0;
 }
@@ -338,6 +340,7 @@ static void bio_integrity_generate(struct bio *bio)
 	total = 0;
 	bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
 	bix.sector_size = bi->sector_size;
+	bix.tuple_size = bi->tuple_size;
 
 	bio_for_each_segment(bv, bio, i) {
 		void *kaddr = kmap_atomic(bv->bv_page);
@@ -480,6 +483,7 @@ static int bio_integrity_verify(struct bio *bio)
 	ret = total = 0;
 	bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
 	bix.sector_size = bi->sector_size;
+	bix.tuple_size = bi->tuple_size;
 
 	bio_for_each_segment(bv, bio, i) {
 		void *kaddr = kmap_atomic(bv->bv_page);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1756001..70eccab 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1325,13 +1325,14 @@ struct blk_integrity_exchg {
 	sector_t		sector;
 	unsigned int		data_size;
 	unsigned short		sector_size;
+	unsigned short		tuple_size;
 	const char		*disk_name;
 };
 
 typedef void (integrity_gen_fn) (struct blk_integrity_exchg *);
 typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *);
-typedef void (integrity_set_tag_fn) (void *, void *, unsigned int);
-typedef void (integrity_get_tag_fn) (void *, void *, unsigned int);
+typedef void (integrity_set_tag_fn) (void *, void *, unsigned int, unsigned short);
+typedef void (integrity_get_tag_fn) (void *, void *, unsigned int, unsigned short);
 
 struct blk_integrity {
 	integrity_gen_fn	*generate_fn;
-- 
1.7.0.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2 5/7] sd: export dif integrity template
  2013-03-21 17:52 [PATCH v2 0/7] NVMe Data Integrity Extensions Keith Busch
                   ` (3 preceding siblings ...)
  2013-03-21 17:52 ` [PATCH v2 4/7] sd: arbitrary dif meta-data sizes Keith Busch
@ 2013-03-21 17:52 ` Keith Busch
  2013-03-21 17:52 ` [PATCH v2 6/7] NVMe: Split non-mergeable bio requests Keith Busch
  2013-03-21 17:52 ` [PATCH v2 7/7] NVMe: End-to-end data protection Keith Busch
  6 siblings, 0 replies; 8+ messages in thread
From: Keith Busch @ 2013-03-21 17:52 UTC (permalink / raw)


Exports access to copies of common blk_integrity templates so that other
non-scsi dif capable block drivers can use them.

Cc: Martin K. Petersen <martin.petersen at oracle.com>
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/scsi/sd_dif.c  |   24 ++++++++++++++++++++++++
 include/linux/blkdev.h |    5 +++++
 2 files changed, 29 insertions(+), 0 deletions(-)

diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index 1735513..8c85027 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -22,6 +22,7 @@
 
 #include <linux/blkdev.h>
 #include <linux/crc-t10dif.h>
+#include <linux/export.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
@@ -482,3 +483,26 @@ void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
 	}
 }
 
+struct blk_integrity sd_dif_get_type1_crc(void)
+{
+	return dif_type1_integrity_crc;
+}
+EXPORT_SYMBOL(sd_dif_get_type1_crc);
+
+struct blk_integrity sd_dif_get_type1_ip(void)
+{
+	return dif_type1_integrity_ip;
+}
+EXPORT_SYMBOL(sd_dif_get_type1_ip);
+
+struct blk_integrity sd_dif_get_type3_crc(void)
+{
+	return dif_type3_integrity_crc;
+}
+EXPORT_SYMBOL(sd_dif_get_type3_crc);
+
+struct blk_integrity sd_dif_get_type3_ip(void)
+{
+	return dif_type3_integrity_ip;
+}
+EXPORT_SYMBOL(sd_dif_get_type3_ip);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 70eccab..f4769a7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1350,6 +1350,11 @@ struct blk_integrity {
 	struct kobject		kobj;
 };
 
+extern struct blk_integrity sd_dif_get_type1_ip(void);
+extern struct blk_integrity sd_dif_get_type1_crc(void);
+extern struct blk_integrity sd_dif_get_type3_ip(void);
+extern struct blk_integrity sd_dif_get_type3_crc(void);
+
 extern bool blk_integrity_is_initialized(struct gendisk *);
 extern int blk_integrity_register(struct gendisk *, struct blk_integrity *);
 extern void blk_integrity_unregister(struct gendisk *);
-- 
1.7.0.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2 6/7] NVMe: Split non-mergeable bio requests
  2013-03-21 17:52 [PATCH v2 0/7] NVMe Data Integrity Extensions Keith Busch
                   ` (4 preceding siblings ...)
  2013-03-21 17:52 ` [PATCH v2 5/7] sd: export dif integrity template Keith Busch
@ 2013-03-21 17:52 ` Keith Busch
  2013-03-21 17:52 ` [PATCH v2 7/7] NVMe: End-to-end data protection Keith Busch
  6 siblings, 0 replies; 8+ messages in thread
From: Keith Busch @ 2013-03-21 17:52 UTC (permalink / raw)


It is possible a bio request can not be submitted as a single NVMe
IO command due to the bio_vec not being mergeable with the NVMe PRP list
alignment constraints. This condition was handled by submitting an IO for
the mergeable portion, then submitting a follow on IO for the remaining
data after the previous IO completes. The remainder to be sent was tracked
by manipulating the bio->bi_idx and bio->bi_sector. This patch splits
the request as many times as necessary and submits the bios together.

There are a couple other benefits from doing this: it fixes a possible
issue with the current handling of a non-mergeable bio as the existing
requeuing method may potentially use an unlocked nvme_queue if the
callback isn't invoked on the queue's associated cpu; it will be possible
to retry a failed bio if desired at some later time since it does not
manipulate the original bio; the bio integrity extensions require the
bio to be in its original condition for the checks to work correctly if
we implement the end-to-end data protection.

Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/block/nvme.c |  142 ++++++++++++++++++++++++++++++++++++++++----------
 1 files changed, 115 insertions(+), 27 deletions(-)

diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index 993c014..86c7f28 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -361,16 +361,6 @@ static void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
 	kfree(iod);
 }
 
-static void requeue_bio(struct nvme_dev *dev, struct bio *bio)
-{
-	struct nvme_queue *nvmeq = get_nvmeq(dev);
-	if (bio_list_empty(&nvmeq->sq_cong))
-		add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
-	bio_list_add(&nvmeq->sq_cong, bio);
-	put_nvmeq(nvmeq);
-	wake_up_process(nvme_thread);
-}
-
 static void bio_completion(struct nvme_dev *dev, void *ctx,
 						struct nvme_completion *cqe)
 {
@@ -381,14 +371,12 @@ static void bio_completion(struct nvme_dev *dev, void *ctx,
 	if (iod->nents)
 		dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents,
 			bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+
 	nvme_free_iod(dev, iod);
-	if (status) {
+	if (status)
 		bio_endio(bio, -EIO);
-	} else if (bio->bi_vcnt > bio->bi_idx) {
-		requeue_bio(dev, bio);
-	} else {
+	else
 		bio_endio(bio, 0);
-	}
 }
 
 /* length is in bytes.  gfp flags indicates whether we may sleep. */
@@ -473,25 +461,130 @@ static int nvme_setup_prps(struct nvme_dev *dev,
 	return total_len;
 }
 
+struct nvme_bio_pair {
+	struct bio b1, b2, *parent;
+	struct bio_vec *bv1, *bv2;
+	int err;
+	atomic_t cnt;
+};
+
+static void nvme_bio_pair_endio(struct bio *bio, int err)
+{
+	struct nvme_bio_pair *bp = bio->bi_private;
+
+	if (err)
+		bp->err = err;
+
+	if (atomic_dec_and_test(&bp->cnt)) {
+		bio_endio(bp->parent, bp->err);
+		if (bp->bv1)
+			kfree(bp->bv1);
+		if (bp->bv2)
+			kfree(bp->bv2);
+		kfree(bp);
+	}
+}
+
+static struct nvme_bio_pair *nvme_bio_split(struct bio *bio, int idx,
+							int len, int offset)
+{
+	struct nvme_bio_pair *bp;
+	
+	BUG_ON(len > bio->bi_size);
+	BUG_ON(idx > bio->bi_vcnt);
+
+	bp = kmalloc(sizeof(*bp), GFP_ATOMIC);
+	if (!bp)
+		return NULL;
+	bp->err = 0;
+
+	bp->b1 = *bio;
+	bp->b2 = *bio;
+
+	bp->b1.bi_size = len;
+	bp->b2.bi_size -= len;
+	bp->b1.bi_vcnt = idx;
+	bp->b2.bi_idx = idx;
+	bp->b2.bi_sector += len >> 9;
+
+	if (offset) {
+		bp->bv1 = kmalloc(bio->bi_max_vecs * sizeof(struct bio_vec),
+								GFP_ATOMIC);
+		if (!bp->bv1)
+			goto split_fail_1;
+
+		bp->bv2 = kmalloc(bio->bi_max_vecs * sizeof(struct bio_vec),
+								GFP_ATOMIC);
+		if (!bp->bv2)
+			goto split_fail_2;
+
+		memcpy(bp->bv1, bio->bi_io_vec,
+			bio->bi_max_vecs * sizeof(struct bio_vec));
+		memcpy(bp->bv2, bio->bi_io_vec,
+			bio->bi_max_vecs * sizeof(struct bio_vec));
+
+		bp->b1.bi_io_vec = bp->bv1;
+		bp->b2.bi_io_vec = bp->bv2;
+		bp->b2.bi_io_vec[idx].bv_offset += offset;
+		bp->b2.bi_io_vec[idx].bv_len -= offset;
+		bp->b1.bi_io_vec[idx].bv_len = offset;
+		bp->b1.bi_vcnt++;
+	} else
+		bp->bv1 = bp->bv2 = NULL;
+
+	bp->b1.bi_private = bp;
+	bp->b2.bi_private = bp;
+
+	bp->b1.bi_end_io = nvme_bio_pair_endio;
+	bp->b2.bi_end_io = nvme_bio_pair_endio;
+
+	bp->parent = bio;
+	atomic_set(&bp->cnt, 2);
+
+	return bp;
+
+ split_fail_2:
+	kfree(bp->bv1);
+ split_fail_1:
+	kfree(bp);
+	return NULL;
+}
+
+static int nvme_split_and_submit(struct bio *bio, struct nvme_queue *nvmeq,
+						int idx, int len, int offset)
+{
+	struct nvme_bio_pair *bp = nvme_bio_split(bio, idx, len, offset);
+	if (!bp)
+		return -ENOMEM;
+
+	if (bio_list_empty(&nvmeq->sq_cong))
+		add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
+	bio_list_add(&nvmeq->sq_cong, &bp->b1);
+	bio_list_add(&nvmeq->sq_cong, &bp->b2);
+	wake_up_process(nvme_thread);
+
+	return 0;
+}
+
 /* NVMe scatterlists require no holes in the virtual address */
 #define BIOVEC_NOT_VIRT_MERGEABLE(vec1, vec2)	((vec2)->bv_offset || \
 			(((vec1)->bv_offset + (vec1)->bv_len) % PAGE_SIZE))
 
-static int nvme_map_bio(struct device *dev, struct nvme_iod *iod,
+static int nvme_map_bio(struct nvme_queue *nvmeq, struct nvme_iod *iod,
 		struct bio *bio, enum dma_data_direction dma_dir, int psegs)
 {
 	struct bio_vec *bvec, *bvprv = NULL;
 	struct scatterlist *sg = NULL;
-	int i, old_idx, length = 0, nsegs = 0;
+	int i, length = 0, nsegs = 0;
 
 	sg_init_table(iod->sg, psegs);
-	old_idx = bio->bi_idx;
 	bio_for_each_segment(bvec, bio, i) {
 		if (bvprv && BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) {
 			sg->length += bvec->bv_len;
 		} else {
 			if (bvprv && BIOVEC_NOT_VIRT_MERGEABLE(bvprv, bvec))
-				break;
+				return nvme_split_and_submit(bio, nvmeq, i,
+								length, 0);
 			sg = sg ? sg + 1 : iod->sg;
 			sg_set_page(sg, bvec->bv_page, bvec->bv_len,
 							bvec->bv_offset);
@@ -500,13 +593,10 @@ static int nvme_map_bio(struct device *dev, struct nvme_iod *iod,
 		length += bvec->bv_len;
 		bvprv = bvec;
 	}
-	bio->bi_idx = i;
 	iod->nents = nsegs;
 	sg_mark_end(sg);
-	if (dma_map_sg(dev, iod->sg, iod->nents, dma_dir) == 0) {
-		bio->bi_idx = old_idx;
+	if (dma_map_sg(nvmeq->q_dmadev, iod->sg, iod->nents, dma_dir) == 0)
 		return -ENOMEM;
-	}
 	return length;
 }
 
@@ -591,8 +681,8 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 		dma_dir = DMA_FROM_DEVICE;
 	}
 
-	result = nvme_map_bio(nvmeq->q_dmadev, iod, bio, dma_dir, psegs);
-	if (result < 0)
+	result = nvme_map_bio(nvmeq, iod, bio, dma_dir, psegs);
+	if (result <= 0)
 		goto free_cmdid;
 	length = result;
 
@@ -605,8 +695,6 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 	cmnd->rw.control = cpu_to_le16(control);
 	cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
 
-	bio->bi_sector += length >> 9;
-
 	if (++nvmeq->sq_tail == nvmeq->q_depth)
 		nvmeq->sq_tail = 0;
 	writel(nvmeq->sq_tail, nvmeq->q_db);
-- 
1.7.0.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2 7/7] NVMe: End-to-end data protection
  2013-03-21 17:52 [PATCH v2 0/7] NVMe Data Integrity Extensions Keith Busch
                   ` (5 preceding siblings ...)
  2013-03-21 17:52 ` [PATCH v2 6/7] NVMe: Split non-mergeable bio requests Keith Busch
@ 2013-03-21 17:52 ` Keith Busch
  6 siblings, 0 replies; 8+ messages in thread
From: Keith Busch @ 2013-03-21 17:52 UTC (permalink / raw)


Registers a DIF capable nvme namespace with block integrity.

If the namespace meta-data is a separate buffer, the driver will use the
appropriate block integrity template to generate and verify the protection
information on writes and reads and supply a meta-data pointer in the
command buffer for this.

If the namespace is formatted with meta-data but not with protection
information, a no-op block integrity template is used to create the
unused meta-data buffer.

If the meta-data is interleaved and formatted for data-protection, the
NVMe PRACT field is set to have the controller generate DIF on writes
and strip it on reads.

LBA formats that the driver cannot deal with will not create a block
device for that namespace.

Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/block/nvme.c |  135 ++++++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/nvme.h |   28 +++++++++--
 2 files changed, 153 insertions(+), 10 deletions(-)

diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index 86c7f28..182d0b4 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -94,6 +94,9 @@ struct nvme_ns {
 
 	int ns_id;
 	int lba_shift;
+	int pi_type;
+	int extended;
+	u16 ms;
 };
 
 /*
@@ -307,6 +310,7 @@ struct nvme_iod {
 	int nents;		/* Used in scatterlist */
 	int length;		/* Of data, in bytes */
 	dma_addr_t first_dma;
+	dma_addr_t meta_dma;
 	struct scatterlist sg[0];
 };
 
@@ -367,10 +371,14 @@ static void bio_completion(struct nvme_dev *dev, void *ctx,
 	struct nvme_iod *iod = ctx;
 	struct bio *bio = iod->private;
 	u16 status = le16_to_cpup(&cqe->status) >> 1;
+	enum dma_data_direction dma_dir = bio_data_dir(bio) ? DMA_TO_DEVICE :
+								DMA_FROM_DEVICE;
 
 	if (iod->nents)
-		dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents,
-			bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+		dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents, dma_dir);
+	if (bio_integrity(bio))
+		dma_unmap_single(&dev->pci_dev->dev, iod->meta_dma,
+					bio->bi_integrity->bip_size, dma_dir);
 
 	nvme_free_iod(dev, iod);
 	if (status)
@@ -464,6 +472,7 @@ static int nvme_setup_prps(struct nvme_dev *dev,
 struct nvme_bio_pair {
 	struct bio b1, b2, *parent;
 	struct bio_vec *bv1, *bv2;
+	struct bio_integrity_payload bip1, bip2;
 	int err;
 	atomic_t cnt;
 };
@@ -532,6 +541,23 @@ static struct nvme_bio_pair *nvme_bio_split(struct bio *bio, int idx,
 	} else
 		bp->bv1 = bp->bv2 = NULL;
 
+	if (bio_integrity(bio)) {
+		struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+		unsigned int bip_split_len =
+				(len / bdev_logical_block_size(bio->bi_bdev)) *
+					bi->tuple_size;
+
+		bp->bip1.bip_buf = bio->bi_integrity->bip_buf;
+		bp->bip1.bip_size = bip_split_len;
+
+		bp->bip2.bip_buf = bio->bi_integrity->bip_buf + bip_split_len;
+		bp->bip2.bip_size = bio->bi_integrity->bip_size - bip_split_len;
+
+		bp->b1.bi_integrity = &bp->bip1;
+		bp->b2.bi_integrity = &bp->bip2;
+
+	}
+
 	bp->b1.bi_private = bp;
 	bp->b2.bi_private = bp;
 
@@ -692,6 +718,29 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 								GFP_ATOMIC);
 	cmnd->rw.slba = cpu_to_le64(bio->bi_sector >> (ns->lba_shift - 9));
 	cmnd->rw.length = cpu_to_le16((length >> ns->lba_shift) - 1);
+
+	if (ns->ms) {
+		if (ns->pi_type) {
+			control |= NVME_RW_PRINFO_PRCHK_GUARD;
+			if (ns->pi_type != NVME_NS_DPS_PI_TYPE3) {
+				control |= NVME_RW_PRINFO_PRCHK_REF;
+				cmnd->rw.reftag = cpu_to_le32(
+					(bio->bi_sector >> (ns->lba_shift - 9)) &
+					0xffffffff);
+			}
+		}
+		if (bio_integrity(bio)) {
+				iod->meta_dma =
+					dma_map_single(nvmeq->q_dmadev,
+						bio->bi_integrity->bip_buf,
+						bio->bi_integrity->bip_size,
+						dma_dir);
+				cmnd->rw.metadata = cpu_to_le64(iod->meta_dma);
+		} else {
+			control |= NVME_RW_PRINFO_PRACT;
+		}
+	}
+
 	cmnd->rw.control = cpu_to_le16(control);
 	cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
 
@@ -1435,16 +1484,83 @@ static void nvme_put_ns_idx(int index)
 	spin_unlock(&dev_list_lock);
 }
 
+static void nvme_generate(struct blk_integrity_exchg *bix)
+{
+	return;
+}
+
+static int nvme_verify(struct blk_integrity_exchg *bix)
+{
+	return 0;
+}
+
+/*
+ * No-op integrity extension for namespace formats with meta-data but
+ * without protection settings.
+ */
+static struct blk_integrity nvme_no_dif = {
+	.name			= "T10-DIF-TYPE0",
+	.generate_fn		= &nvme_generate,
+	.verify_fn		= &nvme_verify,
+	.get_tag_fn		= NULL,
+	.set_tag_fn		= NULL,
+	.tuple_size		= 0,
+	.tag_size		= 0,
+};
+
+static void nvme_ns_register_pi(struct nvme_ns *ns)
+{
+	struct blk_integrity integrity;
+
+	if (ns->pi_type == NVME_NS_DPS_PI_TYPE3) {
+		integrity = sd_dif_get_type3_crc();
+		integrity.tag_size = sizeof(u16);
+	} else if (ns->pi_type) {
+		integrity = sd_dif_get_type1_crc();
+		integrity.tag_size = sizeof(u16) + sizeof(u32);
+	} else {
+		integrity = nvme_no_dif;
+	}
+	integrity.tuple_size = ns->ms;
+	blk_integrity_register(ns->disk, &integrity);
+}
+
+/*
+ * Formats with no meta-data or separate meta-data are all valid. Interleaved
+ * meta-data is not valid unless the controller can insert/strip it on
+ * writes/reads, which means the namespace has to be formatted with data
+ * protection settings and meta-data size equal to DIF size.
+ */
+static int nvme_check_pi_format(struct nvme_id_ns *id)
+{
+	int lbaf = id->flbas & NVME_NS_FLBAS_LBAF_MASK;
+	int ms = le16_to_cpu(id->lbaf[lbaf].ms);
+	int dps = id->dps & NVME_NS_DPS_PI_MASK;
+	int extended = id->flbas & NVME_NS_FLBAS_LBA_EXTENDED;
+
+	if (!extended && ms >= 8)
+		return dps;
+	if (dps && ms == 8)
+		return dps;
+	else if (ms)
+		return -1;
+	return 0;
+}
+
 static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int nsid,
 			struct nvme_id_ns *id, struct nvme_lba_range_type *rt)
 {
 	struct nvme_ns *ns;
 	struct gendisk *disk;
-	int lbaf;
+	int lbaf, pi_type;
 
 	if (rt->attributes & NVME_LBART_ATTRIB_HIDE)
 		return NULL;
 
+	pi_type = nvme_check_pi_format(id);
+	if (pi_type < 0)
+		return NULL;
+
 	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
 	if (!ns)
 		return NULL;
@@ -1458,6 +1574,9 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int nsid,
 	blk_queue_make_request(ns->queue, nvme_make_request);
 	ns->dev = dev;
 	ns->queue->queuedata = ns;
+	ns->pi_type = pi_type;
+	if (pi_type)
+		ns->extended = id->flbas & NVME_NS_FLBAS_LBA_EXTENDED;
 
 	disk = alloc_disk(NVME_MINORS);
 	if (!disk)
@@ -1466,6 +1585,7 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int nsid,
 	ns->disk = disk;
 	lbaf = id->flbas & 0xf;
 	ns->lba_shift = id->lbaf[lbaf].ds;
+	ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
 	blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
 	if (dev->max_hw_sectors)
 		blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);
@@ -1634,8 +1754,11 @@ static int __devinit nvme_dev_add(struct nvme_dev *dev)
 		if (ns)
 			list_add_tail(&ns->list, &dev->namespaces);
 	}
-	list_for_each_entry(ns, &dev->namespaces, list)
+	list_for_each_entry(ns, &dev->namespaces, list) {
 		add_disk(ns->disk);
+		if (!ns->extended && ns->pi_type)
+			nvme_ns_register_pi(ns);
+	}
 
 	goto out;
 
@@ -1660,6 +1783,8 @@ static int nvme_dev_remove(struct nvme_dev *dev)
 
 	list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
 		list_del(&ns->list);
+		if (!ns->extended && ns->pi_type)
+			blk_integrity_unregister(ns->disk);
 		del_gendisk(ns->disk);
 		nvme_ns_free(ns);
 	}
@@ -1891,6 +2016,6 @@ static void __exit nvme_exit(void)
 
 MODULE_AUTHOR("Matthew Wilcox <willy at linux.intel.com>");
 MODULE_LICENSE("GPL");
-MODULE_VERSION("0.8");
+MODULE_VERSION("0.9");
 module_init(nvme_init);
 module_exit(nvme_exit);
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 4fa3b0b..f499455 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -130,11 +130,25 @@ struct nvme_id_ns {
 };
 
 enum {
-	NVME_NS_FEAT_THIN	= 1 << 0,
-	NVME_LBAF_RP_BEST	= 0,
-	NVME_LBAF_RP_BETTER	= 1,
-	NVME_LBAF_RP_GOOD	= 2,
-	NVME_LBAF_RP_DEGRADED	= 3,
+	NVME_NS_FEAT_THIN		= 1 << 0,
+	NVME_NS_MC_EXTENDED		= 1 << 0,
+	NVME_NS_MC_SEPARATE		= 1 << 1,
+	NVME_NS_FLBAS_LBA_EXTENDED	= 1 << 4,
+	NVME_NS_FLBAS_LBAF_MASK		= 0xf,
+	NVME_NS_DPC_PI_LAST		= 1 << 4,
+	NVME_NS_DPC_PI_FIRST		= 1 << 3,
+	NVME_NS_DPC_PI_TYPE3		= 1 << 2,
+	NVME_NS_DPC_PI_TYPE2		= 1 << 1,
+	NVME_NS_DPC_PI_TYPE1		= 1 << 0,
+	NVME_NS_DPS_PI_MASK		= 0x7,
+	NVME_NS_DPS_PI_TYPE1		= 1,
+	NVME_NS_DPS_PI_TYPE2		= 2,
+	NVME_NS_DPS_PI_TYPE3		= 3,
+	NVME_NS_DPS_PI_FIRST		= 8,
+	NVME_LBAF_RP_BEST		= 0,
+	NVME_LBAF_RP_BETTER		= 1,
+	NVME_LBAF_RP_GOOD		= 2,
+	NVME_LBAF_RP_DEGRADED		= 3,
 };
 
 struct nvme_smart_log {
@@ -244,6 +258,10 @@ enum {
 	NVME_RW_DSM_LATENCY_LOW		= 3 << 4,
 	NVME_RW_DSM_SEQ_REQ		= 1 << 6,
 	NVME_RW_DSM_COMPRESSED		= 1 << 7,
+	NVME_RW_PRINFO_PRACT		= 1 << 13,
+	NVME_RW_PRINFO_PRCHK_GUARD	= 1 << 12,
+	NVME_RW_PRINFO_PRCHK_APP	= 1 << 11,
+	NVME_RW_PRINFO_PRCHK_REF	= 1 << 10,
 };
 
 /* Admin commands */
-- 
1.7.0.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2013-03-21 17:52 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-03-21 17:52 [PATCH v2 0/7] NVMe Data Integrity Extensions Keith Busch
2013-03-21 17:52 ` [PATCH v2 1/7] sd: remove invalid ref tag check Keith Busch
2013-03-21 17:52 ` [PATCH v2 2/7] sd: skip verifying unwritten sectors Keith Busch
2013-03-21 17:52 ` [PATCH v2 3/7] sd: hw sector size calculation Keith Busch
2013-03-21 17:52 ` [PATCH v2 4/7] sd: arbitrary dif meta-data sizes Keith Busch
2013-03-21 17:52 ` [PATCH v2 5/7] sd: export dif integrity template Keith Busch
2013-03-21 17:52 ` [PATCH v2 6/7] NVMe: Split non-mergeable bio requests Keith Busch
2013-03-21 17:52 ` [PATCH v2 7/7] NVMe: End-to-end data protection Keith Busch

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.