All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Martin K. Petersen" <martin.petersen@oracle.com>
To: linux-scsi@vger.kernel.org
Cc: James.Bottomley@hansenpartnership.com, snitzer@redhat.com,
	jaxboe@fusionio.com,
	"Martin K. Petersen" <martin.petersen@oracle.com>
Subject: [PATCH 5/5] sd: Implement support for WRITE SAME
Date: Mon, 30 Jan 2012 19:31:32 -0500	[thread overview]
Message-ID: <1327969892-5090-6-git-send-email-martin.petersen@oracle.com> (raw)
In-Reply-To: <1327969892-5090-1-git-send-email-martin.petersen@oracle.com>

From: "Martin K. Petersen" <martin.petersen@oracle.com>

Implement support for WRITE SAME(10) and WRITE SAME(16) in the SCSI disk
driver.

 - We set the default maximum to 0xFFFF because there are several
   devices out there that only support two-byte block counts even with
   WRITE SAME(16). We only enable transfers bigger than 0xFFFF if the
   device explicitly reports MAXIMUM WRITE SAME LENGTH in the BLOCK
   LIMITS VPD.

 - max_write_same_blocks can be overriden per-device basis in sysfs.

 - The UNMAP discovery heuristics remain unchanged but the discard
   limits are tweaked to match the "real" WRITE SAME commands.

 - In the error handling logic we now distinguish between WRITE SAME
   with and without UNMAP set.


The discovery process heuristics are:

 - If the device reports a SCSI level of SPC-3 or greater we'll issue
   READ SUPPORTED OPERATION CODES to find out whether WRITE SAME(16) is
   supported. If that's the case we will use it.

 - If the device supports the block limits VPD and reports a MAXIMUM
   WRITE SAME LENGTH bigger than 0xFFFF we will use WRITE SAME(16).

 - Otherwise we will use WRITE SAME(10) unless the target LBA is beyond
   0xFFFFFFFF or the block count exceeds 0xFFFF.

 - no_write_same is set for ATA, FireWire and USB.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ata/libata-scsi.c      |    1 +
 drivers/firewire/sbp2.c        |    1 +
 drivers/scsi/scsi_lib.c        |   22 ++++-
 drivers/scsi/sd.c              |  173 +++++++++++++++++++++++++++++++++++++---
 drivers/scsi/sd.h              |    7 ++
 drivers/usb/storage/scsiglue.c |    3 +
 include/scsi/scsi_device.h     |    1 +
 7 files changed, 193 insertions(+), 15 deletions(-)

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index b6db28f..dde907b 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1053,6 +1053,7 @@ static void ata_scsi_sdev_config(struct scsi_device *sdev)
 	sdev->use_10_for_rw = 1;
 	sdev->use_10_for_ms = 1;
 	sdev->no_report_opcodes = 1;
+	sdev->no_write_same = 1;
 
 	/* Schedule policy is determined by ->qc_defer() callback and
 	 * it needs to see every deferred qc.  Set dev_blocked to 1 to
diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c
index d956dd38..52b2bac 100644
--- a/drivers/firewire/sbp2.c
+++ b/drivers/firewire/sbp2.c
@@ -1518,6 +1518,7 @@ static int sbp2_scsi_slave_configure(struct scsi_device *sdev)
 
 	sdev->use_10_for_rw = 1;
 	sdev->no_report_opcodes = 1;
+	sdev->no_write_same = 1;
 
 	if (sbp2_param_exclusive_login)
 		sdev->manage_start_stop = 1;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index b2c95db..560b63f 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -874,11 +874,23 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 				action = ACTION_FAIL;
 				error = -EILSEQ;
 			/* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */
-			} else if ((sshdr.asc == 0x20 || sshdr.asc == 0x24) &&
-				   (cmd->cmnd[0] == UNMAP ||
-				    cmd->cmnd[0] == WRITE_SAME_16 ||
-				    cmd->cmnd[0] == WRITE_SAME)) {
-				description = "Discard failure";
+			} else if (sshdr.asc == 0x20 || sshdr.asc == 0x24) {
+				switch(cmd->cmnd[0]) {
+				case UNMAP:
+					description = "Discard failure";
+					break;
+				case WRITE_SAME:
+				case WRITE_SAME_16:
+					if (cmd->cmnd[1] & 0x8)
+						description = "Discard failure";
+					else
+						description =
+							"Write same failure";
+					break;
+				default:
+					description = "Invalid command failure";
+					break;
+				}
 				action = ACTION_FAIL;
 			} else
 				action = ACTION_FAIL;
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index c691fb5..40946a2 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -98,6 +98,7 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC);
 #endif
 
 static void sd_config_discard(struct scsi_disk *, unsigned int);
+static void sd_config_write_same(struct scsi_disk *);
 static int  sd_revalidate_disk(struct gendisk *);
 static void sd_unlock_native_capacity(struct gendisk *disk);
 static int  sd_probe(struct device *);
@@ -346,6 +347,41 @@ sd_store_provisioning_mode(struct device *dev, struct device_attribute *attr,
 	return count;
 }
 
+static ssize_t
+sd_show_write_same_blocks(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	struct scsi_disk *sdkp = to_scsi_disk(dev);
+
+	return snprintf(buf, 20, "%u\n", sdkp->max_ws_blocks);
+}
+
+static ssize_t
+sd_store_write_same_blocks(struct device *dev, struct device_attribute *attr,
+			   const char *buf, size_t count)
+{
+	struct scsi_disk *sdkp = to_scsi_disk(dev);
+	struct scsi_device *sdp = sdkp->device;
+	unsigned long max;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	if (sdp->type != TYPE_DISK)
+		return -EINVAL;
+
+	max = simple_strtoul(buf, NULL, 10);
+
+	if (max == 0)
+		sdp->no_write_same = 1;
+	else if (max <= SD_MAX_WS16_BLOCKS)
+		sdkp->max_ws_blocks = max;
+
+	sd_config_write_same(sdkp);
+
+	return count;
+}
+
 static struct device_attribute sd_disk_attrs[] = {
 	__ATTR(cache_type, S_IRUGO|S_IWUSR, sd_show_cache_type,
 	       sd_store_cache_type),
@@ -360,6 +396,8 @@ static struct device_attribute sd_disk_attrs[] = {
 	__ATTR(thin_provisioning, S_IRUGO, sd_show_thin_provisioning, NULL),
 	__ATTR(provisioning_mode, S_IRUGO|S_IWUSR, sd_show_provisioning_mode,
 	       sd_store_provisioning_mode),
+	__ATTR(max_write_same_blocks, S_IRUGO|S_IWUSR,
+	       sd_show_write_same_blocks, sd_store_write_same_blocks),
 	__ATTR_NULL,
 };
 
@@ -505,19 +543,23 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
 		return;
 
 	case SD_LBP_UNMAP:
-		max_blocks = min_not_zero(sdkp->max_unmap_blocks, 0xffffffff);
+		max_blocks = min_not_zero(sdkp->max_unmap_blocks,
+					  (u32)SD_MAX_WS16_BLOCKS);
 		break;
 
 	case SD_LBP_WS16:
-		max_blocks = min_not_zero(sdkp->max_ws_blocks, 0xffffffff);
+		max_blocks = min_not_zero(sdkp->max_ws_blocks,
+					  (u32)SD_MAX_WS16_BLOCKS);
 		break;
 
 	case SD_LBP_WS10:
-		max_blocks = min_not_zero(sdkp->max_ws_blocks, (u32)0xffff);
+		max_blocks = min_not_zero(sdkp->max_ws_blocks,
+					  (u32)SD_MAX_WS10_BLOCKS);
 		break;
 
 	case SD_LBP_ZERO:
-		max_blocks = min_not_zero(sdkp->max_ws_blocks, (u32)0xffff);
+		max_blocks = min_not_zero(sdkp->max_ws_blocks,
+					  (u32)SD_MAX_WS10_BLOCKS);
 		q->limits.discard_zeroes_data = 1;
 		break;
 	}
@@ -615,6 +657,83 @@ out:
 	return ret;
 }
 
+static void sd_config_write_same(struct scsi_disk *sdkp)
+{
+	struct request_queue *q = sdkp->disk->queue;
+	unsigned int logical_block_size = sdkp->device->sector_size;
+	unsigned int blocks = 0;
+
+	if (sdkp->device->no_write_same) {
+		sdkp->max_ws_blocks = 0;
+		goto out;
+	}
+
+	/* Some devices can not handle block counts above 0xffff despite
+	 * supporting WRITE SAME(16). Consequently we default to 64k
+	 * blocks per I/O unless the device explicitly advertises a
+	 * bigger limit.
+	 */
+	if (sdkp->max_ws_blocks == 0)
+		sdkp->max_ws_blocks = SD_MAX_WS10_BLOCKS;
+
+	if (sdkp->ws16 || sdkp->max_ws_blocks > SD_MAX_WS10_BLOCKS)
+		blocks = min_not_zero(sdkp->max_ws_blocks,
+				      (u32)SD_MAX_WS16_BLOCKS);
+	else
+		blocks = min_not_zero(sdkp->max_ws_blocks,
+				      (u32)SD_MAX_WS10_BLOCKS);
+
+out:
+	blk_queue_max_write_same_sectors(q, blocks * (logical_block_size >> 9));
+}
+
+/**
+ * sd_setup_write_same_cmnd - write the same data to multiple blocks
+ * @sdp: scsi device to operate one
+ * @rq: Request to prepare
+ *
+ * Will issue either WRITE SAME(10) or WRITE SAME(16) depending on
+ * preference indicated by target device.
+ **/
+static int sd_setup_write_same_cmnd(struct scsi_device *sdp, struct request *rq)
+{
+	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
+	struct bio *bio = rq->bio;
+	sector_t sector = bio->bi_sector;
+	unsigned int nr_sectors = bio_sectors(bio);
+
+	if (sdkp->device->no_write_same)
+		return BLKPREP_KILL;
+
+	if (sdkp->device->sector_size == 4096) {
+		sector >>= 3;
+		nr_sectors >>= 3;
+	}
+
+	rq->timeout = SD_WRITE_SAME_TIMEOUT;
+	rq->__data_len = rq->resid_len = sdp->sector_size;
+	memset(rq->cmd, 0, rq->cmd_len);
+
+	if (rq->nr_phys_segments == 0) {
+		printk(KERN_ERR "%s: Setting phys_segs!\n", __func__);
+		rq->nr_phys_segments = bio->bi_phys_segments;
+	}
+
+	if (sdkp->ws16 || sector > 0xffffffff || nr_sectors > 0xffff) {
+		rq->cmd_len = 16;
+		rq->cmd[0] = WRITE_SAME_16;
+		put_unaligned_be64(sector, &rq->cmd[2]);
+		put_unaligned_be32(nr_sectors, &rq->cmd[10]);
+	} else {
+		rq->cmd_len = 10;
+		rq->cmd[0] = WRITE_SAME;
+		put_unaligned_be32(sector, &rq->cmd[2]);
+		put_unaligned_be16(nr_sectors, &rq->cmd[7]);
+	}
+
+	return scsi_setup_blk_pc_cmnd(sdp, rq);
+}
+
 static int scsi_setup_flush_cmnd(struct scsi_device *sdp, struct request *rq)
 {
 	rq->timeout = SD_FLUSH_TIMEOUT;
@@ -660,6 +779,9 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
 	if (rq->cmd_flags & REQ_DISCARD) {
 		ret = scsi_setup_discard_cmnd(sdp, rq);
 		goto out;
+	} else if (rq->cmd_flags & REQ_WRITE_SAME) {
+		ret = sd_setup_write_same_cmnd(sdp, rq);
+		goto out;
 	} else if (rq->cmd_flags & REQ_FLUSH) {
 		ret = scsi_setup_flush_cmnd(sdp, rq);
 		goto out;
@@ -1375,13 +1497,21 @@ static int sd_done(struct scsi_cmnd *SCpnt)
 	unsigned int good_bytes = result ? 0 : scsi_bufflen(SCpnt);
 	struct scsi_sense_hdr sshdr;
 	struct scsi_disk *sdkp = scsi_disk(SCpnt->request->rq_disk);
+	struct request *req = SCpnt->request;
 	int sense_valid = 0;
 	int sense_deferred = 0;
 	unsigned char op = SCpnt->cmnd[0];
+	unsigned char unmap = SCpnt->cmnd[1] & 8;
 
 	if ((SCpnt->request->cmd_flags & REQ_DISCARD) && !result)
 		scsi_set_resid(SCpnt, 0);
 
+	if ((SCpnt->request->cmd_flags & REQ_WRITE_SAME) && !result) {
+		/* Complete entire bio, not just the single block transferred */
+		good_bytes = req->bio->bi_size;
+		scsi_set_resid(SCpnt, 0);
+	}
+
 	if (result) {
 		sense_valid = scsi_command_normalize_sense(SCpnt, &sshdr);
 		if (sense_valid)
@@ -1427,9 +1557,25 @@ static int sd_done(struct scsi_cmnd *SCpnt)
 		if (sshdr.asc == 0x10)  /* DIX: Host detected corruption */
 			good_bytes = sd_completed_bytes(SCpnt);
 		/* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */
-		if ((sshdr.asc == 0x20 || sshdr.asc == 0x24) &&
-		    (op == UNMAP || op == WRITE_SAME_16 || op == WRITE_SAME))
-			sd_config_discard(sdkp, SD_LBP_DISABLE);
+		if (sshdr.asc == 0x20 || sshdr.asc == 0x24) {
+			switch (op) {
+			case UNMAP:
+				sd_config_discard(sdkp, SD_LBP_DISABLE);
+				break;
+			case WRITE_SAME_16:
+			case WRITE_SAME:
+				if (unmap)
+					sd_config_discard(sdkp, SD_LBP_DISABLE);
+				else {
+					sdkp->device->no_write_same = 1;
+					sd_config_write_same(sdkp);
+
+					good_bytes = 0;
+					req->__data_len = req->bio->bi_size;
+					req->cmd_flags |= REQ_QUIET;
+				}
+			}
+		}
 		break;
 	default:
 		break;
@@ -2247,9 +2393,7 @@ static void sd_read_block_limits(struct scsi_disk *sdkp)
 	if (buffer[3] == 0x3c) {
 		unsigned int lba_count, desc_count;
 
-		sdkp->max_ws_blocks =
-			(u32) min_not_zero(get_unaligned_be64(&buffer[36]),
-					   (u64)0xffffffff);
+		sdkp->max_ws_blocks = (u32)get_unaligned_be64(&buffer[36]);
 
 		if (!sdkp->lbpme)
 			goto out;
@@ -2342,6 +2486,13 @@ static void sd_read_block_provisioning(struct scsi_disk *sdkp)
 	kfree(buffer);
 }
 
+static void sd_read_write_same(struct scsi_disk *sdkp, unsigned char *buffer)
+{
+	if (scsi_report_opcode(sdkp->device, buffer, SD_BUF_SIZE,
+			       WRITE_SAME_16))
+		sdkp->ws16 = 1;
+}
+
 static int sd_try_extended_inquiry(struct scsi_device *sdp)
 {
 	/*
@@ -2401,6 +2552,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
 		sd_read_write_protect_flag(sdkp, buffer);
 		sd_read_cache_type(sdkp, buffer);
 		sd_read_app_tag_own(sdkp, buffer);
+		sd_read_write_same(sdkp, buffer);
 	}
 
 	sdkp->first_scan = 0;
@@ -2418,6 +2570,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
 	blk_queue_flush(sdkp->disk->queue, flush);
 
 	set_capacity(disk, sdkp->capacity);
+	sd_config_write_same(sdkp);
 	kfree(buffer);
 
  out:
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 4163f29..5923e42 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -14,6 +14,7 @@
 #define SD_TIMEOUT		(30 * HZ)
 #define SD_MOD_TIMEOUT		(75 * HZ)
 #define SD_FLUSH_TIMEOUT	(60 * HZ)
+#define SD_WRITE_SAME_TIMEOUT	(120 * HZ)
 
 /*
  * Number of allowed retries
@@ -38,6 +39,11 @@ enum {
 };
 
 enum {
+	SD_MAX_WS10_BLOCKS = 0xffff,
+	SD_MAX_WS16_BLOCKS = 0x7fffff,
+};
+
+enum {
 	SD_LBP_FULL = 0,	/* Full logical block provisioning */
 	SD_LBP_UNMAP,		/* Use UNMAP command */
 	SD_LBP_WS16,		/* Use WRITE SAME(16) with UNMAP bit */
@@ -74,6 +80,7 @@ struct scsi_disk {
 	unsigned	lbpws : 1;
 	unsigned	lbpws10 : 1;
 	unsigned	lbpvpd : 1;
+	unsigned	ws16 : 1;
 };
 #define to_scsi_disk(obj) container_of(obj,struct scsi_disk,dev)
 
diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index 61178b8..7aaf5d4 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -200,6 +200,9 @@ static int slave_configure(struct scsi_device *sdev)
 		/* Do not attempt to use REPORT SUPPORTED OPERATION CODES */
 		sdev->no_report_opcodes = 1;
 
+		/* Do not attempt to use WRITE SAME */
+		sdev->no_write_same = 1;
+
 		/* Some disks return the total number of blocks in response
 		 * to READ CAPACITY rather than the highest block number.
 		 * If this device makes that mistake, tell the sd driver. */
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 59f31dc..c1d8c2f 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -135,6 +135,7 @@ struct scsi_device {
 	unsigned use_10_for_rw:1; /* first try 10-byte read / write */
 	unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */
 	unsigned no_report_opcodes:1;	/* no REPORT SUPPORTED OPERATION CODES */
+	unsigned no_write_same:1;	/* no WRITE SAME command */
 	unsigned skip_ms_page_8:1;	/* do not use MODE SENSE page 0x08 */
 	unsigned skip_ms_page_3f:1;	/* do not use MODE SENSE page 0x3f */
 	unsigned use_192_bytes_for_3f:1; /* ask for 192 bytes from page 0x3f */
-- 
1.7.8.3.21.gab8a7


  parent reply	other threads:[~2012-01-31  0:31 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-01-31  0:31 Write same support Martin K. Petersen
2012-01-31  0:31 ` [PATCH 1/5] block: Implement support for WRITE SAME Martin K. Petersen
2012-02-07 21:40   ` Vivek Goyal
2012-02-13 22:19     ` Martin K. Petersen
2012-02-14  8:05       ` Rolf Eike Beer
2012-02-15 15:33       ` Vivek Goyal
2012-02-16  3:29         ` Martin K. Petersen
2012-02-16 17:16           ` Vivek Goyal
2012-02-16 19:12             ` Martin K. Petersen
2012-02-08 22:50   ` Mike Snitzer
2012-02-08 23:12     ` Martin K. Petersen
2012-02-09  3:33       ` Mike Snitzer
2012-02-09  3:40   ` Mike Snitzer
2012-01-31  0:31 ` [PATCH 2/5] block: Make blkdev_issue_zeroout use " Martin K. Petersen
2012-01-31  0:31 ` [PATCH 3/5] block: ioctl to zero block ranges Martin K. Petersen
2012-01-31  0:31 ` [PATCH 4/5] scsi: Add a report opcode helper Martin K. Petersen
2012-01-31 19:53   ` Jeff Garzik
2012-01-31 20:16     ` Martin K. Petersen
2012-01-31  0:31 ` Martin K. Petersen [this message]
2012-02-20 16:16   ` [PATCH 5/5] sd: Implement support for WRITE SAME Mike Snitzer
2012-02-20 17:36     ` Martin K. Petersen
2012-02-20 18:28       ` Mike Snitzer
2012-02-03 19:15 ` Write same support Mike Snitzer
2012-02-03 19:20 ` Roland Dreier
2012-02-16 20:02 ` Mike Snitzer
2012-02-16 20:46   ` Martin K. Petersen
2012-02-16 21:09     ` Mike Snitzer
2012-02-16 21:03   ` dm-io async WRITE_SAME results in iSCSI NULL pointer [was: Re: Write same support] Mike Snitzer
2012-02-16 21:25     ` Mike Christie
2012-02-16 21:35       ` Mike Snitzer
2012-02-20 17:44     ` Martin K. Petersen
2012-02-20 18:46       ` Mike Snitzer
2012-02-20 23:44         ` Mike Snitzer
2012-02-21  0:07           ` Martin K. Petersen
2012-02-21  3:18             ` Mike Snitzer
2012-02-21  3:57               ` Martin K. Petersen
2012-02-21  6:55                 ` Mike Snitzer
2012-02-21 12:31                   ` Martin K. Petersen
2012-02-21 14:42                     ` Mike Snitzer
2012-02-21 19:33                       ` Mike Snitzer
2012-02-21 21:31                         ` Martin K. Petersen
2012-02-21 23:36                           ` Mike Snitzer
2012-02-21 19:47                   ` Vivek Goyal
2012-02-21 19:56                     ` Martin K. Petersen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1327969892-5090-6-git-send-email-martin.petersen@oracle.com \
    --to=martin.petersen@oracle.com \
    --cc=James.Bottomley@hansenpartnership.com \
    --cc=jaxboe@fusionio.com \
    --cc=linux-scsi@vger.kernel.org \
    --cc=snitzer@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.