All of lore.kernel.org
 help / color / mirror / Atom feed
* support ranges TRIM for libata
@ 2017-03-20 20:43 Christoph Hellwig
  2017-03-20 20:43 ` [PATCH 1/7] ѕd: split sd_setup_discard_cmnd Christoph Hellwig
                   ` (7 more replies)
  0 siblings, 8 replies; 30+ messages in thread
From: Christoph Hellwig @ 2017-03-20 20:43 UTC (permalink / raw)
  To: tj, martin.petersen, axboe; +Cc: linux-ide, linux-scsi, linux-block

This series implements rangeѕ discard for ATA SSDs.  Compared to the
initial NVMe support there are two things that complicate the ATA
support:

 - ATA only suports 16-bit long ranges
 - the whole mess of generating a SCSI command first and then
   translating it to an ATA one.

This series adds support for limited range size to the block layer,
and stops translating discard commands - instead we add a new
Vendor Specific SCSI command that contains the TRIM payload when
the device asks for it.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCH 1/7] ѕd: split sd_setup_discard_cmnd
  2017-03-20 20:43 support ranges TRIM for libata Christoph Hellwig
@ 2017-03-20 20:43 ` Christoph Hellwig
  2017-03-27 22:24     ` Bart Van Assche
  2017-03-20 20:43 ` [PATCH 2/7] sd: provide a new ata trim provisioning mode Christoph Hellwig
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 30+ messages in thread
From: Christoph Hellwig @ 2017-03-20 20:43 UTC (permalink / raw)
  To: tj, martin.petersen, axboe; +Cc: linux-ide, linux-scsi, linux-block

Split sd_setup_discard_cmnd into one function per provisioning type.  While
this creates some very slight duplication of boilerplate code it keeps the
code modular for additions of new provisioning types, and for reusing the
write same functions for the upcoming scsi implementation of the Write Zeroes
operation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/scsi/sd.c | 153 ++++++++++++++++++++++++++++++------------------------
 1 file changed, 84 insertions(+), 69 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index fcfeddc79331..b853f91fb3da 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -701,93 +701,97 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
 	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
 }
 
-/**
- * sd_setup_discard_cmnd - unmap blocks on thinly provisioned device
- * @sdp: scsi device to operate on
- * @rq: Request to prepare
- *
- * Will issue either UNMAP or WRITE SAME(16) depending on preference
- * indicated by target device.
- **/
-static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd)
+static int sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
 {
-	struct request *rq = cmd->request;
 	struct scsi_device *sdp = cmd->device;
-	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
-	sector_t sector = blk_rq_pos(rq);
-	unsigned int nr_sectors = blk_rq_sectors(rq);
-	unsigned int len;
-	int ret;
+	struct request *rq = cmd->request;
+	u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
+	u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
+	unsigned int data_len = 24;
 	char *buf;
-	struct page *page;
 
-	sector >>= ilog2(sdp->sector_size) - 9;
-	nr_sectors >>= ilog2(sdp->sector_size) - 9;
-
-	page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
-	if (!page)
+	rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+	if (!rq->special_vec.bv_page)
 		return BLKPREP_DEFER;
+	rq->special_vec.bv_offset = 0;
+	rq->special_vec.bv_len = data_len;
+	rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
 
-	switch (sdkp->provisioning_mode) {
-	case SD_LBP_UNMAP:
-		buf = page_address(page);
-
-		cmd->cmd_len = 10;
-		cmd->cmnd[0] = UNMAP;
-		cmd->cmnd[8] = 24;
-
-		put_unaligned_be16(6 + 16, &buf[0]);
-		put_unaligned_be16(16, &buf[2]);
-		put_unaligned_be64(sector, &buf[8]);
-		put_unaligned_be32(nr_sectors, &buf[16]);
+	cmd->cmd_len = 10;
+	cmd->cmnd[0] = UNMAP;
+	cmd->cmnd[8] = 24;
 
-		len = 24;
-		break;
+	buf = page_address(rq->special_vec.bv_page);
+	put_unaligned_be16(6 + 16, &buf[0]);
+	put_unaligned_be16(16, &buf[2]);
+	put_unaligned_be64(sector, &buf[8]);
+	put_unaligned_be32(nr_sectors, &buf[16]);
 
-	case SD_LBP_WS16:
-		cmd->cmd_len = 16;
-		cmd->cmnd[0] = WRITE_SAME_16;
-		cmd->cmnd[1] = 0x8; /* UNMAP */
-		put_unaligned_be64(sector, &cmd->cmnd[2]);
-		put_unaligned_be32(nr_sectors, &cmd->cmnd[10]);
+	cmd->allowed = SD_MAX_RETRIES;
+	cmd->transfersize = data_len;
+	rq->timeout = SD_TIMEOUT;
+	scsi_req(rq)->resid_len = data_len;
 
-		len = sdkp->device->sector_size;
-		break;
+	return scsi_init_io(cmd);
+}
 
-	case SD_LBP_WS10:
-	case SD_LBP_ZERO:
-		cmd->cmd_len = 10;
-		cmd->cmnd[0] = WRITE_SAME;
-		if (sdkp->provisioning_mode == SD_LBP_WS10)
-			cmd->cmnd[1] = 0x8; /* UNMAP */
-		put_unaligned_be32(sector, &cmd->cmnd[2]);
-		put_unaligned_be16(nr_sectors, &cmd->cmnd[7]);
+static int sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd)
+{
+	struct scsi_device *sdp = cmd->device;
+	struct request *rq = cmd->request;
+	u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
+	u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
+	u32 data_len = sdp->sector_size;
 
-		len = sdkp->device->sector_size;
-		break;
+	rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+	if (!rq->special_vec.bv_page)
+		return BLKPREP_DEFER;
+	rq->special_vec.bv_offset = 0;
+	rq->special_vec.bv_len = data_len;
+	rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
 
-	default:
-		ret = BLKPREP_INVALID;
-		goto out;
-	}
+	cmd->cmd_len = 16;
+	cmd->cmnd[0] = WRITE_SAME_16;
+	cmd->cmnd[1] = 0x8; /* UNMAP */
+	put_unaligned_be64(sector, &cmd->cmnd[2]);
+	put_unaligned_be32(nr_sectors, &cmd->cmnd[10]);
 
+	cmd->allowed = SD_MAX_RETRIES;
+	cmd->transfersize = data_len;
 	rq->timeout = SD_TIMEOUT;
+	scsi_req(rq)->resid_len = data_len;
 
-	cmd->transfersize = len;
-	cmd->allowed = SD_MAX_RETRIES;
+	return scsi_init_io(cmd);
+}
 
-	rq->special_vec.bv_page = page;
-	rq->special_vec.bv_offset = 0;
-	rq->special_vec.bv_len = len;
+static int sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, bool unmap)
+{
+	struct scsi_device *sdp = cmd->device;
+	struct request *rq = cmd->request;
+	u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
+	u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
+	u32 data_len = sdp->sector_size;
 
+	rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+	if (!rq->special_vec.bv_page)
+		return BLKPREP_DEFER;
+	rq->special_vec.bv_offset = 0;
+	rq->special_vec.bv_len = data_len;
 	rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
-	scsi_req(rq)->resid_len = len;
 
-	ret = scsi_init_io(cmd);
-out:
-	if (ret != BLKPREP_OK)
-		__free_page(page);
-	return ret;
+	cmd->cmd_len = 10;
+	cmd->cmnd[0] = WRITE_SAME;
+	if (unmap)
+		cmd->cmnd[1] = 0x8; /* UNMAP */
+	put_unaligned_be32(sector, &cmd->cmnd[2]);
+	put_unaligned_be16(nr_sectors, &cmd->cmnd[7]);
+
+	cmd->allowed = SD_MAX_RETRIES;
+	cmd->transfersize = data_len;
+	rq->timeout = SD_TIMEOUT;
+	scsi_req(rq)->resid_len = data_len;
+
+	return scsi_init_io(cmd);
 }
 
 static void sd_config_write_same(struct scsi_disk *sdkp)
@@ -1155,7 +1159,18 @@ static int sd_init_command(struct scsi_cmnd *cmd)
 
 	switch (req_op(rq)) {
 	case REQ_OP_DISCARD:
-		return sd_setup_discard_cmnd(cmd);
+		switch (scsi_disk(rq->rq_disk)->provisioning_mode) {
+		case SD_LBP_UNMAP:
+			return sd_setup_unmap_cmnd(cmd);
+		case SD_LBP_WS16:
+			return sd_setup_write_same16_cmnd(cmd);
+		case SD_LBP_WS10:
+			return sd_setup_write_same10_cmnd(cmd, true);
+		case SD_LBP_ZERO:
+			return sd_setup_write_same10_cmnd(cmd, false);
+		default:
+			return BLKPREP_INVALID;
+		}
 	case REQ_OP_WRITE_SAME:
 		return sd_setup_write_same_cmnd(cmd);
 	case REQ_OP_FLUSH:
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 2/7] sd: provide a new ata trim provisioning mode
  2017-03-20 20:43 support ranges TRIM for libata Christoph Hellwig
  2017-03-20 20:43 ` [PATCH 1/7] ѕd: split sd_setup_discard_cmnd Christoph Hellwig
@ 2017-03-20 20:43 ` Christoph Hellwig
  2017-03-27 22:40     ` Bart Van Assche
  2017-03-20 20:43 ` [PATCH 3/7] libata: remove SCT WRITE SAME support Christoph Hellwig
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 30+ messages in thread
From: Christoph Hellwig @ 2017-03-20 20:43 UTC (permalink / raw)
  To: tj, martin.petersen, axboe; +Cc: linux-ide, linux-scsi, linux-block

This uses a vendor specific command to pass the ATA TRIM payload to
libata without having to rewrite it in place.  Support for it is
indicated by a new flag in struct scsi_device that libata will set
in it's slave_configure routine.  A second flag indicates if TRIM
will reliably zero data.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/scsi/sd.c          | 60 +++++++++++++++++++++++++++++++++++++++++++---
 drivers/scsi/sd.h          |  1 +
 include/scsi/scsi_device.h |  2 ++
 include/scsi/scsi_proto.h  |  3 +++
 4 files changed, 63 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index b853f91fb3da..cc8684818305 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -371,6 +371,7 @@ static const char *lbp_mode[] = {
 	[SD_LBP_WS16]		= "writesame_16",
 	[SD_LBP_WS10]		= "writesame_10",
 	[SD_LBP_ZERO]		= "writesame_zero",
+	[SD_LBP_ATA_TRIM]	= "ata_trim",
 	[SD_LBP_DISABLE]	= "disabled",
 };
 
@@ -411,7 +412,7 @@ provisioning_mode_store(struct device *dev, struct device_attribute *attr,
 		sd_config_discard(sdkp, SD_LBP_ZERO);
 	else if (!strncmp(buf, lbp_mode[SD_LBP_DISABLE], 20))
 		sd_config_discard(sdkp, SD_LBP_DISABLE);
-	else
+	else /* we don't allow manual setting of SD_LBP_ATA_TRIM */
 		return -EINVAL;
 
 	return count;
@@ -653,7 +654,7 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
 	 * lead to data corruption. If LBPRZ is not set, we honor the
 	 * device preference.
 	 */
-	if (sdkp->lbprz) {
+	if (sdkp->lbprz || sdkp->device->ata_trim) {
 		q->limits.discard_alignment = 0;
 		q->limits.discard_granularity = logical_block_size;
 	} else {
@@ -695,6 +696,12 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
 					  (u32)SD_MAX_WS10_BLOCKS);
 		q->limits.discard_zeroes_data = 1;
 		break;
+
+	case SD_LBP_ATA_TRIM:
+		max_blocks = 65535 * (512 / sizeof(__le64));
+		if (sdkp->device->ata_trim_zeroes_data)
+			q->limits.discard_zeroes_data = 1;
+		break;
 	}
 
 	blk_queue_max_discard_sectors(q, max_blocks * (logical_block_size >> 9));
@@ -794,6 +801,49 @@ static int sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, bool unmap)
 	return scsi_init_io(cmd);
 }
 
+static int sd_setup_ata_trim_cmnd(struct scsi_cmnd *cmd)
+{
+	struct scsi_device *sdp = cmd->device;
+	struct request *rq = cmd->request;
+	u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
+	u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
+	u32 data_len = sdp->sector_size, i;
+	__le64 *buf;
+
+	rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+	if (!rq->special_vec.bv_page)
+		return BLKPREP_DEFER;
+	rq->special_vec.bv_offset = 0;
+	rq->special_vec.bv_len = data_len;
+	rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
+
+	/*
+	 * Use the Linux Vendor Specific TRIM command to pass the TRIM payload
+	 * to libata.
+	 */
+	cmd->cmd_len = 10;
+	cmd->cmnd[0] = LINUX_VS_TRIM;
+	cmd->cmnd[8] = data_len;
+
+	buf = page_address(rq->special_vec.bv_page);
+	for (i = 0; i < (data_len >> 3); i++) {
+		u64 n = min(nr_sectors, 0xffffu);
+
+		buf[i] = cpu_to_le64(sector | (n << 48));
+		if (nr_sectors <= 0xffff)
+			break;
+		sector += 0xffff;
+		nr_sectors -= 0xffff;
+	}
+
+	cmd->allowed = SD_MAX_RETRIES;
+	cmd->transfersize = data_len;
+	rq->timeout = SD_TIMEOUT;
+	scsi_req(rq)->resid_len = data_len;
+
+	return scsi_init_io(cmd);
+}
+
 static void sd_config_write_same(struct scsi_disk *sdkp)
 {
 	struct request_queue *q = sdkp->disk->queue;
@@ -1168,6 +1218,8 @@ static int sd_init_command(struct scsi_cmnd *cmd)
 			return sd_setup_write_same10_cmnd(cmd, true);
 		case SD_LBP_ZERO:
 			return sd_setup_write_same10_cmnd(cmd, false);
+		case SD_LBP_ATA_TRIM:
+			return sd_setup_ata_trim_cmnd(cmd);
 		default:
 			return BLKPREP_INVALID;
 		}
@@ -2739,7 +2791,9 @@ static void sd_read_block_limits(struct scsi_disk *sdkp)
 	sdkp->max_xfer_blocks = get_unaligned_be32(&buffer[8]);
 	sdkp->opt_xfer_blocks = get_unaligned_be32(&buffer[12]);
 
-	if (buffer[3] == 0x3c) {
+	if (sdkp->device->ata_trim) {
+		sd_config_discard(sdkp, SD_LBP_ATA_TRIM);
+	} else if (buffer[3] == 0x3c) {
 		unsigned int lba_count, desc_count;
 
 		sdkp->max_ws_blocks = (u32)get_unaligned_be64(&buffer[36]);
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 4dac35e96a75..711d48cea5d7 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -56,6 +56,7 @@ enum {
 	SD_LBP_WS16,		/* Use WRITE SAME(16) with UNMAP bit */
 	SD_LBP_WS10,		/* Use WRITE SAME(10) with UNMAP bit */
 	SD_LBP_ZERO,		/* Use WRITE SAME(10) with zero payload */
+	SD_LBP_ATA_TRIM,	/* generate a ATA TRIM payload for libata */
 	SD_LBP_DISABLE,		/* Discard disabled due to failed cmd */
 };
 
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 080c7ce9bae8..7b1450b1b130 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -182,6 +182,8 @@ struct scsi_device {
 	unsigned broken_fua:1;		/* Don't set FUA bit */
 	unsigned lun_in_cdb:1;		/* Store LUN bits in CDB[1] */
 	unsigned synchronous_alua:1;	/* Synchronous ALUA commands */
+	unsigned ata_trim:1;		/* use ATA TRIM payload for discard */
+	unsigned ata_trim_zeroes_data:1;/* ATA TRIM zeroes discard blocks */
 
 	atomic_t disk_events_disable_depth; /* disable depth for disk events */
 
diff --git a/include/scsi/scsi_proto.h b/include/scsi/scsi_proto.h
index 6ba66e01f6df..033b5662a5f5 100644
--- a/include/scsi/scsi_proto.h
+++ b/include/scsi/scsi_proto.h
@@ -169,6 +169,9 @@
 /* Vendor specific CDBs start here */
 #define VENDOR_SPECIFIC_CDB 0xc0
 
+/* used to pass the TRIM payload to libata with rewriting it: */
+#define LINUX_VS_TRIM		VENDOR_SPECIFIC_CDB
+
 /*
  *	SCSI command lengths
  */
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 3/7] libata: remove SCT WRITE SAME support
  2017-03-20 20:43 support ranges TRIM for libata Christoph Hellwig
  2017-03-20 20:43 ` [PATCH 1/7] ѕd: split sd_setup_discard_cmnd Christoph Hellwig
  2017-03-20 20:43 ` [PATCH 2/7] sd: provide a new ata trim provisioning mode Christoph Hellwig
@ 2017-03-20 20:43 ` Christoph Hellwig
  2017-03-20 20:43 ` [PATCH 4/7] libata: simplify the trim implementation Christoph Hellwig
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2017-03-20 20:43 UTC (permalink / raw)
  To: tj, martin.petersen, axboe; +Cc: linux-ide, linux-scsi, linux-block

This was already disabled a while ago because it caused I/O errors,
and it's severly getting into the way of the discard / write zeroes
rework.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/ata/libata-scsi.c | 128 +++++++++++-----------------------------------
 1 file changed, 29 insertions(+), 99 deletions(-)

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 1ac70744ae7b..b93d7e33789a 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -3393,46 +3393,6 @@ static size_t ata_format_dsm_trim_descr(struct scsi_cmnd *cmd, u32 trmax,
 }
 
 /**
- * ata_format_dsm_trim_descr() - SATL Write Same to ATA SCT Write Same
- * @cmd: SCSI command being translated
- * @lba: Starting sector
- * @num: Number of sectors to be zero'd.
- *
- * Rewrite the WRITE SAME payload to be an SCT Write Same formatted
- * descriptor.
- * NOTE: Writes a pattern (0's) in the foreground.
- *
- * Return: Number of bytes copied into sglist.
- */
-static size_t ata_format_sct_write_same(struct scsi_cmnd *cmd, u64 lba, u64 num)
-{
-	struct scsi_device *sdp = cmd->device;
-	size_t len = sdp->sector_size;
-	size_t r;
-	u16 *buf;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ata_scsi_rbuf_lock, flags);
-	buf = ((void *)ata_scsi_rbuf);
-
-	put_unaligned_le16(0x0002,  &buf[0]); /* SCT_ACT_WRITE_SAME */
-	put_unaligned_le16(0x0101,  &buf[1]); /* WRITE PTRN FG */
-	put_unaligned_le64(lba,     &buf[2]);
-	put_unaligned_le64(num,     &buf[6]);
-	put_unaligned_le32(0u,      &buf[10]); /* pattern */
-
-	WARN_ON(len > ATA_SCSI_RBUF_SIZE);
-
-	if (len > ATA_SCSI_RBUF_SIZE)
-		len = ATA_SCSI_RBUF_SIZE;
-
-	r = sg_copy_from_buffer(scsi_sglist(cmd), scsi_sg_count(cmd), buf, len);
-	spin_unlock_irqrestore(&ata_scsi_rbuf_lock, flags);
-
-	return r;
-}
-
-/**
  * ata_scsi_write_same_xlat() - SATL Write Same to ATA SCT Write Same
  * @qc: Command to be translated
  *
@@ -3468,26 +3428,17 @@ static unsigned int ata_scsi_write_same_xlat(struct ata_queued_cmd *qc)
 	}
 	scsi_16_lba_len(cdb, &block, &n_block);
 
-	if (unmap) {
-		/* If trim is not enabled the cmd is invalid. */
-		if ((dev->horkage & ATA_HORKAGE_NOTRIM) ||
-		    !ata_id_has_trim(dev->id)) {
-			fp = 1;
-			bp = 3;
-			goto invalid_fld;
-		}
-		/* If the request is too large the cmd is invalid */
-		if (n_block > 0xffff * trmax) {
-			fp = 2;
-			goto invalid_fld;
-		}
-	} else {
-		/* If write same is not available the cmd is invalid */
-		if (!ata_id_sct_write_same(dev->id)) {
-			fp = 1;
-			bp = 3;
-			goto invalid_fld;
-		}
+	if (!unmap ||
+	    (dev->horkage & ATA_HORKAGE_NOTRIM) ||
+	    !ata_id_has_trim(dev->id)) {
+		fp = 1;
+		bp = 3;
+		goto invalid_fld;
+	}
+	/* If the request is too large the cmd is invalid */
+	if (n_block > 0xffff * trmax) {
+		fp = 2;
+		goto invalid_fld;
 	}
 
 	/*
@@ -3502,49 +3453,28 @@ static unsigned int ata_scsi_write_same_xlat(struct ata_queued_cmd *qc)
 	 * For DATA SET MANAGEMENT TRIM in ACS-2 nsect (aka count)
 	 * is defined as number of 512 byte blocks to be transferred.
 	 */
-	if (unmap) {
-		size = ata_format_dsm_trim_descr(scmd, trmax, block, n_block);
-		if (size != len)
-			goto invalid_param_len;
 
-		if (ata_ncq_enabled(dev) && ata_fpdma_dsm_supported(dev)) {
-			/* Newer devices support queued TRIM commands */
-			tf->protocol = ATA_PROT_NCQ;
-			tf->command = ATA_CMD_FPDMA_SEND;
-			tf->hob_nsect = ATA_SUBCMD_FPDMA_SEND_DSM & 0x1f;
-			tf->nsect = qc->tag << 3;
-			tf->hob_feature = (size / 512) >> 8;
-			tf->feature = size / 512;
+	size = ata_format_dsm_trim_descr(scmd, trmax, block, n_block);
+	if (size != len)
+		goto invalid_param_len;
 
-			tf->auxiliary = 1;
-		} else {
-			tf->protocol = ATA_PROT_DMA;
-			tf->hob_feature = 0;
-			tf->feature = ATA_DSM_TRIM;
-			tf->hob_nsect = (size / 512) >> 8;
-			tf->nsect = size / 512;
-			tf->command = ATA_CMD_DSM;
-		}
-	} else {
-		size = ata_format_sct_write_same(scmd, block, n_block);
-		if (size != len)
-			goto invalid_param_len;
+	if (ata_ncq_enabled(dev) && ata_fpdma_dsm_supported(dev)) {
+		/* Newer devices support queued TRIM commands */
+		tf->protocol = ATA_PROT_NCQ;
+		tf->command = ATA_CMD_FPDMA_SEND;
+		tf->hob_nsect = ATA_SUBCMD_FPDMA_SEND_DSM & 0x1f;
+		tf->nsect = qc->tag << 3;
+		tf->hob_feature = (size / 512) >> 8;
+		tf->feature = size / 512;
 
-		tf->hob_feature = 0;
-		tf->feature = 0;
-		tf->hob_nsect = 0;
-		tf->nsect = 1;
-		tf->lbah = 0;
-		tf->lbam = 0;
-		tf->lbal = ATA_CMD_STANDBYNOW1;
-		tf->hob_lbah = 0;
-		tf->hob_lbam = 0;
-		tf->hob_lbal = 0;
-		tf->device = ATA_CMD_STANDBYNOW1;
+		tf->auxiliary = 1;
+	} else {
 		tf->protocol = ATA_PROT_DMA;
-		tf->command = ATA_CMD_WRITE_LOG_DMA_EXT;
-		if (unlikely(dev->flags & ATA_DFLAG_PIO))
-			tf->command = ATA_CMD_WRITE_LOG_EXT;
+		tf->hob_feature = 0;
+		tf->feature = ATA_DSM_TRIM;
+		tf->hob_nsect = (size / 512) >> 8;
+		tf->nsect = size / 512;
+		tf->command = ATA_CMD_DSM;
 	}
 
 	tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE | ATA_TFLAG_LBA48 |
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 4/7] libata: simplify the trim implementation
  2017-03-20 20:43 support ranges TRIM for libata Christoph Hellwig
                   ` (2 preceding siblings ...)
  2017-03-20 20:43 ` [PATCH 3/7] libata: remove SCT WRITE SAME support Christoph Hellwig
@ 2017-03-20 20:43 ` Christoph Hellwig
  2017-03-20 20:43 ` [PATCH 5/7] block: add a max_discard_segment_size queue limit Christoph Hellwig
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2017-03-20 20:43 UTC (permalink / raw)
  To: tj, martin.petersen, axboe; +Cc: linux-ide, linux-scsi, linux-block

Don't try to fake up basic SCSI logical block provisioning and WRITE SAME
support, but offer support for the Linux Vendor Specific TRIM command
instead.  This simplifies the implementation a lot, and avoids rewriting
the data out buffer in the I/O path.   Note that this new command is only
offered to the block layer and will fail for pass through commands.
While this is theoretically a regression in the functionality offered
through SG_IO the previous support was buggy and corrupted user memory
by rewriting the data out buffer in place.

Last but not least this removes the global ata_scsi_rbuf_lock from
the trim path.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/ata/libata-scsi.c | 179 ++++++++--------------------------------------
 1 file changed, 28 insertions(+), 151 deletions(-)

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index b93d7e33789a..965b9e7dbb7d 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1322,6 +1322,16 @@ static int ata_scsi_dev_config(struct scsi_device *sdev,
 
 	blk_queue_flush_queueable(q, false);
 
+	if (ata_id_has_trim(dev->id) &&
+	    !(dev->horkage & ATA_HORKAGE_NOTRIM)) {
+		sdev->ata_trim = 1;
+		if (ata_id_has_zero_after_trim(dev->id) &&
+		    (dev->horkage & ATA_HORKAGE_ZERO_AFTER_TRIM)) {
+			ata_dev_info(dev, "Enabling discard_zeroes_data\n");
+			sdev->ata_trim_zeroes_data = 1;
+		}
+	}
+
 	dev->sdev = sdev;
 	return 0;
 }
@@ -2383,21 +2393,6 @@ static unsigned int ata_scsiop_inq_b0(struct ata_scsi_args *args, u8 *rbuf)
 	 */
 	min_io_sectors = 1 << ata_id_log2_per_physical_sector(args->id);
 	put_unaligned_be16(min_io_sectors, &rbuf[6]);
-
-	/*
-	 * Optimal unmap granularity.
-	 *
-	 * The ATA spec doesn't even know about a granularity or alignment
-	 * for the TRIM command.  We can leave away most of the unmap related
-	 * VPD page entries, but we have specifify a granularity to signal
-	 * that we support some form of unmap - in thise case via WRITE SAME
-	 * with the unmap bit set.
-	 */
-	if (ata_id_has_trim(args->id)) {
-		put_unaligned_be64(65535 * ATA_MAX_TRIM_RNUM, &rbuf[36]);
-		put_unaligned_be32(1, &rbuf[28]);
-	}
-
 	return 0;
 }
 
@@ -2746,16 +2741,6 @@ static unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf)
 		rbuf[14] = (lowest_aligned >> 8) & 0x3f;
 		rbuf[15] = lowest_aligned;
 
-		if (ata_id_has_trim(args->id) &&
-		    !(dev->horkage & ATA_HORKAGE_NOTRIM)) {
-			rbuf[14] |= 0x80; /* LBPME */
-
-			if (ata_id_has_zero_after_trim(args->id) &&
-			    dev->horkage & ATA_HORKAGE_ZERO_AFTER_TRIM) {
-				ata_dev_info(dev, "Enabling discard_zeroes_data\n");
-				rbuf[14] |= 0x40; /* LBPRZ */
-			}
-		}
 		if (ata_id_zoned_cap(args->id) ||
 		    args->dev->class == ATA_DEV_ZAC)
 			rbuf[12] = (1 << 4); /* RC_BASIS */
@@ -3339,141 +3324,45 @@ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc)
 }
 
 /**
- * ata_format_dsm_trim_descr() - SATL Write Same to DSM Trim
- * @cmd: SCSI command being translated
- * @trmax: Maximum number of entries that will fit in sector_size bytes.
- * @sector: Starting sector
- * @count: Total Range of request in logical sectors
- *
- * Rewrite the WRITE SAME descriptor to be a DSM TRIM little-endian formatted
- * descriptor.
- *
- * Upto 64 entries of the format:
- *   63:48 Range Length
- *   47:0  LBA
- *
- *  Range Length of 0 is ignored.
- *  LBA's should be sorted order and not overlap.
- *
- * NOTE: this is the same format as ADD LBA(S) TO NV CACHE PINNED SET
- *
- * Return: Number of bytes copied into sglist.
- */
-static size_t ata_format_dsm_trim_descr(struct scsi_cmnd *cmd, u32 trmax,
-					u64 sector, u32 count)
-{
-	struct scsi_device *sdp = cmd->device;
-	size_t len = sdp->sector_size;
-	size_t r;
-	__le64 *buf;
-	u32 i = 0;
-	unsigned long flags;
-
-	WARN_ON(len > ATA_SCSI_RBUF_SIZE);
-
-	if (len > ATA_SCSI_RBUF_SIZE)
-		len = ATA_SCSI_RBUF_SIZE;
-
-	spin_lock_irqsave(&ata_scsi_rbuf_lock, flags);
-	buf = ((void *)ata_scsi_rbuf);
-	memset(buf, 0, len);
-	while (i < trmax) {
-		u64 entry = sector |
-			((u64)(count > 0xffff ? 0xffff : count) << 48);
-		buf[i++] = __cpu_to_le64(entry);
-		if (count <= 0xffff)
-			break;
-		count -= 0xffff;
-		sector += 0xffff;
-	}
-	r = sg_copy_from_buffer(scsi_sglist(cmd), scsi_sg_count(cmd), buf, len);
-	spin_unlock_irqrestore(&ata_scsi_rbuf_lock, flags);
-
-	return r;
-}
-
-/**
- * ata_scsi_write_same_xlat() - SATL Write Same to ATA SCT Write Same
+ * ata_scsi_trim_xlat() - Handle the vendor specific TRIM command.
  * @qc: Command to be translated
  *
- * Translate a SCSI WRITE SAME command to be either a DSM TRIM command or
- * an SCT Write Same command.
- * Based on WRITE SAME has the UNMAP flag
- *   When set translate to DSM TRIM
- *   When clear translate to SCT Write Same
+ * Setup a DSM TRIM command (or it's queued variant) after sd already
+ * prepared the payload for us.
  */
-static unsigned int ata_scsi_write_same_xlat(struct ata_queued_cmd *qc)
+static unsigned int ata_scsi_trim_xlat(struct ata_queued_cmd *qc)
 {
 	struct ata_taskfile *tf = &qc->tf;
 	struct scsi_cmnd *scmd = qc->scsicmd;
-	struct scsi_device *sdp = scmd->device;
-	size_t len = sdp->sector_size;
 	struct ata_device *dev = qc->dev;
-	const u8 *cdb = scmd->cmnd;
-	u64 block;
-	u32 n_block;
-	const u32 trmax = len >> 3;
-	u32 size;
-	u16 fp;
-	u8 bp = 0xff;
-	u8 unmap = cdb[1] & 0x8;
-
-	/* we may not issue DMA commands if no DMA mode is set */
-	if (unlikely(!dev->dma_mode))
-		goto invalid_opcode;
 
-	if (unlikely(scmd->cmd_len < 16)) {
-		fp = 15;
-		goto invalid_fld;
+	if (unlikely(!dev->dma_mode)) {
+		ata_scsi_set_sense(dev, scmd, ILLEGAL_REQUEST, 0x20, 0x0);
+		return 1;
 	}
-	scsi_16_lba_len(cdb, &block, &n_block);
 
-	if (!unmap ||
-	    (dev->horkage & ATA_HORKAGE_NOTRIM) ||
-	    !ata_id_has_trim(dev->id)) {
-		fp = 1;
-		bp = 3;
-		goto invalid_fld;
-	}
-	/* If the request is too large the cmd is invalid */
-	if (n_block > 0xffff * trmax) {
-		fp = 2;
-		goto invalid_fld;
+	/* We only allow sending this command through the block layer */
+	if (unlikely(req_op(scmd->request) != REQ_OP_DISCARD)) {
+		ata_scsi_set_sense(dev, scmd, ILLEGAL_REQUEST, 0x20, 0x0);
+		return 1;
 	}
 
-	/*
-	 * WRITE SAME always has a sector sized buffer as payload, this
-	 * should never be a multiple entry S/G list.
-	 */
-	if (!scsi_sg_count(scmd))
-		goto invalid_param_len;
-
-	/*
-	 * size must match sector size in bytes
-	 * For DATA SET MANAGEMENT TRIM in ACS-2 nsect (aka count)
-	 * is defined as number of 512 byte blocks to be transferred.
-	 */
-
-	size = ata_format_dsm_trim_descr(scmd, trmax, block, n_block);
-	if (size != len)
-		goto invalid_param_len;
-
 	if (ata_ncq_enabled(dev) && ata_fpdma_dsm_supported(dev)) {
 		/* Newer devices support queued TRIM commands */
 		tf->protocol = ATA_PROT_NCQ;
 		tf->command = ATA_CMD_FPDMA_SEND;
 		tf->hob_nsect = ATA_SUBCMD_FPDMA_SEND_DSM & 0x1f;
 		tf->nsect = qc->tag << 3;
-		tf->hob_feature = (size / 512) >> 8;
-		tf->feature = size / 512;
+		tf->hob_feature = (scmd->device->sector_size / 512) >> 8;
+		tf->feature = scmd->device->sector_size / 512;
 
 		tf->auxiliary = 1;
 	} else {
 		tf->protocol = ATA_PROT_DMA;
 		tf->hob_feature = 0;
 		tf->feature = ATA_DSM_TRIM;
-		tf->hob_nsect = (size / 512) >> 8;
-		tf->nsect = size / 512;
+		tf->hob_nsect = (scmd->device->sector_size / 512) >> 8;
+		tf->nsect = scmd->device->sector_size / 512;
 		tf->command = ATA_CMD_DSM;
 	}
 
@@ -3483,18 +3372,6 @@ static unsigned int ata_scsi_write_same_xlat(struct ata_queued_cmd *qc)
 	ata_qc_set_pc_nbytes(qc);
 
 	return 0;
-
-invalid_fld:
-	ata_scsi_set_invalid_field(dev, scmd, fp, bp);
-	return 1;
-invalid_param_len:
-	/* "Parameter list length error" */
-	ata_scsi_set_sense(dev, scmd, ILLEGAL_REQUEST, 0x1a, 0x0);
-	return 1;
-invalid_opcode:
-	/* "Invalid command operation code" */
-	ata_scsi_set_sense(dev, scmd, ILLEGAL_REQUEST, 0x20, 0x0);
-	return 1;
 }
 
 /**
@@ -4087,9 +3964,6 @@ static inline ata_xlat_func_t ata_get_xlat_func(struct ata_device *dev, u8 cmd)
 	case WRITE_16:
 		return ata_scsi_rw_xlat;
 
-	case WRITE_SAME_16:
-		return ata_scsi_write_same_xlat;
-
 	case SYNCHRONIZE_CACHE:
 		if (ata_try_flush_cache(dev))
 			return ata_scsi_flush_xlat;
@@ -4116,6 +3990,9 @@ static inline ata_xlat_func_t ata_get_xlat_func(struct ata_device *dev, u8 cmd)
 
 	case START_STOP:
 		return ata_scsi_start_stop_xlat;
+
+	case LINUX_VS_TRIM:
+		return ata_scsi_trim_xlat;
 	}
 
 	return NULL;
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 5/7] block: add a max_discard_segment_size queue limit
  2017-03-20 20:43 support ranges TRIM for libata Christoph Hellwig
                   ` (3 preceding siblings ...)
  2017-03-20 20:43 ` [PATCH 4/7] libata: simplify the trim implementation Christoph Hellwig
@ 2017-03-20 20:43 ` Christoph Hellwig
  2017-03-27 23:09     ` Bart Van Assche
  2017-03-20 20:43 ` [PATCH 6/7] sd: support multi-range TRIM for ATA disks Christoph Hellwig
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 30+ messages in thread
From: Christoph Hellwig @ 2017-03-20 20:43 UTC (permalink / raw)
  To: tj, martin.petersen, axboe; +Cc: linux-ide, linux-scsi, linux-block

ATA only allows 16 bits, so we need a limit.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-core.c       |  6 ++++++
 block/blk-merge.c      |  9 +++++++++
 block/blk-settings.c   | 14 ++++++++++++++
 include/linux/blkdev.h |  8 ++++++++
 4 files changed, 37 insertions(+)

diff --git a/block/blk-core.c b/block/blk-core.c
index d772c221cc17..3eb3bd89b47a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1486,9 +1486,15 @@ bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
 		struct bio *bio)
 {
 	unsigned short segments = blk_rq_nr_discard_segments(req);
+	unsigned max_segment_sectors = queue_max_discard_segment_size(q) >> 9;
 
 	if (segments >= queue_max_discard_segments(q))
 		goto no_merge;
+	if (blk_rq_sectors(req) > max_segment_sectors)
+		goto no_merge;
+	if (bio_sectors(bio) > max_segment_sectors)
+		goto no_merge;
+
 	if (blk_rq_sectors(req) + bio_sectors(bio) >
 	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
 		goto no_merge;
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 2afa262425d1..c62a6f0325e0 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -11,6 +11,15 @@
 
 #include "blk.h"
 
+/*
+ * Split a discard bio if it doesn't fit into the overall discard request size
+ * of the device.  Note that we don't split it here if it's over the maximum
+ * discard segment size to avoid creating way too many bios in that case.
+ * We will simply take care of never merging such a larger than segment size
+ * bio into a request that has other bios, and let the low-level driver take
+ * care of splitting the request into multiple ranges in the on the wire
+ * format.
+ */
 static struct bio *blk_bio_discard_split(struct request_queue *q,
 					 struct bio *bio,
 					 struct bio_set *bs,
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 1e7174ffc9d4..9d515ae3a405 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -93,6 +93,7 @@ void blk_set_default_limits(struct queue_limits *lim)
 	lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
 	lim->virt_boundary_mask = 0;
 	lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
+	lim->max_discard_segment_size = UINT_MAX;
 	lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
 	lim->max_dev_sectors = 0;
 	lim->chunk_sectors = 0;
@@ -132,6 +133,7 @@ void blk_set_stacking_limits(struct queue_limits *lim)
 	lim->max_discard_segments = 1;
 	lim->max_hw_sectors = UINT_MAX;
 	lim->max_segment_size = UINT_MAX;
+	lim->max_discard_segment_size = UINT_MAX;
 	lim->max_sectors = UINT_MAX;
 	lim->max_dev_sectors = UINT_MAX;
 	lim->max_write_same_sectors = UINT_MAX;
@@ -376,6 +378,18 @@ void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size)
 EXPORT_SYMBOL(blk_queue_max_segment_size);
 
 /**
+ * blk_queue_max_discard_segment_size - set max segment size for discards
+ * @q:  the request queue for the device
+ * @max_size:  max size of a discard segment in bytes
+ **/
+void blk_queue_max_discard_segment_size(struct request_queue *q,
+		unsigned int max_size)
+{
+	q->limits.max_discard_segment_size = max_size;
+}
+EXPORT_SYMBOL_GPL(blk_queue_max_discard_segment_size);
+
+/**
  * blk_queue_logical_block_size - set logical block size for the queue
  * @q:  the request queue for the device
  * @size:  the logical block size, in bytes
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5a7da607ca04..3b3bd646f580 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -333,6 +333,7 @@ struct queue_limits {
 	unsigned short		max_segments;
 	unsigned short		max_integrity_segments;
 	unsigned short		max_discard_segments;
+	unsigned int		max_discard_segment_size;
 
 	unsigned char		misaligned;
 	unsigned char		discard_misaligned;
@@ -1150,6 +1151,8 @@ extern void blk_queue_max_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_discard_segments(struct request_queue *,
 		unsigned short);
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
+extern void blk_queue_max_discard_segment_size(struct request_queue *,
+		unsigned int);
 extern void blk_queue_max_discard_sectors(struct request_queue *q,
 		unsigned int max_discard_sectors);
 extern void blk_queue_max_write_same_sectors(struct request_queue *q,
@@ -1415,6 +1418,11 @@ static inline unsigned int queue_max_segment_size(struct request_queue *q)
 	return q->limits.max_segment_size;
 }
 
+static inline unsigned int queue_max_discard_segment_size(struct request_queue *q)
+{
+	return q->limits.max_discard_segment_size;
+}
+
 static inline unsigned short queue_logical_block_size(struct request_queue *q)
 {
 	int retval = 512;
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 6/7] sd: support multi-range TRIM for ATA disks
  2017-03-20 20:43 support ranges TRIM for libata Christoph Hellwig
                   ` (4 preceding siblings ...)
  2017-03-20 20:43 ` [PATCH 5/7] block: add a max_discard_segment_size queue limit Christoph Hellwig
@ 2017-03-20 20:43 ` Christoph Hellwig
  2017-03-27 23:38     ` Bart Van Assche
  2017-03-20 20:43 ` [PATCH 7/7] sd: use ZERO_PAGE for WRITE_SAME payloads Christoph Hellwig
  2017-03-21 18:59 ` support ranges TRIM for libata Tejun Heo
  7 siblings, 1 reply; 30+ messages in thread
From: Christoph Hellwig @ 2017-03-20 20:43 UTC (permalink / raw)
  To: tj, martin.petersen, axboe; +Cc: linux-ide, linux-scsi, linux-block

Almost the same scheme as the older multi-range support for NVMe.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/scsi/sd.c | 37 +++++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index cc8684818305..c18fe9ff1f8f 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -643,7 +643,7 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
 {
 	struct request_queue *q = sdkp->disk->queue;
 	unsigned int logical_block_size = sdkp->device->sector_size;
-	unsigned int max_blocks = 0;
+	unsigned int max_blocks = 0, max_ranges = 0, max_range_size = 0;
 
 	q->limits.discard_zeroes_data = 0;
 
@@ -698,13 +698,19 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
 		break;
 
 	case SD_LBP_ATA_TRIM:
-		max_blocks = 65535 * (512 / sizeof(__le64));
+		max_ranges = 512 / sizeof(__le64);
+		max_range_size = USHRT_MAX;
+		max_blocks = max_ranges * max_range_size;
 		if (sdkp->device->ata_trim_zeroes_data)
 			q->limits.discard_zeroes_data = 1;
 		break;
 	}
 
 	blk_queue_max_discard_sectors(q, max_blocks * (logical_block_size >> 9));
+	if (max_ranges)
+		blk_queue_max_discard_segments(q, max_ranges);
+	if (max_range_size)
+		blk_queue_max_discard_segment_size(q, max_range_size);
 	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
 }
 
@@ -805,9 +811,9 @@ static int sd_setup_ata_trim_cmnd(struct scsi_cmnd *cmd)
 {
 	struct scsi_device *sdp = cmd->device;
 	struct request *rq = cmd->request;
-	u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
-	u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
-	u32 data_len = sdp->sector_size, i;
+	u32 sector_shift = ilog2(sdp->sector_size);
+	u32 data_len = sdp->sector_size, i = 0;
+	struct bio *bio;
 	__le64 *buf;
 
 	rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
@@ -826,14 +832,21 @@ static int sd_setup_ata_trim_cmnd(struct scsi_cmnd *cmd)
 	cmd->cmnd[8] = data_len;
 
 	buf = page_address(rq->special_vec.bv_page);
-	for (i = 0; i < (data_len >> 3); i++) {
-		u64 n = min(nr_sectors, 0xffffu);
+	__rq_for_each_bio(bio, rq) {
+		u64 sector = bio->bi_iter.bi_sector >> (sector_shift - 9);
+		u32 nr_sectors = bio->bi_iter.bi_size >> sector_shift;
 
-		buf[i] = cpu_to_le64(sector | (n << 48));
-		if (nr_sectors <= 0xffff)
-			break;
-		sector += 0xffff;
-		nr_sectors -= 0xffff;
+		do {
+			u64 n = min(nr_sectors, 0xffffu);
+
+			buf[i] = cpu_to_le64(sector | (n << 48));
+			if (nr_sectors <= 0xffff)
+				break;
+			sector += 0xffff;
+			nr_sectors -= 0xffff;
+			i++;
+
+		} while (!WARN_ON_ONCE(i >= data_len >> 3));
 	}
 
 	cmd->allowed = SD_MAX_RETRIES;
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 7/7] sd: use ZERO_PAGE for WRITE_SAME payloads
  2017-03-20 20:43 support ranges TRIM for libata Christoph Hellwig
                   ` (5 preceding siblings ...)
  2017-03-20 20:43 ` [PATCH 6/7] sd: support multi-range TRIM for ATA disks Christoph Hellwig
@ 2017-03-20 20:43 ` Christoph Hellwig
  2017-03-27 23:40     ` Bart Van Assche
  2017-03-21 18:59 ` support ranges TRIM for libata Tejun Heo
  7 siblings, 1 reply; 30+ messages in thread
From: Christoph Hellwig @ 2017-03-20 20:43 UTC (permalink / raw)
  To: tj, martin.petersen, axboe; +Cc: linux-ide, linux-scsi, linux-block

We're never touching the contents of the page, so save a memory
allocation for these cases.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/scsi/sd.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index c18fe9ff1f8f..af632e350ab4 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -756,7 +756,7 @@ static int sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd)
 	u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
 	u32 data_len = sdp->sector_size;
 
-	rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+	rq->special_vec.bv_page = ZERO_PAGE(0);
 	if (!rq->special_vec.bv_page)
 		return BLKPREP_DEFER;
 	rq->special_vec.bv_offset = 0;
@@ -785,7 +785,7 @@ static int sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, bool unmap)
 	u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
 	u32 data_len = sdp->sector_size;
 
-	rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+	rq->special_vec.bv_page = ZERO_PAGE(0);
 	if (!rq->special_vec.bv_page)
 		return BLKPREP_DEFER;
 	rq->special_vec.bv_offset = 0;
@@ -1256,7 +1256,8 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt)
 {
 	struct request *rq = SCpnt->request;
 
-	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
+	if ((rq->rq_flags & RQF_SPECIAL_PAYLOAD) &&
+	    rq->special_vec.bv_page != ZERO_PAGE(0))
 		__free_page(rq->special_vec.bv_page);
 
 	if (SCpnt->cmnd != scsi_req(rq)->cmd) {
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* Re: support ranges TRIM for libata
  2017-03-20 20:43 support ranges TRIM for libata Christoph Hellwig
                   ` (6 preceding siblings ...)
  2017-03-20 20:43 ` [PATCH 7/7] sd: use ZERO_PAGE for WRITE_SAME payloads Christoph Hellwig
@ 2017-03-21 18:59 ` Tejun Heo
  2017-03-22 18:19   ` Christoph Hellwig
  7 siblings, 1 reply; 30+ messages in thread
From: Tejun Heo @ 2017-03-21 18:59 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: martin.petersen, axboe, linux-ide, linux-scsi, linux-block

Hello,

On Mon, Mar 20, 2017 at 04:43:12PM -0400, Christoph Hellwig wrote:
> This series implements rangeѕ discard for ATA SSDs.  Compared to the
> initial NVMe support there are two things that complicate the ATA
> support:
> 
>  - ATA only suports 16-bit long ranges
>  - the whole mess of generating a SCSI command first and then
>    translating it to an ATA one.
> 
> This series adds support for limited range size to the block layer,
> and stops translating discard commands - instead we add a new
> Vendor Specific SCSI command that contains the TRIM payload when
> the device asks for it.

I do like the fact that this is a lot simpler than the previous
implementation but am not quite sure we want to deviate significantly
from what we do for other commands (command translation).  Is it
because fixing the existing implementation would involve invaisve
changes including memory allocations?

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: support ranges TRIM for libata
  2017-03-21 18:59 ` support ranges TRIM for libata Tejun Heo
@ 2017-03-22 18:19   ` Christoph Hellwig
  2017-03-23 13:47     ` James Bottomley
  0 siblings, 1 reply; 30+ messages in thread
From: Christoph Hellwig @ 2017-03-22 18:19 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Christoph Hellwig, martin.petersen, axboe, linux-ide, linux-scsi,
	linux-block

On Tue, Mar 21, 2017 at 02:59:01PM -0400, Tejun Heo wrote:
> I do like the fact that this is a lot simpler than the previous
> implementation but am not quite sure we want to deviate significantly
> from what we do for other commands (command translation).  Is it
> because fixing the existing implementation would involve invaisve
> changes including memory allocations?

The current implementation already has the issue of that it does
corrupt user data reliably if the using SG_IO for WRITE SAME commands.

Doing ranges using translation would turn into a nightmare because
ATA TRIM ranges are 16 bits long while SCSI UNAMP ranges are 32-bit,
so we effectively can't translated them without introducing a
non-standard hook between libata and scsi to communicate that
limit.  And once we're down that path we might as well just do the
right thing directly.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: support ranges TRIM for libata
  2017-03-22 18:19   ` Christoph Hellwig
@ 2017-03-23 13:47     ` James Bottomley
  2017-03-23 13:55       ` Christoph Hellwig
  0 siblings, 1 reply; 30+ messages in thread
From: James Bottomley @ 2017-03-23 13:47 UTC (permalink / raw)
  To: Christoph Hellwig, Tejun Heo
  Cc: martin.petersen, axboe, linux-ide, linux-scsi, linux-block

On Wed, 2017-03-22 at 19:19 +0100, Christoph Hellwig wrote:
> On Tue, Mar 21, 2017 at 02:59:01PM -0400, Tejun Heo wrote:
> > I do like the fact that this is a lot simpler than the previous
> > implementation but am not quite sure we want to deviate 
> > significantly from what we do for other commands (command 
> > translation).  Is it because fixing the existing implementation 
> > would involve invaisve changes including memory allocations?
> 
> The current implementation already has the issue of that it does
> corrupt user data reliably if the using SG_IO for WRITE SAME
> commands.

That does need fixing.

> Doing ranges using translation would turn into a nightmare because
> ATA TRIM ranges are 16 bits long while SCSI UNAMP ranges are 32-bit,
> so we effectively can't translated them without introducing a
> non-standard hook between libata and scsi to communicate that
> limit.

Why can't we do what the t10 sat document recommends: if the ATA device
doesn't support the XL version (32 bit ranges) then translate unmap to
multiple non-XL commands?

I don't necessarily object to the vendor specific 1<->1 approach, it's
just it won't fix the problem you cited above (SG_IO WRITE SAME), its
just that now we error the command, which may cause some surprise.  I
also wonder if we couldn't simply do an ATA_16 TRIM if we're already
going to all the trouble of recognising ATA devices in the sd discard
path?

James

>   And once we're down that path we might as well just do the
> right thing directly.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: support ranges TRIM for libata
  2017-03-23 13:47     ` James Bottomley
@ 2017-03-23 13:55       ` Christoph Hellwig
  2017-03-23 14:35         ` James Bottomley
  0 siblings, 1 reply; 30+ messages in thread
From: Christoph Hellwig @ 2017-03-23 13:55 UTC (permalink / raw)
  To: James Bottomley
  Cc: Christoph Hellwig, Tejun Heo, martin.petersen, axboe, linux-ide,
	linux-scsi, linux-block

On Thu, Mar 23, 2017 at 09:47:41AM -0400, James Bottomley wrote:
> > The current implementation already has the issue of that it does
> > corrupt user data reliably if the using SG_IO for WRITE SAME
> > commands.
> 
> That does need fixing.

I don't think it's fixable as long as we translate the data payload.

> Why can't we do what the t10 sat document recommends: if the ATA device
> doesn't support the XL version (32 bit ranges) then translate unmap to
> multiple non-XL commands?

Because libata sits underneath the tag allocator we'd getting into
a giant set of problems.  Where do you expect the new commands to
magically come from?  And adhere to the queueing limits and not actually
deadlock in one way or another?

> I don't necessarily object to the vendor specific 1<->1 approach, it's
> just it won't fix the problem you cited above (SG_IO WRITE SAME), its
> just that now we error the command, which may cause some surprise.

We now error WRITE SAME for passthrough consistently.  Before we only
accepted it only with the unmap bit set, and even did so incorrectly
(not checking that the payload was all zeros).

> I
> also wonder if we couldn't simply do an ATA_16 TRIM if we're already
> going to all the trouble of recognising ATA devices in the sd discard
> path?

We probably could do that as well.  But that would drag a lot more
ATA-specific code into sd.c than just formatting the ranges.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: support ranges TRIM for libata
  2017-03-23 13:55       ` Christoph Hellwig
@ 2017-03-23 14:35         ` James Bottomley
  2017-03-23 14:43           ` Christoph Hellwig
  0 siblings, 1 reply; 30+ messages in thread
From: James Bottomley @ 2017-03-23 14:35 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Tejun Heo, martin.petersen, axboe, linux-ide, linux-scsi, linux-block

On Thu, 2017-03-23 at 14:55 +0100, Christoph Hellwig wrote:
> On Thu, Mar 23, 2017 at 09:47:41AM -0400, James Bottomley wrote:
> > > The current implementation already has the issue of that it does
> > > corrupt user data reliably if the using SG_IO for WRITE SAME
> > > commands.
> > 
> > That does need fixing.
> 
> I don't think it's fixable as long as we translate the data payload.
> 
> > Why can't we do what the t10 sat document recommends: if the ATA 
> > device doesn't support the XL version (32 bit ranges) then 
> > translate unmap to multiple non-XL commands?
> 
> Because libata sits underneath the tag allocator we'd getting into
> a giant set of problems.  Where do you expect the new commands to
> magically come from?  And adhere to the queueing limits and not 
> actually deadlock in one way or another?

I'm certainly not saying we blindly follow t10, but I believe their
intent is to issue the next command from the completion of the first
(we can do this using qc->complete_fn, like atapi_request_sense).  That
way we don't get any tag problems because there's only one command
outstanding at once; reusing the qc means no allocation issues either.

The t10 approach does mean the SG_IO problem is actually fixable rather
than simply erroring out.

> > I don't necessarily object to the vendor specific 1<->1 approach, 
> > it's just it won't fix the problem you cited above (SG_IO WRITE 
> > SAME), its just that now we error the command, which may cause some
> > surprise.
> 
> We now error WRITE SAME for passthrough consistently.  Before we only
> accepted it only with the unmap bit set, and even did so incorrectly
> (not checking that the payload was all zeros).

If it never worked for anyone, I'm OK with changing it to error out.

> > I also wonder if we couldn't simply do an ATA_16 TRIM if we're
> > already going to all the trouble of recognising ATA devices in the 
> > sd discard path?
> 
> We probably could do that as well.  But that would drag a lot more
> ATA-specific code into sd.c than just formatting the ranges.

That's up to you ... from the point of view of code documenting itself,
forming the ATA_16 TRIM in sd and not doing any satl transformation is
easier for others to follow, but if it's going to cause more code, I'm
only marginal on the advantages of easier to follow code.

James

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: support ranges TRIM for libata
  2017-03-23 14:35         ` James Bottomley
@ 2017-03-23 14:43           ` Christoph Hellwig
  2017-03-23 15:04             ` Tejun Heo
  2017-03-23 15:30             ` Christoph Hellwig
  0 siblings, 2 replies; 30+ messages in thread
From: Christoph Hellwig @ 2017-03-23 14:43 UTC (permalink / raw)
  To: James Bottomley
  Cc: Christoph Hellwig, Tejun Heo, martin.petersen, axboe, linux-ide,
	linux-scsi, linux-block

On Thu, Mar 23, 2017 at 10:35:06AM -0400, James Bottomley wrote:
> I'm certainly not saying we blindly follow t10, but I believe their
> intent is to issue the next command from the completion of the first
> (we can do this using qc->complete_fn, like atapi_request_sense).  That
> way we don't get any tag problems because there's only one command
> outstanding at once; reusing the qc means no allocation issues either.
> 
> The t10 approach does mean the SG_IO problem is actually fixable rather
> than simply erroring out.

It would be sort of fixable, but with a lot of hackery.

> That's up to you ... from the point of view of code documenting itself,
> forming the ATA_16 TRIM in sd and not doing any satl transformation is
> easier for others to follow, but if it's going to cause more code, I'm
> only marginal on the advantages of easier to follow code.

I tried this earlier before giving up on it because it looked to ugly.
But I can complete that version of it and post it for people to compare.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: support ranges TRIM for libata
  2017-03-23 14:43           ` Christoph Hellwig
@ 2017-03-23 15:04             ` Tejun Heo
  2017-03-23 15:27               ` Christoph Hellwig
  2017-03-23 15:30             ` Christoph Hellwig
  1 sibling, 1 reply; 30+ messages in thread
From: Tejun Heo @ 2017-03-23 15:04 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: James Bottomley, martin.petersen, axboe, linux-ide, linux-scsi,
	linux-block

Hello, Christoph.

On Thu, Mar 23, 2017 at 03:43:30PM +0100, Christoph Hellwig wrote:
> > That's up to you ... from the point of view of code documenting itself,
> > forming the ATA_16 TRIM in sd and not doing any satl transformation is
> > easier for others to follow, but if it's going to cause more code, I'm
> > only marginal on the advantages of easier to follow code.
> 
> I tried this earlier before giving up on it because it looked to ugly.
> But I can complete that version of it and post it for people to compare.

I kinda like the idea of sticking with satl as that's how libata has
been doing most things even if the implementation is uglier.  It'd be
great to find out whether the ugliness would be acceptable or too
much.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: support ranges TRIM for libata
  2017-03-23 15:04             ` Tejun Heo
@ 2017-03-23 15:27               ` Christoph Hellwig
  0 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2017-03-23 15:27 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Christoph Hellwig, James Bottomley, martin.petersen, axboe,
	linux-ide, linux-scsi, linux-block

On Thu, Mar 23, 2017 at 11:04:58AM -0400, Tejun Heo wrote:
> I kinda like the idea of sticking with satl as that's how libata has
> been doing most things even if the implementation is uglier.  It'd be
> great to find out whether the ugliness would be acceptable or too
> much.

The SATL way is simply broken.  I'll just leave the code corrupting
user data and 5 times slower than it could be then.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: support ranges TRIM for libata
  2017-03-23 14:43           ` Christoph Hellwig
  2017-03-23 15:04             ` Tejun Heo
@ 2017-03-23 15:30             ` Christoph Hellwig
  2017-03-23 15:39               ` Martin K. Petersen
  1 sibling, 1 reply; 30+ messages in thread
From: Christoph Hellwig @ 2017-03-23 15:30 UTC (permalink / raw)
  To: James Bottomley
  Cc: Christoph Hellwig, Tejun Heo, martin.petersen, axboe, linux-ide,
	linux-scsi, linux-block

On Thu, Mar 23, 2017 at 03:43:30PM +0100, Christoph Hellwig wrote:
> I tried this earlier before giving up on it because it looked to ugly.
> But I can complete that version of it and post it for people to compare.

Meh, I remember why I gave up on it - to support queued trim passthrough
we'd need to implement ATA 32 for the auxiliary fields and thus support
32-bit CDBs.  I don't really want to go there..

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: support ranges TRIM for libata
  2017-03-23 15:30             ` Christoph Hellwig
@ 2017-03-23 15:39               ` Martin K. Petersen
  0 siblings, 0 replies; 30+ messages in thread
From: Martin K. Petersen @ 2017-03-23 15:39 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: James Bottomley, Tejun Heo, martin.petersen, axboe, linux-ide,
	linux-scsi, linux-block

Christoph Hellwig <hch@lst.de> writes:

> Meh, I remember why I gave up on it - to support queued trim passthrough
> we'd need to implement ATA 32 for the auxiliary fields and thus support
> 32-bit CDBs.  I don't really want to go there..

I wish we could stick with SATL. However, I have attempted this a few
times over the years and I am with Christoph here. Due to the
discrepancies between ACS and SBC it's a twisted mess. And the iterative
approach has not worked well for HBA SATLs either. Quite the contrary.

So I think sticking the DSM TRIM payload onto a SCSI command is the path
of least resistance. And then NAK'ing attempts to issue these commands
through sg.

I'll take closer look at the entire series tomorrow or Monday. I want to
test multiple ranges on my "Little Shop of SSD Horrors" when I get back
home from LSF/MM.

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/7] ѕd: split sd_setup_discard_cmnd
  2017-03-20 20:43 ` [PATCH 1/7] ѕd: split sd_setup_discard_cmnd Christoph Hellwig
@ 2017-03-27 22:24     ` Bart Van Assche
  0 siblings, 0 replies; 30+ messages in thread
From: Bart Van Assche @ 2017-03-27 22:24 UTC (permalink / raw)
  To: hch, tj, martin.petersen, axboe; +Cc: linux-scsi, linux-block, linux-ide

On Mon, 2017-03-20 at 16:43 -0400, Christoph Hellwig wrote:
> +	u64 sector =3D blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
> +	u32 nr_sectors =3D blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);

Although I know this is an issue in the existing code and not something
introduced by you: please consider using logical_to_sectors() instead of
open-coding this function. Otherwise this patch looks fine to me.

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/7] ѕd: split sd_setup_discard_cmnd
@ 2017-03-27 22:24     ` Bart Van Assche
  0 siblings, 0 replies; 30+ messages in thread
From: Bart Van Assche @ 2017-03-27 22:24 UTC (permalink / raw)
  To: hch, tj, martin.petersen, axboe; +Cc: linux-scsi, linux-block, linux-ide

On Mon, 2017-03-20 at 16:43 -0400, Christoph Hellwig wrote:
> +	u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
> +	u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);

Although I know this is an issue in the existing code and not something
introduced by you: please consider using logical_to_sectors() instead of
open-coding this function. Otherwise this patch looks fine to me.

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 2/7] sd: provide a new ata trim provisioning mode
  2017-03-20 20:43 ` [PATCH 2/7] sd: provide a new ata trim provisioning mode Christoph Hellwig
@ 2017-03-27 22:40     ` Bart Van Assche
  0 siblings, 0 replies; 30+ messages in thread
From: Bart Van Assche @ 2017-03-27 22:40 UTC (permalink / raw)
  To: hch, tj, martin.petersen, axboe; +Cc: linux-scsi, linux-block, linux-ide

On Mon, 2017-03-20 at 16:43 -0400, Christoph Hellwig wrote:
> +	case SD_LBP_ATA_TRIM:
> +		max_blocks =3D 65535 * (512 / sizeof(__le64));
> +		if (sdkp->device->ata_trim_zeroes_data)
> +			q->limits.discard_zeroes_data =3D 1;
> +		break;

Do we need a comment here that explains where the numbers 65535 and
512 come from?

> +	u64 sector =3D blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
> +	u32 nr_sectors =3D blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);

Please consider using logical_to_sectors() instead of open-coding this
function.

Thanks,

Bart.=

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 2/7] sd: provide a new ata trim provisioning mode
@ 2017-03-27 22:40     ` Bart Van Assche
  0 siblings, 0 replies; 30+ messages in thread
From: Bart Van Assche @ 2017-03-27 22:40 UTC (permalink / raw)
  To: hch, tj, martin.petersen, axboe; +Cc: linux-scsi, linux-block, linux-ide

On Mon, 2017-03-20 at 16:43 -0400, Christoph Hellwig wrote:
> +	case SD_LBP_ATA_TRIM:
> +		max_blocks = 65535 * (512 / sizeof(__le64));
> +		if (sdkp->device->ata_trim_zeroes_data)
> +			q->limits.discard_zeroes_data = 1;
> +		break;

Do we need a comment here that explains where the numbers 65535 and
512 come from?

> +	u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
> +	u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);

Please consider using logical_to_sectors() instead of open-coding this
function.

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 5/7] block: add a max_discard_segment_size queue limit
  2017-03-20 20:43 ` [PATCH 5/7] block: add a max_discard_segment_size queue limit Christoph Hellwig
@ 2017-03-27 23:09     ` Bart Van Assche
  0 siblings, 0 replies; 30+ messages in thread
From: Bart Van Assche @ 2017-03-27 23:09 UTC (permalink / raw)
  To: hch, tj, martin.petersen, axboe; +Cc: linux-scsi, linux-block, linux-ide

On Mon, 2017-03-20 at 16:43 -0400, Christoph Hellwig wrote:
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 5a7da607ca04..3b3bd646f580 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -333,6 +333,7 @@ struct queue_limits {
>  	unsigned short		max_segments;
>  	unsigned short		max_integrity_segments;
>  	unsigned short		max_discard_segments;
> +	unsigned int		max_discard_segment_size;
> =20
>  	unsigned char		misaligned;
>  	unsigned char		discard_misaligned;
> @@ -1150,6 +1151,8 @@ extern void blk_queue_max_segments(struct request_q=
ueue *, unsigned short);
>  extern void blk_queue_max_discard_segments(struct request_queue *,
>  		unsigned short);
>  extern void blk_queue_max_segment_size(struct request_queue *, unsigned =
int);
> +extern void blk_queue_max_discard_segment_size(struct request_queue *,
> +		unsigned int);
>  extern void blk_queue_max_discard_sectors(struct request_queue *q,
>  		unsigned int max_discard_sectors);
>  extern void blk_queue_max_write_same_sectors(struct request_queue *q,
> @@ -1415,6 +1418,11 @@ static inline unsigned int queue_max_segment_size(=
struct request_queue *q)
>  	return q->limits.max_segment_size;
>  }
> =20
> +static inline unsigned int queue_max_discard_segment_size(struct request=
_queue *q)
> +{
> +	return q->limits.max_discard_segment_size;
> +}
> +
>  static inline unsigned short queue_logical_block_size(struct request_que=
ue *q)
>  {
>  	int retval =3D 512;

Hello Christoph,

It's probably a good idea to add a comment somewhere that the unit of
max_discard_segment_size is one byte.

Additionally, should it be mentioned in the commit message that this
patch limits the maximum discard segment size to 4 GB?

Thanks,

Bart.=

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 5/7] block: add a max_discard_segment_size queue limit
@ 2017-03-27 23:09     ` Bart Van Assche
  0 siblings, 0 replies; 30+ messages in thread
From: Bart Van Assche @ 2017-03-27 23:09 UTC (permalink / raw)
  To: hch, tj, martin.petersen, axboe; +Cc: linux-scsi, linux-block, linux-ide

On Mon, 2017-03-20 at 16:43 -0400, Christoph Hellwig wrote:
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 5a7da607ca04..3b3bd646f580 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -333,6 +333,7 @@ struct queue_limits {
>  	unsigned short		max_segments;
>  	unsigned short		max_integrity_segments;
>  	unsigned short		max_discard_segments;
> +	unsigned int		max_discard_segment_size;
>  
>  	unsigned char		misaligned;
>  	unsigned char		discard_misaligned;
> @@ -1150,6 +1151,8 @@ extern void blk_queue_max_segments(struct request_queue *, unsigned short);
>  extern void blk_queue_max_discard_segments(struct request_queue *,
>  		unsigned short);
>  extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
> +extern void blk_queue_max_discard_segment_size(struct request_queue *,
> +		unsigned int);
>  extern void blk_queue_max_discard_sectors(struct request_queue *q,
>  		unsigned int max_discard_sectors);
>  extern void blk_queue_max_write_same_sectors(struct request_queue *q,
> @@ -1415,6 +1418,11 @@ static inline unsigned int queue_max_segment_size(struct request_queue *q)
>  	return q->limits.max_segment_size;
>  }
>  
> +static inline unsigned int queue_max_discard_segment_size(struct request_queue *q)
> +{
> +	return q->limits.max_discard_segment_size;
> +}
> +
>  static inline unsigned short queue_logical_block_size(struct request_queue *q)
>  {
>  	int retval = 512;

Hello Christoph,

It's probably a good idea to add a comment somewhere that the unit of
max_discard_segment_size is one byte.

Additionally, should it be mentioned in the commit message that this
patch limits the maximum discard segment size to 4 GB?

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 6/7] sd: support multi-range TRIM for ATA disks
  2017-03-20 20:43 ` [PATCH 6/7] sd: support multi-range TRIM for ATA disks Christoph Hellwig
@ 2017-03-27 23:38     ` Bart Van Assche
  0 siblings, 0 replies; 30+ messages in thread
From: Bart Van Assche @ 2017-03-27 23:38 UTC (permalink / raw)
  To: hch, tj, martin.petersen, axboe; +Cc: linux-scsi, linux-block, linux-ide

On Mon, 2017-03-20 at 16:43 -0400, Christoph Hellwig wrote:
> @@ -698,13 +698,19 @@ static void sd_config_discard(struct scsi_disk *sdk=
p, unsigned int mode)
>  		break;
> =20
>  	case SD_LBP_ATA_TRIM:
> -		max_blocks =3D 65535 * (512 / sizeof(__le64));
> +		max_ranges =3D 512 / sizeof(__le64);
> +		max_range_size =3D USHRT_MAX;
> +		max_blocks =3D max_ranges * max_range_size;
>  		if (sdkp->device->ata_trim_zeroes_data)
>  			q->limits.discard_zeroes_data =3D 1;
>  		break;
>  	}
> =20
>  	blk_queue_max_discard_sectors(q, max_blocks * (logical_block_size >> 9)=
);
> +	if (max_ranges)
> +		blk_queue_max_discard_segments(q, max_ranges);
> +	if (max_range_size)
> +		blk_queue_max_discard_segment_size(q, max_range_size);
>  	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
>  }

Hello Christoph,

Does blk_queue_max_discard_segment_size() expect a second argument that is =
a
number of bytes? Is max_range_size a number of logical blocks that each hav=
e
a size 1 << sector_shift?

> @@ -826,14 +832,21 @@ static int sd_setup_ata_trim_cmnd(struct scsi_cmnd =
*cmd)
>  	cmd->cmnd[8] =3D data_len;
> =20
>  	buf =3D page_address(rq->special_vec.bv_page);
> -	for (i =3D 0; i < (data_len >> 3); i++) {
> -		u64 n =3D min(nr_sectors, 0xffffu);
> +	__rq_for_each_bio(bio, rq) {
> +		u64 sector =3D bio->bi_iter.bi_sector >> (sector_shift - 9);
> +		u32 nr_sectors =3D bio->bi_iter.bi_size >> sector_shift;
> =20
> -		buf[i] =3D cpu_to_le64(sector | (n << 48));
> -		if (nr_sectors <=3D 0xffff)
> -			break;
> -		sector +=3D 0xffff;
> -		nr_sectors -=3D 0xffff;
> +		do {
> +			u64 n =3D min(nr_sectors, 0xffffu);
> +
> +			buf[i] =3D cpu_to_le64(sector | (n << 48));
> +			if (nr_sectors <=3D 0xffff)
> +				break;
> +			sector +=3D 0xffff;
> +			nr_sectors -=3D 0xffff;
> +			i++;
> +
> +		} while (!WARN_ON_ONCE(i >=3D data_len >> 3));
>  	}
> =20
>  	cmd->allowed =3D SD_MAX_RETRIES;

It's unfortunate that the loop-end test occurs twice (i < data_len >> 3).
Please consider using put_unaligned_le64() instead of cpu_to_le64() such
that the data type of buf can be changed from __le64* into void *. With
that change and by introducing e.g. the following:

void *end;
[ ... ]
end =3D (void *)buf + data_len;

the loop variable 'i' can be eliminated. If buf[i++] =3D ... would be
changed into *buf++ =3D ... then that would allow to change the two
occurrences of (i < data_len >> 3) into (buf < end).

Thanks,

Bart.=

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 6/7] sd: support multi-range TRIM for ATA disks
@ 2017-03-27 23:38     ` Bart Van Assche
  0 siblings, 0 replies; 30+ messages in thread
From: Bart Van Assche @ 2017-03-27 23:38 UTC (permalink / raw)
  To: hch, tj, martin.petersen, axboe; +Cc: linux-scsi, linux-block, linux-ide

On Mon, 2017-03-20 at 16:43 -0400, Christoph Hellwig wrote:
> @@ -698,13 +698,19 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
>  		break;
>  
>  	case SD_LBP_ATA_TRIM:
> -		max_blocks = 65535 * (512 / sizeof(__le64));
> +		max_ranges = 512 / sizeof(__le64);
> +		max_range_size = USHRT_MAX;
> +		max_blocks = max_ranges * max_range_size;
>  		if (sdkp->device->ata_trim_zeroes_data)
>  			q->limits.discard_zeroes_data = 1;
>  		break;
>  	}
>  
>  	blk_queue_max_discard_sectors(q, max_blocks * (logical_block_size >> 9));
> +	if (max_ranges)
> +		blk_queue_max_discard_segments(q, max_ranges);
> +	if (max_range_size)
> +		blk_queue_max_discard_segment_size(q, max_range_size);
>  	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
>  }

Hello Christoph,

Does blk_queue_max_discard_segment_size() expect a second argument that is a
number of bytes? Is max_range_size a number of logical blocks that each have
a size 1 << sector_shift?

> @@ -826,14 +832,21 @@ static int sd_setup_ata_trim_cmnd(struct scsi_cmnd *cmd)
>  	cmd->cmnd[8] = data_len;
>  
>  	buf = page_address(rq->special_vec.bv_page);
> -	for (i = 0; i < (data_len >> 3); i++) {
> -		u64 n = min(nr_sectors, 0xffffu);
> +	__rq_for_each_bio(bio, rq) {
> +		u64 sector = bio->bi_iter.bi_sector >> (sector_shift - 9);
> +		u32 nr_sectors = bio->bi_iter.bi_size >> sector_shift;
>  
> -		buf[i] = cpu_to_le64(sector | (n << 48));
> -		if (nr_sectors <= 0xffff)
> -			break;
> -		sector += 0xffff;
> -		nr_sectors -= 0xffff;
> +		do {
> +			u64 n = min(nr_sectors, 0xffffu);
> +
> +			buf[i] = cpu_to_le64(sector | (n << 48));
> +			if (nr_sectors <= 0xffff)
> +				break;
> +			sector += 0xffff;
> +			nr_sectors -= 0xffff;
> +			i++;
> +
> +		} while (!WARN_ON_ONCE(i >= data_len >> 3));
>  	}
>  
>  	cmd->allowed = SD_MAX_RETRIES;

It's unfortunate that the loop-end test occurs twice (i < data_len >> 3).
Please consider using put_unaligned_le64() instead of cpu_to_le64() such
that the data type of buf can be changed from __le64* into void *. With
that change and by introducing e.g. the following:

void *end;
[ ... ]
end = (void *)buf + data_len;

the loop variable 'i' can be eliminated. If buf[i++] = ... would be
changed into *buf++ = ... then that would allow to change the two
occurrences of (i < data_len >> 3) into (buf < end).

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 7/7] sd: use ZERO_PAGE for WRITE_SAME payloads
  2017-03-20 20:43 ` [PATCH 7/7] sd: use ZERO_PAGE for WRITE_SAME payloads Christoph Hellwig
@ 2017-03-27 23:40     ` Bart Van Assche
  0 siblings, 0 replies; 30+ messages in thread
From: Bart Van Assche @ 2017-03-27 23:40 UTC (permalink / raw)
  To: hch, tj, martin.petersen, axboe; +Cc: linux-scsi, linux-block, linux-ide

On Mon, 2017-03-20 at 16:43 -0400, Christoph Hellwig wrote:
> We're never touching the contents of the page, so save a memory
> allocation for these cases.
>=20
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  drivers/scsi/sd.c | 7 ++++---
>  1 file changed, 4 insertions(+), 3 deletions(-)
>=20
> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> index c18fe9ff1f8f..af632e350ab4 100644
> --- a/drivers/scsi/sd.c
> +++ b/drivers/scsi/sd.c
> @@ -756,7 +756,7 @@ static int sd_setup_write_same16_cmnd(struct scsi_cmn=
d *cmd)
>  	u32 nr_sectors =3D blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
>  	u32 data_len =3D sdp->sector_size;
> =20
> -	rq->special_vec.bv_page =3D alloc_page(GFP_ATOMIC | __GFP_ZERO);
> +	rq->special_vec.bv_page =3D ZERO_PAGE(0);
>  	if (!rq->special_vec.bv_page)
>  		return BLKPREP_DEFER;
>  	rq->special_vec.bv_offset =3D 0;
> @@ -785,7 +785,7 @@ static int sd_setup_write_same10_cmnd(struct scsi_cmn=
d *cmd, bool unmap)
>  	u32 nr_sectors =3D blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
>  	u32 data_len =3D sdp->sector_size;
> =20
> -	rq->special_vec.bv_page =3D alloc_page(GFP_ATOMIC | __GFP_ZERO);
> +	rq->special_vec.bv_page =3D ZERO_PAGE(0);
>  	if (!rq->special_vec.bv_page)
>  		return BLKPREP_DEFER;
>  	rq->special_vec.bv_offset =3D 0;
> @@ -1256,7 +1256,8 @@ static void sd_uninit_command(struct scsi_cmnd *SCp=
nt)
>  {
>  	struct request *rq =3D SCpnt->request;
> =20
> -	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
> +	if ((rq->rq_flags & RQF_SPECIAL_PAYLOAD) &&
> +	    rq->special_vec.bv_page !=3D ZERO_PAGE(0))
>  		__free_page(rq->special_vec.bv_page);
> =20
>  	if (SCpnt->cmnd !=3D scsi_req(rq)->cmd) {

Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 7/7] sd: use ZERO_PAGE for WRITE_SAME payloads
@ 2017-03-27 23:40     ` Bart Van Assche
  0 siblings, 0 replies; 30+ messages in thread
From: Bart Van Assche @ 2017-03-27 23:40 UTC (permalink / raw)
  To: hch, tj, martin.petersen, axboe; +Cc: linux-scsi, linux-block, linux-ide

On Mon, 2017-03-20 at 16:43 -0400, Christoph Hellwig wrote:
> We're never touching the contents of the page, so save a memory
> allocation for these cases.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  drivers/scsi/sd.c | 7 ++++---
>  1 file changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> index c18fe9ff1f8f..af632e350ab4 100644
> --- a/drivers/scsi/sd.c
> +++ b/drivers/scsi/sd.c
> @@ -756,7 +756,7 @@ static int sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd)
>  	u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
>  	u32 data_len = sdp->sector_size;
>  
> -	rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
> +	rq->special_vec.bv_page = ZERO_PAGE(0);
>  	if (!rq->special_vec.bv_page)
>  		return BLKPREP_DEFER;
>  	rq->special_vec.bv_offset = 0;
> @@ -785,7 +785,7 @@ static int sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, bool unmap)
>  	u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
>  	u32 data_len = sdp->sector_size;
>  
> -	rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
> +	rq->special_vec.bv_page = ZERO_PAGE(0);
>  	if (!rq->special_vec.bv_page)
>  		return BLKPREP_DEFER;
>  	rq->special_vec.bv_offset = 0;
> @@ -1256,7 +1256,8 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt)
>  {
>  	struct request *rq = SCpnt->request;
>  
> -	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
> +	if ((rq->rq_flags & RQF_SPECIAL_PAYLOAD) &&
> +	    rq->special_vec.bv_page != ZERO_PAGE(0))
>  		__free_page(rq->special_vec.bv_page);
>  
>  	if (SCpnt->cmnd != scsi_req(rq)->cmd) {

Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/7] ѕd: split sd_setup_discard_cmnd
  2017-03-27 22:24     ` Bart Van Assche
  (?)
@ 2017-03-28 14:05     ` axboe
  2017-03-30  8:49       ` hch
  -1 siblings, 1 reply; 30+ messages in thread
From: axboe @ 2017-03-28 14:05 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: hch, tj, martin.petersen, linux-scsi, linux-block, linux-ide

On Mon, Mar 27 2017, Bart Van Assche wrote:
> On Mon, 2017-03-20 at 16:43 -0400, Christoph Hellwig wrote:
> > +	u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
> > +	u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
> 
> Although I know this is an issue in the existing code and not something
> introduced by you: please consider using logical_to_sectors() instead of
> open-coding this function. Otherwise this patch looks fine to me.

The downside of doing that is that we still call ilog2() twice, which
sucks. Would be faster to cache ilog2(sector_size) and use that in the
shift calculation.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/7] ѕd: split sd_setup_discard_cmnd
  2017-03-28 14:05     ` axboe
@ 2017-03-30  8:49       ` hch
  0 siblings, 0 replies; 30+ messages in thread
From: hch @ 2017-03-30  8:49 UTC (permalink / raw)
  To: axboe
  Cc: Bart Van Assche, hch, tj, martin.petersen, linux-scsi,
	linux-block, linux-ide

On Tue, Mar 28, 2017 at 08:05:09AM -0600, axboe@kernel.dk wrote:
> > Although I know this is an issue in the existing code and not something
> > introduced by you: please consider using logical_to_sectors() instead of
> > open-coding this function. Otherwise this patch looks fine to me.
> 
> The downside of doing that is that we still call ilog2() twice, which
> sucks. Would be faster to cache ilog2(sector_size) and use that in the
> shift calculation.

I suspect that gcc is smart enough to optimize it away.  That beeing said
while this looks like a nice cleanup this patch is just supposed to move
code, so I'd rather not add the change here and leave it for a separate
submission.

^ permalink raw reply	[flat|nested] 30+ messages in thread

end of thread, other threads:[~2017-03-30  8:49 UTC | newest]

Thread overview: 30+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-20 20:43 support ranges TRIM for libata Christoph Hellwig
2017-03-20 20:43 ` [PATCH 1/7] ѕd: split sd_setup_discard_cmnd Christoph Hellwig
2017-03-27 22:24   ` Bart Van Assche
2017-03-27 22:24     ` Bart Van Assche
2017-03-28 14:05     ` axboe
2017-03-30  8:49       ` hch
2017-03-20 20:43 ` [PATCH 2/7] sd: provide a new ata trim provisioning mode Christoph Hellwig
2017-03-27 22:40   ` Bart Van Assche
2017-03-27 22:40     ` Bart Van Assche
2017-03-20 20:43 ` [PATCH 3/7] libata: remove SCT WRITE SAME support Christoph Hellwig
2017-03-20 20:43 ` [PATCH 4/7] libata: simplify the trim implementation Christoph Hellwig
2017-03-20 20:43 ` [PATCH 5/7] block: add a max_discard_segment_size queue limit Christoph Hellwig
2017-03-27 23:09   ` Bart Van Assche
2017-03-27 23:09     ` Bart Van Assche
2017-03-20 20:43 ` [PATCH 6/7] sd: support multi-range TRIM for ATA disks Christoph Hellwig
2017-03-27 23:38   ` Bart Van Assche
2017-03-27 23:38     ` Bart Van Assche
2017-03-20 20:43 ` [PATCH 7/7] sd: use ZERO_PAGE for WRITE_SAME payloads Christoph Hellwig
2017-03-27 23:40   ` Bart Van Assche
2017-03-27 23:40     ` Bart Van Assche
2017-03-21 18:59 ` support ranges TRIM for libata Tejun Heo
2017-03-22 18:19   ` Christoph Hellwig
2017-03-23 13:47     ` James Bottomley
2017-03-23 13:55       ` Christoph Hellwig
2017-03-23 14:35         ` James Bottomley
2017-03-23 14:43           ` Christoph Hellwig
2017-03-23 15:04             ` Tejun Heo
2017-03-23 15:27               ` Christoph Hellwig
2017-03-23 15:30             ` Christoph Hellwig
2017-03-23 15:39               ` Martin K. Petersen

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.