All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v6] virtio_blk: add DISCARD and WRIET ZEROES commands support
@ 2018-06-06  4:19 Changpeng Liu
  2018-06-07 13:10 ` Stefan Hajnoczi
                   ` (3 more replies)
  0 siblings, 4 replies; 50+ messages in thread
From: Changpeng Liu @ 2018-06-06  4:19 UTC (permalink / raw)
  To: virtualization, changpeng.liu; +Cc: pbonzini, cavery, stefanha

The existing virtio-blk protocol doesn't support the DISCARD/WRITE ZEROES
commands; this impacts performance when using an SSD backend over
file systems.

Commit 88c85538 "virtio-blk: add discard and write zeroes features to
specification"(see https://github.com/oasis-tcs/virtio-spec) extended
existing virtio-blk protocol, adding extra DISCARD and WRITE ZEROES
commands support.

This patch uses a 16-byte descriptor to describe one segment of a DISCARD
or WRITE ZEROES command; each command may contain one or more descriptors.

The following data structure shows the definition of one descriptor:

struct virtio_blk_discard_write_zeroes {
        le64 sector;
        le32 num_sectors;
        le32 unmap;
};

Field 'sector' is the start sector for DISCARD and WRITE ZEROES, and
field 'num_sectors' is the number of sectors for DISCARD and WRITE
ZEROES. If both DISCARD and WRITE ZEROES are supported, field 'unmap'
may be used for a WRITE ZEROES command with DISCARD enabled.

We also extended the virtio-blk configuration space to let backend
device put DISCARD and WRITE ZEROES configuration parameters.

struct virtio_blk_config {
        [...]

        le32 max_discard_sectors;
        le32 max_discard_seg;
        le32 discard_sector_alignment;
        le32 max_write_zeroes_sectors;
        le32 max_write_zeroes_seg;
        u8 write_zeroes_may_unmap;
}

New feature bit [VIRTIO_BLK_F_DISCARD (13)]: Device can support discard
command, maximum discard sectors size in field 'max_discard_sectors' and
maximum discard segment number in field 'max_discard_seg'.

New feature [VIRTIO_BLK_F_WRITE_ZEROES (14)]: Device can support write
zeroes command, maximum write zeroes sectors size in field
'max_write_zeroes_sectors' and maximum write zeroes segment number in
field 'max_write_zeroes_seg'.

The parameters in the configuration space of the device field
'max_discard_sectors' and field 'discard_sector_alignment' are expressed in
512-byte units if the VIRTIO_BLK_F_DISCARD feature bit is negotiated. The
field 'max_write_zeroes_sectors' is expressed in 512-byte units if the
VIRTIO_BLK_F_WRITE_ZEROES feature bit is negotiated.

Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
---
CHANGELOG:
v6: don't set T_OUT bit to discard and write zeroes commands.
v5: use new block layer API: blk_queue_flag_set.
v4: several optimizations based on MST's comments, remove bit field usage for
command descriptor.
v3: define the virtio-blk protocol to add discard and write zeroes support,
first version implementation based on proposed specification.
v2: add write zeroes command support.
v1: initial proposal implementation for discard command.
---
 drivers/block/virtio_blk.c      | 89 ++++++++++++++++++++++++++++++++++++++++-
 include/uapi/linux/virtio_blk.h | 43 ++++++++++++++++++++
 2 files changed, 130 insertions(+), 2 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 4a07593c..5aabc63 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -172,10 +172,45 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
 	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
 }
 
+/* Build the per-bio range array for @req; returns 0 or -ENOMEM. */
+static inline int virtblk_setup_discard_write_zeroes(struct request *req,
+						bool unmap)
+{
+	unsigned short segments = blk_rq_nr_discard_segments(req);
+	unsigned short n = 0;
+	struct virtio_blk_discard_write_zeroes *range;
+	struct bio *bio;
+
+	/* Called from virtio_queue_rq(), which must not sleep: no GFP_KERNEL. */
+	range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
+	if (!range)
+		return -ENOMEM;
+
+	/* One range per bio; >> 9 converts bytes to 512-byte sectors. */
+	__rq_for_each_bio(bio, req) {
+		range[n].unmap = cpu_to_le32(unmap);
+		range[n].num_sectors = cpu_to_le32(bio->bi_iter.bi_size >> 9);
+		range[n].sector = cpu_to_le64(bio->bi_iter.bi_sector);
+		n++;
+	}
+
+	/* Freed in virtblk_request_done() when RQF_SPECIAL_PAYLOAD is set. */
+	req->special_vec.bv_page = virt_to_page(range);
+	req->special_vec.bv_offset = offset_in_page(range);
+	req->special_vec.bv_len = sizeof(*range) * segments;
+	req->rq_flags |= RQF_SPECIAL_PAYLOAD;
+	return 0;
+}
+
 static inline void virtblk_request_done(struct request *req)
 {
 	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
 
+	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
+		kfree(page_address(req->special_vec.bv_page) +
+		      req->special_vec.bv_offset);
+	}
+
 	switch (req_op(req)) {
 	case REQ_OP_SCSI_IN:
 	case REQ_OP_SCSI_OUT:
@@ -225,6 +260,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 	int qid = hctx->queue_num;
 	int err;
 	bool notify = false;
+	bool unmap = false;
 	u32 type;
 
 	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
@@ -237,6 +273,13 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 	case REQ_OP_FLUSH:
 		type = VIRTIO_BLK_T_FLUSH;
 		break;
+	case REQ_OP_DISCARD:
+		type = VIRTIO_BLK_T_DISCARD;
+		break;
+	case REQ_OP_WRITE_ZEROES:
+		type = VIRTIO_BLK_T_WRITE_ZEROES;
+		unmap = !(req->cmd_flags & REQ_NOUNMAP);
+		break;
 	case REQ_OP_SCSI_IN:
 	case REQ_OP_SCSI_OUT:
 		type = VIRTIO_BLK_T_SCSI_CMD;
@@ -256,6 +299,12 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	blk_mq_start_request(req);
 
+	if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
+		err = virtblk_setup_discard_write_zeroes(req, unmap);
+		if (err)
+			return BLK_STS_RESOURCE;
+	}
+
 	num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
 	if (num) {
 		if (rq_data_dir(req) == WRITE)
@@ -777,6 +826,42 @@ static int virtblk_probe(struct virtio_device *vdev)
 	if (!err && opt_io_size)
 		blk_queue_io_opt(q, blk_size * opt_io_size);
 
+	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
+		q->limits.discard_granularity = blk_size;
+
+		virtio_cread(vdev, struct virtio_blk_config,
+				discard_sector_alignment, &v);
+		if (v)
+			q->limits.discard_alignment = v << 9;
+		else
+			q->limits.discard_alignment = 0;
+
+		virtio_cread(vdev, struct virtio_blk_config,
+				max_discard_sectors, &v);
+		if (v)
+			blk_queue_max_discard_sectors(q, v);
+		else
+			blk_queue_max_discard_sectors(q, UINT_MAX);
+
+		virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
+				&v);
+		if (v)
+			blk_queue_max_discard_segments(q, v);
+		else
+			blk_queue_max_discard_segments(q, USHRT_MAX);
+
+		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
+	}
+
+	if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
+		virtio_cread(vdev, struct virtio_blk_config,
+				max_write_zeroes_sectors, &v);
+		if (v)
+			blk_queue_max_write_zeroes_sectors(q, v);
+		else
+			blk_queue_max_write_zeroes_sectors(q, UINT_MAX);
+	}
+
 	virtblk_update_capacity(vblk, false);
 	virtio_device_ready(vdev);
 
@@ -885,14 +970,14 @@ static int virtblk_restore(struct virtio_device *vdev)
 	VIRTIO_BLK_F_SCSI,
 #endif
 	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
-	VIRTIO_BLK_F_MQ,
+	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
 }
 ;
 static unsigned int features[] = {
 	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
 	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
 	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
-	VIRTIO_BLK_F_MQ,
+	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
 };
 
 static struct virtio_driver virtio_blk = {
diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
index 9ebe4d9..8e7a015 100644
--- a/include/uapi/linux/virtio_blk.h
+++ b/include/uapi/linux/virtio_blk.h
@@ -38,6 +38,8 @@
 #define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
 #define VIRTIO_BLK_F_TOPOLOGY	10	/* Topology information is available */
 #define VIRTIO_BLK_F_MQ		12	/* support more than one vq */
+#define VIRTIO_BLK_F_DISCARD	13	/* DISCARD is supported */
+#define VIRTIO_BLK_F_WRITE_ZEROES	14	/* WRITE ZEROES is supported */
 
 /* Legacy feature bits */
 #ifndef VIRTIO_BLK_NO_LEGACY
@@ -86,6 +88,29 @@ struct virtio_blk_config {
 
 	/* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
 	__u16 num_queues;
+	/* The maximum discard sectors (in 512-byte sectors) for
+	 * one segment (if VIRTIO_BLK_F_DISCARD)
+	 */
+	__u32 max_discard_sectors;
+	/* The maximum number of discard segments in a
+	 * discard command (if VIRTIO_BLK_F_DISCARD)
+	 */
+	__u32 max_discard_seg;
+	/* The alignment sectors for discard (if VIRTIO_BLK_F_DISCARD) */
+	__u32 discard_sector_alignment;
+	/* The maximum number of write zeroes sectors (in 512-byte sectors) in
+	 * one segment (if VIRTIO_BLK_F_WRITE_ZEROES)
+	 */
+	__u32 max_write_zeroes_sectors;
+	/* The maximum number of segments in a write zeroes
+	 * command (if VIRTIO_BLK_F_WRITE_ZEROES)
+	 */
+	__u32 max_write_zeroes_seg;
+	/* Device clears this bit when write zeroes command can't result in
+	 * unmapping sectors (if VIRTIO_BLK_F_WRITE_ZEROES and with unmap)
+	 */
+	__u8 write_zeroes_may_unmap;
+	__u8 unused1[3];
 } __attribute__((packed));
 
 /*
@@ -114,6 +139,12 @@ struct virtio_blk_config {
 /* Get device ID command */
 #define VIRTIO_BLK_T_GET_ID    8
 
+/* Discard command */
+#define VIRTIO_BLK_T_DISCARD	11
+
+/* Write zeroes command */
+#define VIRTIO_BLK_T_WRITE_ZEROES	13
+
 #ifndef VIRTIO_BLK_NO_LEGACY
 /* Barrier before this op. */
 #define VIRTIO_BLK_T_BARRIER	0x80000000
@@ -133,6 +164,18 @@ struct virtio_blk_outhdr {
 	__virtio64 sector;
 };
 
+/*
+ * One discard/write zeroes range; fields are little-endian on the wire.
+ */
+struct virtio_blk_discard_write_zeroes {
+	/* discard/write zeroes start sector */
+	__le64 sector;
+	/* number of discard/write zeroes sectors */
+	__le32 num_sectors;
+	/* valid for write zeroes command */
+	__le32 unmap;
+};
+
 #ifndef VIRTIO_BLK_NO_LEGACY
 struct virtio_scsi_inhdr {
 	__virtio32 errors;
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 50+ messages in thread

* Re: [PATCH v6] virtio_blk: add DISCARD and WRIET ZEROES commands support
  2018-06-06  4:19 [PATCH v6] virtio_blk: add DISCARD and WRIET ZEROES commands support Changpeng Liu
@ 2018-06-07 13:10 ` Stefan Hajnoczi
  2018-06-07 23:07   ` Liu, Changpeng
  2018-08-28 22:25 ` [PATCH v7] virtio_blk: add discard and write zeroes support Daniel Verkamp
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 50+ messages in thread
From: Stefan Hajnoczi @ 2018-06-07 13:10 UTC (permalink / raw)
  To: Changpeng Liu; +Cc: pbonzini, cavery, virtualization


[-- Attachment #1.1: Type: text/plain, Size: 3765 bytes --]

On Wed, Jun 06, 2018 at 12:19:00PM +0800, Changpeng Liu wrote:
> Existing virtio-blk protocol doesn't have DISCARD/WRITE ZEROES commands
> support, this will impact the performance when using SSD backend over
> file systems.
> 
> Commit 88c85538 "virtio-blk: add discard and write zeroes features to
> specification"(see https://github.com/oasis-tcs/virtio-spec) extended
> existing virtio-blk protocol, adding extra DISCARD and WRITE ZEROES
> commands support.
> 
> While here, using 16 bytes descriptor to describe one segment of DISCARD
> or WRITE ZEROES commands, each command may contain one or more decriptors.
> 
> The following data structure shows the definition of one descriptor:
> 
> struct virtio_blk_discard_write_zeroes {
>         le64 sector;
>         le32 num_sectors;
>         le32 unmap;
> };
> 
> Field 'sector' means the start sector for DISCARD and WRITE ZEROES,
> filed 'num_sectors' means the number of sectors for DISCARD and WRITE
> ZEROES, if both DISCARD and WRITE ZEROES are supported, field 'unmap'
> maybe used for WRITE ZEROES command with DISCARD enabled.
> 
> We also extended the virtio-blk configuration space to let backend
> device put DISCARD and WRITE ZEROES configuration parameters.
> 
> struct virtio_blk_config {
>         [...]
> 
>         le32 max_discard_sectors;
>         le32 max_discard_seg;
>         le32 discard_sector_alignment;
>         le32 max_write_zeroes_sectors;
>         le32 max_write_zeroes_seg;
>         u8 write_zeroes_may_unmap;
> }
> 
> New feature bit [VIRTIO_BLK_F_DISCARD (13)]: Device can support discard
> command, maximum discard sectors size in field 'max_discard_sectors' and
> maximum discard segment number in field 'max_discard_seg'.
> 
> New feature [VIRTIO_BLK_F_WRITE_ZEROES (14)]: Device can support write
> zeroes command, maximum write zeroes sectors size in field
> 'max_write_zeroes_sectors' and maximum write zeroes segment number in
> field 'max_write_zeroes_seg'.
> 
> The parameters in the configuration space of the device field
> 'max_discard_sectors' and field 'discard_sector_alignment' are expressed in
> 512-byte units if the VIRTIO_BLK_F_DISCARD feature bit is negotiated. The
> field 'max_write_zeroes_sectors' is expressed in 512-byte units if the
> VIRTIO_BLK_F_WRITE_ZEROES feature bit is negotiated.
> 
> Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
> ---
> CHANGELOG:
> v6: don't set T_OUT bit to discard and write zeroes commands.

I don't see this in the patch...

> @@ -225,6 +260,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>  	int qid = hctx->queue_num;
>  	int err;
>  	bool notify = false;
> +	bool unmap = false;
>  	u32 type;
>  
>  	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
> @@ -237,6 +273,13 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>  	case REQ_OP_FLUSH:
>  		type = VIRTIO_BLK_T_FLUSH;
>  		break;
> +	case REQ_OP_DISCARD:
> +		type = VIRTIO_BLK_T_DISCARD;
> +		break;
> +	case REQ_OP_WRITE_ZEROES:
> +		type = VIRTIO_BLK_T_WRITE_ZEROES;
> +		unmap = !(req->cmd_flags & REQ_NOUNMAP);
> +		break;
>  	case REQ_OP_SCSI_IN:
>  	case REQ_OP_SCSI_OUT:
>  		type = VIRTIO_BLK_T_SCSI_CMD;
> @@ -256,6 +299,12 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>  
>  	blk_mq_start_request(req);
>  
> +	if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
> +		err = virtblk_setup_discard_write_zeroes(req, unmap);
> +		if (err)
> +			return BLK_STS_RESOURCE;
> +	}
> +
>  	num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
>  	if (num) {
>  		if (rq_data_dir(req) == WRITE)

...since we still do blk_rq_map_sg() here and num should be != 0.

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 50+ messages in thread

* RE: [PATCH v6] virtio_blk: add DISCARD and WRIET ZEROES commands support
  2018-06-07 13:10 ` Stefan Hajnoczi
@ 2018-06-07 23:07   ` Liu, Changpeng
  2018-06-08 10:20     ` Stefan Hajnoczi
  0 siblings, 1 reply; 50+ messages in thread
From: Liu, Changpeng @ 2018-06-07 23:07 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: pbonzini, cavery, virtualization



> -----Original Message-----
> From: Stefan Hajnoczi [mailto:stefanha@redhat.com]
> Sent: Thursday, June 7, 2018 9:10 PM
> To: Liu, Changpeng <changpeng.liu@intel.com>
> Cc: virtualization@lists.linux-foundation.org; cavery@redhat.com;
> jasowang@redhat.com; pbonzini@redhat.com; Wang, Wei W
> <wei.w.wang@intel.com>
> Subject: Re: [PATCH v6] virtio_blk: add DISCARD and WRIET ZEROES commands
> support
> 
> On Wed, Jun 06, 2018 at 12:19:00PM +0800, Changpeng Liu wrote:
> > Existing virtio-blk protocol doesn't have DISCARD/WRITE ZEROES commands
> > support, this will impact the performance when using SSD backend over
> > file systems.
> >
> > Commit 88c85538 "virtio-blk: add discard and write zeroes features to
> > specification"(see https://github.com/oasis-tcs/virtio-spec) extended
> > existing virtio-blk protocol, adding extra DISCARD and WRITE ZEROES
> > commands support.
> >
> > While here, using 16 bytes descriptor to describe one segment of DISCARD
> > or WRITE ZEROES commands, each command may contain one or more
> decriptors.
> >
> > The following data structure shows the definition of one descriptor:
> >
> > struct virtio_blk_discard_write_zeroes {
> >         le64 sector;
> >         le32 num_sectors;
> >         le32 unmap;
> > };
> >
> > Field 'sector' means the start sector for DISCARD and WRITE ZEROES,
> > filed 'num_sectors' means the number of sectors for DISCARD and WRITE
> > ZEROES, if both DISCARD and WRITE ZEROES are supported, field 'unmap'
> > maybe used for WRITE ZEROES command with DISCARD enabled.
> >
> > We also extended the virtio-blk configuration space to let backend
> > device put DISCARD and WRITE ZEROES configuration parameters.
> >
> > struct virtio_blk_config {
> >         [...]
> >
> >         le32 max_discard_sectors;
> >         le32 max_discard_seg;
> >         le32 discard_sector_alignment;
> >         le32 max_write_zeroes_sectors;
> >         le32 max_write_zeroes_seg;
> >         u8 write_zeroes_may_unmap;
> > }
> >
> > New feature bit [VIRTIO_BLK_F_DISCARD (13)]: Device can support discard
> > command, maximum discard sectors size in field 'max_discard_sectors' and
> > maximum discard segment number in field 'max_discard_seg'.
> >
> > New feature [VIRTIO_BLK_F_WRITE_ZEROES (14)]: Device can support write
> > zeroes command, maximum write zeroes sectors size in field
> > 'max_write_zeroes_sectors' and maximum write zeroes segment number in
> > field 'max_write_zeroes_seg'.
> >
> > The parameters in the configuration space of the device field
> > 'max_discard_sectors' and field 'discard_sector_alignment' are expressed in
> > 512-byte units if the VIRTIO_BLK_F_DISCARD feature bit is negotiated. The
> > field 'max_write_zeroes_sectors' is expressed in 512-byte units if the
> > VIRTIO_BLK_F_WRITE_ZEROES feature bit is negotiated.
> >
> > Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
> > ---
> > CHANGELOG:
> > v6: don't set T_OUT bit to discard and write zeroes commands.
> 
> I don't see this in the patch...
Yeah, doing nothing for DISCARD/WRITE ZEROES means no need to OR BLK_T_OUT again.
> 
> > @@ -225,6 +260,7 @@ static blk_status_t virtio_queue_rq(struct
> blk_mq_hw_ctx *hctx,
> >  	int qid = hctx->queue_num;
> >  	int err;
> >  	bool notify = false;
> > +	bool unmap = false;
> >  	u32 type;
> >
> >  	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
> > @@ -237,6 +273,13 @@ static blk_status_t virtio_queue_rq(struct
> blk_mq_hw_ctx *hctx,
> >  	case REQ_OP_FLUSH:
> >  		type = VIRTIO_BLK_T_FLUSH;
> >  		break;
> > +	case REQ_OP_DISCARD:
> > +		type = VIRTIO_BLK_T_DISCARD;
> > +		break;
> > +	case REQ_OP_WRITE_ZEROES:
> > +		type = VIRTIO_BLK_T_WRITE_ZEROES;
> > +		unmap = !(req->cmd_flags & REQ_NOUNMAP);
> > +		break;
> >  	case REQ_OP_SCSI_IN:
> >  	case REQ_OP_SCSI_OUT:
> >  		type = VIRTIO_BLK_T_SCSI_CMD;
> > @@ -256,6 +299,12 @@ static blk_status_t virtio_queue_rq(struct
> blk_mq_hw_ctx *hctx,
> >
> >  	blk_mq_start_request(req);
> >
> > +	if (type == VIRTIO_BLK_T_DISCARD || type ==
> VIRTIO_BLK_T_WRITE_ZEROES) {
> > +		err = virtblk_setup_discard_write_zeroes(req, unmap);
> > +		if (err)
> > +			return BLK_STS_RESOURCE;
> > +	}
> > +
> >  	num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
> >  	if (num) {
> >  		if (rq_data_dir(req) == WRITE)
> 
> ...since we still do blk_rq_map_sg() here and num should be != 0.
No, while here, we should keep the original logic for READ/WRITE commands.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v6] virtio_blk: add DISCARD and WRIET ZEROES commands support
  2018-06-07 23:07   ` Liu, Changpeng
@ 2018-06-08 10:20     ` Stefan Hajnoczi
  2018-06-11  3:37       ` Liu, Changpeng
  0 siblings, 1 reply; 50+ messages in thread
From: Stefan Hajnoczi @ 2018-06-08 10:20 UTC (permalink / raw)
  To: Liu, Changpeng; +Cc: pbonzini, cavery, virtualization


[-- Attachment #1.1: Type: text/plain, Size: 5124 bytes --]

On Thu, Jun 07, 2018 at 11:07:06PM +0000, Liu, Changpeng wrote:
> 
> 
> > -----Original Message-----
> > From: Stefan Hajnoczi [mailto:stefanha@redhat.com]
> > Sent: Thursday, June 7, 2018 9:10 PM
> > To: Liu, Changpeng <changpeng.liu@intel.com>
> > Cc: virtualization@lists.linux-foundation.org; cavery@redhat.com;
> > jasowang@redhat.com; pbonzini@redhat.com; Wang, Wei W
> > <wei.w.wang@intel.com>
> > Subject: Re: [PATCH v6] virtio_blk: add DISCARD and WRIET ZEROES commands
> > support
> > 
> > On Wed, Jun 06, 2018 at 12:19:00PM +0800, Changpeng Liu wrote:
> > > Existing virtio-blk protocol doesn't have DISCARD/WRITE ZEROES commands
> > > support, this will impact the performance when using SSD backend over
> > > file systems.
> > >
> > > Commit 88c85538 "virtio-blk: add discard and write zeroes features to
> > > specification"(see https://github.com/oasis-tcs/virtio-spec) extended
> > > existing virtio-blk protocol, adding extra DISCARD and WRITE ZEROES
> > > commands support.
> > >
> > > While here, using 16 bytes descriptor to describe one segment of DISCARD
> > > or WRITE ZEROES commands, each command may contain one or more
> > decriptors.
> > >
> > > The following data structure shows the definition of one descriptor:
> > >
> > > struct virtio_blk_discard_write_zeroes {
> > >         le64 sector;
> > >         le32 num_sectors;
> > >         le32 unmap;
> > > };
> > >
> > > Field 'sector' means the start sector for DISCARD and WRITE ZEROES,
> > > filed 'num_sectors' means the number of sectors for DISCARD and WRITE
> > > ZEROES, if both DISCARD and WRITE ZEROES are supported, field 'unmap'
> > > maybe used for WRITE ZEROES command with DISCARD enabled.
> > >
> > > We also extended the virtio-blk configuration space to let backend
> > > device put DISCARD and WRITE ZEROES configuration parameters.
> > >
> > > struct virtio_blk_config {
> > >         [...]
> > >
> > >         le32 max_discard_sectors;
> > >         le32 max_discard_seg;
> > >         le32 discard_sector_alignment;
> > >         le32 max_write_zeroes_sectors;
> > >         le32 max_write_zeroes_seg;
> > >         u8 write_zeroes_may_unmap;
> > > }
> > >
> > > New feature bit [VIRTIO_BLK_F_DISCARD (13)]: Device can support discard
> > > command, maximum discard sectors size in field 'max_discard_sectors' and
> > > maximum discard segment number in field 'max_discard_seg'.
> > >
> > > New feature [VIRTIO_BLK_F_WRITE_ZEROES (14)]: Device can support write
> > > zeroes command, maximum write zeroes sectors size in field
> > > 'max_write_zeroes_sectors' and maximum write zeroes segment number in
> > > field 'max_write_zeroes_seg'.
> > >
> > > The parameters in the configuration space of the device field
> > > 'max_discard_sectors' and field 'discard_sector_alignment' are expressed in
> > > 512-byte units if the VIRTIO_BLK_F_DISCARD feature bit is negotiated. The
> > > field 'max_write_zeroes_sectors' is expressed in 512-byte units if the
> > > VIRTIO_BLK_F_WRITE_ZEROES feature bit is negotiated.
> > >
> > > Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
> > > ---
> > > CHANGELOG:
> > > v6: don't set T_OUT bit to discard and write zeroes commands.
> > 
> > I don't see this in the patch...
> Yeah, do noting with DISCARD/WRITE ZEROES means no need to OR BLK_T_OUT again.
> > 
> > > @@ -225,6 +260,7 @@ static blk_status_t virtio_queue_rq(struct
> > blk_mq_hw_ctx *hctx,
> > >  	int qid = hctx->queue_num;
> > >  	int err;
> > >  	bool notify = false;
> > > +	bool unmap = false;
> > >  	u32 type;
> > >
> > >  	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
> > > @@ -237,6 +273,13 @@ static blk_status_t virtio_queue_rq(struct
> > blk_mq_hw_ctx *hctx,
> > >  	case REQ_OP_FLUSH:
> > >  		type = VIRTIO_BLK_T_FLUSH;
> > >  		break;
> > > +	case REQ_OP_DISCARD:
> > > +		type = VIRTIO_BLK_T_DISCARD;
> > > +		break;
> > > +	case REQ_OP_WRITE_ZEROES:
> > > +		type = VIRTIO_BLK_T_WRITE_ZEROES;
> > > +		unmap = !(req->cmd_flags & REQ_NOUNMAP);
> > > +		break;
> > >  	case REQ_OP_SCSI_IN:
> > >  	case REQ_OP_SCSI_OUT:
> > >  		type = VIRTIO_BLK_T_SCSI_CMD;
> > > @@ -256,6 +299,12 @@ static blk_status_t virtio_queue_rq(struct
> > blk_mq_hw_ctx *hctx,
> > >
> > >  	blk_mq_start_request(req);
> > >
> > > +	if (type == VIRTIO_BLK_T_DISCARD || type ==
> > VIRTIO_BLK_T_WRITE_ZEROES) {
> > > +		err = virtblk_setup_discard_write_zeroes(req, unmap);
> > > +		if (err)
> > > +			return BLK_STS_RESOURCE;
> > > +	}
> > > +
> > >  	num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
> > >  	if (num) {
> > >  		if (rq_data_dir(req) == WRITE)
> > 
> > ...since we still do blk_rq_map_sg() here and num should be != 0.
> No, while here, we should keep the original logic for READ/WRITE commands.

My question is: why does the changelog say "don't set T_OUT" but the
code *will* set it because blk_rq_map_sg() returns != 0 and
rq_data_dir(req) == WRITE?

Maybe I'm misreading the code, but it looks to me like this patch
does the opposite of what the changelog says.

Stefan

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 50+ messages in thread

* RE: [PATCH v6] virtio_blk: add DISCARD and WRIET ZEROES commands support
  2018-06-08 10:20     ` Stefan Hajnoczi
@ 2018-06-11  3:37       ` Liu, Changpeng
  2018-06-12 16:05         ` Stefan Hajnoczi
  0 siblings, 1 reply; 50+ messages in thread
From: Liu, Changpeng @ 2018-06-11  3:37 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: pbonzini, cavery, virtualization



> -----Original Message-----
> From: Stefan Hajnoczi [mailto:stefanha@redhat.com]
> Sent: Friday, June 8, 2018 6:20 PM
> To: Liu, Changpeng <changpeng.liu@intel.com>
> Cc: virtualization@lists.linux-foundation.org; cavery@redhat.com;
> jasowang@redhat.com; pbonzini@redhat.com; Wang, Wei W
> <wei.w.wang@intel.com>
> Subject: Re: [PATCH v6] virtio_blk: add DISCARD and WRIET ZEROES commands
> support
> 
> On Thu, Jun 07, 2018 at 11:07:06PM +0000, Liu, Changpeng wrote:
> >
> >
> > > -----Original Message-----
> > > From: Stefan Hajnoczi [mailto:stefanha@redhat.com]
> > > Sent: Thursday, June 7, 2018 9:10 PM
> > > To: Liu, Changpeng <changpeng.liu@intel.com>
> > > Cc: virtualization@lists.linux-foundation.org; cavery@redhat.com;
> > > jasowang@redhat.com; pbonzini@redhat.com; Wang, Wei W
> > > <wei.w.wang@intel.com>
> > > Subject: Re: [PATCH v6] virtio_blk: add DISCARD and WRIET ZEROES commands
> > > support
> > >
> > > On Wed, Jun 06, 2018 at 12:19:00PM +0800, Changpeng Liu wrote:
> > > > Existing virtio-blk protocol doesn't have DISCARD/WRITE ZEROES commands
> > > > support, this will impact the performance when using SSD backend over
> > > > file systems.
> > > >
> > > > Commit 88c85538 "virtio-blk: add discard and write zeroes features to
> > > > specification"(see https://github.com/oasis-tcs/virtio-spec) extended
> > > > existing virtio-blk protocol, adding extra DISCARD and WRITE ZEROES
> > > > commands support.
> > > >
> > > > While here, using 16 bytes descriptor to describe one segment of DISCARD
> > > > or WRITE ZEROES commands, each command may contain one or more
> > > decriptors.
> > > >
> > > > The following data structure shows the definition of one descriptor:
> > > >
> > > > struct virtio_blk_discard_write_zeroes {
> > > >         le64 sector;
> > > >         le32 num_sectors;
> > > >         le32 unmap;
> > > > };
> > > >
> > > > Field 'sector' means the start sector for DISCARD and WRITE ZEROES,
> > > > filed 'num_sectors' means the number of sectors for DISCARD and WRITE
> > > > ZEROES, if both DISCARD and WRITE ZEROES are supported, field 'unmap'
> > > > maybe used for WRITE ZEROES command with DISCARD enabled.
> > > >
> > > > We also extended the virtio-blk configuration space to let backend
> > > > device put DISCARD and WRITE ZEROES configuration parameters.
> > > >
> > > > struct virtio_blk_config {
> > > >         [...]
> > > >
> > > >         le32 max_discard_sectors;
> > > >         le32 max_discard_seg;
> > > >         le32 discard_sector_alignment;
> > > >         le32 max_write_zeroes_sectors;
> > > >         le32 max_write_zeroes_seg;
> > > >         u8 write_zeroes_may_unmap;
> > > > }
> > > >
> > > > New feature bit [VIRTIO_BLK_F_DISCARD (13)]: Device can support discard
> > > > command, maximum discard sectors size in field 'max_discard_sectors' and
> > > > maximum discard segment number in field 'max_discard_seg'.
> > > >
> > > > New feature [VIRTIO_BLK_F_WRITE_ZEROES (14)]: Device can support write
> > > > zeroes command, maximum write zeroes sectors size in field
> > > > 'max_write_zeroes_sectors' and maximum write zeroes segment number in
> > > > field 'max_write_zeroes_seg'.
> > > >
> > > > The parameters in the configuration space of the device field
> > > > 'max_discard_sectors' and field 'discard_sector_alignment' are expressed in
> > > > 512-byte units if the VIRTIO_BLK_F_DISCARD feature bit is negotiated. The
> > > > field 'max_write_zeroes_sectors' is expressed in 512-byte units if the
> > > > VIRTIO_BLK_F_WRITE_ZEROES feature bit is negotiated.
> > > >
> > > > Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
> > > > ---
> > > > CHANGELOG:
> > > > v6: don't set T_OUT bit to discard and write zeroes commands.
> > >
> > > I don't see this in the patch...
> > Yeah, do noting with DISCARD/WRITE ZEROES means no need to OR BLK_T_OUT
> again.
> > >
> > > > @@ -225,6 +260,7 @@ static blk_status_t virtio_queue_rq(struct
> > > blk_mq_hw_ctx *hctx,
> > > >  	int qid = hctx->queue_num;
> > > >  	int err;
> > > >  	bool notify = false;
> > > > +	bool unmap = false;
> > > >  	u32 type;
> > > >
> > > >  	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
> > > > @@ -237,6 +273,13 @@ static blk_status_t virtio_queue_rq(struct
> > > blk_mq_hw_ctx *hctx,
> > > >  	case REQ_OP_FLUSH:
> > > >  		type = VIRTIO_BLK_T_FLUSH;
> > > >  		break;
> > > > +	case REQ_OP_DISCARD:
> > > > +		type = VIRTIO_BLK_T_DISCARD;
> > > > +		break;
> > > > +	case REQ_OP_WRITE_ZEROES:
> > > > +		type = VIRTIO_BLK_T_WRITE_ZEROES;
> > > > +		unmap = !(req->cmd_flags & REQ_NOUNMAP);
> > > > +		break;
> > > >  	case REQ_OP_SCSI_IN:
> > > >  	case REQ_OP_SCSI_OUT:
> > > >  		type = VIRTIO_BLK_T_SCSI_CMD;
> > > > @@ -256,6 +299,12 @@ static blk_status_t virtio_queue_rq(struct
> > > blk_mq_hw_ctx *hctx,
> > > >
> > > >  	blk_mq_start_request(req);
> > > >
> > > > +	if (type == VIRTIO_BLK_T_DISCARD || type ==
> > > VIRTIO_BLK_T_WRITE_ZEROES) {
> > > > +		err = virtblk_setup_discard_write_zeroes(req, unmap);
> > > > +		if (err)
> > > > +			return BLK_STS_RESOURCE;
> > > > +	}
> > > > +
> > > >  	num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
> > > >  	if (num) {
> > > >  		if (rq_data_dir(req) == WRITE)
> > >
> > > ...since we still do blk_rq_map_sg() here and num should be != 0.
> > No, while here, we should keep the original logic for READ/WRITE commands.
> 
> My question is: why does the changelog say "don't set T_OUT" but the
> code *will* set it because blk_rq_map_sg() returns != 0 and
> rq_data_dir(req) == WRITE?
Since the lowest bit of the DISCARD/WRITE ZEROES command types is already 1 (11 and 13 are both odd), I said we don't need to set
the T_OUT bit for DISCARD/WRITE ZEROES commands again. But for the original WRITE logic, T_OUT is still
needed, so just keeping the original code here is fine.
> 
> Maybe I'm misreading the code, but it looks to me like this patch
> does the opposite of what the changelog says.
> 
> Stefan

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v6] virtio_blk: add DISCARD and WRIET ZEROES commands support
  2018-06-11  3:37       ` Liu, Changpeng
@ 2018-06-12 16:05         ` Stefan Hajnoczi
  0 siblings, 0 replies; 50+ messages in thread
From: Stefan Hajnoczi @ 2018-06-12 16:05 UTC (permalink / raw)
  To: Liu, Changpeng; +Cc: pbonzini, cavery, virtualization


[-- Attachment #1.1: Type: text/plain, Size: 6316 bytes --]

On Mon, Jun 11, 2018 at 03:37:00AM +0000, Liu, Changpeng wrote:
> 
> 
> > -----Original Message-----
> > From: Stefan Hajnoczi [mailto:stefanha@redhat.com]
> > Sent: Friday, June 8, 2018 6:20 PM
> > To: Liu, Changpeng <changpeng.liu@intel.com>
> > Cc: virtualization@lists.linux-foundation.org; cavery@redhat.com;
> > jasowang@redhat.com; pbonzini@redhat.com; Wang, Wei W
> > <wei.w.wang@intel.com>
> > Subject: Re: [PATCH v6] virtio_blk: add DISCARD and WRIET ZEROES commands
> > support
> > 
> > On Thu, Jun 07, 2018 at 11:07:06PM +0000, Liu, Changpeng wrote:
> > >
> > >
> > > > -----Original Message-----
> > > > From: Stefan Hajnoczi [mailto:stefanha@redhat.com]
> > > > Sent: Thursday, June 7, 2018 9:10 PM
> > > > To: Liu, Changpeng <changpeng.liu@intel.com>
> > > > Cc: virtualization@lists.linux-foundation.org; cavery@redhat.com;
> > > > jasowang@redhat.com; pbonzini@redhat.com; Wang, Wei W
> > > > <wei.w.wang@intel.com>
> > > > Subject: Re: [PATCH v6] virtio_blk: add DISCARD and WRIET ZEROES commands
> > > > support
> > > >
> > > > On Wed, Jun 06, 2018 at 12:19:00PM +0800, Changpeng Liu wrote:
> > > > > Existing virtio-blk protocol doesn't have DISCARD/WRITE ZEROES commands
> > > > > support, this will impact the performance when using SSD backend over
> > > > > file systems.
> > > > >
> > > > > Commit 88c85538 "virtio-blk: add discard and write zeroes features to
> > > > > specification"(see https://github.com/oasis-tcs/virtio-spec) extended
> > > > > existing virtio-blk protocol, adding extra DISCARD and WRITE ZEROES
> > > > > commands support.
> > > > >
> > > > > This patch uses a 16-byte descriptor to describe one segment of a DISCARD
> > > > > or WRITE ZEROES command; each command may contain one or more
> > > > descriptors.
> > > > >
> > > > > The following data structure shows the definition of one descriptor:
> > > > >
> > > > > struct virtio_blk_discard_write_zeroes {
> > > > >         le64 sector;
> > > > >         le32 num_sectors;
> > > > >         le32 unmap;
> > > > > };
> > > > >
> > > > > Field 'sector' means the start sector for DISCARD and WRITE ZEROES, and
> > > > > field 'num_sectors' means the number of sectors for DISCARD and WRITE
> > > > > ZEROES. If both DISCARD and WRITE ZEROES are supported, field 'unmap'
> > > > > may be used for a WRITE ZEROES command with DISCARD enabled.
> > > > >
> > > > > We also extended the virtio-blk configuration space to let backend
> > > > > device put DISCARD and WRITE ZEROES configuration parameters.
> > > > >
> > > > > struct virtio_blk_config {
> > > > >         [...]
> > > > >
> > > > >         le32 max_discard_sectors;
> > > > >         le32 max_discard_seg;
> > > > >         le32 discard_sector_alignment;
> > > > >         le32 max_write_zeroes_sectors;
> > > > >         le32 max_write_zeroes_seg;
> > > > >         u8 write_zeroes_may_unmap;
> > > > > }
> > > > >
> > > > > New feature bit [VIRTIO_BLK_F_DISCARD (13)]: Device can support discard
> > > > > command, maximum discard sectors size in field 'max_discard_sectors' and
> > > > > maximum discard segment number in field 'max_discard_seg'.
> > > > >
> > > > > New feature [VIRTIO_BLK_F_WRITE_ZEROES (14)]: Device can support write
> > > > > zeroes command, maximum write zeroes sectors size in field
> > > > > 'max_write_zeroes_sectors' and maximum write zeroes segment number in
> > > > > field 'max_write_zeroes_seg'.
> > > > >
> > > > > The parameters in the configuration space of the device field
> > > > > 'max_discard_sectors' and field 'discard_sector_alignment' are expressed in
> > > > > 512-byte units if the VIRTIO_BLK_F_DISCARD feature bit is negotiated. The
> > > > > field 'max_write_zeroes_sectors' is expressed in 512-byte units if the
> > > > > VIRTIO_BLK_F_WRITE_ZEROES feature bit is negotiated.
> > > > >
> > > > > Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
> > > > > ---
> > > > > CHANGELOG:
> > > > > v6: don't set T_OUT bit to discard and write zeroes commands.
> > > >
> > > > I don't see this in the patch...
> > > Yeah, doing nothing for DISCARD/WRITE ZEROES means there is no need to OR in BLK_T_OUT
> > again.
> > > >
> > > > > @@ -225,6 +260,7 @@ static blk_status_t virtio_queue_rq(struct
> > > > blk_mq_hw_ctx *hctx,
> > > > >  	int qid = hctx->queue_num;
> > > > >  	int err;
> > > > >  	bool notify = false;
> > > > > +	bool unmap = false;
> > > > >  	u32 type;
> > > > >
> > > > >  	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
> > > > > @@ -237,6 +273,13 @@ static blk_status_t virtio_queue_rq(struct
> > > > blk_mq_hw_ctx *hctx,
> > > > >  	case REQ_OP_FLUSH:
> > > > >  		type = VIRTIO_BLK_T_FLUSH;
> > > > >  		break;
> > > > > +	case REQ_OP_DISCARD:
> > > > > +		type = VIRTIO_BLK_T_DISCARD;
> > > > > +		break;
> > > > > +	case REQ_OP_WRITE_ZEROES:
> > > > > +		type = VIRTIO_BLK_T_WRITE_ZEROES;
> > > > > +		unmap = !(req->cmd_flags & REQ_NOUNMAP);
> > > > > +		break;
> > > > >  	case REQ_OP_SCSI_IN:
> > > > >  	case REQ_OP_SCSI_OUT:
> > > > >  		type = VIRTIO_BLK_T_SCSI_CMD;
> > > > > @@ -256,6 +299,12 @@ static blk_status_t virtio_queue_rq(struct
> > > > blk_mq_hw_ctx *hctx,
> > > > >
> > > > >  	blk_mq_start_request(req);
> > > > >
> > > > > +	if (type == VIRTIO_BLK_T_DISCARD || type ==
> > > > VIRTIO_BLK_T_WRITE_ZEROES) {
> > > > > +		err = virtblk_setup_discard_write_zeroes(req, unmap);
> > > > > +		if (err)
> > > > > +			return BLK_STS_RESOURCE;
> > > > > +	}
> > > > > +
> > > > >  	num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
> > > > >  	if (num) {
> > > > >  		if (rq_data_dir(req) == WRITE)
> > > >
> > > > ...since we still do blk_rq_map_sg() here and num should be != 0.
> > > No, while here, we should keep the original logic for READ/WRITE commands.
> > 
> > My question is: why does the changelog say "don't set T_OUT" but the
> > code *will* set it because blk_rq_map_sg() returns != 0 and
> > rq_data_dir(req) == WRITE?
> The lowest bit of the DISCARD/WRITE ZEROES command types is already 1, so what I meant is that we don't
> need to set the T_OUT bit for DISCARD/WRITE ZEROES commands again. The original logic for WRITE still
> needs to set T_OUT, so keeping the original code here is fine.

Okay, I understand what you meant now.  Thanks!

Stefan

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 50+ messages in thread

* [PATCH v7] virtio_blk: add discard and write zeroes support
  2018-06-06  4:19 [PATCH v6] virtio_blk: add DISCARD and WRIET ZEROES commands support Changpeng Liu
  2018-06-07 13:10 ` Stefan Hajnoczi
@ 2018-08-28 22:25 ` Daniel Verkamp
  2018-10-12 21:06   ` Daniel Verkamp
  2018-11-01 22:40   ` Daniel Verkamp
  3 siblings, 0 replies; 50+ messages in thread
From: Daniel Verkamp @ 2018-08-28 22:25 UTC (permalink / raw)
  To: virtualization
  Cc: Jens Axboe, Michael S. Tsirkin, Stefan Hajnoczi, Changpeng Liu

From: Changpeng Liu <changpeng.liu@intel.com>

In commit 88c85538, "virtio-blk: add discard and write zeroes features
to specification" (https://github.com/oasis-tcs/virtio-spec), the virtio
block specification has been extended to add VIRTIO_BLK_T_DISCARD and
VIRTIO_BLK_T_WRITE_ZEROES commands.  This patch enables support for
discard and write zeroes in the virtio-blk driver when the device
advertises the corresponding features, VIRTIO_BLK_F_DISCARD and
VIRTIO_BLK_F_WRITE_ZEROES.

Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
Signed-off-by: Daniel Verkamp <dverkamp@chromium.org>
---
dverkamp: I've picked up this patch and made a few minor changes (as
listed below); most notably, I changed the kmalloc back to GFP_ATOMIC,
since it can be called from a context where sleeping is not allowed.
To prevent large allocations, I've also clamped the maximum number of
discard segments to 256; this results in a 4K allocation and should be
plenty of descriptors for most use cases.

I also removed most of the description from the commit message, since it
was duplicating the comments from virtio_blk.h and quoting parts of the
spec without adding any extra information.  I have tested this iteration
of the patch using crosvm with modifications to enable the new features:
https://chromium.googlesource.com/chromiumos/platform/crosvm/

CHANGELOG:
v7: [dverkamp] use GFP_ATOMIC for allocation that may not sleep; clarify
descriptor flags field; comment wording cleanups.
v6: don't set T_OUT bit to discard and write zeroes commands.
v5: use new block layer API: blk_queue_flag_set.
v4: several optimizations based on MST's comments, remove bit field
usage for command descriptor.
v3: define the virtio-blk protocol to add discard and write zeroes
support, first version implementation based on proposed specification.
v2: add write zeroes command support.
v1: initial proposal implementation for discard command.
---
 drivers/block/virtio_blk.c      | 95 ++++++++++++++++++++++++++++++++-
 include/uapi/linux/virtio_blk.h | 54 +++++++++++++++++++
 2 files changed, 147 insertions(+), 2 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 23752dc99b00..c033e718a36a 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -18,6 +18,7 @@
 
 #define PART_BITS 4
 #define VQ_NAME_LEN 16
+#define DISCARD_MAX_SEGMENTS 256
 
 static int major;
 static DEFINE_IDA(vd_index_ida);
@@ -172,10 +173,50 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
 	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
 }
 
+
+static inline int virtblk_setup_discard_write_zeroes(struct request *req,
+						bool unmap)
+{
+	unsigned short segments = blk_rq_nr_discard_segments(req);
+	unsigned short n = 0;
+	struct virtio_blk_discard_write_zeroes *range;
+	struct bio *bio;
+	u32 flags = 0;
+
+	if (unmap)
+		flags |= VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP;
+
+	range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
+	if (!range)
+		return -ENOMEM;
+
+	__rq_for_each_bio(bio, req) {
+		u64 sector = bio->bi_iter.bi_sector;
+		u32 num_sectors = bio->bi_iter.bi_size >> 9;
+
+		range[n].flags = cpu_to_le32(flags);
+		range[n].num_sectors = cpu_to_le32(num_sectors);
+		range[n].sector = cpu_to_le64(sector);
+		n++;
+	}
+
+	req->special_vec.bv_page = virt_to_page(range);
+	req->special_vec.bv_offset = offset_in_page(range);
+	req->special_vec.bv_len = sizeof(*range) * segments;
+	req->rq_flags |= RQF_SPECIAL_PAYLOAD;
+
+	return 0;
+}
+
 static inline void virtblk_request_done(struct request *req)
 {
 	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
 
+	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
+		kfree(page_address(req->special_vec.bv_page) +
+		      req->special_vec.bv_offset);
+	}
+
 	switch (req_op(req)) {
 	case REQ_OP_SCSI_IN:
 	case REQ_OP_SCSI_OUT:
@@ -225,6 +266,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 	int qid = hctx->queue_num;
 	int err;
 	bool notify = false;
+	bool unmap = false;
 	u32 type;
 
 	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
@@ -237,6 +279,13 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 	case REQ_OP_FLUSH:
 		type = VIRTIO_BLK_T_FLUSH;
 		break;
+	case REQ_OP_DISCARD:
+		type = VIRTIO_BLK_T_DISCARD;
+		break;
+	case REQ_OP_WRITE_ZEROES:
+		type = VIRTIO_BLK_T_WRITE_ZEROES;
+		unmap = !(req->cmd_flags & REQ_NOUNMAP);
+		break;
 	case REQ_OP_SCSI_IN:
 	case REQ_OP_SCSI_OUT:
 		type = VIRTIO_BLK_T_SCSI_CMD;
@@ -256,6 +305,12 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	blk_mq_start_request(req);
 
+	if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
+		err = virtblk_setup_discard_write_zeroes(req, unmap);
+		if (err)
+			return BLK_STS_RESOURCE;
+	}
+
 	num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
 	if (num) {
 		if (rq_data_dir(req) == WRITE)
@@ -777,6 +832,42 @@ static int virtblk_probe(struct virtio_device *vdev)
 	if (!err && opt_io_size)
 		blk_queue_io_opt(q, blk_size * opt_io_size);
 
+	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
+		q->limits.discard_granularity = blk_size;
+
+		virtio_cread(vdev, struct virtio_blk_config,
+				discard_sector_alignment, &v);
+		if (v)
+			q->limits.discard_alignment = v << 9;
+		else
+			q->limits.discard_alignment = 0;
+
+		virtio_cread(vdev, struct virtio_blk_config,
+				max_discard_sectors, &v);
+		if (v)
+			blk_queue_max_discard_sectors(q, v);
+		else
+			blk_queue_max_discard_sectors(q, UINT_MAX);
+
+		virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
+				&v);
+		if (v && v <= DISCARD_MAX_SEGMENTS)
+			blk_queue_max_discard_segments(q, v);
+		else
+			blk_queue_max_discard_segments(q, DISCARD_MAX_SEGMENTS);
+
+		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
+	}
+
+	if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
+		virtio_cread(vdev, struct virtio_blk_config,
+				max_write_zeroes_sectors, &v);
+		if (v)
+			blk_queue_max_write_zeroes_sectors(q, v);
+		else
+			blk_queue_max_write_zeroes_sectors(q, UINT_MAX);
+	}
+
 	virtblk_update_capacity(vblk, false);
 	virtio_device_ready(vdev);
 
@@ -885,14 +976,14 @@ static unsigned int features_legacy[] = {
 	VIRTIO_BLK_F_SCSI,
 #endif
 	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
-	VIRTIO_BLK_F_MQ,
+	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
 }
 ;
 static unsigned int features[] = {
 	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
 	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
 	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
-	VIRTIO_BLK_F_MQ,
+	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
 };
 
 static struct virtio_driver virtio_blk = {
diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
index 9ebe4d968dd5..682afbfe3aa4 100644
--- a/include/uapi/linux/virtio_blk.h
+++ b/include/uapi/linux/virtio_blk.h
@@ -38,6 +38,8 @@
 #define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
 #define VIRTIO_BLK_F_TOPOLOGY	10	/* Topology information is available */
 #define VIRTIO_BLK_F_MQ		12	/* support more than one vq */
+#define VIRTIO_BLK_F_DISCARD	13	/* DISCARD is supported */
+#define VIRTIO_BLK_F_WRITE_ZEROES	14	/* WRITE ZEROES is supported */
 
 /* Legacy feature bits */
 #ifndef VIRTIO_BLK_NO_LEGACY
@@ -86,6 +88,39 @@ struct virtio_blk_config {
 
 	/* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
 	__u16 num_queues;
+
+	/* the next 3 entries are guarded by VIRTIO_BLK_F_DISCARD */
+	/*
+	 * The maximum discard sectors (in 512-byte sectors) for
+	 * one segment.
+	 */
+	__u32 max_discard_sectors;
+	/*
+	 * The maximum number of discard segments in a
+	 * discard command.
+	 */
+	__u32 max_discard_seg;
+	/* Discard commands must be aligned to this number of sectors. */
+	__u32 discard_sector_alignment;
+
+	/* the next 3 entries are guarded by VIRTIO_BLK_F_WRITE_ZEROES */
+	/*
+	 * The maximum number of write zeroes sectors (in 512-byte sectors) in
+	 * one segment.
+	 */
+	__u32 max_write_zeroes_sectors;
+	/*
+	 * The maximum number of segments in a write zeroes
+	 * command.
+	 */
+	__u32 max_write_zeroes_seg;
+	/*
+	 * Set if a VIRTIO_BLK_T_WRITE_ZEROES request may result in the
+	 * deallocation of one or more of the sectors.
+	 */
+	__u8 write_zeroes_may_unmap;
+
+	__u8 unused1[3];
 } __attribute__((packed));
 
 /*
@@ -114,6 +149,12 @@ struct virtio_blk_config {
 /* Get device ID command */
 #define VIRTIO_BLK_T_GET_ID    8
 
+/* Discard command */
+#define VIRTIO_BLK_T_DISCARD	11
+
+/* Write zeroes command */
+#define VIRTIO_BLK_T_WRITE_ZEROES	13
+
 #ifndef VIRTIO_BLK_NO_LEGACY
 /* Barrier before this op. */
 #define VIRTIO_BLK_T_BARRIER	0x80000000
@@ -133,6 +174,19 @@ struct virtio_blk_outhdr {
 	__virtio64 sector;
 };
 
+/* Unmap this range (only valid for write zeroes command) */
+#define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP	0x00000001
+
+/* Discard/write zeroes range for each request. */
+struct virtio_blk_discard_write_zeroes {
+	/* discard/write zeroes start sector */
+	__virtio64 sector;
+	/* number of discard/write zeroes sectors */
+	__virtio32 num_sectors;
+	/* flags for this range */
+	__virtio32 flags;
+};
+
 #ifndef VIRTIO_BLK_NO_LEGACY
 struct virtio_scsi_inhdr {
 	__virtio32 errors;
-- 
2.19.0.rc0.228.g281dcd1b4d0-goog

^ permalink raw reply related	[flat|nested] 50+ messages in thread

* [PATCH v8] virtio_blk: add discard and write zeroes support
  2018-06-06  4:19 [PATCH v6] virtio_blk: add DISCARD and WRIET ZEROES commands support Changpeng Liu
@ 2018-10-12 21:06   ` Daniel Verkamp
  2018-08-28 22:25 ` [PATCH v7] virtio_blk: add discard and write zeroes support Daniel Verkamp
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 50+ messages in thread
From: Daniel Verkamp @ 2018-10-12 21:06 UTC (permalink / raw)
  To: virtualization, linux-block
  Cc: Michael S. Tsirkin, Jason Wang, Jens Axboe, Stefan Hajnoczi,
	Changpeng Liu, Daniel Verkamp

From: Changpeng Liu <changpeng.liu@intel.com>

In commit 88c85538, "virtio-blk: add discard and write zeroes features
to specification" (https://github.com/oasis-tcs/virtio-spec), the virtio
block specification has been extended to add VIRTIO_BLK_T_DISCARD and
VIRTIO_BLK_T_WRITE_ZEROES commands.  This patch enables support for
discard and write zeroes in the virtio-blk driver when the device
advertises the corresponding features, VIRTIO_BLK_F_DISCARD and
VIRTIO_BLK_F_WRITE_ZEROES.

Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
Signed-off-by: Daniel Verkamp <dverkamp@chromium.org>
---
dverkamp: I've picked up this patch and made a few minor changes (as
listed below); most notably, I changed the kmalloc back to GFP_ATOMIC,
since it can be called from a context where sleeping is not allowed.
To prevent large allocations, I've also clamped the maximum number of
discard segments to 256; this results in a 4K allocation and should be
plenty of descriptors for most use cases.

I also removed most of the description from the commit message, since it
was duplicating the comments from virtio_blk.h and quoting parts of the
spec without adding any extra information.  I have tested this iteration
of the patch using crosvm with modifications to enable the new features:
https://chromium.googlesource.com/chromiumos/platform/crosvm/

CHANGELOG:
v8: [dverkamp] replace shifts by 9 with SECTOR_SHIFT constant
v7: [dverkamp] use GFP_ATOMIC for allocation that may not sleep; clarify
descriptor flags field; comment wording cleanups.
v6: don't set T_OUT bit to discard and write zeroes commands.
v5: use new block layer API: blk_queue_flag_set.
v4: several optimizations based on MST's comments, remove bit field
usage for command descriptor.
v3: define the virtio-blk protocol to add discard and write zeroes
support, first version implementation based on proposed specification.
v2: add write zeroes command support.
v1: initial proposal implementation for discard command.
---
 drivers/block/virtio_blk.c      | 95 ++++++++++++++++++++++++++++++++-
 include/uapi/linux/virtio_blk.h | 54 +++++++++++++++++++
 2 files changed, 147 insertions(+), 2 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 23752dc99b00..04a7ae602e2f 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -18,6 +18,7 @@
 
 #define PART_BITS 4
 #define VQ_NAME_LEN 16
+#define MAX_DISCARD_SEGMENTS 256
 
 static int major;
 static DEFINE_IDA(vd_index_ida);
@@ -172,10 +173,50 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
 	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
 }
 
+
+static inline int virtblk_setup_discard_write_zeroes(struct request *req,
+						bool unmap)
+{
+	unsigned short segments = blk_rq_nr_discard_segments(req);
+	unsigned short n = 0;
+	struct virtio_blk_discard_write_zeroes *range;
+	struct bio *bio;
+	u32 flags = 0;
+
+	if (unmap)
+		flags |= VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP;
+
+	range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
+	if (!range)
+		return -ENOMEM;
+
+	__rq_for_each_bio(bio, req) {
+		u64 sector = bio->bi_iter.bi_sector;
+		u32 num_sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT;
+
+		range[n].flags = cpu_to_le32(flags);
+		range[n].num_sectors = cpu_to_le32(num_sectors);
+		range[n].sector = cpu_to_le64(sector);
+		n++;
+	}
+
+	req->special_vec.bv_page = virt_to_page(range);
+	req->special_vec.bv_offset = offset_in_page(range);
+	req->special_vec.bv_len = sizeof(*range) * segments;
+	req->rq_flags |= RQF_SPECIAL_PAYLOAD;
+
+	return 0;
+}
+
 static inline void virtblk_request_done(struct request *req)
 {
 	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
 
+	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
+		kfree(page_address(req->special_vec.bv_page) +
+		      req->special_vec.bv_offset);
+	}
+
 	switch (req_op(req)) {
 	case REQ_OP_SCSI_IN:
 	case REQ_OP_SCSI_OUT:
@@ -225,6 +266,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 	int qid = hctx->queue_num;
 	int err;
 	bool notify = false;
+	bool unmap = false;
 	u32 type;
 
 	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
@@ -237,6 +279,13 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 	case REQ_OP_FLUSH:
 		type = VIRTIO_BLK_T_FLUSH;
 		break;
+	case REQ_OP_DISCARD:
+		type = VIRTIO_BLK_T_DISCARD;
+		break;
+	case REQ_OP_WRITE_ZEROES:
+		type = VIRTIO_BLK_T_WRITE_ZEROES;
+		unmap = !(req->cmd_flags & REQ_NOUNMAP);
+		break;
 	case REQ_OP_SCSI_IN:
 	case REQ_OP_SCSI_OUT:
 		type = VIRTIO_BLK_T_SCSI_CMD;
@@ -256,6 +305,12 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	blk_mq_start_request(req);
 
+	if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
+		err = virtblk_setup_discard_write_zeroes(req, unmap);
+		if (err)
+			return BLK_STS_RESOURCE;
+	}
+
 	num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
 	if (num) {
 		if (rq_data_dir(req) == WRITE)
@@ -777,6 +832,42 @@ static int virtblk_probe(struct virtio_device *vdev)
 	if (!err && opt_io_size)
 		blk_queue_io_opt(q, blk_size * opt_io_size);
 
+	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
+		q->limits.discard_granularity = blk_size;
+
+		virtio_cread(vdev, struct virtio_blk_config,
+				discard_sector_alignment, &v);
+		if (v)
+			q->limits.discard_alignment = v << SECTOR_SHIFT;
+		else
+			q->limits.discard_alignment = 0;
+
+		virtio_cread(vdev, struct virtio_blk_config,
+				max_discard_sectors, &v);
+		if (v)
+			blk_queue_max_discard_sectors(q, v);
+		else
+			blk_queue_max_discard_sectors(q, UINT_MAX);
+
+		virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
+				&v);
+		if (v && v <= MAX_DISCARD_SEGMENTS)
+			blk_queue_max_discard_segments(q, v);
+		else
+			blk_queue_max_discard_segments(q, MAX_DISCARD_SEGMENTS);
+
+		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
+	}
+
+	if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
+		virtio_cread(vdev, struct virtio_blk_config,
+				max_write_zeroes_sectors, &v);
+		if (v)
+			blk_queue_max_write_zeroes_sectors(q, v);
+		else
+			blk_queue_max_write_zeroes_sectors(q, UINT_MAX);
+	}
+
 	virtblk_update_capacity(vblk, false);
 	virtio_device_ready(vdev);
 
@@ -885,14 +976,14 @@ static unsigned int features_legacy[] = {
 	VIRTIO_BLK_F_SCSI,
 #endif
 	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
-	VIRTIO_BLK_F_MQ,
+	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
 }
 ;
 static unsigned int features[] = {
 	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
 	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
 	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
-	VIRTIO_BLK_F_MQ,
+	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
 };
 
 static struct virtio_driver virtio_blk = {
diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
index 9ebe4d968dd5..682afbfe3aa4 100644
--- a/include/uapi/linux/virtio_blk.h
+++ b/include/uapi/linux/virtio_blk.h
@@ -38,6 +38,8 @@
 #define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
 #define VIRTIO_BLK_F_TOPOLOGY	10	/* Topology information is available */
 #define VIRTIO_BLK_F_MQ		12	/* support more than one vq */
+#define VIRTIO_BLK_F_DISCARD	13	/* DISCARD is supported */
+#define VIRTIO_BLK_F_WRITE_ZEROES	14	/* WRITE ZEROES is supported */
 
 /* Legacy feature bits */
 #ifndef VIRTIO_BLK_NO_LEGACY
@@ -86,6 +88,39 @@ struct virtio_blk_config {
 
 	/* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
 	__u16 num_queues;
+
+	/* the next 3 entries are guarded by VIRTIO_BLK_F_DISCARD */
+	/*
+	 * The maximum discard sectors (in 512-byte sectors) for
+	 * one segment.
+	 */
+	__u32 max_discard_sectors;
+	/*
+	 * The maximum number of discard segments in a
+	 * discard command.
+	 */
+	__u32 max_discard_seg;
+	/* Discard commands must be aligned to this number of sectors. */
+	__u32 discard_sector_alignment;
+
+	/* the next 3 entries are guarded by VIRTIO_BLK_F_WRITE_ZEROES */
+	/*
+	 * The maximum number of write zeroes sectors (in 512-byte sectors) in
+	 * one segment.
+	 */
+	__u32 max_write_zeroes_sectors;
+	/*
+	 * The maximum number of segments in a write zeroes
+	 * command.
+	 */
+	__u32 max_write_zeroes_seg;
+	/*
+	 * Set if a VIRTIO_BLK_T_WRITE_ZEROES request may result in the
+	 * deallocation of one or more of the sectors.
+	 */
+	__u8 write_zeroes_may_unmap;
+
+	__u8 unused1[3];
 } __attribute__((packed));
 
 /*
@@ -114,6 +149,12 @@ struct virtio_blk_config {
 /* Get device ID command */
 #define VIRTIO_BLK_T_GET_ID    8
 
+/* Discard command */
+#define VIRTIO_BLK_T_DISCARD	11
+
+/* Write zeroes command */
+#define VIRTIO_BLK_T_WRITE_ZEROES	13
+
 #ifndef VIRTIO_BLK_NO_LEGACY
 /* Barrier before this op. */
 #define VIRTIO_BLK_T_BARRIER	0x80000000
@@ -133,6 +174,19 @@ struct virtio_blk_outhdr {
 	__virtio64 sector;
 };
 
+/* Unmap this range (only valid for write zeroes command) */
+#define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP	0x00000001
+
+/* Discard/write zeroes range for each request. */
+struct virtio_blk_discard_write_zeroes {
+	/* discard/write zeroes start sector */
+	__virtio64 sector;
+	/* number of discard/write zeroes sectors */
+	__virtio32 num_sectors;
+	/* flags for this range */
+	__virtio32 flags;
+};
+
 #ifndef VIRTIO_BLK_NO_LEGACY
 struct virtio_scsi_inhdr {
 	__virtio32 errors;
-- 
2.19.0.605.g01d371f741-goog

^ permalink raw reply related	[flat|nested] 50+ messages in thread

* [PATCH v8] virtio_blk: add discard and write zeroes support
@ 2018-10-12 21:06   ` Daniel Verkamp
  0 siblings, 0 replies; 50+ messages in thread
From: Daniel Verkamp @ 2018-10-12 21:06 UTC (permalink / raw)
  To: virtualization, linux-block
  Cc: Jens Axboe, Michael S. Tsirkin, Stefan Hajnoczi, Changpeng Liu

From: Changpeng Liu <changpeng.liu@intel.com>

In commit 88c85538, "virtio-blk: add discard and write zeroes features
to specification" (https://github.com/oasis-tcs/virtio-spec), the virtio
block specification has been extended to add VIRTIO_BLK_T_DISCARD and
VIRTIO_BLK_T_WRITE_ZEROES commands.  This patch enables support for
discard and write zeroes in the virtio-blk driver when the device
advertises the corresponding features, VIRTIO_BLK_F_DISCARD and
VIRTIO_BLK_F_WRITE_ZEROES.

Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
Signed-off-by: Daniel Verkamp <dverkamp@chromium.org>
---
dverkamp: I've picked up this patch and made a few minor changes (as
listed below); most notably, I changed the kmalloc back to GFP_ATOMIC,
since it can be called from a context where sleeping is not allowed.
To prevent large allocations, I've also clamped the maximum number of
discard segments to 256; this results in a 4K allocation and should be
plenty of descriptors for most use cases.

I also removed most of the description from the commit message, since it
was duplicating the comments from virtio_blk.h and quoting parts of the
spec without adding any extra information.  I have tested this iteration
of the patch using crosvm with modifications to enable the new features:
https://chromium.googlesource.com/chromiumos/platform/crosvm/

CHANGELOG:
v8: [dverkamp] replace shifts by 9 with SECTOR_SHIFT constant
v7: [dverkamp] use GFP_ATOMIC for allocation that may not sleep; clarify
descriptor flags field; comment wording cleanups.
v6: don't set T_OUT bit to discard and write zeroes commands.
v5: use new block layer API: blk_queue_flag_set.
v4: several optimizations based on MST's comments, remove bit field
usage for command descriptor.
v3: define the virtio-blk protocol to add discard and write zeroes
support, first version implementation based on proposed specification.
v2: add write zeroes command support.
v1: initial proposal implementation for discard command.
---
 drivers/block/virtio_blk.c      | 95 ++++++++++++++++++++++++++++++++-
 include/uapi/linux/virtio_blk.h | 54 +++++++++++++++++++
 2 files changed, 147 insertions(+), 2 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 23752dc99b00..04a7ae602e2f 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -18,6 +18,7 @@
 
 #define PART_BITS 4
 #define VQ_NAME_LEN 16
+#define MAX_DISCARD_SEGMENTS 256
 
 static int major;
 static DEFINE_IDA(vd_index_ida);
@@ -172,10 +173,50 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
 	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
 }
 
+
+static inline int virtblk_setup_discard_write_zeroes(struct request *req,
+						bool unmap)
+{
+	unsigned short segments = blk_rq_nr_discard_segments(req);
+	unsigned short n = 0;
+	struct virtio_blk_discard_write_zeroes *range;
+	struct bio *bio;
+	u32 flags = 0;
+
+	if (unmap)
+		flags |= VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP;
+
+	range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
+	if (!range)
+		return -ENOMEM;
+
+	__rq_for_each_bio(bio, req) {
+		u64 sector = bio->bi_iter.bi_sector;
+		u32 num_sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT;
+
+		range[n].flags = cpu_to_le32(flags);
+		range[n].num_sectors = cpu_to_le32(num_sectors);
+		range[n].sector = cpu_to_le64(sector);
+		n++;
+	}
+
+	req->special_vec.bv_page = virt_to_page(range);
+	req->special_vec.bv_offset = offset_in_page(range);
+	req->special_vec.bv_len = sizeof(*range) * segments;
+	req->rq_flags |= RQF_SPECIAL_PAYLOAD;
+
+	return 0;
+}
+
 static inline void virtblk_request_done(struct request *req)
 {
 	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
 
+	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
+		kfree(page_address(req->special_vec.bv_page) +
+		      req->special_vec.bv_offset);
+	}
+
 	switch (req_op(req)) {
 	case REQ_OP_SCSI_IN:
 	case REQ_OP_SCSI_OUT:
@@ -225,6 +266,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 	int qid = hctx->queue_num;
 	int err;
 	bool notify = false;
+	bool unmap = false;
 	u32 type;
 
 	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
@@ -237,6 +279,13 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 	case REQ_OP_FLUSH:
 		type = VIRTIO_BLK_T_FLUSH;
 		break;
+	case REQ_OP_DISCARD:
+		type = VIRTIO_BLK_T_DISCARD;
+		break;
+	case REQ_OP_WRITE_ZEROES:
+		type = VIRTIO_BLK_T_WRITE_ZEROES;
+		unmap = !(req->cmd_flags & REQ_NOUNMAP);
+		break;
 	case REQ_OP_SCSI_IN:
 	case REQ_OP_SCSI_OUT:
 		type = VIRTIO_BLK_T_SCSI_CMD;
@@ -256,6 +305,12 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	blk_mq_start_request(req);
 
+	if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
+		err = virtblk_setup_discard_write_zeroes(req, unmap);
+		if (err)
+			return BLK_STS_RESOURCE;
+	}
+
 	num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
 	if (num) {
 		if (rq_data_dir(req) == WRITE)
@@ -777,6 +832,42 @@ static int virtblk_probe(struct virtio_device *vdev)
 	if (!err && opt_io_size)
 		blk_queue_io_opt(q, blk_size * opt_io_size);
 
+	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
+		q->limits.discard_granularity = blk_size;
+
+		virtio_cread(vdev, struct virtio_blk_config,
+				discard_sector_alignment, &v);
+		if (v)
+			q->limits.discard_alignment = v << SECTOR_SHIFT;
+		else
+			q->limits.discard_alignment = 0;
+
+		virtio_cread(vdev, struct virtio_blk_config,
+				max_discard_sectors, &v);
+		if (v)
+			blk_queue_max_discard_sectors(q, v);
+		else
+			blk_queue_max_discard_sectors(q, UINT_MAX);
+
+		virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
+				&v);
+		if (v && v <= MAX_DISCARD_SEGMENTS)
+			blk_queue_max_discard_segments(q, v);
+		else
+			blk_queue_max_discard_segments(q, MAX_DISCARD_SEGMENTS);
+
+		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
+	}
+
+	if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
+		virtio_cread(vdev, struct virtio_blk_config,
+				max_write_zeroes_sectors, &v);
+		if (v)
+			blk_queue_max_write_zeroes_sectors(q, v);
+		else
+			blk_queue_max_write_zeroes_sectors(q, UINT_MAX);
+	}
+
 	virtblk_update_capacity(vblk, false);
 	virtio_device_ready(vdev);
 
@@ -885,14 +976,14 @@ static unsigned int features_legacy[] = {
 	VIRTIO_BLK_F_SCSI,
 #endif
 	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
-	VIRTIO_BLK_F_MQ,
+	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
 }
 ;
 static unsigned int features[] = {
 	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
 	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
 	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
-	VIRTIO_BLK_F_MQ,
+	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
 };
 
 static struct virtio_driver virtio_blk = {
diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
index 9ebe4d968dd5..682afbfe3aa4 100644
--- a/include/uapi/linux/virtio_blk.h
+++ b/include/uapi/linux/virtio_blk.h
@@ -38,6 +38,8 @@
 #define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
 #define VIRTIO_BLK_F_TOPOLOGY	10	/* Topology information is available */
 #define VIRTIO_BLK_F_MQ		12	/* support more than one vq */
+#define VIRTIO_BLK_F_DISCARD	13	/* DISCARD is supported */
+#define VIRTIO_BLK_F_WRITE_ZEROES	14	/* WRITE ZEROES is supported */
 
 /* Legacy feature bits */
 #ifndef VIRTIO_BLK_NO_LEGACY
@@ -86,6 +88,39 @@ struct virtio_blk_config {
 
 	/* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
 	__u16 num_queues;
+
+	/* the next 3 entries are guarded by VIRTIO_BLK_F_DISCARD */
+	/*
+	 * The maximum discard sectors (in 512-byte sectors) for
+	 * one segment.
+	 */
+	__u32 max_discard_sectors;
+	/*
+	 * The maximum number of discard segments in a
+	 * discard command.
+	 */
+	__u32 max_discard_seg;
+	/* Discard commands must be aligned to this number of sectors. */
+	__u32 discard_sector_alignment;
+
+	/* the next 3 entries are guarded by VIRTIO_BLK_F_WRITE_ZEROES */
+	/*
+	 * The maximum number of write zeroes sectors (in 512-byte sectors) in
+	 * one segment.
+	 */
+	__u32 max_write_zeroes_sectors;
+	/*
+	 * The maximum number of segments in a write zeroes
+	 * command.
+	 */
+	__u32 max_write_zeroes_seg;
+	/*
+	 * Set if a VIRTIO_BLK_T_WRITE_ZEROES request may result in the
+	 * deallocation of one or more of the sectors.
+	 */
+	__u8 write_zeroes_may_unmap;
+
+	__u8 unused1[3];
 } __attribute__((packed));
 
 /*
@@ -114,6 +149,12 @@ struct virtio_blk_config {
 /* Get device ID command */
 #define VIRTIO_BLK_T_GET_ID    8
 
+/* Discard command */
+#define VIRTIO_BLK_T_DISCARD	11
+
+/* Write zeroes command */
+#define VIRTIO_BLK_T_WRITE_ZEROES	13
+
 #ifndef VIRTIO_BLK_NO_LEGACY
 /* Barrier before this op. */
 #define VIRTIO_BLK_T_BARRIER	0x80000000
@@ -133,6 +174,19 @@ struct virtio_blk_outhdr {
 	__virtio64 sector;
 };
 
+/* Unmap this range (only valid for write zeroes command) */
+#define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP	0x00000001
+
+/* Discard/write zeroes range for each request. */
+struct virtio_blk_discard_write_zeroes {
+	/* discard/write zeroes start sector */
+	__virtio64 sector;
+	/* number of discard/write zeroes sectors */
+	__virtio32 num_sectors;
+	/* flags for this range */
+	__virtio32 flags;
+};
+
 #ifndef VIRTIO_BLK_NO_LEGACY
 struct virtio_scsi_inhdr {
 	__virtio32 errors;
-- 
2.19.0.605.g01d371f741-goog

^ permalink raw reply related	[flat|nested] 50+ messages in thread

* Re: [PATCH v8] virtio_blk: add discard and write zeroes support
  2018-10-12 21:06   ` Daniel Verkamp
@ 2018-10-15  0:54     ` Michael S. Tsirkin
  -1 siblings, 0 replies; 50+ messages in thread
From: Michael S. Tsirkin @ 2018-10-15  0:54 UTC (permalink / raw)
  To: Daniel Verkamp
  Cc: virtualization, linux-block, Jason Wang, Jens Axboe,
	Stefan Hajnoczi, Changpeng Liu, pbonzini

On Fri, Oct 12, 2018 at 02:06:28PM -0700, Daniel Verkamp wrote:
> From: Changpeng Liu <changpeng.liu@intel.com>
> 
> In commit 88c85538, "virtio-blk: add discard and write zeroes features
> to specification" (https://github.com/oasis-tcs/virtio-spec), the virtio
> block specification has been extended to add VIRTIO_BLK_T_DISCARD and
> VIRTIO_BLK_T_WRITE_ZEROES commands.  This patch enables support for
> discard and write zeroes in the virtio-blk driver when the device
> advertises the corresponding features, VIRTIO_BLK_F_DISCARD and
> VIRTIO_BLK_F_WRITE_ZEROES.
> 
> Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
> Signed-off-by: Daniel Verkamp <dverkamp@chromium.org>

Cc Paolo as well.

> ---
> dverkamp: I've picked up this patch and made a few minor changes (as
> listed below); most notably, I changed the kmalloc back to GFP_ATOMIC,
> since it can be called from a context where sleeping is not allowed.
> To prevent large allocations, I've also clamped the maximum number of
> discard segments to 256; this results in a 4K allocation and should be
> plenty of descriptors for most use cases.
> 
> I also removed most of the description from the commit message, since it
> was duplicating the comments from virtio_blk.h and quoting parts of the
> spec without adding any extra information.  I have tested this iteration
> of the patch using crosvm with modifications to enable the new features:
> https://chromium.googlesource.com/chromiumos/platform/crosvm/
> 
> CHANGELOG:
> v8: [dverkamp] replace shifts by 9 with SECTOR_SHIFT constant
> v7: [dverkamp] use GFP_ATOMIC for allocation that may not sleep; clarify
> descriptor flags field; comment wording cleanups.
> v6: don't set T_OUT bit to discard and write zeroes commands.
> v5: use new block layer API: blk_queue_flag_set.
> v4: several optimizations based on MST's comments, remove bit field
> usage for command descriptor.
> v3: define the virtio-blk protocol to add discard and write zeroes
> support, first version implementation based on proposed specification.
> v2: add write zeroes command support.
> v1: initial proposal implementation for discard command.
> ---
>  drivers/block/virtio_blk.c      | 95 ++++++++++++++++++++++++++++++++-
>  include/uapi/linux/virtio_blk.h | 54 +++++++++++++++++++
>  2 files changed, 147 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index 23752dc99b00..04a7ae602e2f 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -18,6 +18,7 @@
>  
>  #define PART_BITS 4
>  #define VQ_NAME_LEN 16
> +#define MAX_DISCARD_SEGMENTS 256
>  
>  static int major;
>  static DEFINE_IDA(vd_index_ida);
> @@ -172,10 +173,50 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
>  	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
>  }
>  
> +
> +static inline int virtblk_setup_discard_write_zeroes(struct request *req,
> +						bool unmap)
> +{
> +	unsigned short segments = blk_rq_nr_discard_segments(req);
> +	unsigned short n = 0;
> +	struct virtio_blk_discard_write_zeroes *range;
> +	struct bio *bio;
> +	u32 flags = 0;
> +
> +	if (unmap)
> +		flags |= VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP;
> +
> +	range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
> +	if (!range)
> +		return -ENOMEM;
> +
> +	__rq_for_each_bio(bio, req) {
> +		u64 sector = bio->bi_iter.bi_sector;
> +		u32 num_sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT;
> +
> +		range[n].flags = cpu_to_le32(flags);
> +		range[n].num_sectors = cpu_to_le32(num_sectors);
> +		range[n].sector = cpu_to_le64(sector);
> +		n++;
> +	}
> +
> +	req->special_vec.bv_page = virt_to_page(range);
> +	req->special_vec.bv_offset = offset_in_page(range);
> +	req->special_vec.bv_len = sizeof(*range) * segments;
> +	req->rq_flags |= RQF_SPECIAL_PAYLOAD;
> +
> +	return 0;
> +}
> +
>  static inline void virtblk_request_done(struct request *req)
>  {
>  	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
>  
> +	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
> +		kfree(page_address(req->special_vec.bv_page) +
> +		      req->special_vec.bv_offset);
> +	}
> +
>  	switch (req_op(req)) {
>  	case REQ_OP_SCSI_IN:
>  	case REQ_OP_SCSI_OUT:
> @@ -225,6 +266,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>  	int qid = hctx->queue_num;
>  	int err;
>  	bool notify = false;
> +	bool unmap = false;
>  	u32 type;
>  
>  	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
> @@ -237,6 +279,13 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>  	case REQ_OP_FLUSH:
>  		type = VIRTIO_BLK_T_FLUSH;
>  		break;
> +	case REQ_OP_DISCARD:
> +		type = VIRTIO_BLK_T_DISCARD;
> +		break;
> +	case REQ_OP_WRITE_ZEROES:
> +		type = VIRTIO_BLK_T_WRITE_ZEROES;
> +		unmap = !(req->cmd_flags & REQ_NOUNMAP);
> +		break;
>  	case REQ_OP_SCSI_IN:
>  	case REQ_OP_SCSI_OUT:
>  		type = VIRTIO_BLK_T_SCSI_CMD;
> @@ -256,6 +305,12 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>  
>  	blk_mq_start_request(req);
>  
> +	if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
> +		err = virtblk_setup_discard_write_zeroes(req, unmap);
> +		if (err)
> +			return BLK_STS_RESOURCE;
> +	}
> +
>  	num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
>  	if (num) {
>  		if (rq_data_dir(req) == WRITE)
> @@ -777,6 +832,42 @@ static int virtblk_probe(struct virtio_device *vdev)
>  	if (!err && opt_io_size)
>  		blk_queue_io_opt(q, blk_size * opt_io_size);
>  
> +	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
> +		q->limits.discard_granularity = blk_size;
> +
> +		virtio_cread(vdev, struct virtio_blk_config,
> +				discard_sector_alignment, &v);
> +		if (v)
> +			q->limits.discard_alignment = v << SECTOR_SHIFT;
> +		else
> +			q->limits.discard_alignment = 0;
> +
> +		virtio_cread(vdev, struct virtio_blk_config,
> +				max_discard_sectors, &v);
> +		if (v)
> +			blk_queue_max_discard_sectors(q, v);
> +		else
> +			blk_queue_max_discard_sectors(q, UINT_MAX);
> +
> +		virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
> +				&v);
> +		if (v && v <= MAX_DISCARD_SEGMENTS)
> +			blk_queue_max_discard_segments(q, v);
> +		else
> +			blk_queue_max_discard_segments(q, MAX_DISCARD_SEGMENTS);
> +
> +		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
> +	}
> +
> +	if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
> +		virtio_cread(vdev, struct virtio_blk_config,
> +				max_write_zeroes_sectors, &v);
> +		if (v)
> +			blk_queue_max_write_zeroes_sectors(q, v);
> +		else
> +			blk_queue_max_write_zeroes_sectors(q, UINT_MAX);
> +	}
> +
>  	virtblk_update_capacity(vblk, false);
>  	virtio_device_ready(vdev);
>  
> @@ -885,14 +976,14 @@ static unsigned int features_legacy[] = {
>  	VIRTIO_BLK_F_SCSI,
>  #endif
>  	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
> -	VIRTIO_BLK_F_MQ,
> +	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
>  }
>  ;
>  static unsigned int features[] = {
>  	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
>  	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
>  	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
> -	VIRTIO_BLK_F_MQ,
> +	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
>  };
>  
>  static struct virtio_driver virtio_blk = {
> diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
> index 9ebe4d968dd5..682afbfe3aa4 100644
> --- a/include/uapi/linux/virtio_blk.h
> +++ b/include/uapi/linux/virtio_blk.h
> @@ -38,6 +38,8 @@
>  #define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
>  #define VIRTIO_BLK_F_TOPOLOGY	10	/* Topology information is available */
>  #define VIRTIO_BLK_F_MQ		12	/* support more than one vq */
> +#define VIRTIO_BLK_F_DISCARD	13	/* DISCARD is supported */
> +#define VIRTIO_BLK_F_WRITE_ZEROES	14	/* WRITE ZEROES is supported */
>  
>  /* Legacy feature bits */
>  #ifndef VIRTIO_BLK_NO_LEGACY
> @@ -86,6 +88,39 @@ struct virtio_blk_config {
>  
>  	/* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
>  	__u16 num_queues;
> +
> +	/* the next 3 entries are guarded by VIRTIO_BLK_F_DISCARD */
> +	/*
> +	 * The maximum discard sectors (in 512-byte sectors) for
> +	 * one segment.
> +	 */
> +	__u32 max_discard_sectors;
> +	/*
> +	 * The maximum number of discard segments in a
> +	 * discard command.
> +	 */
> +	__u32 max_discard_seg;
> +	/* Discard commands must be aligned to this number of sectors. */
> +	__u32 discard_sector_alignment;
> +
> +	/* the next 3 entries are guarded by VIRTIO_BLK_F_WRITE_ZEROES */
> +	/*
> +	 * The maximum number of write zeroes sectors (in 512-byte sectors) in
> +	 * one segment.
> +	 */
> +	__u32 max_write_zeroes_sectors;
> +	/*
> +	 * The maximum number of segments in a write zeroes
> +	 * command.
> +	 */
> +	__u32 max_write_zeroes_seg;
> +	/*
> +	 * Set if a VIRTIO_BLK_T_WRITE_ZEROES request may result in the
> +	 * deallocation of one or more of the sectors.
> +	 */
> +	__u8 write_zeroes_may_unmap;
> +
> +	__u8 unused1[3];
>  } __attribute__((packed));
>  
>  /*
> @@ -114,6 +149,12 @@ struct virtio_blk_config {
>  /* Get device ID command */
>  #define VIRTIO_BLK_T_GET_ID    8
>  
> +/* Discard command */
> +#define VIRTIO_BLK_T_DISCARD	11
> +
> +/* Write zeroes command */
> +#define VIRTIO_BLK_T_WRITE_ZEROES	13
> +
>  #ifndef VIRTIO_BLK_NO_LEGACY
>  /* Barrier before this op. */
>  #define VIRTIO_BLK_T_BARRIER	0x80000000
> @@ -133,6 +174,19 @@ struct virtio_blk_outhdr {
>  	__virtio64 sector;
>  };
>  
> +/* Unmap this range (only valid for write zeroes command) */
> +#define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP	0x00000001
> +
> +/* Discard/write zeroes range for each request. */
> +struct virtio_blk_discard_write_zeroes {
> +	/* discard/write zeroes start sector */
> +	__virtio64 sector;
> +	/* number of discard/write zeroes sectors */
> +	__virtio32 num_sectors;
> +	/* flags for this range */
> +	__virtio32 flags;
> +};
> +
>  #ifndef VIRTIO_BLK_NO_LEGACY
>  struct virtio_scsi_inhdr {
>  	__virtio32 errors;
> -- 
> 2.19.0.605.g01d371f741-goog

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v8] virtio_blk: add discard and write zeroes support
@ 2018-10-15  0:54     ` Michael S. Tsirkin
  0 siblings, 0 replies; 50+ messages in thread
From: Michael S. Tsirkin @ 2018-10-15  0:54 UTC (permalink / raw)
  To: Daniel Verkamp
  Cc: Jens Axboe, virtualization, linux-block, Stefan Hajnoczi,
	pbonzini, Changpeng Liu

On Fri, Oct 12, 2018 at 02:06:28PM -0700, Daniel Verkamp wrote:
> From: Changpeng Liu <changpeng.liu@intel.com>
> 
> In commit 88c85538, "virtio-blk: add discard and write zeroes features
> to specification" (https://github.com/oasis-tcs/virtio-spec), the virtio
> block specification has been extended to add VIRTIO_BLK_T_DISCARD and
> VIRTIO_BLK_T_WRITE_ZEROES commands.  This patch enables support for
> discard and write zeroes in the virtio-blk driver when the device
> advertises the corresponding features, VIRTIO_BLK_F_DISCARD and
> VIRTIO_BLK_F_WRITE_ZEROES.
> 
> Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
> Signed-off-by: Daniel Verkamp <dverkamp@chromium.org>

Cc Paolo as well.

> ---
> dverkamp: I've picked up this patch and made a few minor changes (as
> listed below); most notably, I changed the kmalloc back to GFP_ATOMIC,
> since it can be called from a context where sleeping is not allowed.
> To prevent large allocations, I've also clamped the maximum number of
> discard segments to 256; this results in a 4K allocation and should be
> plenty of descriptors for most use cases.
> 
> I also removed most of the description from the commit message, since it
> was duplicating the comments from virtio_blk.h and quoting parts of the
> spec without adding any extra information.  I have tested this iteration
> of the patch using crosvm with modifications to enable the new features:
> https://chromium.googlesource.com/chromiumos/platform/crosvm/
> 
> CHANGELOG:
> v8: [dverkamp] replace shifts by 9 with SECTOR_SHIFT constant
> v7: [dverkamp] use GFP_ATOMIC for allocation that may not sleep; clarify
> descriptor flags field; comment wording cleanups.
> v6: don't set T_OUT bit to discard and write zeroes commands.
> v5: use new block layer API: blk_queue_flag_set.
> v4: several optimizations based on MST's comments, remove bit field
> usage for command descriptor.
> v3: define the virtio-blk protocol to add discard and write zeroes
> support, first version implementation based on proposed specification.
> v2: add write zeroes command support.
> v1: initial proposal implementation for discard command.
> ---
>  drivers/block/virtio_blk.c      | 95 ++++++++++++++++++++++++++++++++-
>  include/uapi/linux/virtio_blk.h | 54 +++++++++++++++++++
>  2 files changed, 147 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index 23752dc99b00..04a7ae602e2f 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -18,6 +18,7 @@
>  
>  #define PART_BITS 4
>  #define VQ_NAME_LEN 16
> +#define MAX_DISCARD_SEGMENTS 256
>  
>  static int major;
>  static DEFINE_IDA(vd_index_ida);
> @@ -172,10 +173,50 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
>  	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
>  }
>  
> +
> +static inline int virtblk_setup_discard_write_zeroes(struct request *req,
> +						bool unmap)
> +{
> +	unsigned short segments = blk_rq_nr_discard_segments(req);
> +	unsigned short n = 0;
> +	struct virtio_blk_discard_write_zeroes *range;
> +	struct bio *bio;
> +	u32 flags = 0;
> +
> +	if (unmap)
> +		flags |= VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP;
> +
> +	range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
> +	if (!range)
> +		return -ENOMEM;
> +
> +	__rq_for_each_bio(bio, req) {
> +		u64 sector = bio->bi_iter.bi_sector;
> +		u32 num_sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT;
> +
> +		range[n].flags = cpu_to_le32(flags);
> +		range[n].num_sectors = cpu_to_le32(num_sectors);
> +		range[n].sector = cpu_to_le64(sector);
> +		n++;
> +	}
> +
> +	req->special_vec.bv_page = virt_to_page(range);
> +	req->special_vec.bv_offset = offset_in_page(range);
> +	req->special_vec.bv_len = sizeof(*range) * segments;
> +	req->rq_flags |= RQF_SPECIAL_PAYLOAD;
> +
> +	return 0;
> +}
> +
>  static inline void virtblk_request_done(struct request *req)
>  {
>  	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
>  
> +	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
> +		kfree(page_address(req->special_vec.bv_page) +
> +		      req->special_vec.bv_offset);
> +	}
> +
>  	switch (req_op(req)) {
>  	case REQ_OP_SCSI_IN:
>  	case REQ_OP_SCSI_OUT:
> @@ -225,6 +266,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>  	int qid = hctx->queue_num;
>  	int err;
>  	bool notify = false;
> +	bool unmap = false;
>  	u32 type;
>  
>  	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
> @@ -237,6 +279,13 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>  	case REQ_OP_FLUSH:
>  		type = VIRTIO_BLK_T_FLUSH;
>  		break;
> +	case REQ_OP_DISCARD:
> +		type = VIRTIO_BLK_T_DISCARD;
> +		break;
> +	case REQ_OP_WRITE_ZEROES:
> +		type = VIRTIO_BLK_T_WRITE_ZEROES;
> +		unmap = !(req->cmd_flags & REQ_NOUNMAP);
> +		break;
>  	case REQ_OP_SCSI_IN:
>  	case REQ_OP_SCSI_OUT:
>  		type = VIRTIO_BLK_T_SCSI_CMD;
> @@ -256,6 +305,12 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>  
>  	blk_mq_start_request(req);
>  
> +	if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
> +		err = virtblk_setup_discard_write_zeroes(req, unmap);
> +		if (err)
> +			return BLK_STS_RESOURCE;
> +	}
> +
>  	num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
>  	if (num) {
>  		if (rq_data_dir(req) == WRITE)
> @@ -777,6 +832,42 @@ static int virtblk_probe(struct virtio_device *vdev)
>  	if (!err && opt_io_size)
>  		blk_queue_io_opt(q, blk_size * opt_io_size);
>  
> +	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
> +		q->limits.discard_granularity = blk_size;
> +
> +		virtio_cread(vdev, struct virtio_blk_config,
> +				discard_sector_alignment, &v);
> +		if (v)
> +			q->limits.discard_alignment = v << SECTOR_SHIFT;
> +		else
> +			q->limits.discard_alignment = 0;
> +
> +		virtio_cread(vdev, struct virtio_blk_config,
> +				max_discard_sectors, &v);
> +		if (v)
> +			blk_queue_max_discard_sectors(q, v);
> +		else
> +			blk_queue_max_discard_sectors(q, UINT_MAX);
> +
> +		virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
> +				&v);
> +		if (v && v <= MAX_DISCARD_SEGMENTS)
> +			blk_queue_max_discard_segments(q, v);
> +		else
> +			blk_queue_max_discard_segments(q, MAX_DISCARD_SEGMENTS);
> +
> +		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
> +	}
> +
> +	if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
> +		virtio_cread(vdev, struct virtio_blk_config,
> +				max_write_zeroes_sectors, &v);
> +		if (v)
> +			blk_queue_max_write_zeroes_sectors(q, v);
> +		else
> +			blk_queue_max_write_zeroes_sectors(q, UINT_MAX);
> +	}
> +
>  	virtblk_update_capacity(vblk, false);
>  	virtio_device_ready(vdev);
>  
> @@ -885,14 +976,14 @@ static unsigned int features_legacy[] = {
>  	VIRTIO_BLK_F_SCSI,
>  #endif
>  	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
> -	VIRTIO_BLK_F_MQ,
> +	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
>  }
>  ;
>  static unsigned int features[] = {
>  	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
>  	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
>  	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
> -	VIRTIO_BLK_F_MQ,
> +	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
>  };
>  
>  static struct virtio_driver virtio_blk = {
> diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
> index 9ebe4d968dd5..682afbfe3aa4 100644
> --- a/include/uapi/linux/virtio_blk.h
> +++ b/include/uapi/linux/virtio_blk.h
> @@ -38,6 +38,8 @@
>  #define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
>  #define VIRTIO_BLK_F_TOPOLOGY	10	/* Topology information is available */
>  #define VIRTIO_BLK_F_MQ		12	/* support more than one vq */
> +#define VIRTIO_BLK_F_DISCARD	13	/* DISCARD is supported */
> +#define VIRTIO_BLK_F_WRITE_ZEROES	14	/* WRITE ZEROES is supported */
>  
>  /* Legacy feature bits */
>  #ifndef VIRTIO_BLK_NO_LEGACY
> @@ -86,6 +88,39 @@ struct virtio_blk_config {
>  
>  	/* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
>  	__u16 num_queues;
> +
> +	/* the next 3 entries are guarded by VIRTIO_BLK_F_DISCARD */
> +	/*
> +	 * The maximum discard sectors (in 512-byte sectors) for
> +	 * one segment.
> +	 */
> +	__u32 max_discard_sectors;
> +	/*
> +	 * The maximum number of discard segments in a
> +	 * discard command.
> +	 */
> +	__u32 max_discard_seg;
> +	/* Discard commands must be aligned to this number of sectors. */
> +	__u32 discard_sector_alignment;
> +
> +	/* the next 3 entries are guarded by VIRTIO_BLK_F_WRITE_ZEROES */
> +	/*
> +	 * The maximum number of write zeroes sectors (in 512-byte sectors) in
> +	 * one segment.
> +	 */
> +	__u32 max_write_zeroes_sectors;
> +	/*
> +	 * The maximum number of segments in a write zeroes
> +	 * command.
> +	 */
> +	__u32 max_write_zeroes_seg;
> +	/*
> +	 * Set if a VIRTIO_BLK_T_WRITE_ZEROES request may result in the
> +	 * deallocation of one or more of the sectors.
> +	 */
> +	__u8 write_zeroes_may_unmap;
> +
> +	__u8 unused1[3];
>  } __attribute__((packed));
>  
>  /*
> @@ -114,6 +149,12 @@ struct virtio_blk_config {
>  /* Get device ID command */
>  #define VIRTIO_BLK_T_GET_ID    8
>  
> +/* Discard command */
> +#define VIRTIO_BLK_T_DISCARD	11
> +
> +/* Write zeroes command */
> +#define VIRTIO_BLK_T_WRITE_ZEROES	13
> +
>  #ifndef VIRTIO_BLK_NO_LEGACY
>  /* Barrier before this op. */
>  #define VIRTIO_BLK_T_BARRIER	0x80000000
> @@ -133,6 +174,19 @@ struct virtio_blk_outhdr {
>  	__virtio64 sector;
>  };
>  
> +/* Unmap this range (only valid for write zeroes command) */
> +#define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP	0x00000001
> +
> +/* Discard/write zeroes range for each request. */
> +struct virtio_blk_discard_write_zeroes {
> +	/* discard/write zeroes start sector */
> +	__virtio64 sector;
> +	/* number of discard/write zeroes sectors */
> +	__virtio32 num_sectors;
> +	/* flags for this range */
> +	__virtio32 flags;
> +};
> +
>  #ifndef VIRTIO_BLK_NO_LEGACY
>  struct virtio_scsi_inhdr {
>  	__virtio32 errors;
> -- 
> 2.19.0.605.g01d371f741-goog

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v8] virtio_blk: add discard and write zeroes support
  2018-10-12 21:06   ` Daniel Verkamp
  (?)
  (?)
@ 2018-10-15  9:21   ` Ming Lei
  -1 siblings, 0 replies; 50+ messages in thread
From: Ming Lei @ 2018-10-15  9:21 UTC (permalink / raw)
  To: Daniel Verkamp
  Cc: virtualization, linux-block, Michael S. Tsirkin, Jason Wang,
	Jens Axboe, Stefan Hajnoczi, Changpeng Liu

On Fri, Oct 12, 2018 at 02:06:28PM -0700, Daniel Verkamp wrote:
> From: Changpeng Liu <changpeng.liu@intel.com>
> 
> In commit 88c85538, "virtio-blk: add discard and write zeroes features
> to specification" (https://github.com/oasis-tcs/virtio-spec), the virtio
> block specification has been extended to add VIRTIO_BLK_T_DISCARD and
> VIRTIO_BLK_T_WRITE_ZEROES commands.  This patch enables support for
> discard and write zeroes in the virtio-blk driver when the device
> advertises the corresponding features, VIRTIO_BLK_F_DISCARD and
> VIRTIO_BLK_F_WRITE_ZEROES.
> 
> Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
> Signed-off-by: Daniel Verkamp <dverkamp@chromium.org>
> ---
> dverkamp: I've picked up this patch and made a few minor changes (as
> listed below); most notably, I changed the kmalloc back to GFP_ATOMIC,
> since it can be called from a context where sleeping is not allowed.
> To prevent large allocations, I've also clamped the maximum number of
> discard segments to 256; this results in a 4K allocation and should be
> plenty of descriptors for most use cases.
> 
> I also removed most of the description from the commit message, since it
> was duplicating the comments from virtio_blk.h and quoting parts of the
> spec without adding any extra information.  I have tested this iteration
> of the patch using crosvm with modifications to enable the new features:
> https://chromium.googlesource.com/chromiumos/platform/crosvm/
> 
> CHANGELOG:
> v8: [dverkamp] replace shifts by 9 with SECTOR_SHIFT constant
> v7: [dverkamp] use GFP_ATOMIC for allocation that may not sleep; clarify
> descriptor flags field; comment wording cleanups.
> v6: don't set T_OUT bit to discard and write zeroes commands.
> v5: use new block layer API: blk_queue_flag_set.
> v4: several optimizations based on MST's comments, remove bit field
> usage for command descriptor.
> v3: define the virtio-blk protocol to add discard and write zeroes
> support, first version implementation based on proposed specification.
> v2: add write zeroes command support.
> v1: initial proposal implementation for discard command.
> ---
>  drivers/block/virtio_blk.c      | 95 ++++++++++++++++++++++++++++++++-
>  include/uapi/linux/virtio_blk.h | 54 +++++++++++++++++++
>  2 files changed, 147 insertions(+), 2 deletions(-)

The implementation is quite straightforward, just some minor points, see
inline comment.

> 
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index 23752dc99b00..04a7ae602e2f 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -18,6 +18,7 @@
>  
>  #define PART_BITS 4
>  #define VQ_NAME_LEN 16
> +#define MAX_DISCARD_SEGMENTS 256
>  
>  static int major;
>  static DEFINE_IDA(vd_index_ida);
> @@ -172,10 +173,50 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
>  	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
>  }
>  
> +
> +static inline int virtblk_setup_discard_write_zeroes(struct request *req,
> +						bool unmap)
> +{
> +	unsigned short segments = blk_rq_nr_discard_segments(req);
> +	unsigned short n = 0;
> +	struct virtio_blk_discard_write_zeroes *range;
> +	struct bio *bio;
> +	u32 flags = 0;
> +
> +	if (unmap)
> +		flags |= VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP;
> +
> +	range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
> +	if (!range)
> +		return -ENOMEM;
> +
> +	__rq_for_each_bio(bio, req) {
> +		u64 sector = bio->bi_iter.bi_sector;
> +		u32 num_sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT;
> +
> +		range[n].flags = cpu_to_le32(flags);
> +		range[n].num_sectors = cpu_to_le32(num_sectors);
> +		range[n].sector = cpu_to_le64(sector);
> +		n++;
> +	}
> +
> +	req->special_vec.bv_page = virt_to_page(range);
> +	req->special_vec.bv_offset = offset_in_page(range);
> +	req->special_vec.bv_len = sizeof(*range) * segments;
> +	req->rq_flags |= RQF_SPECIAL_PAYLOAD;
> +
> +	return 0;
> +}
> +
>  static inline void virtblk_request_done(struct request *req)
>  {
>  	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
>  
> +	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
> +		kfree(page_address(req->special_vec.bv_page) +
> +		      req->special_vec.bv_offset);
> +	}
> +
>  	switch (req_op(req)) {
>  	case REQ_OP_SCSI_IN:
>  	case REQ_OP_SCSI_OUT:
> @@ -225,6 +266,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>  	int qid = hctx->queue_num;
>  	int err;
>  	bool notify = false;
> +	bool unmap = false;
>  	u32 type;
>  
>  	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
> @@ -237,6 +279,13 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>  	case REQ_OP_FLUSH:
>  		type = VIRTIO_BLK_T_FLUSH;
>  		break;
> +	case REQ_OP_DISCARD:
> +		type = VIRTIO_BLK_T_DISCARD;
> +		break;
> +	case REQ_OP_WRITE_ZEROES:
> +		type = VIRTIO_BLK_T_WRITE_ZEROES;
> +		unmap = !(req->cmd_flags & REQ_NOUNMAP);
> +		break;
>  	case REQ_OP_SCSI_IN:
>  	case REQ_OP_SCSI_OUT:
>  		type = VIRTIO_BLK_T_SCSI_CMD;
> @@ -256,6 +305,12 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>  
>  	blk_mq_start_request(req);
>  
> +	if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
> +		err = virtblk_setup_discard_write_zeroes(req, unmap);
> +		if (err)
> +			return BLK_STS_RESOURCE;
> +	}
> +
>  	num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
>  	if (num) {
>  		if (rq_data_dir(req) == WRITE)
> @@ -777,6 +832,42 @@ static int virtblk_probe(struct virtio_device *vdev)
>  	if (!err && opt_io_size)
>  		blk_queue_io_opt(q, blk_size * opt_io_size);
>  
> +	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
> +		q->limits.discard_granularity = blk_size;
> +
> +		virtio_cread(vdev, struct virtio_blk_config,
> +				discard_sector_alignment, &v);
> +		if (v)
> +			q->limits.discard_alignment = v << SECTOR_SHIFT;
> +		else
> +			q->limits.discard_alignment = 0;

It may be better to use  "v ? v << SECTOR_SHIFT : 0".

> +
> +		virtio_cread(vdev, struct virtio_blk_config,
> +				max_discard_sectors, &v);
> +		if (v)
> +			blk_queue_max_discard_sectors(q, v);
> +		else
> +			blk_queue_max_discard_sectors(q, UINT_MAX);

Same as above.

> +
> +		virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
> +				&v);
> +		if (v && v <= MAX_DISCARD_SEGMENTS)
> +			blk_queue_max_discard_segments(q, v);
> +		else
> +			blk_queue_max_discard_segments(q, MAX_DISCARD_SEGMENTS);

It may be better to use min_not_zero().

> +
> +		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
> +	}
> +
> +	if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
> +		virtio_cread(vdev, struct virtio_blk_config,
> +				max_write_zeroes_sectors, &v);
> +		if (v)
> +			blk_queue_max_write_zeroes_sectors(q, v);
> +		else
> +			blk_queue_max_write_zeroes_sectors(q, UINT_MAX);
> +	}

Same as above.

> +
>  	virtblk_update_capacity(vblk, false);
>  	virtio_device_ready(vdev);
>  
> @@ -885,14 +976,14 @@ static unsigned int features_legacy[] = {
>  	VIRTIO_BLK_F_SCSI,
>  #endif
>  	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
> -	VIRTIO_BLK_F_MQ,
> +	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
>  }
>  ;
>  static unsigned int features[] = {
>  	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
>  	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
>  	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
> -	VIRTIO_BLK_F_MQ,
> +	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
>  };
>  
>  static struct virtio_driver virtio_blk = {
> diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
> index 9ebe4d968dd5..682afbfe3aa4 100644
> --- a/include/uapi/linux/virtio_blk.h
> +++ b/include/uapi/linux/virtio_blk.h
> @@ -38,6 +38,8 @@
>  #define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
>  #define VIRTIO_BLK_F_TOPOLOGY	10	/* Topology information is available */
>  #define VIRTIO_BLK_F_MQ		12	/* support more than one vq */
> +#define VIRTIO_BLK_F_DISCARD	13	/* DISCARD is supported */
> +#define VIRTIO_BLK_F_WRITE_ZEROES	14	/* WRITE ZEROES is supported */
>  
>  /* Legacy feature bits */
>  #ifndef VIRTIO_BLK_NO_LEGACY
> @@ -86,6 +88,39 @@ struct virtio_blk_config {
>  
>  	/* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
>  	__u16 num_queues;
> +
> +	/* the next 3 entries are guarded by VIRTIO_BLK_F_DISCARD */
> +	/*
> +	 * The maximum discard sectors (in 512-byte sectors) for
> +	 * one segment.
> +	 */
> +	__u32 max_discard_sectors;
> +	/*
> +	 * The maximum number of discard segments in a
> +	 * discard command.
> +	 */
> +	__u32 max_discard_seg;
> +	/* Discard commands must be aligned to this number of sectors. */
> +	__u32 discard_sector_alignment;
> +
> +	/* the next 3 entries are guarded by VIRTIO_BLK_F_WRITE_ZEROES */
> +	/*
> +	 * The maximum number of write zeroes sectors (in 512-byte sectors) in
> +	 * one segment.
> +	 */
> +	__u32 max_write_zeroes_sectors;
> +	/*
> +	 * The maximum number of segments in a write zeroes
> +	 * command.
> +	 */
> +	__u32 max_write_zeroes_seg;
> +	/*
> +	 * Set if a VIRTIO_BLK_T_WRITE_ZEROES request may result in the
> +	 * deallocation of one or more of the sectors.
> +	 */
> +	__u8 write_zeroes_may_unmap;
> +
> +	__u8 unused1[3];
>  } __attribute__((packed));
>  
>  /*
> @@ -114,6 +149,12 @@ struct virtio_blk_config {
>  /* Get device ID command */
>  #define VIRTIO_BLK_T_GET_ID    8
>  
> +/* Discard command */
> +#define VIRTIO_BLK_T_DISCARD	11
> +
> +/* Write zeroes command */
> +#define VIRTIO_BLK_T_WRITE_ZEROES	13
> +
>  #ifndef VIRTIO_BLK_NO_LEGACY
>  /* Barrier before this op. */
>  #define VIRTIO_BLK_T_BARRIER	0x80000000
> @@ -133,6 +174,19 @@ struct virtio_blk_outhdr {
>  	__virtio64 sector;
>  };
>  
> +/* Unmap this range (only valid for write zeroes command) */
> +#define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP	0x00000001
> +
> +/* Discard/write zeroes range for each request. */
> +struct virtio_blk_discard_write_zeroes {
> +	/* discard/write zeroes start sector */
> +	__virtio64 sector;
> +	/* number of discard/write zeroes sectors */
> +	__virtio32 num_sectors;
> +	/* flags for this range */
> +	__virtio32 flags;
> +};
> +
>  #ifndef VIRTIO_BLK_NO_LEGACY
>  struct virtio_scsi_inhdr {
>  	__virtio32 errors;
> -- 
> 2.19.0.605.g01d371f741-goog
> 

Thanks,
Ming

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v8] virtio_blk: add discard and write zeroes support
  2018-10-12 21:06   ` Daniel Verkamp
@ 2018-10-15  9:27     ` Christoph Hellwig
  -1 siblings, 0 replies; 50+ messages in thread
From: Christoph Hellwig @ 2018-10-15  9:27 UTC (permalink / raw)
  To: Daniel Verkamp
  Cc: virtualization, linux-block, Michael S. Tsirkin, Jason Wang,
	Jens Axboe, Stefan Hajnoczi, Changpeng Liu

On Fri, Oct 12, 2018 at 02:06:28PM -0700, Daniel Verkamp wrote:
> From: Changpeng Liu <changpeng.liu@intel.com>
> 
> In commit 88c85538, "virtio-blk: add discard and write zeroes features
> to specification" (https://github.com/oasis-tcs/virtio-spec), the virtio

There are some issues in this spec.  For one, using multiple ranges
for write zeroes as well is rather inefficient.  Write zeroes really should
use the same format as read and write.

Second the unmap flag isn't properly specified at all, as nothing
says the device may not unmap without the unmap flag.  Please take
a look at the SCSI or NVMe spec for some guidance.

> +static inline int virtblk_setup_discard_write_zeroes(struct request *req,
> +						bool unmap)

Why is this an inline function?

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v8] virtio_blk: add discard and write zeroes support
@ 2018-10-15  9:27     ` Christoph Hellwig
  0 siblings, 0 replies; 50+ messages in thread
From: Christoph Hellwig @ 2018-10-15  9:27 UTC (permalink / raw)
  To: Daniel Verkamp
  Cc: Jens Axboe, Michael S. Tsirkin, virtualization, linux-block,
	Stefan Hajnoczi, Changpeng Liu

On Fri, Oct 12, 2018 at 02:06:28PM -0700, Daniel Verkamp wrote:
> From: Changpeng Liu <changpeng.liu@intel.com>
> 
> In commit 88c85538, "virtio-blk: add discard and write zeroes features
> to specification" (https://github.com/oasis-tcs/virtio-spec), the virtio

There are some issues in this spec.  For one, using multiple ranges
for write zeroes as well is rather inefficient.  Write zeroes really should
use the same format as read and write.

Second the unmap flag isn't properly specified at all, as nothing
says the device may not unmap without the unmap flag.  Please take
a look at the SCSI or NVMe spec for some guidance.

> +static inline int virtblk_setup_discard_write_zeroes(struct request *req,
> +						bool unmap)

Why is this an inline function?
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v8] virtio_blk: add discard and write zeroes support
  2018-10-15  9:27     ` Christoph Hellwig
@ 2018-10-15 23:16       ` Daniel Verkamp
  -1 siblings, 0 replies; 50+ messages in thread
From: Daniel Verkamp @ 2018-10-15 23:16 UTC (permalink / raw)
  To: hch
  Cc: virtualization, linux-block, mst, jasowang, axboe, stefanha,
	Changpeng Liu

On Mon, Oct 15, 2018 at 2:27 AM Christoph Hellwig <hch@infradead.org> wrote:
> On Fri, Oct 12, 2018 at 02:06:28PM -0700, Daniel Verkamp wrote:
> > From: Changpeng Liu <changpeng.liu@intel.com>
> >
> > In commit 88c85538, "virtio-blk: add discard and write zeroes features
> > to specification" (https://github.com/oasis-tcs/virtio-spec), the virtio
>
> There is some issues in this spec.  For one using the multiple ranges
> also for write zeroes is rather inefficient.  Write zeroes really should
> use the same format as read and write.

I wasn't involved in the writing of the spec, so I'll defer to Michael
and Changpeng here, but I'm not sure how "set in stone" the virtio
specification is, or if it can be updated somehow without breaking
compatibility.

I agree that Write Zeroes would be simpler to implement as a single
LBA + length rather than a list.  However, it's not really possible to
use the same format as the regular virtio block read/write commands
(VIRTIO_BLK_T_IN/VIRTIO_BLK_T_OUT), since the read/write commands do
not specify a length explicitly; length is implied by the length of
the data buffer as defined by the virtio descriptor, but a Write
Zeroes command does not require a data buffer.  At best, this could be
a separate command mirroring the layout of struct virtio_blk_req but
with data replaced with a length field; I'm not sure that buys much in
the way of consistency.

> Second the unmap flag isn't properly specified at all, as nothing
> says the device may not unmap without the unmap flag.  Please take
> a look at the SCSI or NVMe spec for some guidance.

This could probably use some clarifying text in the specification, but
given that there is nothing in the spec describing what the device
needs to do when unmap = 0, I would assume that the device can do
whatever it likes, as long as the blocks read back as 0s afterwards.
Reading back 0s is required by the definition of the Write Zeroes
command in the same virtio spec change.  It would probably be good to
clarify this and explicitly define what the device is allowed to do in
response to both settings of the unmap bit.

My understanding of the corresponding feature in NVMe (the Deallocate
bit in the Write Zeroes command) is that the only difference between
Deallocate = 1 and 0 is that the device "should" versus "may" (no
"shall" on either side) deallocate the corresponding blocks, but only
if the device supports reading 0s back after blocks are deallocated.
If the device does not support reading 0s after deallocation, it is
not allowed to deallocate blocks as part of a Write Zeroes command
regardless of the setting of the Deallocate bit.

Some similar wording could probably be added to the virtio spec to
clarify the meaning of unmap, although I would prefer something that
makes it a little clearer that the bit is only intended as a hint from
the driver to indicate whether the device should attempt to keep
storage allocated for the zeroed blocks, if that is indeed the
intended behavior.

Is there some in-kernel doc that describes what behavior the Linux
block layer needs from a write zeroes command?

> > +static inline int virtblk_setup_discard_write_zeroes(struct request *req,
> > +                                             bool unmap)
>
> Why is this an inline function?

I don't think there's any reason it needs to be inline; I can drop the
inline in the next revision.

Given (as far as I can tell) your concerns seem to apply to the Write
Zeroes command specifically, would it be reasonable to start with a
patch that just adds support for the Discard command (along with fixes
for Ming's feedback)?  This would be sufficient for my particular use
case (although I can't speak for Changpeng), and we can revisit Write
Zeroes once the spec concerns are worked out.

Thanks,
-- Daniel

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v8] virtio_blk: add discard and write zeroes support
@ 2018-10-15 23:16       ` Daniel Verkamp
  0 siblings, 0 replies; 50+ messages in thread
From: Daniel Verkamp @ 2018-10-15 23:16 UTC (permalink / raw)
  To: hch; +Cc: axboe, mst, virtualization, linux-block, stefanha, Changpeng Liu

On Mon, Oct 15, 2018 at 2:27 AM Christoph Hellwig <hch@infradead.org> wrote:
> On Fri, Oct 12, 2018 at 02:06:28PM -0700, Daniel Verkamp wrote:
> > From: Changpeng Liu <changpeng.liu@intel.com>
> >
> > In commit 88c85538, "virtio-blk: add discard and write zeroes features
> > to specification" (https://github.com/oasis-tcs/virtio-spec), the virtio
>
> There is some issues in this spec.  For one using the multiple ranges
> also for write zeroes is rather inefficient.  Write zeroes really should
> use the same format as read and write.

I wasn't involved in the writing of the spec, so I'll defer to Michael
and Changpeng here, but I'm not sure how "set in stone" the virtio
specification is, or if it can be updated somehow without breaking
compatibility.

I agree that Write Zeroes would be simpler to implement as a single
LBA + length rather than a list.  However, it's not really possible to
use the same format as the regular virtio block read/write commands
(VIRTIO_BLK_T_IN/VIRTIO_BLK_T_OUT), since the read/write commands do
not specify a length explicitly; length is implied by the length of
the data buffer as defined by the virtio descriptor, but a Write
Zeroes command does not require a data buffer.  At best, this could be
a separate command mirroring the layout of struct virtio_blk_req but
with data replaced with a length field; I'm not sure that buys much in
the way of consistency.

> Second the unmap flag isn't properly specified at all, as nothing
> says the device may not unmap without the unmap flag.  Please take
> a look at the SCSI or NVMe spec for some guidance.

This could probably use some clarifying text in the specification, but
given that there is nothing in the spec describing what the device
needs to do when unmap = 0, I would assume that the device can do
whatever it likes, as long as the blocks read back as 0s afterwards.
Reading back 0s is required by the definition of the Write Zeroes
command in the same virtio spec change.  It would probably be good to
clarify this and explicitly define what the device is allowed to do in
response to both settings of the unmap bit.

My understanding of the corresponding feature in NVMe (the Deallocate
bit in the Write Zeroes command) is that the only difference between
Deallocate = 1 and 0 is that the device "should" versus "may" (no
"shall" on either side) deallocate the corresponding blocks, but only
if the device supports reading 0s back after blocks are deallocated.
If the device does not support reading 0s after deallocation, it is
not allowed to deallocate blocks as part of a Write Zeroes command
regardless of the setting of the Deallocate bit.

Some similar wording could probably be added to the virtio spec to
clarify the meaning of unmap, although I would prefer something that
makes it a little clearer that the bit is only intended as a hint from
the driver to indicate whether the device should attempt to keep
storage allocated for the zeroed blocks, if that is indeed the
intended behavior.

Is there some in-kernel doc that describes what behavior the Linux
block layer needs from a write zeroes command?

> > +static inline int virtblk_setup_discard_write_zeroes(struct request *req,
> > +                                             bool unmap)
>
> Why is this an inline function?

I don't think there's any reason it needs to be inline; I can drop the
inline in the next revision.

Given (as far as I can tell) your concerns seem to apply to the Write
Zeroes command specifically, would it be reasonable to start with a
patch that just adds support for the Discard command (along with fixes
for Ming's feedback)?  This would be sufficient for my particular use
case (although I can't speak for Changpeng), and we can revisit Write
Zeroes once the spec concerns are worked out.

Thanks,
-- Daniel
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 50+ messages in thread

* RE: [PATCH v8] virtio_blk: add discard and write zeroes support
  2018-10-15  9:27     ` Christoph Hellwig
@ 2018-10-16  1:40       ` Liu, Changpeng
  -1 siblings, 0 replies; 50+ messages in thread
From: Liu, Changpeng @ 2018-10-16  1:40 UTC (permalink / raw)
  To: Christoph Hellwig, Daniel Verkamp
  Cc: virtualization, linux-block, Michael S. Tsirkin, Jason Wang,
	Jens Axboe, Stefan Hajnoczi



> -----Original Message-----
> From: Christoph Hellwig [mailto:hch@infradead.org]
> Sent: Monday, October 15, 2018 5:28 PM
> To: Daniel Verkamp <dverkamp@chromium.org>
> Cc: virtualization@lists.linux-foundation.org; linux-block@vger.kernel.org;
> Michael S. Tsirkin <mst@redhat.com>; Jason Wang <jasowang@redhat.com>;
> Jens Axboe <axboe@kernel.dk>; Stefan Hajnoczi <stefanha@redhat.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: Re: [PATCH v8] virtio_blk: add discard and write zeroes support
> 
> On Fri, Oct 12, 2018 at 02:06:28PM -0700, Daniel Verkamp wrote:
> > From: Changpeng Liu <changpeng.liu@intel.com>
> >
> > In commit 88c85538, "virtio-blk: add discard and write zeroes features
> > to specification" (https://github.com/oasis-tcs/virtio-spec), the virtio
> 
> There is some issues in this spec.  For one using the multiple ranges
> also for write zeroes is rather inefficient.  Write zeroes really should
> use the same format as read and write.
Because there is no length parameter for virtio block specification, adding the
two extra commands will not break the existing specification and driver implementation.
Also existing Linux implementation for write zeroes will not use multiple segment
at all so there is always one range in practice.
> 
> Second the unmap flag isn't properly specified at all, as nothing
> says the device may not unmap without the unmap flag.  Please take
> a look at the SCSI or NVMe spec for some guidance.
The unmap flag is only used for write zeroes command, as discard command will not
guarantee the spaces will be zeroed, so adding this flag means (Discard + Write Zeroes),
so this definitely is backend related, the backend implementation can use same code
to implement discard and write zeroes commands.
> 
> > +static inline int virtblk_setup_discard_write_zeroes(struct request *req,
> > +						bool unmap)
> 
> Why is this an inline function?

^ permalink raw reply	[flat|nested] 50+ messages in thread

* RE: [PATCH v8] virtio_blk: add discard and write zeroes support
@ 2018-10-16  1:40       ` Liu, Changpeng
  0 siblings, 0 replies; 50+ messages in thread
From: Liu, Changpeng @ 2018-10-16  1:40 UTC (permalink / raw)
  To: Christoph Hellwig, Daniel Verkamp
  Cc: Jens Axboe, Michael S. Tsirkin, virtualization, linux-block,
	Stefan Hajnoczi



> -----Original Message-----
> From: Christoph Hellwig [mailto:hch@infradead.org]
> Sent: Monday, October 15, 2018 5:28 PM
> To: Daniel Verkamp <dverkamp@chromium.org>
> Cc: virtualization@lists.linux-foundation.org; linux-block@vger.kernel.org;
> Michael S. Tsirkin <mst@redhat.com>; Jason Wang <jasowang@redhat.com>;
> Jens Axboe <axboe@kernel.dk>; Stefan Hajnoczi <stefanha@redhat.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: Re: [PATCH v8] virtio_blk: add discard and write zeroes support
> 
> On Fri, Oct 12, 2018 at 02:06:28PM -0700, Daniel Verkamp wrote:
> > From: Changpeng Liu <changpeng.liu@intel.com>
> >
> > In commit 88c85538, "virtio-blk: add discard and write zeroes features
> > to specification" (https://github.com/oasis-tcs/virtio-spec), the virtio
> 
> There is some issues in this spec.  For one using the multiple ranges
> also for write zeroes is rather inefficient.  Write zeroes really should
> use the same format as read and write.
Because the virtio block specification has no length parameter, adding the
two extra commands will not break the existing specification or driver implementations.
Also, the existing Linux implementation of write zeroes does not use multiple segments
at all, so there is always one range in practice.
> 
> Second the unmap flag isn't properly specified at all, as nothing
> says the device may not unmap without the unmap flag.  Please take
> a look at the SCSI or NVMe spec for some guidance.
The unmap flag is only used for the write zeroes command. Since the discard command does not
guarantee that the space will be zeroed, setting this flag means (Discard + Write Zeroes).
This is definitely backend related; the backend implementation can use the same code
to implement the discard and write zeroes commands.
> 
> > +static inline int virtblk_setup_discard_write_zeroes(struct request *req,
> > +						bool unmap)
> 
> Why is this an inline function?
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 50+ messages in thread

* RE: [PATCH v8] virtio_blk: add discard and write zeroes support
  2018-10-15 23:16       ` Daniel Verkamp
@ 2018-10-16  1:45         ` Liu, Changpeng
  -1 siblings, 0 replies; 50+ messages in thread
From: Liu, Changpeng @ 2018-10-16  1:45 UTC (permalink / raw)
  To: Daniel Verkamp, hch
  Cc: virtualization, linux-block, mst, jasowang, axboe, stefanha

DQoNCj4gLS0tLS1PcmlnaW5hbCBNZXNzYWdlLS0tLS0NCj4gRnJvbTogRGFuaWVsIFZlcmthbXAg
W21haWx0bzpkdmVya2FtcEBjaHJvbWl1bS5vcmddDQo+IFNlbnQ6IFR1ZXNkYXksIE9jdG9iZXIg
MTYsIDIwMTggNzoxNiBBTQ0KPiBUbzogaGNoQGluZnJhZGVhZC5vcmcNCj4gQ2M6IHZpcnR1YWxp
emF0aW9uQGxpc3RzLmxpbnV4LWZvdW5kYXRpb24ub3JnOyBsaW51eC1ibG9ja0B2Z2VyLmtlcm5l
bC5vcmc7DQo+IG1zdEByZWRoYXQuY29tOyBqYXNvd2FuZ0ByZWRoYXQuY29tOyBheGJvZUBrZXJu
ZWwuZGs7DQo+IHN0ZWZhbmhhQHJlZGhhdC5jb207IExpdSwgQ2hhbmdwZW5nIDxjaGFuZ3Blbmcu
bGl1QGludGVsLmNvbT4NCj4gU3ViamVjdDogUmU6IFtQQVRDSCB2OF0gdmlydGlvX2JsazogYWRk
IGRpc2NhcmQgYW5kIHdyaXRlIHplcm9lcyBzdXBwb3J0DQo+IA0KPiBPbiBNb24sIE9jdCAxNSwg
MjAxOCBhdCAyOjI3IEFNIENocmlzdG9waCBIZWxsd2lnIDxoY2hAaW5mcmFkZWFkLm9yZz4gd3Jv
dGU6DQo+ID4gT24gRnJpLCBPY3QgMTIsIDIwMTggYXQgMDI6MDY6MjhQTSAtMDcwMCwgRGFuaWVs
IFZlcmthbXAgd3JvdGU6DQo+ID4gPiBGcm9tOiBDaGFuZ3BlbmcgTGl1IDxjaGFuZ3BlbmcubGl1
QGludGVsLmNvbT4NCj4gPiA+DQo+ID4gPiBJbiBjb21taXQgODhjODU1MzgsICJ2aXJ0aW8tYmxr
OiBhZGQgZGlzY2FyZCBhbmQgd3JpdGUgemVyb2VzIGZlYXR1cmVzDQo+ID4gPiB0byBzcGVjaWZp
Y2F0aW9uIiAoaHR0cHM6Ly9naXRodWIuY29tL29hc2lzLXRjcy92aXJ0aW8tc3BlYyksIHRoZSB2
aXJ0aW8NCj4gPg0KPiA+IFRoZXJlIGlzIHNvbWUgaXNzdWVzIGluIHRoaXMgc3BlYy4gIEZvciBv
bmUgdXNpbmcgdGhlIG11bHRpcGxlIHJhbmdlcw0KPiA+IGFsc28gZm9yIHdyaXRlIHplcm9lcyBp
cyByYXRoZXIgaW5lZmZpY2llbnQuICBXcml0ZSB6ZXJvZXMgcmVhbGx5IHNob3VsZA0KPiA+IHVz
ZSB0aGUgc2FtZSBmb3JtYXQgYXMgcmVhZCBhbmQgd3JpdGUuDQo+IA0KPiBJIHdhc24ndCBpbnZv
bHZlZCBpbiB0aGUgd3JpdGluZyBvZiB0aGUgc3BlYywgc28gSSdsbCBkZWZlciB0byBNaWNoYWVs
DQo+IGFuZCBDaGFuZ3BlbmcgaGVyZSwgYnV0IEknbSBub3Qgc3VyZSBob3cgInNldCBpbiBzdG9u
ZSIgdGhlIHZpcnRpbw0KPiBzcGVjaWZpY2F0aW9uIGlzLCBvciBpZiBpdCBjYW4gYmUgdXBkYXRl
ZCBzb21laG93IHdpdGhvdXQgYnJlYWtpbmcNCj4gY29tcGF0aWJpbGl0eS4NCj4gDQo+IEkgYWdy
ZWUgdGhhdCBXcml0ZSBaZXJvZXMgd291bGQgYmUgc2ltcGxlciB0byBpbXBsZW1lbnQgYXMgYSBz
aW5nbGUNCj4gTEJBICsgbGVuZ3RoIHJhdGhlciB0aGFuIGEgbGlzdC4gIEhvd2V2ZXIsIGl0J3Mg
bm90IHJlYWxseSBwb3NzaWJsZSB0bw0KPiB1c2UgdGhlIHNhbWUgZm9ybWF0IGFzIHRoZSByZWd1
bGFyIHZpcnRpbyBibG9jayByZWFkL3dyaXRlIGNvbW1hbmRzDQo+IChWSVJUSU9fQkxLX1RfSU4v
VklSVElPX0JMS19UX09VVCksIHNpbmNlIHRoZSByZWFkL3dyaXRlIGNvbW1hbmRzIGRvDQo+IG5v
dCBzcGVjaWZ5IGEgbGVuZ3RoIGV4cGxpY2l0bHk7IGxlbmd0aCBpcyBpbXBsaWVkIGJ5IHRoZSBs
ZW5ndGggb2YNCj4gdGhlIGRhdGEgYnVmZmVyIGFzIGRlZmluZWQgYnkgdGhlIHZpcnRpbyBkZXNj
cmlwdG9yLCBidXQgYSBXcml0ZQ0KPiBaZXJvZXMgY29tbWFuZCBkb2VzIG5vdCByZXF1aXJlIGEg
ZGF0YSBidWZmZXIuICBBdCBiZXN0LCB0aGlzIGNvdWxkIGJlDQo+IGEgc2VwYXJhdGUgY29tbWFu
ZCBtaXJyb3JpbmcgdGhlIGxheW91dCBvZiBzdHJ1Y3QgdmlydGlvX2Jsa19yZXEgYnV0DQo+IHdp
dGggZGF0YSByZXBsYWNlZCB3aXRoIGEgbGVuZ3RoIGZpZWxkOyBJJ20gbm90IHN1cmUgdGhhdCBi
dXlzIG11Y2ggaW4NCj4gdGhlIHdheSBvZiBjb25zaXN0ZW5jeS4NClllYWgsIHRoYXQncyB0aGUg
Y29uc2lkZXJhdGlvbiBoZXJlLg0KPiANCj4gPiBTZWNvbmQgdGhlIHVubWFwIGZsYWcgaXNuJ3Qg
cHJvcGVybHkgc3BlY2lmaWVkIGF0IGFsbCwgYXMgbm90aGluZw0KPiA+IHNheXMgdGhlIGRldmlj
ZSBtYXkgbm90IHVubWFwIHdpdGhvdXQgdGhlIHVubWFwIGZsYWcuICBQbGVhc2UgdGFrZQ0KPiA+
IGEgbG9vayBhdCB0aGUgU0NTSSBvciBOVk1lINGVcGVjIGZvciBzb21lIGd1aWRhbmNlLg0KPiAN
Cj4gVGhpcyBjb3VsZCBwcm9iYWJseSB1c2Ugc29tZSBjbGFyaWZ5aW5nIHRleHQgaW4gdGhlIHNw
ZWNpZmljYXRpb24sIGJ1dA0KPiBnaXZlbiB0aGF0IHRoZXJlIGlzIG5vdGhpbmcgaW4gdGhlIHNw
ZWMgZGVzY3JpYmluZyB3aGF0IHRoZSBkZXZpY2UNCj4gbmVlZHMgdG8gZG8gd2hlbiB1bm1hcCA9
IDAsIEkgd291bGQgYXNzdW1lIHRoYXQgdGhlIGRldmljZSBjYW4gZG8NCj4gd2hhdGV2ZXIgaXQg
bGlrZXMsIGFzIGxvbmcgYXMgdGhlIGJsb2NrcyByZWFkIGJhY2sgYXMgMHMgYWZ0ZXJ3YXJkcy4N
Cj4gUmVhZGluZyBiYWNrIDBzIGlzIHJlcXVpcmVkIGJ5IHRoZSBkZWZpbml0aW9uIG9mIHRoZSBX
cml0ZSBaZXJvZXMNCj4gY29tbWFuZCBpbiB0aGUgc2FtZSB2aXJ0aW8gc3BlYyBjaGFuZ2UuICBJ
dCB3b3VsZCBwcm9iYWJseSBiZSBnb29kIHRvDQo+IGNsYXJpZnkgdGhpcyBhbmQgZXhwbGljaXRs
eSBkZWZpbmUgd2hhdCB0aGUgZGV2aWNlIGlzIGFsbG93ZWQgdG8gZG8gaW4NCj4gcmVzcG9uc2Ug
dG8gYm90aCBzZXR0aW5ncyBvZiB0aGUgdW5tYXAgYml0Lg0KPiANCj4gTXkgdW5kZXJzdGFuZGlu
ZyBvZiB0aGUgY29ycmVzcG9uZGluZyBmZWF0dXJlIGluIE5WTWUgKHRoZSBEZWFsbG9jYXRlDQo+
IGJpdCBpbiB0aGUgV3JpdGUgWmVyb2VzIGNvbW1hbmQpIGlzIHRoYXQgdGhlIG9ubHkgZGlmZmVy
ZW5jZSBiZXR3ZWVuDQo+IERlYWxsb2NhdGUgPSAxIGFuZCAwIGlzIHRoYXQgdGhlIGRldmljZSAi
c2hvdWxkIiB2ZXJzdXMgIm1heSIgKG5vDQo+ICJzaGFsbCIgb24gZWl0aGVyIHNpZGUpIGRlYWxs
b2NhdGUgdGhlIGNvcnJlc3BvbmRpbmcgYmxvY2tzLCBidXQgb25seQ0KPiBpZiB0aGUgZGV2aWNl
IHN1cHBvcnRzIHJlYWRpbmcgMHMgYmFjayBhZnRlciBibG9ja3MgYXJlIGRlYWxsb2NhdGVkLg0K
PiBJZiB0aGUgZGV2aWNlIGRvZXMgbm90IHN1cHBvcnQgcmVhZGluZyAwcyBhZnRlciBkZWFsbG9j
YXRpb24sIGl0IGlzDQo+IG5vdCBhbGxvd2VkIHRvIGRlYWxsb2NhdGUgYmxvY2tzIGFzIHBhcnQg
b2YgYSBXcml0ZSBaZXJvZXMgY29tbWFuZA0KPiByZWdhcmRsZXNzIG9mIHRoZSBzZXR0aW5nIG9m
IHRoZSBEZWFsbG9jYXRlIGJpdC4NCj4gDQo+IFNvbWUgc2ltaWxhciB3b3JkaW5nIGNvdWxkIHBy
b2JhYmx5IGJlIGFkZGVkIHRvIHRoZSB2aXJ0aW8gc3BlYyB0bw0KPiBjbGFyaWZ5IHRoZSBtZWFu
aW5nIG9mIHVubWFwLCBhbHRob3VnaCBJIHdvdWxkIHByZWZlciBzb21ldGhpbmcgdGhhdA0KPiBt
YWtlcyBpdCBhIGxpdHRsZSBjbGVhcmVyIHRoYXQgdGhlIGJpdCBpcyBvbmx5IGludGVuZGVkIGFz
IGEgaGludCBmcm9tDQo+IHRoZSBkcml2ZXIgdG8gaW5kaWNhdGUgd2hldGhlciB0aGUgZGV2aWNl
IHNob3VsZCBhdHRlbXB0IHRvIGtlZXANCj4gc3RvcmFnZSBhbGxvY2F0ZWQgZm9yIHRoZSB6ZXJv
ZWQgYmxvY2tzLCBpZiB0aGF0IGlzIGluZGVlZCB0aGUNCj4gaW50ZW5kZWQgYmVoYXZpb3IuDQpZ
ZXMsIHRoYXQncyB0aGUgb3JpZ2luYWwgaWRlYS4gIEFkZGluZyBhIGNsZWFyIGRlc2NyaXB0aW9u
IHRvIHRoZSBzcGVjaWZpY2F0aW9uIG1heSBiZSBiZXR0ZXIuIA0KPiANCj4gSXMgdGhlcmUgc29t
ZSBpbi1rZXJuZWwgZG9jIHRoYXQgZGVzY3JpYmVzIHdoYXQgYmVoYXZpb3IgdGhlIExpbnV4DQo+
IGJsb2NrIGxheWVyIG5lZWRzIGZyb20gYSB3cml0ZSB6ZXJvZXMgY29tbWFuZD8NCj4gDQo+ID4g
PiArc3RhdGljIGlubGluZSBpbnQgdmlydGJsa19zZXR1cF9kaXNjYXJkX3dyaXRlX3plcm9lcyhz
dHJ1Y3QgcmVxdWVzdCAqcmVxLA0KPiA+ID4gKyAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgIGJvb2wgdW5tYXApDQo+ID4NCj4gPiBXaHkgaXMgdGhpcyBhbiBpbmxp
bmUgZnVuY3Rpb24/DQo+IA0KPiBJIGRvbid0IHRoaW5rIHRoZXJlJ3MgYW55IHJlYXNvbiBpdCBu
ZWVkcyB0byBiZSBpbmxpbmU7IEkgY2FuIGRyb3AgdGhlDQo+IGlubGluZSBpbiB0aGUgbmV4dCBy
ZXZpc2lvbi4NCj4gDQo+IEdpdmVuIChhcyBmYXIgYXMgSSBjYW4gdGVsbCkgeW91ciBjb25jZXJu
cyBzZWVtIHRvIGFwcGx5IHRvIHRoZSBXcml0ZQ0KPiBaZXJvZXMgY29tbWFuZCBzcGVjaWZpY2Fs
bHksIHdvdWxkIGl0IGJlIHJlYXNvbmFibGUgdG8gc3RhcnQgd2l0aCBhDQo+IHBhdGNoIHRoYXQg
anVzdCBhZGRzIHN1cHBvcnQgZm9yIHRoZSBEaXNjYXJkIGNvbW1hbmQgKGFsb25nIHdpdGggZml4
ZXMNCj4gZm9yIE1pbmcncyBmZWVkYmFjayk/ICBUaGlzIHdvdWxkIGJlIHN1ZmZpY2llbnQgZm9y
IG15IHBhcnRpY3VsYXIgdXNlDQo+IGNhc2UgKGFsdGhvdWdoIEkgY2FuJ3Qgc3BlYWsgZm9yIENo
YW5ncGVuZyksIGFuZCB3ZSBjYW4gcmV2aXNpdCBXcml0ZQ0KPiBaZXJvZXMgb25jZSB0aGUgc3Bl
YyBjb25jZXJucyBhcmUgd29ya2VkIG91dC4NCj4gDQo+IFRoYW5rcywNCj4gLS0gRGFuaWVsDQo=

^ permalink raw reply	[flat|nested] 50+ messages in thread

* RE: [PATCH v8] virtio_blk: add discard and write zeroes support
@ 2018-10-16  1:45         ` Liu, Changpeng
  0 siblings, 0 replies; 50+ messages in thread
From: Liu, Changpeng @ 2018-10-16  1:45 UTC (permalink / raw)
  To: Daniel Verkamp, hch; +Cc: axboe, mst, virtualization, linux-block, stefanha



> -----Original Message-----
> From: Daniel Verkamp [mailto:dverkamp@chromium.org]
> Sent: Tuesday, October 16, 2018 7:16 AM
> To: hch@infradead.org
> Cc: virtualization@lists.linux-foundation.org; linux-block@vger.kernel.org;
> mst@redhat.com; jasowang@redhat.com; axboe@kernel.dk;
> stefanha@redhat.com; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: Re: [PATCH v8] virtio_blk: add discard and write zeroes support
> 
> On Mon, Oct 15, 2018 at 2:27 AM Christoph Hellwig <hch@infradead.org> wrote:
> > On Fri, Oct 12, 2018 at 02:06:28PM -0700, Daniel Verkamp wrote:
> > > From: Changpeng Liu <changpeng.liu@intel.com>
> > >
> > > In commit 88c85538, "virtio-blk: add discard and write zeroes features
> > > to specification" (https://github.com/oasis-tcs/virtio-spec), the virtio
> >
> > There is some issues in this spec.  For one using the multiple ranges
> > also for write zeroes is rather inefficient.  Write zeroes really should
> > use the same format as read and write.
> 
> I wasn't involved in the writing of the spec, so I'll defer to Michael
> and Changpeng here, but I'm not sure how "set in stone" the virtio
> specification is, or if it can be updated somehow without breaking
> compatibility.
> 
> I agree that Write Zeroes would be simpler to implement as a single
> LBA + length rather than a list.  However, it's not really possible to
> use the same format as the regular virtio block read/write commands
> (VIRTIO_BLK_T_IN/VIRTIO_BLK_T_OUT), since the read/write commands do
> not specify a length explicitly; length is implied by the length of
> the data buffer as defined by the virtio descriptor, but a Write
> Zeroes command does not require a data buffer.  At best, this could be
> a separate command mirroring the layout of struct virtio_blk_req but
> with data replaced with a length field; I'm not sure that buys much in
> the way of consistency.
Yeah, that's the consideration here.
> 
> > Second the unmap flag isn't properly specified at all, as nothing
> > says the device may not unmap without the unmap flag.  Please take
> > a look at the SCSI or NVMe spec for some guidance.
> 
> This could probably use some clarifying text in the specification, but
> given that there is nothing in the spec describing what the device
> needs to do when unmap = 0, I would assume that the device can do
> whatever it likes, as long as the blocks read back as 0s afterwards.
> Reading back 0s is required by the definition of the Write Zeroes
> command in the same virtio spec change.  It would probably be good to
> clarify this and explicitly define what the device is allowed to do in
> response to both settings of the unmap bit.
> 
> My understanding of the corresponding feature in NVMe (the Deallocate
> bit in the Write Zeroes command) is that the only difference between
> Deallocate = 1 and 0 is that the device "should" versus "may" (no
> "shall" on either side) deallocate the corresponding blocks, but only
> if the device supports reading 0s back after blocks are deallocated.
> If the device does not support reading 0s after deallocation, it is
> not allowed to deallocate blocks as part of a Write Zeroes command
> regardless of the setting of the Deallocate bit.
> 
> Some similar wording could probably be added to the virtio spec to
> clarify the meaning of unmap, although I would prefer something that
> makes it a little clearer that the bit is only intended as a hint from
> the driver to indicate whether the device should attempt to keep
> storage allocated for the zeroed blocks, if that is indeed the
> intended behavior.
Yes, that's the original idea.  Adding a clear description to the specification may be better. 
> 
> Is there some in-kernel doc that describes what behavior the Linux
> block layer needs from a write zeroes command?
> 
> > > +static inline int virtblk_setup_discard_write_zeroes(struct request *req,
> > > +                                             bool unmap)
> >
> > Why is this an inline function?
> 
> I don't think there's any reason it needs to be inline; I can drop the
> inline in the next revision.
> 
> Given (as far as I can tell) your concerns seem to apply to the Write
> Zeroes command specifically, would it be reasonable to start with a
> patch that just adds support for the Discard command (along with fixes
> for Ming's feedback)?  This would be sufficient for my particular use
> case (although I can't speak for Changpeng), and we can revisit Write
> Zeroes once the spec concerns are worked out.
> 
> Thanks,
> -- Daniel
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v8] virtio_blk: add discard and write zeroes support
  2018-10-15  9:27     ` Christoph Hellwig
@ 2018-10-25 23:28       ` Paolo Bonzini
  -1 siblings, 0 replies; 50+ messages in thread
From: Paolo Bonzini @ 2018-10-25 23:28 UTC (permalink / raw)
  To: Christoph Hellwig, Daniel Verkamp
  Cc: Jens Axboe, Michael S. Tsirkin, virtualization, linux-block,
	Stefan Hajnoczi, Changpeng Liu

On 15/10/2018 11:27, Christoph Hellwig wrote:
> There is some issues in this spec.  For one using the multiple ranges
> also for write zeroes is rather inefficient.  Write zeroes really should
> use the same format as read and write.

What makes it inefficient?

> Second the unmap flag isn't properly specified at all, as nothing
> says the device may not unmap without the unmap flag.  Please take
> a look at the SCSI or NVMe spec for some guidance.

Thanks, I'll submit a patch for this.

Paolo

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v8] virtio_blk: add discard and write zeroes support
@ 2018-10-25 23:28       ` Paolo Bonzini
  0 siblings, 0 replies; 50+ messages in thread
From: Paolo Bonzini @ 2018-10-25 23:28 UTC (permalink / raw)
  To: Christoph Hellwig, Daniel Verkamp
  Cc: Jens Axboe, Michael S. Tsirkin, virtualization, linux-block,
	Stefan Hajnoczi, Changpeng Liu

On 15/10/2018 11:27, Christoph Hellwig wrote:
> There is some issues in this spec.  For one using the multiple ranges
> also for write zeroes is rather inefficient.  Write zeroes really should
> use the same format as read and write.

What makes it inefficient?

> Second the unmap flag isn't properly specified at all, as nothing
> says the device may not unmap without the unmap flag.  Please take
> a look at the SCSI or NVMe spec for some guidance.

Thanks, I'll submit a patch for this.

Paolo
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v8] virtio_blk: add discard and write zeroes support
  2018-10-12 21:06   ` Daniel Verkamp
@ 2018-10-26  8:08     ` Stefan Hajnoczi
  -1 siblings, 0 replies; 50+ messages in thread
From: Stefan Hajnoczi @ 2018-10-26  8:08 UTC (permalink / raw)
  To: Daniel Verkamp
  Cc: virtualization, linux-block, Michael S. Tsirkin, Jason Wang,
	Jens Axboe, Changpeng Liu

[-- Attachment #1: Type: text/plain, Size: 1271 bytes --]

On Fri, Oct 12, 2018 at 02:06:28PM -0700, Daniel Verkamp wrote:
> +		range[n].flags = cpu_to_le32(flags);
> +		range[n].num_sectors = cpu_to_le32(num_sectors);
> +		range[n].sector = cpu_to_le64(sector);
...
> +/* Discard/write zeroes range for each request. */
> +struct virtio_blk_discard_write_zeroes {
> +	/* discard/write zeroes start sector */
> +	__virtio64 sector;
> +	/* number of discard/write zeroes sectors */
> +	__virtio32 num_sectors;
> +	/* flags for this range */
> +	__virtio32 flags;

cpu_to_le32() is being used on __virtio32 fields instead of cpu_to_virtio32().

From include/uapi/linux/virtio_types.h:

  /*
   * __virtio{16,32,64} have the following meaning:
   * - __u{16,32,64} for virtio devices in legacy mode, accessed in native endian
   * - __le{16,32,64} for standard-compliant virtio devices
   */

From the VIRTIO specification:

  struct virtio_blk_discard_write_zeroes {
         le64 sector;
         le32 num_sectors;
         struct {
                 le32 unmap:1;
                 le32 reserved:31;
         } flags;
  };


Since the VIRTIO spec says these fields are little-endian, I think these
fields should be declared just __u32 and __u64 instead of __virtio32 and
__virtio64.

Stefan

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v8] virtio_blk: add discard and write zeroes support
@ 2018-10-26  8:08     ` Stefan Hajnoczi
  0 siblings, 0 replies; 50+ messages in thread
From: Stefan Hajnoczi @ 2018-10-26  8:08 UTC (permalink / raw)
  To: Daniel Verkamp
  Cc: Jens Axboe, Michael S. Tsirkin, virtualization, linux-block,
	Changpeng Liu


[-- Attachment #1.1: Type: text/plain, Size: 1271 bytes --]

On Fri, Oct 12, 2018 at 02:06:28PM -0700, Daniel Verkamp wrote:
> +		range[n].flags = cpu_to_le32(flags);
> +		range[n].num_sectors = cpu_to_le32(num_sectors);
> +		range[n].sector = cpu_to_le64(sector);
...
> +/* Discard/write zeroes range for each request. */
> +struct virtio_blk_discard_write_zeroes {
> +	/* discard/write zeroes start sector */
> +	__virtio64 sector;
> +	/* number of discard/write zeroes sectors */
> +	__virtio32 num_sectors;
> +	/* flags for this range */
> +	__virtio32 flags;

cpu_to_le32() is being used on __virtio32 fields instead of cpu_to_virtio32().

From include/uapi/linux/virtio_types.h:

  /*
   * __virtio{16,32,64} have the following meaning:
   * - __u{16,32,64} for virtio devices in legacy mode, accessed in native endian
   * - __le{16,32,64} for standard-compliant virtio devices
   */

From the VIRTIO specification:

  struct virtio_blk_discard_write_zeroes {
         le64 sector;
         le32 num_sectors;
         struct {
                 le32 unmap:1;
                 le32 reserved:31;
         } flags;
  };


Since the VIRTIO spec says these fields are little-endian, I think these
fields should be declared just __u32 and __u64 instead of __virtio32 and
__virtio64.

Stefan

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v8] virtio_blk: add discard and write zeroes support
  2018-10-25 23:28       ` Paolo Bonzini
@ 2018-10-26  8:26         ` Christoph Hellwig
  -1 siblings, 0 replies; 50+ messages in thread
From: Christoph Hellwig @ 2018-10-26  8:26 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Christoph Hellwig, Daniel Verkamp, Jens Axboe,
	Michael S. Tsirkin, virtualization, linux-block, Stefan Hajnoczi,
	Changpeng Liu

On Fri, Oct 26, 2018 at 01:28:54AM +0200, Paolo Bonzini wrote:
> On 15/10/2018 11:27, Christoph Hellwig wrote:
> > There is some issues in this spec.  For one using the multiple ranges
> > also for write zeroes is rather inefficient.  Write zeroes really should
> > use the same format as read and write.
> 
> What makes it inefficient?

We require a memory allocation for each write zeroes instead of encoding
the lba/len in the command.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v8] virtio_blk: add discard and write zeroes support
@ 2018-10-26  8:26         ` Christoph Hellwig
  0 siblings, 0 replies; 50+ messages in thread
From: Christoph Hellwig @ 2018-10-26  8:26 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Jens Axboe, Michael S. Tsirkin, virtualization,
	Christoph Hellwig, linux-block, Stefan Hajnoczi, Changpeng Liu

On Fri, Oct 26, 2018 at 01:28:54AM +0200, Paolo Bonzini wrote:
> On 15/10/2018 11:27, Christoph Hellwig wrote:
> > There is some issues in this spec.  For one using the multiple ranges
> > also for write zeroes is rather inefficient.  Write zeroes really should
> > use the same format as read and write.
> 
> What makes it inefficient?

We require a memory allocation for each write zeroes instead of encoding
the lba/len in the command.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v8] virtio_blk: add discard and write zeroes support
  2018-10-26  8:08     ` Stefan Hajnoczi
@ 2018-10-26 14:47       ` Michael S. Tsirkin
  -1 siblings, 0 replies; 50+ messages in thread
From: Michael S. Tsirkin @ 2018-10-26 14:47 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Daniel Verkamp, virtualization, linux-block, Jason Wang,
	Jens Axboe, Changpeng Liu

On Fri, Oct 26, 2018 at 09:08:38AM +0100, Stefan Hajnoczi wrote:
> On Fri, Oct 12, 2018 at 02:06:28PM -0700, Daniel Verkamp wrote:
> > +		range[n].flags = cpu_to_le32(flags);
> > +		range[n].num_sectors = cpu_to_le32(num_sectors);
> > +		range[n].sector = cpu_to_le64(sector);
> ...
> > +/* Discard/write zeroes range for each request. */
> > +struct virtio_blk_discard_write_zeroes {
> > +	/* discard/write zeroes start sector */
> > +	__virtio64 sector;
> > +	/* number of discard/write zeroes sectors */
> > +	__virtio32 num_sectors;
> > +	/* flags for this range */
> > +	__virtio32 flags;
> 
> cpu_to_le32() is being used on __virtio32 fields instead of cpu_to_virtio32().
> 
> From include/uapi/linux/virtio_types.h:
> 
>   /*
>    * __virtio{16,32,64} have the following meaning:
>    * - __u{16,32,64} for virtio devices in legacy mode, accessed in native endian
>    * - __le{16,32,64} for standard-compliant virtio devices
>    */
> 
> From the VIRTIO specification:
> 
>   struct virtio_blk_discard_write_zeroes {
>          le64 sector;
>          le32 num_sectors;
>          struct {
>                  le32 unmap:1;
>                  le32 reserved:31;
>          } flags;
>   };
> 
> 
> Since the VIRTIO spec says these fields are little-endian, I think these
> fields should be declared just __u32 and __u64 instead of __virtio32 and
> __virtio64.
> 
> Stefan


__le32/__le64 rather?

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v8] virtio_blk: add discard and write zeroes support
@ 2018-10-26 14:47       ` Michael S. Tsirkin
  0 siblings, 0 replies; 50+ messages in thread
From: Michael S. Tsirkin @ 2018-10-26 14:47 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: Jens Axboe, virtualization, linux-block, Changpeng Liu

On Fri, Oct 26, 2018 at 09:08:38AM +0100, Stefan Hajnoczi wrote:
> On Fri, Oct 12, 2018 at 02:06:28PM -0700, Daniel Verkamp wrote:
> > +		range[n].flags = cpu_to_le32(flags);
> > +		range[n].num_sectors = cpu_to_le32(num_sectors);
> > +		range[n].sector = cpu_to_le64(sector);
> ...
> > +/* Discard/write zeroes range for each request. */
> > +struct virtio_blk_discard_write_zeroes {
> > +	/* discard/write zeroes start sector */
> > +	__virtio64 sector;
> > +	/* number of discard/write zeroes sectors */
> > +	__virtio32 num_sectors;
> > +	/* flags for this range */
> > +	__virtio32 flags;
> 
> cpu_to_le32() is being used on __virtio32 fields instead of cpu_to_virtio32().
> 
> From include/uapi/linux/virtio_types.h:
> 
>   /*
>    * __virtio{16,32,64} have the following meaning:
>    * - __u{16,32,64} for virtio devices in legacy mode, accessed in native endian
>    * - __le{16,32,64} for standard-compliant virtio devices
>    */
> 
> From the VIRTIO specification:
> 
>   struct virtio_blk_discard_write_zeroes {
>          le64 sector;
>          le32 num_sectors;
>          struct {
>                  le32 unmap:1;
>                  le32 reserved:31;
>          } flags;
>   };
> 
> 
> Since the VIRTIO spec says these fields are little-endian, I think these
> fields should be declared just __u32 and __u64 instead of __virtio32 and
> __virtio64.
> 
> Stefan


__le32/__le64 rather?

^ permalink raw reply	[flat|nested] 50+ messages in thread

* RE: [PATCH v8] virtio_blk: add discard and write zeroes support
  2018-10-26  8:26         ` Christoph Hellwig
@ 2018-10-29  3:21           ` Liu, Changpeng
  -1 siblings, 0 replies; 50+ messages in thread
From: Liu, Changpeng @ 2018-10-29  3:21 UTC (permalink / raw)
  To: Christoph Hellwig, Paolo Bonzini
  Cc: Daniel Verkamp, Jens Axboe, Michael S. Tsirkin, virtualization,
	linux-block, Stefan Hajnoczi



> -----Original Message-----
> From: Christoph Hellwig [mailto:hch@infradead.org]
> Sent: Friday, October 26, 2018 4:27 PM
> To: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Christoph Hellwig <hch@infradead.org>; Daniel Verkamp
> <dverkamp@chromium.org>; Jens Axboe <axboe@kernel.dk>; Michael S. Tsirkin
> <mst@redhat.com>; virtualization@lists.linux-foundation.org; linux-
> block@vger.kernel.org; Stefan Hajnoczi <stefanha@redhat.com>; Liu, Changpeng
> <changpeng.liu@intel.com>
> Subject: Re: [PATCH v8] virtio_blk: add discard and write zeroes support
> 
> On Fri, Oct 26, 2018 at 01:28:54AM +0200, Paolo Bonzini wrote:
> > On 15/10/2018 11:27, Christoph Hellwig wrote:
> > > There is some issues in this spec.  For one using the multiple ranges
> > > also for write zeroes is rather inefficient.  Write zeroes really should
> > > use the same format as read and write.
> >
> > What makes it inefficient?
> 
> We require a memory allocation for each write zeroes instead of encoding
> the lba/len in the command.
Make sense to me, but need to change the spec first.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* RE: [PATCH v8] virtio_blk: add discard and write zeroes support
@ 2018-10-29  3:21           ` Liu, Changpeng
  0 siblings, 0 replies; 50+ messages in thread
From: Liu, Changpeng @ 2018-10-29  3:21 UTC (permalink / raw)
  To: Christoph Hellwig, Paolo Bonzini
  Cc: Jens Axboe, Michael S. Tsirkin, virtualization, linux-block,
	Stefan Hajnoczi



> -----Original Message-----
> From: Christoph Hellwig [mailto:hch@infradead.org]
> Sent: Friday, October 26, 2018 4:27 PM
> To: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Christoph Hellwig <hch@infradead.org>; Daniel Verkamp
> <dverkamp@chromium.org>; Jens Axboe <axboe@kernel.dk>; Michael S. Tsirkin
> <mst@redhat.com>; virtualization@lists.linux-foundation.org; linux-
> block@vger.kernel.org; Stefan Hajnoczi <stefanha@redhat.com>; Liu, Changpeng
> <changpeng.liu@intel.com>
> Subject: Re: [PATCH v8] virtio_blk: add discard and write zeroes support
> 
> On Fri, Oct 26, 2018 at 01:28:54AM +0200, Paolo Bonzini wrote:
> > On 15/10/2018 11:27, Christoph Hellwig wrote:
> > > There is some issues in this spec.  For one using the multiple ranges
> > > also for write zeroes is rather inefficient.  Write zeroes really should
> > > use the same format as read and write.
> >
> > What makes it inefficient?
> 
> We require a memory allocation for each write zeroes instead of encoding
> the lba/len in the command.
Make sense to me, but need to change the spec first.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v8] virtio_blk: add discard and write zeroes support
  2018-10-26 14:47       ` Michael S. Tsirkin
@ 2018-10-29  5:05         ` Stefan Hajnoczi
  -1 siblings, 0 replies; 50+ messages in thread
From: Stefan Hajnoczi @ 2018-10-29  5:05 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Daniel Verkamp, virtualization, linux-block, Jason Wang,
	Jens Axboe, Changpeng Liu

[-- Attachment #1: Type: text/plain, Size: 1623 bytes --]

On Fri, Oct 26, 2018 at 10:47:16AM -0400, Michael S. Tsirkin wrote:
> On Fri, Oct 26, 2018 at 09:08:38AM +0100, Stefan Hajnoczi wrote:
> > On Fri, Oct 12, 2018 at 02:06:28PM -0700, Daniel Verkamp wrote:
> > > +		range[n].flags = cpu_to_le32(flags);
> > > +		range[n].num_sectors = cpu_to_le32(num_sectors);
> > > +		range[n].sector = cpu_to_le64(sector);
> > ...
> > > +/* Discard/write zeroes range for each request. */
> > > +struct virtio_blk_discard_write_zeroes {
> > > +	/* discard/write zeroes start sector */
> > > +	__virtio64 sector;
> > > +	/* number of discard/write zeroes sectors */
> > > +	__virtio32 num_sectors;
> > > +	/* flags for this range */
> > > +	__virtio32 flags;
> > 
> > cpu_to_le32() is being used on __virtio32 fields instead of cpu_to_virtio32().
> > 
> > From include/uapi/linux/virtio_types.h:
> > 
> >   /*
> >    * __virtio{16,32,64} have the following meaning:
> >    * - __u{16,32,64} for virtio devices in legacy mode, accessed in native endian
> >    * - __le{16,32,64} for standard-compliant virtio devices
> >    */
> > 
> > From the VIRTIO specification:
> > 
> >   struct virtio_blk_discard_write_zeroes {
> >          le64 sector;
> >          le32 num_sectors;
> >          struct {
> >                  le32 unmap:1;
> >                  le32 reserved:31;
> >          } flags;
> >   };
> > 
> > 
> > Since the VIRTIO spec says these fields are little-endian, I think these
> > fields should be declared just __u32 and __u64 instead of __virtio32 and
> > __virtio64.
> > 
> > Stefan
> 
> 
> __le32/__le64 rather?

Yes.

Stefan

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v8] virtio_blk: add discard and write zeroes support
@ 2018-10-29  5:05         ` Stefan Hajnoczi
  0 siblings, 0 replies; 50+ messages in thread
From: Stefan Hajnoczi @ 2018-10-29  5:05 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Jens Axboe, virtualization, linux-block, Changpeng Liu


[-- Attachment #1.1: Type: text/plain, Size: 1623 bytes --]

On Fri, Oct 26, 2018 at 10:47:16AM -0400, Michael S. Tsirkin wrote:
> On Fri, Oct 26, 2018 at 09:08:38AM +0100, Stefan Hajnoczi wrote:
> > On Fri, Oct 12, 2018 at 02:06:28PM -0700, Daniel Verkamp wrote:
> > > +		range[n].flags = cpu_to_le32(flags);
> > > +		range[n].num_sectors = cpu_to_le32(num_sectors);
> > > +		range[n].sector = cpu_to_le64(sector);
> > ...
> > > +/* Discard/write zeroes range for each request. */
> > > +struct virtio_blk_discard_write_zeroes {
> > > +	/* discard/write zeroes start sector */
> > > +	__virtio64 sector;
> > > +	/* number of discard/write zeroes sectors */
> > > +	__virtio32 num_sectors;
> > > +	/* flags for this range */
> > > +	__virtio32 flags;
> > 
> > cpu_to_le32() is being used on __virtio32 fields instead of cpu_to_virtio32().
> > 
> > From include/uapi/linux/virtio_types.h:
> > 
> >   /*
> >    * __virtio{16,32,64} have the following meaning:
> >    * - __u{16,32,64} for virtio devices in legacy mode, accessed in native endian
> >    * - __le{16,32,64} for standard-compliant virtio devices
> >    */
> > 
> > From the VIRTIO specification:
> > 
> >   struct virtio_blk_discard_write_zeroes {
> >          le64 sector;
> >          le32 num_sectors;
> >          struct {
> >                  le32 unmap:1;
> >                  le32 reserved:31;
> >          } flags;
> >   };
> > 
> > 
> > Since the VIRTIO spec says these fields are little-endian, I think these
> > fields should be declared just __u32 and __u64 instead of __virtio32 and
> > __virtio64.
> > 
> > Stefan
> 
> 
> __le32/__le64 rather?

Yes.

Stefan

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v8] virtio_blk: add discard and write zeroes support
  2018-10-26  8:26         ` Christoph Hellwig
@ 2018-10-29 18:03           ` Paolo Bonzini
  -1 siblings, 0 replies; 50+ messages in thread
From: Paolo Bonzini @ 2018-10-29 18:03 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Daniel Verkamp, Jens Axboe, Michael S. Tsirkin, virtualization,
	linux-block, Stefan Hajnoczi, Changpeng Liu

On 26/10/2018 10:26, Christoph Hellwig wrote:
> On Fri, Oct 26, 2018 at 01:28:54AM +0200, Paolo Bonzini wrote:
>> On 15/10/2018 11:27, Christoph Hellwig wrote:
>>> There is some issues in this spec.  For one using the multiple ranges
>>> also for write zeroes is rather inefficient.  Write zeroes really should
>>> use the same format as read and write.
>>
>> What makes it inefficient?
> 
> We require a memory allocation for each write zeroes instead of encoding
> the lba/len in the command.

Oh, I see.  That's not a spec issue, the lba/length descriptor can be
included in the same buffer as the rest of the command; using
kmalloc_array even for a single-bio REQ_OP_WRITE_ZEROES is a choice made
by this patch, I suppose for simplicity.

It is possible to special case single-bio unmap and write zeroes so that
they don't call virtblk_setup_discard_write_zeroes and avoid
RQF_SPECIAL_PAYLOAD.

Thanks,

Paolo

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v8] virtio_blk: add discard and write zeroes support
@ 2018-10-29 18:03           ` Paolo Bonzini
  0 siblings, 0 replies; 50+ messages in thread
From: Paolo Bonzini @ 2018-10-29 18:03 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Michael S. Tsirkin, virtualization, linux-block,
	Stefan Hajnoczi, Changpeng Liu

On 26/10/2018 10:26, Christoph Hellwig wrote:
> On Fri, Oct 26, 2018 at 01:28:54AM +0200, Paolo Bonzini wrote:
>> On 15/10/2018 11:27, Christoph Hellwig wrote:
>>> There is some issues in this spec.  For one using the multiple ranges
>>> also for write zeroes is rather inefficient.  Write zeroes really should
>>> use the same format as read and write.
>>
>> What makes it inefficient?
> 
> We require a memory allocation for each write zeroes instead of encoding
> the lba/len in the command.

Oh, I see.  That's not a spec issue, the lba/length descriptor can be
included in the same buffer as the rest of the command; using
kmalloc_array even for a single-bio REQ_OP_WRITE_ZEROES is a choice made
by this patch, I suppose for simplicity.

It is possible to special case single-bio unmap and write zeroes so that
they don't call virtblk_setup_discard_write_zeroes and avoid
RQF_SPECIAL_PAYLOAD.

Thanks,

Paolo

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v8] virtio_blk: add discard and write zeroes support
  2018-10-29  5:05         ` Stefan Hajnoczi
@ 2018-11-01 21:25           ` Michael S. Tsirkin
  -1 siblings, 0 replies; 50+ messages in thread
From: Michael S. Tsirkin @ 2018-11-01 21:25 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Daniel Verkamp, virtualization, linux-block, Jason Wang,
	Jens Axboe, Changpeng Liu

On Mon, Oct 29, 2018 at 05:05:21AM +0000, Stefan Hajnoczi wrote:
> On Fri, Oct 26, 2018 at 10:47:16AM -0400, Michael S. Tsirkin wrote:
> > On Fri, Oct 26, 2018 at 09:08:38AM +0100, Stefan Hajnoczi wrote:
> > > On Fri, Oct 12, 2018 at 02:06:28PM -0700, Daniel Verkamp wrote:
> > > > +		range[n].flags = cpu_to_le32(flags);
> > > > +		range[n].num_sectors = cpu_to_le32(num_sectors);
> > > > +		range[n].sector = cpu_to_le64(sector);
> > > ...
> > > > +/* Discard/write zeroes range for each request. */
> > > > +struct virtio_blk_discard_write_zeroes {
> > > > +	/* discard/write zeroes start sector */
> > > > +	__virtio64 sector;
> > > > +	/* number of discard/write zeroes sectors */
> > > > +	__virtio32 num_sectors;
> > > > +	/* flags for this range */
> > > > +	__virtio32 flags;
> > > 
> > > cpu_to_le32() is being used on __virtio32 fields instead of cpu_to_virtio32().
> > > 
> > > From include/uapi/linux/virtio_types.h:
> > > 
> > >   /*
> > >    * __virtio{16,32,64} have the following meaning:
> > >    * - __u{16,32,64} for virtio devices in legacy mode, accessed in native endian
> > >    * - __le{16,32,64} for standard-compliant virtio devices
> > >    */
> > > 
> > > From the VIRTIO specification:
> > > 
> > >   struct virtio_blk_discard_write_zeroes {
> > >          le64 sector;
> > >          le32 num_sectors;
> > >          struct {
> > >                  le32 unmap:1;
> > >                  le32 reserved:31;
> > >          } flags;
> > >   };
> > > 
> > > 
> > > Since the VIRTIO spec says these fields are little-endian, I think these
> > > fields should be declared just __u32 and __u64 instead of __virtio32 and
> > > __virtio64.
> > > 
> > > Stefan
> > 
> > 
> > __le32/__le64 rather?
> 
> Yes.
> 
> Stefan

I agree. And further using bitfields for this is questionable -
it is preferable to set bits in a full 32 bit field using "|".


-- 
MST

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v8] virtio_blk: add discard and write zeroes support
@ 2018-11-01 21:25           ` Michael S. Tsirkin
  0 siblings, 0 replies; 50+ messages in thread
From: Michael S. Tsirkin @ 2018-11-01 21:25 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: Jens Axboe, virtualization, linux-block, Changpeng Liu

On Mon, Oct 29, 2018 at 05:05:21AM +0000, Stefan Hajnoczi wrote:
> On Fri, Oct 26, 2018 at 10:47:16AM -0400, Michael S. Tsirkin wrote:
> > On Fri, Oct 26, 2018 at 09:08:38AM +0100, Stefan Hajnoczi wrote:
> > > On Fri, Oct 12, 2018 at 02:06:28PM -0700, Daniel Verkamp wrote:
> > > > +		range[n].flags = cpu_to_le32(flags);
> > > > +		range[n].num_sectors = cpu_to_le32(num_sectors);
> > > > +		range[n].sector = cpu_to_le64(sector);
> > > ...
> > > > +/* Discard/write zeroes range for each request. */
> > > > +struct virtio_blk_discard_write_zeroes {
> > > > +	/* discard/write zeroes start sector */
> > > > +	__virtio64 sector;
> > > > +	/* number of discard/write zeroes sectors */
> > > > +	__virtio32 num_sectors;
> > > > +	/* flags for this range */
> > > > +	__virtio32 flags;
> > > 
> > > cpu_to_le32() is being used on __virtio32 fields instead of cpu_to_virtio32().
> > > 
> > > From include/uapi/linux/virtio_types.h:
> > > 
> > >   /*
> > >    * __virtio{16,32,64} have the following meaning:
> > >    * - __u{16,32,64} for virtio devices in legacy mode, accessed in native endian
> > >    * - __le{16,32,64} for standard-compliant virtio devices
> > >    */
> > > 
> > > From the VIRTIO specification:
> > > 
> > >   struct virtio_blk_discard_write_zeroes {
> > >          le64 sector;
> > >          le32 num_sectors;
> > >          struct {
> > >                  le32 unmap:1;
> > >                  le32 reserved:31;
> > >          } flags;
> > >   };
> > > 
> > > 
> > > Since the VIRTIO spec says these fields are little-endian, I think these
> > > fields should be declared just __u32 and __u64 instead of __virtio32 and
> > > __virtio64.
> > > 
> > > Stefan
> > 
> > 
> > __le32/__le64 rather?
> 
> Yes.
> 
> Stefan

I agree. And further using bitfields for this is questionable -
it is preferable to set bits in a full 32 bit field using "|".


-- 
MST

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v8] virtio_blk: add discard and write zeroes support
  2018-11-01 21:25           ` Michael S. Tsirkin
@ 2018-11-01 22:18             ` Daniel Verkamp
  -1 siblings, 0 replies; 50+ messages in thread
From: Daniel Verkamp @ 2018-11-01 22:18 UTC (permalink / raw)
  To: mst; +Cc: stefanha, virtualization, linux-block, jasowang, axboe, Changpeng Liu

On Thu, Nov 1, 2018 at 2:25 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Mon, Oct 29, 2018 at 05:05:21AM +0000, Stefan Hajnoczi wrote:
> > On Fri, Oct 26, 2018 at 10:47:16AM -0400, Michael S. Tsirkin wrote:
> > > On Fri, Oct 26, 2018 at 09:08:38AM +0100, Stefan Hajnoczi wrote:
> > > > On Fri, Oct 12, 2018 at 02:06:28PM -0700, Daniel Verkamp wrote:
> > > > > +               range[n].flags = cpu_to_le32(flags);
> > > > > +               range[n].num_sectors = cpu_to_le32(num_sectors);
> > > > > +               range[n].sector = cpu_to_le64(sector);
> > > > ...
> > > > > +/* Discard/write zeroes range for each request. */
> > > > > +struct virtio_blk_discard_write_zeroes {
> > > > > +       /* discard/write zeroes start sector */
> > > > > +       __virtio64 sector;
> > > > > +       /* number of discard/write zeroes sectors */
> > > > > +       __virtio32 num_sectors;
> > > > > +       /* flags for this range */
> > > > > +       __virtio32 flags;
> > > >
> > > > cpu_to_le32() is being used on __virtio32 fields instead of cpu_to_virtio32().
> > > >
> > > > From include/uapi/linux/virtio_types.h:
> > > >
> > > >   /*
> > > >    * __virtio{16,32,64} have the following meaning:
> > > >    * - __u{16,32,64} for virtio devices in legacy mode, accessed in native endian
> > > >    * - __le{16,32,64} for standard-compliant virtio devices
> > > >    */
> > > >
> > > > From the VIRTIO specification:
> > > >
> > > >   struct virtio_blk_discard_write_zeroes {
> > > >          le64 sector;
> > > >          le32 num_sectors;
> > > >          struct {
> > > >                  le32 unmap:1;
> > > >                  le32 reserved:31;
> > > >          } flags;
> > > >   };
> > > >
> > > >
> > > > Since the VIRTIO spec says these fields are little-endian, I think these
> > > > fields should be declared just __u32 and __u64 instead of __virtio32 and
> > > > __virtio64.
> > > >
> > > > Stefan
> > >
> > >
> > > __le32/__le64 rather?
> >
> > Yes.
> >
> > Stefan
>
> I agree. And further using bitfields for this is questionable -
> it is preferable to set bits in a full 32 bit field using "|".

The bitfield is only in the specification, not the code - the actual
implementation in this patch (quoted above from earlier in the thread)
uses a 32-bit field with a flag #define.

I did misunderstand the meaning of __virtio32 vs __le32 - I'll fix
that up (I think the spec definition and code for these are already
correct; the structure definition just needs to change to match).

Thanks,
-- Daniel

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v8] virtio_blk: add discard and write zeroes support
@ 2018-11-01 22:18             ` Daniel Verkamp
  0 siblings, 0 replies; 50+ messages in thread
From: Daniel Verkamp @ 2018-11-01 22:18 UTC (permalink / raw)
  To: mst; +Cc: axboe, virtualization, linux-block, stefanha, Changpeng Liu

On Thu, Nov 1, 2018 at 2:25 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Mon, Oct 29, 2018 at 05:05:21AM +0000, Stefan Hajnoczi wrote:
> > On Fri, Oct 26, 2018 at 10:47:16AM -0400, Michael S. Tsirkin wrote:
> > > On Fri, Oct 26, 2018 at 09:08:38AM +0100, Stefan Hajnoczi wrote:
> > > > On Fri, Oct 12, 2018 at 02:06:28PM -0700, Daniel Verkamp wrote:
> > > > > +               range[n].flags = cpu_to_le32(flags);
> > > > > +               range[n].num_sectors = cpu_to_le32(num_sectors);
> > > > > +               range[n].sector = cpu_to_le64(sector);
> > > > ...
> > > > > +/* Discard/write zeroes range for each request. */
> > > > > +struct virtio_blk_discard_write_zeroes {
> > > > > +       /* discard/write zeroes start sector */
> > > > > +       __virtio64 sector;
> > > > > +       /* number of discard/write zeroes sectors */
> > > > > +       __virtio32 num_sectors;
> > > > > +       /* flags for this range */
> > > > > +       __virtio32 flags;
> > > >
> > > > cpu_to_le32() is being used on __virtio32 fields instead of cpu_to_virtio32().
> > > >
> > > > From include/uapi/linux/virtio_types.h:
> > > >
> > > >   /*
> > > >    * __virtio{16,32,64} have the following meaning:
> > > >    * - __u{16,32,64} for virtio devices in legacy mode, accessed in native endian
> > > >    * - __le{16,32,64} for standard-compliant virtio devices
> > > >    */
> > > >
> > > > From the VIRTIO specification:
> > > >
> > > >   struct virtio_blk_discard_write_zeroes {
> > > >          le64 sector;
> > > >          le32 num_sectors;
> > > >          struct {
> > > >                  le32 unmap:1;
> > > >                  le32 reserved:31;
> > > >          } flags;
> > > >   };
> > > >
> > > >
> > > > Since the VIRTIO spec says these fields are little-endian, I think these
> > > > fields should be declared just __u32 and __u64 instead of __virtio32 and
> > > > __virtio64.
> > > >
> > > > Stefan
> > >
> > >
> > > __le32/__le64 rather?
> >
> > Yes.
> >
> > Stefan
>
> I agree. And further using bitfields for this is questionable -
> it is preferable to set bits in a full 32 bit field using "|".

The bitfield is only in the specification, not the code - the actual
implementation in this patch (quoted above from earlier in the thread)
uses a 32-bit field with a flag #define.

I did misunderstand the meaning of __virtio32 vs __le32 - I'll fix
that up (I think the spec definition and code for these are already
correct; the structure definition just needs to change to match).

Thanks,
-- Daniel

^ permalink raw reply	[flat|nested] 50+ messages in thread

* [PATCH v9] virtio_blk: add discard and write zeroes support
  2018-06-06  4:19 [PATCH v6] virtio_blk: add DISCARD and WRIET ZEROES commands support Changpeng Liu
@ 2018-11-01 22:40   ` Daniel Verkamp
  2018-08-28 22:25 ` [PATCH v7] virtio_blk: add discard and write zeroes support Daniel Verkamp
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 50+ messages in thread
From: Daniel Verkamp @ 2018-11-01 22:40 UTC (permalink / raw)
  To: virtualization, linux-block
  Cc: Michael S. Tsirkin, Jason Wang, Jens Axboe, Stefan Hajnoczi,
	Paolo Bonzini, Christoph Hellwig, Changpeng Liu, Daniel Verkamp

From: Changpeng Liu <changpeng.liu@intel.com>

In commit 88c85538, "virtio-blk: add discard and write zeroes features
to specification" (https://github.com/oasis-tcs/virtio-spec), the virtio
block specification has been extended to add VIRTIO_BLK_T_DISCARD and
VIRTIO_BLK_T_WRITE_ZEROES commands.  This patch enables support for
discard and write zeroes in the virtio-blk driver when the device
advertises the corresponding features, VIRTIO_BLK_F_DISCARD and
VIRTIO_BLK_F_WRITE_ZEROES.

Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
Signed-off-by: Daniel Verkamp <dverkamp@chromium.org>
---
dverkamp: I've picked up this patch and made a few minor changes (as
listed below); most notably, I changed the kmalloc back to GFP_ATOMIC,
since it can be called from a context where sleeping is not allowed.
To prevent large allocations, I've also clamped the maximum number of
discard segments to 256; this results in a 4K allocation and should be
plenty of descriptors for most use cases.

I also removed most of the description from the commit message, since it
was duplicating the comments from virtio_blk.h and quoting parts of the
spec without adding any extra information.  I have tested this iteration
of the patch using crosvm with modifications to enable the new features:
https://chromium.googlesource.com/chromiumos/platform/crosvm/

v9 fixes a number of review issues; I didn't attempt to optimize the
single-element write zeroes case, so it still does an allocation per
request (I did not see any easy place to put the payload that would
avoid the allocation).

CHANGELOG:
v9: [dverkamp] fix LE types in discard struct; cleanups from Ming Lei
v8: [dverkamp] replace shifts by 9 with SECTOR_SHIFT constant
v7: [dverkamp] use GFP_ATOMIC for allocation that may not sleep; clarify
descriptor flags field; comment wording cleanups.
v6: don't set the T_OUT bit for discard and write zeroes commands.
v5: use new block layer API: blk_queue_flag_set.
v4: several optimizations based on MST's comments, remove bit field
usage for command descriptor.
v3: define the virtio-blk protocol to add discard and write zeroes
support, first version implementation based on proposed specification.
v2: add write zeroes command support.
v1: initial proposal implementation for discard command.
---
 drivers/block/virtio_blk.c      | 83 ++++++++++++++++++++++++++++++++-
 include/uapi/linux/virtio_blk.h | 54 +++++++++++++++++++++
 2 files changed, 135 insertions(+), 2 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 086c6bb12baa..0f39efb4b3aa 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -18,6 +18,7 @@
 
 #define PART_BITS 4
 #define VQ_NAME_LEN 16
+#define MAX_DISCARD_SEGMENTS 256u
 
 static int major;
 static DEFINE_IDA(vd_index_ida);
@@ -172,10 +173,48 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
 	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
 }
 
+static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap)
+{
+	unsigned short segments = blk_rq_nr_discard_segments(req);
+	unsigned short n = 0;
+	struct virtio_blk_discard_write_zeroes *range;
+	struct bio *bio;
+	u32 flags = 0;
+
+	if (unmap)
+		flags |= VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP;
+
+	range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
+	if (!range)
+		return -ENOMEM;
+
+	__rq_for_each_bio(bio, req) {
+		u64 sector = bio->bi_iter.bi_sector;
+		u32 num_sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT;
+
+		range[n].flags = cpu_to_le32(flags);
+		range[n].num_sectors = cpu_to_le32(num_sectors);
+		range[n].sector = cpu_to_le64(sector);
+		n++;
+	}
+
+	req->special_vec.bv_page = virt_to_page(range);
+	req->special_vec.bv_offset = offset_in_page(range);
+	req->special_vec.bv_len = sizeof(*range) * segments;
+	req->rq_flags |= RQF_SPECIAL_PAYLOAD;
+
+	return 0;
+}
+
 static inline void virtblk_request_done(struct request *req)
 {
 	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
 
+	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
+		kfree(page_address(req->special_vec.bv_page) +
+		      req->special_vec.bv_offset);
+	}
+
 	switch (req_op(req)) {
 	case REQ_OP_SCSI_IN:
 	case REQ_OP_SCSI_OUT:
@@ -225,6 +264,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 	int qid = hctx->queue_num;
 	int err;
 	bool notify = false;
+	bool unmap = false;
 	u32 type;
 
 	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
@@ -237,6 +277,13 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 	case REQ_OP_FLUSH:
 		type = VIRTIO_BLK_T_FLUSH;
 		break;
+	case REQ_OP_DISCARD:
+		type = VIRTIO_BLK_T_DISCARD;
+		break;
+	case REQ_OP_WRITE_ZEROES:
+		type = VIRTIO_BLK_T_WRITE_ZEROES;
+		unmap = !(req->cmd_flags & REQ_NOUNMAP);
+		break;
 	case REQ_OP_SCSI_IN:
 	case REQ_OP_SCSI_OUT:
 		type = VIRTIO_BLK_T_SCSI_CMD;
@@ -256,6 +303,12 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	blk_mq_start_request(req);
 
+	if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
+		err = virtblk_setup_discard_write_zeroes(req, unmap);
+		if (err)
+			return BLK_STS_RESOURCE;
+	}
+
 	num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
 	if (num) {
 		if (rq_data_dir(req) == WRITE)
@@ -802,6 +855,32 @@ static int virtblk_probe(struct virtio_device *vdev)
 	if (!err && opt_io_size)
 		blk_queue_io_opt(q, blk_size * opt_io_size);
 
+	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
+		q->limits.discard_granularity = blk_size;
+
+		virtio_cread(vdev, struct virtio_blk_config,
+			     discard_sector_alignment, &v);
+		q->limits.discard_alignment = v ? v << SECTOR_SHIFT : 0;
+
+		virtio_cread(vdev, struct virtio_blk_config,
+			     max_discard_sectors, &v);
+		blk_queue_max_discard_sectors(q, v ? v : UINT_MAX);
+
+		virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
+			     &v);
+		blk_queue_max_discard_segments(q,
+					       min_not_zero(v,
+							    MAX_DISCARD_SEGMENTS));
+
+		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
+	}
+
+	if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
+		virtio_cread(vdev, struct virtio_blk_config,
+			     max_write_zeroes_sectors, &v);
+		blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX);
+	}
+
 	virtblk_update_capacity(vblk, false);
 	virtio_device_ready(vdev);
 
@@ -895,14 +974,14 @@ static unsigned int features_legacy[] = {
 	VIRTIO_BLK_F_SCSI,
 #endif
 	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
-	VIRTIO_BLK_F_MQ,
+	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
 }
 ;
 static unsigned int features[] = {
 	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
 	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
 	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
-	VIRTIO_BLK_F_MQ,
+	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
 };
 
 static struct virtio_driver virtio_blk = {
diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
index 9ebe4d968dd5..0f99d7b49ede 100644
--- a/include/uapi/linux/virtio_blk.h
+++ b/include/uapi/linux/virtio_blk.h
@@ -38,6 +38,8 @@
 #define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
 #define VIRTIO_BLK_F_TOPOLOGY	10	/* Topology information is available */
 #define VIRTIO_BLK_F_MQ		12	/* support more than one vq */
+#define VIRTIO_BLK_F_DISCARD	13	/* DISCARD is supported */
+#define VIRTIO_BLK_F_WRITE_ZEROES	14	/* WRITE ZEROES is supported */
 
 /* Legacy feature bits */
 #ifndef VIRTIO_BLK_NO_LEGACY
@@ -86,6 +88,39 @@ struct virtio_blk_config {
 
 	/* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
 	__u16 num_queues;
+
+	/* the next 3 entries are guarded by VIRTIO_BLK_F_DISCARD */
+	/*
+	 * The maximum discard sectors (in 512-byte sectors) for
+	 * one segment.
+	 */
+	__u32 max_discard_sectors;
+	/*
+	 * The maximum number of discard segments in a
+	 * discard command.
+	 */
+	__u32 max_discard_seg;
+	/* Discard commands must be aligned to this number of sectors. */
+	__u32 discard_sector_alignment;
+
+	/* the next 3 entries are guarded by VIRTIO_BLK_F_WRITE_ZEROES */
+	/*
+	 * The maximum number of write zeroes sectors (in 512-byte sectors) in
+	 * one segment.
+	 */
+	__u32 max_write_zeroes_sectors;
+	/*
+	 * The maximum number of segments in a write zeroes
+	 * command.
+	 */
+	__u32 max_write_zeroes_seg;
+	/*
+	 * Set if a VIRTIO_BLK_T_WRITE_ZEROES request may result in the
+	 * deallocation of one or more of the sectors.
+	 */
+	__u8 write_zeroes_may_unmap;
+
+	__u8 unused1[3];
 } __attribute__((packed));
 
 /*
@@ -114,6 +149,12 @@ struct virtio_blk_config {
 /* Get device ID command */
 #define VIRTIO_BLK_T_GET_ID    8
 
+/* Discard command */
+#define VIRTIO_BLK_T_DISCARD	11
+
+/* Write zeroes command */
+#define VIRTIO_BLK_T_WRITE_ZEROES	13
+
 #ifndef VIRTIO_BLK_NO_LEGACY
 /* Barrier before this op. */
 #define VIRTIO_BLK_T_BARRIER	0x80000000
@@ -133,6 +174,19 @@ struct virtio_blk_outhdr {
 	__virtio64 sector;
 };
 
+/* Unmap this range (only valid for write zeroes command) */
+#define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP	0x00000001
+
+/* Discard/write zeroes range for each request. */
+struct virtio_blk_discard_write_zeroes {
+	/* discard/write zeroes start sector */
+	__le64 sector;
+	/* number of discard/write zeroes sectors */
+	__le32 num_sectors;
+	/* flags for this range */
+	__le32 flags;
+};
+
 #ifndef VIRTIO_BLK_NO_LEGACY
 struct virtio_scsi_inhdr {
 	__virtio32 errors;
-- 
2.19.1.568.g152ad8e336-goog

^ permalink raw reply related	[flat|nested] 50+ messages in thread

* [PATCH v9] virtio_blk: add discard and write zeroes support
@ 2018-11-01 22:40   ` Daniel Verkamp
  0 siblings, 0 replies; 50+ messages in thread
From: Daniel Verkamp @ 2018-11-01 22:40 UTC (permalink / raw)
  To: virtualization, linux-block
  Cc: Jens Axboe, Michael S. Tsirkin, Christoph Hellwig,
	Stefan Hajnoczi, Paolo Bonzini, Changpeng Liu

From: Changpeng Liu <changpeng.liu@intel.com>

In commit 88c85538, "virtio-blk: add discard and write zeroes features
to specification" (https://github.com/oasis-tcs/virtio-spec), the virtio
block specification has been extended to add VIRTIO_BLK_T_DISCARD and
VIRTIO_BLK_T_WRITE_ZEROES commands.  This patch enables support for
discard and write zeroes in the virtio-blk driver when the device
advertises the corresponding features, VIRTIO_BLK_F_DISCARD and
VIRTIO_BLK_F_WRITE_ZEROES.

Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
Signed-off-by: Daniel Verkamp <dverkamp@chromium.org>
---
dverkamp: I've picked up this patch and made a few minor changes (as
listed below); most notably, I changed the kmalloc back to GFP_ATOMIC,
since it can be called from a context where sleeping is not allowed.
To prevent large allocations, I've also clamped the maximum number of
discard segments to 256; this results in a 4K allocation and should be
plenty of descriptors for most use cases.

I also removed most of the description from the commit message, since it
was duplicating the comments from virtio_blk.h and quoting parts of the
spec without adding any extra information.  I have tested this iteration
of the patch using crosvm with modifications to enable the new features:
https://chromium.googlesource.com/chromiumos/platform/crosvm/

v9 fixes a number of review issues; I didn't attempt to optimize the
single-element write zeroes case, so it still does an allocation per
request (I did not see any easy place to put the payload that would
avoid the allocation).

CHANGELOG:
v9: [dverkamp] fix LE types in discard struct; cleanups from Ming Lei
v8: [dverkamp] replace shifts by 9 with SECTOR_SHIFT constant
v7: [dverkamp] use GFP_ATOMIC for allocation that may not sleep; clarify
descriptor flags field; comment wording cleanups.
v6: don't set the T_OUT bit for discard and write zeroes commands.
v5: use new block layer API: blk_queue_flag_set.
v4: several optimizations based on MST's comments, remove bit field
usage for command descriptor.
v3: define the virtio-blk protocol to add discard and write zeroes
support, first version implementation based on proposed specification.
v2: add write zeroes command support.
v1: initial proposal implementation for discard command.
---
 drivers/block/virtio_blk.c      | 83 ++++++++++++++++++++++++++++++++-
 include/uapi/linux/virtio_blk.h | 54 +++++++++++++++++++++
 2 files changed, 135 insertions(+), 2 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 086c6bb12baa..0f39efb4b3aa 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -18,6 +18,7 @@
 
 #define PART_BITS 4
 #define VQ_NAME_LEN 16
+#define MAX_DISCARD_SEGMENTS 256u
 
 static int major;
 static DEFINE_IDA(vd_index_ida);
@@ -172,10 +173,48 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
 	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
 }
 
+static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap)
+{
+	unsigned short segments = blk_rq_nr_discard_segments(req);
+	unsigned short n = 0;
+	struct virtio_blk_discard_write_zeroes *range;
+	struct bio *bio;
+	u32 flags = 0;
+
+	if (unmap)
+		flags |= VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP;
+
+	range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
+	if (!range)
+		return -ENOMEM;
+
+	__rq_for_each_bio(bio, req) {
+		u64 sector = bio->bi_iter.bi_sector;
+		u32 num_sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT;
+
+		range[n].flags = cpu_to_le32(flags);
+		range[n].num_sectors = cpu_to_le32(num_sectors);
+		range[n].sector = cpu_to_le64(sector);
+		n++;
+	}
+
+	req->special_vec.bv_page = virt_to_page(range);
+	req->special_vec.bv_offset = offset_in_page(range);
+	req->special_vec.bv_len = sizeof(*range) * segments;
+	req->rq_flags |= RQF_SPECIAL_PAYLOAD;
+
+	return 0;
+}
+
 static inline void virtblk_request_done(struct request *req)
 {
 	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
 
+	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
+		kfree(page_address(req->special_vec.bv_page) +
+		      req->special_vec.bv_offset);
+	}
+
 	switch (req_op(req)) {
 	case REQ_OP_SCSI_IN:
 	case REQ_OP_SCSI_OUT:
@@ -225,6 +264,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 	int qid = hctx->queue_num;
 	int err;
 	bool notify = false;
+	bool unmap = false;
 	u32 type;
 
 	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
@@ -237,6 +277,13 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 	case REQ_OP_FLUSH:
 		type = VIRTIO_BLK_T_FLUSH;
 		break;
+	case REQ_OP_DISCARD:
+		type = VIRTIO_BLK_T_DISCARD;
+		break;
+	case REQ_OP_WRITE_ZEROES:
+		type = VIRTIO_BLK_T_WRITE_ZEROES;
+		unmap = !(req->cmd_flags & REQ_NOUNMAP);
+		break;
 	case REQ_OP_SCSI_IN:
 	case REQ_OP_SCSI_OUT:
 		type = VIRTIO_BLK_T_SCSI_CMD;
@@ -256,6 +303,12 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	blk_mq_start_request(req);
 
+	if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
+		err = virtblk_setup_discard_write_zeroes(req, unmap);
+		if (err)
+			return BLK_STS_RESOURCE;
+	}
+
 	num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
 	if (num) {
 		if (rq_data_dir(req) == WRITE)
@@ -802,6 +855,32 @@ static int virtblk_probe(struct virtio_device *vdev)
 	if (!err && opt_io_size)
 		blk_queue_io_opt(q, blk_size * opt_io_size);
 
+	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
+		q->limits.discard_granularity = blk_size;
+
+		virtio_cread(vdev, struct virtio_blk_config,
+			     discard_sector_alignment, &v);
+		q->limits.discard_alignment = v ? v << SECTOR_SHIFT : 0;
+
+		virtio_cread(vdev, struct virtio_blk_config,
+			     max_discard_sectors, &v);
+		blk_queue_max_discard_sectors(q, v ? v : UINT_MAX);
+
+		virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
+			     &v);
+		blk_queue_max_discard_segments(q,
+					       min_not_zero(v,
+							    MAX_DISCARD_SEGMENTS));
+
+		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
+	}
+
+	if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
+		virtio_cread(vdev, struct virtio_blk_config,
+			     max_write_zeroes_sectors, &v);
+		blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX);
+	}
+
 	virtblk_update_capacity(vblk, false);
 	virtio_device_ready(vdev);
 
@@ -895,14 +974,14 @@ static unsigned int features_legacy[] = {
 	VIRTIO_BLK_F_SCSI,
 #endif
 	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
-	VIRTIO_BLK_F_MQ,
+	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
 }
 ;
 static unsigned int features[] = {
 	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
 	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
 	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
-	VIRTIO_BLK_F_MQ,
+	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
 };
 
 static struct virtio_driver virtio_blk = {
diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
index 9ebe4d968dd5..0f99d7b49ede 100644
--- a/include/uapi/linux/virtio_blk.h
+++ b/include/uapi/linux/virtio_blk.h
@@ -38,6 +38,8 @@
 #define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
 #define VIRTIO_BLK_F_TOPOLOGY	10	/* Topology information is available */
 #define VIRTIO_BLK_F_MQ		12	/* support more than one vq */
+#define VIRTIO_BLK_F_DISCARD	13	/* DISCARD is supported */
+#define VIRTIO_BLK_F_WRITE_ZEROES	14	/* WRITE ZEROES is supported */
 
 /* Legacy feature bits */
 #ifndef VIRTIO_BLK_NO_LEGACY
@@ -86,6 +88,39 @@ struct virtio_blk_config {
 
 	/* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
 	__u16 num_queues;
+
+	/* the next 3 entries are guarded by VIRTIO_BLK_F_DISCARD */
+	/*
+	 * The maximum discard sectors (in 512-byte sectors) for
+	 * one segment.
+	 */
+	__u32 max_discard_sectors;
+	/*
+	 * The maximum number of discard segments in a
+	 * discard command.
+	 */
+	__u32 max_discard_seg;
+	/* Discard commands must be aligned to this number of sectors. */
+	__u32 discard_sector_alignment;
+
+	/* the next 3 entries are guarded by VIRTIO_BLK_F_WRITE_ZEROES */
+	/*
+	 * The maximum number of write zeroes sectors (in 512-byte sectors) in
+	 * one segment.
+	 */
+	__u32 max_write_zeroes_sectors;
+	/*
+	 * The maximum number of segments in a write zeroes
+	 * command.
+	 */
+	__u32 max_write_zeroes_seg;
+	/*
+	 * Set if a VIRTIO_BLK_T_WRITE_ZEROES request may result in the
+	 * deallocation of one or more of the sectors.
+	 */
+	__u8 write_zeroes_may_unmap;
+
+	__u8 unused1[3];
 } __attribute__((packed));
 
 /*
@@ -114,6 +149,12 @@ struct virtio_blk_config {
 /* Get device ID command */
 #define VIRTIO_BLK_T_GET_ID    8
 
+/* Discard command */
+#define VIRTIO_BLK_T_DISCARD	11
+
+/* Write zeroes command */
+#define VIRTIO_BLK_T_WRITE_ZEROES	13
+
 #ifndef VIRTIO_BLK_NO_LEGACY
 /* Barrier before this op. */
 #define VIRTIO_BLK_T_BARRIER	0x80000000
@@ -133,6 +174,19 @@ struct virtio_blk_outhdr {
 	__virtio64 sector;
 };
 
+/* Unmap this range (only valid for write zeroes command) */
+#define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP	0x00000001
+
+/* Discard/write zeroes range for each request. */
+struct virtio_blk_discard_write_zeroes {
+	/* discard/write zeroes start sector */
+	__le64 sector;
+	/* number of discard/write zeroes sectors */
+	__le32 num_sectors;
+	/* flags for this range */
+	__le32 flags;
+};
+
 #ifndef VIRTIO_BLK_NO_LEGACY
 struct virtio_scsi_inhdr {
 	__virtio32 errors;
-- 
2.19.1.568.g152ad8e336-goog

^ permalink raw reply related	[flat|nested] 50+ messages in thread

* Re: [PATCH v9] virtio_blk: add discard and write zeroes support
  2018-11-01 22:40   ` Daniel Verkamp
@ 2018-11-01 23:43     ` Dongli Zhang
  -1 siblings, 0 replies; 50+ messages in thread
From: Dongli Zhang @ 2018-11-01 23:43 UTC (permalink / raw)
  To: Daniel Verkamp
  Cc: virtualization, linux-block, Michael S. Tsirkin, Jason Wang,
	Jens Axboe, Stefan Hajnoczi, Paolo Bonzini, Christoph Hellwig,
	Changpeng Liu

Hi Daniel,

Other than crosvm, is there any version of QEMU (e.g., a work-in-progress
repository on GitHub) where I can try out this feature?

Thank you very much!

Dongli Zhang

On 11/02/2018 06:40 AM, Daniel Verkamp wrote:
> From: Changpeng Liu <changpeng.liu@intel.com>
> 
> In commit 88c85538, "virtio-blk: add discard and write zeroes features
> to specification" (https://github.com/oasis-tcs/virtio-spec), the virtio
> block specification has been extended to add VIRTIO_BLK_T_DISCARD and
> VIRTIO_BLK_T_WRITE_ZEROES commands.  This patch enables support for
> discard and write zeroes in the virtio-blk driver when the device
> advertises the corresponding features, VIRTIO_BLK_F_DISCARD and
> VIRTIO_BLK_F_WRITE_ZEROES.
> 
> Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
> Signed-off-by: Daniel Verkamp <dverkamp@chromium.org>
> ---
> dverkamp: I've picked up this patch and made a few minor changes (as
> listed below); most notably, I changed the kmalloc back to GFP_ATOMIC,
> since it can be called from a context where sleeping is not allowed.
> To prevent large allocations, I've also clamped the maximum number of
> discard segments to 256; this results in a 4K allocation and should be
> plenty of descriptors for most use cases.
> 
> I also removed most of the description from the commit message, since it
> was duplicating the comments from virtio_blk.h and quoting parts of the
> spec without adding any extra information.  I have tested this iteration
> of the patch using crosvm with modifications to enable the new features:
> https://chromium.googlesource.com/chromiumos/platform/crosvm/
> 
> v9 fixes a number of review issues; I didn't attempt to optimize the
> single-element write zeroes case, so it still does an allocation per
> request (I did not see any easy place to put the payload that would
> avoid the allocation).
> 
> CHANGELOG:
> v9: [dverkamp] fix LE types in discard struct; cleanups from Ming Lei
> v8: [dverkamp] replace shifts by 9 with SECTOR_SHIFT constant
> v7: [dverkamp] use GFP_ATOMIC for allocation that may not sleep; clarify
> descriptor flags field; comment wording cleanups.
> v6: don't set the T_OUT bit for discard and write zeroes commands.
> v5: use new block layer API: blk_queue_flag_set.
> v4: several optimizations based on MST's comments, remove bit field
> usage for command descriptor.
> v3: define the virtio-blk protocol to add discard and write zeroes
> support, first version implementation based on proposed specification.
> v2: add write zeroes command support.
> v1: initial proposal implementation for discard command.
> ---
>  drivers/block/virtio_blk.c      | 83 ++++++++++++++++++++++++++++++++-
>  include/uapi/linux/virtio_blk.h | 54 +++++++++++++++++++++
>  2 files changed, 135 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index 086c6bb12baa..0f39efb4b3aa 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -18,6 +18,7 @@
>  
>  #define PART_BITS 4
>  #define VQ_NAME_LEN 16
> +#define MAX_DISCARD_SEGMENTS 256u
>  
>  static int major;
>  static DEFINE_IDA(vd_index_ida);
> @@ -172,10 +173,48 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
>  	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
>  }
>  
> +static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap)
> +{
> +	unsigned short segments = blk_rq_nr_discard_segments(req);
> +	unsigned short n = 0;
> +	struct virtio_blk_discard_write_zeroes *range;
> +	struct bio *bio;
> +	u32 flags = 0;
> +
> +	if (unmap)
> +		flags |= VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP;
> +
> +	range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
> +	if (!range)
> +		return -ENOMEM;
> +
> +	__rq_for_each_bio(bio, req) {
> +		u64 sector = bio->bi_iter.bi_sector;
> +		u32 num_sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT;
> +
> +		range[n].flags = cpu_to_le32(flags);
> +		range[n].num_sectors = cpu_to_le32(num_sectors);
> +		range[n].sector = cpu_to_le64(sector);
> +		n++;
> +	}
> +
> +	req->special_vec.bv_page = virt_to_page(range);
> +	req->special_vec.bv_offset = offset_in_page(range);
> +	req->special_vec.bv_len = sizeof(*range) * segments;
> +	req->rq_flags |= RQF_SPECIAL_PAYLOAD;
> +
> +	return 0;
> +}
> +
>  static inline void virtblk_request_done(struct request *req)
>  {
>  	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
>  
> +	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
> +		kfree(page_address(req->special_vec.bv_page) +
> +		      req->special_vec.bv_offset);
> +	}
> +
>  	switch (req_op(req)) {
>  	case REQ_OP_SCSI_IN:
>  	case REQ_OP_SCSI_OUT:
> @@ -225,6 +264,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>  	int qid = hctx->queue_num;
>  	int err;
>  	bool notify = false;
> +	bool unmap = false;
>  	u32 type;
>  
>  	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
> @@ -237,6 +277,13 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>  	case REQ_OP_FLUSH:
>  		type = VIRTIO_BLK_T_FLUSH;
>  		break;
> +	case REQ_OP_DISCARD:
> +		type = VIRTIO_BLK_T_DISCARD;
> +		break;
> +	case REQ_OP_WRITE_ZEROES:
> +		type = VIRTIO_BLK_T_WRITE_ZEROES;
> +		unmap = !(req->cmd_flags & REQ_NOUNMAP);
> +		break;
>  	case REQ_OP_SCSI_IN:
>  	case REQ_OP_SCSI_OUT:
>  		type = VIRTIO_BLK_T_SCSI_CMD;
> @@ -256,6 +303,12 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>  
>  	blk_mq_start_request(req);
>  
> +	if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
> +		err = virtblk_setup_discard_write_zeroes(req, unmap);
> +		if (err)
> +			return BLK_STS_RESOURCE;
> +	}
> +
>  	num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
>  	if (num) {
>  		if (rq_data_dir(req) == WRITE)
> @@ -802,6 +855,32 @@ static int virtblk_probe(struct virtio_device *vdev)
>  	if (!err && opt_io_size)
>  		blk_queue_io_opt(q, blk_size * opt_io_size);
>  
> +	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
> +		q->limits.discard_granularity = blk_size;
> +
> +		virtio_cread(vdev, struct virtio_blk_config,
> +			     discard_sector_alignment, &v);
> +		q->limits.discard_alignment = v ? v << SECTOR_SHIFT : 0;
> +
> +		virtio_cread(vdev, struct virtio_blk_config,
> +			     max_discard_sectors, &v);
> +		blk_queue_max_discard_sectors(q, v ? v : UINT_MAX);
> +
> +		virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
> +			     &v);
> +		blk_queue_max_discard_segments(q,
> +					       min_not_zero(v,
> +							    MAX_DISCARD_SEGMENTS));
> +
> +		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
> +	}
> +
> +	if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
> +		virtio_cread(vdev, struct virtio_blk_config,
> +			     max_write_zeroes_sectors, &v);
> +		blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX);
> +	}
> +
>  	virtblk_update_capacity(vblk, false);
>  	virtio_device_ready(vdev);
>  
> @@ -895,14 +974,14 @@ static unsigned int features_legacy[] = {
>  	VIRTIO_BLK_F_SCSI,
>  #endif
>  	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
> -	VIRTIO_BLK_F_MQ,
> +	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
>  }
>  ;
>  static unsigned int features[] = {
>  	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
>  	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
>  	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
> -	VIRTIO_BLK_F_MQ,
> +	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
>  };
>  
>  static struct virtio_driver virtio_blk = {
> diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
> index 9ebe4d968dd5..0f99d7b49ede 100644
> --- a/include/uapi/linux/virtio_blk.h
> +++ b/include/uapi/linux/virtio_blk.h
> @@ -38,6 +38,8 @@
>  #define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
>  #define VIRTIO_BLK_F_TOPOLOGY	10	/* Topology information is available */
>  #define VIRTIO_BLK_F_MQ		12	/* support more than one vq */
> +#define VIRTIO_BLK_F_DISCARD	13	/* DISCARD is supported */
> +#define VIRTIO_BLK_F_WRITE_ZEROES	14	/* WRITE ZEROES is supported */
>  
>  /* Legacy feature bits */
>  #ifndef VIRTIO_BLK_NO_LEGACY
> @@ -86,6 +88,39 @@ struct virtio_blk_config {
>  
>  	/* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
>  	__u16 num_queues;
> +
> +	/* the next 3 entries are guarded by VIRTIO_BLK_F_DISCARD */
> +	/*
> +	 * The maximum discard sectors (in 512-byte sectors) for
> +	 * one segment.
> +	 */
> +	__u32 max_discard_sectors;
> +	/*
> +	 * The maximum number of discard segments in a
> +	 * discard command.
> +	 */
> +	__u32 max_discard_seg;
> +	/* Discard commands must be aligned to this number of sectors. */
> +	__u32 discard_sector_alignment;
> +
> +	/* the next 3 entries are guarded by VIRTIO_BLK_F_WRITE_ZEROES */
> +	/*
> +	 * The maximum number of write zeroes sectors (in 512-byte sectors) in
> +	 * one segment.
> +	 */
> +	__u32 max_write_zeroes_sectors;
> +	/*
> +	 * The maximum number of segments in a write zeroes
> +	 * command.
> +	 */
> +	__u32 max_write_zeroes_seg;
> +	/*
> +	 * Set if a VIRTIO_BLK_T_WRITE_ZEROES request may result in the
> +	 * deallocation of one or more of the sectors.
> +	 */
> +	__u8 write_zeroes_may_unmap;
> +
> +	__u8 unused1[3];
>  } __attribute__((packed));
>  
>  /*
> @@ -114,6 +149,12 @@ struct virtio_blk_config {
>  /* Get device ID command */
>  #define VIRTIO_BLK_T_GET_ID    8
>  
> +/* Discard command */
> +#define VIRTIO_BLK_T_DISCARD	11
> +
> +/* Write zeroes command */
> +#define VIRTIO_BLK_T_WRITE_ZEROES	13
> +
>  #ifndef VIRTIO_BLK_NO_LEGACY
>  /* Barrier before this op. */
>  #define VIRTIO_BLK_T_BARRIER	0x80000000
> @@ -133,6 +174,19 @@ struct virtio_blk_outhdr {
>  	__virtio64 sector;
>  };
>  
> +/* Unmap this range (only valid for write zeroes command) */
> +#define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP	0x00000001
> +
> +/* Discard/write zeroes range for each request. */
> +struct virtio_blk_discard_write_zeroes {
> +	/* discard/write zeroes start sector */
> +	__le64 sector;
> +	/* number of discard/write zeroes sectors */
> +	__le32 num_sectors;
> +	/* flags for this range */
> +	__le32 flags;
> +};
> +
>  #ifndef VIRTIO_BLK_NO_LEGACY
>  struct virtio_scsi_inhdr {
>  	__virtio32 errors;
> 

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v9] virtio_blk: add discard and write zeroes support
@ 2018-11-01 23:43     ` Dongli Zhang
  0 siblings, 0 replies; 50+ messages in thread
From: Dongli Zhang @ 2018-11-01 23:43 UTC (permalink / raw)
  To: Daniel Verkamp
  Cc: Jens Axboe, Christoph Hellwig, Michael S. Tsirkin,
	virtualization, linux-block, Stefan Hajnoczi, Paolo Bonzini,
	Changpeng Liu

Hi Daniel,

Other than crosvm, is there any version of qemu (e.g., an in-progress development
repository on GitHub) where I can try out this feature?

Thank you very much!

Dongli Zhang

On 11/02/2018 06:40 AM, Daniel Verkamp wrote:
> From: Changpeng Liu <changpeng.liu@intel.com>
> 
> In commit 88c85538, "virtio-blk: add discard and write zeroes features
> to specification" (https://github.com/oasis-tcs/virtio-spec), the virtio
> block specification has been extended to add VIRTIO_BLK_T_DISCARD and
> VIRTIO_BLK_T_WRITE_ZEROES commands.  This patch enables support for
> discard and write zeroes in the virtio-blk driver when the device
> advertises the corresponding features, VIRTIO_BLK_F_DISCARD and
> VIRTIO_BLK_F_WRITE_ZEROES.
> 
> Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
> Signed-off-by: Daniel Verkamp <dverkamp@chromium.org>
> ---
> dverkamp: I've picked up this patch and made a few minor changes (as
> listed below); most notably, I changed the kmalloc back to GFP_ATOMIC,
> since it can be called from a context where sleeping is not allowed.
> To prevent large allocations, I've also clamped the maximum number of
> discard segments to 256; this results in a 4K allocation and should be
> plenty of descriptors for most use cases.
> 
> I also removed most of the description from the commit message, since it
> was duplicating the comments from virtio_blk.h and quoting parts of the
> spec without adding any extra information.  I have tested this iteration
> of the patch using crosvm with modifications to enable the new features:
> https://chromium.googlesource.com/chromiumos/platform/crosvm/
> 
> v9 fixes a number of review issues; I didn't attempt to optimize the
> single-element write zeroes case, so it still does an allocation per
> request (I did not see any easy place to put the payload that would
> avoid the allocation).
> 
> CHANGELOG:
> v9: [dverkamp] fix LE types in discard struct; cleanups from Ming Lei
> v8: [dverkamp] replace shifts by 9 with SECTOR_SHIFT constant
> v7: [dverkamp] use GFP_ATOMIC for allocation that may not sleep; clarify
> descriptor flags field; comment wording cleanups.
> v6: don't set T_OUT bit to discard and write zeroes commands.
> v5: use new block layer API: blk_queue_flag_set.
> v4: several optimizations based on MST's comments, remove bit field
> usage for command descriptor.
> v3: define the virtio-blk protocol to add discard and write zeroes
> support, first version implementation based on proposed specification.
> v2: add write zeroes command support.
> v1: initial proposal implementation for discard command.
> ---
>  drivers/block/virtio_blk.c      | 83 ++++++++++++++++++++++++++++++++-
>  include/uapi/linux/virtio_blk.h | 54 +++++++++++++++++++++
>  2 files changed, 135 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index 086c6bb12baa..0f39efb4b3aa 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -18,6 +18,7 @@
>  
>  #define PART_BITS 4
>  #define VQ_NAME_LEN 16
> +#define MAX_DISCARD_SEGMENTS 256u
>  
>  static int major;
>  static DEFINE_IDA(vd_index_ida);
> @@ -172,10 +173,48 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
>  	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
>  }
>  
> +static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap)
> +{
> +	unsigned short segments = blk_rq_nr_discard_segments(req);
> +	unsigned short n = 0;
> +	struct virtio_blk_discard_write_zeroes *range;
> +	struct bio *bio;
> +	u32 flags = 0;
> +
> +	if (unmap)
> +		flags |= VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP;
> +
> +	range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
> +	if (!range)
> +		return -ENOMEM;
> +
> +	__rq_for_each_bio(bio, req) {
> +		u64 sector = bio->bi_iter.bi_sector;
> +		u32 num_sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT;
> +
> +		range[n].flags = cpu_to_le32(flags);
> +		range[n].num_sectors = cpu_to_le32(num_sectors);
> +		range[n].sector = cpu_to_le64(sector);
> +		n++;
> +	}
> +
> +	req->special_vec.bv_page = virt_to_page(range);
> +	req->special_vec.bv_offset = offset_in_page(range);
> +	req->special_vec.bv_len = sizeof(*range) * segments;
> +	req->rq_flags |= RQF_SPECIAL_PAYLOAD;
> +
> +	return 0;
> +}
> +
>  static inline void virtblk_request_done(struct request *req)
>  {
>  	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
>  
> +	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
> +		kfree(page_address(req->special_vec.bv_page) +
> +		      req->special_vec.bv_offset);
> +	}
> +
>  	switch (req_op(req)) {
>  	case REQ_OP_SCSI_IN:
>  	case REQ_OP_SCSI_OUT:
> @@ -225,6 +264,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>  	int qid = hctx->queue_num;
>  	int err;
>  	bool notify = false;
> +	bool unmap = false;
>  	u32 type;
>  
>  	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
> @@ -237,6 +277,13 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>  	case REQ_OP_FLUSH:
>  		type = VIRTIO_BLK_T_FLUSH;
>  		break;
> +	case REQ_OP_DISCARD:
> +		type = VIRTIO_BLK_T_DISCARD;
> +		break;
> +	case REQ_OP_WRITE_ZEROES:
> +		type = VIRTIO_BLK_T_WRITE_ZEROES;
> +		unmap = !(req->cmd_flags & REQ_NOUNMAP);
> +		break;
>  	case REQ_OP_SCSI_IN:
>  	case REQ_OP_SCSI_OUT:
>  		type = VIRTIO_BLK_T_SCSI_CMD;
> @@ -256,6 +303,12 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>  
>  	blk_mq_start_request(req);
>  
> +	if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
> +		err = virtblk_setup_discard_write_zeroes(req, unmap);
> +		if (err)
> +			return BLK_STS_RESOURCE;
> +	}
> +
>  	num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
>  	if (num) {
>  		if (rq_data_dir(req) == WRITE)
> @@ -802,6 +855,32 @@ static int virtblk_probe(struct virtio_device *vdev)
>  	if (!err && opt_io_size)
>  		blk_queue_io_opt(q, blk_size * opt_io_size);
>  
> +	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
> +		q->limits.discard_granularity = blk_size;
> +
> +		virtio_cread(vdev, struct virtio_blk_config,
> +			     discard_sector_alignment, &v);
> +		q->limits.discard_alignment = v ? v << SECTOR_SHIFT : 0;
> +
> +		virtio_cread(vdev, struct virtio_blk_config,
> +			     max_discard_sectors, &v);
> +		blk_queue_max_discard_sectors(q, v ? v : UINT_MAX);
> +
> +		virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
> +			     &v);
> +		blk_queue_max_discard_segments(q,
> +					       min_not_zero(v,
> +							    MAX_DISCARD_SEGMENTS));
> +
> +		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
> +	}
> +
> +	if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
> +		virtio_cread(vdev, struct virtio_blk_config,
> +			     max_write_zeroes_sectors, &v);
> +		blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX);
> +	}
> +
>  	virtblk_update_capacity(vblk, false);
>  	virtio_device_ready(vdev);
>  
> @@ -895,14 +974,14 @@ static unsigned int features_legacy[] = {
>  	VIRTIO_BLK_F_SCSI,
>  #endif
>  	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
> -	VIRTIO_BLK_F_MQ,
> +	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
>  }
>  ;
>  static unsigned int features[] = {
>  	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
>  	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
>  	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
> -	VIRTIO_BLK_F_MQ,
> +	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
>  };
>  
>  static struct virtio_driver virtio_blk = {
> diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
> index 9ebe4d968dd5..0f99d7b49ede 100644
> --- a/include/uapi/linux/virtio_blk.h
> +++ b/include/uapi/linux/virtio_blk.h
> @@ -38,6 +38,8 @@
>  #define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
>  #define VIRTIO_BLK_F_TOPOLOGY	10	/* Topology information is available */
>  #define VIRTIO_BLK_F_MQ		12	/* support more than one vq */
> +#define VIRTIO_BLK_F_DISCARD	13	/* DISCARD is supported */
> +#define VIRTIO_BLK_F_WRITE_ZEROES	14	/* WRITE ZEROES is supported */
>  
>  /* Legacy feature bits */
>  #ifndef VIRTIO_BLK_NO_LEGACY
> @@ -86,6 +88,39 @@ struct virtio_blk_config {
>  
>  	/* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
>  	__u16 num_queues;
> +
> +	/* the next 3 entries are guarded by VIRTIO_BLK_F_DISCARD */
> +	/*
> +	 * The maximum discard sectors (in 512-byte sectors) for
> +	 * one segment.
> +	 */
> +	__u32 max_discard_sectors;
> +	/*
> +	 * The maximum number of discard segments in a
> +	 * discard command.
> +	 */
> +	__u32 max_discard_seg;
> +	/* Discard commands must be aligned to this number of sectors. */
> +	__u32 discard_sector_alignment;
> +
> +	/* the next 3 entries are guarded by VIRTIO_BLK_F_WRITE_ZEROES */
> +	/*
> +	 * The maximum number of write zeroes sectors (in 512-byte sectors) in
> +	 * one segment.
> +	 */
> +	__u32 max_write_zeroes_sectors;
> +	/*
> +	 * The maximum number of segments in a write zeroes
> +	 * command.
> +	 */
> +	__u32 max_write_zeroes_seg;
> +	/*
> +	 * Set if a VIRTIO_BLK_T_WRITE_ZEROES request may result in the
> +	 * deallocation of one or more of the sectors.
> +	 */
> +	__u8 write_zeroes_may_unmap;
> +
> +	__u8 unused1[3];
>  } __attribute__((packed));
>  
>  /*
> @@ -114,6 +149,12 @@ struct virtio_blk_config {
>  /* Get device ID command */
>  #define VIRTIO_BLK_T_GET_ID    8
>  
> +/* Discard command */
> +#define VIRTIO_BLK_T_DISCARD	11
> +
> +/* Write zeroes command */
> +#define VIRTIO_BLK_T_WRITE_ZEROES	13
> +
>  #ifndef VIRTIO_BLK_NO_LEGACY
>  /* Barrier before this op. */
>  #define VIRTIO_BLK_T_BARRIER	0x80000000
> @@ -133,6 +174,19 @@ struct virtio_blk_outhdr {
>  	__virtio64 sector;
>  };
>  
> +/* Unmap this range (only valid for write zeroes command) */
> +#define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP	0x00000001
> +
> +/* Discard/write zeroes range for each request. */
> +struct virtio_blk_discard_write_zeroes {
> +	/* discard/write zeroes start sector */
> +	__le64 sector;
> +	/* number of discard/write zeroes sectors */
> +	__le32 num_sectors;
> +	/* flags for this range */
> +	__le32 flags;
> +};
> +
>  #ifndef VIRTIO_BLK_NO_LEGACY
>  struct virtio_scsi_inhdr {
>  	__virtio32 errors;
> 

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v9] virtio_blk: add discard and write zeroes support
  2018-11-01 22:40   ` Daniel Verkamp
@ 2018-11-02  4:17     ` Stefan Hajnoczi
  -1 siblings, 0 replies; 50+ messages in thread
From: Stefan Hajnoczi @ 2018-11-02  4:17 UTC (permalink / raw)
  To: Daniel Verkamp
  Cc: virtualization, linux-block, Michael S. Tsirkin, Jason Wang,
	Jens Axboe, Paolo Bonzini, Christoph Hellwig, Changpeng Liu

[-- Attachment #1: Type: text/plain, Size: 2736 bytes --]

On Thu, Nov 01, 2018 at 03:40:35PM -0700, Daniel Verkamp wrote:
> From: Changpeng Liu <changpeng.liu@intel.com>
> 
> In commit 88c85538, "virtio-blk: add discard and write zeroes features
> to specification" (https://github.com/oasis-tcs/virtio-spec), the virtio
> block specification has been extended to add VIRTIO_BLK_T_DISCARD and
> VIRTIO_BLK_T_WRITE_ZEROES commands.  This patch enables support for
> discard and write zeroes in the virtio-blk driver when the device
> advertises the corresponding features, VIRTIO_BLK_F_DISCARD and
> VIRTIO_BLK_F_WRITE_ZEROES.
> 
> Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
> Signed-off-by: Daniel Verkamp <dverkamp@chromium.org>
> ---
> dverkamp: I've picked up this patch and made a few minor changes (as
> listed below); most notably, I changed the kmalloc back to GFP_ATOMIC,
> since it can be called from a context where sleeping is not allowed.
> To prevent large allocations, I've also clamped the maximum number of
> discard segments to 256; this results in a 4K allocation and should be
> plenty of descriptors for most use cases.
> 
> I also removed most of the description from the commit message, since it
> was duplicating the comments from virtio_blk.h and quoting parts of the
> spec without adding any extra information.  I have tested this iteration
> of the patch using crosvm with modifications to enable the new features:
> https://chromium.googlesource.com/chromiumos/platform/crosvm/
> 
> v9 fixes a number of review issues; I didn't attempt to optimize the
> single-element write zeroes case, so it still does an allocation per
> request (I did not see any easy place to put the payload that would
> avoid the allocation).
> 
> CHANGELOG:
> v9: [dverkamp] fix LE types in discard struct; cleanups from Ming Lei
> v8: [dverkamp] replace shifts by 9 with SECTOR_SHIFT constant
> v7: [dverkamp] use GFP_ATOMIC for allocation that may not sleep; clarify
> descriptor flags field; comment wording cleanups.
> v6: don't set T_OUT bit to discard and write zeroes commands.
> v5: use new block layer API: blk_queue_flag_set.
> v4: several optimizations based on MST's comments, remove bit field
> usage for command descriptor.
> v3: define the virtio-blk protocol to add discard and write zeroes
> support, first version implementation based on proposed specification.
> v2: add write zeroes command support.
> v1: initial proposal implementation for discard command.
> ---
>  drivers/block/virtio_blk.c      | 83 ++++++++++++++++++++++++++++++++-
>  include/uapi/linux/virtio_blk.h | 54 +++++++++++++++++++++
>  2 files changed, 135 insertions(+), 2 deletions(-)

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v9] virtio_blk: add discard and write zeroes support
@ 2018-11-02  4:17     ` Stefan Hajnoczi
  0 siblings, 0 replies; 50+ messages in thread
From: Stefan Hajnoczi @ 2018-11-02  4:17 UTC (permalink / raw)
  To: Daniel Verkamp
  Cc: Jens Axboe, Christoph Hellwig, Michael S. Tsirkin,
	virtualization, linux-block, Paolo Bonzini, Changpeng Liu


[-- Attachment #1.1: Type: text/plain, Size: 2736 bytes --]

On Thu, Nov 01, 2018 at 03:40:35PM -0700, Daniel Verkamp wrote:
> From: Changpeng Liu <changpeng.liu@intel.com>
> 
> In commit 88c85538, "virtio-blk: add discard and write zeroes features
> to specification" (https://github.com/oasis-tcs/virtio-spec), the virtio
> block specification has been extended to add VIRTIO_BLK_T_DISCARD and
> VIRTIO_BLK_T_WRITE_ZEROES commands.  This patch enables support for
> discard and write zeroes in the virtio-blk driver when the device
> advertises the corresponding features, VIRTIO_BLK_F_DISCARD and
> VIRTIO_BLK_F_WRITE_ZEROES.
> 
> Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
> Signed-off-by: Daniel Verkamp <dverkamp@chromium.org>
> ---
> dverkamp: I've picked up this patch and made a few minor changes (as
> listed below); most notably, I changed the kmalloc back to GFP_ATOMIC,
> since it can be called from a context where sleeping is not allowed.
> To prevent large allocations, I've also clamped the maximum number of
> discard segments to 256; this results in a 4K allocation and should be
> plenty of descriptors for most use cases.
> 
> I also removed most of the description from the commit message, since it
> was duplicating the comments from virtio_blk.h and quoting parts of the
> spec without adding any extra information.  I have tested this iteration
> of the patch using crosvm with modifications to enable the new features:
> https://chromium.googlesource.com/chromiumos/platform/crosvm/
> 
> v9 fixes a number of review issues; I didn't attempt to optimize the
> single-element write zeroes case, so it still does an allocation per
> request (I did not see any easy place to put the payload that would
> avoid the allocation).
> 
> CHANGELOG:
> v9: [dverkamp] fix LE types in discard struct; cleanups from Ming Lei
> v8: [dverkamp] replace shifts by 9 with SECTOR_SHIFT constant
> v7: [dverkamp] use GFP_ATOMIC for allocation that may not sleep; clarify
> descriptor flags field; comment wording cleanups.
> v6: don't set T_OUT bit to discard and write zeroes commands.
> v5: use new block layer API: blk_queue_flag_set.
> v4: several optimizations based on MST's comments, remove bit field
> usage for command descriptor.
> v3: define the virtio-blk protocol to add discard and write zeroes
> support, first version implementation based on proposed specification.
> v2: add write zeroes command support.
> v1: initial proposal implementation for discard command.
> ---
>  drivers/block/virtio_blk.c      | 83 ++++++++++++++++++++++++++++++++-
>  include/uapi/linux/virtio_blk.h | 54 +++++++++++++++++++++
>  2 files changed, 135 insertions(+), 2 deletions(-)

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v9] virtio_blk: add discard and write zeroes support
  2018-11-01 23:43     ` Dongli Zhang
@ 2018-11-02 18:25       ` Daniel Verkamp
  -1 siblings, 0 replies; 50+ messages in thread
From: Daniel Verkamp @ 2018-11-02 18:25 UTC (permalink / raw)
  To: dongli.zhang
  Cc: virtualization, linux-block, mst, jasowang, axboe, stefanha,
	pbonzini, hch, Changpeng Liu

Hi Dongli,

Unfortunately, I am not aware of any in-progress implementation of
this feature for qemu. It hopefully should not be too difficult to
wire up in the qemu virtio-blk model, but I haven't looked into it in
detail.

Thanks,
-- Daniel
On Thu, Nov 1, 2018 at 4:42 PM Dongli Zhang <dongli.zhang@oracle.com> wrote:
>
> Hi Daniel,
>
> Other than crosvm, is there any version of qemu (e.g., an in-progress development
> repository on GitHub) where I can try out this feature?
>
> Thank you very much!
>
> Dongli Zhang
>
> On 11/02/2018 06:40 AM, Daniel Verkamp wrote:
> > From: Changpeng Liu <changpeng.liu@intel.com>
> >
> > In commit 88c85538, "virtio-blk: add discard and write zeroes features
> > to specification" (https://github.com/oasis-tcs/virtio-spec), the virtio
> > block specification has been extended to add VIRTIO_BLK_T_DISCARD and
> > VIRTIO_BLK_T_WRITE_ZEROES commands.  This patch enables support for
> > discard and write zeroes in the virtio-blk driver when the device
> > advertises the corresponding features, VIRTIO_BLK_F_DISCARD and
> > VIRTIO_BLK_F_WRITE_ZEROES.
> >
> > Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
> > Signed-off-by: Daniel Verkamp <dverkamp@chromium.org>
> > ---
> > dverkamp: I've picked up this patch and made a few minor changes (as
> > listed below); most notably, I changed the kmalloc back to GFP_ATOMIC,
> > since it can be called from a context where sleeping is not allowed.
> > To prevent large allocations, I've also clamped the maximum number of
> > discard segments to 256; this results in a 4K allocation and should be
> > plenty of descriptors for most use cases.
> >
> > I also removed most of the description from the commit message, since it
> > was duplicating the comments from virtio_blk.h and quoting parts of the
> > spec without adding any extra information.  I have tested this iteration
> > of the patch using crosvm with modifications to enable the new features:
> > https://chromium.googlesource.com/chromiumos/platform/crosvm/
> >
> > v9 fixes a number of review issues; I didn't attempt to optimize the
> > single-element write zeroes case, so it still does an allocation per
> > request (I did not see any easy place to put the payload that would
> > avoid the allocation).
> >
> > CHANGELOG:
> > v9: [dverkamp] fix LE types in discard struct; cleanups from Ming Lei
> > v8: [dverkamp] replace shifts by 9 with SECTOR_SHIFT constant
> > v7: [dverkamp] use GFP_ATOMIC for allocation that may not sleep; clarify
> > descriptor flags field; comment wording cleanups.
> > v6: don't set T_OUT bit to discard and write zeroes commands.
> > v5: use new block layer API: blk_queue_flag_set.
> > v4: several optimizations based on MST's comments, remove bit field
> > usage for command descriptor.
> > v3: define the virtio-blk protocol to add discard and write zeroes
> > support, first version implementation based on proposed specification.
> > v2: add write zeroes command support.
> > v1: initial proposal implementation for discard command.
> > ---
> >  drivers/block/virtio_blk.c      | 83 ++++++++++++++++++++++++++++++++-
> >  include/uapi/linux/virtio_blk.h | 54 +++++++++++++++++++++
> >  2 files changed, 135 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> > index 086c6bb12baa..0f39efb4b3aa 100644
> > --- a/drivers/block/virtio_blk.c
> > +++ b/drivers/block/virtio_blk.c
> > @@ -18,6 +18,7 @@
> >
> >  #define PART_BITS 4
> >  #define VQ_NAME_LEN 16
> > +#define MAX_DISCARD_SEGMENTS 256u
> >
> >  static int major;
> >  static DEFINE_IDA(vd_index_ida);
> > @@ -172,10 +173,48 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
> >       return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
> >  }
> >
> > +static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap)
> > +{
> > +     unsigned short segments = blk_rq_nr_discard_segments(req);
> > +     unsigned short n = 0;
> > +     struct virtio_blk_discard_write_zeroes *range;
> > +     struct bio *bio;
> > +     u32 flags = 0;
> > +
> > +     if (unmap)
> > +             flags |= VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP;
> > +
> > +     range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
> > +     if (!range)
> > +             return -ENOMEM;
> > +
> > +     __rq_for_each_bio(bio, req) {
> > +             u64 sector = bio->bi_iter.bi_sector;
> > +             u32 num_sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT;
> > +
> > +             range[n].flags = cpu_to_le32(flags);
> > +             range[n].num_sectors = cpu_to_le32(num_sectors);
> > +             range[n].sector = cpu_to_le64(sector);
> > +             n++;
> > +     }
> > +
> > +     req->special_vec.bv_page = virt_to_page(range);
> > +     req->special_vec.bv_offset = offset_in_page(range);
> > +     req->special_vec.bv_len = sizeof(*range) * segments;
> > +     req->rq_flags |= RQF_SPECIAL_PAYLOAD;
> > +
> > +     return 0;
> > +}
> > +
> >  static inline void virtblk_request_done(struct request *req)
> >  {
> >       struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
> >
> > +     if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
> > +             kfree(page_address(req->special_vec.bv_page) +
> > +                   req->special_vec.bv_offset);
> > +     }
> > +
> >       switch (req_op(req)) {
> >       case REQ_OP_SCSI_IN:
> >       case REQ_OP_SCSI_OUT:
> > @@ -225,6 +264,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
> >       int qid = hctx->queue_num;
> >       int err;
> >       bool notify = false;
> > +     bool unmap = false;
> >       u32 type;
> >
> >       BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
> > @@ -237,6 +277,13 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
> >       case REQ_OP_FLUSH:
> >               type = VIRTIO_BLK_T_FLUSH;
> >               break;
> > +     case REQ_OP_DISCARD:
> > +             type = VIRTIO_BLK_T_DISCARD;
> > +             break;
> > +     case REQ_OP_WRITE_ZEROES:
> > +             type = VIRTIO_BLK_T_WRITE_ZEROES;
> > +             unmap = !(req->cmd_flags & REQ_NOUNMAP);
> > +             break;
> >       case REQ_OP_SCSI_IN:
> >       case REQ_OP_SCSI_OUT:
> >               type = VIRTIO_BLK_T_SCSI_CMD;
> > @@ -256,6 +303,12 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
> >
> >       blk_mq_start_request(req);
> >
> > +     if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
> > +             err = virtblk_setup_discard_write_zeroes(req, unmap);
> > +             if (err)
> > +                     return BLK_STS_RESOURCE;
> > +     }
> > +
> >       num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
> >       if (num) {
> >               if (rq_data_dir(req) == WRITE)
> > @@ -802,6 +855,32 @@ static int virtblk_probe(struct virtio_device *vdev)
> >       if (!err && opt_io_size)
> >               blk_queue_io_opt(q, blk_size * opt_io_size);
> >
> > +     if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
> > +             q->limits.discard_granularity = blk_size;
> > +
> > +             virtio_cread(vdev, struct virtio_blk_config,
> > +                          discard_sector_alignment, &v);
> > +             q->limits.discard_alignment = v ? v << SECTOR_SHIFT : 0;
> > +
> > +             virtio_cread(vdev, struct virtio_blk_config,
> > +                          max_discard_sectors, &v);
> > +             blk_queue_max_discard_sectors(q, v ? v : UINT_MAX);
> > +
> > +             virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
> > +                          &v);
> > +             blk_queue_max_discard_segments(q,
> > +                                            min_not_zero(v,
> > +                                                         MAX_DISCARD_SEGMENTS));
> > +
> > +             blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
> > +     }
> > +
> > +     if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
> > +             virtio_cread(vdev, struct virtio_blk_config,
> > +                          max_write_zeroes_sectors, &v);
> > +             blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX);
> > +     }
> > +
> >       virtblk_update_capacity(vblk, false);
> >       virtio_device_ready(vdev);
> >
> > @@ -895,14 +974,14 @@ static unsigned int features_legacy[] = {
> >       VIRTIO_BLK_F_SCSI,
> >  #endif
> >       VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
> > -     VIRTIO_BLK_F_MQ,
> > +     VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
> >  }
> >  ;
> >  static unsigned int features[] = {
> >       VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
> >       VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
> >       VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
> > -     VIRTIO_BLK_F_MQ,
> > +     VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
> >  };
> >
> >  static struct virtio_driver virtio_blk = {
> > diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
> > index 9ebe4d968dd5..0f99d7b49ede 100644
> > --- a/include/uapi/linux/virtio_blk.h
> > +++ b/include/uapi/linux/virtio_blk.h
> > @@ -38,6 +38,8 @@
> >  #define VIRTIO_BLK_F_BLK_SIZE        6       /* Block size of disk is available*/
> >  #define VIRTIO_BLK_F_TOPOLOGY        10      /* Topology information is available */
> >  #define VIRTIO_BLK_F_MQ              12      /* support more than one vq */
> > +#define VIRTIO_BLK_F_DISCARD 13      /* DISCARD is supported */
> > +#define VIRTIO_BLK_F_WRITE_ZEROES    14      /* WRITE ZEROES is supported */
> >
> >  /* Legacy feature bits */
> >  #ifndef VIRTIO_BLK_NO_LEGACY
> > @@ -86,6 +88,39 @@ struct virtio_blk_config {
> >
> >       /* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
> >       __u16 num_queues;
> > +
> > +     /* the next 3 entries are guarded by VIRTIO_BLK_F_DISCARD */
> > +     /*
> > +      * The maximum discard sectors (in 512-byte sectors) for
> > +      * one segment.
> > +      */
> > +     __u32 max_discard_sectors;
> > +     /*
> > +      * The maximum number of discard segments in a
> > +      * discard command.
> > +      */
> > +     __u32 max_discard_seg;
> > +     /* Discard commands must be aligned to this number of sectors. */
> > +     __u32 discard_sector_alignment;
> > +
> > +     /* the next 3 entries are guarded by VIRTIO_BLK_F_WRITE_ZEROES */
> > +     /*
> > +      * The maximum number of write zeroes sectors (in 512-byte sectors) in
> > +      * one segment.
> > +      */
> > +     __u32 max_write_zeroes_sectors;
> > +     /*
> > +      * The maximum number of segments in a write zeroes
> > +      * command.
> > +      */
> > +     __u32 max_write_zeroes_seg;
> > +     /*
> > +      * Set if a VIRTIO_BLK_T_WRITE_ZEROES request may result in the
> > +      * deallocation of one or more of the sectors.
> > +      */
> > +     __u8 write_zeroes_may_unmap;
> > +
> > +     __u8 unused1[3];
> >  } __attribute__((packed));
> >
> >  /*
> > @@ -114,6 +149,12 @@ struct virtio_blk_config {
> >  /* Get device ID command */
> >  #define VIRTIO_BLK_T_GET_ID    8
> >
> > +/* Discard command */
> > +#define VIRTIO_BLK_T_DISCARD 11
> > +
> > +/* Write zeroes command */
> > +#define VIRTIO_BLK_T_WRITE_ZEROES    13
> > +
> >  #ifndef VIRTIO_BLK_NO_LEGACY
> >  /* Barrier before this op. */
> >  #define VIRTIO_BLK_T_BARRIER 0x80000000
> > @@ -133,6 +174,19 @@ struct virtio_blk_outhdr {
> >       __virtio64 sector;
> >  };
> >
> > +/* Unmap this range (only valid for write zeroes command) */
> > +#define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP   0x00000001
> > +
> > +/* Discard/write zeroes range for each request. */
> > +struct virtio_blk_discard_write_zeroes {
> > +     /* discard/write zeroes start sector */
> > +     __le64 sector;
> > +     /* number of discard/write zeroes sectors */
> > +     __le32 num_sectors;
> > +     /* flags for this range */
> > +     __le32 flags;
> > +};
> > +
> >  #ifndef VIRTIO_BLK_NO_LEGACY
> >  struct virtio_scsi_inhdr {
> >       __virtio32 errors;
> >

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v9] virtio_blk: add discard and write zeroes support
@ 2018-11-02 18:25       ` Daniel Verkamp
  0 siblings, 0 replies; 50+ messages in thread
From: Daniel Verkamp @ 2018-11-02 18:25 UTC (permalink / raw)
  To: dongli.zhang
  Cc: axboe, hch, mst, virtualization, linux-block, stefanha, pbonzini,
	Changpeng Liu

Hi Dongli,

Unfortunately, I am not aware of any in-progress implementation of
this feature for qemu. It hopefully should not be too difficult to
wire up in the qemu virtio-blk model, but I haven't looked into it in
detail.

Thanks,
-- Daniel
On Thu, Nov 1, 2018 at 4:42 PM Dongli Zhang <dongli.zhang@oracle.com> wrote:
>
> Hi Daniel,
>
> Other than crosvm, is there any version of qemu (e.g., an in-progress repository
> on GitHub) where I can try this feature?
>
> Thank you very much!
>
> Dongli Zhang
>
> On 11/02/2018 06:40 AM, Daniel Verkamp wrote:
> > From: Changpeng Liu <changpeng.liu@intel.com>
> >
> > In commit 88c85538, "virtio-blk: add discard and write zeroes features
> > to specification" (https://github.com/oasis-tcs/virtio-spec), the virtio
> > block specification has been extended to add VIRTIO_BLK_T_DISCARD and
> > VIRTIO_BLK_T_WRITE_ZEROES commands.  This patch enables support for
> > discard and write zeroes in the virtio-blk driver when the device
> > advertises the corresponding features, VIRTIO_BLK_F_DISCARD and
> > VIRTIO_BLK_F_WRITE_ZEROES.
> >
> > Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
> > Signed-off-by: Daniel Verkamp <dverkamp@chromium.org>
> > ---
> > dverkamp: I've picked up this patch and made a few minor changes (as
> > listed below); most notably, I changed the kmalloc back to GFP_ATOMIC,
> > since it can be called from a context where sleeping is not allowed.
> > To prevent large allocations, I've also clamped the maximum number of
> > discard segments to 256; this results in a 4K allocation and should be
> > plenty of descriptors for most use cases.
> >
> > I also removed most of the description from the commit message, since it
> > was duplicating the comments from virtio_blk.h and quoting parts of the
> > spec without adding any extra information.  I have tested this iteration
> > of the patch using crosvm with modifications to enable the new features:
> > https://chromium.googlesource.com/chromiumos/platform/crosvm/
> >
> > v9 fixes a number of review issues; I didn't attempt to optimize the
> > single-element write zeroes case, so it still does an allocation per
> > request (I did not see any easy place to put the payload that would
> > avoid the allocation).
> >
> > CHANGELOG:
> > v9: [dverkamp] fix LE types in discard struct; cleanups from Ming Lei
> > v8: [dverkamp] replace shifts by 9 with SECTOR_SHIFT constant
> > v7: [dverkamp] use GFP_ATOMIC for allocation that may not sleep; clarify
> > descriptor flags field; comment wording cleanups.
> > v6: don't set T_OUT bit to discard and write zeroes commands.
> > v5: use new block layer API: blk_queue_flag_set.
> > v4: several optimizations based on MST's comments, remove bit field
> > usage for command descriptor.
> > v3: define the virtio-blk protocol to add discard and write zeroes
> > support, first version implementation based on proposed specification.
> > v2: add write zeroes command support.
> > v1: initial proposal implementation for discard command.
> > ---
> >  drivers/block/virtio_blk.c      | 83 ++++++++++++++++++++++++++++++++-
> >  include/uapi/linux/virtio_blk.h | 54 +++++++++++++++++++++
> >  2 files changed, 135 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> > index 086c6bb12baa..0f39efb4b3aa 100644
> > --- a/drivers/block/virtio_blk.c
> > +++ b/drivers/block/virtio_blk.c
> > @@ -18,6 +18,7 @@
> >
> >  #define PART_BITS 4
> >  #define VQ_NAME_LEN 16
> > +#define MAX_DISCARD_SEGMENTS 256u
> >
> >  static int major;
> >  static DEFINE_IDA(vd_index_ida);
> > @@ -172,10 +173,48 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
> >       return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
> >  }
> >
> > +static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap)
> > +{
> > +     unsigned short segments = blk_rq_nr_discard_segments(req);
> > +     unsigned short n = 0;
> > +     struct virtio_blk_discard_write_zeroes *range;
> > +     struct bio *bio;
> > +     u32 flags = 0;
> > +
> > +     if (unmap)
> > +             flags |= VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP;
> > +
> > +     range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
> > +     if (!range)
> > +             return -ENOMEM;
> > +
> > +     __rq_for_each_bio(bio, req) {
> > +             u64 sector = bio->bi_iter.bi_sector;
> > +             u32 num_sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT;
> > +
> > +             range[n].flags = cpu_to_le32(flags);
> > +             range[n].num_sectors = cpu_to_le32(num_sectors);
> > +             range[n].sector = cpu_to_le64(sector);
> > +             n++;
> > +     }
> > +
> > +     req->special_vec.bv_page = virt_to_page(range);
> > +     req->special_vec.bv_offset = offset_in_page(range);
> > +     req->special_vec.bv_len = sizeof(*range) * segments;
> > +     req->rq_flags |= RQF_SPECIAL_PAYLOAD;
> > +
> > +     return 0;
> > +}
> > +
> >  static inline void virtblk_request_done(struct request *req)
> >  {
> >       struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
> >
> > +     if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
> > +             kfree(page_address(req->special_vec.bv_page) +
> > +                   req->special_vec.bv_offset);
> > +     }
> > +
> >       switch (req_op(req)) {
> >       case REQ_OP_SCSI_IN:
> >       case REQ_OP_SCSI_OUT:
> > @@ -225,6 +264,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
> >       int qid = hctx->queue_num;
> >       int err;
> >       bool notify = false;
> > +     bool unmap = false;
> >       u32 type;
> >
> >       BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
> > @@ -237,6 +277,13 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
> >       case REQ_OP_FLUSH:
> >               type = VIRTIO_BLK_T_FLUSH;
> >               break;
> > +     case REQ_OP_DISCARD:
> > +             type = VIRTIO_BLK_T_DISCARD;
> > +             break;
> > +     case REQ_OP_WRITE_ZEROES:
> > +             type = VIRTIO_BLK_T_WRITE_ZEROES;
> > +             unmap = !(req->cmd_flags & REQ_NOUNMAP);
> > +             break;
> >       case REQ_OP_SCSI_IN:
> >       case REQ_OP_SCSI_OUT:
> >               type = VIRTIO_BLK_T_SCSI_CMD;
> > @@ -256,6 +303,12 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
> >
> >       blk_mq_start_request(req);
> >
> > +     if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
> > +             err = virtblk_setup_discard_write_zeroes(req, unmap);
> > +             if (err)
> > +                     return BLK_STS_RESOURCE;
> > +     }
> > +
> >       num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
> >       if (num) {
> >               if (rq_data_dir(req) == WRITE)
> > @@ -802,6 +855,32 @@ static int virtblk_probe(struct virtio_device *vdev)
> >       if (!err && opt_io_size)
> >               blk_queue_io_opt(q, blk_size * opt_io_size);
> >
> > +     if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
> > +             q->limits.discard_granularity = blk_size;
> > +
> > +             virtio_cread(vdev, struct virtio_blk_config,
> > +                          discard_sector_alignment, &v);
> > +             q->limits.discard_alignment = v ? v << SECTOR_SHIFT : 0;
> > +
> > +             virtio_cread(vdev, struct virtio_blk_config,
> > +                          max_discard_sectors, &v);
> > +             blk_queue_max_discard_sectors(q, v ? v : UINT_MAX);
> > +
> > +             virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
> > +                          &v);
> > +             blk_queue_max_discard_segments(q,
> > +                                            min_not_zero(v,
> > +                                                         MAX_DISCARD_SEGMENTS));
> > +
> > +             blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
> > +     }
> > +
> > +     if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
> > +             virtio_cread(vdev, struct virtio_blk_config,
> > +                          max_write_zeroes_sectors, &v);
> > +             blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX);
> > +     }
> > +
> >       virtblk_update_capacity(vblk, false);
> >       virtio_device_ready(vdev);
> >
> > @@ -895,14 +974,14 @@ static unsigned int features_legacy[] = {
> >       VIRTIO_BLK_F_SCSI,
> >  #endif
> >       VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
> > -     VIRTIO_BLK_F_MQ,
> > +     VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
> >  }
> >  ;
> >  static unsigned int features[] = {
> >       VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
> >       VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
> >       VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
> > -     VIRTIO_BLK_F_MQ,
> > +     VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
> >  };
> >
> >  static struct virtio_driver virtio_blk = {
> > diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
> > index 9ebe4d968dd5..0f99d7b49ede 100644
> > --- a/include/uapi/linux/virtio_blk.h
> > +++ b/include/uapi/linux/virtio_blk.h
> > @@ -38,6 +38,8 @@
> >  #define VIRTIO_BLK_F_BLK_SIZE        6       /* Block size of disk is available*/
> >  #define VIRTIO_BLK_F_TOPOLOGY        10      /* Topology information is available */
> >  #define VIRTIO_BLK_F_MQ              12      /* support more than one vq */
> > +#define VIRTIO_BLK_F_DISCARD 13      /* DISCARD is supported */
> > +#define VIRTIO_BLK_F_WRITE_ZEROES    14      /* WRITE ZEROES is supported */
> >
> >  /* Legacy feature bits */
> >  #ifndef VIRTIO_BLK_NO_LEGACY
> > @@ -86,6 +88,39 @@ struct virtio_blk_config {
> >
> >       /* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
> >       __u16 num_queues;
> > +
> > +     /* the next 3 entries are guarded by VIRTIO_BLK_F_DISCARD */
> > +     /*
> > +      * The maximum discard sectors (in 512-byte sectors) for
> > +      * one segment.
> > +      */
> > +     __u32 max_discard_sectors;
> > +     /*
> > +      * The maximum number of discard segments in a
> > +      * discard command.
> > +      */
> > +     __u32 max_discard_seg;
> > +     /* Discard commands must be aligned to this number of sectors. */
> > +     __u32 discard_sector_alignment;
> > +
> > +     /* the next 3 entries are guarded by VIRTIO_BLK_F_WRITE_ZEROES */
> > +     /*
> > +      * The maximum number of write zeroes sectors (in 512-byte sectors) in
> > +      * one segment.
> > +      */
> > +     __u32 max_write_zeroes_sectors;
> > +     /*
> > +      * The maximum number of segments in a write zeroes
> > +      * command.
> > +      */
> > +     __u32 max_write_zeroes_seg;
> > +     /*
> > +      * Set if a VIRTIO_BLK_T_WRITE_ZEROES request may result in the
> > +      * deallocation of one or more of the sectors.
> > +      */
> > +     __u8 write_zeroes_may_unmap;
> > +
> > +     __u8 unused1[3];
> >  } __attribute__((packed));
> >
> >  /*
> > @@ -114,6 +149,12 @@ struct virtio_blk_config {
> >  /* Get device ID command */
> >  #define VIRTIO_BLK_T_GET_ID    8
> >
> > +/* Discard command */
> > +#define VIRTIO_BLK_T_DISCARD 11
> > +
> > +/* Write zeroes command */
> > +#define VIRTIO_BLK_T_WRITE_ZEROES    13
> > +
> >  #ifndef VIRTIO_BLK_NO_LEGACY
> >  /* Barrier before this op. */
> >  #define VIRTIO_BLK_T_BARRIER 0x80000000
> > @@ -133,6 +174,19 @@ struct virtio_blk_outhdr {
> >       __virtio64 sector;
> >  };
> >
> > +/* Unmap this range (only valid for write zeroes command) */
> > +#define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP   0x00000001
> > +
> > +/* Discard/write zeroes range for each request. */
> > +struct virtio_blk_discard_write_zeroes {
> > +     /* discard/write zeroes start sector */
> > +     __le64 sector;
> > +     /* number of discard/write zeroes sectors */
> > +     __le32 num_sectors;
> > +     /* flags for this range */
> > +     __le32 flags;
> > +};
> > +
> >  #ifndef VIRTIO_BLK_NO_LEGACY
> >  struct virtio_scsi_inhdr {
> >       __virtio32 errors;
> >

^ permalink raw reply	[flat|nested] 50+ messages in thread

* RE: [PATCH v9] virtio_blk: add discard and write zeroes support
  2018-11-02  4:17     ` Stefan Hajnoczi
@ 2018-12-05  9:46       ` Liu, Changpeng
  -1 siblings, 0 replies; 50+ messages in thread
From: Liu, Changpeng @ 2018-12-05  9:46 UTC (permalink / raw)
  To: Stefan Hajnoczi, Daniel Verkamp
  Cc: virtualization, linux-block, Michael S. Tsirkin, Jason Wang,
	Jens Axboe, Paolo Bonzini, Christoph Hellwig

What's the status of this patch? Has anybody pulled it into a branch?

> -----Original Message-----
> From: Stefan Hajnoczi [mailto:stefanha@redhat.com]
> Sent: Friday, November 2, 2018 12:18 PM
> To: Daniel Verkamp <dverkamp@chromium.org>
> Cc: virtualization@lists.linux-foundation.org; linux-block@vger.kernel.org;
> Michael S. Tsirkin <mst@redhat.com>; Jason Wang <jasowang@redhat.com>;
> Jens Axboe <axboe@kernel.dk>; Paolo Bonzini <pbonzini@redhat.com>;
> Christoph Hellwig <hch@infradead.org>; Liu, Changpeng
> <changpeng.liu@intel.com>
> Subject: Re: [PATCH v9] virtio_blk: add discard and write zeroes support
> 
> On Thu, Nov 01, 2018 at 03:40:35PM -0700, Daniel Verkamp wrote:
> > From: Changpeng Liu <changpeng.liu@intel.com>
> >
> > In commit 88c85538, "virtio-blk: add discard and write zeroes features
> > to specification" (https://github.com/oasis-tcs/virtio-spec), the virtio
> > block specification has been extended to add VIRTIO_BLK_T_DISCARD and
> > VIRTIO_BLK_T_WRITE_ZEROES commands.  This patch enables support for
> > discard and write zeroes in the virtio-blk driver when the device
> > advertises the corresponding features, VIRTIO_BLK_F_DISCARD and
> > VIRTIO_BLK_F_WRITE_ZEROES.
> >
> > Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
> > Signed-off-by: Daniel Verkamp <dverkamp@chromium.org>
> > ---
> > dverkamp: I've picked up this patch and made a few minor changes (as
> > listed below); most notably, I changed the kmalloc back to GFP_ATOMIC,
> > since it can be called from a context where sleeping is not allowed.
> > To prevent large allocations, I've also clamped the maximum number of
> > discard segments to 256; this results in a 4K allocation and should be
> > plenty of descriptors for most use cases.
> >
> > I also removed most of the description from the commit message, since it
> > was duplicating the comments from virtio_blk.h and quoting parts of the
> > spec without adding any extra information.  I have tested this iteration
> > of the patch using crosvm with modifications to enable the new features:
> > https://chromium.googlesource.com/chromiumos/platform/crosvm/
> >
> > v9 fixes a number of review issues; I didn't attempt to optimize the
> > single-element write zeroes case, so it still does an allocation per
> > request (I did not see any easy place to put the payload that would
> > avoid the allocation).
> >
> > CHANGELOG:
> > v9: [dverkamp] fix LE types in discard struct; cleanups from Ming Lei
> > v8: [dverkamp] replace shifts by 9 with SECTOR_SHIFT constant
> > v7: [dverkamp] use GFP_ATOMIC for allocation that may not sleep; clarify
> > descriptor flags field; comment wording cleanups.
> > v6: don't set T_OUT bit to discard and write zeroes commands.
> > v5: use new block layer API: blk_queue_flag_set.
> > v4: several optimizations based on MST's comments, remove bit field
> > usage for command descriptor.
> > v3: define the virtio-blk protocol to add discard and write zeroes
> > support, first version implementation based on proposed specification.
> > v2: add write zeroes command support.
> > v1: initial proposal implementation for discard command.
> > ---
> >  drivers/block/virtio_blk.c      | 83 ++++++++++++++++++++++++++++++++-
> >  include/uapi/linux/virtio_blk.h | 54 +++++++++++++++++++++
> >  2 files changed, 135 insertions(+), 2 deletions(-)
> 
> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>

^ permalink raw reply	[flat|nested] 50+ messages in thread

* RE: [PATCH v9] virtio_blk: add discard and write zeroes support
@ 2018-12-05  9:46       ` Liu, Changpeng
  0 siblings, 0 replies; 50+ messages in thread
From: Liu, Changpeng @ 2018-12-05  9:46 UTC (permalink / raw)
  To: Stefan Hajnoczi, Daniel Verkamp
  Cc: Jens Axboe, Christoph Hellwig, Michael S. Tsirkin,
	virtualization, linux-block, Paolo Bonzini

What's the status of this patch? Has anybody pulled it into a branch?

> -----Original Message-----
> From: Stefan Hajnoczi [mailto:stefanha@redhat.com]
> Sent: Friday, November 2, 2018 12:18 PM
> To: Daniel Verkamp <dverkamp@chromium.org>
> Cc: virtualization@lists.linux-foundation.org; linux-block@vger.kernel.org;
> Michael S. Tsirkin <mst@redhat.com>; Jason Wang <jasowang@redhat.com>;
> Jens Axboe <axboe@kernel.dk>; Paolo Bonzini <pbonzini@redhat.com>;
> Christoph Hellwig <hch@infradead.org>; Liu, Changpeng
> <changpeng.liu@intel.com>
> Subject: Re: [PATCH v9] virtio_blk: add discard and write zeroes support
> 
> On Thu, Nov 01, 2018 at 03:40:35PM -0700, Daniel Verkamp wrote:
> > From: Changpeng Liu <changpeng.liu@intel.com>
> >
> > In commit 88c85538, "virtio-blk: add discard and write zeroes features
> > to specification" (https://github.com/oasis-tcs/virtio-spec), the virtio
> > block specification has been extended to add VIRTIO_BLK_T_DISCARD and
> > VIRTIO_BLK_T_WRITE_ZEROES commands.  This patch enables support for
> > discard and write zeroes in the virtio-blk driver when the device
> > advertises the corresponding features, VIRTIO_BLK_F_DISCARD and
> > VIRTIO_BLK_F_WRITE_ZEROES.
> >
> > Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
> > Signed-off-by: Daniel Verkamp <dverkamp@chromium.org>
> > ---
> > dverkamp: I've picked up this patch and made a few minor changes (as
> > listed below); most notably, I changed the kmalloc back to GFP_ATOMIC,
> > since it can be called from a context where sleeping is not allowed.
> > To prevent large allocations, I've also clamped the maximum number of
> > discard segments to 256; this results in a 4K allocation and should be
> > plenty of descriptors for most use cases.
> >
> > I also removed most of the description from the commit message, since it
> > was duplicating the comments from virtio_blk.h and quoting parts of the
> > spec without adding any extra information.  I have tested this iteration
> > of the patch using crosvm with modifications to enable the new features:
> > https://chromium.googlesource.com/chromiumos/platform/crosvm/
> >
> > v9 fixes a number of review issues; I didn't attempt to optimize the
> > single-element write zeroes case, so it still does an allocation per
> > request (I did not see any easy place to put the payload that would
> > avoid the allocation).
> >
> > CHANGELOG:
> > v9: [dverkamp] fix LE types in discard struct; cleanups from Ming Lei
> > v8: [dverkamp] replace shifts by 9 with SECTOR_SHIFT constant
> > v7: [dverkamp] use GFP_ATOMIC for allocation that may not sleep; clarify
> > descriptor flags field; comment wording cleanups.
> > v6: don't set T_OUT bit to discard and write zeroes commands.
> > v5: use new block layer API: blk_queue_flag_set.
> > v4: several optimizations based on MST's comments, remove bit field
> > usage for command descriptor.
> > v3: define the virtio-blk protocol to add discard and write zeroes
> > support, first version implementation based on proposed specification.
> > v2: add write zeroes command support.
> > v1: initial proposal implementation for discard command.
> > ---
> >  drivers/block/virtio_blk.c      | 83 ++++++++++++++++++++++++++++++++-
> >  include/uapi/linux/virtio_blk.h | 54 +++++++++++++++++++++
> >  2 files changed, 135 insertions(+), 2 deletions(-)
> 
> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v9] virtio_blk: add discard and write zeroes support
  2018-12-05  9:46       ` Liu, Changpeng
@ 2018-12-05 13:41         ` Michael S. Tsirkin
  -1 siblings, 0 replies; 50+ messages in thread
From: Michael S. Tsirkin @ 2018-12-05 13:41 UTC (permalink / raw)
  To: Liu, Changpeng
  Cc: Stefan Hajnoczi, Daniel Verkamp, virtualization, linux-block,
	Jason Wang, Jens Axboe, Paolo Bonzini, Christoph Hellwig


On Wed, Dec 05, 2018 at 09:46:16AM +0000, Liu, Changpeng wrote:
> What's the status of this patch? Has anybody pulled it into a branch?

I will merge it for next.

> 
> > -----Original Message-----
> > From: Stefan Hajnoczi [mailto:stefanha@redhat.com]
> > Sent: Friday, November 2, 2018 12:18 PM
> > To: Daniel Verkamp <dverkamp@chromium.org>
> > Cc: virtualization@lists.linux-foundation.org; linux-block@vger.kernel.org;
> > Michael S. Tsirkin <mst@redhat.com>; Jason Wang <jasowang@redhat.com>;
> > Jens Axboe <axboe@kernel.dk>; Paolo Bonzini <pbonzini@redhat.com>;
> > Christoph Hellwig <hch@infradead.org>; Liu, Changpeng
> > <changpeng.liu@intel.com>
> > Subject: Re: [PATCH v9] virtio_blk: add discard and write zeroes support
> > 
> > On Thu, Nov 01, 2018 at 03:40:35PM -0700, Daniel Verkamp wrote:
> > > From: Changpeng Liu <changpeng.liu@intel.com>
> > >
> > > In commit 88c85538, "virtio-blk: add discard and write zeroes features
> > > to specification" (https://github.com/oasis-tcs/virtio-spec), the virtio
> > > block specification has been extended to add VIRTIO_BLK_T_DISCARD and
> > > VIRTIO_BLK_T_WRITE_ZEROES commands.  This patch enables support for
> > > discard and write zeroes in the virtio-blk driver when the device
> > > advertises the corresponding features, VIRTIO_BLK_F_DISCARD and
> > > VIRTIO_BLK_F_WRITE_ZEROES.
> > >
> > > Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
> > > Signed-off-by: Daniel Verkamp <dverkamp@chromium.org>
> > > ---
> > > dverkamp: I've picked up this patch and made a few minor changes (as
> > > listed below); most notably, I changed the kmalloc back to GFP_ATOMIC,
> > > since it can be called from a context where sleeping is not allowed.
> > > To prevent large allocations, I've also clamped the maximum number of
> > > discard segments to 256; this results in a 4K allocation and should be
> > > plenty of descriptors for most use cases.
> > >
> > > I also removed most of the description from the commit message, since it
> > > was duplicating the comments from virtio_blk.h and quoting parts of the
> > > spec without adding any extra information.  I have tested this iteration
> > > of the patch using crosvm with modifications to enable the new features:
> > > https://chromium.googlesource.com/chromiumos/platform/crosvm/
> > >
> > > v9 fixes a number of review issues; I didn't attempt to optimize the
> > > single-element write zeroes case, so it still does an allocation per
> > > request (I did not see any easy place to put the payload that would
> > > avoid the allocation).
> > >
> > > CHANGELOG:
> > > v9: [dverkamp] fix LE types in discard struct; cleanups from Ming Lei
> > > v8: [dverkamp] replace shifts by 9 with SECTOR_SHIFT constant
> > > v7: [dverkamp] use GFP_ATOMIC for allocation that may not sleep; clarify
> > > descriptor flags field; comment wording cleanups.
> > > v6: don't set T_OUT bit to discard and write zeroes commands.
> > > v5: use new block layer API: blk_queue_flag_set.
> > > v4: several optimizations based on MST's comments, remove bit field
> > > usage for command descriptor.
> > > v3: define the virtio-blk protocol to add discard and write zeroes
> > > support, first version implementation based on proposed specification.
> > > v2: add write zeroes command support.
> > > v1: initial proposal implementation for discard command.
> > > ---
> > >  drivers/block/virtio_blk.c      | 83 ++++++++++++++++++++++++++++++++-
> > >  include/uapi/linux/virtio_blk.h | 54 +++++++++++++++++++++
> > >  2 files changed, 135 insertions(+), 2 deletions(-)
> > 
> > Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v9] virtio_blk: add discard and write zeroes support
@ 2018-12-05 13:41         ` Michael S. Tsirkin
  0 siblings, 0 replies; 50+ messages in thread
From: Michael S. Tsirkin @ 2018-12-05 13:41 UTC (permalink / raw)
  To: Liu, Changpeng
  Cc: Jens Axboe, Christoph Hellwig, virtualization, linux-block,
	Stefan Hajnoczi, Paolo Bonzini


On Wed, Dec 05, 2018 at 09:46:16AM +0000, Liu, Changpeng wrote:
> What's the status of this patch? Has anybody pulled it into a branch?

I will merge it for next.

> 
> > -----Original Message-----
> > From: Stefan Hajnoczi [mailto:stefanha@redhat.com]
> > Sent: Friday, November 2, 2018 12:18 PM
> > To: Daniel Verkamp <dverkamp@chromium.org>
> > Cc: virtualization@lists.linux-foundation.org; linux-block@vger.kernel.org;
> > Michael S. Tsirkin <mst@redhat.com>; Jason Wang <jasowang@redhat.com>;
> > Jens Axboe <axboe@kernel.dk>; Paolo Bonzini <pbonzini@redhat.com>;
> > Christoph Hellwig <hch@infradead.org>; Liu, Changpeng
> > <changpeng.liu@intel.com>
> > Subject: Re: [PATCH v9] virtio_blk: add discard and write zeroes support
> > 
> > On Thu, Nov 01, 2018 at 03:40:35PM -0700, Daniel Verkamp wrote:
> > > From: Changpeng Liu <changpeng.liu@intel.com>
> > >
> > > In commit 88c85538, "virtio-blk: add discard and write zeroes features
> > > to specification" (https://github.com/oasis-tcs/virtio-spec), the virtio
> > > block specification has been extended to add VIRTIO_BLK_T_DISCARD and
> > > VIRTIO_BLK_T_WRITE_ZEROES commands.  This patch enables support for
> > > discard and write zeroes in the virtio-blk driver when the device
> > > advertises the corresponding features, VIRTIO_BLK_F_DISCARD and
> > > VIRTIO_BLK_F_WRITE_ZEROES.
> > >
> > > Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
> > > Signed-off-by: Daniel Verkamp <dverkamp@chromium.org>
> > > ---
> > > dverkamp: I've picked up this patch and made a few minor changes (as
> > > listed below); most notably, I changed the kmalloc back to GFP_ATOMIC,
> > > since it can be called from a context where sleeping is not allowed.
> > > To prevent large allocations, I've also clamped the maximum number of
> > > discard segments to 256; this results in a 4K allocation and should be
> > > plenty of descriptors for most use cases.
> > >
> > > I also removed most of the description from the commit message, since it
> > > was duplicating the comments from virtio_blk.h and quoting parts of the
> > > spec without adding any extra information.  I have tested this iteration
> > > of the patch using crosvm with modifications to enable the new features:
> > > https://chromium.googlesource.com/chromiumos/platform/crosvm/
> > >
> > > v9 fixes a number of review issues; I didn't attempt to optimize the
> > > single-element write zeroes case, so it still does an allocation per
> > > request (I did not see any easy place to put the payload that would
> > > avoid the allocation).
> > >
> > > CHANGELOG:
> > > v9: [dverkamp] fix LE types in discard struct; cleanups from Ming Lei
> > > v8: [dverkamp] replace shifts by 9 with SECTOR_SHIFT constant
> > > v7: [dverkamp] use GFP_ATOMIC for allocation that may not sleep; clarify
> > > descriptor flags field; comment wording cleanups.
> > > v6: don't set T_OUT bit to discard and write zeroes commands.
> > > v5: use new block layer API: blk_queue_flag_set.
> > > v4: several optimizations based on MST's comments, remove bit field
> > > usage for command descriptor.
> > > v3: define the virtio-blk protocol to add discard and write zeroes
> > > support, first version implementation based on proposed specification.
> > > v2: add write zeroes command support.
> > > v1: initial proposal implementation for discard command.
> > > ---
> > >  drivers/block/virtio_blk.c      | 83 ++++++++++++++++++++++++++++++++-
> > >  include/uapi/linux/virtio_blk.h | 54 +++++++++++++++++++++
> > >  2 files changed, 135 insertions(+), 2 deletions(-)
> > 
> > Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>

^ permalink raw reply	[flat|nested] 50+ messages in thread

end of thread, other threads:[~2018-12-05 13:41 UTC | newest]

Thread overview: 50+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-06-06  4:19 [PATCH v6] virtio_blk: add DISCARD and WRIET ZEROES commands support Changpeng Liu
2018-06-07 13:10 ` Stefan Hajnoczi
2018-06-07 23:07   ` Liu, Changpeng
2018-06-08 10:20     ` Stefan Hajnoczi
2018-06-11  3:37       ` Liu, Changpeng
2018-06-12 16:05         ` Stefan Hajnoczi
2018-08-28 22:25 ` [PATCH v7] virtio_blk: add discard and write zeroes support Daniel Verkamp
2018-10-12 21:06 ` [PATCH v8] " Daniel Verkamp
2018-10-12 21:06   ` Daniel Verkamp
2018-10-15  0:54   ` Michael S. Tsirkin
2018-10-15  0:54     ` Michael S. Tsirkin
2018-10-15  9:21   ` Ming Lei
2018-10-15  9:27   ` Christoph Hellwig
2018-10-15  9:27     ` Christoph Hellwig
2018-10-15 23:16     ` Daniel Verkamp
2018-10-15 23:16       ` Daniel Verkamp
2018-10-16  1:45       ` Liu, Changpeng
2018-10-16  1:45         ` Liu, Changpeng
2018-10-16  1:40     ` Liu, Changpeng
2018-10-16  1:40       ` Liu, Changpeng
2018-10-25 23:28     ` Paolo Bonzini
2018-10-25 23:28       ` Paolo Bonzini
2018-10-26  8:26       ` Christoph Hellwig
2018-10-26  8:26         ` Christoph Hellwig
2018-10-29  3:21         ` Liu, Changpeng
2018-10-29  3:21           ` Liu, Changpeng
2018-10-29 18:03         ` Paolo Bonzini
2018-10-29 18:03           ` Paolo Bonzini
2018-10-26  8:08   ` Stefan Hajnoczi
2018-10-26  8:08     ` Stefan Hajnoczi
2018-10-26 14:47     ` Michael S. Tsirkin
2018-10-26 14:47       ` Michael S. Tsirkin
2018-10-29  5:05       ` Stefan Hajnoczi
2018-10-29  5:05         ` Stefan Hajnoczi
2018-11-01 21:25         ` Michael S. Tsirkin
2018-11-01 21:25           ` Michael S. Tsirkin
2018-11-01 22:18           ` Daniel Verkamp
2018-11-01 22:18             ` Daniel Verkamp
2018-11-01 22:40 ` [PATCH v9] " Daniel Verkamp
2018-11-01 22:40   ` Daniel Verkamp
2018-11-01 23:43   ` Dongli Zhang
2018-11-01 23:43     ` Dongli Zhang
2018-11-02 18:25     ` Daniel Verkamp
2018-11-02 18:25       ` Daniel Verkamp
2018-11-02  4:17   ` Stefan Hajnoczi
2018-11-02  4:17     ` Stefan Hajnoczi
2018-12-05  9:46     ` Liu, Changpeng
2018-12-05  9:46       ` Liu, Changpeng
2018-12-05 13:41       ` Michael S. Tsirkin
2018-12-05 13:41         ` Michael S. Tsirkin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.