All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4] virtio_blk: add DISCARD and WRIET ZEROES commands support
@ 2018-05-29  1:42 Changpeng Liu
  2018-05-31 10:30 ` Stefan Hajnoczi
  0 siblings, 1 reply; 7+ messages in thread
From: Changpeng Liu @ 2018-05-29  1:42 UTC (permalink / raw)
  To: virtualization, changpeng.liu; +Cc: pbonzini, cavery, stefanha

Existing virtio-blk protocol doesn't have DISCARD/WRITE ZEROES commands
support, this will impact the performance when using SSD backend over
file systems.

Commit 88c85538 "virtio-blk: add discard and write zeroes features to
specification"(see https://github.com/oasis-tcs/virtio-spec) extended
existing virtio-blk protocol, adding extra DISCARD and WRITE ZEROES
commands support.

While here, using 16 bytes descriptor to describe one segment of DISCARD
or WRITE ZEROES commands, each command may contain one or more decriptors.

The following data structure shows the definition of one descriptor:

struct virtio_blk_discard_write_zeroes {
        le64 sector;
        le32 num_sectors;
        le32 unmap;
};

Field 'sector' means the start sector for DISCARD and WRITE ZEROES,
filed 'num_sectors' means the number of sectors for DISCARD and WRITE
ZEROES, if both DISCARD and WRITE ZEROES are supported, field 'unmap'
maybe used for WRITE ZEROES command with DISCARD enabled.

We also extended the virtio-blk configuration space to let backend
device put DISCARD and WRITE ZEROES configuration parameters.

struct virtio_blk_config {
        [...]

        le32 max_discard_sectors;
        le32 max_discard_seg;
        le32 discard_sector_alignment;
        le32 max_write_zeroes_sectors;
        le32 max_write_zeroes_seg;
        u8 write_zeroes_may_unmap;
}

New feature bit [VIRTIO_BLK_F_DISCARD (13)]: Device can support discard
command, maximum discard sectors size in field 'max_discard_sectors' and
maximum discard segment number in field 'max_discard_seg'.

New feature [VIRTIO_BLK_F_WRITE_ZEROES (14)]: Device can support write
zeroes command, maximum write zeroes sectors size in field
'max_write_zeroes_sectors' and maximum write zeroes segment number in
field 'max_write_zeroes_seg'.

The parameters in the configuration space of the device field
'max_discard_sectors' and field 'discard_sector_alignment' are expressed in
512-byte units if the VIRTIO_BLK_F_DISCARD feature bit is negotiated. The
field 'max_write_zeroes_sectors' is expressed in 512-byte units if the
VIRTIO_BLK_F_WRITE_ZEROES feature bit is negotiated.

Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
---
CHANGELOG:
v4: several optimizations based on MST's comments, remove bit field usage for
command descriptor.
v3: define the virtio-blk protocol to add discard and write zeroes support,
first version implementation based on proposed specification.
v2: add write zeroes command support.
v1: initial proposal implementation for discard command.
---
 drivers/block/virtio_blk.c      | 92 +++++++++++++++++++++++++++++++++++++++--
 include/uapi/linux/virtio_blk.h | 43 +++++++++++++++++++
 2 files changed, 132 insertions(+), 3 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 4a07593c..a250fcc 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -172,10 +172,45 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
 	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
 }
 
+static inline int virtblk_setup_discard_write_zeroes(struct request *req,
+						bool unmap)
+{
+	unsigned short segments = blk_rq_nr_discard_segments(req);
+	unsigned short n = 0;
+	struct virtio_blk_discard_write_zeroes *range;
+	struct bio *bio;
+
+	range = kmalloc_array(segments, sizeof(*range), GFP_KERNEL);
+	if (!range)
+		return -ENOMEM;
+
+	__rq_for_each_bio(bio, req) {
+		u64 sector = bio->bi_iter.bi_sector;
+		u32 num_sectors = bio->bi_iter.bi_size >> 9;
+
+		range[n].unmap = cpu_to_le32(unmap);
+		range[n].num_sectors = cpu_to_le32(num_sectors);
+		range[n].sector = cpu_to_le64(sector);
+		n++;
+	}
+
+	req->special_vec.bv_page = virt_to_page(range);
+	req->special_vec.bv_offset = offset_in_page(range);
+	req->special_vec.bv_len = sizeof(*range) * segments;
+	req->rq_flags |= RQF_SPECIAL_PAYLOAD;
+
+	return 0;
+}
+
 static inline void virtblk_request_done(struct request *req)
 {
 	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
 
+	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
+		kfree(page_address(req->special_vec.bv_page) +
+		      req->special_vec.bv_offset);
+	}
+
 	switch (req_op(req)) {
 	case REQ_OP_SCSI_IN:
 	case REQ_OP_SCSI_OUT:
@@ -225,6 +260,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 	int qid = hctx->queue_num;
 	int err;
 	bool notify = false;
+	bool unmap = false;
 	u32 type;
 
 	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
@@ -237,6 +273,13 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 	case REQ_OP_FLUSH:
 		type = VIRTIO_BLK_T_FLUSH;
 		break;
+	case REQ_OP_DISCARD:
+		type = VIRTIO_BLK_T_DISCARD;
+		break;
+	case REQ_OP_WRITE_ZEROES:
+		type = VIRTIO_BLK_T_WRITE_ZEROES;
+		unmap = !(req->cmd_flags & REQ_NOUNMAP);
+		break;
 	case REQ_OP_SCSI_IN:
 	case REQ_OP_SCSI_OUT:
 		type = VIRTIO_BLK_T_SCSI_CMD;
@@ -256,9 +299,16 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	blk_mq_start_request(req);
 
+	if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
+		err = virtblk_setup_discard_write_zeroes(req, unmap);
+		if (err)
+			return BLK_STS_RESOURCE;
+	}
+
 	num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
 	if (num) {
-		if (rq_data_dir(req) == WRITE)
+		if (rq_data_dir(req) == WRITE || type == VIRTIO_BLK_T_DISCARD ||
+		    type == VIRTIO_BLK_T_WRITE_ZEROES)
 			vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT);
 		else
 			vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN);
@@ -777,6 +827,42 @@ static int virtblk_probe(struct virtio_device *vdev)
 	if (!err && opt_io_size)
 		blk_queue_io_opt(q, blk_size * opt_io_size);
 
+	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
+		q->limits.discard_granularity = blk_size;
+
+		virtio_cread(vdev, struct virtio_blk_config,
+				discard_sector_alignment, &v);
+		if (v)
+			q->limits.discard_alignment = v << 9;
+		else
+			q->limits.discard_alignment = 0;
+
+		virtio_cread(vdev, struct virtio_blk_config,
+				max_discard_sectors, &v);
+		if (v)
+			blk_queue_max_discard_sectors(q, v);
+		else
+			blk_queue_max_discard_sectors(q, UINT_MAX);
+
+		virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
+				&v);
+		if (v)
+			blk_queue_max_discard_segments(q, v);
+		else
+			blk_queue_max_discard_segments(q, USHRT_MAX);
+
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+	}
+
+	if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
+		virtio_cread(vdev, struct virtio_blk_config,
+				max_write_zeroes_sectors, &v);
+		if (v)
+			blk_queue_max_write_zeroes_sectors(q, v);
+		else
+			blk_queue_max_write_zeroes_sectors(q, UINT_MAX);
+	}
+
 	virtblk_update_capacity(vblk, false);
 	virtio_device_ready(vdev);
 
@@ -885,14 +971,14 @@ static int virtblk_restore(struct virtio_device *vdev)
 	VIRTIO_BLK_F_SCSI,
 #endif
 	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
-	VIRTIO_BLK_F_MQ,
+	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
 }
 ;
 static unsigned int features[] = {
 	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
 	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
 	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
-	VIRTIO_BLK_F_MQ,
+	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
 };
 
 static struct virtio_driver virtio_blk = {
diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
index 9ebe4d9..8e7a015 100644
--- a/include/uapi/linux/virtio_blk.h
+++ b/include/uapi/linux/virtio_blk.h
@@ -38,6 +38,8 @@
 #define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
 #define VIRTIO_BLK_F_TOPOLOGY	10	/* Topology information is available */
 #define VIRTIO_BLK_F_MQ		12	/* support more than one vq */
+#define VIRTIO_BLK_F_DISCARD	13	/* DISCARD is supported */
+#define VIRTIO_BLK_F_WRITE_ZEROES	14	/* WRITE ZEROES is supported */
 
 /* Legacy feature bits */
 #ifndef VIRTIO_BLK_NO_LEGACY
@@ -86,6 +88,29 @@ struct virtio_blk_config {
 
 	/* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
 	__u16 num_queues;
+	/* The maximum discard sectors (in 512-byte sectors) for
+	 * one segment (if VIRTIO_BLK_F_DISCARD)
+	 */
+	__u32 max_discard_sectors;
+	/* The maximum number of discard segments in a
+	 * discard command (if VIRTIO_BLK_F_DISCARD)
+	 */
+	__u32 max_discard_seg;
+	/* The alignment sectors for discard (if VIRTIO_BLK_F_DISCARD) */
+	__u32 discard_sector_alignment;
+	/* The maximum number of write zeroes sectors (in 512-byte sectors) in
+	 * one segment (if VIRTIO_BLK_F_WRITE_ZEROES)
+	 */
+	__u32 max_write_zeroes_sectors;
+	/* The maximum number of segments in a write zeroes
+	 * command (if VIRTIO_BLK_F_WRITE_ZEROES)
+	 */
+	__u32 max_write_zeroes_seg;
+	/* Device clear this bit when write zeroes command can't result in
+	 * unmapping sectors (if VIRITO_BLK_F_WRITE_ZEROES and with unmap)
+	 */
+	__u8 write_zeroes_may_unmap;
+	__u8 unused1[3];
 } __attribute__((packed));
 
 /*
@@ -114,6 +139,12 @@ struct virtio_blk_config {
 /* Get device ID command */
 #define VIRTIO_BLK_T_GET_ID    8
 
+/* Discard command */
+#define VIRTIO_BLK_T_DISCARD	11
+
+/* Write zeroes command */
+#define VIRTIO_BLK_T_WRITE_ZEROES	13
+
 #ifndef VIRTIO_BLK_NO_LEGACY
 /* Barrier before this op. */
 #define VIRTIO_BLK_T_BARRIER	0x80000000
@@ -133,6 +164,18 @@ struct virtio_blk_outhdr {
 	__virtio64 sector;
 };
 
+/*
+ * discard/write zeroes range for each request.
+ */
+struct virtio_blk_discard_write_zeroes {
+	/* discard/write zeroes start sector */
+	__virtio64 sector;
+	/* number of discard/write zeroes sectors */
+	__virtio32 num_sectors;
+	/* valid for write zeroes command */
+	__virtio32 unmap;
+};
+
 #ifndef VIRTIO_BLK_NO_LEGACY
 struct virtio_scsi_inhdr {
 	__virtio32 errors;
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v4] virtio_blk: add DISCARD and WRIET ZEROES commands support
  2018-05-29  1:42 [PATCH v4] virtio_blk: add DISCARD and WRIET ZEROES commands support Changpeng Liu
@ 2018-05-31 10:30 ` Stefan Hajnoczi
  2018-05-31 23:53   ` Liu, Changpeng
  0 siblings, 1 reply; 7+ messages in thread
From: Stefan Hajnoczi @ 2018-05-31 10:30 UTC (permalink / raw)
  To: Changpeng Liu; +Cc: pbonzini, cavery, virtualization


[-- Attachment #1.1: Type: text/plain, Size: 884 bytes --]

On Tue, May 29, 2018 at 09:42:24AM +0800, Changpeng Liu wrote:
>  	num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
>  	if (num) {
> -		if (rq_data_dir(req) == WRITE)
> +		if (rq_data_dir(req) == WRITE || type == VIRTIO_BLK_T_DISCARD ||
> +		    type == VIRTIO_BLK_T_WRITE_ZEROES)
>  			vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT);

The VIRTIO specification says:

  The type of the request is either a read (VIRTIO_BLK_T_IN), a write
  (VIRTIO_BLK_T_OUT), a discard (VIRTIO_BLK_T_DISCARD), a write zeroes
  (VIRTIO_BLK_T_WRITE_ZEROES) or a flush (VIRTIO_BLK_T_FLUSH).

But this patch actually uses VIRTIO_BLK_T_DISCARD | VIRTIO_BLK_T_OUT or
VIRTIO_BLK_T_WRITE_ZEROES | VIRTIO_BLK_T_OUT.  "either A or B" is
exclusive, it does not mean "A and B".

Can you clarify whether the spec needs to be changed or what the purpose
of ORing in the VIRTIO_BLK_T_OUT bit is?

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH v4] virtio_blk: add DISCARD and WRIET ZEROES commands support
  2018-05-31 10:30 ` Stefan Hajnoczi
@ 2018-05-31 23:53   ` Liu, Changpeng
  2018-06-01  8:59     ` Stefan Hajnoczi
  0 siblings, 1 reply; 7+ messages in thread
From: Liu, Changpeng @ 2018-05-31 23:53 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: pbonzini, cavery, virtualization



> -----Original Message-----
> From: Stefan Hajnoczi [mailto:stefanha@redhat.com]
> Sent: Thursday, May 31, 2018 6:31 PM
> To: Liu, Changpeng <changpeng.liu@intel.com>
> Cc: virtualization@lists.linux-foundation.org; cavery@redhat.com;
> jasowang@redhat.com; pbonzini@redhat.com; Wang, Wei W
> <wei.w.wang@intel.com>
> Subject: Re: [PATCH v4] virtio_blk: add DISCARD and WRIET ZEROES commands
> support
> 
> On Tue, May 29, 2018 at 09:42:24AM +0800, Changpeng Liu wrote:
> >  	num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
> >  	if (num) {
> > -		if (rq_data_dir(req) == WRITE)
> > +		if (rq_data_dir(req) == WRITE || type == VIRTIO_BLK_T_DISCARD
> ||
> > +		    type == VIRTIO_BLK_T_WRITE_ZEROES)
> >  			vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev,
> VIRTIO_BLK_T_OUT);
> 
> The VIRTIO specification says:
> 
>   The type of the request is either a read (VIRTIO_BLK_T_IN), a write
>   (VIRTIO_BLK_T_OUT), a discard (VIRTIO_BLK_T_DISCARD), a write zeroes
>   (VIRTIO_BLK_T_WRITE_ZEROES) or a flush (VIRTIO_BLK_T_FLUSH).
> 
> But this patch actually uses VIRTIO_BLK_T_DISCARD | VIRTIO_BLK_T_OUT or
> VIRTIO_BLK_T_WRITE_ZEROES | VIRTIO_BLK_T_OUT.  "either A or B" is
> exclusive, it does not mean "A and B".
Hi Stefan,

For the new add DISCARD and WRITE ZEROES commands, I defined the 
type code to 11 and 13, so the last bit always is 1, there is no difference
in practice.
But I believe the specification says VIRTIO_BLK_T_OUT means direction, so
OR the two bits together should compliance with the specification.
> 
> Can you clarify whether the spec needs to be changed or what the purpose
> of ORing in the VIRTIO_BLK_T_OUT bit is?

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v4] virtio_blk: add DISCARD and WRIET ZEROES commands support
  2018-05-31 23:53   ` Liu, Changpeng
@ 2018-06-01  8:59     ` Stefan Hajnoczi
  2018-06-04  4:14       ` Liu, Changpeng
  0 siblings, 1 reply; 7+ messages in thread
From: Stefan Hajnoczi @ 2018-06-01  8:59 UTC (permalink / raw)
  To: Liu, Changpeng; +Cc: pbonzini, cavery, virtualization


[-- Attachment #1.1: Type: text/plain, Size: 2364 bytes --]

On Thu, May 31, 2018 at 11:53:26PM +0000, Liu, Changpeng wrote:
> 
> 
> > -----Original Message-----
> > From: Stefan Hajnoczi [mailto:stefanha@redhat.com]
> > Sent: Thursday, May 31, 2018 6:31 PM
> > To: Liu, Changpeng <changpeng.liu@intel.com>
> > Cc: virtualization@lists.linux-foundation.org; cavery@redhat.com;
> > jasowang@redhat.com; pbonzini@redhat.com; Wang, Wei W
> > <wei.w.wang@intel.com>
> > Subject: Re: [PATCH v4] virtio_blk: add DISCARD and WRIET ZEROES commands
> > support
> > 
> > On Tue, May 29, 2018 at 09:42:24AM +0800, Changpeng Liu wrote:
> > >  	num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
> > >  	if (num) {
> > > -		if (rq_data_dir(req) == WRITE)
> > > +		if (rq_data_dir(req) == WRITE || type == VIRTIO_BLK_T_DISCARD
> > ||
> > > +		    type == VIRTIO_BLK_T_WRITE_ZEROES)
> > >  			vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev,
> > VIRTIO_BLK_T_OUT);
> > 
> > The VIRTIO specification says:
> > 
> >   The type of the request is either a read (VIRTIO_BLK_T_IN), a write
> >   (VIRTIO_BLK_T_OUT), a discard (VIRTIO_BLK_T_DISCARD), a write zeroes
> >   (VIRTIO_BLK_T_WRITE_ZEROES) or a flush (VIRTIO_BLK_T_FLUSH).
> > 
> > But this patch actually uses VIRTIO_BLK_T_DISCARD | VIRTIO_BLK_T_OUT or
> > VIRTIO_BLK_T_WRITE_ZEROES | VIRTIO_BLK_T_OUT.  "either A or B" is
> > exclusive, it does not mean "A and B".
> Hi Stefan,
> 
> For the new add DISCARD and WRITE ZEROES commands, I defined the 
> type code to 11 and 13, so the last bit always is 1, there is no difference
> in practice.

Then the code is misleading.  This is clearer:

  if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES)
      /* Do nothing, type already set */
  else if (rq_data_dir(req) == WRITE)
      vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT);
  ...

> But I believe the specification says VIRTIO_BLK_T_OUT means direction, so
> OR the two bits together should compliance with the specification.

I cannot find that in the specification:

  http://docs.oasis-open.org/virtio/virtio/v1.0/cs04/virtio-v1.0-cs04.html#x1-2020002

and it would contradict the "The type of the request is either ... or
..." wording that I quoted from the spec above.

If you do find something in the spec, please let me know and we can
figure out how to make the spec consistent.

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH v4] virtio_blk: add DISCARD and WRIET ZEROES commands support
  2018-06-01  8:59     ` Stefan Hajnoczi
@ 2018-06-04  4:14       ` Liu, Changpeng
  2018-06-04 10:02         ` Paolo Bonzini
  0 siblings, 1 reply; 7+ messages in thread
From: Liu, Changpeng @ 2018-06-04  4:14 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: pbonzini, cavery, virtualization



> -----Original Message-----
> From: Stefan Hajnoczi [mailto:stefanha@redhat.com]
> Sent: Friday, June 1, 2018 5:00 PM
> To: Liu, Changpeng <changpeng.liu@intel.com>
> Cc: virtualization@lists.linux-foundation.org; cavery@redhat.com;
> jasowang@redhat.com; pbonzini@redhat.com; Wang, Wei W
> <wei.w.wang@intel.com>
> Subject: Re: [PATCH v4] virtio_blk: add DISCARD and WRIET ZEROES commands
> support
> 
> On Thu, May 31, 2018 at 11:53:26PM +0000, Liu, Changpeng wrote:
> >
> >
> > > -----Original Message-----
> > > From: Stefan Hajnoczi [mailto:stefanha@redhat.com]
> > > Sent: Thursday, May 31, 2018 6:31 PM
> > > To: Liu, Changpeng <changpeng.liu@intel.com>
> > > Cc: virtualization@lists.linux-foundation.org; cavery@redhat.com;
> > > jasowang@redhat.com; pbonzini@redhat.com; Wang, Wei W
> > > <wei.w.wang@intel.com>
> > > Subject: Re: [PATCH v4] virtio_blk: add DISCARD and WRIET ZEROES commands
> > > support
> > >
> > > On Tue, May 29, 2018 at 09:42:24AM +0800, Changpeng Liu wrote:
> > > >  	num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
> > > >  	if (num) {
> > > > -		if (rq_data_dir(req) == WRITE)
> > > > +		if (rq_data_dir(req) == WRITE || type == VIRTIO_BLK_T_DISCARD
> > > ||
> > > > +		    type == VIRTIO_BLK_T_WRITE_ZEROES)
> > > >  			vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev,
> > > VIRTIO_BLK_T_OUT);
> > >
> > > The VIRTIO specification says:
> > >
> > >   The type of the request is either a read (VIRTIO_BLK_T_IN), a write
> > >   (VIRTIO_BLK_T_OUT), a discard (VIRTIO_BLK_T_DISCARD), a write zeroes
> > >   (VIRTIO_BLK_T_WRITE_ZEROES) or a flush (VIRTIO_BLK_T_FLUSH).
> > >
> > > But this patch actually uses VIRTIO_BLK_T_DISCARD | VIRTIO_BLK_T_OUT or
> > > VIRTIO_BLK_T_WRITE_ZEROES | VIRTIO_BLK_T_OUT.  "either A or B" is
> > > exclusive, it does not mean "A and B".
> > Hi Stefan,
> >
> > For the new add DISCARD and WRITE ZEROES commands, I defined the
> > type code to 11 and 13, so the last bit always is 1, there is no difference
> > in practice.
> 
> Then the code is misleading.  This is clearer:
> 
>   if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES)
>       /* Do nothing, type already set */
>   else if (rq_data_dir(req) == WRITE)
>       vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT);
>   ...
This change sounds good to me, will change the patch accordingly.
> 
> > But I believe the specification says VIRTIO_BLK_T_OUT means direction, so
> > OR the two bits together should compliance with the specification.
> 
> I cannot find that in the specification:
> 
>   http://docs.oasis-open.org/virtio/virtio/v1.0/cs04/virtio-v1.0-cs04.html#x1-
> 2020002
> 
> and it would contradict the "The type of the request is either ... or
> ..." wording that I quoted from the spec above.
> 
> If you do find something in the spec, please let me know and we can
> figure out how to make the spec consistent.
I saw comments from file linux/usr/include/uapi/linux/virtio_blk.h, which says
VIRTIO_BLK_T_OUT may be combined with other commands and means direction,
the specification does not have such description.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v4] virtio_blk: add DISCARD and WRIET ZEROES commands support
  2018-06-04  4:14       ` Liu, Changpeng
@ 2018-06-04 10:02         ` Paolo Bonzini
  2018-06-05  0:55           ` Liu, Changpeng
  0 siblings, 1 reply; 7+ messages in thread
From: Paolo Bonzini @ 2018-06-04 10:02 UTC (permalink / raw)
  To: Liu, Changpeng, Stefan Hajnoczi; +Cc: cavery, virtualization

On 04/06/2018 06:14, Liu, Changpeng wrote:
>>> But I believe the specification says VIRTIO_BLK_T_OUT means direction, so
>>> OR the two bits together should compliance with the specification.
>> I cannot find that in the specification:
>>
>>   http://docs.oasis-open.org/virtio/virtio/v1.0/cs04/virtio-v1.0-cs04.html#x1-
>> 2020002
>>
>> and it would contradict the "The type of the request is either ... or
>> ..." wording that I quoted from the spec above.
>>
>> If you do find something in the spec, please let me know and we can
>> figure out how to make the spec consistent.
>
> I saw comments from file linux/usr/include/uapi/linux/virtio_blk.h, which says
> VIRTIO_BLK_T_OUT may be combined with other commands and means direction,
> the specification does not have such description.

I don't think it is in the specification indeed (however, 11 and 13 were
chosen so that VIRTIO_BLK_T_OUT can still indicate direction).

Paolo

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH v4] virtio_blk: add DISCARD and WRIET ZEROES commands support
  2018-06-04 10:02         ` Paolo Bonzini
@ 2018-06-05  0:55           ` Liu, Changpeng
  0 siblings, 0 replies; 7+ messages in thread
From: Liu, Changpeng @ 2018-06-05  0:55 UTC (permalink / raw)
  To: Paolo Bonzini, Stefan Hajnoczi; +Cc: cavery, virtualization



> -----Original Message-----
> From: Paolo Bonzini [mailto:pbonzini@redhat.com]
> Sent: Monday, June 4, 2018 6:03 PM
> To: Liu, Changpeng <changpeng.liu@intel.com>; Stefan Hajnoczi
> <stefanha@redhat.com>
> Cc: virtualization@lists.linux-foundation.org; cavery@redhat.com;
> jasowang@redhat.com; Wang, Wei W <wei.w.wang@intel.com>
> Subject: Re: [PATCH v4] virtio_blk: add DISCARD and WRIET ZEROES commands
> support
> 
> On 04/06/2018 06:14, Liu, Changpeng wrote:
> >>> But I believe the specification says VIRTIO_BLK_T_OUT means direction, so
> >>> OR the two bits together should compliance with the specification.
> >> I cannot find that in the specification:
> >>
> >>   http://docs.oasis-open.org/virtio/virtio/v1.0/cs04/virtio-v1.0-cs04.html#x1-
> >> 2020002
> >>
> >> and it would contradict the "The type of the request is either ... or
> >> ..." wording that I quoted from the spec above.
> >>
> >> If you do find something in the spec, please let me know and we can
> >> figure out how to make the spec consistent.
> >
> > I saw comments from file linux/usr/include/uapi/linux/virtio_blk.h, which says
> > VIRTIO_BLK_T_OUT may be combined with other commands and means direction,
> > the specification does not have such description.
> 
> I don't think it is in the specification indeed (however, 11 and 13 were
> chosen so that VIRTIO_BLK_T_OUT can still indicate direction).
Correct, we don't need to OR VIRTIO_BLK_T_OUT again for DISCARD and WRITE ZEROES commands.
I'll remove it in next patch set.
> 
> Paolo

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2018-06-05  0:55 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-05-29  1:42 [PATCH v4] virtio_blk: add DISCARD and WRIET ZEROES commands support Changpeng Liu
2018-05-31 10:30 ` Stefan Hajnoczi
2018-05-31 23:53   ` Liu, Changpeng
2018-06-01  8:59     ` Stefan Hajnoczi
2018-06-04  4:14       ` Liu, Changpeng
2018-06-04 10:02         ` Paolo Bonzini
2018-06-05  0:55           ` Liu, Changpeng

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.