All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH RFC 0/2] virtio-blk: add DISCARD and WRITE ZEROES features
@ 2019-01-24 17:23 Stefano Garzarella
  2019-01-24 17:23 ` [Qemu-devel] [PATCH RFC 1/2] " Stefano Garzarella
  2019-01-24 17:23 ` [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for WRITE_ZEROES command Stefano Garzarella
  0 siblings, 2 replies; 28+ messages in thread
From: Stefano Garzarella @ 2019-01-24 17:23 UTC (permalink / raw)
  To: qemu-devel
  Cc: Laurent Vivier, Michael S. Tsirkin, Stefan Hajnoczi, Max Reitz,
	Thomas Huth, Paolo Bonzini, Kevin Wolf, qemu-block

This series adds the support of DISCARD and WRITE ZEROES commands
and extends the virtio-blk-test to test WRITE_ZEROES command when
the feature is enabled.

RFC because I'm not sure if the "case" conditions that I used in
virtio-blk.c is clean enough.

This series requires the new virtio headers from linux v5.0-rc1
already imported by Paolo:

Based-on: <20190104082731.24967-1-pbonzini@redhat.com>

Thanks,
Stefano

Stefano Garzarella (2):
  virtio-blk: add DISCARD and WRITE ZEROES features
  tests/virtio-blk: add test for WRITE_ZEROES command

 hw/block/virtio-blk.c   | 79 +++++++++++++++++++++++++++++++++++++++++
 tests/virtio-blk-test.c | 63 ++++++++++++++++++++++++++++++++
 2 files changed, 142 insertions(+)

-- 
2.20.1

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [Qemu-devel] [PATCH RFC 1/2] virtio-blk: add DISCARD and WRITE ZEROES features
  2019-01-24 17:23 [Qemu-devel] [PATCH RFC 0/2] virtio-blk: add DISCARD and WRITE ZEROES features Stefano Garzarella
@ 2019-01-24 17:23 ` Stefano Garzarella
  2019-01-24 17:55   ` Dr. David Alan Gilbert
  2019-01-25 14:58   ` Stefan Hajnoczi
  2019-01-24 17:23 ` [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for WRITE_ZEROES command Stefano Garzarella
  1 sibling, 2 replies; 28+ messages in thread
From: Stefano Garzarella @ 2019-01-24 17:23 UTC (permalink / raw)
  To: qemu-devel
  Cc: Laurent Vivier, Michael S. Tsirkin, Stefan Hajnoczi, Max Reitz,
	Thomas Huth, Paolo Bonzini, Kevin Wolf, qemu-block

This patch adds the support of DISCARD and WRITE ZEROES commands,
that have been introduced in the virtio-blk protocol to have
better performance when using SSD backend.

Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
 hw/block/virtio-blk.c | 79 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 79 insertions(+)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index f208c6ddb9..8850957751 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -145,6 +145,25 @@ out:
     aio_context_release(blk_get_aio_context(s->conf.conf.blk));
 }
 
+static void virtio_blk_discard_wzeroes_complete(void *opaque, int ret)
+{
+    VirtIOBlockReq *req = opaque;
+    VirtIOBlock *s = req->dev;
+
+    aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
+    if (ret) {
+        if (virtio_blk_handle_rw_error(req, -ret, 0)) {
+            goto out;
+        }
+    }
+
+    virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
+    virtio_blk_free_request(req);
+
+out:
+    aio_context_release(blk_get_aio_context(s->conf.conf.blk));
+}
+
 #ifdef __linux__
 
 typedef struct {
@@ -584,6 +603,56 @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
         virtio_blk_free_request(req);
         break;
     }
+    /*
+     * VIRTIO_BLK_T_DISCARD and VIRTIO_BLK_T_WRITE_ZEROES are defined with
+     * VIRTIO_BLK_T_OUT flag set. We masked this flag in the switch statement,
+     * so we must mask it for these requests, then we will check the type.
+     */
+    case VIRTIO_BLK_T_DISCARD & ~VIRTIO_BLK_T_OUT:
+    case VIRTIO_BLK_T_WRITE_ZEROES & ~VIRTIO_BLK_T_OUT:
+    {
+        struct virtio_blk_discard_write_zeroes dwz_hdr;
+        uint64_t sector;
+        int bytes;
+
+        if (unlikely(iov_to_buf(out_iov, out_num, 0, &dwz_hdr,
+                                sizeof(dwz_hdr)) != sizeof(dwz_hdr))) {
+            virtio_error(vdev, "virtio-blk discard/wzeroes header too short");
+            return -1;
+        }
+
+        sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &dwz_hdr.sector);
+        bytes = virtio_ldl_p(VIRTIO_DEVICE(req->dev),
+                             &dwz_hdr.num_sectors) << BDRV_SECTOR_BITS;
+
+        if (!virtio_blk_sect_range_ok(req->dev, sector, bytes)) {
+            virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
+            virtio_blk_free_request(req);
+            return 0;
+        }
+
+        if ((type & ~(VIRTIO_BLK_T_BARRIER)) == VIRTIO_BLK_T_DISCARD) {
+            blk_aio_pdiscard(req->dev->blk, sector << BDRV_SECTOR_BITS, bytes,
+                             virtio_blk_discard_wzeroes_complete, req);
+        } else if ((type & ~(VIRTIO_BLK_T_BARRIER)) ==
+                   VIRTIO_BLK_T_WRITE_ZEROES) {
+            int flags = 0;
+
+            if (virtio_ldl_p(VIRTIO_DEVICE(req->dev), &dwz_hdr.flags) &
+                VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
+                flags |= BDRV_REQ_MAY_UNMAP;
+            }
+
+            blk_aio_pwrite_zeroes(req->dev->blk, sector << BDRV_SECTOR_BITS,
+                                  bytes, flags,
+                                  virtio_blk_discard_wzeroes_complete, req);
+        } else { /* Unsupported if VIRTIO_BLK_T_OUT is not set */
+            virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
+            virtio_blk_free_request(req);
+        }
+
+        break;
+    }
     default:
         virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
         virtio_blk_free_request(req);
@@ -763,6 +832,14 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
     blkcfg.alignment_offset = 0;
     blkcfg.wce = blk_enable_write_cache(s->blk);
     virtio_stw_p(vdev, &blkcfg.num_queues, s->conf.num_queues);
+    virtio_stl_p(vdev, &blkcfg.max_discard_sectors, BDRV_REQUEST_MAX_SECTORS);
+    virtio_stl_p(vdev, &blkcfg.max_discard_seg, 1);
+    virtio_stl_p(vdev, &blkcfg.discard_sector_alignment,
+                 blk_size >> BDRV_SECTOR_BITS);
+    virtio_stl_p(vdev, &blkcfg.max_write_zeroes_sectors,
+                 BDRV_REQUEST_MAX_SECTORS);
+    virtio_stl_p(vdev, &blkcfg.max_write_zeroes_seg, 1);
+    blkcfg.write_zeroes_may_unmap = 1;
     memcpy(config, &blkcfg, sizeof(struct virtio_blk_config));
 }
 
@@ -787,6 +864,8 @@ static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features,
     virtio_add_feature(&features, VIRTIO_BLK_F_GEOMETRY);
     virtio_add_feature(&features, VIRTIO_BLK_F_TOPOLOGY);
     virtio_add_feature(&features, VIRTIO_BLK_F_BLK_SIZE);
+    virtio_add_feature(&features, VIRTIO_BLK_F_DISCARD);
+    virtio_add_feature(&features, VIRTIO_BLK_F_WRITE_ZEROES);
     if (virtio_has_feature(features, VIRTIO_F_VERSION_1)) {
         if (s->conf.scsi) {
             error_setg(errp, "Please set scsi=off for virtio-blk devices in order to use virtio 1.0");
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for WRITE_ZEROES command
  2019-01-24 17:23 [Qemu-devel] [PATCH RFC 0/2] virtio-blk: add DISCARD and WRITE ZEROES features Stefano Garzarella
  2019-01-24 17:23 ` [Qemu-devel] [PATCH RFC 1/2] " Stefano Garzarella
@ 2019-01-24 17:23 ` Stefano Garzarella
  2019-01-25  6:01   ` Thomas Huth
  1 sibling, 1 reply; 28+ messages in thread
From: Stefano Garzarella @ 2019-01-24 17:23 UTC (permalink / raw)
  To: qemu-devel
  Cc: Laurent Vivier, Michael S. Tsirkin, Stefan Hajnoczi, Max Reitz,
	Thomas Huth, Paolo Bonzini, Kevin Wolf, qemu-block

If the WRITE_ZEROES feature is enabled, we check this
command in the test_basic().

Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
 tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
index 04c608764b..8cabbcb85a 100644
--- a/tests/virtio-blk-test.c
+++ b/tests/virtio-blk-test.c
@@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc,
 
     guest_free(alloc, req_addr);
 
+    if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) {
+        struct virtio_blk_discard_write_zeroes *dwz_hdr;
+        void *expected;
+
+        /*
+         * WRITE_ZEROES request on the same sector of previous test where
+         * we wrote "TEST".
+         */
+        req.type = VIRTIO_BLK_T_WRITE_ZEROES;
+        req.data = g_malloc0(512);
+        dwz_hdr = (struct virtio_blk_discard_write_zeroes *)req.data;
+        dwz_hdr->sector = 0;
+        dwz_hdr->num_sectors = 1;
+        dwz_hdr->flags = 0;
+
+        req_addr = virtio_blk_request(alloc, dev, &req, 512);
+
+        g_free(req.data);
+
+        free_head = qvirtqueue_add(vq, req_addr, 16, false, true);
+        qvirtqueue_add(vq, req_addr + 16, 512, false, true);
+        qvirtqueue_add(vq, req_addr + 528, 1, true, false);
+
+        qvirtqueue_kick(dev, vq, free_head);
+
+        qvirtio_wait_used_elem(dev, vq, free_head, NULL,
+                               QVIRTIO_BLK_TIMEOUT_US);
+        status = readb(req_addr + 528);
+        g_assert_cmpint(status, ==, 0);
+
+        guest_free(alloc, req_addr);
+
+        /* Read request to check if the sector contains all zeroes */
+        req.type = VIRTIO_BLK_T_IN;
+        req.ioprio = 1;
+        req.sector = 0;
+        req.data = g_malloc0(512);
+
+        req_addr = virtio_blk_request(alloc, dev, &req, 512);
+
+        g_free(req.data);
+
+        free_head = qvirtqueue_add(vq, req_addr, 16, false, true);
+        qvirtqueue_add(vq, req_addr + 16, 512, true, true);
+        qvirtqueue_add(vq, req_addr + 528, 1, true, false);
+
+        qvirtqueue_kick(dev, vq, free_head);
+
+        qvirtio_wait_used_elem(dev, vq, free_head, NULL,
+                               QVIRTIO_BLK_TIMEOUT_US);
+        status = readb(req_addr + 528);
+        g_assert_cmpint(status, ==, 0);
+
+        data = g_malloc(512);
+        expected = g_malloc0(512);
+        memread(req_addr + 16, data, 512);
+        g_assert_cmpmem(data, 512, expected, 512);
+        g_free(expected);
+        g_free(data);
+
+        guest_free(alloc, req_addr);
+    }
+
     if (features & (1u << VIRTIO_F_ANY_LAYOUT)) {
         /* Write and read with 2 descriptor layout */
         /* Write request */
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH RFC 1/2] virtio-blk: add DISCARD and WRITE ZEROES features
  2019-01-24 17:23 ` [Qemu-devel] [PATCH RFC 1/2] " Stefano Garzarella
@ 2019-01-24 17:55   ` Dr. David Alan Gilbert
  2019-01-24 18:31     ` Stefano Garzarella
  2019-01-25 14:58   ` Stefan Hajnoczi
  1 sibling, 1 reply; 28+ messages in thread
From: Dr. David Alan Gilbert @ 2019-01-24 17:55 UTC (permalink / raw)
  To: Stefano Garzarella
  Cc: qemu-devel, Laurent Vivier, Kevin Wolf, Thomas Huth, qemu-block,
	Michael S. Tsirkin, Max Reitz, Stefan Hajnoczi, Paolo Bonzini

* Stefano Garzarella (sgarzare@redhat.com) wrote:
> This patch adds the support of DISCARD and WRITE ZEROES commands,
> that have been introduced in the virtio-blk protocol to have
> better performance when using SSD backend.
> 
> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>

Hi,
  Do you need to make those features machine-type dependent
so that a VM started on a nice new qemu with this feature doesn't
get confused when live migrated back to an older version?

Dave

> ---
>  hw/block/virtio-blk.c | 79 +++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 79 insertions(+)
> 
> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
> index f208c6ddb9..8850957751 100644
> --- a/hw/block/virtio-blk.c
> +++ b/hw/block/virtio-blk.c
> @@ -145,6 +145,25 @@ out:
>      aio_context_release(blk_get_aio_context(s->conf.conf.blk));
>  }
>  
> +static void virtio_blk_discard_wzeroes_complete(void *opaque, int ret)
> +{
> +    VirtIOBlockReq *req = opaque;
> +    VirtIOBlock *s = req->dev;
> +
> +    aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
> +    if (ret) {
> +        if (virtio_blk_handle_rw_error(req, -ret, 0)) {
> +            goto out;
> +        }
> +    }
> +
> +    virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
> +    virtio_blk_free_request(req);
> +
> +out:
> +    aio_context_release(blk_get_aio_context(s->conf.conf.blk));
> +}
> +
>  #ifdef __linux__
>  
>  typedef struct {
> @@ -584,6 +603,56 @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
>          virtio_blk_free_request(req);
>          break;
>      }
> +    /*
> +     * VIRTIO_BLK_T_DISCARD and VIRTIO_BLK_T_WRITE_ZEROES are defined with
> +     * VIRTIO_BLK_T_OUT flag set. We masked this flag in the switch statement,
> +     * so we must mask it for these requests, then we will check the type.
> +     */
> +    case VIRTIO_BLK_T_DISCARD & ~VIRTIO_BLK_T_OUT:
> +    case VIRTIO_BLK_T_WRITE_ZEROES & ~VIRTIO_BLK_T_OUT:
> +    {
> +        struct virtio_blk_discard_write_zeroes dwz_hdr;
> +        uint64_t sector;
> +        int bytes;
> +
> +        if (unlikely(iov_to_buf(out_iov, out_num, 0, &dwz_hdr,
> +                                sizeof(dwz_hdr)) != sizeof(dwz_hdr))) {
> +            virtio_error(vdev, "virtio-blk discard/wzeroes header too short");
> +            return -1;
> +        }
> +
> +        sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &dwz_hdr.sector);
> +        bytes = virtio_ldl_p(VIRTIO_DEVICE(req->dev),
> +                             &dwz_hdr.num_sectors) << BDRV_SECTOR_BITS;
> +
> +        if (!virtio_blk_sect_range_ok(req->dev, sector, bytes)) {
> +            virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
> +            virtio_blk_free_request(req);
> +            return 0;
> +        }
> +
> +        if ((type & ~(VIRTIO_BLK_T_BARRIER)) == VIRTIO_BLK_T_DISCARD) {
> +            blk_aio_pdiscard(req->dev->blk, sector << BDRV_SECTOR_BITS, bytes,
> +                             virtio_blk_discard_wzeroes_complete, req);
> +        } else if ((type & ~(VIRTIO_BLK_T_BARRIER)) ==
> +                   VIRTIO_BLK_T_WRITE_ZEROES) {
> +            int flags = 0;
> +
> +            if (virtio_ldl_p(VIRTIO_DEVICE(req->dev), &dwz_hdr.flags) &
> +                VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
> +                flags |= BDRV_REQ_MAY_UNMAP;
> +            }
> +
> +            blk_aio_pwrite_zeroes(req->dev->blk, sector << BDRV_SECTOR_BITS,
> +                                  bytes, flags,
> +                                  virtio_blk_discard_wzeroes_complete, req);
> +        } else { /* Unsupported if VIRTIO_BLK_T_OUT is not set */
> +            virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
> +            virtio_blk_free_request(req);
> +        }
> +
> +        break;
> +    }
>      default:
>          virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
>          virtio_blk_free_request(req);
> @@ -763,6 +832,14 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
>      blkcfg.alignment_offset = 0;
>      blkcfg.wce = blk_enable_write_cache(s->blk);
>      virtio_stw_p(vdev, &blkcfg.num_queues, s->conf.num_queues);
> +    virtio_stl_p(vdev, &blkcfg.max_discard_sectors, BDRV_REQUEST_MAX_SECTORS);
> +    virtio_stl_p(vdev, &blkcfg.max_discard_seg, 1);
> +    virtio_stl_p(vdev, &blkcfg.discard_sector_alignment,
> +                 blk_size >> BDRV_SECTOR_BITS);
> +    virtio_stl_p(vdev, &blkcfg.max_write_zeroes_sectors,
> +                 BDRV_REQUEST_MAX_SECTORS);
> +    virtio_stl_p(vdev, &blkcfg.max_write_zeroes_seg, 1);
> +    blkcfg.write_zeroes_may_unmap = 1;
>      memcpy(config, &blkcfg, sizeof(struct virtio_blk_config));
>  }
>  
> @@ -787,6 +864,8 @@ static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features,
>      virtio_add_feature(&features, VIRTIO_BLK_F_GEOMETRY);
>      virtio_add_feature(&features, VIRTIO_BLK_F_TOPOLOGY);
>      virtio_add_feature(&features, VIRTIO_BLK_F_BLK_SIZE);
> +    virtio_add_feature(&features, VIRTIO_BLK_F_DISCARD);
> +    virtio_add_feature(&features, VIRTIO_BLK_F_WRITE_ZEROES);
>      if (virtio_has_feature(features, VIRTIO_F_VERSION_1)) {
>          if (s->conf.scsi) {
>              error_setg(errp, "Please set scsi=off for virtio-blk devices in order to use virtio 1.0");
> -- 
> 2.20.1
> 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH RFC 1/2] virtio-blk: add DISCARD and WRITE ZEROES features
  2019-01-24 17:55   ` Dr. David Alan Gilbert
@ 2019-01-24 18:31     ` Stefano Garzarella
  0 siblings, 0 replies; 28+ messages in thread
From: Stefano Garzarella @ 2019-01-24 18:31 UTC (permalink / raw)
  To: Dr. David Alan Gilbert
  Cc: qemu devel list, Laurent Vivier, Kevin Wolf, Thomas Huth,
	qemu-block, Michael S. Tsirkin, Max Reitz, Stefan Hajnoczi,
	Paolo Bonzini

On Thu, Jan 24, 2019 at 6:55 PM Dr. David Alan Gilbert
<dgilbert@redhat.com> wrote:
>
> * Stefano Garzarella (sgarzare@redhat.com) wrote:
> > This patch adds the support of DISCARD and WRITE ZEROES commands,
> > that have been introduced in the virtio-blk protocol to have
> > better performance when using SSD backend.
> >
> > Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
>
> Hi,
>   Do you need to make those features machine-type dependent
> so that a VM started on a nice new qemu with this feature doesn't
> get confused when live migrated back to an older version?
>

Hi Dave,
oh, thanks! I think the answer is absolutely yes :)
I'll fix it!

Another doubt that I have now is that I need to flush the
MultiReqBuffer before submitting DISCARD or WRITE ZEROES commands.

Thanks,
Stefano

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for WRITE_ZEROES command
  2019-01-24 17:23 ` [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for WRITE_ZEROES command Stefano Garzarella
@ 2019-01-25  6:01   ` Thomas Huth
  2019-01-25  6:07     ` Thomas Huth
  2019-01-25  6:07     ` Thomas Huth
  0 siblings, 2 replies; 28+ messages in thread
From: Thomas Huth @ 2019-01-25  6:01 UTC (permalink / raw)
  To: Stefano Garzarella, qemu-devel
  Cc: Laurent Vivier, Kevin Wolf, qemu-block, Michael S. Tsirkin,
	Max Reitz, Stefan Hajnoczi, Paolo Bonzini

On 2019-01-24 18:23, Stefano Garzarella wrote:
> If the WRITE_ZEROES feature is enabled, we check this
> command in the test_basic().
> 
> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> ---
>  tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 63 insertions(+)
> 
> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
> index 04c608764b..8cabbcb85a 100644
> --- a/tests/virtio-blk-test.c
> +++ b/tests/virtio-blk-test.c
> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc,
>  
>      guest_free(alloc, req_addr);
>  
> +    if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) {
> +        struct virtio_blk_discard_write_zeroes *dwz_hdr;
> +        void *expected;
> +
> +        /*
> +         * WRITE_ZEROES request on the same sector of previous test where
> +         * we wrote "TEST".
> +         */
> +        req.type = VIRTIO_BLK_T_WRITE_ZEROES;
> +        req.data = g_malloc0(512);

Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or
something similar here, to see whether zeroes or 0xaa is written?

> +        dwz_hdr = (struct virtio_blk_discard_write_zeroes *)req.data;
> +        dwz_hdr->sector = 0;
> +        dwz_hdr->num_sectors = 1;
> +        dwz_hdr->flags = 0;
> +
> +        req_addr = virtio_blk_request(alloc, dev, &req, 512);
> +
> +        g_free(req.data);
> +
> +        free_head = qvirtqueue_add(vq, req_addr, 16, false, true);
> +        qvirtqueue_add(vq, req_addr + 16, 512, false, true);
> +        qvirtqueue_add(vq, req_addr + 528, 1, true, false);
> +
> +        qvirtqueue_kick(dev, vq, free_head);
> +
> +        qvirtio_wait_used_elem(dev, vq, free_head, NULL,
> +                               QVIRTIO_BLK_TIMEOUT_US);
> +        status = readb(req_addr + 528);
> +        g_assert_cmpint(status, ==, 0);
> +
> +        guest_free(alloc, req_addr);
> +
> +        /* Read request to check if the sector contains all zeroes */
> +        req.type = VIRTIO_BLK_T_IN;
> +        req.ioprio = 1;
> +        req.sector = 0;
> +        req.data = g_malloc0(512);
> +
> +        req_addr = virtio_blk_request(alloc, dev, &req, 512);
> +
> +        g_free(req.data);
> +
> +        free_head = qvirtqueue_add(vq, req_addr, 16, false, true);
> +        qvirtqueue_add(vq, req_addr + 16, 512, true, true);
> +        qvirtqueue_add(vq, req_addr + 528, 1, true, false);
> +
> +        qvirtqueue_kick(dev, vq, free_head);
> +
> +        qvirtio_wait_used_elem(dev, vq, free_head, NULL,
> +                               QVIRTIO_BLK_TIMEOUT_US);
> +        status = readb(req_addr + 528);
> +        g_assert_cmpint(status, ==, 0);
> +
> +        data = g_malloc(512);
> +        expected = g_malloc0(512);
> +        memread(req_addr + 16, data, 512);
> +        g_assert_cmpmem(data, 512, expected, 512);
> +        g_free(expected);
> +        g_free(data);
> +
> +        guest_free(alloc, req_addr);
> +    }
> +
>      if (features & (1u << VIRTIO_F_ANY_LAYOUT)) {
>          /* Write and read with 2 descriptor layout */
>          /* Write request */
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for WRITE_ZEROES command
  2019-01-25  6:01   ` Thomas Huth
@ 2019-01-25  6:07     ` Thomas Huth
  2019-01-25  6:07     ` Thomas Huth
  1 sibling, 0 replies; 28+ messages in thread
From: Thomas Huth @ 2019-01-25  6:07 UTC (permalink / raw)
  To: Stefano Garzarella, qemu-devel
  Cc: Laurent Vivier, Kevin Wolf, qemu-block, Michael S. Tsirkin,
	Max Reitz, Stefan Hajnoczi, Paolo Bonzini

On 2019-01-25 07:01, Thomas Huth wrote:
> On 2019-01-24 18:23, Stefano Garzarella wrote:
>> If the WRITE_ZEROES feature is enabled, we check this
>> command in the test_basic().
>>
>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
>> ---
>>  tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 63 insertions(+)
>>
>> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
>> index 04c608764b..8cabbcb85a 100644
>> --- a/tests/virtio-blk-test.c
>> +++ b/tests/virtio-blk-test.c
>> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc,
>>  
>>      guest_free(alloc, req_addr);
>>  
>> +    if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) {
>> +        struct virtio_blk_discard_write_zeroes *dwz_hdr;
>> +        void *expected;
>> +
>> +        /*
>> +         * WRITE_ZEROES request on the same sector of previous test where
>> +         * we wrote "TEST".
>> +         */
>> +        req.type = VIRTIO_BLK_T_WRITE_ZEROES;
>> +        req.data = g_malloc0(512);
> 
> Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or
> something similar here, to see whether zeroes or 0xaa is written?

Ah, never mind, I thought req.data would be a sector buffer here, but
looking at the lines below, it apparently is something different.

Why do you allocate 512 bytes here? I'd rather expect
g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and
then you could also use a local "struct virtio_blk_discard_write_zeroes
dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely?

>> +        dwz_hdr = (struct virtio_blk_discard_write_zeroes *)req.data;
>> +        dwz_hdr->sector = 0;
>> +        dwz_hdr->num_sectors = 1;
>> +        dwz_hdr->flags = 0;
>> +
>> +        req_addr = virtio_blk_request(alloc, dev, &req, 512);
>> +
>> +        g_free(req.data);
>> +
>> +        free_head = qvirtqueue_add(vq, req_addr, 16, false, true);
>> +        qvirtqueue_add(vq, req_addr + 16, 512, false, true);
>> +        qvirtqueue_add(vq, req_addr + 528, 1, true, false);
>> +
>> +        qvirtqueue_kick(dev, vq, free_head);
>> +
>> +        qvirtio_wait_used_elem(dev, vq, free_head, NULL,
>> +                               QVIRTIO_BLK_TIMEOUT_US);
>> +        status = readb(req_addr + 528);
>> +        g_assert_cmpint(status, ==, 0);
>> +
>> +        guest_free(alloc, req_addr);
>> +
>> +        /* Read request to check if the sector contains all zeroes */
>> +        req.type = VIRTIO_BLK_T_IN;
>> +        req.ioprio = 1;
>> +        req.sector = 0;
>> +        req.data = g_malloc0(512);
>> +
>> +        req_addr = virtio_blk_request(alloc, dev, &req, 512);
>> +
>> +        g_free(req.data);
>> +
>> +        free_head = qvirtqueue_add(vq, req_addr, 16, false, true);
>> +        qvirtqueue_add(vq, req_addr + 16, 512, true, true);
>> +        qvirtqueue_add(vq, req_addr + 528, 1, true, false);
>> +
>> +        qvirtqueue_kick(dev, vq, free_head);
>> +
>> +        qvirtio_wait_used_elem(dev, vq, free_head, NULL,
>> +                               QVIRTIO_BLK_TIMEOUT_US);
>> +        status = readb(req_addr + 528);
>> +        g_assert_cmpint(status, ==, 0);
>> +
>> +        data = g_malloc(512);
>> +        expected = g_malloc0(512);
>> +        memread(req_addr + 16, data, 512);
>> +        g_assert_cmpmem(data, 512, expected, 512);
>> +        g_free(expected);
>> +        g_free(data);
>> +
>> +        guest_free(alloc, req_addr);
>> +    }
>> +
>>      if (features & (1u << VIRTIO_F_ANY_LAYOUT)) {
>>          /* Write and read with 2 descriptor layout */
>>          /* Write request */
>>
> 
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for WRITE_ZEROES command
  2019-01-25  6:01   ` Thomas Huth
  2019-01-25  6:07     ` Thomas Huth
@ 2019-01-25  6:07     ` Thomas Huth
  2019-01-25  8:16       ` Stefano Garzarella
  1 sibling, 1 reply; 28+ messages in thread
From: Thomas Huth @ 2019-01-25  6:07 UTC (permalink / raw)
  To: Stefano Garzarella, qemu-devel
  Cc: Laurent Vivier, Kevin Wolf, qemu-block, Michael S. Tsirkin,
	Max Reitz, Stefan Hajnoczi, Paolo Bonzini

On 2019-01-25 07:01, Thomas Huth wrote:
> On 2019-01-24 18:23, Stefano Garzarella wrote:
>> If the WRITE_ZEROES feature is enabled, we check this
>> command in the test_basic().
>>
>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
>> ---
>>  tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 63 insertions(+)
>>
>> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
>> index 04c608764b..8cabbcb85a 100644
>> --- a/tests/virtio-blk-test.c
>> +++ b/tests/virtio-blk-test.c
>> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc,
>>  
>>      guest_free(alloc, req_addr);
>>  
>> +    if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) {
>> +        struct virtio_blk_discard_write_zeroes *dwz_hdr;
>> +        void *expected;
>> +
>> +        /*
>> +         * WRITE_ZEROES request on the same sector of previous test where
>> +         * we wrote "TEST".
>> +         */
>> +        req.type = VIRTIO_BLK_T_WRITE_ZEROES;
>> +        req.data = g_malloc0(512);
> 
> Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or
> something similar here, to see whether zeroes or 0xaa is written?

Ah, never mind, I thought req.data would be a sector buffer here, but
looking at the lines below, it apparently is something different.

Why do you allocate 512 bytes here? I'd rather expect
g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and
then you could also use a local "struct virtio_blk_discard_write_zeroes
dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely?

>> +        dwz_hdr = (struct virtio_blk_discard_write_zeroes *)req.data;
>> +        dwz_hdr->sector = 0;
>> +        dwz_hdr->num_sectors = 1;
>> +        dwz_hdr->flags = 0;
>> +
>> +        req_addr = virtio_blk_request(alloc, dev, &req, 512);
>> +
>> +        g_free(req.data);
>> +
>> +        free_head = qvirtqueue_add(vq, req_addr, 16, false, true);
>> +        qvirtqueue_add(vq, req_addr + 16, 512, false, true);
>> +        qvirtqueue_add(vq, req_addr + 528, 1, true, false);
>> +
>> +        qvirtqueue_kick(dev, vq, free_head);
>> +
>> +        qvirtio_wait_used_elem(dev, vq, free_head, NULL,
>> +                               QVIRTIO_BLK_TIMEOUT_US);
>> +        status = readb(req_addr + 528);
>> +        g_assert_cmpint(status, ==, 0);
>> +
>> +        guest_free(alloc, req_addr);
>> +
>> +        /* Read request to check if the sector contains all zeroes */
>> +        req.type = VIRTIO_BLK_T_IN;
>> +        req.ioprio = 1;
>> +        req.sector = 0;
>> +        req.data = g_malloc0(512);
>> +
>> +        req_addr = virtio_blk_request(alloc, dev, &req, 512);
>> +
>> +        g_free(req.data);
>> +
>> +        free_head = qvirtqueue_add(vq, req_addr, 16, false, true);
>> +        qvirtqueue_add(vq, req_addr + 16, 512, true, true);
>> +        qvirtqueue_add(vq, req_addr + 528, 1, true, false);
>> +
>> +        qvirtqueue_kick(dev, vq, free_head);
>> +
>> +        qvirtio_wait_used_elem(dev, vq, free_head, NULL,
>> +                               QVIRTIO_BLK_TIMEOUT_US);
>> +        status = readb(req_addr + 528);
>> +        g_assert_cmpint(status, ==, 0);
>> +
>> +        data = g_malloc(512);
>> +        expected = g_malloc0(512);
>> +        memread(req_addr + 16, data, 512);
>> +        g_assert_cmpmem(data, 512, expected, 512);
>> +        g_free(expected);
>> +        g_free(data);
>> +
>> +        guest_free(alloc, req_addr);
>> +    }
>> +
>>      if (features & (1u << VIRTIO_F_ANY_LAYOUT)) {
>>          /* Write and read with 2 descriptor layout */
>>          /* Write request */
>>
> 
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for WRITE_ZEROES command
  2019-01-25  6:07     ` Thomas Huth
@ 2019-01-25  8:16       ` Stefano Garzarella
  2019-01-25  8:49         ` Thomas Huth
  0 siblings, 1 reply; 28+ messages in thread
From: Stefano Garzarella @ 2019-01-25  8:16 UTC (permalink / raw)
  To: Thomas Huth
  Cc: qemu-devel, Laurent Vivier, Kevin Wolf, qemu-block,
	Michael S. Tsirkin, Max Reitz, Stefan Hajnoczi, Paolo Bonzini

On Fri, Jan 25, 2019 at 07:07:35AM +0100, Thomas Huth wrote:
> On 2019-01-25 07:01, Thomas Huth wrote:
> > On 2019-01-24 18:23, Stefano Garzarella wrote:
> >> If the WRITE_ZEROES feature is enabled, we check this
> >> command in the test_basic().
> >>
> >> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> >> ---
> >>  tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++
> >>  1 file changed, 63 insertions(+)
> >>
> >> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
> >> index 04c608764b..8cabbcb85a 100644
> >> --- a/tests/virtio-blk-test.c
> >> +++ b/tests/virtio-blk-test.c
> >> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc,
> >>  
> >>      guest_free(alloc, req_addr);
> >>  
> >> +    if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) {
> >> +        struct virtio_blk_discard_write_zeroes *dwz_hdr;
> >> +        void *expected;
> >> +
> >> +        /*
> >> +         * WRITE_ZEROES request on the same sector of previous test where
> >> +         * we wrote "TEST".
> >> +         */
> >> +        req.type = VIRTIO_BLK_T_WRITE_ZEROES;
> >> +        req.data = g_malloc0(512);
> > 
> > Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or
> > something similar here, to see whether zeroes or 0xaa is written?
> 
> Ah, never mind, I thought req.data would be a sector buffer here, but
> looking at the lines below, it apparently is something different.
> 
> Why do you allocate 512 bytes here? I'd rather expect
> g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and
> then you could also use a local "struct virtio_blk_discard_write_zeroes
> dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely?
> 

Hi Thomas,
it was my initial implementation, but on the first test I discovered
that virtio_blk_request() has an assert on the data_size and it requires
a multiple of 512 bytes.
Then I looked at the virtio-spec #1, and it seems that data should be
multiple of 512 bytes also if it contains the struct
virtio_blk_discard_write_zeroes. (I'm not sure)

Anyway I tried to allocate only the space for that struct, commented the
assert and the test works well.

How do you suggest to proceed?


[1](https://github.com/oasis-tcs/virtio-spec/blob/master/content.tex#L3944)


Thanks,
Stefano

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for WRITE_ZEROES command
  2019-01-25  8:16       ` Stefano Garzarella
@ 2019-01-25  8:49         ` Thomas Huth
  2019-01-25 11:58           ` Liu, Changpeng
                             ` (2 more replies)
  0 siblings, 3 replies; 28+ messages in thread
From: Thomas Huth @ 2019-01-25  8:49 UTC (permalink / raw)
  To: Stefano Garzarella, Michael S. Tsirkin, Changpeng Liu
  Cc: qemu-devel, Laurent Vivier, Kevin Wolf, qemu-block, Max Reitz,
	Stefan Hajnoczi, Paolo Bonzini

On 2019-01-25 09:16, Stefano Garzarella wrote:
> On Fri, Jan 25, 2019 at 07:07:35AM +0100, Thomas Huth wrote:
>> On 2019-01-25 07:01, Thomas Huth wrote:
>>> On 2019-01-24 18:23, Stefano Garzarella wrote:
>>>> If the WRITE_ZEROES feature is enabled, we check this
>>>> command in the test_basic().
>>>>
>>>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
>>>> ---
>>>>  tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++
>>>>  1 file changed, 63 insertions(+)
>>>>
>>>> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
>>>> index 04c608764b..8cabbcb85a 100644
>>>> --- a/tests/virtio-blk-test.c
>>>> +++ b/tests/virtio-blk-test.c
>>>> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc,
>>>>  
>>>>      guest_free(alloc, req_addr);
>>>>  
>>>> +    if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) {
>>>> +        struct virtio_blk_discard_write_zeroes *dwz_hdr;
>>>> +        void *expected;
>>>> +
>>>> +        /*
>>>> +         * WRITE_ZEROES request on the same sector of previous test where
>>>> +         * we wrote "TEST".
>>>> +         */
>>>> +        req.type = VIRTIO_BLK_T_WRITE_ZEROES;
>>>> +        req.data = g_malloc0(512);
>>>
>>> Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or
>>> something similar here, to see whether zeroes or 0xaa is written?
>>
>> Ah, never mind, I thought req.data would be a sector buffer here, but
>> looking at the lines below, it apparently is something different.
>>
>> Why do you allocate 512 bytes here? I'd rather expect
>> g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and
>> then you could also use a local "struct virtio_blk_discard_write_zeroes
>> dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely?
>>
> 
> Hi Thomas,
> it was my initial implementation, but on the first test I discovered
> that virtio_blk_request() has an assert on the data_size and it requires
> a multiple of 512 bytes.
> Then I looked at the virtio-spec #1, and it seems that data should be
> multiple of 512 bytes also if it contains the struct
> virtio_blk_discard_write_zeroes. (I'm not sure)
> 
> Anyway I tried to allocate only the space for that struct, commented the
> assert and the test works well.
> 
> How do you suggest to proceed?

Wow, that's a tough question. Looking at the virtio spec, I agree with
you, it looks like struct virtio_blk_discard_write_zeroes should be
padded to 512 bytes here. But when I look at the Linux sources
(drivers/block/virtio_blk.c), I fail to see that they are doing the
padding there (but maybe I'm just too blind).

Looking at the QEMU sources, it seems like it can deal with both and
always sets the status right behind the last byte:

    req->in = (void *)in_iov[in_num - 1].iov_base
              + in_iov[in_num - 1].iov_len
              - sizeof(struct virtio_blk_inhdr);

Anyway, I think the virtio spec should be clearer here to avoid bad
implementations in the future, so maybe Changpeng or Michael could
update the spec here a little bit?

 Thomas


> [1](https://github.com/oasis-tcs/virtio-spec/blob/master/content.tex#L3944)
> 
> 
> Thanks,
> Stefano
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for WRITE_ZEROES command
  2019-01-25  8:49         ` Thomas Huth
@ 2019-01-25 11:58           ` Liu, Changpeng
  2019-01-25 12:48             ` Thomas Huth
  2019-01-25 15:12           ` Stefan Hajnoczi
  2019-01-25 19:14           ` Michael S. Tsirkin
  2 siblings, 1 reply; 28+ messages in thread
From: Liu, Changpeng @ 2019-01-25 11:58 UTC (permalink / raw)
  To: Thomas Huth, Stefano Garzarella, Michael S. Tsirkin
  Cc: qemu-devel, Laurent Vivier, Kevin Wolf, qemu-block, Max Reitz,
	Stefan Hajnoczi, Paolo Bonzini



> -----Original Message-----
> From: Thomas Huth [mailto:thuth@redhat.com]
> Sent: Friday, January 25, 2019 4:49 PM
> To: Stefano Garzarella <sgarzare@redhat.com>; Michael S. Tsirkin
> <mst@redhat.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Cc: qemu-devel@nongnu.org; Laurent Vivier <lvivier@redhat.com>; Kevin Wolf
> <kwolf@redhat.com>; qemu-block@nongnu.org; Max Reitz
> <mreitz@redhat.com>; Stefan Hajnoczi <stefanha@redhat.com>; Paolo Bonzini
> <pbonzini@redhat.com>
> Subject: Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for
> WRITE_ZEROES command
> 
> On 2019-01-25 09:16, Stefano Garzarella wrote:
> > On Fri, Jan 25, 2019 at 07:07:35AM +0100, Thomas Huth wrote:
> >> On 2019-01-25 07:01, Thomas Huth wrote:
> >>> On 2019-01-24 18:23, Stefano Garzarella wrote:
> >>>> If the WRITE_ZEROES feature is enabled, we check this
> >>>> command in the test_basic().
> >>>>
> >>>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> >>>> ---
> >>>>  tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++
> >>>>  1 file changed, 63 insertions(+)
> >>>>
> >>>> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
> >>>> index 04c608764b..8cabbcb85a 100644
> >>>> --- a/tests/virtio-blk-test.c
> >>>> +++ b/tests/virtio-blk-test.c
> >>>> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev,
> QGuestAllocator *alloc,
> >>>>
> >>>>      guest_free(alloc, req_addr);
> >>>>
> >>>> +    if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) {
> >>>> +        struct virtio_blk_discard_write_zeroes *dwz_hdr;
> >>>> +        void *expected;
> >>>> +
> >>>> +        /*
> >>>> +         * WRITE_ZEROES request on the same sector of previous test where
> >>>> +         * we wrote "TEST".
> >>>> +         */
> >>>> +        req.type = VIRTIO_BLK_T_WRITE_ZEROES;
> >>>> +        req.data = g_malloc0(512);
> >>>
> >>> Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or
> >>> something similar here, to see whether zeroes or 0xaa is written?
> >>
> >> Ah, never mind, I thought req.data would be a sector buffer here, but
> >> looking at the lines below, it apparently is something different.
> >>
> >> Why do you allocate 512 bytes here? I'd rather expect
> >> g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and
> >> then you could also use a local "struct virtio_blk_discard_write_zeroes
> >> dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely?
> >>
> >
> > Hi Thomas,
> > it was my initial implementation, but on the first test I discovered
> > that virtio_blk_request() has an assert on the data_size and it requires
> > a multiple of 512 bytes.
> > Then I looked at the virtio-spec #1, and it seems that data should be
> > multiple of 512 bytes also if it contains the struct
> > virtio_blk_discard_write_zeroes. (I'm not sure)
> >
> > Anyway I tried to allocate only the space for that struct, commented the
> > assert and the test works well.
> >
> > How do you suggest to proceed?
> 
> Wow, that's a tough question. Looking at the virtio spec, I agree with
> you, it looks like struct virtio_blk_discard_write_zeroes should be
> padded to 512 bytes here. But when I look at the Linux sources
> (drivers/block/virtio_blk.c), I fail to see that they are doing the
> padding there (but maybe I'm just too blind).
> 
> Looking at the QEMU sources, it seems like it can deal with both and
> always sets the status right behind the last byte:
> 
>     req->in = (void *)in_iov[in_num - 1].iov_base
>               + in_iov[in_num - 1].iov_len
>               - sizeof(struct virtio_blk_inhdr);
> 
> Anyway, I think the virtio spec should be clearer here to avoid bad
> implementations in the future, so maybe Changpeng or Michael could
> update the spec here a little bit?
The data for Discard and Write Zeroes commands are struct virtio_blk_discard_write_zeroes
aligned, that means you can pass 16 bytes aligned data, based on the segments number supported,
this is also aligned with NVMe specification and  the SCSI specification.
> 
>  Thomas
> 
> 
> > [1](https://github.com/oasis-tcs/virtio-spec/blob/master/content.tex#L3944)
> >
> >
> > Thanks,
> > Stefano
> >


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for WRITE_ZEROES command
  2019-01-25 11:58           ` Liu, Changpeng
@ 2019-01-25 12:48             ` Thomas Huth
  2019-01-25 19:18               ` Michael S. Tsirkin
  0 siblings, 1 reply; 28+ messages in thread
From: Thomas Huth @ 2019-01-25 12:48 UTC (permalink / raw)
  To: Liu, Changpeng, Stefano Garzarella, Michael S. Tsirkin
  Cc: Laurent Vivier, Kevin Wolf, qemu-block, qemu-devel, Max Reitz,
	Stefan Hajnoczi, Paolo Bonzini

On 2019-01-25 12:58, Liu, Changpeng wrote:
> 
> 
>> -----Original Message-----
>> From: Thomas Huth [mailto:thuth@redhat.com]
>> Sent: Friday, January 25, 2019 4:49 PM
>> To: Stefano Garzarella <sgarzare@redhat.com>; Michael S. Tsirkin
>> <mst@redhat.com>; Liu, Changpeng <changpeng.liu@intel.com>
>> Cc: qemu-devel@nongnu.org; Laurent Vivier <lvivier@redhat.com>; Kevin Wolf
>> <kwolf@redhat.com>; qemu-block@nongnu.org; Max Reitz
>> <mreitz@redhat.com>; Stefan Hajnoczi <stefanha@redhat.com>; Paolo Bonzini
>> <pbonzini@redhat.com>
>> Subject: Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for
>> WRITE_ZEROES command
>>
>> On 2019-01-25 09:16, Stefano Garzarella wrote:
>>> On Fri, Jan 25, 2019 at 07:07:35AM +0100, Thomas Huth wrote:
>>>> On 2019-01-25 07:01, Thomas Huth wrote:
>>>>> On 2019-01-24 18:23, Stefano Garzarella wrote:
>>>>>> If the WRITE_ZEROES feature is enabled, we check this
>>>>>> command in the test_basic().
>>>>>>
>>>>>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
>>>>>> ---
>>>>>>  tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++
>>>>>>  1 file changed, 63 insertions(+)
>>>>>>
>>>>>> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
>>>>>> index 04c608764b..8cabbcb85a 100644
>>>>>> --- a/tests/virtio-blk-test.c
>>>>>> +++ b/tests/virtio-blk-test.c
>>>>>> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev,
>> QGuestAllocator *alloc,
>>>>>>
>>>>>>      guest_free(alloc, req_addr);
>>>>>>
>>>>>> +    if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) {
>>>>>> +        struct virtio_blk_discard_write_zeroes *dwz_hdr;
>>>>>> +        void *expected;
>>>>>> +
>>>>>> +        /*
>>>>>> +         * WRITE_ZEROES request on the same sector of previous test where
>>>>>> +         * we wrote "TEST".
>>>>>> +         */
>>>>>> +        req.type = VIRTIO_BLK_T_WRITE_ZEROES;
>>>>>> +        req.data = g_malloc0(512);
>>>>>
>>>>> Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or
>>>>> something similar here, to see whether zeroes or 0xaa is written?
>>>>
>>>> Ah, never mind, I thought req.data would be a sector buffer here, but
>>>> looking at the lines below, it apparently is something different.
>>>>
>>>> Why do you allocate 512 bytes here? I'd rather expect
>>>> g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and
>>>> then you could also use a local "struct virtio_blk_discard_write_zeroes
>>>> dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely?
>>>>
>>>
>>> Hi Thomas,
>>> it was my initial implementation, but on the first test I discovered
>>> that virtio_blk_request() has an assert on the data_size and it requires
>>> a multiple of 512 bytes.
>>> Then I looked at the virtio-spec #1, and it seems that data should be
>>> multiple of 512 bytes also if it contains the struct
>>> virtio_blk_discard_write_zeroes. (I'm not sure)
>>>
>>> Anyway I tried to allocate only the space for that struct, commented the
>>> assert and the test works well.
>>>
>>> How do you suggest to proceed?
>>
>> Wow, that's a tough question. Looking at the virtio spec, I agree with
>> you, it looks like struct virtio_blk_discard_write_zeroes should be
>> padded to 512 bytes here. But when I look at the Linux sources
>> (drivers/block/virtio_blk.c), I fail to see that they are doing the
>> padding there (but maybe I'm just too blind).
>>
>> Looking at the QEMU sources, it seems like it can deal with both and
>> always sets the status right behind the last byte:
>>
>>     req->in = (void *)in_iov[in_num - 1].iov_base
>>               + in_iov[in_num - 1].iov_len
>>               - sizeof(struct virtio_blk_inhdr);
>>
>> Anyway, I think the virtio spec should be clearer here to avoid bad
>> implementations in the future, so maybe Changpeng or Michael could
>> update the spec here a little bit?
> The data for Discard and Write Zeroes commands are struct virtio_blk_discard_write_zeroes
> aligned, that means you can pass 16 bytes aligned data, based on the segments number supported,
> this is also aligned with NVMe specification and  the SCSI specification.

Ok, thanks, so the "u8 data[][512];" is wrong in the virtio spec in this
case? See:

 https://github.com/oasis-tcs/virtio-spec/blob/master/content.tex#L3944

At least this should be mentioned in the description of the data field,
I think.

 Thomas

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH RFC 1/2] virtio-blk: add DISCARD and WRITE ZEROES features
  2019-01-24 17:23 ` [Qemu-devel] [PATCH RFC 1/2] " Stefano Garzarella
  2019-01-24 17:55   ` Dr. David Alan Gilbert
@ 2019-01-25 14:58   ` Stefan Hajnoczi
  2019-01-25 16:18     ` Stefano Garzarella
  1 sibling, 1 reply; 28+ messages in thread
From: Stefan Hajnoczi @ 2019-01-25 14:58 UTC (permalink / raw)
  To: Stefano Garzarella
  Cc: qemu-devel, Laurent Vivier, Kevin Wolf, Thomas Huth, qemu-block,
	Michael S. Tsirkin, Max Reitz, Stefan Hajnoczi, Paolo Bonzini

[-- Attachment #1: Type: text/plain, Size: 4049 bytes --]

On Thu, Jan 24, 2019 at 06:23:22PM +0100, Stefano Garzarella wrote:
> @@ -584,6 +603,56 @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
>          virtio_blk_free_request(req);
>          break;
>      }
> +    /*
> +     * VIRTIO_BLK_T_DISCARD and VIRTIO_BLK_T_WRITE_ZEROES are defined with
> +     * VIRTIO_BLK_T_OUT flag set. We masked this flag in the switch statement,
> +     * so we must mask it for these requests, then we will check the type.
> +     */
> +    case VIRTIO_BLK_T_DISCARD & ~VIRTIO_BLK_T_OUT:
> +    case VIRTIO_BLK_T_WRITE_ZEROES & ~VIRTIO_BLK_T_OUT:
> +    {
> +        struct virtio_blk_discard_write_zeroes dwz_hdr;
> +        uint64_t sector;
> +        int bytes;
> +
> +        if (unlikely(iov_to_buf(out_iov, out_num, 0, &dwz_hdr,
> +                                sizeof(dwz_hdr)) != sizeof(dwz_hdr))) {

"The data used for discard or write zeroes command is described by one
or more virtio_blk_discard_write_zeroes structs."

This needs to be a loop so that multiple dwz structs can be processed up
to the length of virtio_blk_req->data[].

> +            virtio_error(vdev, "virtio-blk discard/wzeroes header too short");
> +            return -1;
> +        }
> +
> +        sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &dwz_hdr.sector);
> +        bytes = virtio_ldl_p(VIRTIO_DEVICE(req->dev),
> +                             &dwz_hdr.num_sectors) << BDRV_SECTOR_BITS;

Please handle num_sectors << BDRV_SECTOR_BITS overflow so that we don't
need to worry about what happens with an overflowed value later on.

> +
> +        if (!virtio_blk_sect_range_ok(req->dev, sector, bytes)) {
> +            virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
> +            virtio_blk_free_request(req);
> +            return 0;
> +        }
> +
> +        if ((type & ~(VIRTIO_BLK_T_BARRIER)) == VIRTIO_BLK_T_DISCARD) {

Missing device requirement: "the device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for discard commands if the unmap flag is set."

> +            blk_aio_pdiscard(req->dev->blk, sector << BDRV_SECTOR_BITS, bytes,
> +                             virtio_blk_discard_wzeroes_complete, req);
> +        } else if ((type & ~(VIRTIO_BLK_T_BARRIER)) ==
> +                   VIRTIO_BLK_T_WRITE_ZEROES) {
> +            int flags = 0;
> +
> +            if (virtio_ldl_p(VIRTIO_DEVICE(req->dev), &dwz_hdr.flags) &
> +                VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
> +                flags |= BDRV_REQ_MAY_UNMAP;
> +            }
> +
> +            blk_aio_pwrite_zeroes(req->dev->blk, sector << BDRV_SECTOR_BITS,
> +                                  bytes, flags,
> +                                  virtio_blk_discard_wzeroes_complete, req);

Please add block_acct_start(), this is treated as a write.

> +        } else { /* Unsupported if VIRTIO_BLK_T_OUT is not set */
> +            virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
> +            virtio_blk_free_request(req);
> +        }
> +
> +        break;
> +    }
>      default:
>          virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
>          virtio_blk_free_request(req);
> @@ -763,6 +832,14 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
>      blkcfg.alignment_offset = 0;
>      blkcfg.wce = blk_enable_write_cache(s->blk);
>      virtio_stw_p(vdev, &blkcfg.num_queues, s->conf.num_queues);
> +    virtio_stl_p(vdev, &blkcfg.max_discard_sectors, BDRV_REQUEST_MAX_SECTORS);
> +    virtio_stl_p(vdev, &blkcfg.max_discard_seg, 1);
> +    virtio_stl_p(vdev, &blkcfg.discard_sector_alignment,
> +                 blk_size >> BDRV_SECTOR_BITS);
> +    virtio_stl_p(vdev, &blkcfg.max_write_zeroes_sectors,
> +                 BDRV_REQUEST_MAX_SECTORS);
> +    virtio_stl_p(vdev, &blkcfg.max_write_zeroes_seg, 1);
> +    blkcfg.write_zeroes_may_unmap = 1;

Please check hw/scsi/scsi-disk.c for an example of how to initialize
these limits.  It should be possible to set some of them via s->conf so
I'm surprised so many fields are hardcoded in this patch.

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for WRITE_ZEROES command
  2019-01-25  8:49         ` Thomas Huth
  2019-01-25 11:58           ` Liu, Changpeng
@ 2019-01-25 15:12           ` Stefan Hajnoczi
  2019-01-25 19:17               ` [virtio-comment] " Michael S. Tsirkin
  2019-01-25 19:14           ` Michael S. Tsirkin
  2 siblings, 1 reply; 28+ messages in thread
From: Stefan Hajnoczi @ 2019-01-25 15:12 UTC (permalink / raw)
  To: Thomas Huth
  Cc: Stefano Garzarella, Michael S. Tsirkin, Changpeng Liu,
	Laurent Vivier, Kevin Wolf, qemu-block, qemu-devel, Max Reitz,
	Stefan Hajnoczi, Paolo Bonzini, virtio-comment

[-- Attachment #1: Type: text/plain, Size: 3934 bytes --]

On Fri, Jan 25, 2019 at 09:49:03AM +0100, Thomas Huth wrote:
> On 2019-01-25 09:16, Stefano Garzarella wrote:
> > On Fri, Jan 25, 2019 at 07:07:35AM +0100, Thomas Huth wrote:
> >> On 2019-01-25 07:01, Thomas Huth wrote:
> >>> On 2019-01-24 18:23, Stefano Garzarella wrote:
> >>>> If the WRITE_ZEROES feature is enabled, we check this
> >>>> command in the test_basic().
> >>>>
> >>>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> >>>> ---
> >>>>  tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++
> >>>>  1 file changed, 63 insertions(+)
> >>>>
> >>>> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
> >>>> index 04c608764b..8cabbcb85a 100644
> >>>> --- a/tests/virtio-blk-test.c
> >>>> +++ b/tests/virtio-blk-test.c
> >>>> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc,
> >>>>  
> >>>>      guest_free(alloc, req_addr);
> >>>>  
> >>>> +    if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) {
> >>>> +        struct virtio_blk_discard_write_zeroes *dwz_hdr;
> >>>> +        void *expected;
> >>>> +
> >>>> +        /*
> >>>> +         * WRITE_ZEROES request on the same sector of previous test where
> >>>> +         * we wrote "TEST".
> >>>> +         */
> >>>> +        req.type = VIRTIO_BLK_T_WRITE_ZEROES;
> >>>> +        req.data = g_malloc0(512);
> >>>
> >>> Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or
> >>> something similar here, to see whether zeroes or 0xaa is written?
> >>
> >> Ah, never mind, I thought req.data would be a sector buffer here, but
> >> looking at the lines below, it apparently is something different.
> >>
> >> Why do you allocate 512 bytes here? I'd rather expect
> >> g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and
> >> then you could also use a local "struct virtio_blk_discard_write_zeroes
> >> dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely?
> >>
> > 
> > Hi Thomas,
> > it was my initial implementation, but on the first test I discovered
> > that virtio_blk_request() has an assert on the data_size and it requires
> > a multiple of 512 bytes.
> > Then I looked at the virtio-spec #1, and it seems that data should be
> > multiple of 512 bytes also if it contains the struct
> > virtio_blk_discard_write_zeroes. (I'm not sure)
> > 
> > Anyway I tried to allocate only the space for that struct, commented the
> > assert and the test works well.
> > 
> > How do you suggest to proceed?
> 
> Wow, that's a tough question. Looking at the virtio spec, I agree with
> you, it looks like struct virtio_blk_discard_write_zeroes should be
> padded to 512 bytes here. But when I look at the Linux sources
> (drivers/block/virtio_blk.c), I fail to see that they are doing the
> padding there (but maybe I'm just too blind).

The only evidence for "pad to 512 bytes" interpretation that I see in
the spec is "u8 data[][512];".  Or have I missed something more
explicit?

Based on the Linux guest driver code and the lack of more evidence in
the spec, I'm pretty sure data[] doesn't need to be padded to 512 bytes
for discard/write zero requests.

> Looking at the QEMU sources, it seems like it can deal with both and
> always sets the status right behind the last byte:
> 
>     req->in = (void *)in_iov[in_num - 1].iov_base
>               + in_iov[in_num - 1].iov_len
>               - sizeof(struct virtio_blk_inhdr);
> 
> Anyway, I think the virtio spec should be clearer here to avoid bad
> implementations in the future, so maybe Changpeng or Michael could
> update the spec here a little bit?

Yep, good point.  VIRTIO 1.1 is available for public comments, so I've
CCed the list.

Stefan

>  Thomas
> 
> 
> > [1](https://github.com/oasis-tcs/virtio-spec/blob/master/content.tex#L3944)
> > 
> > 
> > Thanks,
> > Stefano
> > 
> 
> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH RFC 1/2] virtio-blk: add DISCARD and WRITE ZEROES features
  2019-01-25 14:58   ` Stefan Hajnoczi
@ 2019-01-25 16:18     ` Stefano Garzarella
  2019-01-27 12:51       ` Stefan Hajnoczi
  0 siblings, 1 reply; 28+ messages in thread
From: Stefano Garzarella @ 2019-01-25 16:18 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: qemu-devel, Laurent Vivier, Kevin Wolf, Thomas Huth, qemu-block,
	Michael S. Tsirkin, Max Reitz, Stefan Hajnoczi, Paolo Bonzini

On Fri, Jan 25, 2019 at 02:58:56PM +0000, Stefan Hajnoczi wrote:
> On Thu, Jan 24, 2019 at 06:23:22PM +0100, Stefano Garzarella wrote:
> > @@ -584,6 +603,56 @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
> >          virtio_blk_free_request(req);
> >          break;
> >      }
> > +    /*
> > +     * VIRTIO_BLK_T_DISCARD and VIRTIO_BLK_T_WRITE_ZEROES are defined with
> > +     * VIRTIO_BLK_T_OUT flag set. We masked this flag in the switch statement,
> > +     * so we must mask it for these requests, then we will check the type.
> > +     */
> > +    case VIRTIO_BLK_T_DISCARD & ~VIRTIO_BLK_T_OUT:
> > +    case VIRTIO_BLK_T_WRITE_ZEROES & ~VIRTIO_BLK_T_OUT:
> > +    {
> > +        struct virtio_blk_discard_write_zeroes dwz_hdr;
> > +        uint64_t sector;
> > +        int bytes;
> > +
> > +        if (unlikely(iov_to_buf(out_iov, out_num, 0, &dwz_hdr,
> > +                                sizeof(dwz_hdr)) != sizeof(dwz_hdr))) {
> 
> "The data used for discard or write zeroes command is described by one
> or more virtio_blk_discard_write_zeroes structs."
> 
> This needs to be a loop so that multiple dwz structs can be processed up
> to the length of virtio_blk_req->data[].
> 

Since I set the "max_discard_seg" and "max_write_zeroes_seg" to 1, I
avoided the loop, but as you suggested, making their configurable, I will
add the loop. Thanks!

> > +            virtio_error(vdev, "virtio-blk discard/wzeroes header too short");
> > +            return -1;
> > +        }
> > +
> > +        sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &dwz_hdr.sector);
> > +        bytes = virtio_ldl_p(VIRTIO_DEVICE(req->dev),
> > +                             &dwz_hdr.num_sectors) << BDRV_SECTOR_BITS;
> 
> Please handle num_sectors << BDRV_SECTOR_BITS overflow so that we don't
> need to worry about what happens with an overflowed value later on.
> 

Should I also check if num_sectors is less than "max_discard_sectors" or
"max_write_zeroes_sectors"?

> > +
> > +        if (!virtio_blk_sect_range_ok(req->dev, sector, bytes)) {
> > +            virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
> > +            virtio_blk_free_request(req);
> > +            return 0;
> > +        }
> > +
> > +        if ((type & ~(VIRTIO_BLK_T_BARRIER)) == VIRTIO_BLK_T_DISCARD) {
> 
> Missing device requirement: "the device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for discard commands if the unmap flag is set."
> 

Ooh, thanks! I'll fix it!

> > +            blk_aio_pdiscard(req->dev->blk, sector << BDRV_SECTOR_BITS, bytes,
> > +                             virtio_blk_discard_wzeroes_complete, req);
> > +        } else if ((type & ~(VIRTIO_BLK_T_BARRIER)) ==
> > +                   VIRTIO_BLK_T_WRITE_ZEROES) {
> > +            int flags = 0;
> > +
> > +            if (virtio_ldl_p(VIRTIO_DEVICE(req->dev), &dwz_hdr.flags) &
> > +                VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
> > +                flags |= BDRV_REQ_MAY_UNMAP;
> > +            }
> > +
> > +            blk_aio_pwrite_zeroes(req->dev->blk, sector << BDRV_SECTOR_BITS,
> > +                                  bytes, flags,
> > +                                  virtio_blk_discard_wzeroes_complete, req);
> 
> Please add block_acct_start(), this is treated as a write.
> 

Sure!

> > +        } else { /* Unsupported if VIRTIO_BLK_T_OUT is not set */
> > +            virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
> > +            virtio_blk_free_request(req);
> > +        }
> > +
> > +        break;
> > +    }
> >      default:
> >          virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
> >          virtio_blk_free_request(req);
> > @@ -763,6 +832,14 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
> >      blkcfg.alignment_offset = 0;
> >      blkcfg.wce = blk_enable_write_cache(s->blk);
> >      virtio_stw_p(vdev, &blkcfg.num_queues, s->conf.num_queues);
> > +    virtio_stl_p(vdev, &blkcfg.max_discard_sectors, BDRV_REQUEST_MAX_SECTORS);
> > +    virtio_stl_p(vdev, &blkcfg.max_discard_seg, 1);
> > +    virtio_stl_p(vdev, &blkcfg.discard_sector_alignment,
> > +                 blk_size >> BDRV_SECTOR_BITS);
> > +    virtio_stl_p(vdev, &blkcfg.max_write_zeroes_sectors,
> > +                 BDRV_REQUEST_MAX_SECTORS);
> > +    virtio_stl_p(vdev, &blkcfg.max_write_zeroes_seg, 1);
> > +    blkcfg.write_zeroes_may_unmap = 1;
> 
> Please check hw/scsi/scsi-disk.c for an example of how to initialize
> these limits.  It should be possible to set some of them via s->conf so
> I'm surprised so many fields are hardcoded in this patch.

Sorry for that! I'll make some of them configurable!

Last question:
Do you think make sense flush the MultiReqBuffer before call
blk_aio_pdiscard() or blk_aio_pwrite_zeroes()?

Thanks,
Stefano

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for WRITE_ZEROES command
  2019-01-25  8:49         ` Thomas Huth
  2019-01-25 11:58           ` Liu, Changpeng
  2019-01-25 15:12           ` Stefan Hajnoczi
@ 2019-01-25 19:14           ` Michael S. Tsirkin
  2 siblings, 0 replies; 28+ messages in thread
From: Michael S. Tsirkin @ 2019-01-25 19:14 UTC (permalink / raw)
  To: Thomas Huth
  Cc: Stefano Garzarella, Changpeng Liu, qemu-devel, Laurent Vivier,
	Kevin Wolf, qemu-block, Max Reitz, Stefan Hajnoczi,
	Paolo Bonzini

On Fri, Jan 25, 2019 at 09:49:03AM +0100, Thomas Huth wrote:
> On 2019-01-25 09:16, Stefano Garzarella wrote:
> > On Fri, Jan 25, 2019 at 07:07:35AM +0100, Thomas Huth wrote:
> >> On 2019-01-25 07:01, Thomas Huth wrote:
> >>> On 2019-01-24 18:23, Stefano Garzarella wrote:
> >>>> If the WRITE_ZEROES feature is enabled, we check this
> >>>> command in the test_basic().
> >>>>
> >>>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> >>>> ---
> >>>>  tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++
> >>>>  1 file changed, 63 insertions(+)
> >>>>
> >>>> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
> >>>> index 04c608764b..8cabbcb85a 100644
> >>>> --- a/tests/virtio-blk-test.c
> >>>> +++ b/tests/virtio-blk-test.c
> >>>> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc,
> >>>>  
> >>>>      guest_free(alloc, req_addr);
> >>>>  
> >>>> +    if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) {
> >>>> +        struct virtio_blk_discard_write_zeroes *dwz_hdr;
> >>>> +        void *expected;
> >>>> +
> >>>> +        /*
> >>>> +         * WRITE_ZEROES request on the same sector of previous test where
> >>>> +         * we wrote "TEST".
> >>>> +         */
> >>>> +        req.type = VIRTIO_BLK_T_WRITE_ZEROES;
> >>>> +        req.data = g_malloc0(512);
> >>>
> >>> Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or
> >>> something similar here, to see whether zeroes or 0xaa is written?
> >>
> >> Ah, never mind, I thought req.data would be a sector buffer here, but
> >> looking at the lines below, it apparently is something different.
> >>
> >> Why do you allocate 512 bytes here? I'd rather expect
> >> g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and
> >> then you could also use a local "struct virtio_blk_discard_write_zeroes
> >> dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely?
> >>
> > 
> > Hi Thomas,
> > it was my initial implementation, but on the first test I discovered
> > that virtio_blk_request() has an assert on the data_size and it requires
> > a multiple of 512 bytes.
> > Then I looked at the virtio-spec #1, and it seems that data should be
> > multiple of 512 bytes also if it contains the struct
> > virtio_blk_discard_write_zeroes. (I'm not sure)
> > 
> > Anyway I tried to allocate only the space for that struct, commented the
> > assert and the test works well.
> > 
> > How do you suggest to proceed?
> 
> Wow, that's a tough question. Looking at the virtio spec, I agree with
> you, it looks like struct virtio_blk_discard_write_zeroes should be
> padded to 512 bytes here. But when I look at the Linux sources
> (drivers/block/virtio_blk.c), I fail to see that they are doing the
> padding there (but maybe I'm just too blind).
> 
> Looking at the QEMU sources, it seems like it can deal with both and
> always sets the status right behind the last byte:
> 
>     req->in = (void *)in_iov[in_num - 1].iov_base
>               + in_iov[in_num - 1].iov_len
>               - sizeof(struct virtio_blk_inhdr);
> 
> Anyway, I think the virtio spec should be clearer here to avoid bad
> implementations in the future, so maybe Changpeng or Michael could
> update the spec here a little bit?
> 
>  Thomas

I agree the spec makes it look like data is padded to 512 bytes.

I'm happy to write it up but let's decide what it is we want to
support. Arbitrary padding the way qemu does it? Or packed format?

> 
> > [1](https://github.com/oasis-tcs/virtio-spec/blob/master/content.tex#L3944)
> > 
> > 
> > Thanks,
> > Stefano
> > 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for WRITE_ZEROES command
  2019-01-25 15:12           ` Stefan Hajnoczi
@ 2019-01-25 19:17               ` Michael S. Tsirkin
  0 siblings, 0 replies; 28+ messages in thread
From: Michael S. Tsirkin @ 2019-01-25 19:17 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Thomas Huth, Stefano Garzarella, Changpeng Liu, Laurent Vivier,
	Kevin Wolf, qemu-block, qemu-devel, Max Reitz, Stefan Hajnoczi,
	Paolo Bonzini, virtio-comment

On Fri, Jan 25, 2019 at 03:12:45PM +0000, Stefan Hajnoczi wrote:
> On Fri, Jan 25, 2019 at 09:49:03AM +0100, Thomas Huth wrote:
> > On 2019-01-25 09:16, Stefano Garzarella wrote:
> > > On Fri, Jan 25, 2019 at 07:07:35AM +0100, Thomas Huth wrote:
> > >> On 2019-01-25 07:01, Thomas Huth wrote:
> > >>> On 2019-01-24 18:23, Stefano Garzarella wrote:
> > >>>> If the WRITE_ZEROES feature is enabled, we check this
> > >>>> command in the test_basic().
> > >>>>
> > >>>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> > >>>> ---
> > >>>>  tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++
> > >>>>  1 file changed, 63 insertions(+)
> > >>>>
> > >>>> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
> > >>>> index 04c608764b..8cabbcb85a 100644
> > >>>> --- a/tests/virtio-blk-test.c
> > >>>> +++ b/tests/virtio-blk-test.c
> > >>>> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc,
> > >>>>  
> > >>>>      guest_free(alloc, req_addr);
> > >>>>  
> > >>>> +    if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) {
> > >>>> +        struct virtio_blk_discard_write_zeroes *dwz_hdr;
> > >>>> +        void *expected;
> > >>>> +
> > >>>> +        /*
> > >>>> +         * WRITE_ZEROES request on the same sector of previous test where
> > >>>> +         * we wrote "TEST".
> > >>>> +         */
> > >>>> +        req.type = VIRTIO_BLK_T_WRITE_ZEROES;
> > >>>> +        req.data = g_malloc0(512);
> > >>>
> > >>> Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or
> > >>> something similar here, to see whether zeroes or 0xaa is written?
> > >>
> > >> Ah, never mind, I thought req.data would be a sector buffer here, but
> > >> looking at the lines below, it apparently is something different.
> > >>
> > >> Why do you allocate 512 bytes here? I'd rather expect
> > >> g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and
> > >> then you could also use a local "struct virtio_blk_discard_write_zeroes
> > >> dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely?
> > >>
> > > 
> > > Hi Thomas,
> > > it was my initial implementation, but on the first test I discovered
> > > that virtio_blk_request() has an assert on the data_size and it requires
> > > a multiple of 512 bytes.
> > > Then I looked at the virtio-spec #1, and it seems that data should be
> > > multiple of 512 bytes also if it contains the struct
> > > virtio_blk_discard_write_zeroes. (I'm not sure)
> > > 
> > > Anyway I tried to allocate only the space for that struct, commented the
> > > assert and the test works well.
> > > 
> > > How do you suggest to proceed?
> > 
> > Wow, that's a tough question. Looking at the virtio spec, I agree with
> > you, it looks like struct virtio_blk_discard_write_zeroes should be
> > padded to 512 bytes here. But when I look at the Linux sources
> > (drivers/block/virtio_blk.c), I fail to see that they are doing the
> > padding there (but maybe I'm just too blind).
> 
> The only evidence for "pad to 512 bytes" interpretation that I see in
> the spec is "u8 data[][512];".  Or have I missed something more
> explicit?

That's it. But if it doesn't mean "any number of 512 byte chunks"
then what does it mean?

> Based on the Linux guest driver code and the lack of more evidence in
> the spec, I'm pretty sure data[] doesn't need to be padded to 512 bytes
> for discard/write zero requests.

OK. Must devices support such padding?

> > Looking at the QEMU sources, it seems like it can deal with both and
> > always sets the status right behind the last byte:
> > 
> >     req->in = (void *)in_iov[in_num - 1].iov_base
> >               + in_iov[in_num - 1].iov_len
> >               - sizeof(struct virtio_blk_inhdr);
> > 
> > Anyway, I think the virtio spec should be clearer here to avoid bad
> > implementations in the future, so maybe Changpeng or Michael could
> > update the spec here a little bit?
> 
> Yep, good point.  VIRTIO 1.1 is available for public comments, so I've
> CCed the list.
> 
> Stefan

Thanks!
Care creating a github issue? And maybe proposing a spec patch.

> >  Thomas
> > 
> > 
> > > [1](https://github.com/oasis-tcs/virtio-spec/blob/master/content.tex#L3944)
> > > 
> > > 
> > > Thanks,
> > > Stefano
> > > 
> > 
> > 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [virtio-comment] Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for WRITE_ZEROES command
@ 2019-01-25 19:17               ` Michael S. Tsirkin
  0 siblings, 0 replies; 28+ messages in thread
From: Michael S. Tsirkin @ 2019-01-25 19:17 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Thomas Huth, Stefano Garzarella, Changpeng Liu, Laurent Vivier,
	Kevin Wolf, qemu-block, qemu-devel, Max Reitz, Stefan Hajnoczi,
	Paolo Bonzini, virtio-comment

On Fri, Jan 25, 2019 at 03:12:45PM +0000, Stefan Hajnoczi wrote:
> On Fri, Jan 25, 2019 at 09:49:03AM +0100, Thomas Huth wrote:
> > On 2019-01-25 09:16, Stefano Garzarella wrote:
> > > On Fri, Jan 25, 2019 at 07:07:35AM +0100, Thomas Huth wrote:
> > >> On 2019-01-25 07:01, Thomas Huth wrote:
> > >>> On 2019-01-24 18:23, Stefano Garzarella wrote:
> > >>>> If the WRITE_ZEROES feature is enabled, we check this
> > >>>> command in the test_basic().
> > >>>>
> > >>>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> > >>>> ---
> > >>>>  tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++
> > >>>>  1 file changed, 63 insertions(+)
> > >>>>
> > >>>> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
> > >>>> index 04c608764b..8cabbcb85a 100644
> > >>>> --- a/tests/virtio-blk-test.c
> > >>>> +++ b/tests/virtio-blk-test.c
> > >>>> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc,
> > >>>>  
> > >>>>      guest_free(alloc, req_addr);
> > >>>>  
> > >>>> +    if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) {
> > >>>> +        struct virtio_blk_discard_write_zeroes *dwz_hdr;
> > >>>> +        void *expected;
> > >>>> +
> > >>>> +        /*
> > >>>> +         * WRITE_ZEROES request on the same sector of previous test where
> > >>>> +         * we wrote "TEST".
> > >>>> +         */
> > >>>> +        req.type = VIRTIO_BLK_T_WRITE_ZEROES;
> > >>>> +        req.data = g_malloc0(512);
> > >>>
> > >>> Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or
> > >>> something similar here, to see whether zeroes or 0xaa is written?
> > >>
> > >> Ah, never mind, I thought req.data would be a sector buffer here, but
> > >> looking at the lines below, it apparently is something different.
> > >>
> > >> Why do you allocate 512 bytes here? I'd rather expect
> > >> g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and
> > >> then you could also use a local "struct virtio_blk_discard_write_zeroes
> > >> dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely?
> > >>
> > > 
> > > Hi Thomas,
> > > it was my initial implementation, but on the first test I discovered
> > > that virtio_blk_request() has an assert on the data_size and it requires
> > > a multiple of 512 bytes.
> > > Then I looked at the virtio-spec #1, and it seems that data should be
> > > multiple of 512 bytes also if it contains the struct
> > > virtio_blk_discard_write_zeroes. (I'm not sure)
> > > 
> > > Anyway I tried to allocate only the space for that struct, commented the
> > > assert and the test works well.
> > > 
> > > How do you suggest to proceed?
> > 
> > Wow, that's a tough question. Looking at the virtio spec, I agree with
> > you, it looks like struct virtio_blk_discard_write_zeroes should be
> > padded to 512 bytes here. But when I look at the Linux sources
> > (drivers/block/virtio_blk.c), I fail to see that they are doing the
> > padding there (but maybe I'm just too blind).
> 
> The only evidence for "pad to 512 bytes" interpretation that I see in
> the spec is "u8 data[][512];".  Or have I missed something more
> explicit?

That's it. But if it doesn't mean "any number of 512 byte chunks"
then what does it mean?

> Based on the Linux guest driver code and the lack of more evidence in
> the spec, I'm pretty sure data[] doesn't need to be padded to 512 bytes
> for discard/write zero requests.

OK. Must devices support such padding?

> > Looking at the QEMU sources, it seems like it can deal with both and
> > always sets the status right behind the last byte:
> > 
> >     req->in = (void *)in_iov[in_num - 1].iov_base
> >               + in_iov[in_num - 1].iov_len
> >               - sizeof(struct virtio_blk_inhdr);
> > 
> > Anyway, I think the virtio spec should be clearer here to avoid bad
> > implementations in the future, so maybe Changpeng or Michael could
> > update the spec here a little bit?
> 
> Yep, good point.  VIRTIO 1.1 is available for public comments, so I've
> CCed the list.
> 
> Stefan

Thanks!
Care creating a github issue? And maybe proposing a spec patch.

> >  Thomas
> > 
> > 
> > > [1](https://github.com/oasis-tcs/virtio-spec/blob/master/content.tex#L3944)
> > > 
> > > 
> > > Thanks,
> > > Stefano
> > > 
> > 
> > 



This publicly archived list offers a means to provide input to the
OASIS Virtual I/O Device (VIRTIO) TC.

In order to verify user consent to the Feedback License terms and
to minimize spam in the list archive, subscription is required
before posting.

Subscribe: virtio-comment-subscribe@lists.oasis-open.org
Unsubscribe: virtio-comment-unsubscribe@lists.oasis-open.org
List help: virtio-comment-help@lists.oasis-open.org
List archive: https://lists.oasis-open.org/archives/virtio-comment/
Feedback License: https://www.oasis-open.org/who/ipr/feedback_license.pdf
List Guidelines: https://www.oasis-open.org/policies-guidelines/mailing-lists
Committee: https://www.oasis-open.org/committees/virtio/
Join OASIS: https://www.oasis-open.org/join/


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for WRITE_ZEROES command
  2019-01-25 12:48             ` Thomas Huth
@ 2019-01-25 19:18               ` Michael S. Tsirkin
  0 siblings, 0 replies; 28+ messages in thread
From: Michael S. Tsirkin @ 2019-01-25 19:18 UTC (permalink / raw)
  To: Thomas Huth
  Cc: Liu, Changpeng, Stefano Garzarella, Laurent Vivier, Kevin Wolf,
	qemu-block, qemu-devel, Max Reitz, Stefan Hajnoczi,
	Paolo Bonzini

On Fri, Jan 25, 2019 at 01:48:26PM +0100, Thomas Huth wrote:
> On 2019-01-25 12:58, Liu, Changpeng wrote:
> > 
> > 
> >> -----Original Message-----
> >> From: Thomas Huth [mailto:thuth@redhat.com]
> >> Sent: Friday, January 25, 2019 4:49 PM
> >> To: Stefano Garzarella <sgarzare@redhat.com>; Michael S. Tsirkin
> >> <mst@redhat.com>; Liu, Changpeng <changpeng.liu@intel.com>
> >> Cc: qemu-devel@nongnu.org; Laurent Vivier <lvivier@redhat.com>; Kevin Wolf
> >> <kwolf@redhat.com>; qemu-block@nongnu.org; Max Reitz
> >> <mreitz@redhat.com>; Stefan Hajnoczi <stefanha@redhat.com>; Paolo Bonzini
> >> <pbonzini@redhat.com>
> >> Subject: Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for
> >> WRITE_ZEROES command
> >>
> >> On 2019-01-25 09:16, Stefano Garzarella wrote:
> >>> On Fri, Jan 25, 2019 at 07:07:35AM +0100, Thomas Huth wrote:
> >>>> On 2019-01-25 07:01, Thomas Huth wrote:
> >>>>> On 2019-01-24 18:23, Stefano Garzarella wrote:
> >>>>>> If the WRITE_ZEROES feature is enabled, we check this
> >>>>>> command in the test_basic().
> >>>>>>
> >>>>>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> >>>>>> ---
> >>>>>>  tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++
> >>>>>>  1 file changed, 63 insertions(+)
> >>>>>>
> >>>>>> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
> >>>>>> index 04c608764b..8cabbcb85a 100644
> >>>>>> --- a/tests/virtio-blk-test.c
> >>>>>> +++ b/tests/virtio-blk-test.c
> >>>>>> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev,
> >> QGuestAllocator *alloc,
> >>>>>>
> >>>>>>      guest_free(alloc, req_addr);
> >>>>>>
> >>>>>> +    if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) {
> >>>>>> +        struct virtio_blk_discard_write_zeroes *dwz_hdr;
> >>>>>> +        void *expected;
> >>>>>> +
> >>>>>> +        /*
> >>>>>> +         * WRITE_ZEROES request on the same sector of previous test where
> >>>>>> +         * we wrote "TEST".
> >>>>>> +         */
> >>>>>> +        req.type = VIRTIO_BLK_T_WRITE_ZEROES;
> >>>>>> +        req.data = g_malloc0(512);
> >>>>>
> >>>>> Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or
> >>>>> something similar here, to see whether zeroes or 0xaa is written?
> >>>>
> >>>> Ah, never mind, I thought req.data would be a sector buffer here, but
> >>>> looking at the lines below, it apparently is something different.
> >>>>
> >>>> Why do you allocate 512 bytes here? I'd rather expect
> >>>> g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and
> >>>> then you could also use a local "struct virtio_blk_discard_write_zeroes
> >>>> dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely?
> >>>>
> >>>
> >>> Hi Thomas,
> >>> it was my initial implementation, but on the first test I discovered
> >>> that virtio_blk_request() has an assert on the data_size and it requires
> >>> a multiple of 512 bytes.
> >>> Then I looked at the virtio-spec #1, and it seems that data should be
> >>> multiple of 512 bytes also if it contains the struct
> >>> virtio_blk_discard_write_zeroes. (I'm not sure)
> >>>
> >>> Anyway I tried to allocate only the space for that struct, commented the
> >>> assert and the test works well.
> >>>
> >>> How do you suggest to proceed?
> >>
> >> Wow, that's a tough question. Looking at the virtio spec, I agree with
> >> you, it looks like struct virtio_blk_discard_write_zeroes should be
> >> padded to 512 bytes here. But when I look at the Linux sources
> >> (drivers/block/virtio_blk.c), I fail to see that they are doing the
> >> padding there (but maybe I'm just too blind).
> >>
> >> Looking at the QEMU sources, it seems like it can deal with both and
> >> always sets the status right behind the last byte:
> >>
> >>     req->in = (void *)in_iov[in_num - 1].iov_base
> >>               + in_iov[in_num - 1].iov_len
> >>               - sizeof(struct virtio_blk_inhdr);
> >>
> >> Anyway, I think the virtio spec should be clearer here to avoid bad
> >> implementations in the future, so maybe Changpeng or Michael could
> >> update the spec here a little bit?
> > The data for Discard and Write Zeroes commands are struct virtio_blk_discard_write_zeroes
> > aligned, that means you can pass 16 bytes aligned data, based on the segments number supported,
> > this is also aligned with NVMe specification and  the SCSI specification.
> 
> Ok, thanks, so the "u8 data[][512];" is wrong in the virtio spec in this
> case? See:
> 
>  https://github.com/oasis-tcs/virtio-spec/blob/master/content.tex#L3944
> 
> At least this should be mentioned in the description of the data field,
> I think.
> 
>  Thomas

OK. Is it a multiple of 512 for all other operations?

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH RFC 1/2] virtio-blk: add DISCARD and WRITE ZEROES features
  2019-01-25 16:18     ` Stefano Garzarella
@ 2019-01-27 12:51       ` Stefan Hajnoczi
  2019-01-28  8:28         ` Stefano Garzarella
  0 siblings, 1 reply; 28+ messages in thread
From: Stefan Hajnoczi @ 2019-01-27 12:51 UTC (permalink / raw)
  To: Stefano Garzarella
  Cc: qemu-devel, Laurent Vivier, Kevin Wolf, Thomas Huth, qemu-block,
	Michael S. Tsirkin, Max Reitz, Stefan Hajnoczi, Paolo Bonzini

[-- Attachment #1: Type: text/plain, Size: 1504 bytes --]

On Fri, Jan 25, 2019 at 05:18:13PM +0100, Stefano Garzarella wrote:
> On Fri, Jan 25, 2019 at 02:58:56PM +0000, Stefan Hajnoczi wrote:
> > On Thu, Jan 24, 2019 at 06:23:22PM +0100, Stefano Garzarella wrote:
> > > +            virtio_error(vdev, "virtio-blk discard/wzeroes header too short");
> > > +            return -1;
> > > +        }
> > > +
> > > +        sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &dwz_hdr.sector);
> > > +        bytes = virtio_ldl_p(VIRTIO_DEVICE(req->dev),
> > > +                             &dwz_hdr.num_sectors) << BDRV_SECTOR_BITS;
> > 
> > Please handle num_sectors << BDRV_SECTOR_BITS overflow so that we don't
> > need to worry about what happens with an overflowed value later on.
> > 
> 
> Should I also check if num_sectors is less than "max_discard_sectors" or
> "max_write_zeroes_sectors"?

Yes, anything that virtio_blk_sect_range_ok() doesn't check needs to be
checked manually in this patch.  I can't see a reason for allowing
requests through that exceed the limit.

> Do you think make sense flush the MultiReqBuffer before call
> blk_aio_pdiscard() or blk_aio_pwrite_zeroes()?

I don't think that is necessary since virtio-blk doesn't guarantee
ordering (the legacy barrier feature isn't supported by QEMU).

Even the MultiReqBuffer is flushed, -drive aio=threads (the default
setting) could end up submitting discard/write zeroes before "earlier"
requests - it depends on host kernel thread scheduler decisions.

Stefan

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for WRITE_ZEROES command
  2019-01-25 19:17               ` [virtio-comment] " Michael S. Tsirkin
  (?)
@ 2019-01-27 12:57               ` Stefan Hajnoczi
  2019-01-27 18:42                   ` [virtio-comment] " Michael S. Tsirkin
  -1 siblings, 1 reply; 28+ messages in thread
From: Stefan Hajnoczi @ 2019-01-27 12:57 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Thomas Huth, Stefano Garzarella, Changpeng Liu, Laurent Vivier,
	Kevin Wolf, qemu-block, qemu-devel, Max Reitz, Stefan Hajnoczi,
	Paolo Bonzini, virtio-comment

[-- Attachment #1: Type: text/plain, Size: 1249 bytes --]

On Fri, Jan 25, 2019 at 02:17:01PM -0500, Michael S. Tsirkin wrote:
> On Fri, Jan 25, 2019 at 03:12:45PM +0000, Stefan Hajnoczi wrote:
> > Based on the Linux guest driver code and the lack of more evidence in
> > the spec, I'm pretty sure data[] doesn't need to be padded to 512 bytes
> > for discard/write zero requests.
> 
> OK. Must devices support such padding?

I see no reason to require padding.  Host APIs and physical storage
controllers do not require it.

> > > Looking at the QEMU sources, it seems like it can deal with both and
> > > always sets the status right behind the last byte:
> > > 
> > >     req->in = (void *)in_iov[in_num - 1].iov_base
> > >               + in_iov[in_num - 1].iov_len
> > >               - sizeof(struct virtio_blk_inhdr);
> > > 
> > > Anyway, I think the virtio spec should be clearer here to avoid bad
> > > implementations in the future, so maybe Changpeng or Michael could
> > > update the spec here a little bit?
> > 
> > Yep, good point.  VIRTIO 1.1 is available for public comments, so I've
> > CCed the list.
> > 
> > Stefan
> 
> Thanks!
> Care creating a github issue? And maybe proposing a spec patch.

Okay.  I will do this on Wednesday 30th of January.

Stefan

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for WRITE_ZEROES command
  2019-01-27 12:57               ` Stefan Hajnoczi
@ 2019-01-27 18:42                   ` Michael S. Tsirkin
  0 siblings, 0 replies; 28+ messages in thread
From: Michael S. Tsirkin @ 2019-01-27 18:42 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Thomas Huth, Stefano Garzarella, Changpeng Liu, Laurent Vivier,
	Kevin Wolf, qemu-block, qemu-devel, Max Reitz, Stefan Hajnoczi,
	Paolo Bonzini, virtio-comment

On Sun, Jan 27, 2019 at 12:57:20PM +0000, Stefan Hajnoczi wrote:
> On Fri, Jan 25, 2019 at 02:17:01PM -0500, Michael S. Tsirkin wrote:
> > On Fri, Jan 25, 2019 at 03:12:45PM +0000, Stefan Hajnoczi wrote:
> > > Based on the Linux guest driver code and the lack of more evidence in
> > > the spec, I'm pretty sure data[] doesn't need to be padded to 512 bytes
> > > for discard/write zero requests.
> > 
> > OK. Must devices support such padding?
> 
> I see no reason to require padding.  Host APIs and physical storage
> controllers do not require it.

I'm not talking about requiring padding I'm talking about
supporting padding on device side.

One reason would be ease of extending the spec for guest.

E.g. imagine adding more fields to the command.
With suppport for padding guest simply adds fields
to its structure, and the unused ones are ignored
by device. Without support for padding you need two
versions of the structure.

Another reason would be compatibility.  For better or worse the spec
does make it look like 512 padding is present. This patch was merged very
recently so I agree it's not too late to clarify it that it's not
required, but it seems a bit too much to disallow that completely.
Let's assume for a minute a device turns up that already
implemented the 512 byte assumption? If padding is allowed then we can
write a driver that works with both devices.


> > > > Looking at the QEMU sources, it seems like it can deal with both and
> > > > always sets the status right behind the last byte:
> > > > 
> > > >     req->in = (void *)in_iov[in_num - 1].iov_base
> > > >               + in_iov[in_num - 1].iov_len
> > > >               - sizeof(struct virtio_blk_inhdr);
> > > > 
> > > > Anyway, I think the virtio spec should be clearer here to avoid bad
> > > > implementations in the future, so maybe Changpeng or Michael could
> > > > update the spec here a little bit?
> > > 
> > > Yep, good point.  VIRTIO 1.1 is available for public comments, so I've
> > > CCed the list.
> > > 
> > > Stefan
> > 
> > Thanks!
> > Care creating a github issue? And maybe proposing a spec patch.
> 
> Okay.  I will do this on Wednesday 30th of January.
> 
> Stefan

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [virtio-comment] Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for WRITE_ZEROES command
@ 2019-01-27 18:42                   ` Michael S. Tsirkin
  0 siblings, 0 replies; 28+ messages in thread
From: Michael S. Tsirkin @ 2019-01-27 18:42 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Thomas Huth, Stefano Garzarella, Changpeng Liu, Laurent Vivier,
	Kevin Wolf, qemu-block, qemu-devel, Max Reitz, Stefan Hajnoczi,
	Paolo Bonzini, virtio-comment

On Sun, Jan 27, 2019 at 12:57:20PM +0000, Stefan Hajnoczi wrote:
> On Fri, Jan 25, 2019 at 02:17:01PM -0500, Michael S. Tsirkin wrote:
> > On Fri, Jan 25, 2019 at 03:12:45PM +0000, Stefan Hajnoczi wrote:
> > > Based on the Linux guest driver code and the lack of more evidence in
> > > the spec, I'm pretty sure data[] doesn't need to be padded to 512 bytes
> > > for discard/write zero requests.
> > 
> > OK. Must devices support such padding?
> 
> I see no reason to require padding.  Host APIs and physical storage
> controllers do not require it.

I'm not talking about requiring padding I'm talking about
supporting padding on device side.

One reason would be ease of extending the spec for guest.

E.g. imagine adding more fields to the command.
With suppport for padding guest simply adds fields
to its structure, and the unused ones are ignored
by device. Without support for padding you need two
versions of the structure.

Another reason would be compatibility.  For better or worse the spec
does make it look like 512 padding is present. This patch was merged very
recently so I agree it's not too late to clarify it that it's not
required, but it seems a bit too much to disallow that completely.
Let's assume for a minute a device turns up that already
implemented the 512 byte assumption? If padding is allowed then we can
write a driver that works with both devices.


> > > > Looking at the QEMU sources, it seems like it can deal with both and
> > > > always sets the status right behind the last byte:
> > > > 
> > > >     req->in = (void *)in_iov[in_num - 1].iov_base
> > > >               + in_iov[in_num - 1].iov_len
> > > >               - sizeof(struct virtio_blk_inhdr);
> > > > 
> > > > Anyway, I think the virtio spec should be clearer here to avoid bad
> > > > implementations in the future, so maybe Changpeng or Michael could
> > > > update the spec here a little bit?
> > > 
> > > Yep, good point.  VIRTIO 1.1 is available for public comments, so I've
> > > CCed the list.
> > > 
> > > Stefan
> > 
> > Thanks!
> > Care creating a github issue? And maybe proposing a spec patch.
> 
> Okay.  I will do this on Wednesday 30th of January.
> 
> Stefan



This publicly archived list offers a means to provide input to the
OASIS Virtual I/O Device (VIRTIO) TC.

In order to verify user consent to the Feedback License terms and
to minimize spam in the list archive, subscription is required
before posting.

Subscribe: virtio-comment-subscribe@lists.oasis-open.org
Unsubscribe: virtio-comment-unsubscribe@lists.oasis-open.org
List help: virtio-comment-help@lists.oasis-open.org
List archive: https://lists.oasis-open.org/archives/virtio-comment/
Feedback License: https://www.oasis-open.org/who/ipr/feedback_license.pdf
List Guidelines: https://www.oasis-open.org/policies-guidelines/mailing-lists
Committee: https://www.oasis-open.org/committees/virtio/
Join OASIS: https://www.oasis-open.org/join/


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH RFC 1/2] virtio-blk: add DISCARD and WRITE ZEROES features
  2019-01-27 12:51       ` Stefan Hajnoczi
@ 2019-01-28  8:28         ` Stefano Garzarella
  0 siblings, 0 replies; 28+ messages in thread
From: Stefano Garzarella @ 2019-01-28  8:28 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: qemu-devel, Laurent Vivier, Kevin Wolf, Thomas Huth, qemu-block,
	Michael S. Tsirkin, Max Reitz, Stefan Hajnoczi, Paolo Bonzini

On Sun, Jan 27, 2019 at 12:51:44PM +0000, Stefan Hajnoczi wrote:
> On Fri, Jan 25, 2019 at 05:18:13PM +0100, Stefano Garzarella wrote:
> > On Fri, Jan 25, 2019 at 02:58:56PM +0000, Stefan Hajnoczi wrote:
> > > On Thu, Jan 24, 2019 at 06:23:22PM +0100, Stefano Garzarella wrote:
> > > > +            virtio_error(vdev, "virtio-blk discard/wzeroes header too short");
> > > > +            return -1;
> > > > +        }
> > > > +
> > > > +        sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &dwz_hdr.sector);
> > > > +        bytes = virtio_ldl_p(VIRTIO_DEVICE(req->dev),
> > > > +                             &dwz_hdr.num_sectors) << BDRV_SECTOR_BITS;
> > > 
> > > Please handle num_sectors << BDRV_SECTOR_BITS overflow so that we don't
> > > need to worry about what happens with an overflowed value later on.
> > > 
> > 
> > Should I also check if num_sectors is less than "max_discard_sectors" or
> > "max_write_zeroes_sectors"?
> 
> Yes, anything that virtio_blk_sect_range_ok() doesn't check needs to be
> checked manually in this patch.  I can't see a reason for allowing
> requests through that exceed the limit.
> 
> > Do you think make sense flush the MultiReqBuffer before call
> > blk_aio_pdiscard() or blk_aio_pwrite_zeroes()?
> 
> I don't think that is necessary since virtio-blk doesn't guarantee
> ordering (the legacy barrier feature isn't supported by QEMU).
> 
> Even the MultiReqBuffer is flushed, -drive aio=threads (the default
> setting) could end up submitting discard/write zeroes before "earlier"
> requests - it depends on host kernel thread scheduler decisions.
> 

Thanks for the details!

Stefano

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for WRITE_ZEROES command
  2019-01-27 18:42                   ` [virtio-comment] " Michael S. Tsirkin
@ 2019-01-30  7:39                     ` Stefan Hajnoczi
  -1 siblings, 0 replies; 28+ messages in thread
From: Stefan Hajnoczi @ 2019-01-30  7:39 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Stefan Hajnoczi, Thomas Huth, Stefano Garzarella, Changpeng Liu,
	Laurent Vivier, Kevin Wolf, qemu-block, qemu-devel, Max Reitz,
	Paolo Bonzini, virtio-comment

[-- Attachment #1: Type: text/plain, Size: 2869 bytes --]

On Sun, Jan 27, 2019 at 01:42:03PM -0500, Michael S. Tsirkin wrote:
> On Sun, Jan 27, 2019 at 12:57:20PM +0000, Stefan Hajnoczi wrote:
> > On Fri, Jan 25, 2019 at 02:17:01PM -0500, Michael S. Tsirkin wrote:
> > > On Fri, Jan 25, 2019 at 03:12:45PM +0000, Stefan Hajnoczi wrote:
> > > > Based on the Linux guest driver code and the lack of more evidence in
> > > > the spec, I'm pretty sure data[] doesn't need to be padded to 512 bytes
> > > > for discard/write zero requests.
> > > 
> > > OK. Must devices support such padding?
> > 
> > I see no reason to require padding.  Host APIs and physical storage
> > controllers do not require it.
> 
> I'm not talking about requiring padding I'm talking about
> supporting padding on device side.
> 
> One reason would be ease of extending the spec for guest.
> 
> E.g. imagine adding more fields to the command.
> With suppport for padding guest simply adds fields
> to its structure, and the unused ones are ignored
> by device. Without support for padding you need two
> versions of the structure.

The simplest way is to say each struct virtio_blk_discard_write_zeroes
(currently 16 bytes long) is padded to 512 bytes, but this wastes a lot
of memory.

Stefano and I looked at the status of multiple segment discard/write
zeroes in Linux: only the NVMe driver supports multiple segments.  Even
SCSI sd doesn't support multiple segments.

Userspace APIs do not offer a way for applications to submit multiple
segments in a single call.  Only the block I/O scheduler creates
requests with multiple segments.

SPDK's virtio-blk driver and vhost-user-blk device backend also only
support 1 segment per request.

Given this situation, I'm not sure it's worth the complexity to spec out
a fancy padding scheme that no one will implement.  Wasting 512 - 16
bytes per segment also seems unattractive.  IMO it's acceptable to
introduce a feature bit if struct virtio_blk_discard_write_zeroes needs
extension in the future.

> Another reason would be compatibility.  For better or worse the spec
> does make it look like 512 padding is present. This patch was merged very
> recently so I agree it's not too late to clarify it that it's not
> required, but it seems a bit too much to disallow that completely.
> Let's assume for a minute a device turns up that already
> implemented the 512 byte assumption? If padding is allowed then we can
> write a driver that works with both devices.

The Linux guest driver doesn't honor 512 byte padding, so device
backends would already need to make an exception if we spec 512 byte
padding.

Let's begin VIRTIO 1.1 with the state of real-world implementations:
data[] must be a multiple of sizeof(struct
virtio_blk_discard_write_zeroes).

I'll send a patch to the virtio TC and we can discuss further in that
thread.

Stefan

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [virtio-comment] Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for WRITE_ZEROES command
@ 2019-01-30  7:39                     ` Stefan Hajnoczi
  0 siblings, 0 replies; 28+ messages in thread
From: Stefan Hajnoczi @ 2019-01-30  7:39 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Stefan Hajnoczi, Thomas Huth, Stefano Garzarella, Changpeng Liu,
	Laurent Vivier, Kevin Wolf, qemu-block, qemu-devel, Max Reitz,
	Paolo Bonzini, virtio-comment

[-- Attachment #1: Type: text/plain, Size: 2869 bytes --]

On Sun, Jan 27, 2019 at 01:42:03PM -0500, Michael S. Tsirkin wrote:
> On Sun, Jan 27, 2019 at 12:57:20PM +0000, Stefan Hajnoczi wrote:
> > On Fri, Jan 25, 2019 at 02:17:01PM -0500, Michael S. Tsirkin wrote:
> > > On Fri, Jan 25, 2019 at 03:12:45PM +0000, Stefan Hajnoczi wrote:
> > > > Based on the Linux guest driver code and the lack of more evidence in
> > > > the spec, I'm pretty sure data[] doesn't need to be padded to 512 bytes
> > > > for discard/write zero requests.
> > > 
> > > OK. Must devices support such padding?
> > 
> > I see no reason to require padding.  Host APIs and physical storage
> > controllers do not require it.
> 
> I'm not talking about requiring padding I'm talking about
> supporting padding on device side.
> 
> One reason would be ease of extending the spec for guest.
> 
> E.g. imagine adding more fields to the command.
> With suppport for padding guest simply adds fields
> to its structure, and the unused ones are ignored
> by device. Without support for padding you need two
> versions of the structure.

The simplest way is to say each struct virtio_blk_discard_write_zeroes
(currently 16 bytes long) is padded to 512 bytes, but this wastes a lot
of memory.

Stefano and I looked at the status of multiple segment discard/write
zeroes in Linux: only the NVMe driver supports multiple segments.  Even
SCSI sd doesn't support multiple segments.

Userspace APIs do not offer a way for applications to submit multiple
segments in a single call.  Only the block I/O scheduler creates
requests with multiple segments.

SPDK's virtio-blk driver and vhost-user-blk device backend also only
support 1 segment per request.

Given this situation, I'm not sure it's worth the complexity to spec out
a fancy padding scheme that no one will implement.  Wasting 512 - 16
bytes per segment also seems unattractive.  IMO it's acceptable to
introduce a feature bit if struct virtio_blk_discard_write_zeroes needs
extension in the future.

> Another reason would be compatibility.  For better or worse the spec
> does make it look like 512 padding is present. This patch was merged very
> recently so I agree it's not too late to clarify it that it's not
> required, but it seems a bit too much to disallow that completely.
> Let's assume for a minute a device turns up that already
> implemented the 512 byte assumption? If padding is allowed then we can
> write a driver that works with both devices.

The Linux guest driver doesn't honor 512 byte padding, so device
backends would already need to make an exception if we spec 512 byte
padding.

Let's begin VIRTIO 1.1 with the state of real-world implementations:
data[] must be a multiple of sizeof(struct
virtio_blk_discard_write_zeroes).

I'll send a patch to the virtio TC and we can discuss further in that
thread.

Stefan

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for WRITE_ZEROES command
  2019-01-30  7:39                     ` [virtio-comment] " Stefan Hajnoczi
@ 2019-01-30  7:59                       ` Liu, Changpeng
  -1 siblings, 0 replies; 28+ messages in thread
From: Liu, Changpeng @ 2019-01-30  7:59 UTC (permalink / raw)
  To: Stefan Hajnoczi, Michael S. Tsirkin
  Cc: Stefan Hajnoczi, Thomas Huth, Stefano Garzarella, Laurent Vivier,
	Kevin Wolf, qemu-block, qemu-devel, Max Reitz, Paolo Bonzini,
	virtio-comment



> -----Original Message-----
> From: Stefan Hajnoczi [mailto:stefanha@redhat.com]
> Sent: Wednesday, January 30, 2019 3:40 PM
> To: Michael S. Tsirkin <mst@redhat.com>
> Cc: Stefan Hajnoczi <stefanha@gmail.com>; Thomas Huth <thuth@redhat.com>;
> Stefano Garzarella <sgarzare@redhat.com>; Liu, Changpeng
> <changpeng.liu@intel.com>; Laurent Vivier <lvivier@redhat.com>; Kevin Wolf
> <kwolf@redhat.com>; qemu-block@nongnu.org; qemu-devel@nongnu.org; Max
> Reitz <mreitz@redhat.com>; Paolo Bonzini <pbonzini@redhat.com>; virtio-
> comment@lists.oasis-open.org
> Subject: Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for
> WRITE_ZEROES command
> 
> On Sun, Jan 27, 2019 at 01:42:03PM -0500, Michael S. Tsirkin wrote:
> > On Sun, Jan 27, 2019 at 12:57:20PM +0000, Stefan Hajnoczi wrote:
> > > On Fri, Jan 25, 2019 at 02:17:01PM -0500, Michael S. Tsirkin wrote:
> > > > On Fri, Jan 25, 2019 at 03:12:45PM +0000, Stefan Hajnoczi wrote:
> > > > > Based on the Linux guest driver code and the lack of more evidence in
> > > > > the spec, I'm pretty sure data[] doesn't need to be padded to 512 bytes
> > > > > for discard/write zero requests.
> > > >
> > > > OK. Must devices support such padding?
> > >
> > > I see no reason to require padding.  Host APIs and physical storage
> > > controllers do not require it.
> >
> > I'm not talking about requiring padding I'm talking about
> > supporting padding on device side.
> >
> > One reason would be ease of extending the spec for guest.
> >
> > E.g. imagine adding more fields to the command.
> > With suppport for padding guest simply adds fields
> > to its structure, and the unused ones are ignored
> > by device. Without support for padding you need two
> > versions of the structure.
> 
> The simplest way is to say each struct virtio_blk_discard_write_zeroes
> (currently 16 bytes long) is padded to 512 bytes, but this wastes a lot
> of memory.
> 
> Stefano and I looked at the status of multiple segment discard/write
> zeroes in Linux: only the NVMe driver supports multiple segments.  Even
> SCSI sd doesn't support multiple segments.
> 
> Userspace APIs do not offer a way for applications to submit multiple
> segments in a single call.  Only the block I/O scheduler creates
> requests with multiple segments.
> 
> SPDK's virtio-blk driver and vhost-user-blk device backend also only
> support 1 segment per request.
> 
> Given this situation, I'm not sure it's worth the complexity to spec out
> a fancy padding scheme that no one will implement.  Wasting 512 - 16
> bytes per segment also seems unattractive.  IMO it's acceptable to
> introduce a feature bit if struct virtio_blk_discard_write_zeroes needs
> extension in the future.
> 
> > Another reason would be compatibility.  For better or worse the spec
> > does make it look like 512 padding is present. This patch was merged very
> > recently so I agree it's not too late to clarify it that it's not
> > required, but it seems a bit too much to disallow that completely.
> > Let's assume for a minute a device turns up that already
> > implemented the 512 byte assumption? If padding is allowed then we can
> > write a driver that works with both devices.
> 
> The Linux guest driver doesn't honor 512 byte padding, so device
> backends would already need to make an exception if we spec 512 byte
> padding.
> 
> Let's begin VIRTIO 1.1 with the state of real-world implementations:
> data[] must be a multiple of sizeof(struct
> virtio_blk_discard_write_zeroes).
Actually that's the purpose at the first beginning, padding to 512 bytes will waste some memory space, 
it's nice to have an exception other Read/Write commands. 
> 
> I'll send a patch to the virtio TC and we can discuss further in that
> thread.
> 
> Stefan

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [virtio-comment] RE: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for WRITE_ZEROES command
@ 2019-01-30  7:59                       ` Liu, Changpeng
  0 siblings, 0 replies; 28+ messages in thread
From: Liu, Changpeng @ 2019-01-30  7:59 UTC (permalink / raw)
  To: Stefan Hajnoczi, Michael S. Tsirkin
  Cc: Stefan Hajnoczi, Thomas Huth, Stefano Garzarella, Laurent Vivier,
	Kevin Wolf, qemu-block, qemu-devel, Max Reitz, Paolo Bonzini,
	virtio-comment



> -----Original Message-----
> From: Stefan Hajnoczi [mailto:stefanha@redhat.com]
> Sent: Wednesday, January 30, 2019 3:40 PM
> To: Michael S. Tsirkin <mst@redhat.com>
> Cc: Stefan Hajnoczi <stefanha@gmail.com>; Thomas Huth <thuth@redhat.com>;
> Stefano Garzarella <sgarzare@redhat.com>; Liu, Changpeng
> <changpeng.liu@intel.com>; Laurent Vivier <lvivier@redhat.com>; Kevin Wolf
> <kwolf@redhat.com>; qemu-block@nongnu.org; qemu-devel@nongnu.org; Max
> Reitz <mreitz@redhat.com>; Paolo Bonzini <pbonzini@redhat.com>; virtio-
> comment@lists.oasis-open.org
> Subject: Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for
> WRITE_ZEROES command
> 
> On Sun, Jan 27, 2019 at 01:42:03PM -0500, Michael S. Tsirkin wrote:
> > On Sun, Jan 27, 2019 at 12:57:20PM +0000, Stefan Hajnoczi wrote:
> > > On Fri, Jan 25, 2019 at 02:17:01PM -0500, Michael S. Tsirkin wrote:
> > > > On Fri, Jan 25, 2019 at 03:12:45PM +0000, Stefan Hajnoczi wrote:
> > > > > Based on the Linux guest driver code and the lack of more evidence in
> > > > > the spec, I'm pretty sure data[] doesn't need to be padded to 512 bytes
> > > > > for discard/write zero requests.
> > > >
> > > > OK. Must devices support such padding?
> > >
> > > I see no reason to require padding.  Host APIs and physical storage
> > > controllers do not require it.
> >
> > I'm not talking about requiring padding I'm talking about
> > supporting padding on device side.
> >
> > One reason would be ease of extending the spec for guest.
> >
> > E.g. imagine adding more fields to the command.
> > With suppport for padding guest simply adds fields
> > to its structure, and the unused ones are ignored
> > by device. Without support for padding you need two
> > versions of the structure.
> 
> The simplest way is to say each struct virtio_blk_discard_write_zeroes
> (currently 16 bytes long) is padded to 512 bytes, but this wastes a lot
> of memory.
> 
> Stefano and I looked at the status of multiple segment discard/write
> zeroes in Linux: only the NVMe driver supports multiple segments.  Even
> SCSI sd doesn't support multiple segments.
> 
> Userspace APIs do not offer a way for applications to submit multiple
> segments in a single call.  Only the block I/O scheduler creates
> requests with multiple segments.
> 
> SPDK's virtio-blk driver and vhost-user-blk device backend also only
> support 1 segment per request.
> 
> Given this situation, I'm not sure it's worth the complexity to spec out
> a fancy padding scheme that no one will implement.  Wasting 512 - 16
> bytes per segment also seems unattractive.  IMO it's acceptable to
> introduce a feature bit if struct virtio_blk_discard_write_zeroes needs
> extension in the future.
> 
> > Another reason would be compatibility.  For better or worse the spec
> > does make it look like 512 padding is present. This patch was merged very
> > recently so I agree it's not too late to clarify it that it's not
> > required, but it seems a bit too much to disallow that completely.
> > Let's assume for a minute a device turns up that already
> > implemented the 512 byte assumption? If padding is allowed then we can
> > write a driver that works with both devices.
> 
> The Linux guest driver doesn't honor 512 byte padding, so device
> backends would already need to make an exception if we spec 512 byte
> padding.
> 
> Let's begin VIRTIO 1.1 with the state of real-world implementations:
> data[] must be a multiple of sizeof(struct
> virtio_blk_discard_write_zeroes).
Actually that's the purpose at the first beginning, padding to 512 bytes will waste some memory space, 
it's nice to have an exception other Read/Write commands. 
> 
> I'll send a patch to the virtio TC and we can discuss further in that
> thread.
> 
> Stefan

This publicly archived list offers a means to provide input to the
OASIS Virtual I/O Device (VIRTIO) TC.

In order to verify user consent to the Feedback License terms and
to minimize spam in the list archive, subscription is required
before posting.

Subscribe: virtio-comment-subscribe@lists.oasis-open.org
Unsubscribe: virtio-comment-unsubscribe@lists.oasis-open.org
List help: virtio-comment-help@lists.oasis-open.org
List archive: https://lists.oasis-open.org/archives/virtio-comment/
Feedback License: https://www.oasis-open.org/who/ipr/feedback_license.pdf
List Guidelines: https://www.oasis-open.org/policies-guidelines/mailing-lists
Committee: https://www.oasis-open.org/committees/virtio/
Join OASIS: https://www.oasis-open.org/join/


^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2019-01-30  7:59 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-24 17:23 [Qemu-devel] [PATCH RFC 0/2] virtio-blk: add DISCARD and WRITE ZEROES features Stefano Garzarella
2019-01-24 17:23 ` [Qemu-devel] [PATCH RFC 1/2] " Stefano Garzarella
2019-01-24 17:55   ` Dr. David Alan Gilbert
2019-01-24 18:31     ` Stefano Garzarella
2019-01-25 14:58   ` Stefan Hajnoczi
2019-01-25 16:18     ` Stefano Garzarella
2019-01-27 12:51       ` Stefan Hajnoczi
2019-01-28  8:28         ` Stefano Garzarella
2019-01-24 17:23 ` [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for WRITE_ZEROES command Stefano Garzarella
2019-01-25  6:01   ` Thomas Huth
2019-01-25  6:07     ` Thomas Huth
2019-01-25  6:07     ` Thomas Huth
2019-01-25  8:16       ` Stefano Garzarella
2019-01-25  8:49         ` Thomas Huth
2019-01-25 11:58           ` Liu, Changpeng
2019-01-25 12:48             ` Thomas Huth
2019-01-25 19:18               ` Michael S. Tsirkin
2019-01-25 15:12           ` Stefan Hajnoczi
2019-01-25 19:17             ` Michael S. Tsirkin
2019-01-25 19:17               ` [virtio-comment] " Michael S. Tsirkin
2019-01-27 12:57               ` Stefan Hajnoczi
2019-01-27 18:42                 ` Michael S. Tsirkin
2019-01-27 18:42                   ` [virtio-comment] " Michael S. Tsirkin
2019-01-30  7:39                   ` Stefan Hajnoczi
2019-01-30  7:39                     ` [virtio-comment] " Stefan Hajnoczi
2019-01-30  7:59                     ` Liu, Changpeng
2019-01-30  7:59                       ` [virtio-comment] " Liu, Changpeng
2019-01-25 19:14           ` Michael S. Tsirkin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.