qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v2 0/3] block: BDRV_REQ_PREFETCH
@ 2019-06-18 15:41 Vladimir Sementsov-Ogievskiy
  2019-06-18 15:42 ` [Qemu-devel] [PATCH v2 1/3] block: implement BDRV_REQ_PREFETCH Vladimir Sementsov-Ogievskiy
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2019-06-18 15:41 UTC (permalink / raw)
  To: qemu-devel, qemu-block
  Cc: kwolf, fam, vsementsov, mreitz, stefanha, den, jsnow

Hi all!

Here is a small new read flag: BDRV_REQ_PREFETCH, which in combination with
BDRV_REQ_COPY_ON_READ performs copy-on-read without an
extra buffer for the read data. This means that only the parts that need COR
will actually be read, and only the corresponding buffers allocated, no more.

This allows block-stream and NBD_CMD_CACHE to be improved a bit.

v2: change interface to be just one flag BDRV_REQ_PREFETCH

v1 was "[PATCH 0/3] block: blk_co_pcache"
   https://lists.gnu.org/archive/html/qemu-devel/2019-06/msg01047.html

Vladimir Sementsov-Ogievskiy (3):
  block: implement BDRV_REQ_PREFETCH
  block/stream: use BDRV_REQ_PREFETCH
  nbd: improve CMD_CACHE: use blk_co_pcache

 include/block/block.h |  8 +++++++-
 block/io.c            | 18 ++++++++++++------
 block/stream.c        | 20 +++++++-------------
 nbd/server.c          | 43 +++++++++++++++++++++++++++++++++++--------
 4 files changed, 61 insertions(+), 28 deletions(-)

-- 
2.18.0



^ permalink raw reply	[flat|nested] 4+ messages in thread

* [Qemu-devel] [PATCH v2 1/3] block: implement BDRV_REQ_PREFETCH
  2019-06-18 15:41 [Qemu-devel] [PATCH v2 0/3] block: BDRV_REQ_PREFETCH Vladimir Sementsov-Ogievskiy
@ 2019-06-18 15:42 ` Vladimir Sementsov-Ogievskiy
  2019-06-18 15:42 ` [Qemu-devel] [PATCH v2 2/3] block/stream: use BDRV_REQ_PREFETCH Vladimir Sementsov-Ogievskiy
  2019-06-18 15:42 ` [Qemu-devel] [PATCH v2 3/3] nbd: improve CMD_CACHE: use blk_co_pcache Vladimir Sementsov-Ogievskiy
  2 siblings, 0 replies; 4+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2019-06-18 15:42 UTC (permalink / raw)
  To: qemu-devel, qemu-block
  Cc: kwolf, fam, vsementsov, mreitz, stefanha, den, jsnow

Perform an efficient copy-on-read request when we don't actually need the
data. It will be used for block-stream and NBD_CMD_CACHE.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 include/block/block.h |  8 +++++++-
 block/io.c            | 18 ++++++++++++------
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index f9415ed740..a063ee135b 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -88,8 +88,14 @@ typedef enum {
      * fallback. */
     BDRV_REQ_NO_FALLBACK        = 0x100,
 
+    /*
+     * BDRV_REQ_PREFETCH may be used only together with BDRV_REQ_COPY_ON_READ
+     * on a read request; it means the caller doesn't really need the data
+     * to be written to the qiov parameter, which may be NULL.
+     */
+    BDRV_REQ_PREFETCH  = 0x200,
     /* Mask of valid flags */
-    BDRV_REQ_MASK               = 0x1ff,
+    BDRV_REQ_MASK               = 0x3ff,
 } BdrvRequestFlags;
 
 typedef struct BlockSizes {
diff --git a/block/io.c b/block/io.c
index 9ba1bada36..a232243de3 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1104,7 +1104,8 @@ bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
 }
 
 static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
-        int64_t offset, unsigned int bytes, QEMUIOVector *qiov)
+        int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
+        int flags)
 {
     BlockDriverState *bs = child->bs;
 
@@ -1215,9 +1216,11 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
                 goto err;
             }
 
-            qemu_iovec_from_buf(qiov, progress, bounce_buffer + skip_bytes,
-                                pnum - skip_bytes);
-        } else {
+            if (!(flags & BDRV_REQ_PREFETCH)) {
+                qemu_iovec_from_buf(qiov, progress, bounce_buffer + skip_bytes,
+                                    pnum - skip_bytes);
+            }
+        } else if (!(flags & BDRV_REQ_PREFETCH)) {
             /* Read directly into the destination */
             qemu_iovec_init(&local_qiov, qiov->niov);
             qemu_iovec_concat(&local_qiov, qiov, progress, pnum - skip_bytes);
@@ -1268,7 +1271,8 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
      * potential fallback support, if we ever implement any read flags
      * to pass through to drivers.  For now, there aren't any
      * passthrough flags.  */
-    assert(!(flags & ~(BDRV_REQ_NO_SERIALISING | BDRV_REQ_COPY_ON_READ)));
+    assert(!(flags & ~(BDRV_REQ_NO_SERIALISING | BDRV_REQ_COPY_ON_READ |
+                       BDRV_REQ_PREFETCH)));
 
     /* Handle Copy on Read and associated serialisation */
     if (flags & BDRV_REQ_COPY_ON_READ) {
@@ -1296,7 +1300,9 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
         }
 
         if (!ret || pnum != bytes) {
-            ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov);
+            ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov, flags);
+            goto out;
+        } else if (flags & BDRV_REQ_PREFETCH) {
             goto out;
         }
     }
-- 
2.18.0



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [Qemu-devel] [PATCH v2 2/3] block/stream: use BDRV_REQ_PREFETCH
  2019-06-18 15:41 [Qemu-devel] [PATCH v2 0/3] block: BDRV_REQ_PREFETCH Vladimir Sementsov-Ogievskiy
  2019-06-18 15:42 ` [Qemu-devel] [PATCH v2 1/3] block: implement BDRV_REQ_PREFETCH Vladimir Sementsov-Ogievskiy
@ 2019-06-18 15:42 ` Vladimir Sementsov-Ogievskiy
  2019-06-18 15:42 ` [Qemu-devel] [PATCH v2 3/3] nbd: improve CMD_CACHE: use blk_co_pcache Vladimir Sementsov-Ogievskiy
  2 siblings, 0 replies; 4+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2019-06-18 15:42 UTC (permalink / raw)
  To: qemu-devel, qemu-block
  Cc: kwolf, fam, vsementsov, mreitz, stefanha, den, jsnow

This helps to avoid extra I/O, allocations and memory copying.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 block/stream.c | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index 1a906fd860..b316bfb290 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -22,11 +22,11 @@
 
 enum {
     /*
-     * Size of data buffer for populating the image file.  This should be large
+     * Maximum chunk size to feed to copy-on-read.  This should be large
      * enough to process multiple clusters in a single call, so that populating
      * contiguous regions of the image is efficient.
      */
-    STREAM_BUFFER_SIZE = 512 * 1024, /* in bytes */
+    STREAM_CHUNK = 512 * 1024, /* in bytes */
 };
 
 typedef struct StreamBlockJob {
@@ -39,13 +39,12 @@ typedef struct StreamBlockJob {
 } StreamBlockJob;
 
 static int coroutine_fn stream_populate(BlockBackend *blk,
-                                        int64_t offset, uint64_t bytes,
-                                        void *buf)
+                                        int64_t offset, uint64_t bytes)
 {
     assert(bytes < SIZE_MAX);
 
-    /* Copy-on-read the unallocated clusters */
-    return blk_co_pread(blk, offset, bytes, buf, BDRV_REQ_COPY_ON_READ);
+    return blk_co_preadv(blk, offset, bytes, NULL,
+                         BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH);
 }
 
 static void stream_abort(Job *job)
@@ -117,7 +116,6 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
     int error = 0;
     int ret = 0;
     int64_t n = 0; /* bytes */
-    void *buf;
 
     if (!bs->backing) {
         goto out;
@@ -130,8 +128,6 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
     }
     job_progress_set_remaining(&s->common.job, len);
 
-    buf = qemu_blockalign(bs, STREAM_BUFFER_SIZE);
-
     /* Turn on copy-on-read for the whole block device so that guest read
      * requests help us make progress.  Only do this when copying the entire
      * backing chain since the copy-on-read operation does not take base into
@@ -154,7 +150,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 
         copy = false;
 
-        ret = bdrv_is_allocated(bs, offset, STREAM_BUFFER_SIZE, &n);
+        ret = bdrv_is_allocated(bs, offset, STREAM_CHUNK, &n);
         if (ret == 1) {
             /* Allocated in the top, no need to copy.  */
         } else if (ret >= 0) {
@@ -172,7 +168,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
         }
         trace_stream_one_iteration(s, offset, n, ret);
         if (copy) {
-            ret = stream_populate(blk, offset, n, buf);
+            ret = stream_populate(blk, offset, n);
         }
         if (ret < 0) {
             BlockErrorAction action =
@@ -206,8 +202,6 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
     /* Do not remove the backing file if an error was there but ignored.  */
     ret = error;
 
-    qemu_vfree(buf);
-
 out:
     /* Modify backing chain and close BDSes in main loop */
     return ret;
-- 
2.18.0



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [Qemu-devel] [PATCH v2 3/3] nbd: improve CMD_CACHE: use blk_co_pcache
  2019-06-18 15:41 [Qemu-devel] [PATCH v2 0/3] block: BDRV_REQ_PREFETCH Vladimir Sementsov-Ogievskiy
  2019-06-18 15:42 ` [Qemu-devel] [PATCH v2 1/3] block: implement BDRV_REQ_PREFETCH Vladimir Sementsov-Ogievskiy
  2019-06-18 15:42 ` [Qemu-devel] [PATCH v2 2/3] block/stream: use BDRV_REQ_PREFETCH Vladimir Sementsov-Ogievskiy
@ 2019-06-18 15:42 ` Vladimir Sementsov-Ogievskiy
  2 siblings, 0 replies; 4+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2019-06-18 15:42 UTC (permalink / raw)
  To: qemu-devel, qemu-block
  Cc: kwolf, fam, vsementsov, mreitz, stefanha, den, jsnow

This helps to avoid extra I/O, allocations and memory copying.
We assume here that CMD_CACHE is always used with copy-on-read, as
otherwise it's a no-op.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 nbd/server.c | 43 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 35 insertions(+), 8 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index aeca3893fe..6048257872 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -2101,12 +2101,15 @@ static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
             return -EINVAL;
         }
 
-        req->data = blk_try_blockalign(client->exp->blk, request->len);
-        if (req->data == NULL) {
-            error_setg(errp, "No memory");
-            return -ENOMEM;
+        if (request->type != NBD_CMD_CACHE) {
+            req->data = blk_try_blockalign(client->exp->blk, request->len);
+            if (req->data == NULL) {
+                error_setg(errp, "No memory");
+                return -ENOMEM;
+            }
         }
     }
+
     if (request->type == NBD_CMD_WRITE) {
         if (nbd_read(client->ioc, req->data, request->len, "CMD_WRITE data",
                      errp) < 0)
@@ -2191,7 +2194,7 @@ static coroutine_fn int nbd_do_cmd_read(NBDClient *client, NBDRequest *request,
     int ret;
     NBDExport *exp = client->exp;
 
-    assert(request->type == NBD_CMD_READ || request->type == NBD_CMD_CACHE);
+    assert(request->type == NBD_CMD_READ);
 
     /* XXX: NBD Protocol only documents use of FUA with WRITE */
     if (request->flags & NBD_CMD_FLAG_FUA) {
@@ -2203,7 +2206,7 @@ static coroutine_fn int nbd_do_cmd_read(NBDClient *client, NBDRequest *request,
     }
 
     if (client->structured_reply && !(request->flags & NBD_CMD_FLAG_DF) &&
-        request->len && request->type != NBD_CMD_CACHE)
+        request->len)
     {
         return nbd_co_send_sparse_read(client, request->handle, request->from,
                                        data, request->len, errp);
@@ -2211,7 +2214,7 @@ static coroutine_fn int nbd_do_cmd_read(NBDClient *client, NBDRequest *request,
 
     ret = blk_pread(exp->blk, request->from + exp->dev_offset, data,
                     request->len);
-    if (ret < 0 || request->type == NBD_CMD_CACHE) {
+    if (ret < 0) {
         return nbd_send_generic_reply(client, request->handle, ret,
                                       "reading from file failed", errp);
     }
@@ -2230,6 +2233,28 @@ static coroutine_fn int nbd_do_cmd_read(NBDClient *client, NBDRequest *request,
     }
 }
 
+/*
+ * nbd_do_cmd_cache
+ *
+ * Handle NBD_CMD_CACHE request.
+ * Return -errno if sending fails. Other errors are reported directly to the
+ * client as an error reply.
+ */
+static coroutine_fn int nbd_do_cmd_cache(NBDClient *client, NBDRequest *request,
+                                         Error **errp)
+{
+    int ret;
+    NBDExport *exp = client->exp;
+
+    assert(request->type == NBD_CMD_CACHE);
+
+    ret = blk_co_preadv(exp->blk, request->from + exp->dev_offset, request->len,
+                        NULL, BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH);
+
+    return nbd_send_generic_reply(client, request->handle, ret,
+                                  "caching data failed", errp);
+}
+
 /* Handle NBD request.
  * Return -errno if sending fails. Other errors are reported directly to the
  * client as an error reply. */
@@ -2243,8 +2268,10 @@ static coroutine_fn int nbd_handle_request(NBDClient *client,
     char *msg;
 
     switch (request->type) {
-    case NBD_CMD_READ:
     case NBD_CMD_CACHE:
+        return nbd_do_cmd_cache(client, request, errp);
+
+    case NBD_CMD_READ:
         return nbd_do_cmd_read(client, request, data, errp);
 
     case NBD_CMD_WRITE:
-- 
2.18.0



^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2019-06-18 16:42 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-18 15:41 [Qemu-devel] [PATCH v2 0/3] block: BDRV_REQ_PREFETCH Vladimir Sementsov-Ogievskiy
2019-06-18 15:42 ` [Qemu-devel] [PATCH v2 1/3] block: implement BDRV_REQ_PREFETCH Vladimir Sementsov-Ogievskiy
2019-06-18 15:42 ` [Qemu-devel] [PATCH v2 2/3] block/stream: use BDRV_REQ_PREFETCH Vladimir Sementsov-Ogievskiy
2019-06-18 15:42 ` [Qemu-devel] [PATCH v2 3/3] nbd: improve CMD_CACHE: use blk_co_pcache Vladimir Sementsov-Ogievskiy

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).