* [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour
@ 2018-09-07 10:21 Tim Smith
  2018-09-07 10:21 ` [Qemu-devel] [PATCH 2/3] Improve xen_disk response latency Tim Smith
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Tim Smith @ 2018-09-07 10:21 UTC (permalink / raw)
  To: qemu-devel

When I/O consists of many small requests, performance is improved by
batching them together in a single io_submit() call. When there are
relatively few requests, the extra overhead is not worth it. This
introduces a check to start batching I/O requests via blk_io_plug()/
blk_io_unplug() in an amount proportional to the number which were
already in flight at the time we started reading the ring.
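
For illustration, here is a minimal, self-contained sketch of the batching pattern described above. It is not part of the patch: the QEMU calls blk_io_plug()/blk_io_unplug() are replaced by hypothetical stubs (plug(), unplug(), submit_one(), handle_ring()) so the control flow can be compiled and run on its own.

#include <stdio.h>

#define IO_PLUG_THRESHOLD 1

/* Hypothetical stand-ins for blk_io_plug()/blk_io_unplug() and for
 * submitting a single request to the block layer. */
static void plug(void)        { printf("plug: start batching\n"); }
static void unplug(void)      { printf("unplug: submit the batch\n"); }
static void submit_one(int i) { printf("  queue request %d\n", i); }

/* Mirrors the control flow added to blk_handle_requests(): batch only
 * when requests were already in flight, and cap each batch at that count. */
static void handle_ring(int inflight_atstart, int nr_new_requests)
{
    int batched = 0;

    if (inflight_atstart > IO_PLUG_THRESHOLD) {
        plug();
    }
    for (int i = 0; i < nr_new_requests; i++) {
        if (inflight_atstart > IO_PLUG_THRESHOLD &&
            batched >= inflight_atstart) {
            unplug();               /* flush a full batch */
        }
        submit_one(i);
        if (inflight_atstart > IO_PLUG_THRESHOLD) {
            if (batched >= inflight_atstart) {
                plug();             /* start the next batch */
                batched = 0;
            } else {
                batched++;
            }
        }
    }
    if (inflight_atstart > IO_PLUG_THRESHOLD) {
        unplug();                   /* submit whatever is left */
    }
}

int main(void)
{
    handle_ring(4, 10);   /* e.g. 4 already in flight, 10 new ring entries */
    return 0;
}

Running it with inflight_atstart above IO_PLUG_THRESHOLD shows batches being flushed once they reach the starting in-flight count; below the threshold every request is submitted immediately.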

Signed-off-by: Tim Smith <tim.smith@citrix.com>
---
 hw/block/xen_disk.c |   29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 36eff94f84..6cb40d66fa 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -101,6 +101,9 @@ struct XenBlkDev {
     AioContext          *ctx;
 };
 
+/* Threshold of in-flight requests above which we will start using
+ * blk_io_plug()/blk_io_unplug() to batch requests */
+#define IO_PLUG_THRESHOLD 1
 /* ------------------------------------------------------------- */
 
 static void ioreq_reset(struct ioreq *ioreq)
@@ -542,6 +545,8 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
 {
     RING_IDX rc, rp;
     struct ioreq *ioreq;
+    int inflight_atstart = blkdev->requests_inflight;
+    int batched = 0;
 
     blkdev->more_work = 0;
 
@@ -550,6 +555,16 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
     xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
 
     blk_send_response_all(blkdev);
+    /* If there were more than IO_PLUG_THRESHOLD ioreqs in flight
+     * when we got here, this is an indication that the bottleneck
+     * is below us, so it's worth beginning to batch up I/O requests
+     * rather than submitting them immediately. The maximum number
+     * of requests we're willing to batch is the number already in
+     * flight, so it can grow up to max_requests when the bottleneck
+     * is below us */
+    if (inflight_atstart > IO_PLUG_THRESHOLD) {
+        blk_io_plug(blkdev->blk);
+    }
     while (rc != rp) {
         /* pull request from ring */
         if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc)) {
@@ -589,7 +604,21 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
             continue;
         }
 
+        if (inflight_atstart > IO_PLUG_THRESHOLD && batched >= inflight_atstart) {
+            blk_io_unplug(blkdev->blk);
+        }
         ioreq_runio_qemu_aio(ioreq);
+        if (inflight_atstart > IO_PLUG_THRESHOLD) {
+            if (batched >= inflight_atstart) {
+                blk_io_plug(blkdev->blk);
+                batched = 0;
+            } else {
+                batched++;
+            }
+        }
+    }
+    if (inflight_atstart > IO_PLUG_THRESHOLD) {
+        blk_io_unplug(blkdev->blk);
     }
 
     if (blkdev->more_work && blkdev->requests_inflight < blkdev->max_requests) {


* [Qemu-devel] [PATCH 2/3] Improve xen_disk response latency
  2018-09-07 10:21 [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour Tim Smith
@ 2018-09-07 10:21 ` Tim Smith
  2018-09-07 16:03   ` Paul Durrant
  2018-09-07 10:21 ` [Qemu-devel] [PATCH 3/3] Avoid repeated memory allocation in xen_disk Tim Smith
  2018-09-07 14:11 ` [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour Paul Durrant
  2 siblings, 1 reply; 10+ messages in thread
From: Tim Smith @ 2018-09-07 10:21 UTC (permalink / raw)
  To: qemu-devel

If the I/O ring is full, the guest cannot send any more requests
until some responses are sent. Only sending all available responses
just before checking for new work does not leave much time for the
guest to supply new work, so this will cause stalls if the ring gets
full. Also, not completing reads as soon as possible adds latency
to the guest.

To alleviate that, complete I/O requests as soon as they come back.
blk_send_response() already returns a value indicating whether
a notify should be sent, which is all the batching we need.
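
For illustration, here is a minimal sketch of the completion path this patch moves to: each request is answered from its own completion callback, and the front end is notified only when pushing the response says a notification is needed. All names here (struct request, push_response(), send_notify(), aio_complete()) are hypothetical stand-ins, not the xen_disk functions themselves.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical request type standing in for struct ioreq. */
struct request {
    int id;
    int status;
};

/* Stand-in for blk_send_response(): pushes one response and reports
 * whether the front end needs to be notified (decided arbitrarily here). */
static bool push_response(struct request *req)
{
    printf("response for request %d, status %d\n", req->id, req->status);
    return req->id % 2 == 0;
}

static void send_notify(void)            { printf("  notify front end\n"); }
static void release(struct request *req) { printf("  release request %d\n", req->id); }

/* Completion callback: respond and release immediately, rather than
 * parking the request on a "finished" list to be drained later. */
static void aio_complete(struct request *req, int ret)
{
    req->status = ret;
    if (push_response(req)) {
        send_notify();
    }
    release(req);
}

int main(void)
{
    struct request reqs[] = { { 1, 0 }, { 2, 0 }, { 3, 0 } };
    for (int i = 0; i < 3; i++) {
        aio_complete(&reqs[i], 0);
    }
    return 0;
}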

Signed-off-by: Tim Smith <tim.smith@citrix.com>
---
 hw/block/xen_disk.c |   43 ++++++++++++-------------------------------
 1 file changed, 12 insertions(+), 31 deletions(-)

diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 6cb40d66fa..c11cd21d37 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -83,11 +83,9 @@ struct XenBlkDev {
 
     /* request lists */
     QLIST_HEAD(inflight_head, ioreq) inflight;
-    QLIST_HEAD(finished_head, ioreq) finished;
     QLIST_HEAD(freelist_head, ioreq) freelist;
     int                 requests_total;
     int                 requests_inflight;
-    int                 requests_finished;
     unsigned int        max_requests;
 
     gboolean            feature_discard;
@@ -104,6 +102,9 @@ struct XenBlkDev {
 /* Threshold of in-flight requests above which we will start using
  * blk_io_plug()/blk_io_unplug() to batch requests */
 #define IO_PLUG_THRESHOLD 1
+
+static int blk_send_response(struct ioreq *ioreq);
+
 /* ------------------------------------------------------------- */
 
 static void ioreq_reset(struct ioreq *ioreq)
@@ -155,12 +156,10 @@ static void ioreq_finish(struct ioreq *ioreq)
     struct XenBlkDev *blkdev = ioreq->blkdev;
 
     QLIST_REMOVE(ioreq, list);
-    QLIST_INSERT_HEAD(&blkdev->finished, ioreq, list);
     blkdev->requests_inflight--;
-    blkdev->requests_finished++;
 }
 
-static void ioreq_release(struct ioreq *ioreq, bool finish)
+static void ioreq_release(struct ioreq *ioreq)
 {
     struct XenBlkDev *blkdev = ioreq->blkdev;
 
@@ -168,11 +167,7 @@ static void ioreq_release(struct ioreq *ioreq, bool finish)
     ioreq_reset(ioreq);
     ioreq->blkdev = blkdev;
     QLIST_INSERT_HEAD(&blkdev->freelist, ioreq, list);
-    if (finish) {
-        blkdev->requests_finished--;
-    } else {
-        blkdev->requests_inflight--;
-    }
+    blkdev->requests_inflight--;
 }
 
 /*
@@ -351,6 +346,10 @@ static void qemu_aio_complete(void *opaque, int ret)
     default:
         break;
     }
+    if (blk_send_response(ioreq)) {
+        xen_pv_send_notify(&blkdev->xendev);
+    }
+    ioreq_release(ioreq);
     qemu_bh_schedule(blkdev->bh);
 
 done:
@@ -455,7 +454,7 @@ err:
     return -1;
 }
 
-static int blk_send_response_one(struct ioreq *ioreq)
+static int blk_send_response(struct ioreq *ioreq)
 {
     struct XenBlkDev  *blkdev = ioreq->blkdev;
     int               send_notify   = 0;
@@ -504,22 +503,6 @@ static int blk_send_response_one(struct ioreq *ioreq)
     return send_notify;
 }
 
-/* walk finished list, send outstanding responses, free requests */
-static void blk_send_response_all(struct XenBlkDev *blkdev)
-{
-    struct ioreq *ioreq;
-    int send_notify = 0;
-
-    while (!QLIST_EMPTY(&blkdev->finished)) {
-        ioreq = QLIST_FIRST(&blkdev->finished);
-        send_notify += blk_send_response_one(ioreq);
-        ioreq_release(ioreq, true);
-    }
-    if (send_notify) {
-        xen_pv_send_notify(&blkdev->xendev);
-    }
-}
-
 static int blk_get_request(struct XenBlkDev *blkdev, struct ioreq *ioreq, RING_IDX rc)
 {
     switch (blkdev->protocol) {
@@ -554,7 +537,6 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
     rp = blkdev->rings.common.sring->req_prod;
     xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
 
-    blk_send_response_all(blkdev);
     /* If there were more than IO_PLUG_THRESHOLD ioreqs in flight
      * when we got here, this is an indication that the bottleneck
      * is below us, so it's worth beginning to batch up I/O requests
@@ -597,10 +579,10 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
                 break;
             };
 
-            if (blk_send_response_one(ioreq)) {
+            if (blk_send_response(ioreq)) {
                 xen_pv_send_notify(&blkdev->xendev);
             }
-            ioreq_release(ioreq, false);
+            ioreq_release(ioreq);
             continue;
         }
 
@@ -645,7 +627,6 @@ static void blk_alloc(struct XenDevice *xendev)
     trace_xen_disk_alloc(xendev->name);
 
     QLIST_INIT(&blkdev->inflight);
-    QLIST_INIT(&blkdev->finished);
     QLIST_INIT(&blkdev->freelist);
 
     blkdev->iothread = iothread_create(xendev->name, &err);


* [Qemu-devel] [PATCH 3/3] Avoid repeated memory allocation in xen_disk
  2018-09-07 10:21 [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour Tim Smith
  2018-09-07 10:21 ` [Qemu-devel] [PATCH 2/3] Improve xen_disk response latency Tim Smith
@ 2018-09-07 10:21 ` Tim Smith
  2018-09-07 16:05   ` Paul Durrant
  2018-09-07 14:11 ` [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour Paul Durrant
  2 siblings, 1 reply; 10+ messages in thread
From: Tim Smith @ 2018-09-07 10:21 UTC (permalink / raw)
  To: qemu-devel

xen_disk currently allocates memory to hold the data for each ioreq
as that ioreq is used, and frees it afterwards. Because it requires
page-aligned blocks, this interacts poorly with non-page-aligned
allocations and balloons the heap.

Instead, allocate the maximum possible requirement, which is
BLKIF_MAX_SEGMENTS_PER_REQUEST pages (currently 11 pages) when
the ioreq is created, and keep that allocation until it is destroyed.
Since the ioreqs themselves are re-used via a free list, this
should actually improve memory usage.
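
For illustration, here is a minimal sketch of the allocation strategy (not part of the patch): the worst-case, page-aligned buffer is allocated once when the request structure is created and freed only when the structure is destroyed. posix_memalign() stands in for qemu_memalign(), the type and function names are hypothetical, and only the sizes (11 segments of one page each) come from the commit message.

#include <stdlib.h>

#define PAGE_SIZE    4096   /* stand-in for XC_PAGE_SIZE */
#define MAX_SEGMENTS 11     /* BLKIF_MAX_SEGMENTS_PER_REQUEST, per the commit message */

/* Hypothetical request type standing in for struct ioreq. */
struct ioreq_like {
    void *buf;              /* data buffer, lives as long as the request struct */
};

static struct ioreq_like *ioreq_create(void)
{
    struct ioreq_like *ioreq = calloc(1, sizeof(*ioreq));
    if (!ioreq) {
        return NULL;
    }
    /* One worst-case, page-aligned allocation instead of one per I/O. */
    if (posix_memalign(&ioreq->buf, PAGE_SIZE, MAX_SEGMENTS * PAGE_SIZE)) {
        free(ioreq);
        return NULL;
    }
    return ioreq;
}

static void ioreq_destroy(struct ioreq_like *ioreq)
{
    free(ioreq->buf);       /* freed once, at teardown */
    free(ioreq);
}

int main(void)
{
    struct ioreq_like *ioreq = ioreq_create();
    /* ... the same buffer would be reused for many requests here ... */
    if (ioreq) {
        ioreq_destroy(ioreq);
    }
    return 0;
}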

Signed-off-by: Tim Smith <tim.smith@citrix.com>
---
 hw/block/xen_disk.c |   10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index c11cd21d37..67f894bba5 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -112,7 +112,6 @@ static void ioreq_reset(struct ioreq *ioreq)
     memset(&ioreq->req, 0, sizeof(ioreq->req));
     ioreq->status = 0;
     ioreq->start = 0;
-    ioreq->buf = NULL;
     ioreq->size = 0;
     ioreq->presync = 0;
 
@@ -137,6 +136,10 @@ static struct ioreq *ioreq_start(struct XenBlkDev *blkdev)
         /* allocate new struct */
         ioreq = g_malloc0(sizeof(*ioreq));
         ioreq->blkdev = blkdev;
+        /* We cannot need more pages per ioreq than this, and we do re-use ioreqs,
+         * so allocate the memory once here, to be freed in blk_free() when the
+         * ioreq is freed. */
+        ioreq->buf = qemu_memalign(XC_PAGE_SIZE, BLKIF_MAX_SEGMENTS_PER_REQUEST * XC_PAGE_SIZE);
         blkdev->requests_total++;
         qemu_iovec_init(&ioreq->v, 1);
     } else {
@@ -313,14 +316,12 @@ static void qemu_aio_complete(void *opaque, int ret)
         if (ret == 0) {
             ioreq_grant_copy(ioreq);
         }
-        qemu_vfree(ioreq->buf);
         break;
     case BLKIF_OP_WRITE:
     case BLKIF_OP_FLUSH_DISKCACHE:
         if (!ioreq->req.nr_segments) {
             break;
         }
-        qemu_vfree(ioreq->buf);
         break;
     default:
         break;
@@ -392,12 +393,10 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
 {
     struct XenBlkDev *blkdev = ioreq->blkdev;
 
-    ioreq->buf = qemu_memalign(XC_PAGE_SIZE, ioreq->size);
     if (ioreq->req.nr_segments &&
         (ioreq->req.operation == BLKIF_OP_WRITE ||
          ioreq->req.operation == BLKIF_OP_FLUSH_DISKCACHE) &&
         ioreq_grant_copy(ioreq)) {
-        qemu_vfree(ioreq->buf);
         goto err;
     }
 
@@ -989,6 +988,7 @@ static int blk_free(struct XenDevice *xendev)
         ioreq = QLIST_FIRST(&blkdev->freelist);
         QLIST_REMOVE(ioreq, list);
         qemu_iovec_destroy(&ioreq->v);
+        qemu_vfree(ioreq->buf);
         g_free(ioreq);
     }
 


* Re: [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour
  2018-09-07 10:21 [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour Tim Smith
  2018-09-07 10:21 ` [Qemu-devel] [PATCH 2/3] Improve xen_disk response latency Tim Smith
  2018-09-07 10:21 ` [Qemu-devel] [PATCH 3/3] Avoid repeated memory allocation in xen_disk Tim Smith
@ 2018-09-07 14:11 ` Paul Durrant
  2 siblings, 0 replies; 10+ messages in thread
From: Paul Durrant @ 2018-09-07 14:11 UTC (permalink / raw)
  To: Tim Smith, qemu-devel

> -----Original Message-----
> From: Qemu-devel [mailto:qemu-devel-
> bounces+paul.durrant=citrix.com@nongnu.org] On Behalf Of Tim Smith
> Sent: 07 September 2018 11:21
> To: qemu-devel@nongnu.org
> Subject: [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour
> 
> When I/O consists of many small requests, performance is improved by
> batching them together in a single io_submit() call. When there are
> relatively few requests, the extra overhead is not worth it. This
> introduces a check to start batching I/O requests via blk_io_plug()/
> blk_io_unplug() in an amount proportional to the number which were
> already in flight at the time we started reading the ring.
> 
> Signed-off-by: Tim Smith <tim.smith@citrix.com>

Reviewed-by: Paul Durrant <paul.durrant@citrix.com>

> ---
>  hw/block/xen_disk.c |   29 +++++++++++++++++++++++++++++
>  1 file changed, 29 insertions(+)
> 
> diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
> index 36eff94f84..6cb40d66fa 100644
> --- a/hw/block/xen_disk.c
> +++ b/hw/block/xen_disk.c
> @@ -101,6 +101,9 @@ struct XenBlkDev {
>      AioContext          *ctx;
>  };
> 
> +/* Threshold of in-flight requests above which we will start using
> + * blk_io_plug()/blk_io_unplug() to batch requests */
> +#define IO_PLUG_THRESHOLD 1
>  /* ------------------------------------------------------------- */
> 
>  static void ioreq_reset(struct ioreq *ioreq)
> @@ -542,6 +545,8 @@ static void blk_handle_requests(struct XenBlkDev
> *blkdev)
>  {
>      RING_IDX rc, rp;
>      struct ioreq *ioreq;
> +    int inflight_atstart = blkdev->requests_inflight;
> +    int batched = 0;
> 
>      blkdev->more_work = 0;
> 
> @@ -550,6 +555,16 @@ static void blk_handle_requests(struct XenBlkDev
> *blkdev)
>      xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
> 
>      blk_send_response_all(blkdev);
> +    /* If there were more than IO_PLUG_THRESHOLD ioreqs in flight
> +     * when we got here, this is an indication that the bottleneck
> +     * is below us, so it's worth beginning to batch up I/O requests
> +     * rather than submitting them immediately. The maximum number
> +     * of requests we're willing to batch is the number already in
> +     * flight, so it can grow up to max_requests when the bottleneck
> +     * is below us */
> +    if (inflight_atstart > IO_PLUG_THRESHOLD) {
> +        blk_io_plug(blkdev->blk);
> +    }
>      while (rc != rp) {
>          /* pull request from ring */
>          if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc)) {
> @@ -589,7 +604,21 @@ static void blk_handle_requests(struct XenBlkDev
> *blkdev)
>              continue;
>          }
> 
> +        if (inflight_atstart > IO_PLUG_THRESHOLD && batched >=
> inflight_atstart) {
> +            blk_io_unplug(blkdev->blk);
> +        }
>          ioreq_runio_qemu_aio(ioreq);
> +        if (inflight_atstart > IO_PLUG_THRESHOLD) {
> +            if (batched >= inflight_atstart) {
> +                blk_io_plug(blkdev->blk);
> +                batched = 0;
> +            } else {
> +                batched++;
> +            }
> +        }
> +    }
> +    if (inflight_atstart > IO_PLUG_THRESHOLD) {
> +        blk_io_unplug(blkdev->blk);
>      }
> 
>      if (blkdev->more_work && blkdev->requests_inflight < blkdev-
> >max_requests) {
> 



* Re: [Qemu-devel] [PATCH 2/3] Improve xen_disk response latency
  2018-09-07 10:21 ` [Qemu-devel] [PATCH 2/3] Improve xen_disk response latency Tim Smith
@ 2018-09-07 16:03   ` Paul Durrant
  0 siblings, 0 replies; 10+ messages in thread
From: Paul Durrant @ 2018-09-07 16:03 UTC (permalink / raw)
  To: Tim Smith, qemu-devel

> -----Original Message-----
> From: Qemu-devel [mailto:qemu-devel-
> bounces+paul.durrant=citrix.com@nongnu.org] On Behalf Of Tim Smith
> Sent: 07 September 2018 11:21
> To: qemu-devel@nongnu.org
> Subject: [Qemu-devel] [PATCH 2/3] Improve xen_disk response latency
> 
> If the I/O ring is full, the guest cannot send any more requests
> until some responses are sent. Only sending all available responses
> just before checking for new work does not leave much time for the
> guest to supply new work, so this will cause stalls if the ring gets
> full. Also, not completing reads as soon as possible adds latency
> to the guest.
> 
> To alleviate that, complete I/O requests as soon as they come back.
> blk_send_response() already returns a value indicating whether
> a notify should be sent, which is all the batching we need.
> 
> Signed-off-by: Tim Smith <tim.smith@citrix.com>

Reviewed-by: Paul Durrant <paul.durrant@citrix.com>

> ---
>  hw/block/xen_disk.c |   43 ++++++++++++-------------------------------
>  1 file changed, 12 insertions(+), 31 deletions(-)
> 
> diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
> index 6cb40d66fa..c11cd21d37 100644
> --- a/hw/block/xen_disk.c
> +++ b/hw/block/xen_disk.c
> @@ -83,11 +83,9 @@ struct XenBlkDev {
> 
>      /* request lists */
>      QLIST_HEAD(inflight_head, ioreq) inflight;
> -    QLIST_HEAD(finished_head, ioreq) finished;
>      QLIST_HEAD(freelist_head, ioreq) freelist;
>      int                 requests_total;
>      int                 requests_inflight;
> -    int                 requests_finished;
>      unsigned int        max_requests;
> 
>      gboolean            feature_discard;
> @@ -104,6 +102,9 @@ struct XenBlkDev {
>  /* Threshold of in-flight requests above which we will start using
>   * blk_io_plug()/blk_io_unplug() to batch requests */
>  #define IO_PLUG_THRESHOLD 1
> +
> +static int blk_send_response(struct ioreq *ioreq);
> +
>  /* ------------------------------------------------------------- */
> 
>  static void ioreq_reset(struct ioreq *ioreq)
> @@ -155,12 +156,10 @@ static void ioreq_finish(struct ioreq *ioreq)
>      struct XenBlkDev *blkdev = ioreq->blkdev;
> 
>      QLIST_REMOVE(ioreq, list);
> -    QLIST_INSERT_HEAD(&blkdev->finished, ioreq, list);
>      blkdev->requests_inflight--;
> -    blkdev->requests_finished++;
>  }
> 
> -static void ioreq_release(struct ioreq *ioreq, bool finish)
> +static void ioreq_release(struct ioreq *ioreq)
>  {
>      struct XenBlkDev *blkdev = ioreq->blkdev;
> 
> @@ -168,11 +167,7 @@ static void ioreq_release(struct ioreq *ioreq, bool
> finish)
>      ioreq_reset(ioreq);
>      ioreq->blkdev = blkdev;
>      QLIST_INSERT_HEAD(&blkdev->freelist, ioreq, list);
> -    if (finish) {
> -        blkdev->requests_finished--;
> -    } else {
> -        blkdev->requests_inflight--;
> -    }
> +    blkdev->requests_inflight--;
>  }
> 
>  /*
> @@ -351,6 +346,10 @@ static void qemu_aio_complete(void *opaque, int
> ret)
>      default:
>          break;
>      }
> +    if (blk_send_response(ioreq)) {
> +        xen_pv_send_notify(&blkdev->xendev);
> +    }
> +    ioreq_release(ioreq);
>      qemu_bh_schedule(blkdev->bh);
> 
>  done:
> @@ -455,7 +454,7 @@ err:
>      return -1;
>  }
> 
> -static int blk_send_response_one(struct ioreq *ioreq)
> +static int blk_send_response(struct ioreq *ioreq)
>  {
>      struct XenBlkDev  *blkdev = ioreq->blkdev;
>      int               send_notify   = 0;
> @@ -504,22 +503,6 @@ static int blk_send_response_one(struct ioreq
> *ioreq)
>      return send_notify;
>  }
> 
> -/* walk finished list, send outstanding responses, free requests */
> -static void blk_send_response_all(struct XenBlkDev *blkdev)
> -{
> -    struct ioreq *ioreq;
> -    int send_notify = 0;
> -
> -    while (!QLIST_EMPTY(&blkdev->finished)) {
> -        ioreq = QLIST_FIRST(&blkdev->finished);
> -        send_notify += blk_send_response_one(ioreq);
> -        ioreq_release(ioreq, true);
> -    }
> -    if (send_notify) {
> -        xen_pv_send_notify(&blkdev->xendev);
> -    }
> -}
> -
>  static int blk_get_request(struct XenBlkDev *blkdev, struct ioreq *ioreq,
> RING_IDX rc)
>  {
>      switch (blkdev->protocol) {
> @@ -554,7 +537,6 @@ static void blk_handle_requests(struct XenBlkDev
> *blkdev)
>      rp = blkdev->rings.common.sring->req_prod;
>      xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
> 
> -    blk_send_response_all(blkdev);
>      /* If there were more than IO_PLUG_THRESHOLD ioreqs in flight
>       * when we got here, this is an indication that the bottleneck
>       * is below us, so it's worth beginning to batch up I/O requests
> @@ -597,10 +579,10 @@ static void blk_handle_requests(struct XenBlkDev
> *blkdev)
>                  break;
>              };
> 
> -            if (blk_send_response_one(ioreq)) {
> +            if (blk_send_response(ioreq)) {
>                  xen_pv_send_notify(&blkdev->xendev);
>              }
> -            ioreq_release(ioreq, false);
> +            ioreq_release(ioreq);
>              continue;
>          }
> 
> @@ -645,7 +627,6 @@ static void blk_alloc(struct XenDevice *xendev)
>      trace_xen_disk_alloc(xendev->name);
> 
>      QLIST_INIT(&blkdev->inflight);
> -    QLIST_INIT(&blkdev->finished);
>      QLIST_INIT(&blkdev->freelist);
> 
>      blkdev->iothread = iothread_create(xendev->name, &err);
> 



* Re: [Qemu-devel] [PATCH 3/3] Avoid repeated memory allocation in xen_disk
  2018-09-07 10:21 ` [Qemu-devel] [PATCH 3/3] Avoid repeated memory allocation in xen_disk Tim Smith
@ 2018-09-07 16:05   ` Paul Durrant
  0 siblings, 0 replies; 10+ messages in thread
From: Paul Durrant @ 2018-09-07 16:05 UTC (permalink / raw)
  To: Tim Smith, qemu-devel

> -----Original Message-----
> From: Qemu-devel [mailto:qemu-devel-
> bounces+paul.durrant=citrix.com@nongnu.org] On Behalf Of Tim Smith
> Sent: 07 September 2018 11:22
> To: qemu-devel@nongnu.org
> Subject: [Qemu-devel] [PATCH 3/3] Avoid repeated memory allocation in
> xen_disk
> 
> xen_disk currently allocates memory to hold the data for each ioreq
> as that ioreq is used, and frees it afterwards. Because it requires
> page-aligned blocks, this interacts poorly with non-page-aligned
> allocations and balloons the heap.
> 
> Instead, allocate the maximum possible requirement, which is
> BLKIF_MAX_SEGMENTS_PER_REQUEST pages (currently 11 pages) when
> the ioreq is created, and keep that allocation until it is destroyed.
> Since the ioreqs themselves are re-used via a free list, this
> should actually improve memory usage.
> 
> Signed-off-by: Tim Smith <tim.smith@citrix.com>

Reviewed-by: Paul Durrant <paul.durrant@citrix.com>

> ---
>  hw/block/xen_disk.c |   10 +++++-----
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
> index c11cd21d37..67f894bba5 100644
> --- a/hw/block/xen_disk.c
> +++ b/hw/block/xen_disk.c
> @@ -112,7 +112,6 @@ static void ioreq_reset(struct ioreq *ioreq)
>      memset(&ioreq->req, 0, sizeof(ioreq->req));
>      ioreq->status = 0;
>      ioreq->start = 0;
> -    ioreq->buf = NULL;
>      ioreq->size = 0;
>      ioreq->presync = 0;
> 
> @@ -137,6 +136,10 @@ static struct ioreq *ioreq_start(struct XenBlkDev
> *blkdev)
>          /* allocate new struct */
>          ioreq = g_malloc0(sizeof(*ioreq));
>          ioreq->blkdev = blkdev;
> +        /* We cannot need more pages per ioreq than this, and we do re-use
> ioreqs,
> +         * so allocate the memory once here, to be freed in blk_free() when
> the
> +         * ioreq is freed. */
> +        ioreq->buf = qemu_memalign(XC_PAGE_SIZE,
> BLKIF_MAX_SEGMENTS_PER_REQUEST * XC_PAGE_SIZE);
>          blkdev->requests_total++;
>          qemu_iovec_init(&ioreq->v, 1);
>      } else {
> @@ -313,14 +316,12 @@ static void qemu_aio_complete(void *opaque, int
> ret)
>          if (ret == 0) {
>              ioreq_grant_copy(ioreq);
>          }
> -        qemu_vfree(ioreq->buf);
>          break;
>      case BLKIF_OP_WRITE:
>      case BLKIF_OP_FLUSH_DISKCACHE:
>          if (!ioreq->req.nr_segments) {
>              break;
>          }
> -        qemu_vfree(ioreq->buf);
>          break;
>      default:
>          break;
> @@ -392,12 +393,10 @@ static int ioreq_runio_qemu_aio(struct ioreq
> *ioreq)
>  {
>      struct XenBlkDev *blkdev = ioreq->blkdev;
> 
> -    ioreq->buf = qemu_memalign(XC_PAGE_SIZE, ioreq->size);
>      if (ioreq->req.nr_segments &&
>          (ioreq->req.operation == BLKIF_OP_WRITE ||
>           ioreq->req.operation == BLKIF_OP_FLUSH_DISKCACHE) &&
>          ioreq_grant_copy(ioreq)) {
> -        qemu_vfree(ioreq->buf);
>          goto err;
>      }
> 
> @@ -989,6 +988,7 @@ static int blk_free(struct XenDevice *xendev)
>          ioreq = QLIST_FIRST(&blkdev->freelist);
>          QLIST_REMOVE(ioreq, list);
>          qemu_iovec_destroy(&ioreq->v);
> +        qemu_vfree(ioreq->buf);
>          g_free(ioreq);
>      }
> 
> 



* Re: [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour
  2018-11-02 10:00 ` [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour Tim Smith
  2018-11-02 11:14   ` Paul Durrant
@ 2018-11-02 13:53   ` Anthony PERARD
  1 sibling, 0 replies; 10+ messages in thread
From: Anthony PERARD @ 2018-11-02 13:53 UTC (permalink / raw)
  To: Tim Smith
  Cc: xen-devel, qemu-devel, qemu-block, Kevin Wolf, Paul Durrant,
	Stefano Stabellini, Max Reitz

On Fri, Nov 02, 2018 at 10:00:59AM +0000, Tim Smith wrote:
> When I/O consists of many small requests, performance is improved by
> batching them together in a single io_submit() call. When there are
> relatively few requests, the extra overhead is not worth it. This
> introduces a check to start batching I/O requests via blk_io_plug()/
> blk_io_unplug() in an amount proportional to the number which were
> already in flight at the time we started reading the ring.
> 
> Signed-off-by: Tim Smith <tim.smith@citrix.com>

Acked-by: Anthony PERARD <anthony.perard@citrix.com>

-- 
Anthony PERARD


* Re: [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour
  2018-11-02 10:00 ` [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour Tim Smith
@ 2018-11-02 11:14   ` Paul Durrant
  2018-11-02 13:53   ` Anthony PERARD
  1 sibling, 0 replies; 10+ messages in thread
From: Paul Durrant @ 2018-11-02 11:14 UTC (permalink / raw)
  To: Tim Smith, xen-devel, qemu-devel, qemu-block
  Cc: Anthony Perard, Kevin Wolf, Stefano Stabellini, Max Reitz

> -----Original Message-----
> From: Tim Smith [mailto:tim.smith@citrix.com]
> Sent: 02 November 2018 10:01
> To: xen-devel@lists.xenproject.org; qemu-devel@nongnu.org; qemu-
> block@nongnu.org
> Cc: Anthony Perard <anthony.perard@citrix.com>; Kevin Wolf
> <kwolf@redhat.com>; Paul Durrant <Paul.Durrant@citrix.com>; Stefano
> Stabellini <sstabellini@kernel.org>; Max Reitz <mreitz@redhat.com>
> Subject: [PATCH 1/3] Improve xen_disk batching behaviour
> 
> When I/O consists of many small requests, performance is improved by
> batching them together in a single io_submit() call. When there are
> relatively few requests, the extra overhead is not worth it. This
> introduces a check to start batching I/O requests via blk_io_plug()/
> blk_io_unplug() in an amount proportional to the number which were
> already in flight at the time we started reading the ring.
> 
> Signed-off-by: Tim Smith <tim.smith@citrix.com>

Reviewed-by: Paul Durrant <paul.durrant@citrix.com>

> ---
>  hw/block/xen_disk.c |   30 ++++++++++++++++++++++++++++++
>  1 file changed, 30 insertions(+)
> 
> diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
> index 36eff94f84..cb2881b7e6 100644
> --- a/hw/block/xen_disk.c
> +++ b/hw/block/xen_disk.c
> @@ -101,6 +101,9 @@ struct XenBlkDev {
>      AioContext          *ctx;
>  };
> 
> +/* Threshold of in-flight requests above which we will start using
> + * blk_io_plug()/blk_io_unplug() to batch requests */
> +#define IO_PLUG_THRESHOLD 1
>  /* ------------------------------------------------------------- */
> 
>  static void ioreq_reset(struct ioreq *ioreq)
> @@ -542,6 +545,8 @@ static void blk_handle_requests(struct XenBlkDev
> *blkdev)
>  {
>      RING_IDX rc, rp;
>      struct ioreq *ioreq;
> +    int inflight_atstart = blkdev->requests_inflight;
> +    int batched = 0;
> 
>      blkdev->more_work = 0;
> 
> @@ -550,6 +555,16 @@ static void blk_handle_requests(struct XenBlkDev
> *blkdev)
>      xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
> 
>      blk_send_response_all(blkdev);
> +    /* If there were more than IO_PLUG_THRESHOLD ioreqs in flight
> +     * when we got here, this is an indication that the bottleneck
> +     * is below us, so it's worth beginning to batch up I/O requests
> +     * rather than submitting them immediately. The maximum number
> +     * of requests we're willing to batch is the number already in
> +     * flight, so it can grow up to max_requests when the bottleneck
> +     * is below us */
> +    if (inflight_atstart > IO_PLUG_THRESHOLD) {
> +        blk_io_plug(blkdev->blk);
> +    }
>      while (rc != rp) {
>          /* pull request from ring */
>          if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc)) {
> @@ -589,7 +604,22 @@ static void blk_handle_requests(struct XenBlkDev
> *blkdev)
>              continue;
>          }
> 
> +        if (inflight_atstart > IO_PLUG_THRESHOLD &&
> +            batched >= inflight_atstart) {
> +            blk_io_unplug(blkdev->blk);
> +        }
>          ioreq_runio_qemu_aio(ioreq);
> +        if (inflight_atstart > IO_PLUG_THRESHOLD) {
> +            if (batched >= inflight_atstart) {
> +                blk_io_plug(blkdev->blk);
> +                batched = 0;
> +            } else {
> +                batched++;
> +            }
> +        }
> +    }
> +    if (inflight_atstart > IO_PLUG_THRESHOLD) {
> +        blk_io_unplug(blkdev->blk);
>      }
> 
>      if (blkdev->more_work && blkdev->requests_inflight < blkdev-
> >max_requests) {



* [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour
  2018-11-02 10:00 [Qemu-devel] [PATCH 0/3] Performance improvements for xen_disk v2 Tim Smith
@ 2018-11-02 10:00 ` Tim Smith
  2018-11-02 11:14   ` Paul Durrant
  2018-11-02 13:53   ` Anthony PERARD
  0 siblings, 2 replies; 10+ messages in thread
From: Tim Smith @ 2018-11-02 10:00 UTC (permalink / raw)
  To: xen-devel, qemu-devel, qemu-block
  Cc: Anthony Perard, Kevin Wolf, Paul Durrant, Stefano Stabellini, Max Reitz

When I/O consists of many small requests, performance is improved by
batching them together in a single io_submit() call. When there are
relatively few requests, the extra overhead is not worth it. This
introduces a check to start batching I/O requests via blk_io_plug()/
blk_io_unplug() in an amount proportional to the number which were
already in flight at the time we started reading the ring.

Signed-off-by: Tim Smith <tim.smith@citrix.com>
---
 hw/block/xen_disk.c |   30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 36eff94f84..cb2881b7e6 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -101,6 +101,9 @@ struct XenBlkDev {
     AioContext          *ctx;
 };
 
+/* Threshold of in-flight requests above which we will start using
+ * blk_io_plug()/blk_io_unplug() to batch requests */
+#define IO_PLUG_THRESHOLD 1
 /* ------------------------------------------------------------- */
 
 static void ioreq_reset(struct ioreq *ioreq)
@@ -542,6 +545,8 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
 {
     RING_IDX rc, rp;
     struct ioreq *ioreq;
+    int inflight_atstart = blkdev->requests_inflight;
+    int batched = 0;
 
     blkdev->more_work = 0;
 
@@ -550,6 +555,16 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
     xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
 
     blk_send_response_all(blkdev);
+    /* If there were more than IO_PLUG_THRESHOLD ioreqs in flight
+     * when we got here, this is an indication that the bottleneck
+     * is below us, so it's worth beginning to batch up I/O requests
+     * rather than submitting them immediately. The maximum number
+     * of requests we're willing to batch is the number already in
+     * flight, so it can grow up to max_requests when the bottleneck
+     * is below us */
+    if (inflight_atstart > IO_PLUG_THRESHOLD) {
+        blk_io_plug(blkdev->blk);
+    }
     while (rc != rp) {
         /* pull request from ring */
         if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc)) {
@@ -589,7 +604,22 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
             continue;
         }
 
+        if (inflight_atstart > IO_PLUG_THRESHOLD &&
+            batched >= inflight_atstart) {
+            blk_io_unplug(blkdev->blk);
+        }
         ioreq_runio_qemu_aio(ioreq);
+        if (inflight_atstart > IO_PLUG_THRESHOLD) {
+            if (batched >= inflight_atstart) {
+                blk_io_plug(blkdev->blk);
+                batched = 0;
+            } else {
+                batched++;
+            }
+        }
+    }
+    if (inflight_atstart > IO_PLUG_THRESHOLD) {
+        blk_io_unplug(blkdev->blk);
     }
 
     if (blkdev->more_work && blkdev->requests_inflight < blkdev->max_requests) {


* [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour
  2018-11-02  9:29 [Qemu-devel] [PATCH 0/3] Performance improvements for xen_disk Tim Smith
@ 2018-11-02  9:29 ` Tim Smith
  0 siblings, 0 replies; 10+ messages in thread
From: Tim Smith @ 2018-11-02  9:29 UTC (permalink / raw)
  To: qemu-devel; +Cc: Paul Durrant

When I/O consists of many small requests, performance is improved by
batching them together in a single io_submit() call. When there are
relatively few requests, the extra overhead is not worth it. This
introduces a check to start batching I/O requests via blk_io_plug()/
blk_io_unplug() in an amount proportional to the number which were
already in flight at the time we started reading the ring.

Signed-off-by: Tim Smith <tim.smith@citrix.com>
---
 hw/block/xen_disk.c |   29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 36eff94f84..6cb40d66fa 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -101,6 +101,9 @@ struct XenBlkDev {
     AioContext          *ctx;
 };
 
+/* Threshold of in-flight requests above which we will start using
+ * blk_io_plug()/blk_io_unplug() to batch requests */
+#define IO_PLUG_THRESHOLD 1
 /* ------------------------------------------------------------- */
 
 static void ioreq_reset(struct ioreq *ioreq)
@@ -542,6 +545,8 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
 {
     RING_IDX rc, rp;
     struct ioreq *ioreq;
+    int inflight_atstart = blkdev->requests_inflight;
+    int batched = 0;
 
     blkdev->more_work = 0;
 
@@ -550,6 +555,16 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
     xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
 
     blk_send_response_all(blkdev);
+    /* If there were more than IO_PLUG_THRESHOLD ioreqs in flight
+     * when we got here, this is an indication that the bottleneck
+     * is below us, so it's worth beginning to batch up I/O requests
+     * rather than submitting them immediately. The maximum number
+     * of requests we're willing to batch is the number already in
+     * flight, so it can grow up to max_requests when the bottleneck
+     * is below us */
+    if (inflight_atstart > IO_PLUG_THRESHOLD) {
+        blk_io_plug(blkdev->blk);
+    }
     while (rc != rp) {
         /* pull request from ring */
         if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc)) {
@@ -589,7 +604,21 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
             continue;
         }
 
+        if (inflight_atstart > IO_PLUG_THRESHOLD && batched >= inflight_atstart) {
+            blk_io_unplug(blkdev->blk);
+        }
         ioreq_runio_qemu_aio(ioreq);
+        if (inflight_atstart > IO_PLUG_THRESHOLD) {
+            if (batched >= inflight_atstart) {
+                blk_io_plug(blkdev->blk);
+                batched = 0;
+            } else {
+                batched++;
+            }
+        }
+    }
+    if (inflight_atstart > IO_PLUG_THRESHOLD) {
+        blk_io_unplug(blkdev->blk);
     }
 
     if (blkdev->more_work && blkdev->requests_inflight < blkdev->max_requests) {


Thread overview: 10+ messages
2018-09-07 10:21 [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour Tim Smith
2018-09-07 10:21 ` [Qemu-devel] [PATCH 2/3] Improve xen_disk response latency Tim Smith
2018-09-07 16:03   ` Paul Durrant
2018-09-07 10:21 ` [Qemu-devel] [PATCH 3/3] Avoid repeated memory allocation in xen_disk Tim Smith
2018-09-07 16:05   ` Paul Durrant
2018-09-07 14:11 ` [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour Paul Durrant
2018-11-02  9:29 [Qemu-devel] [PATCH 0/3] Performance improvements for xen_disk Tim Smith
2018-11-02  9:29 ` [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour Tim Smith
2018-11-02 10:00 [Qemu-devel] [PATCH 0/3] Performance improvements for xen_disk v2 Tim Smith
2018-11-02 10:00 ` [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour Tim Smith
2018-11-02 11:14   ` Paul Durrant
2018-11-02 13:53   ` Anthony PERARD
