+ aio-allocate-kiocbs-in-batches.patch added to -mm tree
From: akpm @ 2011-09-28 19:14 UTC
To: mm-commits; +Cc: jmoyer, dehrenberg
The patch titled
Subject: aio: allocate kiocbs in batches
has been added to the -mm tree. Its filename is
aio-allocate-kiocbs-in-batches.patch
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/SubmitChecklist when testing your code ***
See http://userweb.kernel.org/~akpm/stuff/added-to-mm.txt to find
out what to do about this
The current -mm tree may be found at http://userweb.kernel.org/~akpm/mmotm/
------------------------------------------------------
From: Jeff Moyer <jmoyer@redhat.com>
Subject: aio: allocate kiocbs in batches
In testing aio on a fast storage device, I found that the context lock
takes up a fair amount of CPU time in the I/O submission path. The reason
is that we take it for every I/O submitted (see __aio_get_req). Since we
know how many I/Os are passed to io_submit, we can preallocate the kiocbs
in batches, reducing the number of times we take and release the lock.
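
To picture the change: instead of taking ctx_lock once per iocb, the lock
is taken once per batch. A minimal, generic C sketch of that pattern
follows (hypothetical pool/request types, not the kernel code itself;
error handling elided):

/*
 * Hypothetical illustration: per-request locking versus reserving
 * a whole batch of slots in one critical section.
 */
#include <pthread.h>
#include <stdlib.h>

struct pool {
	pthread_mutex_t lock;
	long slots;			/* free completion-ring slots */
};

/* Before: one lock round trip per request. */
static void *alloc_one(struct pool *p)
{
	void *req = NULL;

	pthread_mutex_lock(&p->lock);
	if (p->slots > 0) {
		p->slots--;
		req = malloc(64);	/* stands in for kmem_cache_alloc */
	}
	pthread_mutex_unlock(&p->lock);
	return req;
}

/* After: reserve up to nr slots under a single lock hold. */
static long alloc_batch(struct pool *p, void **reqs, long nr)
{
	long i, got;

	pthread_mutex_lock(&p->lock);
	got = nr < p->slots ? nr : p->slots;
	p->slots -= got;
	pthread_mutex_unlock(&p->lock);
	for (i = 0; i < got; i++)
		reqs[i] = malloc(64);
	return got;
}
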
In my testing, I was able to reduce the amount of time spent in
_raw_spin_lock_irq by 0.56% (average of 3 runs). The command I used to
test this was:
aio-stress -O -o 2 -o 3 -r 8 -d 128 -b 32 -i 32 -s 16384 <dev>
I also tested the patch with various numbers of events passed to
io_submit, and I ran the xfstests aio group of tests to ensure I didn't
break anything.
Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Cc: Daniel Ehrenberg <dehrenberg@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
fs/aio.c | 136 +++++++++++++++++++++++++++++++++---------
include/linux/aio.h | 1 +
2 files changed, 108 insertions(+), 29 deletions(-)
diff -puN fs/aio.c~aio-allocate-kiocbs-in-batches fs/aio.c
--- a/fs/aio.c~aio-allocate-kiocbs-in-batches
+++ a/fs/aio.c
@@ -440,8 +440,6 @@ void exit_aio(struct mm_struct *mm)
static struct kiocb *__aio_get_req(struct kioctx *ctx)
{
struct kiocb *req = NULL;
- struct aio_ring *ring;
- int okay = 0;
req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
if (unlikely(!req))
@@ -459,39 +457,114 @@ static struct kiocb *__aio_get_req(struc
INIT_LIST_HEAD(&req->ki_run_list);
req->ki_eventfd = NULL;
- /* Check if the completion queue has enough free space to
- * accept an event from this io.
- */
+ return req;
+}
+
+/*
+ * struct kiocb's are allocated in batches to reduce the number of
+ * times the ctx lock is acquired and released.
+ */
+#define KIOCB_BATCH_SIZE 32L
+struct kiocb_batch {
+ struct list_head head;
+ long count; /* number of requests left to allocate */
+};
+
+static void kiocb_batch_init(struct kiocb_batch *batch, long total)
+{
+ INIT_LIST_HEAD(&batch->head);
+ batch->count = total;
+}
+
+static void kiocb_batch_free(struct kiocb_batch *batch)
+{
+ struct kiocb *req, *n;
+
+ list_for_each_entry_safe(req, n, &batch->head, ki_batch) {
+ list_del(&req->ki_batch);
+ kmem_cache_free(kiocb_cachep, req);
+ }
+}
+
+/*
+ * Allocate a batch of kiocbs. This avoids taking and dropping the
+ * context lock a lot during setup.
+ */
+static int kiocb_batch_refill(struct kioctx *ctx, struct kiocb_batch *batch)
+{
+ unsigned short allocated, to_alloc;
+ long avail;
+ bool called_fput = false;
+ struct kiocb *req, *n;
+ struct aio_ring *ring;
+
+ to_alloc = min(batch->count, KIOCB_BATCH_SIZE);
+ for (allocated = 0; allocated < to_alloc; allocated++) {
+ req = __aio_get_req(ctx);
+ if (!req)
+ /* allocation failed, go with what we've got */
+ break;
+ list_add(&req->ki_batch, &batch->head);
+ }
+
+ if (allocated == 0)
+ goto out;
+
+retry:
spin_lock_irq(&ctx->ctx_lock);
- ring = kmap_atomic(ctx->ring_info.ring_pages[0], KM_USER0);
- if (ctx->reqs_active < aio_ring_avail(&ctx->ring_info, ring)) {
+ ring = kmap_atomic(ctx->ring_info.ring_pages[0]);
+
+ avail = aio_ring_avail(&ctx->ring_info, ring) - ctx->reqs_active;
+ BUG_ON(avail < 0);
+ if (avail == 0 && !called_fput) {
+ /*
+ * Handle a potential starvation case. It is possible that
+ * we hold the last reference on a struct file, causing us
+ * to delay the final fput to non-irq context. In this case,
+ * ctx->reqs_active is artificially high. Calling the fput
+ * routine here may free up a slot in the event completion
+ * ring, allowing this allocation to succeed.
+ */
+ kunmap_atomic(ring);
+ spin_unlock_irq(&ctx->ctx_lock);
+ aio_fput_routine(NULL);
+ called_fput = true;
+ goto retry;
+ }
+
+ if (avail < allocated) {
+ /* Trim back the number of requests. */
+ list_for_each_entry_safe(req, n, &batch->head, ki_batch) {
+ list_del(&req->ki_batch);
+ kmem_cache_free(kiocb_cachep, req);
+ if (--allocated <= avail)
+ break;
+ }
+ }
+
+ batch->count -= allocated;
+ list_for_each_entry(req, &batch->head, ki_batch) {
list_add(&req->ki_list, &ctx->active_reqs);
ctx->reqs_active++;
- okay = 1;
}
- kunmap_atomic(ring, KM_USER0);
- spin_unlock_irq(&ctx->ctx_lock);
- if (!okay) {
- kmem_cache_free(kiocb_cachep, req);
- req = NULL;
- }
+ kunmap_atomic(ring);
+ spin_unlock_irq(&ctx->ctx_lock);
- return req;
+out:
+ return allocated;
}
-static inline struct kiocb *aio_get_req(struct kioctx *ctx)
+static inline struct kiocb *aio_get_req(struct kioctx *ctx,
+ struct kiocb_batch *batch)
{
struct kiocb *req;
- /* Handle a potential starvation case -- should be exceedingly rare as
- * requests will be stuck on fput_head only if the aio_fput_routine is
- * delayed and the requests were the last user of the struct file.
- */
- req = __aio_get_req(ctx);
- if (unlikely(NULL == req)) {
- aio_fput_routine(NULL);
- req = __aio_get_req(ctx);
- }
+
+ if (list_empty(&batch->head))
+ if (kiocb_batch_refill(ctx, batch) == 0)
+ return NULL;
+ req = list_first_entry(&batch->head, struct kiocb, ki_batch);
+ list_del(&req->ki_batch);
return req;
}
@@ -1515,7 +1588,8 @@ static ssize_t aio_setup_iocb(struct kio
}
static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
- struct iocb *iocb, bool compat)
+ struct iocb *iocb, struct kiocb_batch *batch,
+ bool compat)
{
struct kiocb *req;
struct file *file;
@@ -1541,7 +1615,7 @@ static int io_submit_one(struct kioctx *
if (unlikely(!file))
return -EBADF;
- req = aio_get_req(ctx); /* returns with 2 references to req */
+ req = aio_get_req(ctx, batch); /* returns with 2 references to req */
if (unlikely(!req)) {
fput(file);
return -EAGAIN;
@@ -1621,8 +1695,9 @@ long do_io_submit(aio_context_t ctx_id,
{
struct kioctx *ctx;
long ret = 0;
- int i;
+ int i = 0;
struct blk_plug plug;
+ struct kiocb_batch batch;
if (unlikely(nr < 0))
return -EINVAL;
@@ -1639,6 +1714,8 @@ long do_io_submit(aio_context_t ctx_id,
return -EINVAL;
}
+ kiocb_batch_init(&batch, nr);
+
blk_start_plug(&plug);
/*
@@ -1659,12 +1736,13 @@ long do_io_submit(aio_context_t ctx_id,
break;
}
- ret = io_submit_one(ctx, user_iocb, &tmp, compat);
+ ret = io_submit_one(ctx, user_iocb, &tmp, &batch, compat);
if (ret)
break;
}
blk_finish_plug(&plug);
+ kiocb_batch_free(&batch);
put_ioctx(ctx);
return i ? i : ret;
}
diff -puN include/linux/aio.h~aio-allocate-kiocbs-in-batches include/linux/aio.h
--- a/include/linux/aio.h~aio-allocate-kiocbs-in-batches
+++ a/include/linux/aio.h
@@ -117,6 +117,7 @@ struct kiocb {
struct list_head ki_list; /* the aio core uses this
* for cancellation */
+ struct list_head ki_batch; /* batch allocation */
/*
* If the aio_resfd field of the userspace iocb is not zero,
_
Subject: aio: allocate kiocbs in batches
Patches currently in -mm which might be from jmoyer@redhat.com are
linux-next.patch
aio-allocate-kiocbs-in-batches.patch
dio-separate-fields-only-used-in-the-submission-path-from-struct-dio.patch
dio-fix-a-wrong-comment.patch
dio-rearrange-fields-in-dio-dio_submit-to-avoid-holes.patch
dio-use-a-slab-cache-for-struct-dio.patch
dio-separate-map_bh-from-dio-v2.patch
dio-inline-the-complete-submission-path-v2.patch
dio-merge-direct_io_walker-into-__blockdev_direct_io.patch
dio-remove-unnecessary-dio-argument-from-dio_pages_present.patch
dio-remove-unused-dio-parameter-from-dio_bio_add_page.patch
vfs-cache-request_queue-in-struct-block_device.patch
dio-optimize-cache-misses-in-the-submission-path-v2.patch
+ aio-allocate-kiocbs-in-batches.patch added to -mm tree
From: akpm @ 2011-09-01 0:13 UTC
To: mm-commits; +Cc: jmoyer
The patch titled
aio: allocate kiocbs in batches
has been added to the -mm tree. Its filename is
aio-allocate-kiocbs-in-batches.patch
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/SubmitChecklist when testing your code ***
See http://userweb.kernel.org/~akpm/stuff/added-to-mm.txt to find
out what to do about this
The current -mm tree may be found at http://userweb.kernel.org/~akpm/mmotm/
------------------------------------------------------
Subject: aio: allocate kiocbs in batches
From: Jeff Moyer <jmoyer@redhat.com>
When looking at perf data for my aio-stress test runs against some
relatively fast storage, I noticed that __aio_get_req was taking up more
CPU than I expected. The command line for aio-stress was:
aio-stress -O -r 8 -d 256 -b 32 -l -s 16384 /dev/sdk
which means we're sending 32 iocbs to io_submit at a time. Here's the
perf report -g output:
Events: 41K cycles
- 7.33% aio-stress [kernel.kallsyms] [k] _raw_spin_lock_irq
- _raw_spin_lock_irq
+ 75.38% scsi_request_fn
- 10.67% __aio_get_req
io_submit_one
The spinlock is the ctx_lock, taken when allocating the request. This
seemed like pretty low-hanging fruit to me, so I cooked up a patch to
allocate 32 kiocbs at a time. This is the result:
Events: 50K cycles
- 5.54% aio-stress [kernel.kallsyms] [k] _raw_spin_lock_irq
- _raw_spin_lock_irq
+ 83.23% scsi_request_fn
- 5.14% io_submit_one
So, aio-stress now takes up ~2% less CPU. This seems like a worthwhile
thing to do, but I don't have a system handy that would show any real
performance gains from it. The higher the number of iocbs passed to
io_submit, the more of a win this will be.
I tested this with multiple different batch sizes (1, 32, 33, 64, 256) and
it didn't blow up.
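
In outline, the stash hands out preallocated requests from a fixed
window and refills it in batches. A simplified userspace model of the
bookkeeping follows (malloc/free stand in for the slab calls, and the
delayed-fput handling is elided; this is not the kernel code below):

#include <stdlib.h>

#define STASH_SIZE 32

struct stash {
	unsigned head, tail;	/* window of ready requests */
	unsigned total, used;	/* overall progress */
	void *reqs[STASH_SIZE];
};

static int stash_refill(struct stash *s, unsigned ring_space)
{
	unsigned i, nr = s->total - s->used;

	if (nr > STASH_SIZE)
		nr = STASH_SIZE;	/* never outgrow the window */
	s->head = 0;
	for (i = 0; i < nr; i++) {
		s->reqs[i] = malloc(64);
		if (!s->reqs[i])
			break;		/* go with what we have */
	}
	while (i > ring_space)		/* trim to completion-ring room */
		free(s->reqs[--i]);
	s->tail = i;
	return i ? 0 : -1;
}

static void *stash_get(struct stash *s)
{
	if (s->head == s->tail)
		return NULL;		/* caller must refill */
	s->used++;
	return s->reqs[s->head++];
}
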
Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
fs/aio.c | 126 +++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 98 insertions(+), 28 deletions(-)
diff -puN fs/aio.c~aio-allocate-kiocbs-in-batches fs/aio.c
--- a/fs/aio.c~aio-allocate-kiocbs-in-batches
+++ a/fs/aio.c
@@ -440,8 +440,6 @@ void exit_aio(struct mm_struct *mm)
static struct kiocb *__aio_get_req(struct kioctx *ctx)
{
struct kiocb *req = NULL;
- struct aio_ring *ring;
- int okay = 0;
req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
if (unlikely(!req))
@@ -459,40 +457,107 @@ static struct kiocb *__aio_get_req(struc
INIT_LIST_HEAD(&req->ki_run_list);
req->ki_eventfd = NULL;
- /* Check if the completion queue has enough free space to
- * accept an event from this io.
+ return req;
+}
+
+#define KIOCB_STASH_SIZE 32
+struct kiocb_stash {
+ unsigned head;
+ unsigned tail;
+ unsigned total;
+ unsigned used;
+ struct kiocb *kiocbs[KIOCB_STASH_SIZE];
+};
+static void kiocb_stash_init(struct kiocb_stash *stash, int nr)
+{
+ stash->head = stash->tail = 0;
+ stash->total = nr;
+ stash->used = 0;
+}
+static void kiocb_stash_free(struct kiocb_stash *stash, int used)
+{
+ int i;
+
+ for (i = used; i < stash->total; i++)
+ kmem_cache_free(kiocb_cachep, stash->kiocbs[i]);
+}
+static inline unsigned kiocb_stash_avail(struct kiocb_stash *stash)
+{
+ return stash->tail - stash->head;
+}
+
+/*
+ * Allocate nr kiocbs. This avoids taking and dropping the lock a
+ * lot during setup.
+ */
+static int kiocb_stash_refill(struct kioctx *ctx, struct kiocb_stash *stash)
+{
+ int i, nr = KIOCB_STASH_SIZE;
+ int avail;
+ int called_fput = 0;
+ struct aio_ring *ring;
+
+ /*
+ * Allocate the requests up-front. Later, we'll trim the list
+ * if there isn't enough room in the completion ring.
*/
+ stash->head = 0;
+ if ((stash->total - stash->used) % KIOCB_STASH_SIZE)
+ /* don't need to allocate a full batch */
+ nr = stash->total - stash->used;
+
+ for (i = 0; i < nr; i++) {
+ stash->kiocbs[i] = __aio_get_req(ctx);
+ if (!stash->kiocbs[i] && !called_fput) {
+ /* Handle a potential starvation case --
+ * should be exceedingly rare as requests will
+ * be stuck on fput_head only if the
+ * aio_fput_routine is delayed and the
+ * requests were the last user of the struct
+ * file.
+ */
+ aio_fput_routine(NULL);
+ called_fput = 1;
+ i--;
+ continue;
+ }
+ }
+ stash->tail = i;
+
spin_lock_irq(&ctx->ctx_lock);
ring = kmap_atomic(ctx->ring_info.ring_pages[0], KM_USER0);
- if (ctx->reqs_active < aio_ring_avail(&ctx->ring_info, ring)) {
- list_add(&req->ki_list, &ctx->active_reqs);
+
+ avail = aio_ring_avail(&ctx->ring_info, ring) + ctx->reqs_active;
+ if (avail < stash->tail) {
+ /* Trim back the number of requests. */
+ for (i = avail; i < stash->tail; i++)
+ kmem_cache_free(kiocb_cachep, stash->kiocbs[i]);
+ stash->tail = avail;
+ }
+ for (i = 0; i < stash->tail; i++) {
+ list_add(&stash->kiocbs[i]->ki_list, &ctx->active_reqs);
ctx->reqs_active++;
- okay = 1;
}
kunmap_atomic(ring, KM_USER0);
spin_unlock_irq(&ctx->ctx_lock);
- if (!okay) {
- kmem_cache_free(kiocb_cachep, req);
- req = NULL;
- }
-
- return req;
+ if (stash->tail == 0)
+ return -1;
+ return 0;
}
-static inline struct kiocb *aio_get_req(struct kioctx *ctx)
+static inline struct kiocb *aio_get_req(struct kioctx *ctx,
+ struct kiocb_stash *stash)
{
- struct kiocb *req;
- /* Handle a potential starvation case -- should be exceedingly rare as
- * requests will be stuck on fput_head only if the aio_fput_routine is
- * delayed and the requests were the last user of the struct file.
- */
- req = __aio_get_req(ctx);
- if (unlikely(NULL == req)) {
- aio_fput_routine(NULL);
- req = __aio_get_req(ctx);
+ int ret;
+
+ if (kiocb_stash_avail(stash) == 0) {
+ ret = kiocb_stash_refill(ctx, stash);
+ if (ret)
+ return NULL;
}
- return req;
+ stash->used++;
+ return stash->kiocbs[stash->head++];
}
static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
@@ -1515,7 +1580,8 @@ static ssize_t aio_setup_iocb(struct kio
}
static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
- struct iocb *iocb, bool compat)
+ struct iocb *iocb, struct kiocb_stash *stash,
+ bool compat)
{
struct kiocb *req;
struct file *file;
@@ -1541,7 +1607,7 @@ static int io_submit_one(struct kioctx *
if (unlikely(!file))
return -EBADF;
- req = aio_get_req(ctx); /* returns with 2 references to req */
+ req = aio_get_req(ctx, stash); /* returns with 2 references to req */
if (unlikely(!req)) {
fput(file);
return -EAGAIN;
@@ -1621,8 +1687,9 @@ long do_io_submit(aio_context_t ctx_id,
{
struct kioctx *ctx;
long ret = 0;
- int i;
+ int i = 0;
struct blk_plug plug;
+ struct kiocb_stash kiocbs;
if (unlikely(nr < 0))
return -EINVAL;
@@ -1639,6 +1706,8 @@ long do_io_submit(aio_context_t ctx_id,
return -EINVAL;
}
+ kiocb_stash_init(&kiocbs, nr);
+
blk_start_plug(&plug);
/*
@@ -1659,12 +1728,13 @@ long do_io_submit(aio_context_t ctx_id,
break;
}
- ret = io_submit_one(ctx, user_iocb, &tmp, compat);
+ ret = io_submit_one(ctx, user_iocb, &tmp, &kiocbs, compat);
if (ret)
break;
}
blk_finish_plug(&plug);
+ kiocb_stash_free(&kiocbs, i);
put_ioctx(ctx);
return i ? i : ret;
}
_
Patches currently in -mm which might be from jmoyer@redhat.com are
aio-allocate-kiocbs-in-batches.patch
aio-allocate-kiocbs-in-batches-fix.patch
dio-separate-fields-only-used-in-the-submission-path-from-struct-dio.patch
dio-fix-a-wrong-comment.patch
dio-rearrange-fields-in-dio-dio_submit-to-avoid-holes.patch
dio-use-a-slab-cache-for-struct-dio.patch
dio-separate-map_bh-from-dio-v2.patch
dio-inline-the-complete-submission-path-v2.patch
dio-merge-direct_io_walker-into-__blockdev_direct_io.patch
dio-remove-unnecessary-dio-argument-from-dio_pages_present.patch
dio-remove-unused-dio-parameter-from-dio_bio_add_page.patch
vfs-cache-request_queue-in-struct-block_device.patch
dio-optimize-cache-misses-in-the-submission-path-v2.patch