>From a42c0af6c96b47d9b915e4efdaa0211e9b6b5253 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 18 Nov 2018 16:24:22 +0100 Subject: cleanup the aio_poll_reap related flow Should not change behavior except for fixing a bug where the number of returned iocbs was incorrectly overwritten when we actually loop on poll_done for the first call. I don't really understand why we loop there but not the second time we call it. Nor do I really understand the nested loop in the callers of __aio_check_polled, but that is for another time.. --- fs/aio.c | 152 ++++++++++++++++++------------------------------------- 1 file changed, 50 insertions(+), 102 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 348f04129035..d9198f99ed97 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1091,7 +1091,7 @@ static inline void iocb_put(struct aio_kiocb *iocb) } } -static void iocb_put_many(struct kioctx *ctx, void **iocbs, int nr) +static void iocb_put_many(struct kioctx *ctx, void **iocbs, int nr) { if (nr) { kmem_cache_free_bulk(kiocb_cachep, nr, iocbs); @@ -1316,42 +1316,33 @@ static struct block_device *aio_bdev_host(struct kiocb *req) #define AIO_POLL_STACK 8 -struct aio_poll_data { - struct io_event __user *evs; - int off; - long max; - void *iocbs[AIO_POLL_STACK]; - int to_free; -}; - /* - * Process the done_list of iocbs, copy to user space, and free them. - * Migh return with data->iocbs holding entries, in which case - * data->to_free is non-zero and the caller should free them. + * Process the done_list of iocbs, copy to user space, and free them. Might + * return with iocbs holding entries, in which case *to_free is non-zero and + * the caller should free them. */ -static long aio_poll_reap(struct kioctx *ctx, struct aio_poll_data *data) +static long aio_poll_reap(struct kioctx *ctx, struct io_event __user *evs, + int off, long max, void **iocbs, int *to_free) __releases(&ctx->poll_lock) __acquires(&ctx->poll_lock) { struct aio_kiocb *iocb; int ret, nr = 0; -restart: - while (!list_empty(&ctx->poll_done)) { + while ((iocb = list_first_entry_or_null(&ctx->poll_done, + struct aio_kiocb, ki_poll_list))) { struct io_event __user *uev; struct io_event ev; - if (data->to_free == ARRAY_SIZE(data->iocbs)) { - iocb_put_many(ctx, data->iocbs, data->to_free); - data->to_free = 0; + if (*to_free == AIO_POLL_STACK) { + iocb_put_many(ctx, iocbs, *to_free); + *to_free = 0; } - iocb = list_first_entry(&ctx->poll_done, struct aio_kiocb, - ki_poll_list); list_del(&iocb->ki_poll_list); + iocbs[*to_free++] = iocb; - data->iocbs[data->to_free++] = iocb; - if (!data->evs) { + if (!evs) { nr++; continue; } @@ -1361,65 +1352,26 @@ static long aio_poll_reap(struct kioctx *ctx, struct aio_poll_data *data) ev.res = iocb->ki_poll_res; ev.res2 = iocb->ki_poll_res2; - uev = data->evs + nr + data->off; - if (!__copy_to_user_inatomic(uev, &ev, sizeof(*uev))) { - nr++; - if (nr + data->off < data->max) - continue; - break; + uev = evs + nr + off; + if (unlikely(__copy_to_user_inatomic(uev, &ev, sizeof(*uev)))) { + /* + * Unexpected slow path, drop lock and attempt copy + * again. If this also fails we are done. + */ + spin_unlock_irq(&ctx->poll_lock); + ret = copy_to_user(uev, &ev, sizeof(*uev)); + spin_lock_irq(&ctx->poll_lock); + if (ret) + return nr ? nr : -EFAULT; } - /* - * Unexpected slow path, drop lock and attempt copy. If this - * also fails, we're done. If it worked, we got another event - * and we restart the list check since we dropped the lock. - */ - spin_unlock_irq(&ctx->poll_lock); - ret = copy_to_user(uev, &ev, sizeof(*uev)); - spin_lock_irq(&ctx->poll_lock); - if (!ret) { - nr++; - if (nr + data->off < data->max) - goto restart; - + if (++nr + off == max) break; - } - - if (!nr) - nr = -EFAULT; - break; } return nr; } -/* - * Reap done events, if any - */ -static long aio_poll_find(struct kioctx *ctx, struct io_event __user *evs, - int off, long max) -{ - struct aio_poll_data data = { - .evs = evs, - .off = off, - .max = max, - .to_free = 0 - }; - int ret; - - if (list_empty_careful(&ctx->poll_done)) - return 0; - - spin_lock_irq(&ctx->poll_lock); - ret = aio_poll_reap(ctx, &data); - spin_unlock_irq(&ctx->poll_lock); - - if (data.to_free) - iocb_put_many(ctx, data.iocbs, data.to_free); - - return ret; -} - static void aio_poll_for_events(struct kioctx *ctx, struct aio_iopoll_data *pd, unsigned int nr_pd, int off, long min, long max) { @@ -1448,42 +1400,32 @@ static int __aio_check_polled(struct kioctx *ctx, struct io_event __user *event, int off, unsigned int *entries, long min, long max) { struct aio_iopoll_data pd[AIO_POLL_STACK]; + void *iocbs[AIO_POLL_STACK]; + int to_free = 0; struct aio_kiocb *iocb; unsigned int nr_pd; - int ret, pre = 0; + int ret, found = 0; if (list_empty_careful(&ctx->poll_pending)) goto out; - spin_lock_irq(&ctx->poll_lock); - /* * Check if we already have done events that satisfy what we need */ - while (!list_empty(&ctx->poll_done)) { - struct aio_poll_data data = { - .evs = event, - .off = off, - .max = max, - .to_free = 0 - }; - - ret = aio_poll_reap(ctx, &data); - if (!ret) - break; - else if (ret < 0 || ret + off >= min) { + spin_lock_irq(&ctx->poll_lock); + while ((ret = aio_poll_reap(ctx, event, off, max, iocbs, &to_free))) { + if (ret < 0 || ret + off >= min) { spin_unlock_irq(&ctx->poll_lock); - - if (data.to_free) - iocb_put_many(ctx, data.iocbs, data.to_free); - + if (to_free) + iocb_put_many(ctx, iocbs, to_free); return ret; } - if (data.to_free) - iocb_put_many(ctx, data.iocbs, data.to_free); - - pre = ret; + if (to_free) { + iocb_put_many(ctx, iocbs, to_free); + to_free = 0; + } + found += ret; off += ret; } @@ -1518,13 +1460,19 @@ static int __aio_check_polled(struct kioctx *ctx, struct io_event __user *event, } out: - ret = aio_poll_find(ctx, event, off, max); - if (ret >= 0) - return pre + ret; - else if (pre) - return pre; + if (!list_empty_careful(&ctx->poll_done)) { + spin_lock_irq(&ctx->poll_lock); + ret = aio_poll_reap(ctx, event, off, max, iocbs, &to_free); + spin_unlock_irq(&ctx->poll_lock); + + if (to_free) + iocb_put_many(ctx, iocbs, to_free); + if (ret < 0) + return ret; + found += ret; + } - return ret; + return found; } /* -- 2.19.1