From: Jens Axboe <axboe@kernel.dk> To: linux-fsdevel@vger.kernel.org, linux-aio@kvack.org, linux-block@vger.kernel.org, linux-arch@vger.kernel.org Cc: hch@lst.de, jmoyer@redhat.com, avi@scylladb.com, Jens Axboe <axboe@kernel.dk> Subject: [PATCH 07/16] io_uring: support for IO polling Date: Mon, 14 Jan 2019 19:55:22 -0700 [thread overview] Message-ID: <20190115025531.13985-8-axboe@kernel.dk> (raw) In-Reply-To: <20190115025531.13985-1-axboe@kernel.dk> Add support for a polled io_uring context. When a read or write is submitted to a polled context, the application must poll for completions on the CQ ring through io_uring_enter(2). Polled IO may not generate IRQ completions, hence they need to be actively found by the application itself. To use polling, io_uring_setup() must be used with the IORING_SETUP_IOPOLL flag being set. It is illegal to mix and match polled and non-polled IO on an io_uring. Signed-off-by: Jens Axboe <axboe@kernel.dk> --- fs/io_uring.c | 253 ++++++++++++++++++++++++++++++++-- include/uapi/linux/io_uring.h | 5 + 2 files changed, 250 insertions(+), 8 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 7d74463217a6..fb1b04ccc12a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -56,6 +56,11 @@ struct io_cq_ring { struct io_uring_cqe cqes[]; }; +struct list_multi { + struct list_head list; + unsigned multi; +}; + struct io_ring_ctx { struct percpu_ref refs; @@ -88,6 +93,7 @@ struct io_ring_ctx { struct { spinlock_t completion_lock; + struct list_multi poll_list; } ____cacheline_aligned_in_smp; }; @@ -111,10 +117,14 @@ struct io_kiocb { struct list_head list; unsigned long flags; #define REQ_F_FORCE_NONBLOCK 1 /* inline submission attempt */ +#define REQ_F_IOPOLL_COMPLETED 2 /* polled IO has completed */ +#define REQ_F_IOPOLL_EAGAIN 4 /* submission got EAGAIN */ u64 user_data; + u64 res; }; #define IO_PLUG_THRESHOLD 2 +#define IO_IOPOLL_BATCH 8 static struct kmem_cache *req_cachep; @@ -144,6 +154,7 @@ static struct io_ring_ctx 
*io_ring_ctx_alloc(struct io_uring_params *p) init_completion(&ctx->ctx_done); spin_lock_init(&ctx->completion_lock); init_waitqueue_head(&ctx->wait); + INIT_LIST_HEAD(&ctx->poll_list.list); mutex_init(&ctx->uring_lock); return ctx; } @@ -234,12 +245,180 @@ static void io_ring_drop_ctx_refs(struct io_ring_ctx *ctx, unsigned refs) wake_up(&ctx->wait); } +static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr) +{ + if (*nr) { + kmem_cache_free_bulk(req_cachep, *nr, reqs); + io_ring_drop_ctx_refs(ctx, *nr); + *nr = 0; + } +} + static void io_free_req(struct io_kiocb *req) { kmem_cache_free(req_cachep, req); io_ring_drop_ctx_refs(req->ctx, 1); } +/* + * Track whether we have multiple files in our lists. This will impact how + * we do polling eventually, not spinning if we're on potentially on different + * devices. + */ +static void io_multi_list_add(struct io_kiocb *req, struct list_multi *list) +{ + if (list_empty(&list->list)) { + list->multi = 0; + } else if (!list->multi) { + struct io_kiocb *list_req; + + list_req = list_first_entry(&list->list, struct io_kiocb, list); + if (list_req->rw.ki_filp != req->rw.ki_filp) + list->multi = 1; + } + + /* + * For fast devices, IO may have already completed. If it has, add + * it to the front so we find it first. We can't add to the poll_done + * list as that's unlocked from the completion side. 
+ */ + if (req->flags & REQ_F_IOPOLL_COMPLETED) + list_add(&req->list, &list->list); + else + list_add_tail(&req->list, &list->list); +} + +/* + * Find and free completed poll iocbs + */ +static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, + struct list_head *done) +{ + void *reqs[IO_IOPOLL_BATCH]; + struct io_kiocb *req; + int to_free = 0; + + while (!list_empty(done)) { + req = list_first_entry(done, struct io_kiocb, list); + list_del(&req->list); + + __io_cqring_fill_event(ctx, req->user_data, req->res, 0); + + reqs[to_free++] = req; + (*nr_events)++; + + fput(req->rw.ki_filp); + if (to_free == ARRAY_SIZE(reqs)) + io_free_req_many(ctx, reqs, &to_free); + } + + if (to_free) + io_free_req_many(ctx, reqs, &to_free); +} + +static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, + long min) +{ + struct io_kiocb *req, *tmp; + int polled, found, ret; + LIST_HEAD(done); + bool spin; + + /* + * Only spin for completions if we don't have multiple devices hanging + * off our complete list, and we're under the requested amount. + */ + spin = !ctx->poll_list.multi && (*nr_events < min); + + ret = polled = found = 0; + list_for_each_entry_safe(req, tmp, &ctx->poll_list.list, list) { + struct kiocb *kiocb = &req->rw; + + if (req->flags & REQ_F_IOPOLL_COMPLETED) { + list_move_tail(&req->list, &done); + spin = false; + continue; + } + + ret = kiocb->ki_filp->f_op->iopoll(kiocb, spin); + if (ret < 0) + break; + + polled += ret; + if (polled && spin) + spin = false; + ret = 0; + } + + if (!list_empty(&done)) + io_iopoll_complete(ctx, nr_events, &done); + + return ret; +} + +/* + * Poll for a mininum of 'min' events, and a maximum of 'max'. Note that if + * min == 0 we consider that a non-spinning poll check - we'll still enter + * the driver poll loop, but only as a non-spinning completion check. 
+ */ +static int io_iopoll_getevents(struct io_ring_ctx *ctx, unsigned int *nr_events, + long min) +{ + int ret; + + do { + if (list_empty(&ctx->poll_list.list)) + return 0; + + ret = io_do_iopoll(ctx, nr_events, min); + if (ret < 0) + break; + } while (min && *nr_events < min); + + if (ret < 0) + return ret; + + return *nr_events < min; +} + +/* + * We can't just wait for polled events to come to us, we have to actively + * find and complete them. + */ +static void io_iopoll_reap_events(struct io_ring_ctx *ctx) +{ + if (!(ctx->flags & IORING_SETUP_IOPOLL)) + return; + + mutex_lock(&ctx->uring_lock); + while (!list_empty(&ctx->poll_list.list)) { + unsigned int nr_events = 0; + + io_iopoll_getevents(ctx, &nr_events, 1); + } + mutex_unlock(&ctx->uring_lock); +} + +static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, + long min) +{ + int ret = 0; + + while (!*nr_events || !need_resched()) { + int tmin = 0; + + if (*nr_events < min) + tmin = min - *nr_events; + + ret = io_iopoll_getevents(ctx, nr_events, tmin); + if (ret <= 0) + break; + ret = 0; + } + + return ret; +} + static void kiocb_end_write(struct kiocb *kiocb) { if (kiocb->ki_flags & IOCB_WRITE) { @@ -266,9 +445,37 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2) io_free_req(req); } +static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) +{ + struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw); + + kiocb_end_write(kiocb); + + if (unlikely(res == -EAGAIN)) { + req->flags |= REQ_F_IOPOLL_EAGAIN; + } else { + req->flags |= REQ_F_IOPOLL_COMPLETED; + req->res = res; + } +} + +/* + * After the iocb has been issued, it's safe to be found on the poll list. + * Adding the kiocb to the list AFTER submission ensures that we don't + * find it from a io_getevents() thread before the issuer is done accessing + * the kiocb cookie. 
+ */ +static void io_iopoll_req_issued(struct io_kiocb *req) +{ + struct io_ring_ctx *ctx = req->ctx; + + io_multi_list_add(req, &ctx->poll_list); +} + static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, bool force_nonblock) { + struct io_ring_ctx *ctx = req->ctx; struct kiocb *kiocb = &req->rw; int ret; @@ -294,12 +501,21 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, kiocb->ki_flags |= IOCB_NOWAIT; req->flags |= REQ_F_FORCE_NONBLOCK; } - if (kiocb->ki_flags & IOCB_HIPRI) { - ret = -EINVAL; - goto out_fput; - } + if (ctx->flags & IORING_SETUP_IOPOLL) { + ret = -EOPNOTSUPP; + if (!(kiocb->ki_flags & IOCB_DIRECT) || + !kiocb->ki_filp->f_op->iopoll) + goto out_fput; - kiocb->ki_complete = io_complete_rw; + kiocb->ki_flags |= IOCB_HIPRI; + kiocb->ki_complete = io_complete_rw_iopoll; + } else { + if (kiocb->ki_flags & IOCB_HIPRI) { + ret = -EINVAL; + goto out_fput; + } + kiocb->ki_complete = io_complete_rw; + } return 0; out_fput: fput(kiocb->ki_filp); @@ -444,6 +660,9 @@ static int io_nop(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_ring_ctx *ctx = req->ctx; + if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; + __io_cqring_fill_event(ctx, sqe->user_data, 0, 0); io_free_req(req); return 0; @@ -461,6 +680,8 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe, if (force_nonblock) return -EAGAIN; + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; if (unlikely(sqe->addr)) return -EINVAL; if (unlikely(sqe->fsync_flags & ~IORING_FSYNC_DATASYNC)) @@ -512,7 +733,16 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, break; } - return ret; + if (ret) + return ret; + + if (ctx->flags & IORING_SETUP_IOPOLL) { + if (req->flags & REQ_F_IOPOLL_EAGAIN) + return -EAGAIN; + io_iopoll_req_issued(req); + } + + return 0; } static void io_sq_wq_submit_work(struct work_struct *work) @@ -682,12 +912,17 @@ static int 
__io_uring_enter(struct io_ring_ctx *ctx, unsigned to_submit, return ret; } if (flags & IORING_ENTER_GETEVENTS) { + unsigned nr_events = 0; int get_ret; if (!ret && to_submit) min_complete = 0; - get_ret = io_cqring_wait(ctx, min_complete); + if (ctx->flags & IORING_SETUP_IOPOLL) + get_ret = io_iopoll_check(ctx, &nr_events, + min_complete); + else + get_ret = io_cqring_wait(ctx, min_complete); if (get_ret < 0 && !ret) ret = get_ret; } @@ -755,6 +990,7 @@ static void io_free_scq_urings(struct io_ring_ctx *ctx) static void io_ring_ctx_free(struct io_ring_ctx *ctx) { io_sq_offload_stop(ctx); + io_iopoll_reap_events(ctx); io_free_scq_urings(ctx); percpu_ref_exit(&ctx->refs); kfree(ctx); @@ -766,6 +1002,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) percpu_ref_kill(&ctx->refs); mutex_unlock(&ctx->uring_lock); + io_iopoll_reap_events(ctx); wait_for_completion(&ctx->ctx_done); io_ring_ctx_free(ctx); } @@ -975,7 +1212,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params, return -EINVAL; } - if (p.flags) + if (p.flags & ~IORING_SETUP_IOPOLL) return -EINVAL; ret = io_uring_create(entries, &p, compat); diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index ac49bd179ed9..d31ae2f767d1 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -33,6 +33,11 @@ struct io_uring_sqe { __u64 __pad2[3]; }; +/* + * io_uring_setup() flags + */ +#define IORING_SETUP_IOPOLL (1 << 0) /* io_context is polled */ + #define IORING_OP_NOP 0 #define IORING_OP_READV 1 #define IORING_OP_WRITEV 2 -- 2.17.1 -- To unsubscribe, send a message with 'unsubscribe linux-aio' in the body to majordomo@kvack.org. For more info on Linux AIO, see: http://www.kvack.org/aio/ Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>
WARNING: multiple messages have this Message-ID (diff)
From: Jens Axboe <axboe@kernel.dk> To: linux-fsdevel@vger.kernel.org, linux-aio@kvack.org, linux-block@vger.kernel.org, linux-arch@vger.kernel.org Cc: hch@lst.de, jmoyer@redhat.com, avi@scylladb.com, Jens Axboe <axboe@kernel.dk> Subject: [PATCH 07/16] io_uring: support for IO polling Date: Mon, 14 Jan 2019 19:55:22 -0700 [thread overview] Message-ID: <20190115025531.13985-8-axboe@kernel.dk> (raw) Message-ID: <20190115025522.QN9TmZT6XlygLamqES_rRcazGw2oGkFfgW7ixjPBrm0@z> (raw) In-Reply-To: <20190115025531.13985-1-axboe@kernel.dk> Add support for a polled io_uring context. When a read or write is submitted to a polled context, the application must poll for completions on the CQ ring through io_uring_enter(2). Polled IO may not generate IRQ completions, hence they need to be actively found by the application itself. To use polling, io_uring_setup() must be used with the IORING_SETUP_IOPOLL flag being set. It is illegal to mix and match polled and non-polled IO on an io_uring. Signed-off-by: Jens Axboe <axboe@kernel.dk> --- fs/io_uring.c | 253 ++++++++++++++++++++++++++++++++-- include/uapi/linux/io_uring.h | 5 + 2 files changed, 250 insertions(+), 8 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 7d74463217a6..fb1b04ccc12a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -56,6 +56,11 @@ struct io_cq_ring { struct io_uring_cqe cqes[]; }; +struct list_multi { + struct list_head list; + unsigned multi; +}; + struct io_ring_ctx { struct percpu_ref refs; @@ -88,6 +93,7 @@ struct io_ring_ctx { struct { spinlock_t completion_lock; + struct list_multi poll_list; } ____cacheline_aligned_in_smp; }; @@ -111,10 +117,14 @@ struct io_kiocb { struct list_head list; unsigned long flags; #define REQ_F_FORCE_NONBLOCK 1 /* inline submission attempt */ +#define REQ_F_IOPOLL_COMPLETED 2 /* polled IO has completed */ +#define REQ_F_IOPOLL_EAGAIN 4 /* submission got EAGAIN */ u64 user_data; + u64 res; }; #define IO_PLUG_THRESHOLD 2 +#define IO_IOPOLL_BATCH 8 static struct 
kmem_cache *req_cachep; @@ -144,6 +154,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) init_completion(&ctx->ctx_done); spin_lock_init(&ctx->completion_lock); init_waitqueue_head(&ctx->wait); + INIT_LIST_HEAD(&ctx->poll_list.list); mutex_init(&ctx->uring_lock); return ctx; } @@ -234,12 +245,180 @@ static void io_ring_drop_ctx_refs(struct io_ring_ctx *ctx, unsigned refs) wake_up(&ctx->wait); } +static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr) +{ + if (*nr) { + kmem_cache_free_bulk(req_cachep, *nr, reqs); + io_ring_drop_ctx_refs(ctx, *nr); + *nr = 0; + } +} + static void io_free_req(struct io_kiocb *req) { kmem_cache_free(req_cachep, req); io_ring_drop_ctx_refs(req->ctx, 1); } +/* + * Track whether we have multiple files in our lists. This will impact how + * we do polling eventually, not spinning if we're on potentially on different + * devices. + */ +static void io_multi_list_add(struct io_kiocb *req, struct list_multi *list) +{ + if (list_empty(&list->list)) { + list->multi = 0; + } else if (!list->multi) { + struct io_kiocb *list_req; + + list_req = list_first_entry(&list->list, struct io_kiocb, list); + if (list_req->rw.ki_filp != req->rw.ki_filp) + list->multi = 1; + } + + /* + * For fast devices, IO may have already completed. If it has, add + * it to the front so we find it first. We can't add to the poll_done + * list as that's unlocked from the completion side. 
+ */ + if (req->flags & REQ_F_IOPOLL_COMPLETED) + list_add(&req->list, &list->list); + else + list_add_tail(&req->list, &list->list); +} + +/* + * Find and free completed poll iocbs + */ +static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, + struct list_head *done) +{ + void *reqs[IO_IOPOLL_BATCH]; + struct io_kiocb *req; + int to_free = 0; + + while (!list_empty(done)) { + req = list_first_entry(done, struct io_kiocb, list); + list_del(&req->list); + + __io_cqring_fill_event(ctx, req->user_data, req->res, 0); + + reqs[to_free++] = req; + (*nr_events)++; + + fput(req->rw.ki_filp); + if (to_free == ARRAY_SIZE(reqs)) + io_free_req_many(ctx, reqs, &to_free); + } + + if (to_free) + io_free_req_many(ctx, reqs, &to_free); +} + +static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, + long min) +{ + struct io_kiocb *req, *tmp; + int polled, found, ret; + LIST_HEAD(done); + bool spin; + + /* + * Only spin for completions if we don't have multiple devices hanging + * off our complete list, and we're under the requested amount. + */ + spin = !ctx->poll_list.multi && (*nr_events < min); + + ret = polled = found = 0; + list_for_each_entry_safe(req, tmp, &ctx->poll_list.list, list) { + struct kiocb *kiocb = &req->rw; + + if (req->flags & REQ_F_IOPOLL_COMPLETED) { + list_move_tail(&req->list, &done); + spin = false; + continue; + } + + ret = kiocb->ki_filp->f_op->iopoll(kiocb, spin); + if (ret < 0) + break; + + polled += ret; + if (polled && spin) + spin = false; + ret = 0; + } + + if (!list_empty(&done)) + io_iopoll_complete(ctx, nr_events, &done); + + return ret; +} + +/* + * Poll for a mininum of 'min' events, and a maximum of 'max'. Note that if + * min == 0 we consider that a non-spinning poll check - we'll still enter + * the driver poll loop, but only as a non-spinning completion check. 
+ */ +static int io_iopoll_getevents(struct io_ring_ctx *ctx, unsigned int *nr_events, + long min) +{ + int ret; + + do { + if (list_empty(&ctx->poll_list.list)) + return 0; + + ret = io_do_iopoll(ctx, nr_events, min); + if (ret < 0) + break; + } while (min && *nr_events < min); + + if (ret < 0) + return ret; + + return *nr_events < min; +} + +/* + * We can't just wait for polled events to come to us, we have to actively + * find and complete them. + */ +static void io_iopoll_reap_events(struct io_ring_ctx *ctx) +{ + if (!(ctx->flags & IORING_SETUP_IOPOLL)) + return; + + mutex_lock(&ctx->uring_lock); + while (!list_empty(&ctx->poll_list.list)) { + unsigned int nr_events = 0; + + io_iopoll_getevents(ctx, &nr_events, 1); + } + mutex_unlock(&ctx->uring_lock); +} + +static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, + long min) +{ + int ret = 0; + + while (!*nr_events || !need_resched()) { + int tmin = 0; + + if (*nr_events < min) + tmin = min - *nr_events; + + ret = io_iopoll_getevents(ctx, nr_events, tmin); + if (ret <= 0) + break; + ret = 0; + } + + return ret; +} + static void kiocb_end_write(struct kiocb *kiocb) { if (kiocb->ki_flags & IOCB_WRITE) { @@ -266,9 +445,37 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2) io_free_req(req); } +static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) +{ + struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw); + + kiocb_end_write(kiocb); + + if (unlikely(res == -EAGAIN)) { + req->flags |= REQ_F_IOPOLL_EAGAIN; + } else { + req->flags |= REQ_F_IOPOLL_COMPLETED; + req->res = res; + } +} + +/* + * After the iocb has been issued, it's safe to be found on the poll list. + * Adding the kiocb to the list AFTER submission ensures that we don't + * find it from a io_getevents() thread before the issuer is done accessing + * the kiocb cookie. 
+ */ +static void io_iopoll_req_issued(struct io_kiocb *req) +{ + struct io_ring_ctx *ctx = req->ctx; + + io_multi_list_add(req, &ctx->poll_list); +} + static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, bool force_nonblock) { + struct io_ring_ctx *ctx = req->ctx; struct kiocb *kiocb = &req->rw; int ret; @@ -294,12 +501,21 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, kiocb->ki_flags |= IOCB_NOWAIT; req->flags |= REQ_F_FORCE_NONBLOCK; } - if (kiocb->ki_flags & IOCB_HIPRI) { - ret = -EINVAL; - goto out_fput; - } + if (ctx->flags & IORING_SETUP_IOPOLL) { + ret = -EOPNOTSUPP; + if (!(kiocb->ki_flags & IOCB_DIRECT) || + !kiocb->ki_filp->f_op->iopoll) + goto out_fput; - kiocb->ki_complete = io_complete_rw; + kiocb->ki_flags |= IOCB_HIPRI; + kiocb->ki_complete = io_complete_rw_iopoll; + } else { + if (kiocb->ki_flags & IOCB_HIPRI) { + ret = -EINVAL; + goto out_fput; + } + kiocb->ki_complete = io_complete_rw; + } return 0; out_fput: fput(kiocb->ki_filp); @@ -444,6 +660,9 @@ static int io_nop(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_ring_ctx *ctx = req->ctx; + if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; + __io_cqring_fill_event(ctx, sqe->user_data, 0, 0); io_free_req(req); return 0; @@ -461,6 +680,8 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe, if (force_nonblock) return -EAGAIN; + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; if (unlikely(sqe->addr)) return -EINVAL; if (unlikely(sqe->fsync_flags & ~IORING_FSYNC_DATASYNC)) @@ -512,7 +733,16 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, break; } - return ret; + if (ret) + return ret; + + if (ctx->flags & IORING_SETUP_IOPOLL) { + if (req->flags & REQ_F_IOPOLL_EAGAIN) + return -EAGAIN; + io_iopoll_req_issued(req); + } + + return 0; } static void io_sq_wq_submit_work(struct work_struct *work) @@ -682,12 +912,17 @@ static int 
__io_uring_enter(struct io_ring_ctx *ctx, unsigned to_submit, return ret; } if (flags & IORING_ENTER_GETEVENTS) { + unsigned nr_events = 0; int get_ret; if (!ret && to_submit) min_complete = 0; - get_ret = io_cqring_wait(ctx, min_complete); + if (ctx->flags & IORING_SETUP_IOPOLL) + get_ret = io_iopoll_check(ctx, &nr_events, + min_complete); + else + get_ret = io_cqring_wait(ctx, min_complete); if (get_ret < 0 && !ret) ret = get_ret; } @@ -755,6 +990,7 @@ static void io_free_scq_urings(struct io_ring_ctx *ctx) static void io_ring_ctx_free(struct io_ring_ctx *ctx) { io_sq_offload_stop(ctx); + io_iopoll_reap_events(ctx); io_free_scq_urings(ctx); percpu_ref_exit(&ctx->refs); kfree(ctx); @@ -766,6 +1002,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) percpu_ref_kill(&ctx->refs); mutex_unlock(&ctx->uring_lock); + io_iopoll_reap_events(ctx); wait_for_completion(&ctx->ctx_done); io_ring_ctx_free(ctx); } @@ -975,7 +1212,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params, return -EINVAL; } - if (p.flags) + if (p.flags & ~IORING_SETUP_IOPOLL) return -EINVAL; ret = io_uring_create(entries, &p, compat); diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index ac49bd179ed9..d31ae2f767d1 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -33,6 +33,11 @@ struct io_uring_sqe { __u64 __pad2[3]; }; +/* + * io_uring_setup() flags + */ +#define IORING_SETUP_IOPOLL (1 << 0) /* io_context is polled */ + #define IORING_OP_NOP 0 #define IORING_OP_READV 1 #define IORING_OP_WRITEV 2 -- 2.17.1
next prev parent reply other threads:[~2019-01-15 2:55 UTC|newest] Thread overview: 64+ messages / expand[flat|nested] mbox.gz Atom feed top 2019-01-15 2:55 (unknown), Jens Axboe 2019-01-15 2:55 ` Jens Axboe 2019-01-15 2:55 ` [PATCH 01/16] fs: add an iopoll method to struct file_operations Jens Axboe 2019-01-15 2:55 ` Jens Axboe 2019-01-15 2:55 ` [PATCH 02/16] block: wire up block device iopoll method Jens Axboe 2019-01-15 2:55 ` Jens Axboe 2019-01-15 2:55 ` [PATCH 03/16] block: add bio_set_polled() helper Jens Axboe 2019-01-15 2:55 ` Jens Axboe 2019-01-15 2:55 ` [PATCH 04/16] iomap: wire up the iopoll method Jens Axboe 2019-01-15 2:55 ` Jens Axboe 2019-01-15 2:55 ` [PATCH 05/16] Add io_uring IO interface Jens Axboe 2019-01-15 2:55 ` Jens Axboe 2019-01-15 16:51 ` Jonathan Corbet 2019-01-15 16:51 ` Jonathan Corbet 2019-01-15 16:55 ` Jens Axboe 2019-01-15 16:55 ` Jens Axboe 2019-01-15 17:26 ` Jens Axboe 2019-01-15 17:26 ` Jens Axboe 2019-01-16 10:41 ` Arnd Bergmann 2019-01-16 10:41 ` Arnd Bergmann 2019-01-16 11:00 ` Arnd Bergmann 2019-01-16 11:00 ` Arnd Bergmann 2019-01-16 15:12 ` Jens Axboe 2019-01-16 15:12 ` Jens Axboe 2019-01-16 15:16 ` Arnd Bergmann 2019-01-16 15:16 ` Arnd Bergmann 2019-01-16 15:25 ` Jens Axboe 2019-01-16 15:25 ` Jens Axboe 2019-01-15 2:55 ` [PATCH 06/16] io_uring: add fsync support Jens Axboe 2019-01-15 2:55 ` Jens Axboe 2019-01-15 2:55 ` Jens Axboe [this message] 2019-01-15 2:55 ` [PATCH 07/16] io_uring: support for IO polling Jens Axboe 2019-01-15 2:55 ` [PATCH 08/16] io_uring: add submission side request cache Jens Axboe 2019-01-15 2:55 ` Jens Axboe 2019-01-15 2:55 ` [PATCH 09/16] fs: add fget_many() and fput_many() Jens Axboe 2019-01-15 2:55 ` Jens Axboe 2019-01-15 2:55 ` [PATCH 10/16] io_uring: use fget/fput_many() for file references Jens Axboe 2019-01-15 2:55 ` Jens Axboe 2019-01-15 2:55 ` [PATCH 11/16] io_uring: batch io_kiocb allocation Jens Axboe 2019-01-15 2:55 ` Jens Axboe 2019-01-15 2:55 ` [PATCH 12/16] block: implement bio helper 
to add iter bvec pages to bio Jens Axboe 2019-01-15 2:55 ` Jens Axboe 2019-01-15 2:55 ` [PATCH 13/16] io_uring: add support for pre-mapped user IO buffers Jens Axboe 2019-01-15 2:55 ` Jens Axboe 2019-01-16 10:53 ` Arnd Bergmann 2019-01-16 15:14 ` Jens Axboe 2019-01-16 15:14 ` Jens Axboe 2019-01-16 15:32 ` Jens Axboe 2019-01-16 15:32 ` Jens Axboe 2019-01-16 15:41 ` Arnd Bergmann 2019-01-16 15:47 ` Jens Axboe 2019-01-16 15:47 ` Jens Axboe 2019-01-15 2:55 ` [PATCH 14/16] io_uring: add submission polling Jens Axboe 2019-01-15 2:55 ` Jens Axboe 2019-01-15 2:55 ` [PATCH 15/16] io_uring: add file registration Jens Axboe 2019-01-15 2:55 ` Jens Axboe 2019-01-16 10:45 ` Arnd Bergmann 2019-01-16 10:45 ` Arnd Bergmann 2019-01-16 15:15 ` Jens Axboe 2019-01-16 15:15 ` Jens Axboe 2019-01-15 2:55 ` [PATCH 16/16] io_uring: add io_uring_event cache hit information Jens Axboe 2019-01-15 2:55 ` Jens Axboe -- strict thread matches above, loose matches on Subject: below -- 2019-01-12 21:29 [PATCHSET v3] io_uring IO interface Jens Axboe 2019-01-12 21:30 ` [PATCH 07/16] io_uring: support for IO polling Jens Axboe 2019-01-12 21:30 ` Jens Axboe
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20190115025531.13985-8-axboe@kernel.dk \ --to=axboe@kernel.dk \ --cc=avi@scylladb.com \ --cc=hch@lst.de \ --cc=jmoyer@redhat.com \ --cc=linux-aio@kvack.org \ --cc=linux-arch@vger.kernel.org \ --cc=linux-block@vger.kernel.org \ --cc=linux-fsdevel@vger.kernel.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).