On 10/01/2020 18:47, Jens Axboe wrote: > This adds support for doing madvise(2) through io_uring. We assume that > any operation can block, and hence punt everything async. This could be > improved, but hard to make bullet proof. The async punt ensures it's > safe. > I don't like that it shares struct/field names with fadvise. E.g. madvise's context is called struct io_fadvise. Could it at least have fadvise_advice field in struct io_uring_sqe? io_uring parts of the patchset look good. Reviewed-by: Pavel Begunkov > Signed-off-by: Jens Axboe > --- > fs/io_uring.c | 56 ++++++++++++++++++++++++++++++++++- > include/uapi/linux/io_uring.h | 1 + > 2 files changed, 56 insertions(+), 1 deletion(-) > > diff --git a/fs/io_uring.c b/fs/io_uring.c > index 0b200a7d4ae0..378f97cc2bf2 100644 > --- a/fs/io_uring.c > +++ b/fs/io_uring.c > @@ -403,7 +403,10 @@ struct io_files_update { > > struct io_fadvise { > struct file *file; > - u64 offset; > + union { > + u64 offset; > + u64 addr; > + }; > u32 len; > u32 advice; > }; > @@ -682,6 +685,10 @@ static const struct io_op_def io_op_defs[] = { > /* IORING_OP_FADVISE */ > .needs_file = 1, > }, > + { > + /* IORING_OP_MADVISE */ > + .needs_mm = 1, > + }, > }; > > static void io_wq_submit_work(struct io_wq_work **workptr); > @@ -2448,6 +2455,42 @@ static int io_openat(struct io_kiocb *req, struct io_kiocb **nxt, > return 0; > } > > +static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) > +{ > +#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU) > + if (sqe->ioprio || sqe->buf_index || sqe->off) > + return -EINVAL; > + > + req->fadvise.addr = READ_ONCE(sqe->addr); > + req->fadvise.len = READ_ONCE(sqe->len); > + req->fadvise.advice = READ_ONCE(sqe->fadvise_advice); > + return 0; > +#else > + return -EOPNOTSUPP; > +#endif > +} > + > +static int io_madvise(struct io_kiocb *req, struct io_kiocb **nxt, > + bool force_nonblock) > +{ > +#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU) > + struct 
io_fadvise *fa = &req->fadvise; > + int ret; > + > + if (force_nonblock) > + return -EAGAIN; > + > + ret = do_madvise(fa->addr, fa->len, fa->advice); > + if (ret < 0) > + req_set_fail_links(req); > + io_cqring_add_event(req, ret); > + io_put_req_find_next(req, nxt); > + return 0; > +#else > + return -EOPNOTSUPP; > +#endif > +} > + > static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) > { > if (sqe->ioprio || sqe->buf_index || sqe->addr) > @@ -3769,6 +3812,9 @@ static int io_req_defer_prep(struct io_kiocb *req, > case IORING_OP_FADVISE: > ret = io_fadvise_prep(req, sqe); > break; > + case IORING_OP_MADVISE: > + ret = io_madvise_prep(req, sqe); > + break; > default: > printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n", > req->opcode); > @@ -3973,6 +4019,14 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, > } > ret = io_fadvise(req, nxt, force_nonblock); > break; > + case IORING_OP_MADVISE: > + if (sqe) { > + ret = io_madvise_prep(req, sqe); > + if (ret) > + break; > + } > + ret = io_madvise(req, nxt, force_nonblock); > + break; > default: > ret = -EINVAL; > break; > diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h > index f87d8fb42916..7cb6fe0fccd7 100644 > --- a/include/uapi/linux/io_uring.h > +++ b/include/uapi/linux/io_uring.h > @@ -88,6 +88,7 @@ enum { > IORING_OP_READ, > IORING_OP_WRITE, > IORING_OP_FADVISE, > + IORING_OP_MADVISE, > > /* this goes last, obviously */ > IORING_OP_LAST, > -- Pavel Begunkov