All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jens Axboe <axboe@kernel.dk>
To: linux-block@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-aio@kvack.org
Cc: hch@lst.de, Jens Axboe <axboe@kernel.dk>
Subject: [PATCH 16/27] aio: add support for having user mapped iocbs
Date: Fri, 30 Nov 2018 09:56:35 -0700	[thread overview]
Message-ID: <20181130165646.27341-17-axboe@kernel.dk> (raw)
In-Reply-To: <20181130165646.27341-1-axboe@kernel.dk>

For io_submit(), we have to first copy each pointer to an iocb, then
copy the iocb. The latter is 64 bytes in size, and that's a lot of
copying for a single IO.

Add support for setting IOCTX_FLAG_USERIOCB through the new io_setup2()
system call, which allows the iocbs to reside in userspace. If this flag
is used, then io_submit() doesn't take pointers to iocbs anymore, it
takes an index value into the array of iocbs instead. Similarly, for
io_getevents(), the iocb ->obj will be the index, not the pointer to the
iocb.

See the change made to fio to support this feature, it's pretty
trivial to adapt to. For applications, like fio, that previously
embedded the iocb inside an application private structure, some sort
of lookup table/structure is needed to find the private IO structure
from the index at io_getevents() time.

http://git.kernel.dk/cgit/fio/commit/?id=3c3168e91329c83880c91e5abc28b9d6b940fd95

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/aio.c                     | 111 +++++++++++++++++++++++++++++++----
 include/uapi/linux/aio_abi.h |   2 +
 2 files changed, 101 insertions(+), 12 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 74831ce2185e..380e6fe8c429 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -121,6 +121,9 @@ struct kioctx {
 	struct page		**ring_pages;
 	long			nr_pages;
 
+	struct page		**iocb_pages;
+	long			iocb_nr_pages;
+
 	struct rcu_work		free_rwork;	/* see free_ioctx() */
 
 	/*
@@ -216,6 +219,11 @@ static struct vfsmount *aio_mnt;
 static const struct file_operations aio_ring_fops;
 static const struct address_space_operations aio_ctx_aops;
 
+static const unsigned int iocb_page_shift =
+				ilog2(PAGE_SIZE / sizeof(struct iocb));
+
+static void aio_useriocb_free(struct kioctx *);
+
 static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
 {
 	struct file *file;
@@ -572,6 +580,7 @@ static void free_ioctx(struct work_struct *work)
 					  free_rwork);
 	pr_debug("freeing %p\n", ctx);
 
+	aio_useriocb_free(ctx);
 	aio_free_ring(ctx);
 	free_percpu(ctx->cpu);
 	percpu_ref_exit(&ctx->reqs);
@@ -1281,6 +1290,61 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
 	return ret;
 }
 
+static struct iocb *aio_iocb_from_index(struct kioctx *ctx, int index)
+{
+	unsigned int page_index;
+	struct iocb *iocb;
+
+	page_index = index >> iocb_page_shift;
+	index &= ((1 << iocb_page_shift) - 1);
+	iocb = page_address(ctx->iocb_pages[page_index]);
+
+	return iocb + index;
+}
+
+static void aio_useriocb_free(struct kioctx *ctx)
+{
+	int i;
+
+	if (!ctx->iocb_nr_pages)
+		return;
+
+	for (i = 0; i < ctx->iocb_nr_pages; i++)
+		put_page(ctx->iocb_pages[i]);
+
+	kfree(ctx->iocb_pages);
+	ctx->iocb_pages = NULL;
+	ctx->iocb_nr_pages = 0;
+}
+
+static int aio_useriocb_map(struct kioctx *ctx, struct iocb __user *iocbs)
+{
+	int nr_pages, ret;
+
+	if ((unsigned long) iocbs & ~PAGE_MASK)
+		return -EINVAL;
+
+	nr_pages = sizeof(struct iocb) * ctx->max_reqs;
+	nr_pages = (nr_pages + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+	ctx->iocb_pages = kzalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
+	if (!ctx->iocb_pages)
+		return -ENOMEM;
+
+	down_write(&current->mm->mmap_sem);
+	ret = get_user_pages((unsigned long) iocbs, nr_pages, 0,
+				ctx->iocb_pages, NULL);
+	up_write(&current->mm->mmap_sem);
+
+	if (ret < nr_pages) {
+		kfree(ctx->iocb_pages);
+		return -ENOMEM;
+	}
+
+	ctx->iocb_nr_pages = nr_pages;
+	return 0;
+}
+
 SYSCALL_DEFINE4(io_setup2, u32, nr_events, u32, flags, struct iocb * __user,
 		iocbs, aio_context_t __user *, ctxp)
 {
@@ -1288,7 +1352,7 @@ SYSCALL_DEFINE4(io_setup2, u32, nr_events, u32, flags, struct iocb * __user,
 	unsigned long ctx;
 	long ret;
 
-	if (flags)
+	if (flags & ~IOCTX_FLAG_USERIOCB)
 		return -EINVAL;
 
 	ret = get_user(ctx, ctxp);
@@ -1300,9 +1364,17 @@ SYSCALL_DEFINE4(io_setup2, u32, nr_events, u32, flags, struct iocb * __user,
 	if (IS_ERR(ioctx))
 		goto out;
 
+	if (flags & IOCTX_FLAG_USERIOCB) {
+		ret = aio_useriocb_map(ioctx, iocbs);
+		if (ret)
+			goto err;
+	}
+
 	ret = put_user(ioctx->user_id, ctxp);
-	if (ret)
+	if (ret) {
+err:
 		kill_ioctx(current->mm, ioctx, NULL);
+	}
 	percpu_ref_put(&ioctx->users);
 out:
 	return ret;
@@ -1851,10 +1923,13 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
 		}
 	}
 
-	ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
-	if (unlikely(ret)) {
-		pr_debug("EFAULT: aio_key\n");
-		goto out_put_req;
+	/* Don't support cancel on user mapped iocbs */
+	if (!(ctx->flags & IOCTX_FLAG_USERIOCB)) {
+		ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
+		if (unlikely(ret)) {
+			pr_debug("EFAULT: aio_key\n");
+			goto out_put_req;
+		}
 	}
 
 	req->ki_user_iocb = user_iocb;
@@ -1908,12 +1983,22 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
 static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 			 bool compat)
 {
-	struct iocb iocb;
+	struct iocb iocb, *iocbp;
 
-	if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb))))
-		return -EFAULT;
+	if (ctx->flags & IOCTX_FLAG_USERIOCB) {
+		unsigned long iocb_index = (unsigned long) user_iocb;
+
+		if (iocb_index >= ctx->max_reqs)
+			return -EINVAL;
+
+		iocbp = aio_iocb_from_index(ctx, iocb_index);
+	} else {
+		if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb))))
+			return -EFAULT;
+		iocbp = &iocb;
+	}
 
-	return __io_submit_one(ctx, &iocb, user_iocb, compat);
+	return __io_submit_one(ctx, iocbp, user_iocb, compat);
 }
 
 /*
@@ -2063,6 +2148,9 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
 	if (unlikely(!ctx))
 		return -EINVAL;
 
+	if (ctx->flags & IOCTX_FLAG_USERIOCB)
+		goto err;
+
 	spin_lock_irq(&ctx->ctx_lock);
 	kiocb = lookup_kiocb(ctx, iocb);
 	if (kiocb) {
@@ -2079,9 +2167,8 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
 		 */
 		ret = -EINPROGRESS;
 	}
-
+err:
 	percpu_ref_put(&ctx->users);
-
 	return ret;
 }
 
diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h
index 8387e0af0f76..814e6606c413 100644
--- a/include/uapi/linux/aio_abi.h
+++ b/include/uapi/linux/aio_abi.h
@@ -106,6 +106,8 @@ struct iocb {
 	__u32	aio_resfd;
 }; /* 64 bytes */
 
+#define IOCTX_FLAG_USERIOCB	(1 << 0)	/* iocbs are user mapped */
+
 #undef IFBIG
 #undef IFLITTLE
 
-- 
2.17.1


  parent reply	other threads:[~2018-11-30 16:57 UTC|newest]

Thread overview: 59+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-11-30 16:56 [PATCHSET v4] Support for polled aio Jens Axboe
2018-11-30 16:56 ` [PATCH 01/27] aio: fix failure to put the file pointer Jens Axboe
2018-11-30 17:07   ` Bart Van Assche
2018-11-30 17:08     ` Jens Axboe
2018-11-30 17:24       ` Bart Van Assche
2018-11-30 16:56 ` [PATCH 02/27] aio: clear IOCB_HIPRI Jens Axboe
2018-11-30 17:13   ` Christoph Hellwig
2018-11-30 17:14     ` Jens Axboe
2018-12-04 14:46       ` Christoph Hellwig
2018-12-04 16:40         ` Jens Axboe
2018-11-30 16:56 ` [PATCH 03/27] fs: add an iopoll method to struct file_operations Jens Axboe
2018-11-30 16:56 ` [PATCH 04/27] block: wire up block device iopoll method Jens Axboe
2018-11-30 16:56 ` [PATCH 05/27] block: ensure that async polled IO is marked REQ_NOWAIT Jens Axboe
2018-11-30 17:12   ` Bart Van Assche
2018-11-30 17:17     ` Jens Axboe
2018-12-04 14:48       ` Christoph Hellwig
2018-12-04 18:13         ` Jens Axboe
2018-11-30 16:56 ` [PATCH 06/27] iomap: wire up the iopoll method Jens Axboe
2018-11-30 16:56 ` [PATCH 07/27] iomap: ensure that async polled IO is marked REQ_NOWAIT Jens Axboe
2018-11-30 16:56 ` [PATCH 08/27] aio: use assigned completion handler Jens Axboe
2018-11-30 16:56 ` [PATCH 09/27] aio: separate out ring reservation from req allocation Jens Axboe
2018-11-30 16:56 ` [PATCH 10/27] aio: don't zero entire aio_kiocb aio_get_req() Jens Axboe
2018-12-04 14:49   ` Christoph Hellwig
2018-12-04 15:27     ` Jens Axboe
2018-11-30 16:56 ` [PATCH 11/27] aio: only use blk plugs for > 2 depth submissions Jens Axboe
2018-12-04 14:50   ` Christoph Hellwig
2018-11-30 16:56 ` [PATCH 12/27] aio: use iocb_put() instead of open coding it Jens Axboe
2018-12-04 14:50   ` Christoph Hellwig
2018-11-30 16:56 ` [PATCH 13/27] aio: split out iocb copy from io_submit_one() Jens Axboe
2018-11-30 16:56 ` [PATCH 14/27] aio: abstract out io_event filler helper Jens Axboe
2018-11-30 16:56 ` [PATCH 15/27] aio: add io_setup2() system call Jens Axboe
2018-11-30 16:56 ` Jens Axboe [this message]
2018-11-30 16:56 ` [PATCH 17/27] aio: support for IO polling Jens Axboe
2018-11-30 16:56 ` [PATCH 18/27] aio: add submission side request cache Jens Axboe
2018-11-30 16:56 ` [PATCH 19/27] fs: add fget_many() and fput_many() Jens Axboe
2018-11-30 16:56 ` [PATCH 20/27] aio: use fget/fput_many() for file references Jens Axboe
2018-11-30 16:56 ` [PATCH 21/27] aio: split iocb init from allocation Jens Axboe
2018-11-30 16:56 ` [PATCH 22/27] aio: batch aio_kiocb allocation Jens Axboe
2018-11-30 16:56 ` [PATCH 23/27] block: add BIO_HOLD_PAGES flag Jens Axboe
2018-11-30 16:56 ` [PATCH 24/27] block: implement bio helper to add iter kvec pages to bio Jens Axboe
2018-11-30 19:21   ` Al Viro
2018-11-30 20:15     ` Jens Axboe
2018-11-30 20:32       ` Jens Axboe
2018-11-30 21:11         ` Al Viro
2018-11-30 21:16           ` Jens Axboe
2018-11-30 21:25             ` Al Viro
2018-11-30 21:34               ` Jens Axboe
2018-11-30 22:06                 ` Jens Axboe
2018-12-04 14:55     ` Christoph Hellwig
2018-12-04 15:25       ` Jens Axboe
2018-11-30 16:56 ` [PATCH 25/27] fs: add support for mapping an ITER_KVEC for O_DIRECT Jens Axboe
2018-11-30 16:56 ` [PATCH 26/27] iov_iter: add import_kvec() Jens Axboe
2018-11-30 19:17   ` Al Viro
2018-11-30 20:15     ` Jens Axboe
2018-11-30 16:56 ` [PATCH 27/27] aio: add support for pre-mapped user IO buffers Jens Axboe
2018-11-30 21:44   ` Jeff Moyer
2018-11-30 21:57     ` Jens Axboe
2018-11-30 22:04       ` Jeff Moyer
2018-11-30 22:11         ` Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181130165646.27341-17-axboe@kernel.dk \
    --to=axboe@kernel.dk \
    --cc=hch@lst.de \
    --cc=linux-aio@kvack.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.