linux-block.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jens Axboe <axboe@kernel.dk>
To: linux-block@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-aio@kvack.org
Cc: hch@lst.de, jmoyer@redhat.com, clm@fb.com, Jens Axboe <axboe@kernel.dk>
Subject: [PATCH 13/27] aio: add io_setup2() system call
Date: Mon, 10 Dec 2018 17:15:35 -0700	[thread overview]
Message-ID: <20181211001549.30085-14-axboe@kernel.dk> (raw)
In-Reply-To: <20181211001549.30085-1-axboe@kernel.dk>

This is just like io_setup(), except that it adds a flags argument to
let the caller control/define some of the io_context behavior.

Outside of the flags, we add an iocb array and two user pointers for
future use.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/sysctl/fs.txt            |  8 +--
 arch/x86/entry/syscalls/syscall_64.tbl |  1 +
 fs/aio.c                               | 80 ++++++++++++++++++--------
 include/linux/syscalls.h               |  3 +
 include/uapi/asm-generic/unistd.h      |  4 +-
 kernel/sys_ni.c                        |  1 +
 6 files changed, 67 insertions(+), 30 deletions(-)

diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
index 819caf8ca05f..5e484eb7a25f 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -47,10 +47,10 @@ Currently, these files are in /proc/sys/fs:
 aio-nr & aio-max-nr:
 
 aio-nr is the running total of the number of events specified on the
-io_setup system call for all currently active aio contexts.  If aio-nr
-reaches aio-max-nr then io_setup will fail with EAGAIN.  Note that
-raising aio-max-nr does not result in the pre-allocation or re-sizing
-of any kernel data structures.
+io_setup/io_setup2 system calls for all currently active aio contexts.
+If aio-nr reaches aio-max-nr then io_setup/io_setup2 will fail with EAGAIN.
+Note that raising aio-max-nr does not result in the pre-allocation or
+re-sizing of any kernel data structures.
 
 ==============================================================
 
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index f0b1709a5ffb..67c357225fb0 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -343,6 +343,7 @@
 332	common	statx			__x64_sys_statx
 333	common	io_pgetevents		__x64_sys_io_pgetevents
 334	common	rseq			__x64_sys_rseq
+335	common	io_setup2		__x64_sys_io_setup2
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/fs/aio.c b/fs/aio.c
index 173f1f79dc8f..0bad70eab553 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -100,6 +100,8 @@ struct kioctx {
 
 	unsigned long		user_id;
 
+	unsigned int		flags;
+
 	struct __percpu kioctx_cpu *cpu;
 
 	/*
@@ -686,10 +688,8 @@ static void aio_nr_sub(unsigned nr)
 	spin_unlock(&aio_nr_lock);
 }
 
-/* ioctx_alloc
- *	Allocates and initializes an ioctx.  Returns an ERR_PTR if it failed.
- */
-static struct kioctx *ioctx_alloc(unsigned nr_events)
+static struct kioctx *io_setup_flags(unsigned long ctxid,
+				     unsigned int nr_events, unsigned int flags)
 {
 	struct mm_struct *mm = current->mm;
 	struct kioctx *ctx;
@@ -701,6 +701,12 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 	 */
 	unsigned int max_reqs = nr_events;
 
+	if (unlikely(ctxid || nr_events == 0)) {
+		pr_debug("EINVAL: ctx %lu nr_events %u\n",
+		         ctxid, nr_events);
+		return ERR_PTR(-EINVAL);
+	}
+
 	/*
 	 * We keep track of the number of available ringbuffer slots, to prevent
 	 * overflow (reqs_available), and we also use percpu counters for this.
@@ -726,6 +732,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 	if (!ctx)
 		return ERR_PTR(-ENOMEM);
 
+	ctx->flags = flags;
 	ctx->max_reqs = max_reqs;
 
 	spin_lock_init(&ctx->ctx_lock);
@@ -1281,6 +1288,45 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
 	return ret;
 }
 
+/* sys_io_setup2:
+ *	Like sys_io_setup(), except that it takes a set of flags
+ *	(IOCTX_FLAG_*), and some pointers to user structures:
+ *
+ *	*iocbs - pointer to array of struct iocb, for when
+ *	IOCTX_FLAG_USERIOCB is set in flags.
+ *
+ *	*user1 - reserved for future use
+ *
+ *	*user2 - reserved for future use.
+ */
+SYSCALL_DEFINE6(io_setup2, u32, nr_events, u32, flags, struct iocb __user *,
+		iocbs, void __user *, user1, void __user *, user2,
+		aio_context_t __user *, ctxp)
+{
+	struct kioctx *ioctx;
+	unsigned long ctx;
+	long ret;
+
+	if (flags || user1 || user2)
+		return -EINVAL;
+
+	ret = get_user(ctx, ctxp);
+	if (unlikely(ret))
+		goto out;
+
+	ioctx = io_setup_flags(ctx, nr_events, flags);
+	ret = PTR_ERR(ioctx);
+	if (IS_ERR(ioctx))
+		goto out;
+
+	ret = put_user(ioctx->user_id, ctxp);
+	if (ret)
+		kill_ioctx(current->mm, ioctx, NULL);
+	percpu_ref_put(&ioctx->users);
+out:
+	return ret;
+}
+
 /* sys_io_setup:
  *	Create an aio_context capable of receiving at least nr_events.
  *	ctxp must not point to an aio_context that already exists, and
@@ -1296,7 +1342,7 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
  */
 SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
 {
-	struct kioctx *ioctx = NULL;
+	struct kioctx *ioctx;
 	unsigned long ctx;
 	long ret;
 
@@ -1304,14 +1350,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
 	if (unlikely(ret))
 		goto out;
 
-	ret = -EINVAL;
-	if (unlikely(ctx || nr_events == 0)) {
-		pr_debug("EINVAL: ctx %lu nr_events %u\n",
-		         ctx, nr_events);
-		goto out;
-	}
-
-	ioctx = ioctx_alloc(nr_events);
+	ioctx = io_setup_flags(ctx, nr_events, 0);
 	ret = PTR_ERR(ioctx);
 	if (!IS_ERR(ioctx)) {
 		ret = put_user(ioctx->user_id, ctxp);
@@ -1327,7 +1366,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
 #ifdef CONFIG_COMPAT
 COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, u32 __user *, ctx32p)
 {
-	struct kioctx *ioctx = NULL;
+	struct kioctx *ioctx;
 	unsigned long ctx;
 	long ret;
 
@@ -1335,23 +1374,14 @@ COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, u32 __user *, ctx32p)
 	if (unlikely(ret))
 		goto out;
 
-	ret = -EINVAL;
-	if (unlikely(ctx || nr_events == 0)) {
-		pr_debug("EINVAL: ctx %lu nr_events %u\n",
-		         ctx, nr_events);
-		goto out;
-	}
-
-	ioctx = ioctx_alloc(nr_events);
+	ioctx = io_setup_flags(ctx, nr_events, 0);
 	ret = PTR_ERR(ioctx);
 	if (!IS_ERR(ioctx)) {
-		/* truncating is ok because it's a user address */
-		ret = put_user((u32)ioctx->user_id, ctx32p);
+		ret = put_user(ioctx->user_id, ctx32p);
 		if (ret)
 			kill_ioctx(current->mm, ioctx, NULL);
 		percpu_ref_put(&ioctx->users);
 	}
-
 out:
 	return ret;
 }
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 2ac3d13a915b..a20a663d583f 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -287,6 +287,9 @@ static inline void addr_limit_user_check(void)
  */
 #ifndef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
 asmlinkage long sys_io_setup(unsigned nr_reqs, aio_context_t __user *ctx);
+asmlinkage long sys_io_setup2(unsigned, unsigned, struct iocb __user *,
+				void __user *, void __user *,
+				aio_context_t __user *);
 asmlinkage long sys_io_destroy(aio_context_t ctx);
 asmlinkage long sys_io_submit(aio_context_t, long,
 			struct iocb __user * __user *);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index c7f3321fbe43..1bbaa4c59f20 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -738,9 +738,11 @@ __SYSCALL(__NR_statx,     sys_statx)
 __SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents)
 #define __NR_rseq 293
 __SYSCALL(__NR_rseq, sys_rseq)
+#define __NR_io_setup2 294
+__SYSCALL(__NR_io_setup2, sys_io_setup2)
 
 #undef __NR_syscalls
-#define __NR_syscalls 294
+#define __NR_syscalls 295
 
 /*
  * 32 bit systems traditionally used different
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index df556175be50..17c8b4393669 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -37,6 +37,7 @@ asmlinkage long sys_ni_syscall(void)
  */
 
 COND_SYSCALL(io_setup);
+COND_SYSCALL(io_setup2);
 COND_SYSCALL_COMPAT(io_setup);
 COND_SYSCALL(io_destroy);
 COND_SYSCALL(io_submit);
-- 
2.17.1


  parent reply	other threads:[~2018-12-11  0:16 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-12-11  0:15 [PATCHSET v7] Support for polled and buffed aio (and more) Jens Axboe
2018-12-11  0:15 ` [PATCH 01/27] fs: add an iopoll method to struct file_operations Jens Axboe
2018-12-11  0:15 ` [PATCH 02/27] block: add REQ_HIPRI_ASYNC Jens Axboe
2018-12-11  0:15 ` [PATCH 03/27] block: wire up block device iopoll method Jens Axboe
2018-12-11  0:15 ` [PATCH 04/27] block: use REQ_HIPRI_ASYNC for non-sync polled IO Jens Axboe
2018-12-11  0:15 ` [PATCH 05/27] iomap: wire up the iopoll method Jens Axboe
2018-12-11  0:15 ` [PATCH 06/27] aio: use assigned completion handler Jens Axboe
2018-12-11  0:15 ` [PATCH 07/27] aio: separate out ring reservation from req allocation Jens Axboe
2018-12-11  0:15 ` [PATCH 08/27] aio: don't zero entire aio_kiocb aio_get_req() Jens Axboe
2018-12-11  0:15 ` [PATCH 09/27] aio: only use blk plugs for > 2 depth submissions Jens Axboe
2018-12-11  0:15 ` [PATCH 10/27] aio: use iocb_put() instead of open coding it Jens Axboe
2018-12-11  0:15 ` [PATCH 11/27] aio: split out iocb copy from io_submit_one() Jens Axboe
2018-12-11  0:15 ` [PATCH 12/27] aio: abstract out io_event filler helper Jens Axboe
2018-12-11  0:15 ` Jens Axboe [this message]
2018-12-11  0:15 ` [PATCH 14/27] aio: add support for having user mapped iocbs Jens Axboe
2018-12-11  0:15 ` [PATCH 15/27] aio: support for IO polling Jens Axboe
2018-12-11  0:15 ` [PATCH 16/27] aio: add submission side request cache Jens Axboe
2018-12-11  0:15 ` [PATCH 17/27] fs: add fget_many() and fput_many() Jens Axboe
2018-12-11  0:15 ` [PATCH 18/27] aio: use fget/fput_many() for file references Jens Axboe
2018-12-11  0:15 ` [PATCH 19/27] aio: split iocb init from allocation Jens Axboe
2018-12-11  0:15 ` [PATCH 20/27] aio: batch aio_kiocb allocation Jens Axboe
2018-12-11  0:15 ` [PATCH 21/27] block: add BIO_HOLD_PAGES flag Jens Axboe
2018-12-11  0:15 ` [PATCH 22/27] block: implement bio helper to add iter bvec pages to bio Jens Axboe
2018-12-11  0:15 ` [PATCH 23/27] fs: add support for mapping an ITER_BVEC for O_DIRECT Jens Axboe
2018-12-11  0:15 ` [PATCH 24/27] aio: add support for pre-mapped user IO buffers Jens Axboe
2018-12-11  0:15 ` [PATCH 25/27] aio: split old ring complete out from aio_complete() Jens Axboe
2018-12-11  0:15 ` [PATCH 26/27] aio: add support for submission/completion rings Jens Axboe
2018-12-11  0:15 ` [PATCH 27/27] aio: support kernel side submission for aio with SCQRING Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181211001549.30085-14-axboe@kernel.dk \
    --to=axboe@kernel.dk \
    --cc=clm@fb.com \
    --cc=hch@lst.de \
    --cc=jmoyer@redhat.com \
    --cc=linux-aio@kvack.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).