linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "André Almeida" <andrealmeid@collabora.com>
To: Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Darren Hart <dvhart@infradead.org>,
	linux-kernel@vger.kernel.org,
	Steven Rostedt <rostedt@goodmis.org>,
	Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: kernel@collabora.com, krisman@collabora.com,
	pgriffais@valvesoftware.com, z.figura12@gmail.com,
	joel@joelfernandes.org, malteskarupke@fastmail.fm,
	linux-api@vger.kernel.org, fweimer@redhat.com,
	libc-alpha@sourceware.org, linux-kselftest@vger.kernel.org,
	shuah@kernel.org, acme@kernel.org, corbet@lwn.net,
	"André Almeida" <andrealmeid@collabora.com>
Subject: [RFC PATCH v2 03/13] futex2: Implement vectorized wait
Date: Wed,  3 Mar 2021 21:42:09 -0300	[thread overview]
Message-ID: <20210304004219.134051-4-andrealmeid@collabora.com> (raw)
In-Reply-To: <20210304004219.134051-1-andrealmeid@collabora.com>

Add support to wait on multiple futexes. This is the interface
implemented by this syscall:

futex_waitv(struct futex_waitv *waiters, unsigned int nr_futexes,
	    unsigned int flags, struct timespec *timo)

struct futex_waitv {
	void *uaddr;
	unsigned int val;
	unsigned int flags;
};

Given an array of struct futex_waitv, wait on each uaddr. The thread
wakes if a futex_wake() is performed at any uaddr. The syscall returns
immediately if any waiter has *uaddr != val. *timo is an optional
timeout value for the operation. The flags argument of the syscall
should be used solely for specifying the timeout as realtime, if needed.
Flags for shared futexes, sizes, etc. should be used on the individual
flags of each waiter.

Returns the array index of one of the awakened futexes. There’s no given
information of how many were awakened, or any particular attribute of it
(if it’s the first awakened, if it is of the smaller index...).

Signed-off-by: André Almeida <andrealmeid@collabora.com>
---
 arch/arm/tools/syscall.tbl                    |   1 +
 arch/arm64/include/asm/unistd.h               |   2 +-
 arch/arm64/include/asm/unistd32.h             |   2 +
 arch/x86/entry/syscalls/syscall_32.tbl        |   1 +
 arch/x86/entry/syscalls/syscall_64.tbl        |   1 +
 include/linux/compat.h                        |  11 ++
 include/linux/syscalls.h                      |   4 +
 include/uapi/asm-generic/unistd.h             |   5 +-
 include/uapi/linux/futex.h                    |  14 ++
 kernel/futex2.c                               | 177 ++++++++++++++++++
 kernel/sys_ni.c                               |   1 +
 tools/include/uapi/asm-generic/unistd.h       |   5 +-
 .../arch/x86/entry/syscalls/syscall_64.tbl    |   1 +
 13 files changed, 222 insertions(+), 3 deletions(-)

diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 2bf93c69e00a..f9b55f2ea444 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -458,3 +458,4 @@
 442	common	mount_setattr			sys_mount_setattr
 443	common	futex_wait			sys_futex_wait
 444	common	futex_wake			sys_futex_wake
+445	common  futex_waitv                     sys_futex_waitv
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 64ebdc1ec581..d1cc2849dc00 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -38,7 +38,7 @@
 #define __ARM_NR_compat_set_tls		(__ARM_NR_COMPAT_BASE + 5)
 #define __ARM_NR_COMPAT_END		(__ARM_NR_COMPAT_BASE + 0x800)
 
-#define __NR_compat_syscalls		445
+#define __NR_compat_syscalls		446
 #endif
 
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 15c2cd5f1c95..1e19604b8885 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -897,6 +897,8 @@ __SYSCALL(__NR_mount_setattr, sys_mount_setattr)
 __SYSCALL(__NR_futex_wait, sys_futex_wait)
 #define __NR_futex_wake 444
 __SYSCALL(__NR_futex_wake, sys_futex_wake)
+#define __NR_futex_waitv 445
+__SYSCALL(__NR_futex_waitv, compat_sys_futex_waitv)
 
 /*
  * Please add new compat syscalls above this comment and update
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 17d22509d780..4bc546c841b0 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -449,3 +449,4 @@
 442	i386	mount_setattr		sys_mount_setattr
 443	i386	futex_wait		sys_futex_wait
 444	i386	futex_wake		sys_futex_wake
+445	i386	futex_waitv		sys_futex_waitv			compat_sys_futex_waitv
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 3336b5cd5bdb..a715e88e3d6d 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -366,6 +366,7 @@
 442	common	mount_setattr		sys_mount_setattr
 443	common	futex_wait		sys_futex_wait
 444	common	futex_wake		sys_futex_wake
+445	common	futex_waitv		sys_futex_waitv
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 6e65be753603..041d18174350 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -365,6 +365,12 @@ struct compat_robust_list_head {
 	compat_uptr_t			list_op_pending;
 };
 
+struct compat_futex_waitv {
+	compat_uptr_t uaddr;
+	compat_uint_t val;
+	compat_uint_t flags;
+};
+
 #ifdef CONFIG_COMPAT_OLD_SIGACTION
 struct compat_old_sigaction {
 	compat_uptr_t			sa_handler;
@@ -654,6 +660,11 @@ asmlinkage long
 compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
 			   compat_size_t __user *len_ptr);
 
+/* kernel/futex2.c */
+asmlinkage long compat_sys_futex_waitv(struct compat_futex_waitv *waiters,
+				       compat_uint_t nr_futexes, compat_uint_t flags,
+				       struct __kernel_timespec __user *timo);
+
 /* kernel/itimer.c */
 asmlinkage long compat_sys_getitimer(int which,
 				     struct old_itimerval32 __user *it);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 352f69a2b94c..48e96fe7d8f6 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -69,6 +69,7 @@ struct io_uring_params;
 struct clone_args;
 struct open_how;
 struct mount_attr;
+struct futex_waitv;
 
 #include <linux/types.h>
 #include <linux/aio_abi.h>
@@ -625,6 +626,9 @@ asmlinkage long sys_futex_wait(void __user *uaddr, unsigned int val,
 			       struct __kernel_timespec __user *timo);
 asmlinkage long sys_futex_wake(void __user *uaddr, unsigned int nr_wake,
 			       unsigned int flags);
+asmlinkage long sys_futex_waitv(struct futex_waitv __user *waiters,
+				unsigned int nr_futexes, unsigned int flags,
+				struct __kernel_timespec __user *timo);
 
 /* kernel/hrtimer.c */
 asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp,
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 738315f148fa..2a6adca37fe9 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -870,8 +870,11 @@ __SYSCALL(__NR_futex_wait, sys_futex_wait)
 #define __NR_futex_wake 444
 __SYSCALL(__NR_futex_wake, sys_futex_wake)
 
+#define __NR_futex_waitv 445
+__SC_COMP(__NR_futex_waitv, sys_futex_waitv, compat_sys_futex_waitv)
+
 #undef __NR_syscalls
-#define __NR_syscalls 445
+#define __NR_syscalls 446
 
 /*
  * 32 bit systems traditionally used different
diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
index 70ea66fffb1c..3216aee015d2 100644
--- a/include/uapi/linux/futex.h
+++ b/include/uapi/linux/futex.h
@@ -48,6 +48,20 @@
 
 #define FUTEX_SHARED_FLAG 8
 
+#define FUTEX_WAITV_MAX 128
+
+/**
+ * struct futex_waitv - A waiter for vectorized wait
+ * @uaddr: User address to wait on
+ * @val:   Expected value at uaddr
+ * @flags: Flags for this waiter
+ */
+struct futex_waitv {
+	void __user *uaddr;
+	unsigned int val;
+	unsigned int flags;
+};
+
 /*
  * Support for robust futexes: the kernel cleans up held futexes at
  * thread exit time.
diff --git a/kernel/futex2.c b/kernel/futex2.c
index 0ac669fe6edd..98e7024bf5e3 100644
--- a/kernel/futex2.c
+++ b/kernel/futex2.c
@@ -82,6 +82,12 @@ struct futex_bucket {
 /* Mask for futex2 flag operations */
 #define FUTEX2_MASK (FUTEX_SIZE_MASK | FUTEX_CLOCK_REALTIME | FUTEX_SHARED_FLAG)
 
+/* Mask for sys_futex_waitv flag */
+#define FUTEXV_MASK (FUTEX_CLOCK_REALTIME)
+
+/* Mask for each futex in futex_waitv list */
+#define FUTEXV_WAITER_MASK (FUTEX_SIZE_MASK | FUTEX_SHARED_FLAG)
+
 #define is_object_shared ((futexv->objects[i].flags & FUTEX_SHARED_FLAG) ? true : false)
 
 #define FUT_OFF_INODE    1 /* We set bit 0 if key has a reference on inode */
@@ -688,6 +694,177 @@ SYSCALL_DEFINE4(futex_wait, void __user *, uaddr, unsigned int, val,
 	return __futex_waitv(futexv, 1, timo, flags);
 }
 
+#ifdef CONFIG_COMPAT
+/**
+ * compat_futex_parse_waitv - Parse a waitv array from userspace
+ * @futexv:	Kernel side list of waiters to be filled
+ * @uwaitv:     Userspace list to be parsed
+ * @nr_futexes: Length of futexv
+ *
+ * Return: Error code on failure, pointer to a prepared futexv otherwise
+ */
+static int compat_futex_parse_waitv(struct futex_waiter_head *futexv,
+				    struct compat_futex_waitv __user *uwaitv,
+				    unsigned int nr_futexes)
+{
+	struct futex_bucket *bucket;
+	struct compat_futex_waitv waitv;
+	unsigned int i;
+
+	for (i = 0; i < nr_futexes; i++) {
+		if (copy_from_user(&waitv, &uwaitv[i], sizeof(waitv)))
+			return -EFAULT;
+
+		if ((waitv.flags & ~FUTEXV_WAITER_MASK) ||
+		    (waitv.flags & FUTEX_SIZE_MASK) != FUTEX_32)
+			return -EINVAL;
+
+		futexv->objects[i].key.pointer = 0;
+		futexv->objects[i].flags  = waitv.flags;
+		futexv->objects[i].uaddr  = compat_ptr(waitv.uaddr);
+		futexv->objects[i].val    = waitv.val;
+		futexv->objects[i].index  = i;
+
+		bucket = futex_get_bucket(compat_ptr(waitv.uaddr),
+					  &futexv->objects[i].key,
+					  is_object_shared);
+
+		if (IS_ERR(bucket))
+			return PTR_ERR(bucket);
+
+		futexv->objects[i].bucket = bucket;
+
+		INIT_LIST_HEAD(&futexv->objects[i].list);
+	}
+
+	return 0;
+}
+
+COMPAT_SYSCALL_DEFINE4(futex_waitv, struct compat_futex_waitv __user *, waiters,
+		       unsigned int, nr_futexes, unsigned int, flags,
+		       struct __kernel_timespec __user *, timo)
+{
+	struct futex_waiter_head *futexv;
+	int ret;
+
+	if (flags & ~FUTEXV_MASK)
+		return -EINVAL;
+
+	if (!nr_futexes || nr_futexes > FUTEX_WAITV_MAX || !waiters)
+		return -EINVAL;
+
+	futexv = kmalloc((sizeof(struct futex_waiter) * nr_futexes) +
+			 sizeof(*futexv), GFP_KERNEL);
+	if (!futexv)
+		return -ENOMEM;
+
+	futexv->hint = false;
+	futexv->task = current;
+
+	ret = compat_futex_parse_waitv(futexv, waiters, nr_futexes);
+
+	if (!ret)
+		ret = __futex_waitv(futexv, nr_futexes, timo, flags);
+
+	kfree(futexv);
+
+	return ret;
+}
+#endif
+
+/**
+ * futex_parse_waitv - Parse a waitv array from userspace
+ * @futexv:	Kernel side list of waiters to be filled
+ * @uwaitv:     Userspace list to be parsed
+ * @nr_futexes: Length of futexv
+ *
+ * Return: Error code on failure, pointer to a prepared futexv otherwise
+ */
+static int futex_parse_waitv(struct futex_waiter_head *futexv,
+			     struct futex_waitv __user *uwaitv,
+			     unsigned int nr_futexes)
+{
+	struct futex_bucket *bucket;
+	struct futex_waitv waitv;
+	unsigned int i;
+
+	for (i = 0; i < nr_futexes; i++) {
+		if (copy_from_user(&waitv, &uwaitv[i], sizeof(waitv)))
+			return -EFAULT;
+
+		if ((waitv.flags & ~FUTEXV_WAITER_MASK) ||
+		    (waitv.flags & FUTEX_SIZE_MASK) != FUTEX_32)
+			return -EINVAL;
+
+		futexv->objects[i].key.pointer = 0;
+		futexv->objects[i].flags  = waitv.flags;
+		futexv->objects[i].uaddr  = waitv.uaddr;
+		futexv->objects[i].val    = waitv.val;
+		futexv->objects[i].index  = i;
+
+		bucket = futex_get_bucket(waitv.uaddr, &futexv->objects[i].key,
+					  is_object_shared);
+
+		if (IS_ERR(bucket))
+			return PTR_ERR(bucket);
+
+		futexv->objects[i].bucket = bucket;
+
+		INIT_LIST_HEAD(&futexv->objects[i].list);
+	}
+
+	return 0;
+}
+
+/**
+ * sys_futex_waitv - Wait on a list of futexes
+ * @waiters:    List of futexes to wait on
+ * @nr_futexes: Length of futexv
+ * @flags:      Flag for timeout (monotonic/realtime)
+ * @timo:	Optional absolute timeout.
+ *
+ * Given an array of `struct futex_waitv`, wait on each uaddr. The thread wakes
+ * if a futex_wake() is performed at any uaddr. The syscall returns immediately
+ * if any waiter has *uaddr != val. *timo is an optional timeout value for the
+ * operation. Each waiter has individual flags. The `flags` argument for the
+ * syscall should be used solely for specifying the timeout as realtime, if
+ * needed. Flags for shared futexes, sizes, etc. should be used on the
+ * individual flags of each waiter.
+ *
+ * Returns the array index of one of the awaken futexes. There's no given
+ * information of how many were awakened, or any particular attribute of it (if
+ * it's the first awakened, if it is of the smaller index...).
+ */
+SYSCALL_DEFINE4(futex_waitv, struct futex_waitv __user *, waiters,
+		unsigned int, nr_futexes, unsigned int, flags,
+		struct __kernel_timespec __user *, timo)
+{
+	struct futex_waiter_head *futexv;
+	int ret;
+
+	if (flags & ~FUTEXV_MASK)
+		return -EINVAL;
+
+	if (!nr_futexes || nr_futexes > FUTEX_WAITV_MAX || !waiters)
+		return -EINVAL;
+
+	futexv = kmalloc((sizeof(struct futex_waiter) * nr_futexes) +
+			 sizeof(*futexv), GFP_KERNEL);
+	if (!futexv)
+		return -ENOMEM;
+
+	futexv->hint = false;
+	futexv->task = current;
+
+	ret = futex_parse_waitv(futexv, waiters, nr_futexes);
+	if (!ret)
+		ret = __futex_waitv(futexv, nr_futexes, timo, flags);
+
+	kfree(futexv);
+
+	return ret;
+}
+
 /**
  * futex_get_parent - For a given futex in a futexv list, get a pointer to the futexv
  * @waiter: Address of futex in the list
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 27ef83ca8a9d..977890c58ab5 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -153,6 +153,7 @@ COND_SYSCALL_COMPAT(get_robust_list);
 /* kernel/futex2.c */
 COND_SYSCALL(futex_wait);
 COND_SYSCALL(futex_wake);
+COND_SYSCALL(futex_waitv);
 
 /* kernel/hrtimer.c */
 
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 738315f148fa..2a6adca37fe9 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -870,8 +870,11 @@ __SYSCALL(__NR_futex_wait, sys_futex_wait)
 #define __NR_futex_wake 444
 __SYSCALL(__NR_futex_wake, sys_futex_wake)
 
+#define __NR_futex_waitv 445
+__SC_COMP(__NR_futex_waitv, sys_futex_waitv, compat_sys_futex_waitv)
+
 #undef __NR_syscalls
-#define __NR_syscalls 445
+#define __NR_syscalls 446
 
 /*
  * 32 bit systems traditionally used different
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 45632be70a15..31661fc0cd25 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -365,6 +365,7 @@
 441	common	epoll_pwait2		sys_epoll_pwait2
 443	common  futex_wait              sys_futex_wait
 444	common  futex_wake              sys_futex_wake
+445	common  futex_waitv             sys_futex_waitv
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
-- 
2.30.1


  parent reply	other threads:[~2021-03-04  1:09 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-04  0:42 [RFC PATCH v2 00/13] Add futex2 syscall André Almeida
2021-03-04  0:42 ` [RFC PATCH v2 01/13] futex2: Implement wait and wake functions André Almeida
2021-03-04  0:42 ` [RFC PATCH v2 02/13] futex2: Add support for shared futexes André Almeida
2021-03-04  0:42 ` André Almeida [this message]
2021-03-04  0:42 ` [RFC PATCH v2 04/13] futex2: Implement requeue operation André Almeida
2021-03-04  0:42 ` [RFC PATCH v2 05/13] futex2: Add compatibility entry point for x86_x32 ABI André Almeida
2021-03-04  0:42 ` [RFC PATCH v2 06/13] docs: locking: futex2: Add documentation André Almeida
2021-03-04  0:42 ` [RFC PATCH v2 07/13] selftests: futex2: Add wake/wait test André Almeida
2021-03-04  0:42 ` [RFC PATCH v2 08/13] selftests: futex2: Add timeout test André Almeida
2021-03-04  0:42 ` [RFC PATCH v2 09/13] selftests: futex2: Add wouldblock test André Almeida
2021-03-04  0:42 ` [RFC PATCH v2 10/13] selftests: futex2: Add waitv test André Almeida
2021-03-04  0:42 ` [RFC PATCH v2 11/13] selftests: futex2: Add requeue test André Almeida
2021-03-04  0:42 ` [RFC PATCH v2 12/13] perf bench: Add futex2 benchmark tests André Almeida
2021-03-04  0:42 ` [RFC PATCH v2 13/13] kernel: Enable waitpid() for futex2 André Almeida
2021-03-04  5:44 ` [RFC PATCH v2 00/13] Add futex2 syscall Peter Oskolkov
2021-03-04 18:58   ` André Almeida
2021-03-05 20:03     ` Peter Oskolkov
2021-03-05 20:08       ` Peter Oskolkov
2021-03-04 15:01 ` Theodore Ts'o
2021-03-04 19:15   ` André Almeida
2021-03-07 11:34 ` Stefan Metzmacher
2021-03-07 11:56   ` Daurnimator
2021-03-08 11:52     ` Stefan Metzmacher
2021-03-08 11:11   ` David Laight
2021-03-08 11:55     ` Stefan Metzmacher
2021-03-08 16:18 ` Zebediah Figura
2021-03-08 17:33   ` David Laight

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210304004219.134051-4-andrealmeid@collabora.com \
    --to=andrealmeid@collabora.com \
    --cc=acme@kernel.org \
    --cc=bigeasy@linutronix.de \
    --cc=corbet@lwn.net \
    --cc=dvhart@infradead.org \
    --cc=fweimer@redhat.com \
    --cc=joel@joelfernandes.org \
    --cc=kernel@collabora.com \
    --cc=krisman@collabora.com \
    --cc=libc-alpha@sourceware.org \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=malteskarupke@fastmail.fm \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=pgriffais@valvesoftware.com \
    --cc=rostedt@goodmis.org \
    --cc=shuah@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=z.figura12@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).