linux-api.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "André Almeida" <andrealmeid@igalia.com>
To: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org,
	"Paul E . McKenney" <paulmck@kernel.org>,
	"Boqun Feng" <boqun.feng@gmail.com>,
	"H . Peter Anvin" <hpa@zytor.com>, "Paul Turner" <pjt@google.com>,
	linux-api@vger.kernel.org,
	"Christian Brauner" <brauner@kernel.org>,
	"Florian Weimer" <fw@deneb.enyo.de>,
	David.Laight@ACULAB.COM, carlos@redhat.com,
	"Peter Oskolkov" <posk@posk.io>,
	"Alexander Mikhalitsyn" <alexander@mihalicyn.com>,
	"Chris Kennelly" <ckennelly@google.com>,
	"Ingo Molnar" <mingo@redhat.com>,
	"Darren Hart" <dvhart@infradead.org>,
	"Davidlohr Bueso" <dave@stgolabs.net>,
	"André Almeida" <andrealmeid@igalia.com>,
	libc-alpha@sourceware.org, "Steven Rostedt" <rostedt@goodmis.org>,
	"Jonathan Corbet" <corbet@lwn.net>,
	"Noah Goldstein" <goldstein.w.n@gmail.com>,
	"Daniel Colascione" <dancol@google.com>,
	longman@redhat.com, kernel-dev@igalia.com
Subject: [RFC PATCH 1/1] futex: Add FUTEX_SPIN operation
Date: Thu, 25 Apr 2024 17:43:32 -0300	[thread overview]
Message-ID: <20240425204332.221162-2-andrealmeid@igalia.com> (raw)
In-Reply-To: <20240425204332.221162-1-andrealmeid@igalia.com>

Add a new futex mode for futex wait, the futex spin.

Given the FUTEX2_SPIN flag, parse the futex value as the PID of the lock
owner. Then, before going to the normal wait path, spin while the lock
owner is running on a different CPU, to avoid the whole context switch
operation and to quickly return to userspace. If the lock owner is not
running, just sleep, as in the normal futex wait path.

The check for whether the owner is running is important to avoid
spinning on something that won't be released quickly. Userspace is
responsible for providing the proper PID; the kernel does a basic check.

Signed-off-by: André Almeida <andrealmeid@igalia.com>
---
 include/uapi/linux/futex.h |  2 +-
 kernel/futex/futex.h       |  6 ++-
 kernel/futex/waitwake.c    | 79 +++++++++++++++++++++++++++++++++++++-
 3 files changed, 83 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
index d2ee625ea189..d77d692ffac2 100644
--- a/include/uapi/linux/futex.h
+++ b/include/uapi/linux/futex.h
@@ -63,7 +63,7 @@
 #define FUTEX2_SIZE_U32		0x02
 #define FUTEX2_SIZE_U64		0x03
 #define FUTEX2_NUMA		0x04
-			/*	0x08 */
+#define FUTEX2_SPIN		0x08
 			/*	0x10 */
 			/*	0x20 */
 			/*	0x40 */
diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h
index 8b195d06f4e8..180c1c10dc81 100644
--- a/kernel/futex/futex.h
+++ b/kernel/futex/futex.h
@@ -37,6 +37,7 @@
 #define FLAGS_HAS_TIMEOUT	0x0040
 #define FLAGS_NUMA		0x0080
 #define FLAGS_STRICT		0x0100
+#define FLAGS_SPIN		0x0200
 
 /* FUTEX_ to FLAGS_ */
 static inline unsigned int futex_to_flags(unsigned int op)
@@ -52,7 +53,7 @@ static inline unsigned int futex_to_flags(unsigned int op)
 	return flags;
 }
 
-#define FUTEX2_VALID_MASK (FUTEX2_SIZE_MASK | FUTEX2_PRIVATE)
+#define FUTEX2_VALID_MASK (FUTEX2_SIZE_MASK | FUTEX2_PRIVATE | FUTEX2_SPIN)
 
 /* FUTEX2_ to FLAGS_ */
 static inline unsigned int futex2_to_flags(unsigned int flags2)
@@ -65,6 +66,9 @@ static inline unsigned int futex2_to_flags(unsigned int flags2)
 	if (flags2 & FUTEX2_NUMA)
 		flags |= FLAGS_NUMA;
 
+	if (flags2 & FUTEX2_SPIN)
+		flags |= FLAGS_SPIN;
+
 	return flags;
 }
 
diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c
index 3a10375d9521..94feac92cf4f 100644
--- a/kernel/futex/waitwake.c
+++ b/kernel/futex/waitwake.c
@@ -372,6 +372,78 @@ void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
 	__set_current_state(TASK_RUNNING);
 }
 
+static inline bool task_on_cpu(struct task_struct *p)
+{
+#ifdef CONFIG_SMP
+	return !!(p->on_cpu);
+#else
+	return false;
+#endif
+}
+
+static int futex_spin(struct futex_hash_bucket *hb, struct futex_q *q,
+		       struct hrtimer_sleeper *timeout, void __user *uaddr, u32 val)
+{
+	struct task_struct *p;
+	u32 pid, uval;
+	unsigned int i = 0;
+
+	if (futex_get_value_locked(&uval, uaddr))
+		return -EFAULT;
+
+	pid = uval;
+
+	p = find_get_task_by_vpid(pid);
+	if (!p) {
+		printk("%s: no task found with PID %d\n", __func__, pid);
+		return -EAGAIN;
+	}
+
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		put_task_struct(p);
+		printk("%s: can't spin in a kernel task\n", __func__);
+		return -EPERM;
+	}
+
+	futex_queue(q, hb);
+
+	if (timeout)
+		hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS);
+
+	while (1) {
+		if (likely(!plist_node_empty(&q->list))) {
+			if (timeout && !timeout->task)
+				return 0;
+
+			/* spin */
+			if (task_on_cpu(p)) {
+				i++;
+				continue;
+			/* task is not running, sleep */
+			} else {
+				break;
+			}
+		} else {
+			printk("%s: woke after %d spins\n", __func__, i);
+			return 0;
+		}
+	}
+
+	printk("%s: spinned %d times, sleeping\n", __func__, i);
+
+	/* spinning didn't work, go to the normal path */
+	set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
+
+	if (likely(!plist_node_empty(&q->list))) {
+		if (!timeout || timeout->task)
+			schedule();
+	}
+
+	__set_current_state(TASK_RUNNING);
+
+	return 0;
+}
+
 /**
  * futex_unqueue_multiple - Remove various futexes from their hash bucket
  * @v:	   The list of futexes to unqueue
@@ -665,8 +737,11 @@ int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
 	if (ret)
 		return ret;
 
-	/* futex_queue and wait for wakeup, timeout, or a signal. */
-	futex_wait_queue(hb, &q, to);
+	if (flags & FLAGS_SPIN)
+		futex_spin(hb, &q, to, uaddr, val);
+	else
+		/* futex_queue and wait for wakeup, timeout, or a signal. */
+		futex_wait_queue(hb, &q, to);
 
 	/* If we were woken (and unqueued), we succeeded, whatever. */
 	if (!futex_unqueue(&q))
-- 
2.44.0


  reply	other threads:[~2024-04-25 20:44 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-25 20:43 [RFC PATCH 0/1] Add FUTEX_SPIN operation André Almeida
2024-04-25 20:43 ` André Almeida [this message]
2024-04-26  9:43 ` Florian Weimer
2024-04-26 10:14   ` Peter Zijlstra
2024-04-26 10:26 ` Christian Brauner
2024-05-01 23:44   ` André Almeida
2024-05-02  8:45     ` Christian Brauner
2024-05-02  9:51       ` Florian Weimer
2024-05-02 10:14         ` Christian Brauner
2024-05-02 10:39           ` Florian Weimer
2024-05-02 13:08             ` Christian Brauner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240425204332.221162-2-andrealmeid@igalia.com \
    --to=andrealmeid@igalia.com \
    --cc=David.Laight@ACULAB.COM \
    --cc=alexander@mihalicyn.com \
    --cc=boqun.feng@gmail.com \
    --cc=brauner@kernel.org \
    --cc=carlos@redhat.com \
    --cc=ckennelly@google.com \
    --cc=corbet@lwn.net \
    --cc=dancol@google.com \
    --cc=dave@stgolabs.net \
    --cc=dvhart@infradead.org \
    --cc=fw@deneb.enyo.de \
    --cc=goldstein.w.n@gmail.com \
    --cc=hpa@zytor.com \
    --cc=kernel-dev@igalia.com \
    --cc=libc-alpha@sourceware.org \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=longman@redhat.com \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=mingo@redhat.com \
    --cc=paulmck@kernel.org \
    --cc=peterz@infradead.org \
    --cc=pjt@google.com \
    --cc=posk@posk.io \
    --cc=rostedt@goodmis.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).