All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Oskolkov <posk@google.com>
To: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	linux-kernel@vger.kernel.org, linux-api@vger.kernel.org
Cc: Paul Turner <pjt@google.com>, Ben Segall <bsegall@google.com>,
	Peter Oskolkov <posk@google.com>, Peter Oskolkov <posk@posk.io>,
	Joel Fernandes <joel@joelfernandes.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Andrei Vagin <avagin@google.com>,
	Jim Newsome <jnewsome@torproject.org>
Subject: [RFC PATCH v0.1 5/9] lib/umcg: implement UMCG core API for userspace
Date: Thu, 20 May 2021 11:36:10 -0700	[thread overview]
Message-ID: <20210520183614.1227046-6-posk@google.com> (raw)
In-Reply-To: <20210520183614.1227046-1-posk@google.com>

UMCG (= User Managed Concurrency Groups) kernel API
is designed to be minimalistic and requires tightly
coupled userspace code to make it easy to use.

Add userspace UMCG core API to achieve this goal.

Signed-off-by: Peter Oskolkov <posk@google.com>
---
 tools/lib/umcg/.gitignore |   4 +
 tools/lib/umcg/Makefile   |  11 ++
 tools/lib/umcg/libumcg.c  | 350 ++++++++++++++++++++++++++++++++++++++
 tools/lib/umcg/libumcg.h  | 154 +++++++++++++++++
 4 files changed, 519 insertions(+)
 create mode 100644 tools/lib/umcg/.gitignore
 create mode 100644 tools/lib/umcg/Makefile
 create mode 100644 tools/lib/umcg/libumcg.c
 create mode 100644 tools/lib/umcg/libumcg.h

diff --git a/tools/lib/umcg/.gitignore b/tools/lib/umcg/.gitignore
new file mode 100644
index 000000000000..ea55ae666041
--- /dev/null
+++ b/tools/lib/umcg/.gitignore
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+libumcg.a
+libumcg.o
+
diff --git a/tools/lib/umcg/Makefile b/tools/lib/umcg/Makefile
new file mode 100644
index 000000000000..fa53fd5a851a
--- /dev/null
+++ b/tools/lib/umcg/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS += -g -I../../../usr/include/ -I../../include/
+
+libumcg.a: libumcg.o
+	ar rc libumcg.a libumcg.o
+
+libumcg.o: libumcg.c
+
+# Remove build outputs. "clean" names no file, so mark it phony; -f keeps
+# the rule from failing when the outputs have not been built yet.
+.PHONY: clean
+clean :
+	rm -f libumcg.a libumcg.o
diff --git a/tools/lib/umcg/libumcg.c b/tools/lib/umcg/libumcg.c
new file mode 100644
index 000000000000..b177fb1d4b17
--- /dev/null
+++ b/tools/lib/umcg/libumcg.c
@@ -0,0 +1,350 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include "libumcg.h"
+
+#include <errno.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdatomic.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <threads.h>
+
+/* UMCG API version supported by this library. */
+static const uint32_t umcg_api_version = 1;
+
+struct umcg_group {
+	uint32_t group_id;	/* numeric group ID; not referenced elsewhere in the visible code */
+};
+
+/**
+ * struct umcg_task_tls - per thread struct used to identify/manage UMCG tasks
+ * @umcg_task: the struct umcg_task handed to sys_umcg_register_task(). It must
+ *             stay the first member: umcg_task_to_utid() casts a
+ *             struct umcg_task pointer back to struct umcg_task_tls.
+ * @self:      the task's umcg_tid, i.e. the address of the owning thread's
+ *             thread-local umcg_task_tls pointer (set at registration).
+ * @tag:       caller-supplied value, accessed via umcg_set_task_tag() and
+ *             umcg_get_task_tag().
+ * @tid:       gettid() of the owning thread, recorded at registration and
+ *             passed to sys_umcg_wake()/sys_umcg_swap().
+ *
+ * Each UMCG task requires an instance of struct umcg_task passed to
+ * sys_umcg_register_task. This struct contains it, as well as several
+ * additional fields.
+ *
+ * The type is declared with an alignment of 4 * sizeof(uint64_t) bytes.
+ */
+struct umcg_task_tls {
+	struct umcg_task	umcg_task;
+	umcg_tid		self;
+	intptr_t		tag;
+	pid_t			tid;
+
+} __attribute((aligned(4 * sizeof(uint64_t))));
+
+static thread_local struct umcg_task_tls *umcg_task_tls;
+
+/*
+ * Return the calling thread's UMCG ID, which is the address of its
+ * thread-local umcg_task_tls pointer (not the pointer's value).
+ */
+umcg_tid umcg_get_utid(void)
+{
+	struct umcg_task_tls **slot = &umcg_task_tls;
+
+	return (umcg_tid)slot;
+}
+
+/*
+ * Map a struct umcg_task pointer back to the UMCG ID of its owner.
+ *
+ * Relies on umcg_task being the first member of struct umcg_task_tls.
+ * Returns UMCG_NONE for a NULL pointer.
+ */
+static umcg_tid umcg_task_to_utid(struct umcg_task *ut)
+{
+	struct umcg_task_tls *owner = (struct umcg_task_tls *)ut;
+
+	return owner ? owner->self : UMCG_NONE;
+}
+
+/*
+ * Resolve a UMCG ID to the struct umcg_task_tls it currently points at.
+ *
+ * A zero ID, or an ID whose slot holds NULL, is treated as fatal: a message
+ * is printed and SIGKILL is raised.
+ */
+static struct umcg_task_tls *utid_to_utls(umcg_tid utid)
+{
+	struct umcg_task_tls *utls = NULL;
+
+	if (utid)
+		utls = *(struct umcg_task_tls **)utid;
+
+	if (utls)
+		return utls;
+
+	fprintf(stderr, "utid_to_utls: NULL\n");
+	/* Kill the process rather than corrupt memory. */
+	raise(SIGKILL);
+	return NULL;
+}
+
+/* Store @tag in the task identified by @utid; no synchronization is done. */
+void umcg_set_task_tag(umcg_tid utid, intptr_t tag)
+{
+	struct umcg_task_tls *utls = utid_to_utls(utid);
+
+	utls->tag = tag;
+}
+
+/* Read the tag of the task identified by @utid; no synchronization is done. */
+intptr_t umcg_get_task_tag(umcg_tid utid)
+{
+	const struct umcg_task_tls *utls = utid_to_utls(utid);
+
+	return utls->tag;
+}
+
+/*
+ * Register the calling thread as a UMCG core task.
+ *
+ * @tag is stored in the new per-thread state.
+ *
+ * Returns the thread's UMCG ID on success. Returns UMCG_NONE on failure with
+ * errno set: EINVAL if the thread is already registered, ENOMEM if the
+ * per-thread state cannot be allocated, otherwise the errno left by the
+ * registration syscall.
+ */
+umcg_tid umcg_register_core_task(intptr_t tag)
+{
+	struct umcg_task_tls *utls;
+	int saved_errno;
+
+	if (umcg_task_tls != NULL) {
+		errno = EINVAL;
+		return UMCG_NONE;
+	}
+
+	/*
+	 * struct umcg_task_tls is declared with extended alignment, which
+	 * malloc() is not required to honor. sizeof() of an aligned type is a
+	 * multiple of its alignment, as aligned_alloc() requires.
+	 */
+	utls = aligned_alloc(_Alignof(struct umcg_task_tls), sizeof(*utls));
+	if (!utls) {
+		errno = ENOMEM;
+		return UMCG_NONE;
+	}
+
+	/* Do not hand uninitialized heap bytes to the kernel. */
+	*utls = (struct umcg_task_tls){ 0 };
+	utls->umcg_task.state = UMCG_TASK_NONE;
+	utls->self = (umcg_tid)&umcg_task_tls;
+	utls->tag = tag;
+	utls->tid = gettid();
+	umcg_task_tls = utls;
+
+	if (sys_umcg_register_task(umcg_api_version, UMCG_REGISTER_CORE_TASK,
+			UMCG_NOID, &utls->umcg_task)) {
+		/*
+		 * syscall(2) reports failure as -1 with the cause in errno;
+		 * preserve it across free(), which may clobber errno.
+		 */
+		saved_errno = errno;
+		umcg_task_tls = NULL;
+		free(utls);
+		errno = saved_errno;
+		return UMCG_NONE;
+	}
+
+	return utls->self;
+}
+
+/*
+ * Unregister the calling thread and release its per-thread state.
+ *
+ * Returns 0 on success. Returns -1 with errno set on failure: EINVAL if the
+ * thread is not registered, otherwise the errno left by the unregister
+ * syscall (the per-thread state is kept in that case).
+ */
+int umcg_unregister_task(void)
+{
+	struct umcg_task_tls *utls = umcg_task_tls;
+
+	if (!utls) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	/* syscall(2) already reports failure as -1 with errno set. */
+	if (sys_umcg_unregister_task(0))
+		return -1;
+
+	/* Clear the slot before freeing so it never holds a dangling pointer. */
+	atomic_store_explicit(&umcg_task_tls, NULL, memory_order_seq_cst);
+	free(utls);
+	return 0;
+}
+
+/*
+ * Helper return codes: the outcome of the userspace state transition that
+ * umcg_prepare_wait()/umcg_prepare_wake() attempt before any syscall.
+ */
+enum umcg_prepare_op_result {
+	UMCG_OP_DONE,	/* finished in userspace; no syscall is needed */
+	UMCG_OP_SYS,	/* state updated; the caller must issue the syscall */
+	UMCG_OP_AGAIN,	/* lost a race; the caller should retry the prepare step */
+	UMCG_OP_ERROR	/* failed; errno has been set */
+};
+
+/*
+ * Userspace half of a wait on the calling thread.
+ *
+ * Returns:
+ * UMCG_OP_SYS   - state moved RUNNING -> RUNNABLE; the caller must now block
+ *                 via the wait (or swap) syscall;
+ * UMCG_OP_DONE  - a queued wakeup was consumed (RUNNING | WAKEUP_QUEUED ->
+ *                 RUNNING); the caller must not block;
+ * UMCG_OP_ERROR - the thread is not registered or its state was unexpected;
+ *                 errno is set to EINVAL.
+ */
+static enum umcg_prepare_op_result umcg_prepare_wait(void)
+{
+	struct umcg_task *ut;
+	uint32_t umcg_state;
+
+	if (!umcg_task_tls) {
+		errno = EINVAL;
+		return UMCG_OP_ERROR;
+	}
+
+	ut = &umcg_task_tls->umcg_task;
+
+	umcg_state = UMCG_TASK_RUNNING;
+	if (atomic_compare_exchange_strong_explicit(&ut->state,
+			&umcg_state, UMCG_TASK_RUNNABLE,
+			memory_order_seq_cst, memory_order_seq_cst))
+		return UMCG_OP_SYS;
+
+	/* A failed exchange leaves the observed state in umcg_state. */
+	if (umcg_state != (UMCG_TASK_RUNNING | UMCG_TF_WAKEUP_QUEUED)) {
+		fprintf(stderr, "libumcg: unexpected state before wait: %u\n",
+				umcg_state);
+		errno = EINVAL;
+		return UMCG_OP_ERROR;
+	}
+
+	if (atomic_compare_exchange_strong_explicit(&ut->state,
+			&umcg_state, UMCG_TASK_RUNNING,
+			memory_order_seq_cst, memory_order_seq_cst)) {
+		return UMCG_OP_DONE;
+	}
+
+	/* Raced with another wait/wake? This is not supported. */
+	fprintf(stderr, "libumcg: failed to remove the wakeup flag: %u\n",
+			umcg_state);
+	errno = EINVAL;
+	return UMCG_OP_ERROR;
+}
+
+/*
+ * Block in sys_umcg_wait() until the calling thread's state is no longer
+ * UMCG_TASK_RUNNABLE.
+ *
+ * A syscall failure with errno other than EAGAIN is returned to the caller
+ * as is; otherwise the state is re-read and the wait is repeated while the
+ * state is still UMCG_TASK_RUNNABLE. Returns 0 once the state has changed.
+ */
+static int umcg_do_wait(const struct timespec *timeout)
+{
+	for (;;) {
+		uint32_t state;
+		int rc = sys_umcg_wait(0, timeout);
+
+		if (rc != 0 && errno != EAGAIN)
+			return rc;
+
+		state = atomic_load_explicit(&umcg_task_tls->umcg_task.state,
+					     memory_order_acquire);
+		if (state != UMCG_TASK_RUNNABLE)
+			break;
+	}
+
+	return 0;
+}
+
+/*
+ * Block the calling thread until it is woken, or return at once when a
+ * queued wakeup could be consumed.
+ *
+ * Returns 0 on success and -1 when the prepare step fails; a failure of the
+ * wait syscall is passed through from umcg_do_wait().
+ */
+int umcg_wait(const struct timespec *timeout)
+{
+	enum umcg_prepare_op_result prep = umcg_prepare_wait();
+
+	if (prep == UMCG_OP_DONE)
+		return 0;
+
+	if (prep == UMCG_OP_ERROR)
+		return -1;
+
+	if (prep != UMCG_OP_SYS) {
+		/* No other result is expected from umcg_prepare_wait(). */
+		fprintf(stderr, "Unknown pre_op result.\n");
+		exit(1);
+	}
+
+	return umcg_do_wait(timeout);
+}
+
+/*
+ * Userspace half of waking the task described by @utls.
+ *
+ * Returns:
+ * UMCG_OP_SYS   - state moved RUNNABLE -> RUNNING; the caller must issue the
+ *                 wake (or swap) syscall;
+ * UMCG_OP_DONE  - the target was RUNNING and a wakeup has been queued;
+ * UMCG_OP_AGAIN - the target already had a wakeup queued, or its state
+ *                 changed to RUNNABLE mid-way; the caller should retry;
+ * UMCG_OP_ERROR - unexpected state; errno is set to EINVAL.
+ */
+static enum umcg_prepare_op_result umcg_prepare_wake(struct umcg_task_tls *utls)
+{
+	struct umcg_task *task = &utls->umcg_task;
+	uint32_t seen = UMCG_TASK_RUNNABLE;
+
+	/* Target is marked RUNNABLE: flip it to RUNNING and go to the kernel. */
+	if (atomic_compare_exchange_strong_explicit(&task->state, &seen,
+			UMCG_TASK_RUNNING,
+			memory_order_seq_cst, memory_order_seq_cst))
+		return UMCG_OP_SYS;
+
+	/* The failed exchange stored the observed state in "seen". */
+	if (seen == (UMCG_TASK_RUNNING | UMCG_TF_WAKEUP_QUEUED)) {
+		/*
+		 * With ping-pong mutual swapping using wake/wait
+		 * without synchronization this can happen.
+		 */
+		return UMCG_OP_AGAIN;
+	}
+
+	if (seen != UMCG_TASK_RUNNING) {
+		fprintf(stderr, "libumcg: unexpected state in umcg_wake(): %u\n",
+				seen);
+		errno = EINVAL;
+		return UMCG_OP_ERROR;
+	}
+
+	/* Target is RUNNING: leave a wakeup for its next wait to consume. */
+	if (atomic_compare_exchange_strong_explicit(&task->state, &seen,
+			UMCG_TASK_RUNNING | UMCG_TF_WAKEUP_QUEUED,
+			memory_order_seq_cst, memory_order_seq_cst))
+		return UMCG_OP_DONE;
+
+	/* The target moved to RUNNABLE in between: start over. */
+	if (seen == UMCG_TASK_RUNNABLE)
+		return UMCG_OP_AGAIN;
+
+	fprintf(stderr, "libumcg: unexpected state in umcg_wake (1): %u\n",
+			seen);
+	errno = EINVAL;
+	return UMCG_OP_ERROR;
+}
+
+/*
+ * Issue the wake syscall for @next_utls, or the swap syscall when
+ * @should_wait is set, repeating the call for as long as it fails with
+ * EAGAIN. Returns the result of the last syscall.
+ *
+ * @prev_wait_counter is accepted but not used. @timeout is only passed to
+ * the swap syscall.
+ */
+static int umcg_do_wake_or_swap(struct umcg_task_tls *next_utls,
+				uint64_t prev_wait_counter, bool should_wait,
+				const struct timespec *timeout)
+{
+	int rc;
+
+	do {
+		rc = should_wait
+			? sys_umcg_swap(0, next_utls->tid, 0, timeout)
+			: sys_umcg_wake(0, next_utls->tid);
+	} while (rc && errno == EAGAIN);
+
+	return rc;
+}
+
+/*
+ * Wake the task identified by @next, or queue a wakeup if it is running.
+ *
+ * Returns 0 when a wakeup was queued in userspace, -1 with errno set to
+ * EINVAL when @next is UMCG_NONE, refers to an unregistered task, or is in
+ * an unexpected state; otherwise the result of the wake syscall.
+ */
+int umcg_wake(umcg_tid next)
+{
+	struct umcg_task_tls *utls;
+
+	/* Reject UMCG_NONE before treating @next as a pointer. */
+	if (!next) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	utls = *(struct umcg_task_tls **)next;
+	if (!utls) {
+		errno = EINVAL;
+		return -1;
+	}
+
+again:
+	switch (umcg_prepare_wake(utls)) {
+	case UMCG_OP_DONE:
+		return 0;
+	case UMCG_OP_SYS:
+		break;
+	case UMCG_OP_ERROR:
+		return -1;
+	case UMCG_OP_AGAIN:
+		goto again;
+	default:
+		fprintf(stderr, "libumcg: unknown pre_op result.\n");
+		exit(1);
+		return -1;
+	}
+
+	/* The wait-counter argument is unused by the callee; pass a defined value. */
+	return umcg_do_wake_or_swap(utls, 0, false, NULL);
+}
+
+/*
+ * Wake @next and put the calling thread to sleep.
+ *
+ * The wake and wait prepare steps run first, in that order. Depending on
+ * their results this issues the swap syscall, only the wake syscall, only
+ * the wait syscall, or no syscall at all.
+ *
+ * Returns 0 when nothing further was needed, -1 with errno set to EINVAL
+ * when @next is UMCG_NONE or unregistered or a prepare step fails; otherwise
+ * the result of the syscall helper that was used.
+ */
+int umcg_swap(umcg_tid next, const struct timespec *timeout)
+{
+	struct umcg_task_tls *utls;
+	bool should_wake, should_wait;
+
+	/* Reject UMCG_NONE before treating @next as a pointer. */
+	if (!next) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	utls = *(struct umcg_task_tls **)next;
+	if (!utls) {
+		errno = EINVAL;
+		return -1;
+	}
+
+again:
+	switch (umcg_prepare_wake(utls)) {
+	case UMCG_OP_DONE:
+		/* A wakeup was queued in userspace; no wake syscall needed. */
+		should_wake = false;
+		break;
+	case UMCG_OP_SYS:
+		should_wake = true;
+		break;
+	case UMCG_OP_ERROR:
+		return -1;
+	case UMCG_OP_AGAIN:
+		goto again;
+	default:
+		fprintf(stderr, "libumcg: unknown pre_op result.\n");
+		exit(1);
+		return -1;
+	}
+
+	switch (umcg_prepare_wait()) {
+	case UMCG_OP_DONE:
+		/* A queued wakeup was consumed; the caller must not block. */
+		should_wait = false;
+		break;
+	case UMCG_OP_SYS:
+		should_wait = true;
+		break;
+	case UMCG_OP_ERROR:
+		return -1;
+	default:
+		fprintf(stderr, "libumcg: unknown pre_op result.\n");
+		exit(1);
+		return -1;
+	}
+
+	/* The wait-counter argument is unused by the callee; pass a defined value. */
+	if (should_wake)
+		return umcg_do_wake_or_swap(utls, 0, should_wait, timeout);
+
+	if (should_wait)
+		return umcg_do_wait(timeout);
+
+	return 0;
+}
diff --git a/tools/lib/umcg/libumcg.h b/tools/lib/umcg/libumcg.h
new file mode 100644
index 000000000000..31ef786d1965
--- /dev/null
+++ b/tools/lib/umcg/libumcg.h
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __LIBUMCG_H
+#define __LIBUMCG_H
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <limits.h>
+#include <unistd.h>
+#include <linux/types.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <syscall.h>
+#include <time.h>
+
+#include <linux/umcg.h>
+
+/*
+ * Raw __NR_umcg_api_version syscall. Returns syscall(2)'s result, which is
+ * -1 with errno set on failure.
+ *
+ * static inline: a header-defined helper must not trigger unused-function
+ * warnings in files that include the header without calling it.
+ */
+static inline int sys_umcg_api_version(uint32_t requested_api_version,
+		uint32_t flags)
+{
+	return syscall(__NR_umcg_api_version, requested_api_version, flags);
+}
+
+/*
+ * Raw __NR_umcg_register_task syscall. Returns syscall(2)'s result, which
+ * is -1 with errno set on failure.
+ *
+ * static inline: a header-defined helper must not trigger unused-function
+ * warnings in files that include the header without calling it.
+ */
+static inline int sys_umcg_register_task(uint32_t api_version, uint32_t flags,
+		uint32_t group_id, struct umcg_task *umcg_task)
+{
+	return syscall(__NR_umcg_register_task, api_version, flags, group_id,
+			umcg_task);
+}
+
+/*
+ * Raw __NR_umcg_unregister_task syscall. Returns syscall(2)'s result, which
+ * is -1 with errno set on failure.
+ *
+ * static inline: a header-defined helper must not trigger unused-function
+ * warnings in files that include the header without calling it.
+ */
+static inline int sys_umcg_unregister_task(uint32_t flags)
+{
+	return syscall(__NR_umcg_unregister_task, flags);
+}
+
+/*
+ * Raw __NR_umcg_wait syscall. Returns syscall(2)'s result, which is -1 with
+ * errno set on failure.
+ *
+ * static inline: a header-defined helper must not trigger unused-function
+ * warnings in files that include the header without calling it.
+ */
+static inline int sys_umcg_wait(uint32_t flags, const struct timespec *timeout)
+{
+	return syscall(__NR_umcg_wait, flags, timeout);
+}
+
+/*
+ * Raw __NR_umcg_wake syscall. Returns syscall(2)'s result, which is -1 with
+ * errno set on failure.
+ *
+ * static inline: a header-defined helper must not trigger unused-function
+ * warnings in files that include the header without calling it.
+ */
+static inline int sys_umcg_wake(uint32_t flags, uint32_t next_tid)
+{
+	return syscall(__NR_umcg_wake, flags, next_tid);
+}
+
+/*
+ * Raw __NR_umcg_swap syscall. Returns syscall(2)'s result, which is -1 with
+ * errno set on failure.
+ *
+ * static inline: a header-defined helper must not trigger unused-function
+ * warnings in files that include the header without calling it.
+ */
+static inline int sys_umcg_swap(uint32_t wake_flags, uint32_t next_tid,
+		uint32_t wait_flags, const struct timespec *timeout)
+{
+	return syscall(__NR_umcg_swap, wake_flags, next_tid,
+			wait_flags, timeout);
+}
+
+typedef intptr_t umcg_tid; /* UMCG thread ID. */
+
+#define UMCG_NONE	(0)
+
+/**
+ * umcg_get_utid - return the UMCG ID of the current thread.
+ *
+ * The function always succeeds, and the returned ID is guaranteed to be
+ * stable over the life of the thread (and across multiple
+ * umcg_register_core_task/umcg_unregister_task calls).
+ *
+ * The ID is NOT guaranteed to be unique over the life of the process.
+ */
+umcg_tid umcg_get_utid(void);
+
+/**
+ * umcg_set_task_tag - add an arbitrary tag to a registered UMCG task.
+ *
+ * Note: non-thread-safe: the user is responsible for proper memory fencing.
+ */
+void umcg_set_task_tag(umcg_tid utid, intptr_t tag);
+
+/**
+ * umcg_get_task_tag - get the task tag. Returns zero if none set.
+ *
+ * Note: non-thread-safe: the user is responsible for proper memory fencing.
+ */
+intptr_t umcg_get_task_tag(umcg_tid utid);
+
+/**
+ * umcg_register_core_task - register the current thread as a UMCG core task
+ *
+ * Return:
+ * UMCG_NONE     - an error occurred. Check errno.
+ * != UMCG_NONE  - the ID of the thread to be used with UMCG API (guaranteed
+ *                 to match the value returned by umcg_get_utid).
+ */
+umcg_tid umcg_register_core_task(intptr_t tag);
+
+/**
+ * umcg_unregister_task - unregister the current thread
+ *
+ * Return:
+ * 0              - OK
+ * -1             - the current thread is not a UMCG thread
+ */
+int umcg_unregister_task(void);
+
+/**
+ * umcg_wait - block the current thread
+ * @timeout:   absolute timeout (not supported at the moment)
+ *
+ * Blocks the current thread, which must have been registered via
+ * umcg_register_core_task, until it is woken via umcg_wake or swapped into
+ * via umcg_swap. If the current thread has a wakeup queued (see umcg_wake),
+ * returns zero immediately, consuming the wakeup.
+ *
+ * Return:
+ * 0         - OK, the thread was woken;
+ * -1        - did not wake normally;
+ *               errno:
+ *                 EINTR: interrupted
+ *                 EINVAL: some other error occurred
+ */
+int umcg_wait(const struct timespec *timeout);
+
+/**
+ * umcg_wake - wake @next
+ * @next:      ID of the thread to wake (IDs are returned by
+ *             umcg_register_core_task).
+ *
+ * If @next is blocked via umcg_wait, or umcg_swap, wake it. If @next is
+ * running, queue the wakeup, so that a future block of @next will consume
+ * the wakeup but will not block.
+ *
+ * umcg_wake is non-blocking, but may retry a few times to make sure @next
+ * has indeed woken.
+ *
+ * umcg_wake can queue at most one wakeup; if @next already has a wakeup
+ * queued, the current implementation retries until that wakeup has been
+ * consumed rather than returning an error.
+ *
+ * Return:
+ * 0         - OK, @next has woken, or a wakeup has been queued;
+ * -1        - an error occurred.
+ */
+int umcg_wake(umcg_tid next);
+
+/**
+ * umcg_swap - wake @next, put the current thread to sleep
+ * @next:      ID of the thread to wake
+ * @timeout:   absolute timeout (not supported at the moment)
+ *
+ * umcg_swap is semantically equivalent to
+ *
+ *     int ret = umcg_wake(next);
+ *     if (ret)
+ *             return ret;
+ *     return umcg_wait(timeout);
+ *
+ * but may do a synchronous context switch into @next on the current CPU.
+ */
+int umcg_swap(umcg_tid next, const struct timespec *timeout);
+
+#endif  /* __LIBUMCG_H */
-- 
2.31.1.818.g46aad6cb9e-goog


  parent reply	other threads:[~2021-05-20 18:36 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-20 18:36 [RFC PATCH v0.1 0/9] UMCG early preview/RFC patchset Peter Oskolkov
2021-05-20 18:36 ` [RFC PATCH v0.1 1/9] sched/umcg: add UMCG syscall stubs and CONFIG_UMCG Peter Oskolkov
2021-05-22 18:40   ` kernel test robot
2021-05-22 21:49   ` kernel test robot
2021-05-20 18:36 ` [RFC PATCH v0.1 2/9] sched/umcg: add uapi/linux/umcg.h and sched/umcg.c Peter Oskolkov
2021-05-20 18:36 ` [RFC PATCH v0.1 3/9] sched: add WF_CURRENT_CPU and externise ttwu Peter Oskolkov
2021-05-20 18:36 ` [RFC PATCH v0.1 4/9] sched/umcg: implement core UMCG API Peter Oskolkov
2021-05-21 19:06   ` Andrei Vagin
2021-05-21 21:31     ` Jann Horn
2021-05-21 22:03       ` Peter Oskolkov
2021-05-21 19:32   ` Andy Lutomirski
2021-05-21 22:01     ` Peter Oskolkov
2021-05-21 21:33   ` Jann Horn
2021-06-09 13:01     ` Peter Zijlstra
2021-05-20 18:36 ` Peter Oskolkov [this message]
2021-05-20 18:36 ` [RFC PATCH v0.1 6/9] selftests/umcg: add UMCG core API selftest Peter Oskolkov
2021-05-20 18:36 ` [RFC PATCH v0.1 7/9] sched/umcg: add UMCG server/worker API (early RFC) Peter Oskolkov
2021-05-21 20:17   ` Andrei Vagin
2021-05-22 18:29   ` kernel test robot
2021-05-22 19:34   ` kernel test robot
2021-05-22 20:19   ` kernel test robot
2021-05-20 18:36 ` [RFC PATCH v0.1 8/9] lib/umcg: " Peter Oskolkov
2021-05-20 18:36 ` [RFC PATCH v0.1 9/9] selftests/umcg: add UMCG server/worker API selftest Peter Oskolkov
2021-05-20 21:17 ` [RFC PATCH v0.1 0/9] UMCG early preview/RFC patchset Jonathan Corbet
2021-05-20 21:38   ` Peter Oskolkov
2021-05-21  0:15     ` Randy Dunlap
2021-05-21  8:04       ` Peter Zijlstra
2021-05-21 15:08     ` Jonathan Corbet
2021-05-21 16:03       ` Peter Oskolkov
2021-05-21 19:17         ` Jonathan Corbet
2021-05-27  0:06           ` Peter Oskolkov
2021-05-27 15:41             ` Jonathan Corbet
     [not found] ` <CAEWA0a72SvpcuN4ov=98T3uWtExPCr7BQePOgjkqD1ofWKEASw@mail.gmail.com>
2021-05-21 19:13   ` Peter Oskolkov
2021-05-21 23:08     ` Jann Horn
2021-06-09 12:54 ` Peter Zijlstra
2021-06-09 20:18   ` Peter Oskolkov
2021-06-10 18:02     ` Peter Zijlstra
2021-06-10 20:06       ` Peter Oskolkov
2021-07-07 17:45       ` Thierry Delisle
2021-07-08 21:44         ` Peter Oskolkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210520183614.1227046-6-posk@google.com \
    --to=posk@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=avagin@google.com \
    --cc=bsegall@google.com \
    --cc=jnewsome@torproject.org \
    --cc=joel@joelfernandes.org \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=pjt@google.com \
    --cc=posk@posk.io \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.