From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
To: Peter Zijlstra <peterz@infradead.org>
Cc: linux-kernel@vger.kernel.org,
Thomas Gleixner <tglx@linutronix.de>,
"Paul E . McKenney" <paulmck@linux.ibm.com>,
Boqun Feng <boqun.feng@gmail.com>,
"H . Peter Anvin" <hpa@zytor.com>, Paul Turner <pjt@google.com>,
linux-api@vger.kernel.org,
Christian Brauner <christian.brauner@ubuntu.com>,
Florian Weimer <fw@deneb.enyo.de>,
carlos@redhat.com,
Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Subject: [RFC PATCH 2/4] rseq: Allow extending struct rseq
Date: Mon, 13 Jul 2020 23:03:46 -0400 [thread overview]
Message-ID: <20200714030348.6214-3-mathieu.desnoyers@efficios.com> (raw)
In-Reply-To: <20200714030348.6214-1-mathieu.desnoyers@efficios.com>
Add a __rseq_abi.flags "RSEQ_TLS_FLAG_SIZE", which indicates support for
extending struct rseq. This adds two new fields to struct rseq:
user_size and kernel_size.
The user_size field allows the size of the __rseq_abi definition (which
can be overridden by symbol interposition either by a preloaded library
or by the application) to be handed over to the kernel at registration.
This registration can be performed by a library, e.g. glibc, which does
not know there is interposition taking place.
The kernel_size is populated by the kernel when the "RSEQ_TLS_FLAG_SIZE"
flag is set in __rseq_abi.flags to the minimum between user_size and
the offset of the "end" field of struct rseq as known by the kernel.
This allows user-space to query which fields are effectively populated
by the kernel.
A rseq_size field is added to the task struct to keep track of the
"kernel_size" effective for each thread.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
include/linux/sched.h | 4 ++++
include/uapi/linux/rseq.h | 37 ++++++++++++++++++++++++++++++++--
kernel/rseq.c | 42 +++++++++++++++++++++++++++++++++------
3 files changed, 75 insertions(+), 8 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 692e327d7455..5d61a3197987 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1147,6 +1147,7 @@ struct task_struct {
#ifdef CONFIG_RSEQ
struct rseq __user *rseq;
u32 rseq_sig;
+ u32 rseq_size;
/*
* RmW on rseq_event_mask must be performed atomically
* with respect to preemption.
@@ -1976,10 +1977,12 @@ static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
if (clone_flags & CLONE_VM) {
t->rseq = NULL;
t->rseq_sig = 0;
+ t->rseq_size = 0;
t->rseq_event_mask = 0;
} else {
t->rseq = current->rseq;
t->rseq_sig = current->rseq_sig;
+ t->rseq_size = current->rseq_size;
t->rseq_event_mask = current->rseq_event_mask;
}
}
@@ -1988,6 +1991,7 @@ static inline void rseq_execve(struct task_struct *t)
{
t->rseq = NULL;
t->rseq_sig = 0;
+ t->rseq_size = 0;
t->rseq_event_mask = 0;
}
diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h
index e11d9df5e564..03c0b5e9a859 100644
--- a/include/uapi/linux/rseq.h
+++ b/include/uapi/linux/rseq.h
@@ -37,6 +37,15 @@ enum rseq_cs_flags {
(1U << RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT),
};
+enum rseq_tls_flags_bit {
+ /* enum rseq_cs_flags reserves bits 0-2. */
+ RSEQ_TLS_FLAG_SIZE_BIT = 3,
+};
+
+enum rseq_tls_flags {
+ RSEQ_TLS_FLAG_SIZE = (1U << RSEQ_TLS_FLAG_SIZE_BIT),
+};
+
/* The rseq_len expected by rseq registration is always 32 bytes. */
enum rseq_len_expected {
RSEQ_LEN_EXPECTED = 32,
@@ -133,8 +142,9 @@ struct rseq {
*
* This field should only be updated by the thread which
* registered this data structure. Read by the kernel.
- * Mainly used for single-stepping through rseq critical sections
- * with debuggers.
+ *
+ * The RSEQ_CS flags are mainly used for single-stepping through rseq
+ * critical sections with debuggers.
*
* - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT
* Inhibit instruction sequence block restart on preemption
@@ -145,8 +155,31 @@ struct rseq {
* - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE
* Inhibit instruction sequence block restart on migration for
* this thread.
+ *
+ * - RSEQ_TLS_FLAG_SIZE
+ * Extensible struct rseq ABI. This flag should be statically
+ * initialized.
*/
__u32 flags;
+ /*
+ * With __rseq_abi.flags RSEQ_TLS_FLAG_SIZE set, user_size should be
+ * statically initialized to offsetof(struct rseq, end).
+ */
+ __u16 user_size;
+ /*
+ * With __rseq_abi.flags RSEQ_TLS_FLAG_SIZE set, if the kernel supports
+ * extensible struct rseq ABI, the kernel_size field is populated by
+ * the kernel to the minimum between user_size and the offset of the
+ * "end" field within the struct rseq supported by the kernel on
+ * successful registration. Should be initialized to 0.
+ */
+ __u16 kernel_size;
+
+ /*
+ * Very last field of the structure, to calculate size excluding padding
+ * with offsetof().
+ */
+ char end[];
} __attribute__((aligned(4 * sizeof(__u64))));
#endif /* _UAPI_LINUX_RSEQ_H */
diff --git a/kernel/rseq.c b/kernel/rseq.c
index a4f86a9d6937..bbc57fc18573 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -96,6 +96,7 @@ static int rseq_update_cpu_id(struct task_struct *t)
static int rseq_reset_rseq_cpu_id(struct task_struct *t)
{
u32 cpu_id_start = 0, cpu_id = RSEQ_CPU_ID_UNINITIALIZED;
+ u16 kernel_size = 0;
/*
* Reset cpu_id_start to its initial state (0).
@@ -109,6 +110,11 @@ static int rseq_reset_rseq_cpu_id(struct task_struct *t)
*/
if (put_user(cpu_id, &t->rseq->cpu_id))
return -EFAULT;
+ /*
+ * Reset kernel_size to its initial state (0).
+ */
+ if (put_user(kernel_size, &t->rseq->kernel_size))
+ return -EFAULT;
return 0;
}
@@ -266,7 +272,7 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
if (unlikely(t->flags & PF_EXITING))
return;
- if (unlikely(!access_ok(t->rseq, sizeof(*t->rseq))))
+ if (unlikely(!access_ok(t->rseq, t->rseq_size)))
goto error;
ret = rseq_ip_fixup(regs);
if (unlikely(ret < 0))
@@ -294,7 +300,7 @@ void rseq_syscall(struct pt_regs *regs)
if (!t->rseq)
return;
- if (!access_ok(t->rseq, sizeof(*t->rseq)) ||
+ if (!access_ok(t->rseq, t->rseq_size) ||
rseq_get_rseq_cs(t, &rseq_cs) || in_rseq_cs(ip, &rseq_cs))
force_sig(SIGSEGV);
}
@@ -308,6 +314,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
int, flags, u32, sig)
{
int ret;
+ u32 tls_flags;
if (flags & RSEQ_FLAG_UNREGISTER) {
if (flags & ~RSEQ_FLAG_UNREGISTER)
@@ -315,7 +322,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
/* Unregister rseq for current thread. */
if (current->rseq != rseq || !current->rseq)
return -EINVAL;
- if (rseq_len != sizeof(*rseq))
+ if (rseq_len != RSEQ_LEN_EXPECTED)
return -EINVAL;
if (current->rseq_sig != sig)
return -EPERM;
@@ -323,6 +330,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
if (ret)
return ret;
current->rseq = NULL;
+ current->rseq_size = 0;
current->rseq_sig = 0;
return 0;
}
@@ -336,7 +344,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
* the provided address differs from the prior
* one.
*/
- if (current->rseq != rseq || rseq_len != sizeof(*rseq))
+ if (current->rseq != rseq || rseq_len != RSEQ_LEN_EXPECTED)
return -EINVAL;
if (current->rseq_sig != sig)
return -EPERM;
@@ -349,10 +357,32 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
* ensure the provided rseq is properly aligned and valid.
*/
if (!IS_ALIGNED((unsigned long)rseq, __alignof__(*rseq)) ||
- rseq_len != sizeof(*rseq))
+ rseq_len != RSEQ_LEN_EXPECTED)
return -EINVAL;
- if (!access_ok(rseq, rseq_len))
+ if (!access_ok(rseq, RSEQ_LEN_EXPECTED))
return -EFAULT;
+
+ /* Handle extensible struct rseq ABI. */
+ ret = get_user(tls_flags, &rseq->flags);
+ if (ret)
+ return ret;
+ if (tls_flags & RSEQ_TLS_FLAG_SIZE) {
+ u16 user_size, kernel_size;
+
+ ret = get_user(user_size, &rseq->user_size);
+ if (ret)
+ return ret;
+ if (user_size < offsetof(struct rseq, kernel_size) + sizeof(u16))
+ return -EINVAL;
+ kernel_size = min_t(u16, user_size, offsetof(struct rseq, end));
+ ret = put_user(kernel_size, &rseq->kernel_size);
+ if (ret)
+ return ret;
+ current->rseq_size = kernel_size;
+ } else {
+ current->rseq_size = offsetof(struct rseq, user_size);
+ }
+
current->rseq = rseq;
current->rseq_sig = sig;
/*
--
2.17.1
next prev parent reply other threads:[~2020-07-14 3:04 UTC|newest]
Thread overview: 37+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-07-14 3:03 [RFC PATCH 0/4] rseq: Introduce extensible struct rseq Mathieu Desnoyers
2020-07-14 3:03 ` [RFC PATCH 1/4] selftests: rseq: Use fixed value as rseq_len parameter Mathieu Desnoyers
2020-07-14 3:03 ` Mathieu Desnoyers [this message]
2020-07-14 9:58 ` [RFC PATCH 2/4] rseq: Allow extending struct rseq Florian Weimer
2020-07-14 12:50 ` Mathieu Desnoyers
2020-07-14 13:00 ` Florian Weimer
2020-07-14 13:19 ` Mathieu Desnoyers
2020-07-14 21:30 ` Carlos O'Donell
2020-07-15 13:12 ` Mathieu Desnoyers
2020-07-15 13:22 ` Florian Weimer
2020-07-15 13:31 ` Mathieu Desnoyers
2020-07-15 13:42 ` Florian Weimer
2020-07-15 13:55 ` Christian Brauner
2020-07-15 14:20 ` Mathieu Desnoyers
2020-07-15 14:54 ` Mathieu Desnoyers
2020-07-15 14:58 ` Florian Weimer
2020-07-15 15:26 ` Mathieu Desnoyers
2020-07-14 17:24 ` Peter Oskolkov
2020-07-14 17:43 ` Mathieu Desnoyers
2020-07-14 18:33 ` Peter Oskolkov
2020-07-15 2:34 ` Chris Kennelly
2020-07-15 6:31 ` Florian Weimer
2020-07-15 10:59 ` Christian Brauner
2020-07-15 14:38 ` Mathieu Desnoyers
2020-07-15 14:50 ` Mathieu Desnoyers
2020-07-15 11:38 ` Christian Brauner
2020-07-15 12:33 ` Christian Brauner
2020-07-15 15:10 ` Mathieu Desnoyers
2020-07-15 15:33 ` Christian Brauner
2020-07-14 3:03 ` [RFC PATCH 3/4] selftests: rseq: define __rseq_abi with extensible size Mathieu Desnoyers
2020-07-14 3:03 ` [RFC PATCH 4/4] selftests: rseq: print rseq extensible size in basic test Mathieu Desnoyers
2020-07-14 20:55 ` [RFC PATCH 0/4] rseq: Introduce extensible struct rseq Carlos O'Donell
2020-07-15 13:02 ` Mathieu Desnoyers
2020-07-16 13:39 ` Carlos O'Donell
2020-07-16 14:45 ` Mathieu Desnoyers
2020-07-15 15:12 ` Florian Weimer
2020-07-15 15:32 ` Mathieu Desnoyers
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200714030348.6214-3-mathieu.desnoyers@efficios.com \
--to=mathieu.desnoyers@efficios.com \
--cc=boqun.feng@gmail.com \
--cc=carlos@redhat.com \
--cc=christian.brauner@ubuntu.com \
--cc=fw@deneb.enyo.de \
--cc=hpa@zytor.com \
--cc=linux-api@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=paulmck@linux.ibm.com \
--cc=peterz@infradead.org \
--cc=pjt@google.com \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).