From: Peter Zijlstra <peterz@infradead.org>
To: mingo@redhat.com, tglx@linutronix.de, juri.lelli@redhat.com,
vincent.guittot@linaro.org, dietmar.eggemann@arm.com,
rostedt@goodmis.org, bsegall@google.com, mgorman@suse.de,
bristot@redhat.com
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
linux-api@vger.kernel.org, x86@kernel.org, pjt@google.com,
posk@google.com, avagin@google.com, jannh@google.com,
tdelisle@uwaterloo.ca, mark.rutland@arm.com, posk@posk.io
Subject: Re: [RFC][PATCH v2 5/5] sched: User Mode Concurency Groups
Date: Tue, 25 Jan 2022 15:59:31 +0100 [thread overview]
Message-ID: <YfAQU6q6jQ/D5AYl@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <Ye635PiRpv4rXVl0@hirez.programming.kicks-ass.net>
On Mon, Jan 24, 2022 at 03:29:56PM +0100, Peter Zijlstra wrote:
> Oh how I hate signals... this can get scribbled by a syscall/fault from
> sigcontext :/
OK, the below seems to work. I'll see if I can clean it up some.
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -94,28 +94,44 @@ static inline int syscall_get_arch(struc
#else /* CONFIG_X86_64 */
-static inline void syscall_get_arguments(struct task_struct *task,
- struct pt_regs *regs,
- unsigned long *args)
+static inline unsigned long
+syscall_get_argument(struct task_struct *task, struct pt_regs *regs, int nr)
{
-# ifdef CONFIG_IA32_EMULATION
+#ifdef CONFIG_IA32_EMULATION
if (task->thread_info.status & TS_COMPAT) {
- *args++ = regs->bx;
- *args++ = regs->cx;
- *args++ = regs->dx;
- *args++ = regs->si;
- *args++ = regs->di;
- *args = regs->bp;
+ switch (nr) {
+ case 0: return regs->bx;
+ case 1: return regs->cx;
+ case 2: return regs->dx;
+ case 3: return regs->si;
+ case 4: return regs->di;
+ case 5: return regs->bp;
+ }
} else
-# endif
+#endif
{
- *args++ = regs->di;
- *args++ = regs->si;
- *args++ = regs->dx;
- *args++ = regs->r10;
- *args++ = regs->r8;
- *args = regs->r9;
+ switch (nr) {
+ case 0: return regs->di;
+ case 1: return regs->si;
+ case 2: return regs->dx;
+ case 3: return regs->r10;
+ case 4: return regs->r8;
+ case 5: return regs->r9;
+ }
}
+
+ WARN_ON_ONCE(1);
+ return 0;
+}
+
+static inline void syscall_get_arguments(struct task_struct *task,
+ struct pt_regs *regs,
+ unsigned long *args)
+{
+ int i;
+
+ for (i = 0; i < 6; i++)
+ *args++ = syscall_get_argument(task, regs, i);
}
static inline int syscall_get_arch(struct task_struct *task)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1307,6 +1307,9 @@ struct task_struct {
struct task_struct *umcg_server;
struct umcg_task __user *umcg_server_task;
struct page *umcg_server_page;
+
+ unsigned long umcg_stack_pointer;
+ unsigned int umcg_worker;
#endif
struct tlbflush_unmap_batch tlb_ubc;
--- a/kernel/sched/umcg.c
+++ b/kernel/sched/umcg.c
@@ -459,7 +459,7 @@ static int umcg_wait(u64 timo)
/*
* Blocked case for umcg_sys_exit(), shared with sys_umcg_ctl().
*/
-static void umcg_unblock_and_wait(void)
+static void umcg_unblock(void)
{
struct task_struct *tsk = current;
struct umcg_task __user *self = READ_ONCE(tsk->umcg_task);
@@ -478,15 +478,7 @@ static void umcg_unblock_and_wait(void)
umcg_unpin_pages();
- switch (umcg_wait(0)) {
- case 0:
- case -EINTR:
- /* notify_resume will continue the wait after the signal */
- break;
-
- default:
- UMCG_DIE("wait");
- }
+ /* notify-resume will wait */
tsk->flags |= PF_UMCG_WORKER;
}
@@ -509,7 +501,7 @@ void umcg_sys_exit(struct pt_regs *regs)
return;
}
- umcg_unblock_and_wait();
+ umcg_unblock();
}
/* return-to-user path */
@@ -518,11 +510,47 @@ void umcg_notify_resume(struct pt_regs *
struct task_struct *tsk = current;
struct umcg_task __user *self = tsk->umcg_task;
bool worker = tsk->flags & PF_UMCG_WORKER;
+ u64 timeout = 0;
u32 state;
+ int ret;
+
+ /*
+ * Unix signals are horrible, but we have to handle them somehow.
+ *
+ * - simply discarding a signal breaks userspace so is not an option.
+ *
+ * - returning -EINTR and have userspace deal with it is not an option
+ * since we can be blocked here due to !syscall reasons (page-faults
+ * for example). But it's also not permissible to have random
+ * syscalls return -EINTR that didn't before.
+ *
+ * - subjecting signal handlers to UMCG would render existing signal
+ * handler code subject to the whims and latencies of UMCG; given that
+ * most signal handler code is short and time sensitive, this seems
+ * undesirable (consider ^C not working because it got delivered to a
+ * blocked task).
+ *
+ * Therefore the chosen path is to exclude signal context from UMCG
+ * entirely and treat it as unmanaged time.
+ */
+ if (tsk->umcg_stack_pointer) {
+ if (tsk->umcg_stack_pointer != user_stack_pointer(regs))
+ return;
+
+ tsk->umcg_stack_pointer = 0;
+ worker = tsk->umcg_worker;
+ tsk->umcg_worker = 0;
+
+ if (worker) {
+ set_syscall_work(SYSCALL_UMCG);
+ /* and PF_UMCG_SYSCALL at done */
+ }
+ goto resume;
+ }
/* avoid recursion vs schedule() */
if (worker)
- current->flags &= ~PF_UMCG_WORKER;
+ tsk->flags &= ~PF_UMCG_WORKER;
if (get_user(state, &self->state))
UMCG_DIE("get-state");
@@ -554,10 +582,31 @@ void umcg_notify_resume(struct pt_regs *
umcg_unpin_pages();
}
- switch (umcg_wait(0)) {
+resume:
+ /*
+ * Hack alert! Since the return-to-user path must resume waiting it
+ * needs access to the timeout argument and set the return value.
+ */
+ if (syscall_get_nr(tsk, regs) == __NR_umcg_wait)
+ timeout = syscall_get_argument(tsk, regs, 1);
+
+ ret = umcg_wait(timeout);
+ switch (ret) {
case 0:
+ break;
+
case -EINTR:
/* we will resume the wait after the signal */
+ WARN_ON_ONCE(tsk->umcg_stack_pointer);
+ tsk->umcg_stack_pointer = user_stack_pointer(regs);
+ tsk->umcg_worker = worker;
+ clear_task_syscall_work(tsk, SYSCALL_UMCG);
+ /* implicitly clears PF_UMCG_WORKER with the early exit */
+ return;
+
+ case -ETIMEDOUT:
+ /* must be __NR_umcg_wait */
+ regs_set_return_value(regs, ret);
break;
default:
@@ -566,7 +615,7 @@ void umcg_notify_resume(struct pt_regs *
done:
if (worker)
- current->flags |= PF_UMCG_WORKER;
+ tsk->flags |= PF_UMCG_WORKER;
}
/**
@@ -755,16 +804,7 @@ SYSCALL_DEFINE2(umcg_wait, u32, flags, u
umcg_unpin_pages();
- ret = umcg_wait(timo);
- switch (ret) {
- case 0: /* all done */
- case -EINTR: /* umcg_notify_resume() will continue the wait */
- ret = 0;
- break;
-
- default:
- goto unblock;
- }
+ /* notify-resume will wait */
out:
if (worker)
tsk->flags |= PF_UMCG_WORKER;
@@ -831,7 +871,7 @@ static int umcg_register(struct umcg_tas
set_syscall_work(SYSCALL_UMCG); /* hook syscall */
set_thread_flag(TIF_UMCG); /* hook return-to-user */
- umcg_unblock_and_wait();
+ umcg_unblock();
} else {
if ((ut.state & (UMCG_TASK_MASK | UMCG_TF_MASK)) != UMCG_TASK_RUNNING)
next prev parent reply other threads:[~2022-01-25 16:20 UTC|newest]
Thread overview: 47+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-01-20 15:55 [RFC][PATCH v2 0/5] sched: User Managed Concurrency Groups Peter Zijlstra
2022-01-20 15:55 ` [RFC][PATCH v2 1/5] mm: Avoid unmapping pinned pages Peter Zijlstra
2022-01-20 18:03 ` Nadav Amit
2022-01-21 7:59 ` Peter Zijlstra
2022-01-20 18:25 ` David Hildenbrand
2022-01-21 7:51 ` Peter Zijlstra
2022-01-21 8:22 ` David Hildenbrand
2022-01-21 8:59 ` Peter Zijlstra
2022-01-21 9:04 ` David Hildenbrand
2022-01-21 11:40 ` Peter Zijlstra
2022-01-21 12:04 ` David Hildenbrand
2022-01-20 15:55 ` [RFC][PATCH v2 2/5] entry,x86: Create common IRQ operations for exceptions Peter Zijlstra
2022-01-21 16:34 ` Mark Rutland
2022-01-20 15:55 ` [RFC][PATCH v2 3/5] sched/umcg: add WF_CURRENT_CPU and externise ttwu Peter Zijlstra
2022-01-20 15:55 ` [RFC][PATCH v2 4/5] x86/uaccess: Implement unsafe_try_cmpxchg_user() Peter Zijlstra
2022-01-27 2:17 ` Sean Christopherson
2022-01-27 6:36 ` Sean Christopherson
2022-01-27 9:56 ` Peter Zijlstra
2022-01-27 23:33 ` Sean Christopherson
2022-01-28 0:17 ` Nick Desaulniers
2022-01-28 16:29 ` Sean Christopherson
2022-01-27 9:55 ` Peter Zijlstra
2022-01-20 15:55 ` [RFC][PATCH v2 5/5] sched: User Mode Concurency Groups Peter Zijlstra
2022-01-21 11:47 ` Peter Zijlstra
2022-01-21 15:18 ` Peter Zijlstra
2022-01-24 14:29 ` Peter Zijlstra
2022-01-24 16:44 ` Peter Zijlstra
2022-01-24 17:06 ` Peter Oskolkov
2022-01-25 14:59 ` Peter Zijlstra [this message]
2022-01-24 13:59 ` Peter Zijlstra
2022-01-21 12:26 ` Peter Zijlstra
2022-01-21 16:57 ` Mark Rutland
2022-01-24 9:48 ` Peter Zijlstra
2022-01-24 10:03 ` Peter Zijlstra
2022-01-24 10:07 ` Peter Zijlstra
2022-01-24 10:27 ` Mark Rutland
2022-01-24 14:46 ` Tao Zhou
2022-01-27 12:19 ` Peter Zijlstra
2022-01-27 18:33 ` Tao Zhou
2022-01-27 12:25 ` Peter Zijlstra
2022-01-27 18:47 ` Tao Zhou
2022-01-27 12:26 ` Peter Zijlstra
2022-01-27 18:31 ` Tao Zhou
2022-01-20 17:28 ` [RFC][PATCH v2 0/5] sched: User Managed Concurrency Groups Peter Oskolkov
2022-01-21 8:01 ` Peter Zijlstra
2022-01-21 18:01 ` Steven Rostedt
2022-01-24 8:20 ` Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=YfAQU6q6jQ/D5AYl@hirez.programming.kicks-ass.net \
--to=peterz@infradead.org \
--cc=avagin@google.com \
--cc=bristot@redhat.com \
--cc=bsegall@google.com \
--cc=dietmar.eggemann@arm.com \
--cc=jannh@google.com \
--cc=juri.lelli@redhat.com \
--cc=linux-api@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mark.rutland@arm.com \
--cc=mgorman@suse.de \
--cc=mingo@redhat.com \
--cc=pjt@google.com \
--cc=posk@google.com \
--cc=posk@posk.io \
--cc=rostedt@goodmis.org \
--cc=tdelisle@uwaterloo.ca \
--cc=tglx@linutronix.de \
--cc=vincent.guittot@linaro.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).