linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: mingo@redhat.com, tglx@linutronix.de, juri.lelli@redhat.com,
	vincent.guittot@linaro.org, dietmar.eggemann@arm.com,
	rostedt@goodmis.org, bsegall@google.com, mgorman@suse.de,
	bristot@redhat.com
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	linux-api@vger.kernel.org, x86@kernel.org, pjt@google.com,
	posk@google.com, avagin@google.com, jannh@google.com,
	tdelisle@uwaterloo.ca, mark.rutland@arm.com, posk@posk.io
Subject: Re: [RFC][PATCH v2 5/5] sched: User Mode Concurency Groups
Date: Tue, 25 Jan 2022 15:59:31 +0100	[thread overview]
Message-ID: <YfAQU6q6jQ/D5AYl@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <Ye635PiRpv4rXVl0@hirez.programming.kicks-ass.net>

On Mon, Jan 24, 2022 at 03:29:56PM +0100, Peter Zijlstra wrote:

> Oh how I hate signals... this can get scribbled by a syscall/fault from
> sigcontext :/

OK, the below seems to work. I'll see if I can clean it up some.

--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -94,28 +94,44 @@ static inline int syscall_get_arch(struc
 
 #else	 /* CONFIG_X86_64 */
 
-static inline void syscall_get_arguments(struct task_struct *task,
-					 struct pt_regs *regs,
-					 unsigned long *args)
+static inline unsigned long
+syscall_get_argument(struct task_struct *task, struct pt_regs *regs, int nr)
 {
-# ifdef CONFIG_IA32_EMULATION
+#ifdef CONFIG_IA32_EMULATION
 	if (task->thread_info.status & TS_COMPAT) {
-		*args++ = regs->bx;
-		*args++ = regs->cx;
-		*args++ = regs->dx;
-		*args++ = regs->si;
-		*args++ = regs->di;
-		*args   = regs->bp;
+		switch (nr) {
+		case 0: return regs->bx;
+		case 1: return regs->cx;
+		case 2: return regs->dx;
+		case 3: return regs->si;
+		case 4: return regs->di;
+		case 5: return regs->bp;
+		}
 	} else
-# endif
+#endif
 	{
-		*args++ = regs->di;
-		*args++ = regs->si;
-		*args++ = regs->dx;
-		*args++ = regs->r10;
-		*args++ = regs->r8;
-		*args   = regs->r9;
+		switch (nr) {
+		case 0: return regs->di;
+		case 1: return regs->si;
+		case 2: return regs->dx;
+		case 3: return regs->r10;
+		case 4: return regs->r8;
+		case 5: return regs->r9;
+		}
 	}
+
+	WARN_ON_ONCE(1);
+	return 0;
+}
+
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned long *args)
+{
+	int i;
+
+	for (i = 0; i < 6; i++)
+		*args++ = syscall_get_argument(task, regs, i);
 }
 
 static inline int syscall_get_arch(struct task_struct *task)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1307,6 +1307,9 @@ struct task_struct {
 	struct task_struct	*umcg_server;
 	struct umcg_task __user *umcg_server_task;
 	struct page		*umcg_server_page;
+
+	unsigned long		umcg_stack_pointer;
+	unsigned int		umcg_worker;
 #endif
 
 	struct tlbflush_unmap_batch	tlb_ubc;
--- a/kernel/sched/umcg.c
+++ b/kernel/sched/umcg.c
@@ -459,7 +459,7 @@ static int umcg_wait(u64 timo)
 /*
  * Blocked case for umcg_sys_exit(), shared with sys_umcg_ctl().
  */
-static void umcg_unblock_and_wait(void)
+static void umcg_unblock(void)
 {
 	struct task_struct *tsk = current;
 	struct umcg_task __user *self = READ_ONCE(tsk->umcg_task);
@@ -478,15 +478,7 @@ static void umcg_unblock_and_wait(void)
 
 	umcg_unpin_pages();
 
-	switch (umcg_wait(0)) {
-	case 0:
-	case -EINTR:
-		/* notify_resume will continue the wait after the signal */
-		break;
-
-	default:
-		UMCG_DIE("wait");
-	}
+	/* notify-resume will wait */
 
 	tsk->flags |= PF_UMCG_WORKER;
 }
@@ -509,7 +501,7 @@ void umcg_sys_exit(struct pt_regs *regs)
 		return;
 	}
 
-	umcg_unblock_and_wait();
+	umcg_unblock();
 }
 
 /* return-to-user path */
@@ -518,11 +510,47 @@ void umcg_notify_resume(struct pt_regs *
 	struct task_struct *tsk = current;
 	struct umcg_task __user *self = tsk->umcg_task;
 	bool worker = tsk->flags & PF_UMCG_WORKER;
+	u64 timeout = 0;
 	u32 state;
+	int ret;
+
+	/*
+	 * Unix signals are horrible, but we have to handle them somehow.
+	 *
+	 * - simply discarding a signal breaks userspace so is not an option.
+	 *
+	 * - returning -EINTR and having userspace deal with it is not an option
+	 *   since we can be blocked here due to !syscall reasons (page-faults
+	 *   for example). But it's also not permissible to have random
+	 *   syscalls return -EINTR that didn't before.
+	 *
+	 * - subjecting signal handlers to UMCG would render existing signal
+	 *   handler code subject to the whims and latencies of UMCG; given that
+	 *   most signal handler code is short and time sensitive, this seems
+	 *   undesirable (consider ^C not working because it got delivered to a
+	 *   blocked task).
+	 *
+	 * Therefore the chosen path is to exclude signal context from UMCG
+	 * entirely and treat it as unmanaged time.
+	 */
+	if (tsk->umcg_stack_pointer) {
+		if (tsk->umcg_stack_pointer != user_stack_pointer(regs))
+			return;
+
+		tsk->umcg_stack_pointer = 0;
+		worker = tsk->umcg_worker;
+		tsk->umcg_worker = 0;
+
+		if (worker) {
+			set_syscall_work(SYSCALL_UMCG);
+			/* and PF_UMCG_SYSCALL at done */
+		}
+		goto resume;
+	}
 
 	/* avoid recursion vs schedule() */
 	if (worker)
-		current->flags &= ~PF_UMCG_WORKER;
+		tsk->flags &= ~PF_UMCG_WORKER;
 
 	if (get_user(state, &self->state))
 		UMCG_DIE("get-state");
@@ -554,10 +582,31 @@ void umcg_notify_resume(struct pt_regs *
 		umcg_unpin_pages();
 	}
 
-	switch (umcg_wait(0)) {
+resume:
+	/*
+	 * Hack alert! Since the return-to-user path must resume waiting it
+	 * needs access to the timeout argument and must set the return value.
+	 */
+	if (syscall_get_nr(tsk, regs) == __NR_umcg_wait)
+		timeout = syscall_get_argument(tsk, regs, 1);
+
+	ret = umcg_wait(timeout);
+	switch (ret) {
 	case 0:
+		break;
+
 	case -EINTR:
 		/* we will resume the wait after the signal */
+		WARN_ON_ONCE(tsk->umcg_stack_pointer);
+		tsk->umcg_stack_pointer = user_stack_pointer(regs);
+		tsk->umcg_worker = worker;
+		clear_task_syscall_work(tsk, SYSCALL_UMCG);
+		/* implicitly clears PF_UMCG_WORKER with the early exit */
+		return;
+
+	case -ETIMEDOUT:
+		/* must be __NR_umcg_wait */
+		regs_set_return_value(regs, ret);
 		break;
 
 	default:
@@ -566,7 +615,7 @@ void umcg_notify_resume(struct pt_regs *
 
 done:
 	if (worker)
-		current->flags |= PF_UMCG_WORKER;
+		tsk->flags |= PF_UMCG_WORKER;
 }
 
 /**
@@ -755,16 +804,7 @@ SYSCALL_DEFINE2(umcg_wait, u32, flags, u
 
 	umcg_unpin_pages();
 
-	ret = umcg_wait(timo);
-	switch (ret) {
-	case 0:		/* all done */
-	case -EINTR:	/* umcg_notify_resume() will continue the wait */
-		ret = 0;
-		break;
-
-	default:
-		goto unblock;
-	}
+	/* notify-resume will wait */
 out:
 	if (worker)
 		tsk->flags |= PF_UMCG_WORKER;
@@ -831,7 +871,7 @@ static int umcg_register(struct umcg_tas
 		set_syscall_work(SYSCALL_UMCG);		/* hook syscall */
 		set_thread_flag(TIF_UMCG);		/* hook return-to-user */
 
-		umcg_unblock_and_wait();
+		umcg_unblock();
 
 	} else {
 		if ((ut.state & (UMCG_TASK_MASK | UMCG_TF_MASK)) != UMCG_TASK_RUNNING)

  parent reply	other threads:[~2022-01-25 16:20 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-20 15:55 [RFC][PATCH v2 0/5] sched: User Managed Concurrency Groups Peter Zijlstra
2022-01-20 15:55 ` [RFC][PATCH v2 1/5] mm: Avoid unmapping pinned pages Peter Zijlstra
2022-01-20 18:03   ` Nadav Amit
2022-01-21  7:59     ` Peter Zijlstra
2022-01-20 18:25   ` David Hildenbrand
2022-01-21  7:51     ` Peter Zijlstra
2022-01-21  8:22       ` David Hildenbrand
2022-01-21  8:59       ` Peter Zijlstra
2022-01-21  9:04         ` David Hildenbrand
2022-01-21 11:40           ` Peter Zijlstra
2022-01-21 12:04             ` David Hildenbrand
2022-01-20 15:55 ` [RFC][PATCH v2 2/5] entry,x86: Create common IRQ operations for exceptions Peter Zijlstra
2022-01-21 16:34   ` Mark Rutland
2022-01-20 15:55 ` [RFC][PATCH v2 3/5] sched/umcg: add WF_CURRENT_CPU and externise ttwu Peter Zijlstra
2022-01-20 15:55 ` [RFC][PATCH v2 4/5] x86/uaccess: Implement unsafe_try_cmpxchg_user() Peter Zijlstra
2022-01-27  2:17   ` Sean Christopherson
2022-01-27  6:36     ` Sean Christopherson
2022-01-27  9:56       ` Peter Zijlstra
2022-01-27 23:33         ` Sean Christopherson
2022-01-28  0:17           ` Nick Desaulniers
2022-01-28 16:29             ` Sean Christopherson
2022-01-27  9:55     ` Peter Zijlstra
2022-01-20 15:55 ` [RFC][PATCH v2 5/5] sched: User Mode Concurency Groups Peter Zijlstra
2022-01-21 11:47   ` Peter Zijlstra
2022-01-21 15:18     ` Peter Zijlstra
2022-01-24 14:29       ` Peter Zijlstra
2022-01-24 16:44         ` Peter Zijlstra
2022-01-24 17:06           ` Peter Oskolkov
2022-01-25 14:59         ` Peter Zijlstra [this message]
2022-01-24 13:59     ` Peter Zijlstra
2022-01-21 12:26   ` Peter Zijlstra
2022-01-21 16:57   ` Mark Rutland
2022-01-24  9:48     ` Peter Zijlstra
2022-01-24 10:03     ` Peter Zijlstra
2022-01-24 10:07       ` Peter Zijlstra
2022-01-24 10:27         ` Mark Rutland
2022-01-24 14:46   ` Tao Zhou
2022-01-27 12:19     ` Peter Zijlstra
2022-01-27 18:33       ` Tao Zhou
2022-01-27 12:25     ` Peter Zijlstra
2022-01-27 18:47       ` Tao Zhou
2022-01-27 12:26     ` Peter Zijlstra
2022-01-27 18:31   ` Tao Zhou
2022-01-20 17:28 ` [RFC][PATCH v2 0/5] sched: User Managed Concurrency Groups Peter Oskolkov
2022-01-21  8:01   ` Peter Zijlstra
2022-01-21 18:01 ` Steven Rostedt
2022-01-24  8:20   ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YfAQU6q6jQ/D5AYl@hirez.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=avagin@google.com \
    --cc=bristot@redhat.com \
    --cc=bsegall@google.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=jannh@google.com \
    --cc=juri.lelli@redhat.com \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mark.rutland@arm.com \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=pjt@google.com \
    --cc=posk@google.com \
    --cc=posk@posk.io \
    --cc=rostedt@goodmis.org \
    --cc=tdelisle@uwaterloo.ca \
    --cc=tglx@linutronix.de \
    --cc=vincent.guittot@linaro.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).