All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RESEND v4 0/4] x86: fix get_nr_restart_syscall()
@ 2021-02-01 17:45 Oleg Nesterov
  2021-02-01 17:46 ` [PATCH v4 1/4] introduce set_restart_fn() and arch_set_restart_data() Oleg Nesterov
                   ` (5 more replies)
  0 siblings, 6 replies; 20+ messages in thread
From: Oleg Nesterov @ 2021-02-01 17:45 UTC (permalink / raw)
  To: Andrew Morton, Thomas Gleixner
  Cc: Andy Lutomirski, Ingo Molnar, Jan Kratochvil, Linus Torvalds,
	Pedro Alves, Peter Anvin, linux-kernel, x86

Somehow I forgot about this problem. Let me resend the last version
based on discussion with Linus. IIRC he was agree with this series.

And let me remind why 3/4 temporary adds the "transient" TS_COMPAT_RESTART
flag killed by the next patch: to simplify the backporting. 1-3 can fix
the problem without breaking the kABI.

Oleg.
---
 arch/x86/include/asm/processor.h   |  9 ---------
 arch/x86/include/asm/thread_info.h | 15 ++++++++++++++-
 arch/x86/kernel/signal.c           | 24 +-----------------------
 fs/select.c                        | 10 ++++------
 include/linux/restart_block.h      |  1 +
 include/linux/thread_info.h        | 13 +++++++++++++
 kernel/futex.c                     |  3 +--
 kernel/time/alarmtimer.c           |  2 +-
 kernel/time/hrtimer.c              |  2 +-
 kernel/time/posix-cpu-timers.c     |  2 +-
 10 files changed, 37 insertions(+), 44 deletions(-)


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v4 1/4] introduce set_restart_fn() and arch_set_restart_data()
  2021-02-01 17:45 [PATCH RESEND v4 0/4] x86: fix get_nr_restart_syscall() Oleg Nesterov
@ 2021-02-01 17:46 ` Oleg Nesterov
  2021-03-16 21:17   ` [tip: x86/urgent] kernel, fs: Introduce and use " tip-bot2 for Oleg Nesterov
  2021-02-01 17:46 ` [PATCH v4 2/4] x86: mv TS_COMPAT from asm/processor.h to Oleg Nesterov
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 20+ messages in thread
From: Oleg Nesterov @ 2021-02-01 17:46 UTC (permalink / raw)
  To: Andrew Morton, Thomas Gleixner
  Cc: Andy Lutomirski, Ingo Molnar, Jan Kratochvil, Linus Torvalds,
	Pedro Alves, Peter Anvin, linux-kernel, x86

Preparation. Add the new helper which sets restart_block->fn and calls
the dummy arch_set_restart_data() helper.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 fs/select.c                    | 10 ++++------
 include/linux/thread_info.h    | 13 +++++++++++++
 kernel/futex.c                 |  3 +--
 kernel/time/alarmtimer.c       |  2 +-
 kernel/time/hrtimer.c          |  2 +-
 kernel/time/posix-cpu-timers.c |  2 +-
 6 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/fs/select.c b/fs/select.c
index 37aaa8317f3a..945896d0ac9e 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -1055,10 +1055,9 @@ static long do_restart_poll(struct restart_block *restart_block)
 
 	ret = do_sys_poll(ufds, nfds, to);
 
-	if (ret == -ERESTARTNOHAND) {
-		restart_block->fn = do_restart_poll;
-		ret = -ERESTART_RESTARTBLOCK;
-	}
+	if (ret == -ERESTARTNOHAND)
+		ret = set_restart_fn(restart_block, do_restart_poll);
+
 	return ret;
 }
 
@@ -1080,7 +1079,6 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
 		struct restart_block *restart_block;
 
 		restart_block = &current->restart_block;
-		restart_block->fn = do_restart_poll;
 		restart_block->poll.ufds = ufds;
 		restart_block->poll.nfds = nfds;
 
@@ -1091,7 +1089,7 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
 		} else
 			restart_block->poll.has_timeout = 0;
 
-		ret = -ERESTART_RESTARTBLOCK;
+		ret = set_restart_fn(restart_block, do_restart_poll);
 	}
 	return ret;
 }
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index c8a974cead73..495fb6a94e58 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -11,6 +11,7 @@
 #include <linux/types.h>
 #include <linux/bug.h>
 #include <linux/restart_block.h>
+#include <linux/errno.h>
 
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 /*
@@ -57,6 +58,18 @@ enum syscall_work_bit {
 
 #ifdef __KERNEL__
 
+#ifndef arch_set_restart_data
+#define arch_set_restart_data(restart) do { } while (0)
+#endif
+
+static inline long set_restart_fn(struct restart_block *restart,
+					long (*fn)(struct restart_block *))
+{
+	restart->fn = fn;
+	arch_set_restart_data(restart);
+	return -ERESTART_RESTARTBLOCK;
+}
+
 #ifndef THREAD_ALIGN
 #define THREAD_ALIGN	THREAD_SIZE
 #endif
diff --git a/kernel/futex.c b/kernel/futex.c
index c47d1015d759..1ec50f9681a6 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2724,14 +2724,13 @@ static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
 		goto out;
 
 	restart = &current->restart_block;
-	restart->fn = futex_wait_restart;
 	restart->futex.uaddr = uaddr;
 	restart->futex.val = val;
 	restart->futex.time = *abs_time;
 	restart->futex.bitset = bitset;
 	restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
 
-	ret = -ERESTART_RESTARTBLOCK;
+	ret = set_restart_fn(restart, futex_wait_restart);
 
 out:
 	if (to) {
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index f4ace1bf8382..daeaa7140d0a 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -848,9 +848,9 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags,
 	if (flags == TIMER_ABSTIME)
 		return -ERESTARTNOHAND;
 
-	restart->fn = alarm_timer_nsleep_restart;
 	restart->nanosleep.clockid = type;
 	restart->nanosleep.expires = exp;
+	set_restart_fn(restart, alarm_timer_nsleep_restart);
 	return ret;
 }
 
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 743c852e10f2..30f936431c56 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1939,9 +1939,9 @@ long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode,
 	}
 
 	restart = &current->restart_block;
-	restart->fn = hrtimer_nanosleep_restart;
 	restart->nanosleep.clockid = t.timer.base->clockid;
 	restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);
+	set_restart_fn(restart, hrtimer_nanosleep_restart);
 out:
 	destroy_hrtimer_on_stack(&t.timer);
 	return ret;
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index a71758e34e45..9abe15255bc4 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -1480,8 +1480,8 @@ static int posix_cpu_nsleep(const clockid_t which_clock, int flags,
 		if (flags & TIMER_ABSTIME)
 			return -ERESTARTNOHAND;
 
-		restart_block->fn = posix_cpu_nsleep_restart;
 		restart_block->nanosleep.clockid = which_clock;
+		set_restart_fn(restart_block, posix_cpu_nsleep_restart);
 	}
 	return error;
 }
-- 
2.25.1.362.g51ebf55


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v4 2/4] x86: mv TS_COMPAT from asm/processor.h to
  2021-02-01 17:45 [PATCH RESEND v4 0/4] x86: fix get_nr_restart_syscall() Oleg Nesterov
  2021-02-01 17:46 ` [PATCH v4 1/4] introduce set_restart_fn() and arch_set_restart_data() Oleg Nesterov
@ 2021-02-01 17:46 ` Oleg Nesterov
  2021-03-16 21:17   ` [tip: x86/urgent] x86: Move TS_COMPAT back to asm/thread_info.h tip-bot2 for Oleg Nesterov
  2021-02-01 17:47 ` [PATCH v4 3/4] x86: introduce TS_COMPAT_RESTART to fix Oleg Nesterov
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 20+ messages in thread
From: Oleg Nesterov @ 2021-02-01 17:46 UTC (permalink / raw)
  To: Andrew Morton, Thomas Gleixner
  Cc: Andy Lutomirski, Ingo Molnar, Jan Kratochvil, Linus Torvalds,
	Pedro Alves, Peter Anvin, linux-kernel, x86

Move TS_COMPAT back to asm/thread_info.h, close to TS_I386_REGS_POKED.

It was moved to asm/processor.h by b9d989c7218a ("x86/asm: Move the
thread_info::status field to thread_struct"), then later 37a8f7c38339
("x86/asm: Move 'status' from thread_struct to thread_info") moved the
'status' field back but TS_COMPAT was forgotten.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 arch/x86/include/asm/processor.h   | 9 ---------
 arch/x86/include/asm/thread_info.h | 9 +++++++++
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c20a52b5534b..c66df6368909 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -552,15 +552,6 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
 	*size = fpu_kernel_xstate_size;
 }
 
-/*
- * Thread-synchronous status.
- *
- * This is different from the flags in that nobody else
- * ever touches our thread-synchronous status, so we don't
- * have to worry about atomic accesses.
- */
-#define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/
-
 static inline void
 native_load_sp0(unsigned long sp0)
 {
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 0d751d5da702..c2dc29e215ea 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -205,6 +205,15 @@ static inline int arch_within_stack_frames(const void * const stack,
 
 #endif
 
+/*
+ * Thread-synchronous status.
+ *
+ * This is different from the flags in that nobody else
+ * ever touches our thread-synchronous status, so we don't
+ * have to worry about atomic accesses.
+ */
+#define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/
+
 #ifdef CONFIG_COMPAT
 #define TS_I386_REGS_POKED	0x0004	/* regs poked by 32-bit ptracer */
 #endif
-- 
2.25.1.362.g51ebf55


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v4 3/4] x86: introduce TS_COMPAT_RESTART to fix
  2021-02-01 17:45 [PATCH RESEND v4 0/4] x86: fix get_nr_restart_syscall() Oleg Nesterov
  2021-02-01 17:46 ` [PATCH v4 1/4] introduce set_restart_fn() and arch_set_restart_data() Oleg Nesterov
  2021-02-01 17:46 ` [PATCH v4 2/4] x86: mv TS_COMPAT from asm/processor.h to Oleg Nesterov
@ 2021-02-01 17:47 ` Oleg Nesterov
  2021-02-01 18:32   ` Andy Lutomirski
  2021-03-16 21:17   ` [tip: x86/urgent] x86: Introduce TS_COMPAT_RESTART to fix get_nr_restart_syscall() tip-bot2 for Oleg Nesterov
  2021-02-01 17:47 ` [PATCH v4 4/4] x86: introduce restart_block->arch_data to kill Oleg Nesterov
                   ` (2 subsequent siblings)
  5 siblings, 2 replies; 20+ messages in thread
From: Oleg Nesterov @ 2021-02-01 17:47 UTC (permalink / raw)
  To: Andrew Morton, Thomas Gleixner
  Cc: Andy Lutomirski, Ingo Molnar, Jan Kratochvil, Linus Torvalds,
	Pedro Alves, Peter Anvin, linux-kernel, x86

The comment in get_nr_restart_syscall() says:

	 * The problem is that we can get here when ptrace pokes
	 * syscall-like values into regs even if we're not in a syscall
	 * at all.

Yes. but if we are not in syscall then the

	status & (TS_COMPAT|TS_I386_REGS_POKED)

check below can't really help:

	- TS_COMPAT can't be set

	- TS_I386_REGS_POKED is only set if regs->orig_ax was changed by
	  32bit debugger; and even in this case get_nr_restart_syscall()
	  is only correct if the tracee is 32bit too.

Suppose that 64bit debugger plays with 32bit tracee and

	* Tracee calls sleep(2)	// TS_COMPAT is set
	* User interrupts the tracee by CTRL-C after 1 sec and does
	  "(gdb) call func()"
	* gdb saves the regs by PTRACE_GETREGS
	* does PTRACE_SETREGS to set %rip='func' and %orig_rax=-1
	* PTRACE_CONT		// TS_COMPAT is cleared
	* func() hits int3.
	* Debugger catches SIGTRAP.
	* Restore original regs by PTRACE_SETREGS.
	* PTRACE_CONT

get_nr_restart_syscall() wrongly returns __NR_restart_syscall==219, the
tracee calls ia32_sys_call_table[219] == sys_madvise.

This patch adds the sticky TS_COMPAT_RESTART flag which survives after
return to user mode, hopefully it allows us to kill TS_I386_REGS_POKED
but this needs another patch.

Test-case:

  $ cvs -d :pserver:anoncvs:anoncvs@sourceware.org:/cvs/systemtap co ptrace-tests
  $ gcc -o erestartsys-trap-debuggee ptrace-tests/tests/erestartsys-trap-debuggee.c --m32
  $ gcc -o erestartsys-trap-debugger ptrace-tests/tests/erestartsys-trap-debugger.c -lutil
  $ ./erestartsys-trap-debugger
  Unexpected: retval 1, errno 22
  erestartsys-trap-debugger: ptrace-tests/tests/erestartsys-trap-debugger.c:421

Reported-by: Jan Kratochvil <jan.kratochvil@redhat.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 arch/x86/include/asm/thread_info.h | 14 +++++++++++++-
 arch/x86/kernel/signal.c           | 24 +-----------------------
 2 files changed, 14 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index c2dc29e215ea..30d1d187019f 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -214,10 +214,22 @@ static inline int arch_within_stack_frames(const void * const stack,
  */
 #define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/
 
+#ifndef __ASSEMBLY__
 #ifdef CONFIG_COMPAT
 #define TS_I386_REGS_POKED	0x0004	/* regs poked by 32-bit ptracer */
+#define TS_COMPAT_RESTART	0x0008
+
+#define arch_set_restart_data	arch_set_restart_data
+
+static inline void arch_set_restart_data(struct restart_block *restart)
+{
+	struct thread_info *ti = current_thread_info();
+	if (ti->status & TS_COMPAT)
+		ti->status |= TS_COMPAT_RESTART;
+	else
+		ti->status &= ~TS_COMPAT_RESTART;
+}
 #endif
-#ifndef __ASSEMBLY__
 
 #ifdef CONFIG_X86_32
 #define in_ia32_syscall() true
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index ea794a083c44..6c26d2c3a2e4 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -766,30 +766,8 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 
 static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
 {
-	/*
-	 * This function is fundamentally broken as currently
-	 * implemented.
-	 *
-	 * The idea is that we want to trigger a call to the
-	 * restart_block() syscall and that we want in_ia32_syscall(),
-	 * in_x32_syscall(), etc. to match whatever they were in the
-	 * syscall being restarted.  We assume that the syscall
-	 * instruction at (regs->ip - 2) matches whatever syscall
-	 * instruction we used to enter in the first place.
-	 *
-	 * The problem is that we can get here when ptrace pokes
-	 * syscall-like values into regs even if we're not in a syscall
-	 * at all.
-	 *
-	 * For now, we maintain historical behavior and guess based on
-	 * stored state.  We could do better by saving the actual
-	 * syscall arch in restart_block or (with caveats on x32) by
-	 * checking if regs->ip points to 'int $0x80'.  The current
-	 * behavior is incorrect if a tracer has a different bitness
-	 * than the tracee.
-	 */
 #ifdef CONFIG_IA32_EMULATION
-	if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
+	if (current_thread_info()->status & TS_COMPAT_RESTART)
 		return __NR_ia32_restart_syscall;
 #endif
 #ifdef CONFIG_X86_X32_ABI
-- 
2.25.1.362.g51ebf55


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v4 4/4] x86: introduce restart_block->arch_data to kill
  2021-02-01 17:45 [PATCH RESEND v4 0/4] x86: fix get_nr_restart_syscall() Oleg Nesterov
                   ` (2 preceding siblings ...)
  2021-02-01 17:47 ` [PATCH v4 3/4] x86: introduce TS_COMPAT_RESTART to fix Oleg Nesterov
@ 2021-02-01 17:47 ` Oleg Nesterov
  2021-03-16 21:17   ` [tip: x86/urgent] x86: Introduce restart_block->arch_data to remove TS_COMPAT_RESTART tip-bot2 for Oleg Nesterov
  2021-02-01 18:18 ` [PATCH RESEND v4 0/4] x86: fix get_nr_restart_syscall() Linus Torvalds
  2021-02-03 23:19 ` Oleg Nesterov
  5 siblings, 1 reply; 20+ messages in thread
From: Oleg Nesterov @ 2021-02-01 17:47 UTC (permalink / raw)
  To: Andrew Morton, Thomas Gleixner
  Cc: Andy Lutomirski, Ingo Molnar, Jan Kratochvil, Linus Torvalds,
	Pedro Alves, Peter Anvin, linux-kernel, x86

With this patch x86 just saves current_thread_info()->status in the
new restart_block->arch_data field, TS_COMPAT_RESTART can be removed.

Rather than saving "status" we could shift the code from
get_nr_restart_syscall() to arch_set_restart_data() and save the syscall
number in ->arch_data.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 arch/x86/include/asm/thread_info.h | 12 ++----------
 arch/x86/kernel/signal.c           |  2 +-
 include/linux/restart_block.h      |  1 +
 3 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 30d1d187019f..06b740bae431 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -217,18 +217,10 @@ static inline int arch_within_stack_frames(const void * const stack,
 #ifndef __ASSEMBLY__
 #ifdef CONFIG_COMPAT
 #define TS_I386_REGS_POKED	0x0004	/* regs poked by 32-bit ptracer */
-#define TS_COMPAT_RESTART	0x0008
 
-#define arch_set_restart_data	arch_set_restart_data
+#define arch_set_restart_data(restart)	\
+	do { restart->arch_data = current_thread_info()->status; } while (0)
 
-static inline void arch_set_restart_data(struct restart_block *restart)
-{
-	struct thread_info *ti = current_thread_info();
-	if (ti->status & TS_COMPAT)
-		ti->status |= TS_COMPAT_RESTART;
-	else
-		ti->status &= ~TS_COMPAT_RESTART;
-}
 #endif
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 6c26d2c3a2e4..f306e85a08a6 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -767,7 +767,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
 {
 #ifdef CONFIG_IA32_EMULATION
-	if (current_thread_info()->status & TS_COMPAT_RESTART)
+	if (current->restart_block.arch_data & TS_COMPAT)
 		return __NR_ia32_restart_syscall;
 #endif
 #ifdef CONFIG_X86_X32_ABI
diff --git a/include/linux/restart_block.h b/include/linux/restart_block.h
index bba2920e9c05..980a65594412 100644
--- a/include/linux/restart_block.h
+++ b/include/linux/restart_block.h
@@ -23,6 +23,7 @@ enum timespec_type {
  * System call restart block.
  */
 struct restart_block {
+	unsigned long arch_data;
 	long (*fn)(struct restart_block *);
 	union {
 		/* For futex_wait and futex_wait_requeue_pi */
-- 
2.25.1.362.g51ebf55


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH RESEND v4 0/4] x86: fix get_nr_restart_syscall()
  2021-02-01 17:45 [PATCH RESEND v4 0/4] x86: fix get_nr_restart_syscall() Oleg Nesterov
                   ` (3 preceding siblings ...)
  2021-02-01 17:47 ` [PATCH v4 4/4] x86: introduce restart_block->arch_data to kill Oleg Nesterov
@ 2021-02-01 18:18 ` Linus Torvalds
  2021-02-01 18:19   ` Linus Torvalds
  2021-02-03 23:19 ` Oleg Nesterov
  5 siblings, 1 reply; 20+ messages in thread
From: Linus Torvalds @ 2021-02-01 18:18 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Andrew Morton, Thomas Gleixner, Andy Lutomirski, Ingo Molnar,
	Jan Kratochvil, Pedro Alves, Peter Anvin,
	Linux Kernel Mailing List, the arch/x86 maintainers

On Mon, Feb 1, 2021 at 9:46 AM Oleg Nesterov <oleg@redhat.com> wrote:
>
> Somehow I forgot about this problem. Let me resend the last version
> based on discussion with Linus. IIRC he was agree with this series.

Yeah, looks sane to me.

            Linus

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH RESEND v4 0/4] x86: fix get_nr_restart_syscall()
  2021-02-01 18:18 ` [PATCH RESEND v4 0/4] x86: fix get_nr_restart_syscall() Linus Torvalds
@ 2021-02-01 18:19   ` Linus Torvalds
  2021-02-02 15:55     ` Oleg Nesterov
  0 siblings, 1 reply; 20+ messages in thread
From: Linus Torvalds @ 2021-02-01 18:19 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Andrew Morton, Thomas Gleixner, Andy Lutomirski, Ingo Molnar,
	Jan Kratochvil, Pedro Alves, Peter Anvin,
	Linux Kernel Mailing List, the arch/x86 maintainers

On Mon, Feb 1, 2021 at 10:18 AM Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> Yeah, looks sane to me.

Oh, and in a perfect world we'd have a test for this condition too, no?

                 Linus

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v4 3/4] x86: introduce TS_COMPAT_RESTART to fix
  2021-02-01 17:47 ` [PATCH v4 3/4] x86: introduce TS_COMPAT_RESTART to fix Oleg Nesterov
@ 2021-02-01 18:32   ` Andy Lutomirski
  2021-02-02 15:02     ` Oleg Nesterov
  2021-03-16 21:17   ` [tip: x86/urgent] x86: Introduce TS_COMPAT_RESTART to fix get_nr_restart_syscall() tip-bot2 for Oleg Nesterov
  1 sibling, 1 reply; 20+ messages in thread
From: Andy Lutomirski @ 2021-02-01 18:32 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Andrew Morton, Thomas Gleixner, Andy Lutomirski, Ingo Molnar,
	Jan Kratochvil, Linus Torvalds, Pedro Alves, Peter Anvin, LKML,
	X86 ML

On Mon, Feb 1, 2021 at 9:47 AM Oleg Nesterov <oleg@redhat.com> wrote:
>
> The comment in get_nr_restart_syscall() says:
>
>          * The problem is that we can get here when ptrace pokes
>          * syscall-like values into regs even if we're not in a syscall
>          * at all.
>
> Yes. but if we are not in syscall then the
>
>         status & (TS_COMPAT|TS_I386_REGS_POKED)
>
> check below can't really help:
>
>         - TS_COMPAT can't be set
>
>         - TS_I386_REGS_POKED is only set if regs->orig_ax was changed by
>           32bit debugger; and even in this case get_nr_restart_syscall()
>           is only correct if the tracee is 32bit too.
>
> Suppose that 64bit debugger plays with 32bit tracee and

At the risk of asking an obnoxious question here:

>
>         * Tracee calls sleep(2) // TS_COMPAT is set
>         * User interrupts the tracee by CTRL-C after 1 sec and does
>           "(gdb) call func()"
>         * gdb saves the regs by PTRACE_GETREGS

It seems to me that a better solution may be for gdb to see the
post-restart-setup state.  In other words, shouldn't the GETREGS
return with the ax pointing to the restart syscall already?

Now maybe this ship has sailed a long long time ago and we can't do
this, but is it at all worth considering?

>         * does PTRACE_SETREGS to set %rip='func' and %orig_rax=-1
>         * PTRACE_CONT           // TS_COMPAT is cleared
>         * func() hits int3.
>         * Debugger catches SIGTRAP.
>         * Restore original regs by PTRACE_SETREGS.
>         * PTRACE_CONT
>
> get_nr_restart_syscall() wrongly returns __NR_restart_syscall==219, the
> tracee calls ia32_sys_call_table[219] == sys_madvise.
>
> This patch adds the sticky TS_COMPAT_RESTART flag which survives after
> return to user mode, hopefully it allows us to kill TS_I386_REGS_POKED
> but this needs another patch.
>
> Test-case:
>
>   $ cvs -d :pserver:anoncvs:anoncvs@sourceware.org:/cvs/systemtap co ptrace-tests
>   $ gcc -o erestartsys-trap-debuggee ptrace-tests/tests/erestartsys-trap-debuggee.c --m32
>   $ gcc -o erestartsys-trap-debugger ptrace-tests/tests/erestartsys-trap-debugger.c -lutil
>   $ ./erestartsys-trap-debugger
>   Unexpected: retval 1, errno 22
>   erestartsys-trap-debugger: ptrace-tests/tests/erestartsys-trap-debugger.c:421
>
> Reported-by: Jan Kratochvil <jan.kratochvil@redhat.com>
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>
> ---
>  arch/x86/include/asm/thread_info.h | 14 +++++++++++++-
>  arch/x86/kernel/signal.c           | 24 +-----------------------
>  2 files changed, 14 insertions(+), 24 deletions(-)
>
> diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
> index c2dc29e215ea..30d1d187019f 100644
> --- a/arch/x86/include/asm/thread_info.h
> +++ b/arch/x86/include/asm/thread_info.h
> @@ -214,10 +214,22 @@ static inline int arch_within_stack_frames(const void * const stack,
>   */
>  #define TS_COMPAT              0x0002  /* 32bit syscall active (64BIT)*/
>
> +#ifndef __ASSEMBLY__
>  #ifdef CONFIG_COMPAT
>  #define TS_I386_REGS_POKED     0x0004  /* regs poked by 32-bit ptracer */
> +#define TS_COMPAT_RESTART      0x0008
> +
> +#define arch_set_restart_data  arch_set_restart_data
> +
> +static inline void arch_set_restart_data(struct restart_block *restart)
> +{
> +       struct thread_info *ti = current_thread_info();
> +       if (ti->status & TS_COMPAT)
> +               ti->status |= TS_COMPAT_RESTART;
> +       else
> +               ti->status &= ~TS_COMPAT_RESTART;
> +}
>  #endif
> -#ifndef __ASSEMBLY__
>
>  #ifdef CONFIG_X86_32
>  #define in_ia32_syscall() true
> diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
> index ea794a083c44..6c26d2c3a2e4 100644
> --- a/arch/x86/kernel/signal.c
> +++ b/arch/x86/kernel/signal.c
> @@ -766,30 +766,8 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
>
>  static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
>  {
> -       /*
> -        * This function is fundamentally broken as currently
> -        * implemented.
> -        *
> -        * The idea is that we want to trigger a call to the
> -        * restart_block() syscall and that we want in_ia32_syscall(),
> -        * in_x32_syscall(), etc. to match whatever they were in the
> -        * syscall being restarted.  We assume that the syscall
> -        * instruction at (regs->ip - 2) matches whatever syscall
> -        * instruction we used to enter in the first place.
> -        *
> -        * The problem is that we can get here when ptrace pokes
> -        * syscall-like values into regs even if we're not in a syscall
> -        * at all.
> -        *
> -        * For now, we maintain historical behavior and guess based on
> -        * stored state.  We could do better by saving the actual
> -        * syscall arch in restart_block or (with caveats on x32) by
> -        * checking if regs->ip points to 'int $0x80'.  The current
> -        * behavior is incorrect if a tracer has a different bitness
> -        * than the tracee.
> -        */
>  #ifdef CONFIG_IA32_EMULATION
> -       if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
> +       if (current_thread_info()->status & TS_COMPAT_RESTART)
>                 return __NR_ia32_restart_syscall;
>  #endif
>  #ifdef CONFIG_X86_X32_ABI
> --
> 2.25.1.362.g51ebf55
>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v4 3/4] x86: introduce TS_COMPAT_RESTART to fix
  2021-02-01 18:32   ` Andy Lutomirski
@ 2021-02-02 15:02     ` Oleg Nesterov
  2021-02-02 17:27       ` Andy Lutomirski
  0 siblings, 1 reply; 20+ messages in thread
From: Oleg Nesterov @ 2021-02-02 15:02 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Andrew Morton, Thomas Gleixner, Ingo Molnar, Jan Kratochvil,
	Linus Torvalds, Pedro Alves, Peter Anvin, LKML, X86 ML

On 02/01, Andy Lutomirski wrote:
>
> On Mon, Feb 1, 2021 at 9:47 AM Oleg Nesterov <oleg@redhat.com> wrote:
> >
> > The comment in get_nr_restart_syscall() says:
> >
> >          * The problem is that we can get here when ptrace pokes
> >          * syscall-like values into regs even if we're not in a syscall
> >          * at all.
> >
> > Yes. but if we are not in syscall then the
> >
> >         status & (TS_COMPAT|TS_I386_REGS_POKED)
> >
> > check below can't really help:
> >
> >         - TS_COMPAT can't be set
> >
> >         - TS_I386_REGS_POKED is only set if regs->orig_ax was changed by
> >           32bit debugger; and even in this case get_nr_restart_syscall()
> >           is only correct if the tracee is 32bit too.
> >
> > Suppose that 64bit debugger plays with 32bit tracee and
>
> At the risk of asking an obnoxious question here:
>
> >
> >         * Tracee calls sleep(2) // TS_COMPAT is set
> >         * User interrupts the tracee by CTRL-C after 1 sec and does
> >           "(gdb) call func()"
> >         * gdb saves the regs by PTRACE_GETREGS
>
> It seems to me that a better solution may be for gdb to see the
> post-restart-setup state.  In other words, shouldn't the GETREGS
> return with the ax pointing to the restart syscall already?

and ip = regs-ip - 2? And hide ERESTART_BLOCK from debugger? Perhaps
I misunderstood, but this doesn't look like a better solution to me.
Not to mention this would be the serious user-visible change... And
even the necessary changes in getreg() do not look good to me.

Plus I do not understand how this could work. OK, suppose that the
tracee reports a signal with ax = ERESTART_BLOCK.

Debugger simply does GETREGS + SETREGS + PTRACE_CONT(signr). In this
case handle_signal() should set ax = -EINTR, but syscall_get_error()
will report __NR_ia32_restart_syscall?

Probably I greatly misunderstood you...

Oleg.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH RESEND v4 0/4] x86: fix get_nr_restart_syscall()
  2021-02-01 18:19   ` Linus Torvalds
@ 2021-02-02 15:55     ` Oleg Nesterov
  2021-02-02 18:15       ` Linus Torvalds
  0 siblings, 1 reply; 20+ messages in thread
From: Oleg Nesterov @ 2021-02-02 15:55 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Andrew Morton, Thomas Gleixner, Andy Lutomirski, Ingo Molnar,
	Jan Kratochvil, Pedro Alves, Peter Anvin,
	Linux Kernel Mailing List, the arch/x86 maintainers

On 02/01, Linus Torvalds wrote:
>
> On Mon, Feb 1, 2021 at 10:18 AM Linus Torvalds
> <torvalds@linux-foundation.org> wrote:
> >
> > Yeah, looks sane to me.
>
> Oh, and in a perfect world we'd have a test for this condition too, no?

There is the "erestartsys-trap-debugger" test in ptrace-tests suite.
Do you mean you want another test in tools/testing/selftests/ptrace ?

Oleg.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v4 3/4] x86: introduce TS_COMPAT_RESTART to fix
  2021-02-02 15:02     ` Oleg Nesterov
@ 2021-02-02 17:27       ` Andy Lutomirski
  0 siblings, 0 replies; 20+ messages in thread
From: Andy Lutomirski @ 2021-02-02 17:27 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Andy Lutomirski, Andrew Morton, Thomas Gleixner, Ingo Molnar,
	Jan Kratochvil, Linus Torvalds, Pedro Alves, Peter Anvin, LKML,
	X86 ML

On Tue, Feb 2, 2021 at 7:03 AM Oleg Nesterov <oleg@redhat.com> wrote:
>
> On 02/01, Andy Lutomirski wrote:
> >
> > On Mon, Feb 1, 2021 at 9:47 AM Oleg Nesterov <oleg@redhat.com> wrote:
> > >
> > > The comment in get_nr_restart_syscall() says:
> > >
> > >          * The problem is that we can get here when ptrace pokes
> > >          * syscall-like values into regs even if we're not in a syscall
> > >          * at all.
> > >
> > > Yes. but if we are not in syscall then the
> > >
> > >         status & (TS_COMPAT|TS_I386_REGS_POKED)
> > >
> > > check below can't really help:
> > >
> > >         - TS_COMPAT can't be set
> > >
> > >         - TS_I386_REGS_POKED is only set if regs->orig_ax was changed by
> > >           32bit debugger; and even in this case get_nr_restart_syscall()
> > >           is only correct if the tracee is 32bit too.
> > >
> > > Suppose that 64bit debugger plays with 32bit tracee and
> >
> > At the risk of asking an obnoxious question here:
> >
> > >
> > >         * Tracee calls sleep(2) // TS_COMPAT is set
> > >         * User interrupts the tracee by CTRL-C after 1 sec and does
> > >           "(gdb) call func()"
> > >         * gdb saves the regs by PTRACE_GETREGS
> >
> > It seems to me that a better solution may be for gdb to see the
> > post-restart-setup state.  In other words, shouldn't the GETREGS
> > return with the ax pointing to the restart syscall already?
>
> and ip = regs-ip - 2? And hide ERESTART_BLOCK from debugger? Perhaps
> I misunderstood, but this doesn't look like a better solution to me.
> Not to mention this would be the serious user-visible change... And
> even the necessary changes in getreg() do not look good to me.
>
> Plus I do not understand how this could work. OK, suppose that the
> tracee reports a signal with ax = ERESTART_BLOCK.
>
> Debugger simply does GETREGS + SETREGS + PTRACE_CONT(signr). In this
> case handle_signal() should set ax = -EINTR, but syscall_get_error()
> will report __NR_ia32_restart_syscall?
>
> Probably I greatly misunderstood you...

My idea may well be nuts, but I was indeed imagining that we hide
ERESTART_BLOCK from the debugger.  We would do the ip -= 2 and nr =
__NR_..._restart_syscall before any ptrace events happen at all.
Admittedly, this may cause strace to fail, so this is probably a bad
idea.

Oh well, your patch is probably a decent solution.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH RESEND v4 0/4] x86: fix get_nr_restart_syscall()
  2021-02-02 15:55     ` Oleg Nesterov
@ 2021-02-02 18:15       ` Linus Torvalds
  2021-02-02 19:23         ` Oleg Nesterov
  0 siblings, 1 reply; 20+ messages in thread
From: Linus Torvalds @ 2021-02-02 18:15 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Andrew Morton, Thomas Gleixner, Andy Lutomirski, Ingo Molnar,
	Jan Kratochvil, Pedro Alves, Peter Anvin,
	Linux Kernel Mailing List, the arch/x86 maintainers

On Tue, Feb 2, 2021 at 7:56 AM Oleg Nesterov <oleg@redhat.com> wrote:
>
> There is the "erestartsys-trap-debugger" test in ptrace-tests suite.
> Do you mean you want another test in tools/testing/selftests/ptrace ?

No, I guess it's fine if it's caught by the ptrace test suite - we'll
hopefully get fairly timely "guys, you broke it" reports.

Is that ptrace erestartsys-trap-debugger.c test new, or has it just
always failed? Or is the problem that it is so expected to fail that
we wouldn't get reports of it anyway (this clearly fell off your radar
for a long time)?

IOW, my only worry is that this is somewhat subtle (understatement of
the year), has been around forever, and if we care about this debugger
case, we should have _some_ way to make sure that if it gets broken
again we at least get notified some way in a reasonably timely
manner...

               Linus

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH RESEND v4 0/4] x86: fix get_nr_restart_syscall()
  2021-02-02 18:15       ` Linus Torvalds
@ 2021-02-02 19:23         ` Oleg Nesterov
  0 siblings, 0 replies; 20+ messages in thread
From: Oleg Nesterov @ 2021-02-02 19:23 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Andrew Morton, Thomas Gleixner, Andy Lutomirski, Ingo Molnar,
	Jan Kratochvil, Pedro Alves, Peter Anvin,
	Linux Kernel Mailing List, the arch/x86 maintainers

On 02/02, Linus Torvalds wrote:
>
> On Tue, Feb 2, 2021 at 7:56 AM Oleg Nesterov <oleg@redhat.com> wrote:
> >
> > There is the "erestartsys-trap-debugger" test in ptrace-tests suite.
> > Do you mean you want another test in tools/testing/selftests/ptrace ?
>
> No, I guess it's fine if it's caught by the ptrace test suite - we'll
> hopefully get fairly timely "guys, you broke it" reports.
>
> Is that ptrace erestartsys-trap-debugger.c test new, or has it just
> always failed? Or is the problem that it is so expected to fail that
> we wouldn't get reports of it anyway (this clearly fell off your radar
> for a long time)?

No, this test-case is very old. It used to work when this
get_nr_restart_syscall() logic was based on the test_thread_flag(TIF_IA32)
check.

Then it started to fail somewhere 2-3 years ago, and to be honest I didn't
even try to find which exactly patch broke this test. Because this logic
was always wrong anyway, even if this test-case used to work.

I sent v1 soon after this bug was reported, but every time I was too lazy,
that is why this (minor) problem is still not fixed. So, in short, this is
my fault in any case.

Oleg.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH RESEND v4 0/4] x86: fix get_nr_restart_syscall()
  2021-02-01 17:45 [PATCH RESEND v4 0/4] x86: fix get_nr_restart_syscall() Oleg Nesterov
                   ` (4 preceding siblings ...)
  2021-02-01 18:18 ` [PATCH RESEND v4 0/4] x86: fix get_nr_restart_syscall() Linus Torvalds
@ 2021-02-03 23:19 ` Oleg Nesterov
  2021-03-16 18:10   ` Oleg Nesterov
  5 siblings, 1 reply; 20+ messages in thread
From: Oleg Nesterov @ 2021-02-03 23:19 UTC (permalink / raw)
  To: Andrew Morton, Thomas Gleixner
  Cc: Andy Lutomirski, Ingo Molnar, Jan Kratochvil, Linus Torvalds,
	Pedro Alves, Peter Anvin, linux-kernel, x86

It seems that nobody objects,

Andrew, Andy, Thomas, how do you think this series should be routed?

On 02/01, Oleg Nesterov wrote:
>
> Somehow I forgot about this problem. Let me resend the last version
> based on discussion with Linus. IIRC he was agree with this series.
>
> And let me remind why 3/4 temporary adds the "transient" TS_COMPAT_RESTART
> flag killed by the next patch: to simplify the backporting. 1-3 can fix
> the problem without breaking the kABI.
>
> Oleg.
> ---
>  arch/x86/include/asm/processor.h   |  9 ---------
>  arch/x86/include/asm/thread_info.h | 15 ++++++++++++++-
>  arch/x86/kernel/signal.c           | 24 +-----------------------
>  fs/select.c                        | 10 ++++------
>  include/linux/restart_block.h      |  1 +
>  include/linux/thread_info.h        | 13 +++++++++++++
>  kernel/futex.c                     |  3 +--
>  kernel/time/alarmtimer.c           |  2 +-
>  kernel/time/hrtimer.c              |  2 +-
>  kernel/time/posix-cpu-timers.c     |  2 +-
>  10 files changed, 37 insertions(+), 44 deletions(-)


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH RESEND v4 0/4] x86: fix get_nr_restart_syscall()
  2021-02-03 23:19 ` Oleg Nesterov
@ 2021-03-16 18:10   ` Oleg Nesterov
  2021-03-16 18:26     ` Thomas Gleixner
  0 siblings, 1 reply; 20+ messages in thread
From: Oleg Nesterov @ 2021-03-16 18:10 UTC (permalink / raw)
  To: Andrew Morton, Thomas Gleixner
  Cc: Andy Lutomirski, Ingo Molnar, Jan Kratochvil, Linus Torvalds,
	Pedro Alves, Peter Anvin, linux-kernel, x86

On 02/04, Oleg Nesterov wrote:
>
> It seems that nobody objects,
>
> Andrew, Andy, Thomas, how do you think this series should be routed?

ping...

What can I do to finally get this merged?

Should I resend once again? Whom?


> On 02/01, Oleg Nesterov wrote:
> >
> > Somehow I forgot about this problem. Let me resend the last version
> > based on discussion with Linus. IIRC he was agree with this series.
> >
> > And let me remind why 3/4 temporary adds the "transient" TS_COMPAT_RESTART
> > flag killed by the next patch: to simplify the backporting. 1-3 can fix
> > the problem without breaking the kABI.
> >
> > Oleg.
> > ---
> >  arch/x86/include/asm/processor.h   |  9 ---------
> >  arch/x86/include/asm/thread_info.h | 15 ++++++++++++++-
> >  arch/x86/kernel/signal.c           | 24 +-----------------------
> >  fs/select.c                        | 10 ++++------
> >  include/linux/restart_block.h      |  1 +
> >  include/linux/thread_info.h        | 13 +++++++++++++
> >  kernel/futex.c                     |  3 +--
> >  kernel/time/alarmtimer.c           |  2 +-
> >  kernel/time/hrtimer.c              |  2 +-
> >  kernel/time/posix-cpu-timers.c     |  2 +-
> >  10 files changed, 37 insertions(+), 44 deletions(-)


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH RESEND v4 0/4] x86: fix get_nr_restart_syscall()
  2021-03-16 18:10   ` Oleg Nesterov
@ 2021-03-16 18:26     ` Thomas Gleixner
  0 siblings, 0 replies; 20+ messages in thread
From: Thomas Gleixner @ 2021-03-16 18:26 UTC (permalink / raw)
  To: Oleg Nesterov, Andrew Morton
  Cc: Andy Lutomirski, Ingo Molnar, Jan Kratochvil, Linus Torvalds,
	Pedro Alves, Peter Anvin, linux-kernel, x86

On Tue, Mar 16 2021 at 19:10, Oleg Nesterov wrote:

> On 02/04, Oleg Nesterov wrote:
>>
>> It seems that nobody objects,
>>
>> Andrew, Andy, Thomas, how do you think this series should be routed?
>
> ping...
>
> What can I do to finally get this merged?
>
> Should I resend once again? Whom?

I'll pick it up then

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [tip: x86/urgent] x86: Move TS_COMPAT back to asm/thread_info.h
  2021-02-01 17:46 ` [PATCH v4 2/4] x86: mv TS_COMPAT from asm/processor.h to Oleg Nesterov
@ 2021-03-16 21:17   ` tip-bot2 for Oleg Nesterov
  0 siblings, 0 replies; 20+ messages in thread
From: tip-bot2 for Oleg Nesterov @ 2021-03-16 21:17 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Oleg Nesterov, Thomas Gleixner, stable, x86, linux-kernel

The following commit has been merged into the x86/urgent branch of tip:

Commit-ID:     66c1b6d74cd7035e85c426f0af4aede19e805c8a
Gitweb:        https://git.kernel.org/tip/66c1b6d74cd7035e85c426f0af4aede19e805c8a
Author:        Oleg Nesterov <oleg@redhat.com>
AuthorDate:    Mon, 01 Feb 2021 18:46:49 +01:00
Committer:     Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Tue, 16 Mar 2021 22:13:11 +01:00

x86: Move TS_COMPAT back to asm/thread_info.h

Move TS_COMPAT back to asm/thread_info.h, close to TS_I386_REGS_POKED.

It was moved to asm/processor.h by b9d989c7218a ("x86/asm: Move the
thread_info::status field to thread_struct"), then later 37a8f7c38339
("x86/asm: Move 'status' from thread_struct to thread_info") moved the
'status' field back but TS_COMPAT was forgotten.

Preparatory patch to fix the COMPAT case for get_nr_restart_syscall()

Fixes: 609c19a385c8 ("x86/ptrace: Stop setting TS_COMPAT in ptrace code")
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210201174649.GA17880@redhat.com
---
 arch/x86/include/asm/processor.h   |  9 ---------
 arch/x86/include/asm/thread_info.h |  9 +++++++++
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index dc6d149..f1b9ed5 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -551,15 +551,6 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
 	*size = fpu_kernel_xstate_size;
 }
 
-/*
- * Thread-synchronous status.
- *
- * This is different from the flags in that nobody else
- * ever touches our thread-synchronous status, so we don't
- * have to worry about atomic accesses.
- */
-#define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/
-
 static inline void
 native_load_sp0(unsigned long sp0)
 {
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 0d751d5..c2dc29e 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -205,6 +205,15 @@ static inline int arch_within_stack_frames(const void * const stack,
 
 #endif
 
+/*
+ * Thread-synchronous status.
+ *
+ * This is different from the flags in that nobody else
+ * ever touches our thread-synchronous status, so we don't
+ * have to worry about atomic accesses.
+ */
+#define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/
+
 #ifdef CONFIG_COMPAT
 #define TS_I386_REGS_POKED	0x0004	/* regs poked by 32-bit ptracer */
 #endif

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [tip: x86/urgent] x86: Introduce restart_block->arch_data to remove TS_COMPAT_RESTART
  2021-02-01 17:47 ` [PATCH v4 4/4] x86: introduce restart_block->arch_data to kill Oleg Nesterov
@ 2021-03-16 21:17   ` tip-bot2 for Oleg Nesterov
  0 siblings, 0 replies; 20+ messages in thread
From: tip-bot2 for Oleg Nesterov @ 2021-03-16 21:17 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Oleg Nesterov, Thomas Gleixner, x86, linux-kernel

The following commit has been merged into the x86/urgent branch of tip:

Commit-ID:     b2e9df850c58c2b36e915e7d3bed3f6107cccba6
Gitweb:        https://git.kernel.org/tip/b2e9df850c58c2b36e915e7d3bed3f6107cccba6
Author:        Oleg Nesterov <oleg@redhat.com>
AuthorDate:    Mon, 01 Feb 2021 18:47:16 +01:00
Committer:     Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Tue, 16 Mar 2021 22:13:11 +01:00

x86: Introduce restart_block->arch_data to remove TS_COMPAT_RESTART

Save the current_thread_info()->status of X86 in the new
restart_block->arch_data field so TS_COMPAT_RESTART can be removed again.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210201174716.GA17898@redhat.com

---
 arch/x86/include/asm/thread_info.h | 12 ++----------
 arch/x86/kernel/signal.c           |  2 +-
 include/linux/restart_block.h      |  1 +
 3 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 30d1d18..06b740b 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -217,18 +217,10 @@ static inline int arch_within_stack_frames(const void * const stack,
 #ifndef __ASSEMBLY__
 #ifdef CONFIG_COMPAT
 #define TS_I386_REGS_POKED	0x0004	/* regs poked by 32-bit ptracer */
-#define TS_COMPAT_RESTART	0x0008
 
-#define arch_set_restart_data	arch_set_restart_data
+#define arch_set_restart_data(restart)	\
+	do { restart->arch_data = current_thread_info()->status; } while (0)
 
-static inline void arch_set_restart_data(struct restart_block *restart)
-{
-	struct thread_info *ti = current_thread_info();
-	if (ti->status & TS_COMPAT)
-		ti->status |= TS_COMPAT_RESTART;
-	else
-		ti->status &= ~TS_COMPAT_RESTART;
-}
 #endif
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 6c26d2c..f306e85 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -767,7 +767,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
 {
 #ifdef CONFIG_IA32_EMULATION
-	if (current_thread_info()->status & TS_COMPAT_RESTART)
+	if (current->restart_block.arch_data & TS_COMPAT)
 		return __NR_ia32_restart_syscall;
 #endif
 #ifdef CONFIG_X86_X32_ABI
diff --git a/include/linux/restart_block.h b/include/linux/restart_block.h
index bba2920..980a655 100644
--- a/include/linux/restart_block.h
+++ b/include/linux/restart_block.h
@@ -23,6 +23,7 @@ enum timespec_type {
  * System call restart block.
  */
 struct restart_block {
+	unsigned long arch_data;
 	long (*fn)(struct restart_block *);
 	union {
 		/* For futex_wait and futex_wait_requeue_pi */

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [tip: x86/urgent] x86: Introduce TS_COMPAT_RESTART to fix get_nr_restart_syscall()
  2021-02-01 17:47 ` [PATCH v4 3/4] x86: introduce TS_COMPAT_RESTART to fix Oleg Nesterov
  2021-02-01 18:32   ` Andy Lutomirski
@ 2021-03-16 21:17   ` tip-bot2 for Oleg Nesterov
  1 sibling, 0 replies; 20+ messages in thread
From: tip-bot2 for Oleg Nesterov @ 2021-03-16 21:17 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Jan Kratochvil, Oleg Nesterov, Thomas Gleixner, stable, x86,
	linux-kernel

The following commit has been merged into the x86/urgent branch of tip:

Commit-ID:     8c150ba2fb5995c84a7a43848250d444a3329a7d
Gitweb:        https://git.kernel.org/tip/8c150ba2fb5995c84a7a43848250d444a3329a7d
Author:        Oleg Nesterov <oleg@redhat.com>
AuthorDate:    Mon, 01 Feb 2021 18:47:09 +01:00
Committer:     Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Tue, 16 Mar 2021 22:13:11 +01:00

x86: Introduce TS_COMPAT_RESTART to fix get_nr_restart_syscall()

The comment in get_nr_restart_syscall() says:

	 * The problem is that we can get here when ptrace pokes
	 * syscall-like values into regs even if we're not in a syscall
	 * at all.

Yes, but if not in a syscall then the

	status & (TS_COMPAT|TS_I386_REGS_POKED)

check below can't really help:

	- TS_COMPAT can't be set

	- TS_I386_REGS_POKED is only set if regs->orig_ax was changed by
	  32bit debugger; and even in this case get_nr_restart_syscall()
	  is only correct if the tracee is 32bit too.

Suppose that a 64bit debugger plays with a 32bit tracee and

	* Tracee calls sleep(2)	// TS_COMPAT is set
	* User interrupts the tracee by CTRL-C after 1 sec and does
	  "(gdb) call func()"
	* gdb saves the regs by PTRACE_GETREGS
	* does PTRACE_SETREGS to set %rip='func' and %orig_rax=-1
	* PTRACE_CONT		// TS_COMPAT is cleared
	* func() hits int3.
	* Debugger catches SIGTRAP.
	* Restore original regs by PTRACE_SETREGS.
	* PTRACE_CONT

get_nr_restart_syscall() wrongly returns __NR_restart_syscall==219, the
tracee calls ia32_sys_call_table[219] == sys_madvise.

Add the sticky TS_COMPAT_RESTART flag which survives after return to user
mode. It's going to be removed in the next step again by storing the
information in the restart block. As a further cleanup it might be possible
to remove also TS_I386_REGS_POKED with that.

Test-case:

  $ cvs -d :pserver:anoncvs:anoncvs@sourceware.org:/cvs/systemtap co ptrace-tests
  $ gcc -o erestartsys-trap-debuggee ptrace-tests/tests/erestartsys-trap-debuggee.c --m32
  $ gcc -o erestartsys-trap-debugger ptrace-tests/tests/erestartsys-trap-debugger.c -lutil
  $ ./erestartsys-trap-debugger
  Unexpected: retval 1, errno 22
  erestartsys-trap-debugger: ptrace-tests/tests/erestartsys-trap-debugger.c:421

Fixes: 609c19a385c8 ("x86/ptrace: Stop setting TS_COMPAT in ptrace code")
Reported-by: Jan Kratochvil <jan.kratochvil@redhat.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210201174709.GA17895@redhat.com

---
 arch/x86/include/asm/thread_info.h | 14 +++++++++++++-
 arch/x86/kernel/signal.c           | 24 +-----------------------
 2 files changed, 14 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index c2dc29e..30d1d18 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -214,10 +214,22 @@ static inline int arch_within_stack_frames(const void * const stack,
  */
 #define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/
 
+#ifndef __ASSEMBLY__
 #ifdef CONFIG_COMPAT
 #define TS_I386_REGS_POKED	0x0004	/* regs poked by 32-bit ptracer */
+#define TS_COMPAT_RESTART	0x0008
+
+#define arch_set_restart_data	arch_set_restart_data
+
+static inline void arch_set_restart_data(struct restart_block *restart)
+{
+	struct thread_info *ti = current_thread_info();
+	if (ti->status & TS_COMPAT)
+		ti->status |= TS_COMPAT_RESTART;
+	else
+		ti->status &= ~TS_COMPAT_RESTART;
+}
 #endif
-#ifndef __ASSEMBLY__
 
 #ifdef CONFIG_X86_32
 #define in_ia32_syscall() true
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index ea794a0..6c26d2c 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -766,30 +766,8 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 
 static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
 {
-	/*
-	 * This function is fundamentally broken as currently
-	 * implemented.
-	 *
-	 * The idea is that we want to trigger a call to the
-	 * restart_block() syscall and that we want in_ia32_syscall(),
-	 * in_x32_syscall(), etc. to match whatever they were in the
-	 * syscall being restarted.  We assume that the syscall
-	 * instruction at (regs->ip - 2) matches whatever syscall
-	 * instruction we used to enter in the first place.
-	 *
-	 * The problem is that we can get here when ptrace pokes
-	 * syscall-like values into regs even if we're not in a syscall
-	 * at all.
-	 *
-	 * For now, we maintain historical behavior and guess based on
-	 * stored state.  We could do better by saving the actual
-	 * syscall arch in restart_block or (with caveats on x32) by
-	 * checking if regs->ip points to 'int $0x80'.  The current
-	 * behavior is incorrect if a tracer has a different bitness
-	 * than the tracee.
-	 */
 #ifdef CONFIG_IA32_EMULATION
-	if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
+	if (current_thread_info()->status & TS_COMPAT_RESTART)
 		return __NR_ia32_restart_syscall;
 #endif
 #ifdef CONFIG_X86_X32_ABI

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [tip: x86/urgent] kernel, fs: Introduce and use set_restart_fn() and arch_set_restart_data()
  2021-02-01 17:46 ` [PATCH v4 1/4] introduce set_restart_fn() and arch_set_restart_data() Oleg Nesterov
@ 2021-03-16 21:17   ` tip-bot2 for Oleg Nesterov
  0 siblings, 0 replies; 20+ messages in thread
From: tip-bot2 for Oleg Nesterov @ 2021-03-16 21:17 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Oleg Nesterov, Thomas Gleixner, stable, x86, linux-kernel

The following commit has been merged into the x86/urgent branch of tip:

Commit-ID:     5abbe51a526253b9f003e9a0a195638dc882d660
Gitweb:        https://git.kernel.org/tip/5abbe51a526253b9f003e9a0a195638dc882d660
Author:        Oleg Nesterov <oleg@redhat.com>
AuthorDate:    Mon, 01 Feb 2021 18:46:41 +01:00
Committer:     Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Tue, 16 Mar 2021 22:13:10 +01:00

kernel, fs: Introduce and use set_restart_fn() and arch_set_restart_data()

Preparation for fixing get_nr_restart_syscall() on X86 for COMPAT.

Add a new helper which sets restart_block->fn and calls a dummy
arch_set_restart_data() helper.

Fixes: 609c19a385c8 ("x86/ptrace: Stop setting TS_COMPAT in ptrace code")
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210201174641.GA17871@redhat.com

---
 fs/select.c                    | 10 ++++------
 include/linux/thread_info.h    | 13 +++++++++++++
 kernel/futex.c                 |  3 +--
 kernel/time/alarmtimer.c       |  2 +-
 kernel/time/hrtimer.c          |  2 +-
 kernel/time/posix-cpu-timers.c |  2 +-
 6 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/fs/select.c b/fs/select.c
index 37aaa83..945896d 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -1055,10 +1055,9 @@ static long do_restart_poll(struct restart_block *restart_block)
 
 	ret = do_sys_poll(ufds, nfds, to);
 
-	if (ret == -ERESTARTNOHAND) {
-		restart_block->fn = do_restart_poll;
-		ret = -ERESTART_RESTARTBLOCK;
-	}
+	if (ret == -ERESTARTNOHAND)
+		ret = set_restart_fn(restart_block, do_restart_poll);
+
 	return ret;
 }
 
@@ -1080,7 +1079,6 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
 		struct restart_block *restart_block;
 
 		restart_block = &current->restart_block;
-		restart_block->fn = do_restart_poll;
 		restart_block->poll.ufds = ufds;
 		restart_block->poll.nfds = nfds;
 
@@ -1091,7 +1089,7 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
 		} else
 			restart_block->poll.has_timeout = 0;
 
-		ret = -ERESTART_RESTARTBLOCK;
+		ret = set_restart_fn(restart_block, do_restart_poll);
 	}
 	return ret;
 }
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 9b2158c..157762d 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -11,6 +11,7 @@
 #include <linux/types.h>
 #include <linux/bug.h>
 #include <linux/restart_block.h>
+#include <linux/errno.h>
 
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 /*
@@ -59,6 +60,18 @@ enum syscall_work_bit {
 
 #ifdef __KERNEL__
 
+#ifndef arch_set_restart_data
+#define arch_set_restart_data(restart) do { } while (0)
+#endif
+
+static inline long set_restart_fn(struct restart_block *restart,
+					long (*fn)(struct restart_block *))
+{
+	restart->fn = fn;
+	arch_set_restart_data(restart);
+	return -ERESTART_RESTARTBLOCK;
+}
+
 #ifndef THREAD_ALIGN
 #define THREAD_ALIGN	THREAD_SIZE
 #endif
diff --git a/kernel/futex.c b/kernel/futex.c
index e68db77..00febd6 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2728,14 +2728,13 @@ retry:
 		goto out;
 
 	restart = &current->restart_block;
-	restart->fn = futex_wait_restart;
 	restart->futex.uaddr = uaddr;
 	restart->futex.val = val;
 	restart->futex.time = *abs_time;
 	restart->futex.bitset = bitset;
 	restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
 
-	ret = -ERESTART_RESTARTBLOCK;
+	ret = set_restart_fn(restart, futex_wait_restart);
 
 out:
 	if (to) {
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 98d7a15..4d94e2b 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -854,9 +854,9 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags,
 	if (flags == TIMER_ABSTIME)
 		return -ERESTARTNOHAND;
 
-	restart->fn = alarm_timer_nsleep_restart;
 	restart->nanosleep.clockid = type;
 	restart->nanosleep.expires = exp;
+	set_restart_fn(restart, alarm_timer_nsleep_restart);
 	return ret;
 }
 
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 788b9d1..5c9d968 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1957,9 +1957,9 @@ long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode,
 	}
 
 	restart = &current->restart_block;
-	restart->fn = hrtimer_nanosleep_restart;
 	restart->nanosleep.clockid = t.timer.base->clockid;
 	restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);
+	set_restart_fn(restart, hrtimer_nanosleep_restart);
 out:
 	destroy_hrtimer_on_stack(&t.timer);
 	return ret;
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index a71758e..9abe152 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -1480,8 +1480,8 @@ static int posix_cpu_nsleep(const clockid_t which_clock, int flags,
 		if (flags & TIMER_ABSTIME)
 			return -ERESTARTNOHAND;
 
-		restart_block->fn = posix_cpu_nsleep_restart;
 		restart_block->nanosleep.clockid = which_clock;
+		set_restart_fn(restart_block, posix_cpu_nsleep_restart);
 	}
 	return error;
 }

^ permalink raw reply related	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2021-03-16 21:17 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-01 17:45 [PATCH RESEND v4 0/4] x86: fix get_nr_restart_syscall() Oleg Nesterov
2021-02-01 17:46 ` [PATCH v4 1/4] introduce set_restart_fn() and arch_set_restart_data() Oleg Nesterov
2021-03-16 21:17   ` [tip: x86/urgent] kernel, fs: Introduce and use " tip-bot2 for Oleg Nesterov
2021-02-01 17:46 ` [PATCH v4 2/4] x86: mv TS_COMPAT from asm/processor.h to Oleg Nesterov
2021-03-16 21:17   ` [tip: x86/urgent] x86: Move TS_COMPAT back to asm/thread_info.h tip-bot2 for Oleg Nesterov
2021-02-01 17:47 ` [PATCH v4 3/4] x86: introduce TS_COMPAT_RESTART to fix Oleg Nesterov
2021-02-01 18:32   ` Andy Lutomirski
2021-02-02 15:02     ` Oleg Nesterov
2021-02-02 17:27       ` Andy Lutomirski
2021-03-16 21:17   ` [tip: x86/urgent] x86: Introduce TS_COMPAT_RESTART to fix get_nr_restart_syscall() tip-bot2 for Oleg Nesterov
2021-02-01 17:47 ` [PATCH v4 4/4] x86: introduce restart_block->arch_data to kill Oleg Nesterov
2021-03-16 21:17   ` [tip: x86/urgent] x86: Introduce restart_block->arch_data to remove TS_COMPAT_RESTART tip-bot2 for Oleg Nesterov
2021-02-01 18:18 ` [PATCH RESEND v4 0/4] x86: fix get_nr_restart_syscall() Linus Torvalds
2021-02-01 18:19   ` Linus Torvalds
2021-02-02 15:55     ` Oleg Nesterov
2021-02-02 18:15       ` Linus Torvalds
2021-02-02 19:23         ` Oleg Nesterov
2021-02-03 23:19 ` Oleg Nesterov
2021-03-16 18:10   ` Oleg Nesterov
2021-03-16 18:26     ` Thomas Gleixner

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.