* [patch 0/8] x86/entry: Consolidation - Part II
@ 2020-02-25 22:08 Thomas Gleixner
2020-02-25 22:08 ` [patch 1/8] x86/entry/64: Trace irqflags unconditionally on when returning to user space Thomas Gleixner
` (7 more replies)
0 siblings, 8 replies; 56+ messages in thread
From: Thomas Gleixner @ 2020-02-25 22:08 UTC (permalink / raw)
To: LKML
Cc: x86, Steven Rostedt, Brian Gerst, Juergen Gross, Paolo Bonzini,
Arnd Bergmann
Hi!
This is the second batch of a 73-patch series which consolidates the x86
entry code. The larger explanation is in the part I cover letter:
https://lore.kernel.org/r/20200225213636.689276920@linutronix.de
It applies on top of part I which can be found via the above link.
This part cleans up the entry code and lifts the irq tracing and entry/exit
work into C, which is a preliminary to make especially the exit work a
generic infrastructure.
It has some rough edges as some of the ASM code is shared with
exceptions/traps/interrupts, which will be addressed in later parts of the
series.
This applies on top of part one which is available here:
git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git entry-v1-part1
To get both part 1 and part 2 pull from here:
git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git entry-v1-part2
Thanks,
tglx
8<---------------
common.c | 82 ++++++++++++++++++++++++++++++++++++++++--------------
entry_32.S | 24 ++-------------
entry_64.S | 6 ---
entry_64_compat.S | 32 +++------------------
4 files changed, 71 insertions(+), 73 deletions(-)
^ permalink raw reply [flat|nested] 56+ messages in thread
* [patch 1/8] x86/entry/64: Trace irqflags unconditionally on when returning to user space
2020-02-25 22:08 [patch 0/8] x86/entry: Consolidation - Part II Thomas Gleixner
@ 2020-02-25 22:08 ` Thomas Gleixner
2020-02-27 19:49 ` Borislav Petkov
` (2 more replies)
2020-02-25 22:08 ` [patch 2/8] x86/entry/common: Consolidate syscall entry code Thomas Gleixner
` (6 subsequent siblings)
7 siblings, 3 replies; 56+ messages in thread
From: Thomas Gleixner @ 2020-02-25 22:08 UTC (permalink / raw)
To: LKML
Cc: x86, Steven Rostedt, Brian Gerst, Juergen Gross, Paolo Bonzini,
Arnd Bergmann
User space can no longer disable interrupts so trace return to user space
unconditionally as IRQS_ON.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/entry_64.S | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -174,7 +174,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_h
movq %rsp, %rsi
call do_syscall_64 /* returns with IRQs disabled */
- TRACE_IRQS_IRETQ /* we're about to change IF */
+ TRACE_IRQS_ON /* return enables interrupts */
/*
* Try to use SYSRET instead of IRET if we're returning to
@@ -619,7 +619,7 @@ SYM_CODE_START_LOCAL(common_interrupt)
.Lretint_user:
mov %rsp,%rdi
call prepare_exit_to_usermode
- TRACE_IRQS_IRETQ
+ TRACE_IRQS_ON
SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
#ifdef CONFIG_DEBUG_ENTRY
^ permalink raw reply [flat|nested] 56+ messages in thread
* [patch 2/8] x86/entry/common: Consolidate syscall entry code
2020-02-25 22:08 [patch 0/8] x86/entry: Consolidation - Part II Thomas Gleixner
2020-02-25 22:08 ` [patch 1/8] x86/entry/64: Trace irqflags unconditionally on when returning to user space Thomas Gleixner
@ 2020-02-25 22:08 ` Thomas Gleixner
2020-02-27 19:57 ` Borislav Petkov
` (2 more replies)
2020-02-25 22:08 ` [patch 3/8] x86/entry/common: Mark syscall entry points notrace/nokprobe Thomas Gleixner
` (5 subsequent siblings)
7 siblings, 3 replies; 56+ messages in thread
From: Thomas Gleixner @ 2020-02-25 22:08 UTC (permalink / raw)
To: LKML
Cc: x86, Steven Rostedt, Brian Gerst, Juergen Gross, Paolo Bonzini,
Arnd Bergmann
All syscall entry points call enter_from_user_mode() and
local_irq_enable(). Move that into an inline helper so the interrupt
tracing can be moved into that helper later instead of sprinkling
it all over the place.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/common.c | 48 +++++++++++++++++++++++++++++++++---------------
1 file changed, 33 insertions(+), 15 deletions(-)
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -49,6 +49,18 @@
static inline void enter_from_user_mode(void) {}
#endif
+/*
+ * All syscall entry variants call with interrupts disabled.
+ *
+ * Invoke context tracking if enabled and enable interrupts for further
+ * processing.
+ */
+static __always_inline void syscall_entry_fixups(void)
+{
+ enter_from_user_mode();
+ local_irq_enable();
+}
+
static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
{
#ifdef CONFIG_X86_64
@@ -279,13 +291,11 @@ static void syscall_slow_exit_work(struc
}
#ifdef CONFIG_X86_64
-__visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
+static __always_inline
+void do_syscall_64_irqs_on(unsigned long nr, struct pt_regs *regs)
{
- struct thread_info *ti;
+ struct thread_info *ti = current_thread_info();
- enter_from_user_mode();
- local_irq_enable();
- ti = current_thread_info();
if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
nr = syscall_trace_enter(regs);
@@ -303,6 +313,12 @@ static void syscall_slow_exit_work(struc
syscall_return_slowpath(regs);
}
+
+__visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
+{
+ syscall_entry_fixups();
+ do_syscall_64_irqs_on(nr, regs);
+}
#endif
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
@@ -355,19 +371,17 @@ static __always_inline void do_syscall_3
/* Handles int $0x80 */
__visible void do_int80_syscall_32(struct pt_regs *regs)
{
- enter_from_user_mode();
- local_irq_enable();
+ syscall_entry_fixups();
do_syscall_32_irqs_on(regs);
}
-/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
-__visible long do_fast_syscall_32(struct pt_regs *regs)
+/* Fast syscall 32bit variant */
+static __always_inline long do_fast_syscall_32_irqs_on(struct pt_regs *regs)
{
/*
* Called using the internal vDSO SYSENTER/SYSCALL32 calling
* convention. Adjust regs so it looks like we entered using int80.
*/
-
unsigned long landing_pad = (unsigned long)current->mm->context.vdso +
vdso_image_32.sym_int80_landing_pad;
@@ -378,10 +392,6 @@ static __always_inline void do_syscall_3
*/
regs->ip = landing_pad;
- enter_from_user_mode();
-
- local_irq_enable();
-
/* Fetch EBP from where the vDSO stashed it. */
if (
#ifdef CONFIG_X86_64
@@ -437,4 +447,12 @@ static __always_inline void do_syscall_3
(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) == 0;
#endif
}
-#endif
+
+/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
+__visible long do_fast_syscall_32(struct pt_regs *regs)
+{
+ syscall_entry_fixups();
+ return do_fast_syscall_32_irqs_on(regs);
+}
+
+#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
^ permalink raw reply [flat|nested] 56+ messages in thread
* [patch 3/8] x86/entry/common: Mark syscall entry points notrace/nokprobe
2020-02-25 22:08 [patch 0/8] x86/entry: Consolidation - Part II Thomas Gleixner
2020-02-25 22:08 ` [patch 1/8] x86/entry/64: Trace irqflags unconditionally on when returning to user space Thomas Gleixner
2020-02-25 22:08 ` [patch 2/8] x86/entry/common: Consolidate syscall entry code Thomas Gleixner
@ 2020-02-25 22:08 ` Thomas Gleixner
2020-02-27 23:15 ` Frederic Weisbecker
2020-02-28 8:59 ` Alexandre Chartre
2020-02-25 22:08 ` [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code Thomas Gleixner
` (4 subsequent siblings)
7 siblings, 2 replies; 56+ messages in thread
From: Thomas Gleixner @ 2020-02-25 22:08 UTC (permalink / raw)
To: LKML
Cc: x86, Steven Rostedt, Brian Gerst, Juergen Gross, Paolo Bonzini,
Arnd Bergmann
Anything before enter_from_user_mode() is not safe to be traced or probed.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/common.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -314,11 +314,12 @@ void do_syscall_64_irqs_on(unsigned long
syscall_return_slowpath(regs);
}
-__visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
+__visible notrace void do_syscall_64(unsigned long nr, struct pt_regs *regs)
{
syscall_entry_fixups();
do_syscall_64_irqs_on(nr, regs);
}
+NOKPROBE_SYMBOL(do_syscall_64);
#endif
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
@@ -369,11 +370,12 @@ static __always_inline void do_syscall_3
}
/* Handles int $0x80 */
-__visible void do_int80_syscall_32(struct pt_regs *regs)
+__visible notrace void do_int80_syscall_32(struct pt_regs *regs)
{
syscall_entry_fixups();
do_syscall_32_irqs_on(regs);
}
+NOKPROBE_SYMBOL(do_int80_syscall_32);
/* Fast syscall 32bit variant */
static __always_inline long do_fast_syscall_32_irqs_on(struct pt_regs *regs)
@@ -449,10 +451,11 @@ static __always_inline long do_fast_sysc
}
/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
-__visible long do_fast_syscall_32(struct pt_regs *regs)
+__visible notrace long do_fast_syscall_32(struct pt_regs *regs)
{
syscall_entry_fixups();
return do_fast_syscall_32_irqs_on(regs);
}
+NOKPROBE_SYMBOL(do_fast_syscall_32);
#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
^ permalink raw reply [flat|nested] 56+ messages in thread
* [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-02-25 22:08 [patch 0/8] x86/entry: Consolidation - Part II Thomas Gleixner
` (2 preceding siblings ...)
2020-02-25 22:08 ` [patch 3/8] x86/entry/common: Mark syscall entry points notrace/nokprobe Thomas Gleixner
@ 2020-02-25 22:08 ` Thomas Gleixner
2020-02-26 5:43 ` Andy Lutomirski
` (2 more replies)
2020-02-25 22:08 ` [patch 5/8] x86/entry/common: Provide trace/kprobe safe exit to user space functions Thomas Gleixner
` (3 subsequent siblings)
7 siblings, 3 replies; 56+ messages in thread
From: Thomas Gleixner @ 2020-02-25 22:08 UTC (permalink / raw)
To: LKML
Cc: x86, Steven Rostedt, Brian Gerst, Juergen Gross, Paolo Bonzini,
Arnd Bergmann
Now that the C entry points are safe, move the irq flags tracing code into
the entry helper.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/common.c | 5 +++++
arch/x86/entry/entry_32.S | 12 ------------
arch/x86/entry/entry_64.S | 2 --
arch/x86/entry/entry_64_compat.S | 18 ------------------
4 files changed, 5 insertions(+), 32 deletions(-)
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -57,6 +57,11 @@ static inline void enter_from_user_mode(
*/
static __always_inline void syscall_entry_fixups(void)
{
+ /*
+ * Usermode is traced as interrupts enabled, but the syscall entry
+ * mechanisms disable interrupts. Tell the tracer.
+ */
+ trace_hardirqs_off();
enter_from_user_mode();
local_irq_enable();
}
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -960,12 +960,6 @@ SYM_FUNC_START(entry_SYSENTER_32)
jnz .Lsysenter_fix_flags
.Lsysenter_flags_fixed:
- /*
- * User mode is traced as though IRQs are on, and SYSENTER
- * turned them off.
- */
- TRACE_IRQS_OFF
-
movl %esp, %eax
call do_fast_syscall_32
/* XEN PV guests always use IRET path */
@@ -1075,12 +1069,6 @@ SYM_FUNC_START(entry_INT80_32)
SAVE_ALL pt_regs_ax=$-ENOSYS switch_stacks=1 /* save rest */
- /*
- * User mode is traced as though IRQs are on, and the interrupt gate
- * turned them off.
- */
- TRACE_IRQS_OFF
-
movl %esp, %eax
call do_int80_syscall_32
.Lsyscall_32_done:
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -167,8 +167,6 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_h
PUSH_AND_CLEAR_REGS rax=$-ENOSYS
- TRACE_IRQS_OFF
-
/* IRQs are off. */
movq %rax, %rdi
movq %rsp, %rsi
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -129,12 +129,6 @@ SYM_FUNC_START(entry_SYSENTER_compat)
jnz .Lsysenter_fix_flags
.Lsysenter_flags_fixed:
- /*
- * User mode is traced as though IRQs are on, and SYSENTER
- * turned them off.
- */
- TRACE_IRQS_OFF
-
movq %rsp, %rdi
call do_fast_syscall_32
/* XEN PV guests always use IRET path */
@@ -247,12 +241,6 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_aft
pushq $0 /* pt_regs->r15 = 0 */
xorl %r15d, %r15d /* nospec r15 */
- /*
- * User mode is traced as though IRQs are on, and SYSENTER
- * turned them off.
- */
- TRACE_IRQS_OFF
-
movq %rsp, %rdi
call do_fast_syscall_32
/* XEN PV guests always use IRET path */
@@ -403,12 +391,6 @@ SYM_CODE_START(entry_INT80_compat)
xorl %r15d, %r15d /* nospec r15 */
cld
- /*
- * User mode is traced as though IRQs are on, and the interrupt
- * gate turned them off.
- */
- TRACE_IRQS_OFF
-
movq %rsp, %rdi
call do_int80_syscall_32
.Lsyscall_32_done:
^ permalink raw reply [flat|nested] 56+ messages in thread
* [patch 5/8] x86/entry/common: Provide trace/kprobe safe exit to user space functions
2020-02-25 22:08 [patch 0/8] x86/entry: Consolidation - Part II Thomas Gleixner
` (3 preceding siblings ...)
2020-02-25 22:08 ` [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code Thomas Gleixner
@ 2020-02-25 22:08 ` Thomas Gleixner
2020-02-26 5:45 ` Andy Lutomirski
2020-02-27 15:43 ` Alexandre Chartre
2020-02-25 22:08 ` [patch 6/8] x86/entry: Move irq tracing to syscall_slow_exit_work Thomas Gleixner
` (2 subsequent siblings)
7 siblings, 2 replies; 56+ messages in thread
From: Thomas Gleixner @ 2020-02-25 22:08 UTC (permalink / raw)
To: LKML
Cc: x86, Steven Rostedt, Brian Gerst, Juergen Gross, Paolo Bonzini,
Arnd Bergmann
Split prepare_exit_to_usermode() and mark it notrace/noprobe so the irq
flags tracing on return can be put into it.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/common.c | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -196,7 +196,7 @@ static void exit_to_usermode_loop(struct
}
/* Called with IRQs disabled. */
-__visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
+static inline void __prepare_exit_to_usermode(struct pt_regs *regs)
{
struct thread_info *ti = current_thread_info();
u32 cached_flags;
@@ -241,6 +241,12 @@ static void exit_to_usermode_loop(struct
mds_user_clear_cpu_buffers();
}
+__visible inline notrace void prepare_exit_to_usermode(struct pt_regs *regs)
+{
+ __prepare_exit_to_usermode(regs);
+}
+NOKPROBE_SYMBOL(prepare_exit_to_usermode);
+
#define SYSCALL_EXIT_WORK_FLAGS \
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)
@@ -271,7 +277,7 @@ static void syscall_slow_exit_work(struc
* Called with IRQs on and fully valid regs. Returns with IRQs off in a
* state such that we can immediately switch to user mode.
*/
-__visible inline void syscall_return_slowpath(struct pt_regs *regs)
+__visible inline notrace void syscall_return_slowpath(struct pt_regs *regs)
{
struct thread_info *ti = current_thread_info();
u32 cached_flags = READ_ONCE(ti->flags);
@@ -292,8 +298,9 @@ static void syscall_slow_exit_work(struc
syscall_slow_exit_work(regs, cached_flags);
local_irq_disable();
- prepare_exit_to_usermode(regs);
+ __prepare_exit_to_usermode(regs);
}
+NOKPROBE_SYMBOL(syscall_return_slowpath);
#ifdef CONFIG_X86_64
static __always_inline
@@ -417,7 +424,7 @@ static __always_inline long do_fast_sysc
/* User code screwed up. */
local_irq_disable();
regs->ax = -EFAULT;
- prepare_exit_to_usermode(regs);
+ __prepare_exit_to_usermode(regs);
return 0; /* Keep it simple: use IRET. */
}
^ permalink raw reply [flat|nested] 56+ messages in thread
* [patch 6/8] x86/entry: Move irq tracing to syscall_slow_exit_work
2020-02-25 22:08 [patch 0/8] x86/entry: Consolidation - Part II Thomas Gleixner
` (4 preceding siblings ...)
2020-02-25 22:08 ` [patch 5/8] x86/entry/common: Provide trace/kprobe safe exit to user space functions Thomas Gleixner
@ 2020-02-25 22:08 ` Thomas Gleixner
2020-02-26 5:47 ` Andy Lutomirski
2020-02-27 16:12 ` Alexandre Chartre
2020-02-25 22:08 ` [patch 7/8] x86/entry: Move irq tracing to prepare_exit_to_user_mode() Thomas Gleixner
2020-02-25 22:08 ` [patch 8/8] x86/entry: Move irqflags tracing to do_int80_syscall_32() Thomas Gleixner
7 siblings, 2 replies; 56+ messages in thread
From: Thomas Gleixner @ 2020-02-25 22:08 UTC (permalink / raw)
To: LKML
Cc: x86, Steven Rostedt, Brian Gerst, Juergen Gross, Paolo Bonzini,
Arnd Bergmann
which removes the ASM IRQ tracepoints.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/common.c | 2 ++
arch/x86/entry/entry_32.S | 3 ++-
arch/x86/entry/entry_64.S | 3 ---
3 files changed, 4 insertions(+), 4 deletions(-)
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -299,6 +299,8 @@ static void syscall_slow_exit_work(struc
local_irq_disable();
__prepare_exit_to_usermode(regs);
+ /* Return to user space enables interrupts */
+ trace_hardirqs_on();
}
NOKPROBE_SYMBOL(syscall_return_slowpath);
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -812,7 +812,7 @@ SYM_CODE_START(ret_from_fork)
movl %esp, %eax
call syscall_return_slowpath
STACKLEAK_ERASE
- jmp restore_all
+ jmp restore_all_switch_stack
/* kernel thread */
1: movl %edi, %eax
@@ -1077,6 +1077,7 @@ SYM_FUNC_START(entry_INT80_32)
restore_all:
TRACE_IRQS_IRET
+restore_all_switch_stack:
SWITCH_TO_ENTRY_STACK
CHECK_AND_APPLY_ESPFIX
.Lrestore_nocheck:
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -172,8 +172,6 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_h
movq %rsp, %rsi
call do_syscall_64 /* returns with IRQs disabled */
- TRACE_IRQS_ON /* return enables interrupts */
-
/*
* Try to use SYSRET instead of IRET if we're returning to
* a completely clean 64-bit userspace context. If we're not,
@@ -340,7 +338,6 @@ SYM_CODE_START(ret_from_fork)
UNWIND_HINT_REGS
movq %rsp, %rdi
call syscall_return_slowpath /* returns with IRQs disabled */
- TRACE_IRQS_ON /* user mode is traced as IRQS on */
jmp swapgs_restore_regs_and_return_to_usermode
1:
^ permalink raw reply [flat|nested] 56+ messages in thread
* [patch 7/8] x86/entry: Move irq tracing to prepare_exit_to_user_mode()
2020-02-25 22:08 [patch 0/8] x86/entry: Consolidation - Part II Thomas Gleixner
` (5 preceding siblings ...)
2020-02-25 22:08 ` [patch 6/8] x86/entry: Move irq tracing to syscall_slow_exit_work Thomas Gleixner
@ 2020-02-25 22:08 ` Thomas Gleixner
2020-02-26 5:50 ` Andy Lutomirski
2020-02-25 22:08 ` [patch 8/8] x86/entry: Move irqflags tracing to do_int80_syscall_32() Thomas Gleixner
7 siblings, 1 reply; 56+ messages in thread
From: Thomas Gleixner @ 2020-02-25 22:08 UTC (permalink / raw)
To: LKML
Cc: x86, Steven Rostedt, Brian Gerst, Juergen Gross, Paolo Bonzini,
Arnd Bergmann
which again gets it out of the ASM code.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/common.c | 1 +
arch/x86/entry/entry_32.S | 2 +-
arch/x86/entry/entry_64.S | 1 -
3 files changed, 2 insertions(+), 2 deletions(-)
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -244,6 +244,7 @@ static inline void __prepare_exit_to_use
__visible inline notrace void prepare_exit_to_usermode(struct pt_regs *regs)
{
__prepare_exit_to_usermode(regs);
+ trace_hardirqs_on();
}
NOKPROBE_SYMBOL(prepare_exit_to_usermode);
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -855,7 +855,7 @@ SYM_CODE_START_LOCAL(ret_from_exception)
TRACE_IRQS_OFF
movl %esp, %eax
call prepare_exit_to_usermode
- jmp restore_all
+ jmp restore_all_switch_stack
SYM_CODE_END(ret_from_exception)
SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -614,7 +614,6 @@ SYM_CODE_START_LOCAL(common_interrupt)
.Lretint_user:
mov %rsp,%rdi
call prepare_exit_to_usermode
- TRACE_IRQS_ON
SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
#ifdef CONFIG_DEBUG_ENTRY
^ permalink raw reply [flat|nested] 56+ messages in thread
* [patch 8/8] x86/entry: Move irqflags tracing to do_int80_syscall_32()
2020-02-25 22:08 [patch 0/8] x86/entry: Consolidation - Part II Thomas Gleixner
` (6 preceding siblings ...)
2020-02-25 22:08 ` [patch 7/8] x86/entry: Move irq tracing to prepare_exit_to_user_mode() Thomas Gleixner
@ 2020-02-25 22:08 ` Thomas Gleixner
2020-02-27 16:46 ` Alexandre Chartre
7 siblings, 1 reply; 56+ messages in thread
From: Thomas Gleixner @ 2020-02-25 22:08 UTC (permalink / raw)
To: LKML
Cc: x86, Steven Rostedt, Brian Gerst, Juergen Gross, Paolo Bonzini,
Arnd Bergmann
which cleans up the ASM maze.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/common.c | 8 +++++++-
arch/x86/entry/entry_32.S | 9 ++-------
arch/x86/entry/entry_64_compat.S | 14 +++++---------
3 files changed, 14 insertions(+), 17 deletions(-)
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -333,6 +333,7 @@ void do_syscall_64_irqs_on(unsigned long
{
syscall_entry_fixups();
do_syscall_64_irqs_on(nr, regs);
+ trace_hardirqs_on();
}
NOKPROBE_SYMBOL(do_syscall_64);
#endif
@@ -389,6 +390,7 @@ static __always_inline void do_syscall_3
{
syscall_entry_fixups();
do_syscall_32_irqs_on(regs);
+ trace_hardirqs_on();
}
NOKPROBE_SYMBOL(do_int80_syscall_32);
@@ -468,8 +470,12 @@ static __always_inline long do_fast_sysc
/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
__visible notrace long do_fast_syscall_32(struct pt_regs *regs)
{
+ long ret;
+
syscall_entry_fixups();
- return do_fast_syscall_32_irqs_on(regs);
+ ret = do_fast_syscall_32_irqs_on(regs);
+ trace_hardirqs_on();
+ return ret;
}
NOKPROBE_SYMBOL(do_fast_syscall_32);
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -811,8 +811,7 @@ SYM_CODE_START(ret_from_fork)
/* When we fork, we trace the syscall return in the child, too. */
movl %esp, %eax
call syscall_return_slowpath
- STACKLEAK_ERASE
- jmp restore_all_switch_stack
+ jmp .Lsyscall_32_done
/* kernel thread */
1: movl %edi, %eax
@@ -968,8 +967,7 @@ SYM_FUNC_START(entry_SYSENTER_32)
STACKLEAK_ERASE
-/* Opportunistic SYSEXIT */
- TRACE_IRQS_ON /* User mode traces as IRQs on. */
+ /* Opportunistic SYSEXIT */
/*
* Setup entry stack - we keep the pointer in %eax and do the
@@ -1072,11 +1070,8 @@ SYM_FUNC_START(entry_INT80_32)
movl %esp, %eax
call do_int80_syscall_32
.Lsyscall_32_done:
-
STACKLEAK_ERASE
-restore_all:
- TRACE_IRQS_IRET
restore_all_switch_stack:
SWITCH_TO_ENTRY_STACK
CHECK_AND_APPLY_ESPFIX
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -132,8 +132,8 @@ SYM_FUNC_START(entry_SYSENTER_compat)
movq %rsp, %rdi
call do_fast_syscall_32
/* XEN PV guests always use IRET path */
- ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
- "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
+ ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \
+ "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
jmp sysret32_from_system_call
.Lsysenter_fix_flags:
@@ -244,8 +244,8 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_aft
movq %rsp, %rdi
call do_fast_syscall_32
/* XEN PV guests always use IRET path */
- ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
- "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
+ ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \
+ "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
/* Opportunistic SYSRET */
sysret32_from_system_call:
@@ -254,7 +254,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_aft
* stack. So let's erase the thread stack right now.
*/
STACKLEAK_ERASE
- TRACE_IRQS_ON /* User mode traces as IRQs on. */
+
movq RBX(%rsp), %rbx /* pt_regs->rbx */
movq RBP(%rsp), %rbp /* pt_regs->rbp */
movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */
@@ -393,9 +393,5 @@ SYM_CODE_START(entry_INT80_compat)
movq %rsp, %rdi
call do_int80_syscall_32
-.Lsyscall_32_done:
-
- /* Go back to user mode. */
- TRACE_IRQS_ON
jmp swapgs_restore_regs_and_return_to_usermode
SYM_CODE_END(entry_INT80_compat)
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-02-25 22:08 ` [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code Thomas Gleixner
@ 2020-02-26 5:43 ` Andy Lutomirski
2020-02-26 8:17 ` Peter Zijlstra
2020-02-27 23:11 ` Frederic Weisbecker
2020-02-28 9:00 ` Alexandre Chartre
2 siblings, 1 reply; 56+ messages in thread
From: Andy Lutomirski @ 2020-02-26 5:43 UTC (permalink / raw)
To: Thomas Gleixner, LKML
Cc: x86, Steven Rostedt, Brian Gerst, Juergen Gross, Paolo Bonzini,
Arnd Bergmann
On 2/25/20 2:08 PM, Thomas Gleixner wrote:
> Now that the C entry points are safe, move the irq flags tracing code into
> the entry helper.
I'm so confused.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
> arch/x86/entry/common.c | 5 +++++
> arch/x86/entry/entry_32.S | 12 ------------
> arch/x86/entry/entry_64.S | 2 --
> arch/x86/entry/entry_64_compat.S | 18 ------------------
> 4 files changed, 5 insertions(+), 32 deletions(-)
>
> --- a/arch/x86/entry/common.c
> +++ b/arch/x86/entry/common.c
> @@ -57,6 +57,11 @@ static inline void enter_from_user_mode(
> */
> static __always_inline void syscall_entry_fixups(void)
> {
> + /*
> + * Usermode is traced as interrupts enabled, but the syscall entry
> + * mechanisms disable interrupts. Tell the tracer.
> + */
> + trace_hardirqs_off();
Your earlier patches suggest quite strongly that tracing isn't safe
until enter_from_user_mode(). But trace_hardirqs_off() calls
trace_irq_disable_rcuidle(), which looks [0] like a tracepoint.
Did you perhaps mean to do this *after* enter_from_user_mode()?
(My tracepoint-fu is pretty bad, and I can't actually find where it's
defined as a tracepoint. But then it does nothing at all, so it must be
a tracepoint, right?)
> enter_from_user_mode();
> local_irq_enable();
> }
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 5/8] x86/entry/common: Provide trace/kprobe safe exit to user space functions
2020-02-25 22:08 ` [patch 5/8] x86/entry/common: Provide trace/kprobe safe exit to user space functions Thomas Gleixner
@ 2020-02-26 5:45 ` Andy Lutomirski
2020-02-26 8:15 ` Peter Zijlstra
2020-02-27 15:43 ` Alexandre Chartre
1 sibling, 1 reply; 56+ messages in thread
From: Andy Lutomirski @ 2020-02-26 5:45 UTC (permalink / raw)
To: Thomas Gleixner, LKML
Cc: x86, Steven Rostedt, Brian Gerst, Juergen Gross, Paolo Bonzini,
Arnd Bergmann
On 2/25/20 2:08 PM, Thomas Gleixner wrote:
> Split prepare_enter_to_user_mode() and mark it notrace/noprobe so the irq
> flags tracing on return can be put into it.
Is our tooling clever enough for this to do anything? You have a static
inline that is only called in one place, and the caller is notrace and
NOKPROBE. Does this actually allow tracing in the static inline callee?
--Andy
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 6/8] x86/entry: Move irq tracing to syscall_slow_exit_work
2020-02-25 22:08 ` [patch 6/8] x86/entry: Move irq tracing to syscall_slow_exit_work Thomas Gleixner
@ 2020-02-26 5:47 ` Andy Lutomirski
2020-02-27 16:12 ` Alexandre Chartre
1 sibling, 0 replies; 56+ messages in thread
From: Andy Lutomirski @ 2020-02-26 5:47 UTC (permalink / raw)
To: Thomas Gleixner, LKML
Cc: x86, Steven Rostedt, Brian Gerst, Juergen Gross, Paolo Bonzini,
Arnd Bergmann
On 2/25/20 2:08 PM, Thomas Gleixner wrote:
> which removes the ASM IRQ tracepoints.
It's still after we re-enter rcuidle. Is tracing actually okay?
I always had the impression that tracing was okay in rcuidle mode
because the tracing code was smart enough to do the right thing, but
your patch 3/8 changelog says:
Anything before enter_from_user_mode() is not safe to be traced or probed.
--Andy
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 7/8] x86/entry: Move irq tracing to prepare_exit_to_user_mode()
2020-02-25 22:08 ` [patch 7/8] x86/entry: Move irq tracing to prepare_exit_to_user_mode() Thomas Gleixner
@ 2020-02-26 5:50 ` Andy Lutomirski
2020-02-26 19:53 ` Thomas Gleixner
0 siblings, 1 reply; 56+ messages in thread
From: Andy Lutomirski @ 2020-02-26 5:50 UTC (permalink / raw)
To: Thomas Gleixner, LKML
Cc: x86, Steven Rostedt, Brian Gerst, Juergen Gross, Paolo Bonzini,
Arnd Bergmann
On 2/25/20 2:08 PM, Thomas Gleixner wrote:
> which again gets it out of the ASM code.
Why is this better than just sticking the tracing in
__prepare_exit_from_usermode() or just not splitting it in the first place?
(ISTM the real right solution is to make sure that it's okay for
trace_hardirqs_... to be safe even when rcuidle. But I may still be
missing something.)
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 5/8] x86/entry/common: Provide trace/kprobe safe exit to user space functions
2020-02-26 5:45 ` Andy Lutomirski
@ 2020-02-26 8:15 ` Peter Zijlstra
0 siblings, 0 replies; 56+ messages in thread
From: Peter Zijlstra @ 2020-02-26 8:15 UTC (permalink / raw)
To: Andy Lutomirski
Cc: Thomas Gleixner, LKML, x86, Steven Rostedt, Brian Gerst,
Juergen Gross, Paolo Bonzini, Arnd Bergmann
On Tue, Feb 25, 2020 at 09:45:11PM -0800, Andy Lutomirski wrote:
> On 2/25/20 2:08 PM, Thomas Gleixner wrote:
> > Split prepare_enter_to_user_mode() and mark it notrace/noprobe so the irq
> > flags tracing on return can be put into it.
>
> Is our tooling clever enough for thsi to do anything? You have a static
> inline that is only called in one place, and the caller is notrace and
> NOKPROBE. Does this actually allow tracing in the static inline callee?
tracing, no, but the NOKPROBE on inline functions is buggered afaiu.
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-02-26 5:43 ` Andy Lutomirski
@ 2020-02-26 8:17 ` Peter Zijlstra
2020-02-26 11:20 ` Andy Lutomirski
0 siblings, 1 reply; 56+ messages in thread
From: Peter Zijlstra @ 2020-02-26 8:17 UTC (permalink / raw)
To: Andy Lutomirski
Cc: Thomas Gleixner, LKML, x86, Steven Rostedt, Brian Gerst,
Juergen Gross, Paolo Bonzini, Arnd Bergmann
On Tue, Feb 25, 2020 at 09:43:46PM -0800, Andy Lutomirski wrote:
> On 2/25/20 2:08 PM, Thomas Gleixner wrote:
> > Now that the C entry points are safe, move the irq flags tracing code into
> > the entry helper.
>
> I'm so confused.
>
> >
> > Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> > ---
> > arch/x86/entry/common.c | 5 +++++
> > arch/x86/entry/entry_32.S | 12 ------------
> > arch/x86/entry/entry_64.S | 2 --
> > arch/x86/entry/entry_64_compat.S | 18 ------------------
> > 4 files changed, 5 insertions(+), 32 deletions(-)
> >
> > --- a/arch/x86/entry/common.c
> > +++ b/arch/x86/entry/common.c
> > @@ -57,6 +57,11 @@ static inline void enter_from_user_mode(
> > */
> > static __always_inline void syscall_entry_fixups(void)
> > {
> > + /*
> > + * Usermode is traced as interrupts enabled, but the syscall entry
> > + * mechanisms disable interrupts. Tell the tracer.
> > + */
> > + trace_hardirqs_off();
>
> Your earlier patches suggest quite strongly that tracing isn't safe
> until enter_from_user_mode(). But trace_hardirqs_off() calls
> trace_irq_disable_rcuidle(), which looks [0] like a tracepoint.
>
> Did you perhaps mean to do this *after* enter_from_user_mode()?
aside from the fact that enter_from_user_mode() itself also has a
tracepoint, the crucial detail is that we must not trace/kprobe the
function calling this.
Specifically for #PF, because we need read_cr2() before this. See later
patches.
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-02-26 8:17 ` Peter Zijlstra
@ 2020-02-26 11:20 ` Andy Lutomirski
2020-02-26 19:51 ` Thomas Gleixner
0 siblings, 1 reply; 56+ messages in thread
From: Andy Lutomirski @ 2020-02-26 11:20 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Andy Lutomirski, Thomas Gleixner, LKML, x86, Steven Rostedt,
Brian Gerst, Juergen Gross, Paolo Bonzini, Arnd Bergmann
> On Feb 26, 2020, at 12:17 AM, Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Tue, Feb 25, 2020 at 09:43:46PM -0800, Andy Lutomirski wrote:
>>> On 2/25/20 2:08 PM, Thomas Gleixner wrote:
>>> Now that the C entry points are safe, move the irq flags tracing code into
>>> the entry helper.
>>
>> I'm so confused.
>>
>>>
>>> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
>>> ---
>>> arch/x86/entry/common.c | 5 +++++
>>> arch/x86/entry/entry_32.S | 12 ------------
>>> arch/x86/entry/entry_64.S | 2 --
>>> arch/x86/entry/entry_64_compat.S | 18 ------------------
>>> 4 files changed, 5 insertions(+), 32 deletions(-)
>>>
>>> --- a/arch/x86/entry/common.c
>>> +++ b/arch/x86/entry/common.c
>>> @@ -57,6 +57,11 @@ static inline void enter_from_user_mode(
>>> */
>>> static __always_inline void syscall_entry_fixups(void)
>>> {
>>> + /*
>>> + * Usermode is traced as interrupts enabled, but the syscall entry
>>> + * mechanisms disable interrupts. Tell the tracer.
>>> + */
>>> + trace_hardirqs_off();
>>
>> Your earlier patches suggest quite strongly that tracing isn't safe
>> until enter_from_user_mode(). But trace_hardirqs_off() calls
>> trace_irq_disable_rcuidle(), which looks [0] like a tracepoint.
>>
>> Did you perhaps mean to do this *after* enter_from_user_mode()?
>
> aside from the fact that enter_from_user_mode() itself also has a
> tracepoint, the crucial detail is that we must not trace/kprobe the
> function calling this.
>
> Specifically for #PF, because we need read_cr2() before this. See later
> patches.
Indeed. I’m fine with this patch, but I still don’t understand what the changelog is about. And I’m still rather baffled by most of the notrace annotations in the series.
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-02-26 11:20 ` Andy Lutomirski
@ 2020-02-26 19:51 ` Thomas Gleixner
2020-02-29 14:44 ` Thomas Gleixner
0 siblings, 1 reply; 56+ messages in thread
From: Thomas Gleixner @ 2020-02-26 19:51 UTC (permalink / raw)
To: Andy Lutomirski, Peter Zijlstra
Cc: Andy Lutomirski, LKML, x86, Steven Rostedt, Brian Gerst,
Juergen Gross, Paolo Bonzini, Arnd Bergmann
Andy Lutomirski <luto@amacapital.net> writes:
>> On Feb 26, 2020, at 12:17 AM, Peter Zijlstra <peterz@infradead.org> wrote:
>> On Tue, Feb 25, 2020 at 09:43:46PM -0800, Andy Lutomirski wrote:
>>> Your earlier patches suggest quite strongly that tracing isn't safe
>>> until enter_from_user_mode(). But trace_hardirqs_off() calls
>>> trace_irq_disable_rcuidle(), which looks [0] like a tracepoint.
>>>
>>> Did you perhaps mean to do this *after* enter_from_user_mode()?
>>
>> aside from the fact that enter_from_user_mode() itself also has a
>> tracepoint, the crucial detail is that we must not trace/kprobe the
>> function calling this.
>>
>> Specifically for #PF, because we need read_cr2() before this. See later
>> patches.
>
> Indeed. I’m fine with this patch, but I still don’t understand what
> the changelog is about.
Yeah, the changelog is not really helpful. Let me fix that.
> And I’m still rather baffled by most of the notrace annotations in the
> series.
As discussed on IRC, this might be too broad, but then I rather have the
actual C-entry points neither traceable nor probable in general and
relax this by calling functions which can be traced and probed.
My rationale for this decision was that enter_from_user_mode() is marked
notrace/noprobe as well, so I kept the protection scope the same as we
had in the ASM maze which is marked noprobe already.
Thanks,
tglx
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 7/8] x86/entry: Move irq tracing to prepare_exit_to_user_mode()
2020-02-26 5:50 ` Andy Lutomirski
@ 2020-02-26 19:53 ` Thomas Gleixner
2020-02-26 20:07 ` Andy Lutomirski
0 siblings, 1 reply; 56+ messages in thread
From: Thomas Gleixner @ 2020-02-26 19:53 UTC (permalink / raw)
To: Andy Lutomirski, LKML
Cc: x86, Steven Rostedt, Brian Gerst, Juergen Gross, Paolo Bonzini,
Arnd Bergmann
Andy Lutomirski <luto@kernel.org> writes:
> On 2/25/20 2:08 PM, Thomas Gleixner wrote:
>> which again gets it out of the ASM code.
>
> Why is this better than just sticking the tracing in
> __prepare_exit_from_usermode() or just not splitting it in the first
> place?
The split is there because prepare_exit_from_usermode() is used from the
idtentry maze as well. Once that stuff is converted in the later series
the split goes away again.
Thanks,
tglx
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 7/8] x86/entry: Move irq tracing to prepare_exit_to_user_mode()
2020-02-26 19:53 ` Thomas Gleixner
@ 2020-02-26 20:07 ` Andy Lutomirski
0 siblings, 0 replies; 56+ messages in thread
From: Andy Lutomirski @ 2020-02-26 20:07 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Andy Lutomirski, LKML, X86 ML, Steven Rostedt, Brian Gerst,
Juergen Gross, Paolo Bonzini, Arnd Bergmann
On Wed, Feb 26, 2020 at 11:54 AM Thomas Gleixner <tglx@linutronix.de> wrote:
>
> Andy Lutomirski <luto@kernel.org> writes:
>
> > On 2/25/20 2:08 PM, Thomas Gleixner wrote:
> >> which again gets it out of the ASM code.
> >
> > Why is this better than just sticking the tracing in
> > __prepare_exit_from_usermode() or just not splitting it in the first
> > place?
>
> The split is there because prepare_exit_from_usermode() is used from the
> idtentry maze as well. Once that stuff is converted in the later series
> the split goes away again.
>
Okay.
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 5/8] x86/entry/common: Provide trace/kprobe safe exit to user space functions
2020-02-25 22:08 ` [patch 5/8] x86/entry/common: Provide trace/kprobe safe exit to user space functions Thomas Gleixner
2020-02-26 5:45 ` Andy Lutomirski
@ 2020-02-27 15:43 ` Alexandre Chartre
2020-02-27 15:53 ` Thomas Gleixner
1 sibling, 1 reply; 56+ messages in thread
From: Alexandre Chartre @ 2020-02-27 15:43 UTC (permalink / raw)
To: Thomas Gleixner, LKML
Cc: x86, Steven Rostedt, Brian Gerst, Juergen Gross, Paolo Bonzini,
Arnd Bergmann
On 2/25/20 11:08 PM, Thomas Gleixner wrote:
> Split prepare_enter_to_user_mode() and mark it notrace/noprobe so the irq
> flags tracing on return can be put into it.
This splits prepare_exit_to_usermode() not prepare_enter_to_user_mode().
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
> arch/x86/entry/common.c | 15 +++++++++++----
> 1 file changed, 11 insertions(+), 4 deletions(-)
>
> --- a/arch/x86/entry/common.c
> +++ b/arch/x86/entry/common.c
> @@ -196,7 +196,7 @@ static void exit_to_usermode_loop(struct
> }
>
> /* Called with IRQs disabled. */
> -__visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
> +static inline void __prepare_exit_to_usermode(struct pt_regs *regs)
> {
> struct thread_info *ti = current_thread_info();
> u32 cached_flags;
> @@ -241,6 +241,12 @@ static void exit_to_usermode_loop(struct
> mds_user_clear_cpu_buffers();
> }
>
> +__visible inline notrace void prepare_exit_to_usermode(struct pt_regs *regs)
> +{
> + __prepare_exit_to_usermode(regs);
> +}
> +NOKPROBE_SYMBOL(prepare_exit_to_usermode);
Would it be worth grouping local_irq_disable() and prepare_exit_to_usermode()
(similarly to what was done entry with syscall_entry_fixups()) and then put
trace_hardirqs_on() there too. For example:
static __always_inline void syscall_exit_fixups(struct pt_regs *regs)
{
local_irq_disable();
prepare_exit_to_usermode(regs);
trace_hardirqs_on();
}
Or is this planned once prepare_exit_from_usermode() is not used by idtentry
anymore?
Thanks,
alex.
> #define SYSCALL_EXIT_WORK_FLAGS \
> (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
> _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)
> @@ -271,7 +277,7 @@ static void syscall_slow_exit_work(struc
> * Called with IRQs on and fully valid regs. Returns with IRQs off in a
> * state such that we can immediately switch to user mode.
> */
> -__visible inline void syscall_return_slowpath(struct pt_regs *regs)
> +__visible inline notrace void syscall_return_slowpath(struct pt_regs *regs)
> {
> struct thread_info *ti = current_thread_info();
> u32 cached_flags = READ_ONCE(ti->flags);
> @@ -292,8 +298,9 @@ static void syscall_slow_exit_work(struc
> syscall_slow_exit_work(regs, cached_flags);
>
> local_irq_disable();
> - prepare_exit_to_usermode(regs);
> + __prepare_exit_to_usermode(regs);
> }
> +NOKPROBE_SYMBOL(syscall_return_slowpath);
>
> #ifdef CONFIG_X86_64
> static __always_inline
> @@ -417,7 +424,7 @@ static __always_inline long do_fast_sysc
> /* User code screwed up. */
> local_irq_disable();
> regs->ax = -EFAULT;
> - prepare_exit_to_usermode(regs);
> + __prepare_exit_to_usermode(regs);
> return 0; /* Keep it simple: use IRET. */
> }
>
>
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 5/8] x86/entry/common: Provide trace/kprobe safe exit to user space functions
2020-02-27 15:43 ` Alexandre Chartre
@ 2020-02-27 15:53 ` Thomas Gleixner
0 siblings, 0 replies; 56+ messages in thread
From: Thomas Gleixner @ 2020-02-27 15:53 UTC (permalink / raw)
To: Alexandre Chartre, LKML
Cc: x86, Steven Rostedt, Brian Gerst, Juergen Gross, Paolo Bonzini,
Arnd Bergmann
Alexandre Chartre <alexandre.chartre@oracle.com> writes:
> On 2/25/20 11:08 PM, Thomas Gleixner wrote:
>> Split prepare_enter_to_user_mode() and mark it notrace/noprobe so the irq
>> flags tracing on return can be put into it.
>
> This splits prepare_exit_to_usermode() not prepare_enter_to_user_mode().
Ooops
>> /* Called with IRQs disabled. */
>> -__visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
>> +static inline void __prepare_exit_to_usermode(struct pt_regs *regs)
>> {
>> struct thread_info *ti = current_thread_info();
>> u32 cached_flags;
>> @@ -241,6 +241,12 @@ static void exit_to_usermode_loop(struct
>> mds_user_clear_cpu_buffers();
>> }
>>
>> +__visible inline notrace void prepare_exit_to_usermode(struct pt_regs *regs)
>> +{
>> + __prepare_exit_to_usermode(regs);
>> +}
>> +NOKPROBE_SYMBOL(prepare_exit_to_usermode);
>
>
> Would it be worth grouping local_irq_disable() and prepare_exit_to_usermode()
> (similarly to what was done entry with syscall_entry_fixups()) and then put
> trace_hardirqs_on() there too. For example:
>
> static __always_inline void syscall_exit_fixups(struct pt_regs *regs)
> {
> local_irq_disable();
> prepare_exit_to_usermode(regs);
> trace_hardirqs_on();
> }
>
> Or is this planned once prepare_exit_from_usermode() is not used by idtentry
> anymore?
This should be consolidated at the very end when all the interrupt muck
is done, but maybe I forgot.
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 6/8] x86/entry: Move irq tracing to syscall_slow_exit_work
2020-02-25 22:08 ` [patch 6/8] x86/entry: Move irq tracing to syscall_slow_exit_work Thomas Gleixner
2020-02-26 5:47 ` Andy Lutomirski
@ 2020-02-27 16:12 ` Alexandre Chartre
1 sibling, 0 replies; 56+ messages in thread
From: Alexandre Chartre @ 2020-02-27 16:12 UTC (permalink / raw)
To: Thomas Gleixner, LKML
Cc: x86, Steven Rostedt, Brian Gerst, Juergen Gross, Paolo Bonzini,
Arnd Bergmann
On 2/25/20 11:08 PM, Thomas Gleixner wrote:
> which removes the ASM IRQ tracepoints.
This moves irq tracing to syscall_return_slowpath, not syscall_slow_exit_work,
right?
alex.
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
> arch/x86/entry/common.c | 2 ++
> arch/x86/entry/entry_32.S | 3 ++-
> arch/x86/entry/entry_64.S | 3 ---
> 3 files changed, 4 insertions(+), 4 deletions(-)
>
> --- a/arch/x86/entry/common.c
> +++ b/arch/x86/entry/common.c
> @@ -299,6 +299,8 @@ static void syscall_slow_exit_work(struc
>
> local_irq_disable();
> __prepare_exit_to_usermode(regs);
> + /* Return to user space enables interrupts */
> + trace_hardirqs_on();
> }
> NOKPROBE_SYMBOL(syscall_return_slowpath);
>
> --- a/arch/x86/entry/entry_32.S
> +++ b/arch/x86/entry/entry_32.S
> @@ -812,7 +812,7 @@ SYM_CODE_START(ret_from_fork)
> movl %esp, %eax
> call syscall_return_slowpath
> STACKLEAK_ERASE
> - jmp restore_all
> + jmp restore_all_switch_stack
>
> /* kernel thread */
> 1: movl %edi, %eax
> @@ -1077,6 +1077,7 @@ SYM_FUNC_START(entry_INT80_32)
>
> restore_all:
> TRACE_IRQS_IRET
> +restore_all_switch_stack:
> SWITCH_TO_ENTRY_STACK
> CHECK_AND_APPLY_ESPFIX
> .Lrestore_nocheck:
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -172,8 +172,6 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_h
> movq %rsp, %rsi
> call do_syscall_64 /* returns with IRQs disabled */
>
> - TRACE_IRQS_ON /* return enables interrupts */
> -
> /*
> * Try to use SYSRET instead of IRET if we're returning to
> * a completely clean 64-bit userspace context. If we're not,
> @@ -340,7 +338,6 @@ SYM_CODE_START(ret_from_fork)
> UNWIND_HINT_REGS
> movq %rsp, %rdi
> call syscall_return_slowpath /* returns with IRQs disabled */
> - TRACE_IRQS_ON /* user mode is traced as IRQS on */
> jmp swapgs_restore_regs_and_return_to_usermode
>
> 1:
>
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 8/8] x86/entry: Move irqflags tracing to do_int80_syscall_32()
2020-02-25 22:08 ` [patch 8/8] x86/entry: Move irqflags tracing to do_int80_syscall_32() Thomas Gleixner
@ 2020-02-27 16:46 ` Alexandre Chartre
2020-02-28 13:49 ` Thomas Gleixner
0 siblings, 1 reply; 56+ messages in thread
From: Alexandre Chartre @ 2020-02-27 16:46 UTC (permalink / raw)
To: Thomas Gleixner, LKML
Cc: x86, Steven Rostedt, Brian Gerst, Juergen Gross, Paolo Bonzini,
Arnd Bergmann
On 2/25/20 11:08 PM, Thomas Gleixner wrote:
> which cleans up the ASM maze.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
> arch/x86/entry/common.c | 8 +++++++-
> arch/x86/entry/entry_32.S | 9 ++-------
> arch/x86/entry/entry_64_compat.S | 14 +++++---------
> 3 files changed, 14 insertions(+), 17 deletions(-)
>
> --- a/arch/x86/entry/common.c
> +++ b/arch/x86/entry/common.c
> @@ -333,6 +333,7 @@ void do_syscall_64_irqs_on(unsigned long
> {
> syscall_entry_fixups();
> do_syscall_64_irqs_on(nr, regs);
> + trace_hardirqs_on();
> }
trace_hardirqs_on() is already called through syscall_return_slowpath()
(from the previous patch):
do_syscall_64()
-> do_syscall_64_irqs_on()
-> syscall_return_slowpath()
-> trace_hardirqs_on()
> NOKPROBE_SYMBOL(do_syscall_64);
> #endif
> @@ -389,6 +390,7 @@ static __always_inline void do_syscall_3
> {
> syscall_entry_fixups();
> do_syscall_32_irqs_on(regs);
> + trace_hardirqs_on();
> }
Same here:
do_int80_syscall_32()
-> do_syscall_32_irqs_on()
-> syscall_return_slowpath()
-> trace_hardirqs_on()
> NOKPROBE_SYMBOL(do_int80_syscall_32);
>
> @@ -468,8 +470,12 @@ static __always_inline long do_fast_sysc
> /* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
> __visible notrace long do_fast_syscall_32(struct pt_regs *regs)
> {
> + long ret;
> +
> syscall_entry_fixups();
> - return do_fast_syscall_32_irqs_on(regs);
> + ret = do_fast_syscall_32_irqs_on(regs);
> + trace_hardirqs_on();
> + return ret;
> }
> NOKPROBE_SYMBOL(do_fast_syscall_32);
Same here:
do_fast_syscall_32()
-> do_fast_syscall_32_irqs_on()
-> do_syscall_32_irqs_on()
-> syscall_return_slowpath()
-> trace_hardirqs_on()
Except for one case (if the get_user() call is true in
do_fast_syscall_32_irqs_on()):
do_fast_syscall_32()
-> do_fast_syscall_32_irqs_on()
-> prepare_exit_to_usermode()
So we need to call trace_hardirqs_on() but only in that case:
static __always_inline long do_fast_syscall_32_irqs_on(struct pt_regs *regs)
{
...
if (
#ifdef CONFIG_X86_64
/*
* Micro-optimization: the pointer we're following is explicitly
* 32 bits, so it can't be out of range.
*/
__get_user(*(u32 *)&regs->bp,
(u32 __user __force *)(unsigned long)(u32)regs->sp)
#else
get_user(*(u32 *)&regs->bp,
(u32 __user __force *)(unsigned long)(u32)regs->sp)
#endif
) {
/* User code screwed up. */
local_irq_disable();
regs->ax = -EFAULT;
prepare_exit_to_usermode(regs);
trace_hardirqs_on(); <<<=== HERE
return 0; /* Keep it simple: use IRET. */
}
...
}
alex.
> --- a/arch/x86/entry/entry_32.S
> +++ b/arch/x86/entry/entry_32.S
> @@ -811,8 +811,7 @@ SYM_CODE_START(ret_from_fork)
> /* When we fork, we trace the syscall return in the child, too. */
> movl %esp, %eax
> call syscall_return_slowpath
> - STACKLEAK_ERASE
> - jmp restore_all_switch_stack
> + jmp .Lsyscall_32_done
>
> /* kernel thread */
> 1: movl %edi, %eax
> @@ -968,8 +967,7 @@ SYM_FUNC_START(entry_SYSENTER_32)
>
> STACKLEAK_ERASE
>
> -/* Opportunistic SYSEXIT */
> - TRACE_IRQS_ON /* User mode traces as IRQs on. */
> + /* Opportunistic SYSEXIT */
>
> /*
> * Setup entry stack - we keep the pointer in %eax and do the
> @@ -1072,11 +1070,8 @@ SYM_FUNC_START(entry_INT80_32)
> movl %esp, %eax
> call do_int80_syscall_32
> .Lsyscall_32_done:
> -
> STACKLEAK_ERASE
>
> -restore_all:
> - TRACE_IRQS_IRET
> restore_all_switch_stack:
> SWITCH_TO_ENTRY_STACK
> CHECK_AND_APPLY_ESPFIX
> --- a/arch/x86/entry/entry_64_compat.S
> +++ b/arch/x86/entry/entry_64_compat.S
> @@ -132,8 +132,8 @@ SYM_FUNC_START(entry_SYSENTER_compat)
> movq %rsp, %rdi
> call do_fast_syscall_32
> /* XEN PV guests always use IRET path */
> - ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
> - "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
> + ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \
> + "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
> jmp sysret32_from_system_call
>
> .Lsysenter_fix_flags:
> @@ -244,8 +244,8 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_aft
> movq %rsp, %rdi
> call do_fast_syscall_32
> /* XEN PV guests always use IRET path */
> - ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
> - "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
> + ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \
> + "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
>
> /* Opportunistic SYSRET */
> sysret32_from_system_call:
> @@ -254,7 +254,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_aft
> * stack. So let's erase the thread stack right now.
> */
> STACKLEAK_ERASE
> - TRACE_IRQS_ON /* User mode traces as IRQs on. */
> +
> movq RBX(%rsp), %rbx /* pt_regs->rbx */
> movq RBP(%rsp), %rbp /* pt_regs->rbp */
> movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */
> @@ -393,9 +393,5 @@ SYM_CODE_START(entry_INT80_compat)
>
> movq %rsp, %rdi
> call do_int80_syscall_32
> -.Lsyscall_32_done:
> -
> - /* Go back to user mode. */
> - TRACE_IRQS_ON
> jmp swapgs_restore_regs_and_return_to_usermode
> SYM_CODE_END(entry_INT80_compat)
>
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 1/8] x86/entry/64: Trace irqflags unconditionally on when returing to user space
2020-02-25 22:08 ` [patch 1/8] x86/entry/64: Trace irqflags unconditionally on when returing to user space Thomas Gleixner
@ 2020-02-27 19:49 ` Borislav Petkov
2020-02-27 22:45 ` Frederic Weisbecker
2020-02-28 8:58 ` Alexandre Chartre
2 siblings, 0 replies; 56+ messages in thread
From: Borislav Petkov @ 2020-02-27 19:49 UTC (permalink / raw)
To: Thomas Gleixner
Cc: LKML, x86, Steven Rostedt, Brian Gerst, Juergen Gross,
Paolo Bonzini, Arnd Bergmann
Just formulations improvements:
> Subject: [patch 1/8] x86/entry/64: Trace irqflags unconditionally on when returing to user space
s/on //
On Tue, Feb 25, 2020 at 11:08:02PM +0100, Thomas Gleixner wrote:
> User space cannot longer disable interrupts so trace return to user space
"Userspace cannot disable interrupts any longer... "
> unconditionally as IRQS_ON.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
...
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 2/8] x86/entry/common: Consolidate syscall entry code
2020-02-25 22:08 ` [patch 2/8] x86/entry/common: Consolidate syscall entry code Thomas Gleixner
@ 2020-02-27 19:57 ` Borislav Petkov
2020-02-27 22:52 ` Frederic Weisbecker
2020-02-28 8:59 ` Alexandre Chartre
2 siblings, 0 replies; 56+ messages in thread
From: Borislav Petkov @ 2020-02-27 19:57 UTC (permalink / raw)
To: Thomas Gleixner
Cc: LKML, x86, Steven Rostedt, Brian Gerst, Juergen Gross,
Paolo Bonzini, Arnd Bergmann
On Tue, Feb 25, 2020 at 11:08:03PM +0100, Thomas Gleixner wrote:
> All syscall entry points call enter_from_user_mode() and
> local_irq_enable(). Move that into an inline helper so the interrupt
> tracing can be moved into that helper later instead of sprinkling
> it all over the place.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
> arch/x86/entry/common.c | 48 +++++++++++++++++++++++++++++++++---------------
> 1 file changed, 33 insertions(+), 15 deletions(-)
>
> --- a/arch/x86/entry/common.c
> +++ b/arch/x86/entry/common.c
> @@ -49,6 +49,18 @@
> static inline void enter_from_user_mode(void) {}
> #endif
>
> +/*
> + * All syscall entry variants call with interrupts disabled.
> + *
> + * Invoke context tracking if enabled and enable interrupts for further
> + * processing.
> + */
> +static __always_inline void syscall_entry_fixups(void)
Function name needs a verb. "do_syscall_entry_fixups()" seems to fit well
with where it is called or "apply_..." or so.
Rest looks ok.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 1/8] x86/entry/64: Trace irqflags unconditionally on when returing to user space
2020-02-25 22:08 ` [patch 1/8] x86/entry/64: Trace irqflags unconditionally on when returing to user space Thomas Gleixner
2020-02-27 19:49 ` Borislav Petkov
@ 2020-02-27 22:45 ` Frederic Weisbecker
2020-02-28 8:58 ` Alexandre Chartre
2 siblings, 0 replies; 56+ messages in thread
From: Frederic Weisbecker @ 2020-02-27 22:45 UTC (permalink / raw)
To: Thomas Gleixner
Cc: LKML, x86, Steven Rostedt, Brian Gerst, Juergen Gross,
Paolo Bonzini, Arnd Bergmann
On Tue, Feb 25, 2020 at 11:08:02PM +0100, Thomas Gleixner wrote:
> User space cannot longer disable interrupts so trace return to user space
> unconditionally as IRQS_ON.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 2/8] x86/entry/common: Consolidate syscall entry code
2020-02-25 22:08 ` [patch 2/8] x86/entry/common: Consolidate syscall entry code Thomas Gleixner
2020-02-27 19:57 ` Borislav Petkov
@ 2020-02-27 22:52 ` Frederic Weisbecker
2020-02-28 8:59 ` Alexandre Chartre
2 siblings, 0 replies; 56+ messages in thread
From: Frederic Weisbecker @ 2020-02-27 22:52 UTC (permalink / raw)
To: Thomas Gleixner
Cc: LKML, x86, Steven Rostedt, Brian Gerst, Juergen Gross,
Paolo Bonzini, Arnd Bergmann
On Tue, Feb 25, 2020 at 11:08:03PM +0100, Thomas Gleixner wrote:
> All syscall entry points call enter_from_user_mode() and
> local_irq_enable(). Move that into an inline helper so the interrupt
> tracing can be moved into that helper later instead of sprinkling
> it all over the place.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-02-25 22:08 ` [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code Thomas Gleixner
2020-02-26 5:43 ` Andy Lutomirski
@ 2020-02-27 23:11 ` Frederic Weisbecker
2020-02-28 9:00 ` Alexandre Chartre
2 siblings, 0 replies; 56+ messages in thread
From: Frederic Weisbecker @ 2020-02-27 23:11 UTC (permalink / raw)
To: Thomas Gleixner
Cc: LKML, x86, Steven Rostedt, Brian Gerst, Juergen Gross,
Paolo Bonzini, Arnd Bergmann
On Tue, Feb 25, 2020 at 11:08:05PM +0100, Thomas Gleixner wrote:
> Now that the C entry points are safe, move the irq flags tracing code into
> the entry helper.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 3/8] x86/entry/common: Mark syscall entry points notrace/nokprobe
2020-02-25 22:08 ` [patch 3/8] x86/entry/common: Mark syscall entry points notrace/nokprobe Thomas Gleixner
@ 2020-02-27 23:15 ` Frederic Weisbecker
2020-02-28 8:59 ` Alexandre Chartre
1 sibling, 0 replies; 56+ messages in thread
From: Frederic Weisbecker @ 2020-02-27 23:15 UTC (permalink / raw)
To: Thomas Gleixner
Cc: LKML, x86, Steven Rostedt, Brian Gerst, Juergen Gross,
Paolo Bonzini, Arnd Bergmann
On Tue, Feb 25, 2020 at 11:08:04PM +0100, Thomas Gleixner wrote:
> Anything before enter_from_user_mode() is not safe to be traced or probed.
Perhaps the changelog should clarify as to why exactly it isn't
safe to trace before enter_from_user_mode(). There have been
so many patches on that lately that I'm not clear on that either.
Thanks.
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 1/8] x86/entry/64: Trace irqflags unconditionally on when returing to user space
2020-02-25 22:08 ` [patch 1/8] x86/entry/64: Trace irqflags unconditionally on when returing to user space Thomas Gleixner
2020-02-27 19:49 ` Borislav Petkov
2020-02-27 22:45 ` Frederic Weisbecker
@ 2020-02-28 8:58 ` Alexandre Chartre
2 siblings, 0 replies; 56+ messages in thread
From: Alexandre Chartre @ 2020-02-28 8:58 UTC (permalink / raw)
To: Thomas Gleixner, LKML
Cc: x86, Steven Rostedt, Brian Gerst, Juergen Gross, Paolo Bonzini,
Arnd Bergmann
On 2/25/20 11:08 PM, Thomas Gleixner wrote:
> User space cannot longer disable interrupts so trace return to user space
> unconditionally as IRQS_ON.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
> arch/x86/entry/entry_64.S | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
Reviewed-by: Alexandre Chartre <alexandre.chartre@oracle.com>
alex.
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 2/8] x86/entry/common: Consolidate syscall entry code
2020-02-25 22:08 ` [patch 2/8] x86/entry/common: Consolidate syscall entry code Thomas Gleixner
2020-02-27 19:57 ` Borislav Petkov
2020-02-27 22:52 ` Frederic Weisbecker
@ 2020-02-28 8:59 ` Alexandre Chartre
2 siblings, 0 replies; 56+ messages in thread
From: Alexandre Chartre @ 2020-02-28 8:59 UTC (permalink / raw)
To: Thomas Gleixner, LKML
Cc: x86, Steven Rostedt, Brian Gerst, Juergen Gross, Paolo Bonzini,
Arnd Bergmann
On 2/25/20 11:08 PM, Thomas Gleixner wrote:
> All syscall entry points call enter_from_user_mode() and
> local_irq_enable(). Move that into an inline helper so the interrupt
> tracing can be moved into that helper later instead of sprinkling
> it all over the place.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
> arch/x86/entry/common.c | 48 +++++++++++++++++++++++++++++++++---------------
> 1 file changed, 33 insertions(+), 15 deletions(-)
>
Reviewed-by: Alexandre Chartre <alexandre.chartre@oracle.com>
alex.
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 3/8] x86/entry/common: Mark syscall entry points notrace/nokprobe
2020-02-25 22:08 ` [patch 3/8] x86/entry/common: Mark syscall entry points notrace/nokprobe Thomas Gleixner
2020-02-27 23:15 ` Frederic Weisbecker
@ 2020-02-28 8:59 ` Alexandre Chartre
1 sibling, 0 replies; 56+ messages in thread
From: Alexandre Chartre @ 2020-02-28 8:59 UTC (permalink / raw)
To: Thomas Gleixner, LKML
Cc: x86, Steven Rostedt, Brian Gerst, Juergen Gross, Paolo Bonzini,
Arnd Bergmann
On 2/25/20 11:08 PM, Thomas Gleixner wrote:
> Anything before enter_from_user_mode() is not safe to be traced or probed.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
> arch/x86/entry/common.c | 9 ++++++---
> 1 file changed, 6 insertions(+), 3 deletions(-)
>
Reviewed-by: Alexandre Chartre <alexandre.chartre@oracle.com>
alex.
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-02-25 22:08 ` [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code Thomas Gleixner
2020-02-26 5:43 ` Andy Lutomirski
2020-02-27 23:11 ` Frederic Weisbecker
@ 2020-02-28 9:00 ` Alexandre Chartre
2 siblings, 0 replies; 56+ messages in thread
From: Alexandre Chartre @ 2020-02-28 9:00 UTC (permalink / raw)
To: Thomas Gleixner, LKML
Cc: x86, Steven Rostedt, Brian Gerst, Juergen Gross, Paolo Bonzini,
Arnd Bergmann
On 2/25/20 11:08 PM, Thomas Gleixner wrote:
> Now that the C entry points are safe, move the irq flags tracing code into
> the entry helper.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
> arch/x86/entry/common.c | 5 +++++
> arch/x86/entry/entry_32.S | 12 ------------
> arch/x86/entry/entry_64.S | 2 --
> arch/x86/entry/entry_64_compat.S | 18 ------------------
> 4 files changed, 5 insertions(+), 32 deletions(-)
>
Reviewed-by: Alexandre Chartre <alexandre.chartre@oracle.com>
alex.
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 8/8] x86/entry: Move irqflags tracing to do_int80_syscall_32()
2020-02-27 16:46 ` Alexandre Chartre
@ 2020-02-28 13:49 ` Thomas Gleixner
0 siblings, 0 replies; 56+ messages in thread
From: Thomas Gleixner @ 2020-02-28 13:49 UTC (permalink / raw)
To: Alexandre Chartre, LKML
Cc: x86, Steven Rostedt, Brian Gerst, Juergen Gross, Paolo Bonzini,
Arnd Bergmann
Alexandre Chartre <alexandre.chartre@oracle.com> writes:
> On 2/25/20 11:08 PM, Thomas Gleixner wrote:
>> which cleans up the ASM maze.
>>
>> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
>> ---
>> arch/x86/entry/common.c | 8 +++++++-
>> arch/x86/entry/entry_32.S | 9 ++-------
>> arch/x86/entry/entry_64_compat.S | 14 +++++---------
>> 3 files changed, 14 insertions(+), 17 deletions(-)
>>
>> --- a/arch/x86/entry/common.c
>> +++ b/arch/x86/entry/common.c
>> @@ -333,6 +333,7 @@ void do_syscall_64_irqs_on(unsigned long
>> {
>> syscall_entry_fixups();
>> do_syscall_64_irqs_on(nr, regs);
>> + trace_hardirqs_on();
>> }
>
> trace_hardirqs_on() is already called through syscall_return_slowpath()
> (from the previous patch):
>
> do_syscall_64()
> -> do_syscall_64_irqs_on()
> -> syscall_return_slowpath()
> -> trace_hardirqs_on()
Duh, indeed.
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-02-26 19:51 ` Thomas Gleixner
@ 2020-02-29 14:44 ` Thomas Gleixner
2020-02-29 19:25 ` Andy Lutomirski
0 siblings, 1 reply; 56+ messages in thread
From: Thomas Gleixner @ 2020-02-29 14:44 UTC (permalink / raw)
To: Andy Lutomirski, Peter Zijlstra
Cc: Andy Lutomirski, LKML, x86, Steven Rostedt, Brian Gerst,
Juergen Gross, Paolo Bonzini, Arnd Bergmann, Paul E. McKenney
Thomas Gleixner <tglx@linutronix.de> writes:
> Andy Lutomirski <luto@amacapital.net> writes:
>>> On Feb 26, 2020, at 12:17 AM, Peter Zijlstra <peterz@infradead.org> wrote:
>>> On Tue, Feb 25, 2020 at 09:43:46PM -0800, Andy Lutomirski wrote:
>>>> Your earlier patches suggest quite strongly that tracing isn't safe
>>>> until enter_from_user_mode(). But trace_hardirqs_off() calls
>>>> trace_irq_disable_rcuidle(), which looks [0] like a tracepoint.
>>>>
>>>> Did you perhaps mean to do this *after* enter_from_user_mode()?
>>>
>>> aside from the fact that enter_from_user_mode() itself also has a
>>> tracepoint, the crucial detail is that we must not trace/kprobe the
>>> function calling this.
>>>
>>> Specifically for #PF, because we need read_cr2() before this. See later
>>> patches.
>>
>> Indeed. I’m fine with this patch, but I still don’t understand what
>> the changelog is about.
>
> Yeah, the changelog is not really helpful. Let me fix that.
>
>> And I’m still rather baffled by most of the notrace annotations in the
>> series.
>
> As discussed on IRC, this might be too broad, but then I rather have the
> actual C-entry points neither traceable nor probable in general and
> relax this by calling functions which can be traced and probed.
>
> My rationale for this decision was that enter_from_user_mode() is marked
> notrace/noprobe as well, so I kept the protection scope the same as we
> had in the ASM maze which is marked noprobe already.
I have second thoughts vs. tracing in this context.
While the tracer itself seems to handle this correctly, what about
things like BPF programs which can be attached to tracepoints and
function trace entries?
Is that really safe _before_ context tracking has updated RCU state?
Thanks,
tglx
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-02-29 14:44 ` Thomas Gleixner
@ 2020-02-29 19:25 ` Andy Lutomirski
2020-02-29 23:58 ` Steven Rostedt
0 siblings, 1 reply; 56+ messages in thread
From: Andy Lutomirski @ 2020-02-29 19:25 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Peter Zijlstra, Andy Lutomirski, LKML, x86, Steven Rostedt,
Brian Gerst, Juergen Gross, Paolo Bonzini, Arnd Bergmann,
Paul E. McKenney
> On Feb 29, 2020, at 6:44 AM, Thomas Gleixner <tglx@linutronix.de> wrote:
>
> Thomas Gleixner <tglx@linutronix.de> writes:
>> Andy Lutomirski <luto@amacapital.net> writes:
>>>>> On Feb 26, 2020, at 12:17 AM, Peter Zijlstra <peterz@infradead.org> wrote:
>>>>> On Tue, Feb 25, 2020 at 09:43:46PM -0800, Andy Lutomirski wrote:
>>>>>> Your earlier patches suggest quite strongly that tracing isn't safe
>>>>>> until enter_from_user_mode(). But trace_hardirqs_off() calls
>>>>>> trace_irq_disable_rcuidle(), which looks [0] like a tracepoint.
>>>>>>
>>>>>> Did you perhaps mean to do this *after* enter_from_user_mode()?
>>>>>
>>>>> aside from the fact that enter_from_user_mode() itself also has a
>>>>> tracepoint, the crucial detail is that we must not trace/kprobe the
>>>>> function calling this.
>>>>>
>>>>> Specifically for #PF, because we need read_cr2() before this. See later
>>>>> patches.
>>>
>>> Indeed. I’m fine with this patch, but I still don’t understand what
>>> the changelog is about.
>>
>> Yeah, the changelog is not really helpful. Let me fix that.
>>
>>> And I’m still rather baffled by most of the notrace annotations in the
>>> series.
>>
>> As discussed on IRC, this might be too broad, but then I rather have the
>> actual C-entry points neither traceable nor probeable in general and
>> relax this by calling functions which can be traced and probed.
>>
>> My rationale for this decision was that enter_from_user_mode() is marked
>> notrace/noprobe as well, so I kept the protection scope the same as we
>> had in the ASM maze which is marked noprobe already.
>
> I have second thoughts vs. tracing in this context.
>
> While the tracer itself seems to handle this correctly, what about
> things like BPF programs which can be attached to tracepoints and
> function trace entries?
I think that everything using the tracing code, including BPF, should either do its own rcuidle stuff or explicitly not execute if we’re not in CONTEXT_KERNEL. That is, we probably need to patch BPF.
>
> Is that really safe _before_ context tracking has updated RCU state?
>
> Thanks,
>
> tglx
>
>
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-02-29 19:25 ` Andy Lutomirski
@ 2020-02-29 23:58 ` Steven Rostedt
2020-03-01 10:16 ` Thomas Gleixner
0 siblings, 1 reply; 56+ messages in thread
From: Steven Rostedt @ 2020-02-29 23:58 UTC (permalink / raw)
To: Andy Lutomirski
Cc: Thomas Gleixner, Peter Zijlstra, Andy Lutomirski, LKML, x86,
Brian Gerst, Juergen Gross, Paolo Bonzini, Arnd Bergmann,
Paul E. McKenney
On Sat, 29 Feb 2020 11:25:24 -0800
Andy Lutomirski <luto@amacapital.net> wrote:
> > While the tracer itself seems to handle this correctly, what about
> > things like BPF programs which can be attached to tracepoints and
> > function trace entries?
>
> I think that everything using the tracing code, including BPF, should
> either do its own rcuidle stuff or explicitly not execute if we’re
> not in CONTEXT_KERNEL. That is, we probably need to patch BPF.
That's basically the route we are taking.
-- Steve
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-02-29 23:58 ` Steven Rostedt
@ 2020-03-01 10:16 ` Thomas Gleixner
2020-03-01 14:37 ` Andy Lutomirski
0 siblings, 1 reply; 56+ messages in thread
From: Thomas Gleixner @ 2020-03-01 10:16 UTC (permalink / raw)
To: Steven Rostedt, Andy Lutomirski
Cc: Peter Zijlstra, Andy Lutomirski, LKML, x86, Brian Gerst,
Juergen Gross, Paolo Bonzini, Arnd Bergmann, Paul E. McKenney
Steven Rostedt <rostedt@goodmis.org> writes:
> On Sat, 29 Feb 2020 11:25:24 -0800
> Andy Lutomirski <luto@amacapital.net> wrote:
>
>> > While the tracer itself seems to handle this correctly, what about
>> > things like BPF programs which can be attached to tracepoints and
>> > function trace entries?
>>
>> I think that everything using the tracing code, including BPF, should
>> either do its own rcuidle stuff or explicitly not execute if we’re
>> not in CONTEXT_KERNEL. That is, we probably need to patch BPF.
>
> That's basically the route we are taking.
Ok, but for the time being anything before/after CONTEXT_KERNEL is unsafe
except trace_hardirq_off/on() as those trace functions do not allow to
attach anything AFAICT.
Thanks,
tglx
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-03-01 10:16 ` Thomas Gleixner
@ 2020-03-01 14:37 ` Andy Lutomirski
2020-03-01 15:21 ` Thomas Gleixner
0 siblings, 1 reply; 56+ messages in thread
From: Andy Lutomirski @ 2020-03-01 14:37 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Steven Rostedt, Peter Zijlstra, Andy Lutomirski, LKML, x86,
Brian Gerst, Juergen Gross, Paolo Bonzini, Arnd Bergmann,
Paul E. McKenney
> On Mar 1, 2020, at 2:16 AM, Thomas Gleixner <tglx@linutronix.de> wrote:
>
> Steven Rostedt <rostedt@goodmis.org> writes:
>
>> On Sat, 29 Feb 2020 11:25:24 -0800
>> Andy Lutomirski <luto@amacapital.net> wrote:
>>
>>>> While the tracer itself seems to handle this correctly, what about
>>>> things like BPF programs which can be attached to tracepoints and
>>>> function trace entries?
>>>
>>> I think that everything using the tracing code, including BPF, should
>>> either do its own rcuidle stuff or explicitly not execute if we’re
>>> not in CONTEXT_KERNEL. That is, we probably need to patch BPF.
>>
>> That's basically the route we are taking.
>
> Ok, but for the time being anything before/after CONTEXT_KERNEL is unsafe
> except trace_hardirq_off/on() as those trace functions do not allow to
> attach anything AFAICT.
Can you point to whatever makes those particular functions special? I failed to follow the macro maze.
>
> Thanks,
>
> tglx
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-03-01 14:37 ` Andy Lutomirski
@ 2020-03-01 15:21 ` Thomas Gleixner
2020-03-01 16:00 ` Andy Lutomirski
2020-03-01 18:20 ` Steven Rostedt
0 siblings, 2 replies; 56+ messages in thread
From: Thomas Gleixner @ 2020-03-01 15:21 UTC (permalink / raw)
To: Andy Lutomirski
Cc: Steven Rostedt, Peter Zijlstra, Andy Lutomirski, LKML, x86,
Brian Gerst, Juergen Gross, Paolo Bonzini, Arnd Bergmann,
Paul E. McKenney
Andy Lutomirski <luto@amacapital.net> writes:
>> On Mar 1, 2020, at 2:16 AM, Thomas Gleixner <tglx@linutronix.de> wrote:
>> Ok, but for the time being anything before/after CONTEXT_KERNEL is unsafe
>> except trace_hardirq_off/on() as those trace functions do not allow to
>> attach anything AFAICT.
>
> Can you point to whatever makes those particular functions special? I
> failed to follow the macro maze.
Those are not tracepoints and not going through the macro maze. See
kernel/trace/trace_preemptirq.c
Thanks,
tglx
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-03-01 15:21 ` Thomas Gleixner
@ 2020-03-01 16:00 ` Andy Lutomirski
2020-03-01 18:12 ` Thomas Gleixner
2020-03-01 18:23 ` Steven Rostedt
2020-03-01 18:20 ` Steven Rostedt
1 sibling, 2 replies; 56+ messages in thread
From: Andy Lutomirski @ 2020-03-01 16:00 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Steven Rostedt, Peter Zijlstra, Andy Lutomirski, LKML, X86 ML,
Brian Gerst, Juergen Gross, Paolo Bonzini, Arnd Bergmann,
Paul E. McKenney
On Sun, Mar 1, 2020 at 7:21 AM Thomas Gleixner <tglx@linutronix.de> wrote:
>
> Andy Lutomirski <luto@amacapital.net> writes:
> >> On Mar 1, 2020, at 2:16 AM, Thomas Gleixner <tglx@linutronix.de> wrote:
> >> Ok, but for the time being anything before/after CONTEXT_KERNEL is unsafe
> >> except trace_hardirq_off/on() as those trace functions do not allow to
> >> attach anything AFAICT.
> >
> > Can you point to whatever makes those particular functions special? I
> > failed to follow the macro maze.
>
> Those are not tracepoints and not going through the macro maze. See
> kernel/trace/trace_preemptirq.c
That has:
void trace_hardirqs_on(void)
{
if (this_cpu_read(tracing_irq_cpu)) {
if (!in_nmi())
trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1);
this_cpu_write(tracing_irq_cpu, 0);
}
lockdep_hardirqs_on(CALLER_ADDR0);
}
EXPORT_SYMBOL(trace_hardirqs_on);
NOKPROBE_SYMBOL(trace_hardirqs_on);
But this calls trace_irq_enable_rcuidle(), and that's the part of the
macro maze I got lost in. I found:
#ifdef CONFIG_TRACE_IRQFLAGS
DEFINE_EVENT(preemptirq_template, irq_disable,
TP_PROTO(unsigned long ip, unsigned long parent_ip),
TP_ARGS(ip, parent_ip));
DEFINE_EVENT(preemptirq_template, irq_enable,
TP_PROTO(unsigned long ip, unsigned long parent_ip),
TP_ARGS(ip, parent_ip));
#else
#define trace_irq_enable(...)
#define trace_irq_disable(...)
#define trace_irq_enable_rcuidle(...)
#define trace_irq_disable_rcuidle(...)
#endif
But the DEFINE_EVENT doesn't have the "_rcuidle" part. And that's
where I got lost in the macro maze. I looked at the gcc asm output,
and there is, indeed:
# ./include/trace/events/preemptirq.h:40:
DEFINE_EVENT(preemptirq_template, irq_enable,
with a bunch of asm magic that looks like it's probably a tracepoint.
I still don't quite see where the "_rcuidle" went.
But I also don't see why this is any different from any other tracepoint.
--Andy
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-03-01 16:00 ` Andy Lutomirski
@ 2020-03-01 18:12 ` Thomas Gleixner
2020-03-01 18:26 ` Paul E. McKenney
2020-03-01 18:23 ` Steven Rostedt
1 sibling, 1 reply; 56+ messages in thread
From: Thomas Gleixner @ 2020-03-01 18:12 UTC (permalink / raw)
To: Andy Lutomirski
Cc: Steven Rostedt, Peter Zijlstra, Andy Lutomirski, LKML, X86 ML,
Brian Gerst, Juergen Gross, Paolo Bonzini, Arnd Bergmann,
Paul E. McKenney
Andy Lutomirski <luto@kernel.org> writes:
> On Sun, Mar 1, 2020 at 7:21 AM Thomas Gleixner <tglx@linutronix.de> wrote:
>> Andy Lutomirski <luto@amacapital.net> writes:
>> >> On Mar 1, 2020, at 2:16 AM, Thomas Gleixner <tglx@linutronix.de> wrote:
>> >> Ok, but for the time being anything before/after CONTEXT_KERNEL is unsafe
>> >> except trace_hardirq_off/on() as those trace functions do not allow to
>> >> attach anything AFAICT.
>> >
>> > Can you point to whatever makes those particular functions special? I
>> > failed to follow the macro maze.
>>
>> Those are not tracepoints and not going through the macro maze. See
>> kernel/trace/trace_preemptirq.c
>
> That has:
>
> void trace_hardirqs_on(void)
> {
> if (this_cpu_read(tracing_irq_cpu)) {
> if (!in_nmi())
> trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
> tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1);
> this_cpu_write(tracing_irq_cpu, 0);
> }
>
> lockdep_hardirqs_on(CALLER_ADDR0);
> }
> EXPORT_SYMBOL(trace_hardirqs_on);
> NOKPROBE_SYMBOL(trace_hardirqs_on);
>
> But this calls trace_irq_enable_rcuidle(), and that's the part of the
> macro maze I got lost in. I found:
>
> #ifdef CONFIG_TRACE_IRQFLAGS
> DEFINE_EVENT(preemptirq_template, irq_disable,
> TP_PROTO(unsigned long ip, unsigned long parent_ip),
> TP_ARGS(ip, parent_ip));
>
> DEFINE_EVENT(preemptirq_template, irq_enable,
> TP_PROTO(unsigned long ip, unsigned long parent_ip),
> TP_ARGS(ip, parent_ip));
> #else
> #define trace_irq_enable(...)
> #define trace_irq_disable(...)
> #define trace_irq_enable_rcuidle(...)
> #define trace_irq_disable_rcuidle(...)
> #endif
>
> But the DEFINE_EVENT doesn't have the "_rcuidle" part. And that's
> where I got lost in the macro maze. I looked at the gcc asm output,
> and there is, indeed:
DEFINE_EVENT
DECLARE_TRACE
__DECLARE_TRACE
__DECLARE_TRACE_RCU
static inline void trace_##name##_rcuidle(proto)
__DO_TRACE
if (rcuidle)
....
> But I also don't see why this is any different from any other tracepoint.
Indeed. I took a wrong turn at some point in the macro jungle :)
So tracing itself is fine, but then if you have probes or bpf programs
attached to a tracepoint these use rcu_read_lock()/unlock() which is
obviously wrong in rcuidle context.
Thanks,
tglx
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-03-01 15:21 ` Thomas Gleixner
2020-03-01 16:00 ` Andy Lutomirski
@ 2020-03-01 18:20 ` Steven Rostedt
1 sibling, 0 replies; 56+ messages in thread
From: Steven Rostedt @ 2020-03-01 18:20 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Andy Lutomirski, Peter Zijlstra, Andy Lutomirski, LKML, x86,
Brian Gerst, Juergen Gross, Paolo Bonzini, Arnd Bergmann,
Paul E. McKenney, Joel Fernandes
On Sun, 01 Mar 2020 16:21:15 +0100
Thomas Gleixner <tglx@linutronix.de> wrote:
> Andy Lutomirski <luto@amacapital.net> writes:
> >> On Mar 1, 2020, at 2:16 AM, Thomas Gleixner <tglx@linutronix.de> wrote:
> >> Ok, but for the time being anything before/after CONTEXT_KERNEL is unsafe
> >> except trace_hardirq_off/on() as those trace functions do not allow to
> >> attach anything AFAICT.
> >
> > Can you point to whatever makes those particular functions special? I
> > failed to follow the macro maze.
>
> Those are not tracepoints and not going through the macro maze. See
> kernel/trace/trace_preemptirq.c
For the latency tracing, they do call into the tracing infrastructure,
not just lockdep. And Joel Fernandes did try to make these into trace
events as well.
-- Steve
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-03-01 16:00 ` Andy Lutomirski
2020-03-01 18:12 ` Thomas Gleixner
@ 2020-03-01 18:23 ` Steven Rostedt
1 sibling, 0 replies; 56+ messages in thread
From: Steven Rostedt @ 2020-03-01 18:23 UTC (permalink / raw)
To: Andy Lutomirski
Cc: Thomas Gleixner, Peter Zijlstra, LKML, X86 ML, Brian Gerst,
Juergen Gross, Paolo Bonzini, Arnd Bergmann, Paul E. McKenney
On Sun, 1 Mar 2020 08:00:01 -0800
Andy Lutomirski <luto@kernel.org> wrote:
> But the DEFINE_EVENT doesn't have the "_rcuidle" part. And that's
> where I got lost in the macro maze. I looked at the gcc asm output,
> and there is, indeed:
>
> # ./include/trace/events/preemptirq.h:40:
> DEFINE_EVENT(preemptirq_template, irq_enable,
>
> with a bunch of asm magic that looks like it's probably a tracepoint.
> I still don't quite see where the "_rcuidle" went.
See the code in include/linux/tracepoint.h and search for "rcuidle"
there. All defined trace events get a "_rcuidle" version, but as it is
declared a static inline, then they only show up if they are used.
-- Steve
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-03-01 18:12 ` Thomas Gleixner
@ 2020-03-01 18:26 ` Paul E. McKenney
2020-03-01 18:54 ` Andy Lutomirski
0 siblings, 1 reply; 56+ messages in thread
From: Paul E. McKenney @ 2020-03-01 18:26 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Andy Lutomirski, Steven Rostedt, Peter Zijlstra, LKML, X86 ML,
Brian Gerst, Juergen Gross, Paolo Bonzini, Arnd Bergmann
On Sun, Mar 01, 2020 at 07:12:25PM +0100, Thomas Gleixner wrote:
> Andy Lutomirski <luto@kernel.org> writes:
> > On Sun, Mar 1, 2020 at 7:21 AM Thomas Gleixner <tglx@linutronix.de> wrote:
> >> Andy Lutomirski <luto@amacapital.net> writes:
> >> >> On Mar 1, 2020, at 2:16 AM, Thomas Gleixner <tglx@linutronix.de> wrote:
> >> >> Ok, but for the time being anything before/after CONTEXT_KERNEL is unsafe
> >> >> except trace_hardirq_off/on() as those trace functions do not allow to
> >> >> attach anything AFAICT.
> >> >
> >> > Can you point to whatever makes those particular functions special? I
> >> > failed to follow the macro maze.
> >>
> >> Those are not tracepoints and not going through the macro maze. See
> >> kernel/trace/trace_preemptirq.c
> >
> > That has:
> >
> > void trace_hardirqs_on(void)
> > {
> > if (this_cpu_read(tracing_irq_cpu)) {
> > if (!in_nmi())
> > trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
> > tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1);
> > this_cpu_write(tracing_irq_cpu, 0);
> > }
> >
> > lockdep_hardirqs_on(CALLER_ADDR0);
> > }
> > EXPORT_SYMBOL(trace_hardirqs_on);
> > NOKPROBE_SYMBOL(trace_hardirqs_on);
> >
> > But this calls trace_irq_enable_rcuidle(), and that's the part of the
> > macro maze I got lost in. I found:
> >
> > #ifdef CONFIG_TRACE_IRQFLAGS
> > DEFINE_EVENT(preemptirq_template, irq_disable,
> > TP_PROTO(unsigned long ip, unsigned long parent_ip),
> > TP_ARGS(ip, parent_ip));
> >
> > DEFINE_EVENT(preemptirq_template, irq_enable,
> > TP_PROTO(unsigned long ip, unsigned long parent_ip),
> > TP_ARGS(ip, parent_ip));
> > #else
> > #define trace_irq_enable(...)
> > #define trace_irq_disable(...)
> > #define trace_irq_enable_rcuidle(...)
> > #define trace_irq_disable_rcuidle(...)
> > #endif
> >
> > But the DEFINE_EVENT doesn't have the "_rcuidle" part. And that's
> > where I got lost in the macro maze. I looked at the gcc asm output,
> > and there is, indeed:
>
> DEFINE_EVENT
> DECLARE_TRACE
> __DECLARE_TRACE
> __DECLARE_TRACE_RCU
> static inline void trace_##name##_rcuidle(proto)
> __DO_TRACE
> if (rcuidle)
> ....
>
> > But I also don't see why this is any different from any other tracepoint.
>
> Indeed. I took a wrong turn at some point in the macro jungle :)
>
> So tracing itself is fine, but then if you have probes or bpf programs
> attached to a tracepoint these use rcu_read_lock()/unlock() which is
> obviously wrong in rcuidle context.
Definitely, any such code needs to use tricks similar to that of the
tracing code. Or instead use something like SRCU, which is OK with
readers from idle. Or use something like Steve Rostedt's workqueue-based
approach, though please be very careful with this latter, lest the
battery-powered embedded guys come after you for waking up idle CPUs
too often. ;-)
Thanx, Paul
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-03-01 18:26 ` Paul E. McKenney
@ 2020-03-01 18:54 ` Andy Lutomirski
2020-03-01 19:30 ` Paul E. McKenney
` (2 more replies)
0 siblings, 3 replies; 56+ messages in thread
From: Andy Lutomirski @ 2020-03-01 18:54 UTC (permalink / raw)
To: Paul E. McKenney
Cc: Thomas Gleixner, Andy Lutomirski, Steven Rostedt, Peter Zijlstra,
LKML, X86 ML, Brian Gerst, Juergen Gross, Paolo Bonzini,
Arnd Bergmann
On Sun, Mar 1, 2020 at 10:26 AM Paul E. McKenney <paulmck@kernel.org> wrote:
>
> On Sun, Mar 01, 2020 at 07:12:25PM +0100, Thomas Gleixner wrote:
> > Andy Lutomirski <luto@kernel.org> writes:
> > > On Sun, Mar 1, 2020 at 7:21 AM Thomas Gleixner <tglx@linutronix.de> wrote:
> > >> Andy Lutomirski <luto@amacapital.net> writes:
> > >> >> On Mar 1, 2020, at 2:16 AM, Thomas Gleixner <tglx@linutronix.de> wrote:
> > >> >> Ok, but for the time being anything before/after CONTEXT_KERNEL is unsafe
> > >> >> except trace_hardirq_off/on() as those trace functions do not allow to
> > >> >> attach anything AFAICT.
> > >> >
> > >> > Can you point to whatever makes those particular functions special? I
> > >> > failed to follow the macro maze.
> > >>
> > >> Those are not tracepoints and not going through the macro maze. See
> > >> kernel/trace/trace_preemptirq.c
> > >
> > > That has:
> > >
> > > void trace_hardirqs_on(void)
> > > {
> > > if (this_cpu_read(tracing_irq_cpu)) {
> > > if (!in_nmi())
> > > trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
> > > tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1);
> > > this_cpu_write(tracing_irq_cpu, 0);
> > > }
> > >
> > > lockdep_hardirqs_on(CALLER_ADDR0);
> > > }
> > > EXPORT_SYMBOL(trace_hardirqs_on);
> > > NOKPROBE_SYMBOL(trace_hardirqs_on);
> > >
> > > But this calls trace_irq_enable_rcuidle(), and that's the part of the
> > > macro maze I got lost in. I found:
> > >
> > > #ifdef CONFIG_TRACE_IRQFLAGS
> > > DEFINE_EVENT(preemptirq_template, irq_disable,
> > > TP_PROTO(unsigned long ip, unsigned long parent_ip),
> > > TP_ARGS(ip, parent_ip));
> > >
> > > DEFINE_EVENT(preemptirq_template, irq_enable,
> > > TP_PROTO(unsigned long ip, unsigned long parent_ip),
> > > TP_ARGS(ip, parent_ip));
> > > #else
> > > #define trace_irq_enable(...)
> > > #define trace_irq_disable(...)
> > > #define trace_irq_enable_rcuidle(...)
> > > #define trace_irq_disable_rcuidle(...)
> > > #endif
> > >
> > > But the DEFINE_EVENT doesn't have the "_rcuidle" part. And that's
> > > where I got lost in the macro maze. I looked at the gcc asm output,
> > > and there is, indeed:
> >
> > DEFINE_EVENT
> > DECLARE_TRACE
> > __DECLARE_TRACE
> > __DECLARE_TRACE_RCU
> > static inline void trace_##name##_rcuidle(proto)
> > __DO_TRACE
> > if (rcuidle)
> > ....
> >
> > > But I also don't see why this is any different from any other tracepoint.
> >
> > Indeed. I took a wrong turn at some point in the macro jungle :)
> >
> > So tracing itself is fine, but then if you have probes or bpf programs
> > attached to a tracepoint these use rcu_read_lock()/unlock() which is
> > obviously wrong in rcuidle context.
>
> Definitely, any such code needs to use tricks similar to that of the
> tracing code. Or instead use something like SRCU, which is OK with
> readers from idle. Or use something like Steve Rostedt's workqueue-based
> approach, though please be very careful with this latter, lest the
> battery-powered embedded guys come after you for waking up idle CPUs
> too often. ;-)
>
Are we okay if we somehow ensure that all the entry code before
enter_from_user_mode() only does rcuidle tracing variants and has
kprobes off? Including for BPF use cases?
It would be *really* nice if we could statically verify this, as has
been mentioned elsewhere in the thread. It would also probably be
good enough if we could do it at runtime. Maybe with lockdep on, we
verify rcu state in tracepoints even if the tracepoint isn't active?
And we could plausibly have some widget that could inject something
into *every* kprobeable function to check rcu state.
--Andy
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-03-01 18:54 ` Andy Lutomirski
@ 2020-03-01 19:30 ` Paul E. McKenney
2020-03-01 19:39 ` Andy Lutomirski
2020-03-02 1:10 ` Joel Fernandes
2020-03-02 8:10 ` Thomas Gleixner
2 siblings, 1 reply; 56+ messages in thread
From: Paul E. McKenney @ 2020-03-01 19:30 UTC (permalink / raw)
To: Andy Lutomirski
Cc: Thomas Gleixner, Steven Rostedt, Peter Zijlstra, LKML, X86 ML,
Brian Gerst, Juergen Gross, Paolo Bonzini, Arnd Bergmann
On Sun, Mar 01, 2020 at 10:54:23AM -0800, Andy Lutomirski wrote:
> On Sun, Mar 1, 2020 at 10:26 AM Paul E. McKenney <paulmck@kernel.org> wrote:
> >
> > On Sun, Mar 01, 2020 at 07:12:25PM +0100, Thomas Gleixner wrote:
> > > Andy Lutomirski <luto@kernel.org> writes:
> > > > On Sun, Mar 1, 2020 at 7:21 AM Thomas Gleixner <tglx@linutronix.de> wrote:
> > > >> Andy Lutomirski <luto@amacapital.net> writes:
> > > >> >> On Mar 1, 2020, at 2:16 AM, Thomas Gleixner <tglx@linutronix.de> wrote:
> > > >> >> Ok, but for the time being anything before/after CONTEXT_KERNEL is unsafe
> > > >> >> except trace_hardirq_off/on() as those trace functions do not allow to
> > > >> >> attach anything AFAICT.
> > > >> >
> > > >> > Can you point to whatever makes those particular functions special? I
> > > >> > failed to follow the macro maze.
> > > >>
> > > >> Those are not tracepoints and not going through the macro maze. See
> > > >> kernel/trace/trace_preemptirq.c
> > > >
> > > > That has:
> > > >
> > > > void trace_hardirqs_on(void)
> > > > {
> > > > if (this_cpu_read(tracing_irq_cpu)) {
> > > > if (!in_nmi())
> > > > trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
> > > > tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1);
> > > > this_cpu_write(tracing_irq_cpu, 0);
> > > > }
> > > >
> > > > lockdep_hardirqs_on(CALLER_ADDR0);
> > > > }
> > > > EXPORT_SYMBOL(trace_hardirqs_on);
> > > > NOKPROBE_SYMBOL(trace_hardirqs_on);
> > > >
> > > > But this calls trace_irq_enable_rcuidle(), and that's the part of the
> > > > macro maze I got lost in. I found:
> > > >
> > > > #ifdef CONFIG_TRACE_IRQFLAGS
> > > > DEFINE_EVENT(preemptirq_template, irq_disable,
> > > > TP_PROTO(unsigned long ip, unsigned long parent_ip),
> > > > TP_ARGS(ip, parent_ip));
> > > >
> > > > DEFINE_EVENT(preemptirq_template, irq_enable,
> > > > TP_PROTO(unsigned long ip, unsigned long parent_ip),
> > > > TP_ARGS(ip, parent_ip));
> > > > #else
> > > > #define trace_irq_enable(...)
> > > > #define trace_irq_disable(...)
> > > > #define trace_irq_enable_rcuidle(...)
> > > > #define trace_irq_disable_rcuidle(...)
> > > > #endif
> > > >
> > > > But the DEFINE_EVENT doesn't have the "_rcuidle" part. And that's
> > > > where I got lost in the macro maze. I looked at the gcc asm output,
> > > > and there is, indeed:
> > >
> > > DEFINE_EVENT
> > > DECLARE_TRACE
> > > __DECLARE_TRACE
> > > __DECLARE_TRACE_RCU
> > > static inline void trace_##name##_rcuidle(proto)
> > > __DO_TRACE
> > > if (rcuidle)
> > > ....
> > >
> > > > But I also don't see why this is any different from any other tracepoint.
> > >
> > > Indeed. I took a wrong turn at some point in the macro jungle :)
> > >
> > > So tracing itself is fine, but then if you have probes or bpf programs
> > > attached to a tracepoint these use rcu_read_lock()/unlock() which is
> > > obviously wrong in rcuidle context.
> >
> > Definitely, any such code needs to use tricks similar to that of the
> > tracing code. Or instead use something like SRCU, which is OK with
> > readers from idle. Or use something like Steve Rostedt's workqueue-based
> > approach, though please be very careful with this latter, lest the
> > battery-powered embedded guys come after you for waking up idle CPUs
> > too often. ;-)
>
> Are we okay if we somehow ensure that all the entry code before
> enter_from_user_mode() only does rcuidle tracing variants and has
> kprobes off? Including for BPF use cases?
That would work, though if BPF used SRCU instead of RCU, this would
be unnecessary. Sadly, SRCU has full memory barriers in each of
srcu_read_lock() and srcu_read_unlock(), but we are working on it.
(As always, no promises!)
> It would be *really* nice if we could statically verify this, as has
> been mentioned elsewhere in the thread. It would also probably be
> good enough if we could do it at runtime. Maybe with lockdep on, we
> verify rcu state in tracepoints even if the tracepoint isn't active?
> And we could plausibly have some widget that could inject something
> into *every* kprobeable function to check rcu state.
Or just have at least one testing step that activates all tracepoints,
but with lockdep enabled?
Thanx, Paul
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-03-01 19:30 ` Paul E. McKenney
@ 2020-03-01 19:39 ` Andy Lutomirski
2020-03-01 20:18 ` Paul E. McKenney
2020-03-02 0:35 ` Steven Rostedt
0 siblings, 2 replies; 56+ messages in thread
From: Andy Lutomirski @ 2020-03-01 19:39 UTC (permalink / raw)
To: paulmck
Cc: Andy Lutomirski, Thomas Gleixner, Steven Rostedt, Peter Zijlstra,
LKML, X86 ML, Brian Gerst, Juergen Gross, Paolo Bonzini,
Arnd Bergmann
> On Mar 1, 2020, at 11:30 AM, Paul E. McKenney <paulmck@kernel.org> wrote:
>
> On Sun, Mar 01, 2020 at 10:54:23AM -0800, Andy Lutomirski wrote:
>>> On Sun, Mar 1, 2020 at 10:26 AM Paul E. McKenney <paulmck@kernel.org> wrote:
>>>
>>> On Sun, Mar 01, 2020 at 07:12:25PM +0100, Thomas Gleixner wrote:
>>>> Andy Lutomirski <luto@kernel.org> writes:
>>>>> On Sun, Mar 1, 2020 at 7:21 AM Thomas Gleixner <tglx@linutronix.de> wrote:
>>>>>> Andy Lutomirski <luto@amacapital.net> writes:
>>>>>>>> On Mar 1, 2020, at 2:16 AM, Thomas Gleixner <tglx@linutronix.de> wrote:
>>>>>>>> Ok, but for the time being anything before/after CONTEXT_KERNEL is unsafe
>>>>>>>> except trace_hardirq_off/on() as those trace functions do not allow to
>>>>>>>> attach anything AFAICT.
>>>>>>>
>>>>>>> Can you point to whatever makes those particular functions special? I
>>>>>>> failed to follow the macro maze.
>>>>>>
>>>>>> Those are not tracepoints and not going through the macro maze. See
>>>>>> kernel/trace/trace_preemptirq.c
>>>>>
>>>>> That has:
>>>>>
>>>>> void trace_hardirqs_on(void)
>>>>> {
>>>>> if (this_cpu_read(tracing_irq_cpu)) {
>>>>> if (!in_nmi())
>>>>> trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
>>>>> tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1);
>>>>> this_cpu_write(tracing_irq_cpu, 0);
>>>>> }
>>>>>
>>>>> lockdep_hardirqs_on(CALLER_ADDR0);
>>>>> }
>>>>> EXPORT_SYMBOL(trace_hardirqs_on);
>>>>> NOKPROBE_SYMBOL(trace_hardirqs_on);
>>>>>
>>>>> But this calls trace_irq_enable_rcuidle(), and that's the part of the
>>>>> macro maze I got lost in. I found:
>>>>>
>>>>> #ifdef CONFIG_TRACE_IRQFLAGS
>>>>> DEFINE_EVENT(preemptirq_template, irq_disable,
>>>>> TP_PROTO(unsigned long ip, unsigned long parent_ip),
>>>>> TP_ARGS(ip, parent_ip));
>>>>>
>>>>> DEFINE_EVENT(preemptirq_template, irq_enable,
>>>>> TP_PROTO(unsigned long ip, unsigned long parent_ip),
>>>>> TP_ARGS(ip, parent_ip));
>>>>> #else
>>>>> #define trace_irq_enable(...)
>>>>> #define trace_irq_disable(...)
>>>>> #define trace_irq_enable_rcuidle(...)
>>>>> #define trace_irq_disable_rcuidle(...)
>>>>> #endif
>>>>>
>>>>> But the DEFINE_EVENT doesn't have the "_rcuidle" part. And that's
>>>>> where I got lost in the macro maze. I looked at the gcc asm output,
>>>>> and there is, indeed:
>>>>
>>>> DEFINE_EVENT
>>>> DECLARE_TRACE
>>>> __DECLARE_TRACE
>>>> __DECLARE_TRACE_RCU
>>>> static inline void trace_##name##_rcuidle(proto)
>>>> __DO_TRACE
>>>> if (rcuidle)
>>>> ....
>>>>
>>>>> But I also don't see why this is any different from any other tracepoint.
>>>>
>>>> Indeed. I took a wrong turn at some point in the macro jungle :)
>>>>
>>>> So tracing itself is fine, but then if you have probes or bpf programs
>>>> attached to a tracepoint these use rcu_read_lock()/unlock() which is
>>>> obviously wrong in rcuidle context.
>>>
>>> Definitely, any such code needs to use tricks similar to that of the
>>> tracing code. Or instead use something like SRCU, which is OK with
>>> readers from idle. Or use something like Steve Rostedt's workqueue-based
>>> approach, though please be very careful with this latter, lest the
>>> battery-powered embedded guys come after you for waking up idle CPUs
>>> too often. ;-)
>>
>> Are we okay if we somehow ensure that all the entry code before
>> enter_from_user_mode() only does rcuidle tracing variants and has
>> kprobes off? Including for BPF use cases?
>
> That would work, though if BPF used SRCU instead of RCU, this would
> be unnecessary. Sadly, SRCU has full memory barriers in each of
> srcu_read_lock() and srcu_read_unlock(), but we are working on it.
> (As always, no promises!)
>
>> It would be *really* nice if we could statically verify this, as has
>> been mentioned elsewhere in the thread. It would also probably be
>> good enough if we could do it at runtime. Maybe with lockdep on, we
>> verify rcu state in tracepoints even if the tracepoint isn't active?
>> And we could plausibly have some widget that could inject something
>> into *every* kprobeable function to check rcu state.
>
> Or just have at least one testing step that activates all tracepoints,
> but with lockdep enabled?
Also kprobe.
I don’t suppose we could make notrace imply nokprobe. Then all kprobeable functions would also have entry/exit tracepoints, right?
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-03-01 19:39 ` Andy Lutomirski
@ 2020-03-01 20:18 ` Paul E. McKenney
2020-03-02 0:35 ` Steven Rostedt
1 sibling, 0 replies; 56+ messages in thread
From: Paul E. McKenney @ 2020-03-01 20:18 UTC (permalink / raw)
To: Andy Lutomirski
Cc: Andy Lutomirski, Thomas Gleixner, Steven Rostedt, Peter Zijlstra,
LKML, X86 ML, Brian Gerst, Juergen Gross, Paolo Bonzini,
Arnd Bergmann
On Sun, Mar 01, 2020 at 11:39:42AM -0800, Andy Lutomirski wrote:
>
>
> > On Mar 1, 2020, at 11:30 AM, Paul E. McKenney <paulmck@kernel.org> wrote:
> >
> > On Sun, Mar 01, 2020 at 10:54:23AM -0800, Andy Lutomirski wrote:
> >>> On Sun, Mar 1, 2020 at 10:26 AM Paul E. McKenney <paulmck@kernel.org> wrote:
> >>>
> >>> On Sun, Mar 01, 2020 at 07:12:25PM +0100, Thomas Gleixner wrote:
> >>>> Andy Lutomirski <luto@kernel.org> writes:
> >>>>> On Sun, Mar 1, 2020 at 7:21 AM Thomas Gleixner <tglx@linutronix.de> wrote:
> >>>>>> Andy Lutomirski <luto@amacapital.net> writes:
> >>>>>>>> On Mar 1, 2020, at 2:16 AM, Thomas Gleixner <tglx@linutronix.de> wrote:
> >>>>>>>> Ok, but for the time being anything before/after CONTEXT_KERNEL is unsafe
> >>>>>>>> except trace_hardirq_off/on() as those trace functions do not allow to
> >>>>>>>> attach anything AFAICT.
> >>>>>>>
> >>>>>>> Can you point to whatever makes those particular functions special? I
> >>>>>>> failed to follow the macro maze.
> >>>>>>
> >>>>>> Those are not tracepoints and not going through the macro maze. See
> >>>>>> kernel/trace/trace_preemptirq.c
> >>>>>
> >>>>> That has:
> >>>>>
> >>>>> void trace_hardirqs_on(void)
> >>>>> {
> >>>>> if (this_cpu_read(tracing_irq_cpu)) {
> >>>>> if (!in_nmi())
> >>>>> trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
> >>>>> tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1);
> >>>>> this_cpu_write(tracing_irq_cpu, 0);
> >>>>> }
> >>>>>
> >>>>> lockdep_hardirqs_on(CALLER_ADDR0);
> >>>>> }
> >>>>> EXPORT_SYMBOL(trace_hardirqs_on);
> >>>>> NOKPROBE_SYMBOL(trace_hardirqs_on);
> >>>>>
> >>>>> But this calls trace_irq_enable_rcuidle(), and that's the part of the
> >>>>> macro maze I got lost in. I found:
> >>>>>
> >>>>> #ifdef CONFIG_TRACE_IRQFLAGS
> >>>>> DEFINE_EVENT(preemptirq_template, irq_disable,
> >>>>> TP_PROTO(unsigned long ip, unsigned long parent_ip),
> >>>>> TP_ARGS(ip, parent_ip));
> >>>>>
> >>>>> DEFINE_EVENT(preemptirq_template, irq_enable,
> >>>>> TP_PROTO(unsigned long ip, unsigned long parent_ip),
> >>>>> TP_ARGS(ip, parent_ip));
> >>>>> #else
> >>>>> #define trace_irq_enable(...)
> >>>>> #define trace_irq_disable(...)
> >>>>> #define trace_irq_enable_rcuidle(...)
> >>>>> #define trace_irq_disable_rcuidle(...)
> >>>>> #endif
> >>>>>
> >>>>> But the DEFINE_EVENT doesn't have the "_rcuidle" part. And that's
> >>>>> where I got lost in the macro maze. I looked at the gcc asm output,
> >>>>> and there is, indeed:
> >>>>
> >>>> DEFINE_EVENT
> >>>> DECLARE_TRACE
> >>>> __DECLARE_TRACE
> >>>> __DECLARE_TRACE_RCU
> >>>> static inline void trace_##name##_rcuidle(proto)
> >>>> __DO_TRACE
> >>>> if (rcuidle)
> >>>> ....
> >>>>
> >>>>> But I also don't see why this is any different from any other tracepoint.
> >>>>
> >>>> Indeed. I took a wrong turn at some point in the macro jungle :)
> >>>>
> >>>> So tracing itself is fine, but then if you have probes or bpf programs
> >>>> attached to a tracepoint these use rcu_read_lock()/unlock() which is
> >>>> obviosly wrong in rcuidle context.
> >>>
> >>> Definitely, any such code needs to use tricks similar to that of the
> >>> tracing code. Or instead use something like SRCU, which is OK with
> >>> readers from idle. Or use something like Steve Rostedt's workqueue-based
> >>> approach, though please be very careful with this latter, lest the
> >>> battery-powered embedded guys come after you for waking up idle CPUs
> >>> too often. ;-)
> >>
> >> Are we okay if we somehow ensure that all the entry code before
> >> enter_from_user_mode() only does rcuidle tracing variants and has
> >> kprobes off? Including for BPF use cases?
> >
> > That would work, though if BPF used SRCU instead of RCU, this would
> > be unnecessary. Sadly, SRCU has full memory barriers in each of
> > srcu_read_lock() and srcu_read_unlock(), but we are working on it.
> > (As always, no promises!)
> >
> >> It would be *really* nice if we could statically verify this, as has
> >> been mentioned elsewhere in the thread. It would also probably be
> >> good enough if we could do it at runtime. Maybe with lockdep on, we
> >> verify rcu state in tracepoints even if the tracepoint isn't active?
> >> And we could plausibly have some widget that could inject something
> >> into *every* kprobeable function to check rcu state.
> >
> > Or just have at least one testing step that activates all tracepoints,
> > but with lockdep enabled?
>
> Also kprobe.
I didn't realize we could test all kprobe points easily, but yes,
that would also be good.
> I don’t suppose we could make notrace imply nokprobe. Then all kprobeable functions would also have entry/exit tracepoints, right?
On this, I must defer to the tracing and kprobes people.
Thanx, Paul
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-03-01 19:39 ` Andy Lutomirski
2020-03-01 20:18 ` Paul E. McKenney
@ 2020-03-02 0:35 ` Steven Rostedt
2020-03-02 6:47 ` Masami Hiramatsu
1 sibling, 1 reply; 56+ messages in thread
From: Steven Rostedt @ 2020-03-02 0:35 UTC (permalink / raw)
To: Andy Lutomirski
Cc: paulmck, Andy Lutomirski, Thomas Gleixner, Peter Zijlstra, LKML,
X86 ML, Brian Gerst, Juergen Gross, Paolo Bonzini, Arnd Bergmann,
Masami Hiramatsu
On Sun, 1 Mar 2020 11:39:42 -0800
Andy Lutomirski <luto@amacapital.net> wrote:
> > On Mar 1, 2020, at 11:30 AM, Paul E. McKenney <paulmck@kernel.org> wrote:
> >
> > On Sun, Mar 01, 2020 at 10:54:23AM -0800, Andy Lutomirski wrote:
> >>> On Sun, Mar 1, 2020 at 10:26 AM Paul E. McKenney <paulmck@kernel.org> wrote:
> >>>
> >>> On Sun, Mar 01, 2020 at 07:12:25PM +0100, Thomas Gleixner wrote:
> >>>> Andy Lutomirski <luto@kernel.org> writes:
> >>>>> On Sun, Mar 1, 2020 at 7:21 AM Thomas Gleixner <tglx@linutronix.de> wrote:
> >>>>>> Andy Lutomirski <luto@amacapital.net> writes:
> >>>>>>>> On Mar 1, 2020, at 2:16 AM, Thomas Gleixner <tglx@linutronix.de> wrote:
> >>>>>>>> Ok, but for the time being anything before/after CONTEXT_KERNEL is unsafe
> >>>>>>>> except trace_hardirq_off/on() as those trace functions do not allow to
> >>>>>>>> attach anything AFAICT.
> >>>>>>>
> >>>>>>> Can you point to whatever makes those particular functions special? I
> >>>>>>> failed to follow the macro maze.
> >>>>>>
> >>>>>> Those are not tracepoints and not going through the macro maze. See
> >>>>>> kernel/trace/trace_preemptirq.c
> >>>>>
> >>>>> That has:
> >>>>>
> >>>>> void trace_hardirqs_on(void)
> >>>>> {
> >>>>> if (this_cpu_read(tracing_irq_cpu)) {
> >>>>> if (!in_nmi())
> >>>>> trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
> >>>>> tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1);
> >>>>> this_cpu_write(tracing_irq_cpu, 0);
> >>>>> }
> >>>>>
> >>>>> lockdep_hardirqs_on(CALLER_ADDR0);
> >>>>> }
> >>>>> EXPORT_SYMBOL(trace_hardirqs_on);
> >>>>> NOKPROBE_SYMBOL(trace_hardirqs_on);
> >>>>>
> >>>>> But this calls trace_irq_enable_rcuidle(), and that's the part of the
> >>>>> macro maze I got lost in. I found:
> >>>>>
> >>>>> #ifdef CONFIG_TRACE_IRQFLAGS
> >>>>> DEFINE_EVENT(preemptirq_template, irq_disable,
> >>>>> TP_PROTO(unsigned long ip, unsigned long parent_ip),
> >>>>> TP_ARGS(ip, parent_ip));
> >>>>>
> >>>>> DEFINE_EVENT(preemptirq_template, irq_enable,
> >>>>> TP_PROTO(unsigned long ip, unsigned long parent_ip),
> >>>>> TP_ARGS(ip, parent_ip));
> >>>>> #else
> >>>>> #define trace_irq_enable(...)
> >>>>> #define trace_irq_disable(...)
> >>>>> #define trace_irq_enable_rcuidle(...)
> >>>>> #define trace_irq_disable_rcuidle(...)
> >>>>> #endif
> >>>>>
> >>>>> But the DEFINE_EVENT doesn't have the "_rcuidle" part. And that's
> >>>>> where I got lost in the macro maze. I looked at the gcc asm output,
> >>>>> and there is, indeed:
> >>>>
> >>>> DEFINE_EVENT
> >>>> DECLARE_TRACE
> >>>> __DECLARE_TRACE
> >>>> __DECLARE_TRACE_RCU
> >>>> static inline void trace_##name##_rcuidle(proto)
> >>>> __DO_TRACE
> >>>> if (rcuidle)
> >>>> ....
> >>>>
> >>>>> But I also don't see why this is any different from any other tracepoint.
> >>>>
> >>>> Indeed. I took a wrong turn at some point in the macro jungle :)
> >>>>
> >>>> So tracing itself is fine, but then if you have probes or bpf programs
> >>>> attached to a tracepoint these use rcu_read_lock()/unlock() which is
> >>>> obviosly wrong in rcuidle context.
> >>>
> >>> Definitely, any such code needs to use tricks similar to that of the
> >>> tracing code. Or instead use something like SRCU, which is OK with
> >>> readers from idle. Or use something like Steve Rostedt's workqueue-based
> >>> approach, though please be very careful with this latter, lest the
> >>> battery-powered embedded guys come after you for waking up idle CPUs
> >>> too often. ;-)
> >>
> >> Are we okay if we somehow ensure that all the entry code before
> >> enter_from_user_mode() only does rcuidle tracing variants and has
> >> kprobes off? Including for BPF use cases?
> >
> > That would work, though if BPF used SRCU instead of RCU, this would
> > be unnecessary. Sadly, SRCU has full memory barriers in each of
> > srcu_read_lock() and srcu_read_unlock(), but we are working on it.
> > (As always, no promises!)
> >
> >> It would be *really* nice if we could statically verify this, as has
> >> been mentioned elsewhere in the thread. It would also probably be
> >> good enough if we could do it at runtime. Maybe with lockdep on, we
> >> verify rcu state in tracepoints even if the tracepoint isn't active?
> >> And we could plausibly have some widget that could inject something
> >> into *every* kprobeable function to check rcu state.
> >
> > Or just have at least one testing step that activates all tracepoints,
> > but with lockdep enabled?
>
> Also kprobe.
>
> I don’t suppose we could make notrace imply nokprobe. Then all
> kprobeable functions would also have entry/exit tracepoints, right?
There was some code before that prevented a kprobe from being allowed
in something that was not in the ftrace mcount table (which would make
this happen). But I think that was changed because it was too
restrictive.
Masami?
-- Steve
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-03-01 18:54 ` Andy Lutomirski
2020-03-01 19:30 ` Paul E. McKenney
@ 2020-03-02 1:10 ` Joel Fernandes
2020-03-02 2:18 ` Andy Lutomirski
2020-03-02 8:10 ` Thomas Gleixner
2 siblings, 1 reply; 56+ messages in thread
From: Joel Fernandes @ 2020-03-02 1:10 UTC (permalink / raw)
To: Andy Lutomirski
Cc: Paul E. McKenney, Thomas Gleixner, Steven Rostedt,
Peter Zijlstra, LKML, X86 ML, Brian Gerst, Juergen Gross,
Paolo Bonzini, Arnd Bergmann
On Sun, Mar 01, 2020 at 10:54:23AM -0800, Andy Lutomirski wrote:
> On Sun, Mar 1, 2020 at 10:26 AM Paul E. McKenney <paulmck@kernel.org> wrote:
> >
> > On Sun, Mar 01, 2020 at 07:12:25PM +0100, Thomas Gleixner wrote:
> > > Andy Lutomirski <luto@kernel.org> writes:
> > > > On Sun, Mar 1, 2020 at 7:21 AM Thomas Gleixner <tglx@linutronix.de> wrote:
> > > >> Andy Lutomirski <luto@amacapital.net> writes:
> > > >> >> On Mar 1, 2020, at 2:16 AM, Thomas Gleixner <tglx@linutronix.de> wrote:
> > > >> >> Ok, but for the time being anything before/after CONTEXT_KERNEL is unsafe
> > > >> >> except trace_hardirq_off/on() as those trace functions do not allow to
> > > >> >> attach anything AFAICT.
> > > >> >
> > > >> > Can you point to whatever makes those particular functions special? I
> > > >> > failed to follow the macro maze.
> > > >>
> > > >> Those are not tracepoints and not going through the macro maze. See
> > > >> kernel/trace/trace_preemptirq.c
> > > >
> > > > That has:
> > > >
> > > > void trace_hardirqs_on(void)
> > > > {
> > > > if (this_cpu_read(tracing_irq_cpu)) {
> > > > if (!in_nmi())
> > > > trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
> > > > tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1);
> > > > this_cpu_write(tracing_irq_cpu, 0);
> > > > }
> > > >
> > > > lockdep_hardirqs_on(CALLER_ADDR0);
> > > > }
> > > > EXPORT_SYMBOL(trace_hardirqs_on);
> > > > NOKPROBE_SYMBOL(trace_hardirqs_on);
> > > >
> > > > But this calls trace_irq_enable_rcuidle(), and that's the part of the
> > > > macro maze I got lost in. I found:
> > > >
> > > > #ifdef CONFIG_TRACE_IRQFLAGS
> > > > DEFINE_EVENT(preemptirq_template, irq_disable,
> > > > TP_PROTO(unsigned long ip, unsigned long parent_ip),
> > > > TP_ARGS(ip, parent_ip));
> > > >
> > > > DEFINE_EVENT(preemptirq_template, irq_enable,
> > > > TP_PROTO(unsigned long ip, unsigned long parent_ip),
> > > > TP_ARGS(ip, parent_ip));
> > > > #else
> > > > #define trace_irq_enable(...)
> > > > #define trace_irq_disable(...)
> > > > #define trace_irq_enable_rcuidle(...)
> > > > #define trace_irq_disable_rcuidle(...)
> > > > #endif
> > > >
> > > > But the DEFINE_EVENT doesn't have the "_rcuidle" part. And that's
> > > > where I got lost in the macro maze. I looked at the gcc asm output,
> > > > and there is, indeed:
> > >
> > > DEFINE_EVENT
> > > DECLARE_TRACE
> > > __DECLARE_TRACE
> > > __DECLARE_TRACE_RCU
> > > static inline void trace_##name##_rcuidle(proto)
> > > __DO_TRACE
> > > if (rcuidle)
> > > ....
> > >
> > > > But I also don't see why this is any different from any other tracepoint.
> > >
> > > Indeed. I took a wrong turn at some point in the macro jungle :)
> > >
> > > So tracing itself is fine, but then if you have probes or bpf programs
> > > attached to a tracepoint these use rcu_read_lock()/unlock() which is
> > > obviosly wrong in rcuidle context.
> >
> > Definitely, any such code needs to use tricks similar to that of the
> > tracing code. Or instead use something like SRCU, which is OK with
> > readers from idle. Or use something like Steve Rostedt's workqueue-based
> > approach, though please be very careful with this latter, lest the
> > battery-powered embedded guys come after you for waking up idle CPUs
> > too often. ;-)
> >
>
> Are we okay if we somehow ensure that all the entry code before
> enter_from_user_mode() only does rcuidle tracing variants and has
> kprobes off? Including for BPF use cases?
>
> It would be *really* nice if we could statically verify this, as has
> been mentioned elsewhere in the thread. It would also probably be
> good enough if we could do it at runtime. Maybe with lockdep on, we
> verify rcu state in tracepoints even if the tracepoint isn't active?
> And we could plausibly have some widget that could inject something
> into *every* kprobeable function to check rcu state.
You are talking about verifying that a non-rcuidle tracepoint is not called
into when RCU is not watching, right? I think that's fine, though I feel
lockdep kernels should not be slowed down any more than they already are. I
feel over time if we add too many checks to lockdep enabled kernels, then it
becomes too slow even for "debug" kernels. Maybe it is time for a
CONFIG_LOCKDEP_SLOW or some such? And then anyone who wants to go crazy on
runtime checking can do so. I myself want to add a few.
Note that the checking is being added into "non rcu-idle" tracepoints, many of
which are probably always called when RCU is watching, making such checking
useless for those tracepoints (and slowing them down, however slightly).
Also another note would be that the whole reason we are getting rid of the
"make RCU watch when rcuidle" logic in DO_TRACE is because it is slow for
tracepoints that are frequently called into. Another reason to do it is
because tracepoint callbacks are expected to know what they are doing and
turn on RCU watching as appropriate (as consensus on the matter suggests).
thanks,
- Joel
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-03-02 1:10 ` Joel Fernandes
@ 2020-03-02 2:18 ` Andy Lutomirski
2020-03-02 2:36 ` Joel Fernandes
0 siblings, 1 reply; 56+ messages in thread
From: Andy Lutomirski @ 2020-03-02 2:18 UTC (permalink / raw)
To: Joel Fernandes
Cc: Andy Lutomirski, Paul E. McKenney, Thomas Gleixner,
Steven Rostedt, Peter Zijlstra, LKML, X86 ML, Brian Gerst,
Juergen Gross, Paolo Bonzini, Arnd Bergmann
> On Mar 1, 2020, at 5:10 PM, Joel Fernandes <joel@joelfernandes.org> wrote:
>
> On Sun, Mar 01, 2020 at 10:54:23AM -0800, Andy Lutomirski wrote:
>>> On Sun, Mar 1, 2020 at 10:26 AM Paul E. McKenney <paulmck@kernel.org> wrote:
>>>
>>> On Sun, Mar 01, 2020 at 07:12:25PM +0100, Thomas Gleixner wrote:
>>>> Andy Lutomirski <luto@kernel.org> writes:
>>>>> On Sun, Mar 1, 2020 at 7:21 AM Thomas Gleixner <tglx@linutronix.de> wrote:
>>>>>> Andy Lutomirski <luto@amacapital.net> writes:
>>>>>>>> On Mar 1, 2020, at 2:16 AM, Thomas Gleixner <tglx@linutronix.de> wrote:
>>>>>>>> Ok, but for the time being anything before/after CONTEXT_KERNEL is unsafe
>>>>>>>> except trace_hardirq_off/on() as those trace functions do not allow to
>>>>>>>> attach anything AFAICT.
>>>>>>>
>>>>>>> Can you point to whatever makes those particular functions special? I
>>>>>>> failed to follow the macro maze.
>>>>>>
>>>>>> Those are not tracepoints and not going through the macro maze. See
>>>>>> kernel/trace/trace_preemptirq.c
>>>>>
>>>>> That has:
>>>>>
>>>>> void trace_hardirqs_on(void)
>>>>> {
>>>>> if (this_cpu_read(tracing_irq_cpu)) {
>>>>> if (!in_nmi())
>>>>> trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
>>>>> tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1);
>>>>> this_cpu_write(tracing_irq_cpu, 0);
>>>>> }
>>>>>
>>>>> lockdep_hardirqs_on(CALLER_ADDR0);
>>>>> }
>>>>> EXPORT_SYMBOL(trace_hardirqs_on);
>>>>> NOKPROBE_SYMBOL(trace_hardirqs_on);
>>>>>
>>>>> But this calls trace_irq_enable_rcuidle(), and that's the part of the
>>>>> macro maze I got lost in. I found:
>>>>>
>>>>> #ifdef CONFIG_TRACE_IRQFLAGS
>>>>> DEFINE_EVENT(preemptirq_template, irq_disable,
>>>>> TP_PROTO(unsigned long ip, unsigned long parent_ip),
>>>>> TP_ARGS(ip, parent_ip));
>>>>>
>>>>> DEFINE_EVENT(preemptirq_template, irq_enable,
>>>>> TP_PROTO(unsigned long ip, unsigned long parent_ip),
>>>>> TP_ARGS(ip, parent_ip));
>>>>> #else
>>>>> #define trace_irq_enable(...)
>>>>> #define trace_irq_disable(...)
>>>>> #define trace_irq_enable_rcuidle(...)
>>>>> #define trace_irq_disable_rcuidle(...)
>>>>> #endif
>>>>>
>>>>> But the DEFINE_EVENT doesn't have the "_rcuidle" part. And that's
>>>>> where I got lost in the macro maze. I looked at the gcc asm output,
>>>>> and there is, indeed:
>>>>
>>>> DEFINE_EVENT
>>>> DECLARE_TRACE
>>>> __DECLARE_TRACE
>>>> __DECLARE_TRACE_RCU
>>>> static inline void trace_##name##_rcuidle(proto)
>>>> __DO_TRACE
>>>> if (rcuidle)
>>>> ....
>>>>
>>>>> But I also don't see why this is any different from any other tracepoint.
>>>>
>>>> Indeed. I took a wrong turn at some point in the macro jungle :)
>>>>
>>>> So tracing itself is fine, but then if you have probes or bpf programs
>>>> attached to a tracepoint these use rcu_read_lock()/unlock() which is
>>>> obviosly wrong in rcuidle context.
>>>
>>> Definitely, any such code needs to use tricks similar to that of the
>>> tracing code. Or instead use something like SRCU, which is OK with
>>> readers from idle. Or use something like Steve Rostedt's workqueue-based
>>> approach, though please be very careful with this latter, lest the
>>> battery-powered embedded guys come after you for waking up idle CPUs
>>> too often. ;-)
>>>
>>
>> Are we okay if we somehow ensure that all the entry code before
>> enter_from_user_mode() only does rcuidle tracing variants and has
>> kprobes off? Including for BPF use cases?
>>
>> It would be *really* nice if we could statically verify this, as has
>> been mentioned elsewhere in the thread. It would also probably be
>> good enough if we could do it at runtime. Maybe with lockdep on, we
>> verify rcu state in tracepoints even if the tracepoint isn't active?
>> And we could plausibly have some widget that could inject something
>> into *every* kprobeable function to check rcu state.
>
> You are talking about verifying that a non-rcuidle tracepoint is not called
> into when RCU is not watching right? I think that's fine, though I feel
> lockdep kernels should not be slowed down any more than they already are. I
> feel over time if we add too many checks to lockdep enabled kernels, then it
> becomes too slow even for "debug" kernels. May be it is time for a
> CONFIG_LOCKDEP_SLOW or some such? And then anyone who wants to go crazy on
> runtime checking can do so. I myself want to add a few.
>
> Note that the checking is being added into "non rcu-idle" tracepoints many of
> which are probably always called when RCU is watching, making such checking
> useless for those tracepoints (and slowing them down however less).
>
Indeed. Static analysis would help a lot here.
> Also another note would be that the whole reason we are getting rid of the
> "make RCU watch when rcuidle" logic in DO_TRACE is because it is slow for
> tracepoints that are frequently called into. Another reason to do it is
> because tracepoint callbacks are expected to know what they are doing and
> turn on RCU watching as appropriate (as consensus on the matter suggests).
Whoa there. We arch people need crystal clear rules as to what tracepoints can be called in what contexts and what is the responsibility of the callbacks.
>
> thanks,
>
> - Joel
>
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-03-02 2:18 ` Andy Lutomirski
@ 2020-03-02 2:36 ` Joel Fernandes
2020-03-02 5:40 ` Andy Lutomirski
0 siblings, 1 reply; 56+ messages in thread
From: Joel Fernandes @ 2020-03-02 2:36 UTC (permalink / raw)
To: Andy Lutomirski
Cc: Andy Lutomirski, Paul E. McKenney, Thomas Gleixner,
Steven Rostedt, Peter Zijlstra, LKML, X86 ML, Brian Gerst,
Juergen Gross, Paolo Bonzini, Arnd Bergmann
On Sun, Mar 01, 2020 at 06:18:51PM -0800, Andy Lutomirski wrote:
>
>
> > On Mar 1, 2020, at 5:10 PM, Joel Fernandes <joel@joelfernandes.org> wrote:
> >
> > On Sun, Mar 01, 2020 at 10:54:23AM -0800, Andy Lutomirski wrote:
> >>> On Sun, Mar 1, 2020 at 10:26 AM Paul E. McKenney <paulmck@kernel.org> wrote:
> >>>
> >>> On Sun, Mar 01, 2020 at 07:12:25PM +0100, Thomas Gleixner wrote:
> >>>> Andy Lutomirski <luto@kernel.org> writes:
> >>>>> On Sun, Mar 1, 2020 at 7:21 AM Thomas Gleixner <tglx@linutronix.de> wrote:
> >>>>>> Andy Lutomirski <luto@amacapital.net> writes:
> >>>>>>>> On Mar 1, 2020, at 2:16 AM, Thomas Gleixner <tglx@linutronix.de> wrote:
> >>>>>>>> Ok, but for the time being anything before/after CONTEXT_KERNEL is unsafe
> >>>>>>>> except trace_hardirq_off/on() as those trace functions do not allow to
> >>>>>>>> attach anything AFAICT.
> >>>>>>>
> >>>>>>> Can you point to whatever makes those particular functions special? I
> >>>>>>> failed to follow the macro maze.
> >>>>>>
> >>>>>> Those are not tracepoints and not going through the macro maze. See
> >>>>>> kernel/trace/trace_preemptirq.c
> >>>>>
> >>>>> That has:
> >>>>>
> >>>>> void trace_hardirqs_on(void)
> >>>>> {
> >>>>> if (this_cpu_read(tracing_irq_cpu)) {
> >>>>> if (!in_nmi())
> >>>>> trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
> >>>>> tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1);
> >>>>> this_cpu_write(tracing_irq_cpu, 0);
> >>>>> }
> >>>>>
> >>>>> lockdep_hardirqs_on(CALLER_ADDR0);
> >>>>> }
> >>>>> EXPORT_SYMBOL(trace_hardirqs_on);
> >>>>> NOKPROBE_SYMBOL(trace_hardirqs_on);
> >>>>>
> >>>>> But this calls trace_irq_enable_rcuidle(), and that's the part of the
> >>>>> macro maze I got lost in. I found:
> >>>>>
> >>>>> #ifdef CONFIG_TRACE_IRQFLAGS
> >>>>> DEFINE_EVENT(preemptirq_template, irq_disable,
> >>>>> TP_PROTO(unsigned long ip, unsigned long parent_ip),
> >>>>> TP_ARGS(ip, parent_ip));
> >>>>>
> >>>>> DEFINE_EVENT(preemptirq_template, irq_enable,
> >>>>> TP_PROTO(unsigned long ip, unsigned long parent_ip),
> >>>>> TP_ARGS(ip, parent_ip));
> >>>>> #else
> >>>>> #define trace_irq_enable(...)
> >>>>> #define trace_irq_disable(...)
> >>>>> #define trace_irq_enable_rcuidle(...)
> >>>>> #define trace_irq_disable_rcuidle(...)
> >>>>> #endif
> >>>>>
> >>>>> But the DEFINE_EVENT doesn't have the "_rcuidle" part. And that's
> >>>>> where I got lost in the macro maze. I looked at the gcc asm output,
> >>>>> and there is, indeed:
> >>>>
> >>>> DEFINE_EVENT
> >>>> DECLARE_TRACE
> >>>> __DECLARE_TRACE
> >>>> __DECLARE_TRACE_RCU
> >>>> static inline void trace_##name##_rcuidle(proto)
> >>>> __DO_TRACE
> >>>> if (rcuidle)
> >>>> ....
> >>>>
> >>>>> But I also don't see why this is any different from any other tracepoint.
> >>>>
> >>>> Indeed. I took a wrong turn at some point in the macro jungle :)
> >>>>
> >>>> So tracing itself is fine, but then if you have probes or bpf programs
> >>>> attached to a tracepoint these use rcu_read_lock()/unlock() which is
> >>>> obviosly wrong in rcuidle context.
> >>>
> >>> Definitely, any such code needs to use tricks similar to that of the
> >>> tracing code. Or instead use something like SRCU, which is OK with
> >>> readers from idle. Or use something like Steve Rostedt's workqueue-based
> >>> approach, though please be very careful with this latter, lest the
> >>> battery-powered embedded guys come after you for waking up idle CPUs
> >>> too often. ;-)
> >>>
> >>
> >> Are we okay if we somehow ensure that all the entry code before
> >> enter_from_user_mode() only does rcuidle tracing variants and has
> >> kprobes off? Including for BPF use cases?
> >>
> >> It would be *really* nice if we could statically verify this, as has
> >> been mentioned elsewhere in the thread. It would also probably be
> >> good enough if we could do it at runtime. Maybe with lockdep on, we
> >> verify rcu state in tracepoints even if the tracepoint isn't active?
> >> And we could plausibly have some widget that could inject something
> >> into *every* kprobeable function to check rcu state.
> >
> > You are talking about verifying that a non-rcuidle tracepoint is not called
> > into when RCU is not watching right? I think that's fine, though I feel
> > lockdep kernels should not be slowed down any more than they already are. I
> > feel over time if we add too many checks to lockdep enabled kernels, then it
> > becomes too slow even for "debug" kernels. May be it is time for a
> > CONFIG_LOCKDEP_SLOW or some such? And then anyone who wants to go crazy on
> > runtime checking can do so. I myself want to add a few.
> >
> > Note that the checking is being added into "non rcu-idle" tracepoints many of
> > which are probably always called when RCU is watching, making such checking
> > useless for those tracepoints (and slowing them down however less).
> >
>
> Indeed. Static analysis would help a lot here.
>
> > Also another note would be that the whole reason we are getting rid of the
> > "make RCU watch when rcuidle" logic in DO_TRACE is because it is slow for
> > tracepoints that are frequently called into. Another reason to do it is
> > because tracepoint callbacks are expected to know what they are doing and
> > turn on RCU watching as appropriate (as consensus on the matter suggests).
>
> Whoa there. We arch people need crystal clear rules as to what tracepoints
> can be called in what contexts and what is the responsibility of the
> callbacks.
>
The direction that Peter, Mathieu and Steve are going is that callbacks
registered on "rcu idle" tracepoints need to turn on "RCU watching"
themselves. Such as perf. Turning on "RCU watching" is non-free as I tested
in 2017 and we removed it back then, but it was added right back when perf
started splatting. Now it is being removed again, and the turning on of RCU's
eyes happens in the perf callback itself since perf uses RCU.
If you are calling trace_.._rcuidle(), can you not ensure that RCU is
watching by calling the appropriate RCU APIs in your callbacks? Or did I miss
the point?
thanks,
- Joel
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-03-02 2:36 ` Joel Fernandes
@ 2020-03-02 5:40 ` Andy Lutomirski
0 siblings, 0 replies; 56+ messages in thread
From: Andy Lutomirski @ 2020-03-02 5:40 UTC (permalink / raw)
To: Joel Fernandes
Cc: Andy Lutomirski, Paul E. McKenney, Thomas Gleixner,
Steven Rostedt, Peter Zijlstra, LKML, X86 ML, Brian Gerst,
Juergen Gross, Paolo Bonzini, Arnd Bergmann
> On Mar 1, 2020, at 6:36 PM, Joel Fernandes <joel@joelfernandes.org> wrote:
>
> On Sun, Mar 01, 2020 at 06:18:51PM -0800, Andy Lutomirski wrote:
>>
>>
>>>> On Mar 1, 2020, at 5:10 PM, Joel Fernandes <joel@joelfernandes.org> wrote:
>>>
>>> On Sun, Mar 01, 2020 at 10:54:23AM -0800, Andy Lutomirski wrote:
>>>>> On Sun, Mar 1, 2020 at 10:26 AM Paul E. McKenney <paulmck@kernel.org> wrote:
>>>>>
>>>>> On Sun, Mar 01, 2020 at 07:12:25PM +0100, Thomas Gleixner wrote:
>>>>>> Andy Lutomirski <luto@kernel.org> writes:
>>>>>>> On Sun, Mar 1, 2020 at 7:21 AM Thomas Gleixner <tglx@linutronix.de> wrote:
>>>>>>>> Andy Lutomirski <luto@amacapital.net> writes:
>>>>>>>>>> On Mar 1, 2020, at 2:16 AM, Thomas Gleixner <tglx@linutronix.de> wrote:
>>>>>>>>>> Ok, but for the time being anything before/after CONTEXT_KERNEL is unsafe
>>>>>>>>>> except trace_hardirq_off/on() as those trace functions do not allow to
>>>>>>>>>> attach anything AFAICT.
>>>>>>>>>
>>>>>>>>> Can you point to whatever makes those particular functions special? I
>>>>>>>>> failed to follow the macro maze.
>>>>>>>>
>>>>>>>> Those are not tracepoints and not going through the macro maze. See
>>>>>>>> kernel/trace/trace_preemptirq.c
>>>>>>>
>>>>>>> That has:
>>>>>>>
>>>>>>> void trace_hardirqs_on(void)
>>>>>>> {
>>>>>>> if (this_cpu_read(tracing_irq_cpu)) {
>>>>>>> if (!in_nmi())
>>>>>>> trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
>>>>>>> tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1);
>>>>>>> this_cpu_write(tracing_irq_cpu, 0);
>>>>>>> }
>>>>>>>
>>>>>>> lockdep_hardirqs_on(CALLER_ADDR0);
>>>>>>> }
>>>>>>> EXPORT_SYMBOL(trace_hardirqs_on);
>>>>>>> NOKPROBE_SYMBOL(trace_hardirqs_on);
>>>>>>>
>>>>>>> But this calls trace_irq_enable_rcuidle(), and that's the part of the
>>>>>>> macro maze I got lost in. I found:
>>>>>>>
>>>>>>> #ifdef CONFIG_TRACE_IRQFLAGS
>>>>>>> DEFINE_EVENT(preemptirq_template, irq_disable,
>>>>>>> TP_PROTO(unsigned long ip, unsigned long parent_ip),
>>>>>>> TP_ARGS(ip, parent_ip));
>>>>>>>
>>>>>>> DEFINE_EVENT(preemptirq_template, irq_enable,
>>>>>>> TP_PROTO(unsigned long ip, unsigned long parent_ip),
>>>>>>> TP_ARGS(ip, parent_ip));
>>>>>>> #else
>>>>>>> #define trace_irq_enable(...)
>>>>>>> #define trace_irq_disable(...)
>>>>>>> #define trace_irq_enable_rcuidle(...)
>>>>>>> #define trace_irq_disable_rcuidle(...)
>>>>>>> #endif
>>>>>>>
>>>>>>> But the DEFINE_EVENT doesn't have the "_rcuidle" part. And that's
>>>>>>> where I got lost in the macro maze. I looked at the gcc asm output,
>>>>>>> and there is, indeed:
>>>>>>
>>>>>> DEFINE_EVENT
>>>>>> DECLARE_TRACE
>>>>>> __DECLARE_TRACE
>>>>>> __DECLARE_TRACE_RCU
>>>>>> static inline void trace_##name##_rcuidle(proto)
>>>>>> __DO_TRACE
>>>>>> if (rcuidle)
>>>>>> ....
>>>>>>
>>>>>>> But I also don't see why this is any different from any other tracepoint.
>>>>>>
>>>>>> Indeed. I took a wrong turn at some point in the macro jungle :)
>>>>>>
>>>>>> So tracing itself is fine, but then if you have probes or bpf programs
>>>>>> attached to a tracepoint these use rcu_read_lock()/unlock() which is
>>>>>> obviosly wrong in rcuidle context.
>>>>>
>>>>> Definitely, any such code needs to use tricks similar to that of the
>>>>> tracing code. Or instead use something like SRCU, which is OK with
>>>>> readers from idle. Or use something like Steve Rostedt's workqueue-based
>>>>> approach, though please be very careful with this latter, lest the
>>>>> battery-powered embedded guys come after you for waking up idle CPUs
>>>>> too often. ;-)
>>>>>
>>>>
>>>> Are we okay if we somehow ensure that all the entry code before
>>>> enter_from_user_mode() only does rcuidle tracing variants and has
>>>> kprobes off? Including for BPF use cases?
>>>>
>>>> It would be *really* nice if we could statically verify this, as has
>>>> been mentioned elsewhere in the thread. It would also probably be
>>>> good enough if we could do it at runtime. Maybe with lockdep on, we
>>>> verify rcu state in tracepoints even if the tracepoint isn't active?
>>>> And we could plausibly have some widget that could inject something
>>>> into *every* kprobeable function to check rcu state.
>>>
>>> You are talking about verifying that a non-rcuidle tracepoint is not called
>>> into when RCU is not watching right? I think that's fine, though I feel
>>> lockdep kernels should not be slowed down any more than they already are. I
>>> feel over time if we add too many checks to lockdep enabled kernels, then it
>>> becomes too slow even for "debug" kernels. May be it is time for a
>>> CONFIG_LOCKDEP_SLOW or some such? And then anyone who wants to go crazy on
>>> runtime checking can do so. I myself want to add a few.
>>>
>>> Note that the checking is being added into "non rcu-idle" tracepoints many of
>>> which are probably always called when RCU is watching, making such checking
>>> useless for those tracepoints (and slowing them down however less).
>>>
>>
>> Indeed. Static analysis would help a lot here.
>>
>>> Also another note would be that the whole reason we are getting rid of the
>>> "make RCU watch when rcuidle" logic in DO_TRACE is because it is slow for
>>> tracepoints that are frequently called into. Another reason to do it is
>>> because tracepoint callbacks are expected to know what they are doing and
>>> turn on RCU watching as appropriate (as consensus on the matter suggests).
>>
>> Whoa there. We arch people need crystal clear rules as to what tracepoints
>> can be called in what contexts and what is the responsibility of the
>> callbacks.
>>
>
> The direction that Peter, Mathieu and Steve are going is that callbacks
> registered on "rcu idle" tracepoints need to turn on "RCU watching"
> themselves. Such as perf. Turning on "RCU watching" is non-free as I tested
> in 2017 and we removed it back then, but it was added right back when perf
> started splatting. Now it is being removed again, and the turning on of RCU's
> eyes happens in the perf callback itself since perf uses RCU.
>
> If you are calling trace_.._rcuidle(), can you not ensure that RCU is
> watching by calling the appropriate RCU APIs in your callbacks? Or did I miss
> the point?
Unless I’ve misunderstood you, the problem is that the callback and the caller are totally different subsystems. I can count the number of people who know which tracepoints need RCU turned on in their callbacks without using any fingers (tglx and I are still at least a bit confused — I’m not convinced *anyone* knows for sure).
I think we need some credible rule for which events need special handling in their callbacks, and then we need to ensure that all subsystems that install callbacks, including BPF, follow the rule. It should not be up to the author of a BPF program to get this right.
>
> thanks,
>
> - Joel
>
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-03-02 0:35 ` Steven Rostedt
@ 2020-03-02 6:47 ` Masami Hiramatsu
0 siblings, 0 replies; 56+ messages in thread
From: Masami Hiramatsu @ 2020-03-02 6:47 UTC (permalink / raw)
To: Steven Rostedt
Cc: Andy Lutomirski, paulmck, Andy Lutomirski, Thomas Gleixner,
Peter Zijlstra, LKML, X86 ML, Brian Gerst, Juergen Gross,
Paolo Bonzini, Arnd Bergmann, Masami Hiramatsu
On Sun, 1 Mar 2020 19:35:01 -0500
Steven Rostedt <rostedt@goodmis.org> wrote:
> On Sun, 1 Mar 2020 11:39:42 -0800
> Andy Lutomirski <luto@amacapital.net> wrote:
>
> > > On Mar 1, 2020, at 11:30 AM, Paul E. McKenney <paulmck@kernel.org> wrote:
> > >
> > > On Sun, Mar 01, 2020 at 10:54:23AM -0800, Andy Lutomirski wrote:
> > >>> On Sun, Mar 1, 2020 at 10:26 AM Paul E. McKenney <paulmck@kernel.org> wrote:
> > >>>
> > >>> On Sun, Mar 01, 2020 at 07:12:25PM +0100, Thomas Gleixner wrote:
> > >>>> Andy Lutomirski <luto@kernel.org> writes:
> > >>>>> On Sun, Mar 1, 2020 at 7:21 AM Thomas Gleixner <tglx@linutronix.de> wrote:
> > >>>>>> Andy Lutomirski <luto@amacapital.net> writes:
> > >>>>>>>> On Mar 1, 2020, at 2:16 AM, Thomas Gleixner <tglx@linutronix.de> wrote:
> > >>>>>>>> Ok, but for the time being anything before/after CONTEXT_KERNEL is unsafe
> > >>>>>>>> except trace_hardirq_off/on() as those trace functions do not allow to
> > >>>>>>>> attach anything AFAICT.
> > >>>>>>>
> > >>>>>>> Can you point to whatever makes those particular functions special? I
> > >>>>>>> failed to follow the macro maze.
> > >>>>>>
> > >>>>>> Those are not tracepoints and not going through the macro maze. See
> > >>>>>> kernel/trace/trace_preemptirq.c
> > >>>>>
> > >>>>> That has:
> > >>>>>
> > >>>>> void trace_hardirqs_on(void)
> > >>>>> {
> > >>>>> if (this_cpu_read(tracing_irq_cpu)) {
> > >>>>> if (!in_nmi())
> > >>>>> trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
> > >>>>> tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1);
> > >>>>> this_cpu_write(tracing_irq_cpu, 0);
> > >>>>> }
> > >>>>>
> > >>>>> lockdep_hardirqs_on(CALLER_ADDR0);
> > >>>>> }
> > >>>>> EXPORT_SYMBOL(trace_hardirqs_on);
> > >>>>> NOKPROBE_SYMBOL(trace_hardirqs_on);
> > >>>>>
> > >>>>> But this calls trace_irq_enable_rcuidle(), and that's the part of the
> > >>>>> macro maze I got lost in. I found:
> > >>>>>
> > >>>>> #ifdef CONFIG_TRACE_IRQFLAGS
> > >>>>> DEFINE_EVENT(preemptirq_template, irq_disable,
> > >>>>> TP_PROTO(unsigned long ip, unsigned long parent_ip),
> > >>>>> TP_ARGS(ip, parent_ip));
> > >>>>>
> > >>>>> DEFINE_EVENT(preemptirq_template, irq_enable,
> > >>>>> TP_PROTO(unsigned long ip, unsigned long parent_ip),
> > >>>>> TP_ARGS(ip, parent_ip));
> > >>>>> #else
> > >>>>> #define trace_irq_enable(...)
> > >>>>> #define trace_irq_disable(...)
> > >>>>> #define trace_irq_enable_rcuidle(...)
> > >>>>> #define trace_irq_disable_rcuidle(...)
> > >>>>> #endif
> > >>>>>
> > >>>>> But the DEFINE_EVENT doesn't have the "_rcuidle" part. And that's
> > >>>>> where I got lost in the macro maze. I looked at the gcc asm output,
> > >>>>> and there is, indeed:
> > >>>>
> > >>>> DEFINE_EVENT
> > >>>> DECLARE_TRACE
> > >>>> __DECLARE_TRACE
> > >>>> __DECLARE_TRACE_RCU
> > >>>> static inline void trace_##name##_rcuidle(proto)
> > >>>> __DO_TRACE
> > >>>> if (rcuidle)
> > >>>> ....
> > >>>>
> > >>>>> But I also don't see why this is any different from any other tracepoint.
> > >>>>
> > >>>> Indeed. I took a wrong turn at some point in the macro jungle :)
> > >>>>
> > >>>> So tracing itself is fine, but then if you have probes or bpf programs
> > >>>> attached to a tracepoint these use rcu_read_lock()/unlock() which is
> > >>>> obviously wrong in rcuidle context.
> > >>>
> > >>> Definitely, any such code needs to use tricks similar to that of the
> > >>> tracing code. Or instead use something like SRCU, which is OK with
> > >>> readers from idle. Or use something like Steve Rostedt's workqueue-based
> > >>> approach, though please be very careful with this latter, lest the
> > >>> battery-powered embedded guys come after you for waking up idle CPUs
> > >>> too often. ;-)
> > >>
> > >> Are we okay if we somehow ensure that all the entry code before
> > >> enter_from_user_mode() only does rcuidle tracing variants and has
> > >> kprobes off? Including for BPF use cases?
> > >
> > > That would work, though if BPF used SRCU instead of RCU, this would
> > > be unnecessary. Sadly, SRCU has full memory barriers in each of
> > > srcu_read_lock() and srcu_read_unlock(), but we are working on it.
> > > (As always, no promises!)
> > >
> > >> It would be *really* nice if we could statically verify this, as has
> > >> been mentioned elsewhere in the thread. It would also probably be
> > >> good enough if we could do it at runtime. Maybe with lockdep on, we
> > >> verify rcu state in tracepoints even if the tracepoint isn't active?
> > >> And we could plausibly have some widget that could inject something
> > >> into *every* kprobeable function to check rcu state.
I'm still not clear about this point, should I check rcuidle in kprobes
int3 handler or jump optimized handler? (int3 handler will run in
irq context so is not able to use srcu anyway...) Maybe I missed the point.
> > >
> > > Or just have at least one testing step that activates all tracepoints,
> > > but with lockdep enabled?
> >
> > Also kprobe.
> >
> > I don’t suppose we could make notrace imply nokprobe. Then all
> > kprobeable functions would also have entry/exit tracepoints, right?
>
> There was some code before that prevented a kprobe from being allowed
> in something that was not in the ftrace mcount table (which would make
> this happen). But I think that was changed because it was too
> restrictive.
Would you mean CONFIG_KPROBE_EVENTS_ON_NOTRACE? By default notrace
means noprobe too now. With CONFIG_KPROBE_EVENTS_ON_NOTRACE=y, we can
put kprobe events on notrace functions. So if you are unsure, we can put
a kprobe on those functions and see what happens.
(Note that this is only for kprobe event, not kprobes itself)
It is actually very restrictive, but it is hard to make a whitelist
manually, especially if the CC_FLAGS_FTRACE is removed while building.
Thank you,
--
Masami Hiramatsu <mhiramat@kernel.org>
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code
2020-03-01 18:54 ` Andy Lutomirski
2020-03-01 19:30 ` Paul E. McKenney
2020-03-02 1:10 ` Joel Fernandes
@ 2020-03-02 8:10 ` Thomas Gleixner
2 siblings, 0 replies; 56+ messages in thread
From: Thomas Gleixner @ 2020-03-02 8:10 UTC (permalink / raw)
To: Andy Lutomirski, Paul E. McKenney
Cc: Andy Lutomirski, Steven Rostedt, Peter Zijlstra, LKML, X86 ML,
Brian Gerst, Juergen Gross, Paolo Bonzini, Arnd Bergmann
Andy Lutomirski <luto@kernel.org> writes:
> On Sun, Mar 1, 2020 at 10:26 AM Paul E. McKenney <paulmck@kernel.org> wrote:
>> > So tracing itself is fine, but then if you have probes or bpf programs
>> > attached to a tracepoint these use rcu_read_lock()/unlock() which is
>> > obviously wrong in rcuidle context.
>>
>> Definitely, any such code needs to use tricks similar to that of the
>> tracing code. Or instead use something like SRCU, which is OK with
>> readers from idle. Or use something like Steve Rostedt's workqueue-based
>> approach, though please be very careful with this latter, lest the
>> battery-powered embedded guys come after you for waking up idle CPUs
>> too often. ;-)
>>
>
> Are we okay if we somehow ensure that all the entry code before
> enter_from_user_mode() only does rcuidle tracing variants and has
> kprobes off? Including for BPF use cases?
I think this is the right thing to do. The only requirement we have
_before_ enter_from_user_mode() is to tell lockdep that interrupts are
off. There is not even the need for a real tracepoint IMO. The fact that
the lockdep call is hidden in that tracepoint is just an implementation
detail.
That would clearly set the rules straight: Anything low level entry code
before enter_from_user_mode() returns is neither probable nor
traceable.
I know that some people will argue that this is too restrictive in terms
of instrumentation, but OTOH the whole low level entry code has to be
excluded from instrumentation anyway, so having a dozen instructions
more excluded does not matter at all. Keep it simple!
> It would be *really* nice if we could statically verify this, as has
> been mentioned elsewhere in the thread. It would also probably be
> good enough if we could do it at runtime. Maybe with lockdep on, we
> verify rcu state in tracepoints even if the tracepoint isn't active?
> And we could plausibly have some widget that could inject something
> into *every* kprobeable function to check rcu state.
That surely would be useful.
Thanks,
tglx
^ permalink raw reply [flat|nested] 56+ messages in thread
end of thread, other threads:[~2020-03-02 8:10 UTC | newest]
Thread overview: 56+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-25 22:08 [patch 0/8] x86/entry: Consolidation - Part II Thomas Gleixner
2020-02-25 22:08 ` [patch 1/8] x86/entry/64: Trace irqflags unconditionally on when returning to user space Thomas Gleixner
2020-02-27 19:49 ` Borislav Petkov
2020-02-27 22:45 ` Frederic Weisbecker
2020-02-28 8:58 ` Alexandre Chartre
2020-02-25 22:08 ` [patch 2/8] x86/entry/common: Consolidate syscall entry code Thomas Gleixner
2020-02-27 19:57 ` Borislav Petkov
2020-02-27 22:52 ` Frederic Weisbecker
2020-02-28 8:59 ` Alexandre Chartre
2020-02-25 22:08 ` [patch 3/8] x86/entry/common: Mark syscall entry points notrace/nokprobe Thomas Gleixner
2020-02-27 23:15 ` Frederic Weisbecker
2020-02-28 8:59 ` Alexandre Chartre
2020-02-25 22:08 ` [patch 4/8] x86/entry: Move irq tracing on syscall entry to C-code Thomas Gleixner
2020-02-26 5:43 ` Andy Lutomirski
2020-02-26 8:17 ` Peter Zijlstra
2020-02-26 11:20 ` Andy Lutomirski
2020-02-26 19:51 ` Thomas Gleixner
2020-02-29 14:44 ` Thomas Gleixner
2020-02-29 19:25 ` Andy Lutomirski
2020-02-29 23:58 ` Steven Rostedt
2020-03-01 10:16 ` Thomas Gleixner
2020-03-01 14:37 ` Andy Lutomirski
2020-03-01 15:21 ` Thomas Gleixner
2020-03-01 16:00 ` Andy Lutomirski
2020-03-01 18:12 ` Thomas Gleixner
2020-03-01 18:26 ` Paul E. McKenney
2020-03-01 18:54 ` Andy Lutomirski
2020-03-01 19:30 ` Paul E. McKenney
2020-03-01 19:39 ` Andy Lutomirski
2020-03-01 20:18 ` Paul E. McKenney
2020-03-02 0:35 ` Steven Rostedt
2020-03-02 6:47 ` Masami Hiramatsu
2020-03-02 1:10 ` Joel Fernandes
2020-03-02 2:18 ` Andy Lutomirski
2020-03-02 2:36 ` Joel Fernandes
2020-03-02 5:40 ` Andy Lutomirski
2020-03-02 8:10 ` Thomas Gleixner
2020-03-01 18:23 ` Steven Rostedt
2020-03-01 18:20 ` Steven Rostedt
2020-02-27 23:11 ` Frederic Weisbecker
2020-02-28 9:00 ` Alexandre Chartre
2020-02-25 22:08 ` [patch 5/8] x86/entry/common: Provide trace/kprobe safe exit to user space functions Thomas Gleixner
2020-02-26 5:45 ` Andy Lutomirski
2020-02-26 8:15 ` Peter Zijlstra
2020-02-27 15:43 ` Alexandre Chartre
2020-02-27 15:53 ` Thomas Gleixner
2020-02-25 22:08 ` [patch 6/8] x86/entry: Move irq tracing to syscall_slow_exit_work Thomas Gleixner
2020-02-26 5:47 ` Andy Lutomirski
2020-02-27 16:12 ` Alexandre Chartre
2020-02-25 22:08 ` [patch 7/8] x86/entry: Move irq tracing to prepare_exit_to_user_mode() Thomas Gleixner
2020-02-26 5:50 ` Andy Lutomirski
2020-02-26 19:53 ` Thomas Gleixner
2020-02-26 20:07 ` Andy Lutomirski
2020-02-25 22:08 ` [patch 8/8] x86/entry: Move irqflags tracing to do_int80_syscall_32() Thomas Gleixner
2020-02-27 16:46 ` Alexandre Chartre
2020-02-28 13:49 ` Thomas Gleixner
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.