From: Daniel Bristot de Oliveira <bristot@redhat.com>
To: linux-kernel@vger.kernel.org
Cc: Steven Rostedt <rostedt@goodmis.org>,
Arnaldo Carvalho de Melo <acme@kernel.org>,
Ingo Molnar <mingo@redhat.com>, Andy Lutomirski <luto@kernel.org>,
Thomas Gleixner <tglx@linutronix.de>,
Borislav Petkov <bp@alien8.de>,
Peter Zijlstra <peterz@infradead.org>,
"H. Peter Anvin" <hpa@zytor.com>,
"Joel Fernandes (Google)" <joel@joelfernandes.org>,
Jiri Olsa <jolsa@redhat.com>, Namhyung Kim <namhyung@kernel.org>,
Alexander Shishkin <alexander.shishkin@linux.intel.com>,
Tommaso Cucinotta <tommaso.cucinotta@santannapisa.it>,
Romulo Silva de Oliveira <romulo.deoliveira@ufsc.br>,
Clark Williams <williams@redhat.com>,
x86@kernel.org
Subject: [RFC PATCH 1/7] x86/entry: Add support for early task context tracking
Date: Tue, 2 Apr 2019 22:03:53 +0200 [thread overview]
Message-ID: <90ce8a6a4ca02e1e8a2a43185f193cd72a59d020.1554234787.git.bristot@redhat.com> (raw)
In-Reply-To: <cover.1554234787.git.bristot@redhat.com>
Currently, the execution context is identified through the
preempt_counter, but the counter is only set after the first functions
of the IRQ/NMI handler have already run, so the context can be
misidentified during that window. For instance, ftrace/perf might drop
events in the early stage of IRQ/NMI handlers because the preempt_counter
was not yet set.
The proposed approach is to use a dedicated per-cpu variable to keep
track of the context of execution, with values set before the execution
of the first C function of the interrupt handler.
This is a PoC, implemented for x86_64 only.
Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Tommaso Cucinotta <tommaso.cucinotta@santannapisa.it>
Cc: Romulo Silva de Oliveira <romulo.deoliveira@ufsc.br>
Cc: Clark Williams <williams@redhat.com>
Cc: linux-kernel@vger.kernel.org
Cc: x86@kernel.org
---
arch/x86/entry/entry_64.S | 9 +++++++++
arch/x86/include/asm/irqflags.h | 30 ++++++++++++++++++++++++++++++
arch/x86/kernel/cpu/common.c | 4 ++++
include/linux/irqflags.h | 4 ++++
kernel/softirq.c | 5 ++++-
5 files changed, 51 insertions(+), 1 deletion(-)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1f0efdb7b629..1471b544241f 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -545,6 +545,7 @@ ENTRY(interrupt_entry)
testb $3, CS+8(%rsp)
jz 1f
+ TASK_CONTEXT_SET_BIT context=TASK_CTX_IRQ
/*
* IRQ from user mode.
*
@@ -561,6 +562,8 @@ ENTRY(interrupt_entry)
1:
ENTER_IRQ_STACK old_rsp=%rdi save_ret=1
+
+ TASK_CONTEXT_SET_BIT context=TASK_CTX_IRQ
/* We entered an interrupt context - irqs are off: */
TRACE_IRQS_OFF
@@ -586,6 +589,7 @@ ret_from_intr:
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
+ TASK_CONTEXT_RESET_BIT context=TASK_CTX_IRQ
LEAVE_IRQ_STACK
testb $3, CS(%rsp)
@@ -780,6 +784,7 @@ ENTRY(\sym)
call interrupt_entry
UNWIND_HINT_REGS indirect=1
call \do_sym /* rdi points to pt_regs */
+ TASK_CONTEXT_RESET_BIT context=TASK_CTX_IRQ
jmp ret_from_intr
END(\sym)
_ASM_NOKPROBE(\sym)
@@ -1403,9 +1408,11 @@ ENTRY(nmi)
* done with the NMI stack.
*/
+ TASK_CONTEXT_SET_BIT context=TASK_CTX_NMI
movq %rsp, %rdi
movq $-1, %rsi
call do_nmi
+ TASK_CONTEXT_RESET_BIT context=TASK_CTX_NMI
/*
* Return back to user mode. We must *not* do the normal exit
@@ -1615,10 +1622,12 @@ end_repeat_nmi:
call paranoid_entry
UNWIND_HINT_REGS
+ TASK_CONTEXT_SET_BIT context=TASK_CTX_NMI
/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
movq %rsp, %rdi
movq $-1, %rsi
call do_nmi
+ TASK_CONTEXT_RESET_BIT context=TASK_CTX_NMI
/* Always restore stashed CR3 value (see paranoid_entry) */
RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 058e40fed167..5a12bc3ea02b 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -3,6 +3,7 @@
#define _X86_IRQFLAGS_H_
#include <asm/processor-flags.h>
+#include <asm/percpu.h>
#ifndef __ASSEMBLY__
@@ -202,4 +203,33 @@ static inline int arch_irqs_disabled(void)
#endif
#endif /* __ASSEMBLY__ */
+#ifdef CONFIG_X86_64
+/*
+ * NOTE: I know this needs to be implemented for 32-bit as well.
+ * But... this is just a PoC.
+ */
+#define ARCH_HAS_TASK_CONTEXT 1
+
+#define TASK_CTX_THREAD 0x0
+#define TASK_CTX_SOFTIRQ 0x1
+#define TASK_CTX_IRQ 0x2
+#define TASK_CTX_NMI 0x4
+
+#ifdef __ASSEMBLY__
+.macro TASK_CONTEXT_SET_BIT context:req
+ orb $\context, PER_CPU_VAR(task_context)
+.endm
+
+.macro TASK_CONTEXT_RESET_BIT context:req
+ andb $~\context, PER_CPU_VAR(task_context)
+.endm
+#else /* __ASSEMBLY__ */
+DECLARE_PER_CPU(unsigned char, task_context);
+
+static __always_inline void task_context_set(unsigned char context)
+{
+ raw_cpu_write_1(task_context, context);
+}
+#endif /* __ASSEMBLY__ */
+#endif /* CONFIG_X86_64 */
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cb28e98a0659..1acbec22319b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1531,6 +1531,8 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
EXPORT_PER_CPU_SYMBOL(__preempt_count);
+DEFINE_PER_CPU(unsigned char, task_context) __visible = 0;
+
/* May not be marked __init: used by software suspend */
void syscall_init(void)
{
@@ -1604,6 +1606,8 @@ EXPORT_PER_CPU_SYMBOL(current_task);
DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
EXPORT_PER_CPU_SYMBOL(__preempt_count);
+DEFINE_PER_CPU(unsigned char, task_context) __visible = 0;
+
/*
* On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
* the top of the kernel stack. Use an extra percpu variable to track the
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 21619c92c377..1c3473bbe5d2 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -168,4 +168,8 @@ do { \
#define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags)
+#ifndef ARCH_HAS_TASK_CONTEXT
+#define task_context_set(context) do {} while (0)
+#endif
+
#endif
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 10277429ed84..324de769dc07 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -410,8 +410,11 @@ void irq_exit(void)
#endif
account_irq_exit_time(current);
preempt_count_sub(HARDIRQ_OFFSET);
- if (!in_interrupt() && local_softirq_pending())
+ if (!in_interrupt() && local_softirq_pending()) {
+ task_context_set(TASK_CTX_SOFTIRQ);
invoke_softirq();
+ task_context_set(TASK_CTX_IRQ);
+ }
tick_irq_exit();
rcu_irq_exit();
--
2.20.1
next prev parent reply other threads:[~2019-04-02 20:04 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-04-02 20:03 [RFC PATCH 0/7] Early task context tracking Daniel Bristot de Oliveira
2019-04-02 20:03 ` Daniel Bristot de Oliveira [this message]
2019-04-02 20:03 ` [RFC PATCH 2/7] trace: Move the trace recursion context enum to trace.h and reuse it Daniel Bristot de Oliveira
2019-04-02 20:03 ` [RFC PATCH 3/7] trace: Optimize trace_get_context_bit() Daniel Bristot de Oliveira
2019-04-02 20:03 ` [RFC PATCH 4/7] trace/ring_buffer: Use trace_get_context_bit() Daniel Bristot de Oliveira
2019-04-02 20:03 ` [RFC PATCH 5/7] trace: Use early task context tracking if available Daniel Bristot de Oliveira
2019-04-02 20:03 ` [RFC PATCH 6/7] events: Create an trace_get_context_bit() Daniel Bristot de Oliveira
2019-04-02 20:03 ` [RFC PATCH 7/7] events: Use early task context tracking if available Daniel Bristot de Oliveira
2019-04-04 0:01 ` [RFC PATCH 0/7] Early task context tracking Andy Lutomirski
2019-04-04 9:42 ` Peter Zijlstra
2019-04-08 12:47 ` Daniel Bristot de Oliveira
2019-04-08 16:08 ` Andy Lutomirski
2019-04-04 17:40 ` Joel Fernandes
2019-04-08 12:54 ` Daniel Bristot de Oliveira
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=90ce8a6a4ca02e1e8a2a43185f193cd72a59d020.1554234787.git.bristot@redhat.com \
--to=bristot@redhat.com \
--cc=acme@kernel.org \
--cc=alexander.shishkin@linux.intel.com \
--cc=bp@alien8.de \
--cc=hpa@zytor.com \
--cc=joel@joelfernandes.org \
--cc=jolsa@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=luto@kernel.org \
--cc=mingo@redhat.com \
--cc=namhyung@kernel.org \
--cc=peterz@infradead.org \
--cc=romulo.deoliveira@ufsc.br \
--cc=rostedt@goodmis.org \
--cc=tglx@linutronix.de \
--cc=tommaso.cucinotta@santannapisa.it \
--cc=williams@redhat.com \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).