linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Daniel Bristot de Oliveira <bristot@redhat.com>
To: linux-kernel@vger.kernel.org
Cc: Steven Rostedt <rostedt@goodmis.org>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Ingo Molnar <mingo@redhat.com>, Andy Lutomirski <luto@kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Borislav Petkov <bp@alien8.de>,
	Peter Zijlstra <peterz@infradead.org>,
	"H. Peter Anvin" <hpa@zytor.com>,
	"Joel Fernandes (Google)" <joel@joelfernandes.org>,
	Jiri Olsa <jolsa@redhat.com>, Namhyung Kim <namhyung@kernel.org>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Tommaso Cucinotta <tommaso.cucinotta@santannapisa.it>,
	Romulo Silva de Oliveira <romulo.deoliveira@ufsc.br>,
	Clark Williams <williams@redhat.com>,
	x86@kernel.org
Subject: [RFC PATCH 1/7] x86/entry: Add support for early task context tracking
Date: Tue,  2 Apr 2019 22:03:53 +0200	[thread overview]
Message-ID: <90ce8a6a4ca02e1e8a2a43185f193cd72a59d020.1554234787.git.bristot@redhat.com> (raw)
In-Reply-To: <cover.1554234787.git.bristot@redhat.com>

Currently, the execution context is identified through the
preempt_counter, but the counter is only set after the first functions
of the IRQ/NMI handler have already run, which can cause the current
context to be misidentified. For instance, ftrace/perf might drop events
in the early stage of IRQ/NMI handlers because the preempt_counter has
not been set yet.

The proposed approach is to use a dedicated per-cpu variable to keep
track of the context of execution, with values set before the execution
of the first C function of the interrupt handler.

This is a PoC for x86_64.

Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Tommaso Cucinotta <tommaso.cucinotta@santannapisa.it>
Cc: Romulo Silva de Oliveira <romulo.deoliveira@ufsc.br>
Cc: Clark Williams <williams@redhat.com>
Cc: linux-kernel@vger.kernel.org
Cc: x86@kernel.org
---
 arch/x86/entry/entry_64.S       |  9 +++++++++
 arch/x86/include/asm/irqflags.h | 30 ++++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/common.c    |  4 ++++
 include/linux/irqflags.h        |  4 ++++
 kernel/softirq.c                |  5 ++++-
 5 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1f0efdb7b629..1471b544241f 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -545,6 +545,7 @@ ENTRY(interrupt_entry)
 	testb	$3, CS+8(%rsp)
 	jz	1f
 
+	TASK_CONTEXT_SET_BIT context=TASK_CTX_IRQ
 	/*
 	 * IRQ from user mode.
 	 *
@@ -561,6 +562,8 @@ ENTRY(interrupt_entry)
 
 1:
 	ENTER_IRQ_STACK old_rsp=%rdi save_ret=1
+
+	TASK_CONTEXT_SET_BIT context=TASK_CTX_IRQ
 	/* We entered an interrupt context - irqs are off: */
 	TRACE_IRQS_OFF
 
@@ -586,6 +589,7 @@ ret_from_intr:
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 
+	TASK_CONTEXT_RESET_BIT context=TASK_CTX_IRQ
 	LEAVE_IRQ_STACK
 
 	testb	$3, CS(%rsp)
@@ -780,6 +784,7 @@ ENTRY(\sym)
 	call	interrupt_entry
 	UNWIND_HINT_REGS indirect=1
 	call	\do_sym	/* rdi points to pt_regs */
+	TASK_CONTEXT_RESET_BIT context=TASK_CTX_IRQ
 	jmp	ret_from_intr
 END(\sym)
 _ASM_NOKPROBE(\sym)
@@ -1403,9 +1408,11 @@ ENTRY(nmi)
 	 * done with the NMI stack.
 	 */
 
+	TASK_CONTEXT_SET_BIT context=TASK_CTX_NMI
 	movq	%rsp, %rdi
 	movq	$-1, %rsi
 	call	do_nmi
+	TASK_CONTEXT_RESET_BIT context=TASK_CTX_NMI
 
 	/*
 	 * Return back to user mode.  We must *not* do the normal exit
@@ -1615,10 +1622,12 @@ end_repeat_nmi:
 	call	paranoid_entry
 	UNWIND_HINT_REGS
 
+	TASK_CONTEXT_SET_BIT context=TASK_CTX_NMI
 	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
 	movq	%rsp, %rdi
 	movq	$-1, %rsi
 	call	do_nmi
+	TASK_CONTEXT_RESET_BIT context=TASK_CTX_NMI
 
 	/* Always restore stashed CR3 value (see paranoid_entry) */
 	RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 058e40fed167..5a12bc3ea02b 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -3,6 +3,7 @@
 #define _X86_IRQFLAGS_H_
 
 #include <asm/processor-flags.h>
+#include <asm/percpu.h>
 
 #ifndef __ASSEMBLY__
 
@@ -202,4 +203,33 @@ static inline int arch_irqs_disabled(void)
 #endif
 #endif /* __ASSEMBLY__ */
 
+#ifdef CONFIG_X86_64
+/*
+ * NOTE: I know I need to implement this to the 32 bits as well.
+ * But... this is just a POC.
+ */
+#define ARCH_HAS_TASK_CONTEXT   1
+
+#define TASK_CTX_THREAD		0x0
+#define TASK_CTX_SOFTIRQ	0x1
+#define TASK_CTX_IRQ		0x2
+#define TASK_CTX_NMI		0x4
+
+#ifdef __ASSEMBLY__
+.macro TASK_CONTEXT_SET_BIT context:req
+	orb	$\context, PER_CPU_VAR(task_context)
+.endm
+
+.macro TASK_CONTEXT_RESET_BIT context:req
+	andb	$~\context, PER_CPU_VAR(task_context)
+.endm
+#else /* __ASSEMBLY__ */
+DECLARE_PER_CPU(unsigned char, task_context);
+
+static __always_inline void task_context_set(unsigned char context)
+{
+	raw_cpu_write_1(task_context, context);
+}
+#endif /* __ASSEMBLY__ */
+#endif /* CONFIG_X86_64 */
 #endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cb28e98a0659..1acbec22319b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1531,6 +1531,8 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
 DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
 EXPORT_PER_CPU_SYMBOL(__preempt_count);
 
+DEFINE_PER_CPU(unsigned char, task_context) __visible = 0;
+
 /* May not be marked __init: used by software suspend */
 void syscall_init(void)
 {
@@ -1604,6 +1606,8 @@ EXPORT_PER_CPU_SYMBOL(current_task);
 DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
 EXPORT_PER_CPU_SYMBOL(__preempt_count);
 
+DEFINE_PER_CPU(unsigned char, task_context) __visible = 0;
+
 /*
  * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
  * the top of the kernel stack.  Use an extra percpu variable to track the
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 21619c92c377..1c3473bbe5d2 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -168,4 +168,8 @@ do {						\
 
 #define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags)
 
+#ifndef ARCH_HAS_TASK_CONTEXT
+#define task_context_set(context) do {} while (0)
+#endif
+
 #endif
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 10277429ed84..324de769dc07 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -410,8 +410,11 @@ void irq_exit(void)
 #endif
 	account_irq_exit_time(current);
 	preempt_count_sub(HARDIRQ_OFFSET);
-	if (!in_interrupt() && local_softirq_pending())
+	if (!in_interrupt() && local_softirq_pending()) {
+		task_context_set(TASK_CTX_SOFTIRQ);
 		invoke_softirq();
+		task_context_set(TASK_CTX_IRQ);
+	}
 
 	tick_irq_exit();
 	rcu_irq_exit();
-- 
2.20.1


  reply	other threads:[~2019-04-02 20:04 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-04-02 20:03 [RFC PATCH 0/7] Early task context tracking Daniel Bristot de Oliveira
2019-04-02 20:03 ` Daniel Bristot de Oliveira [this message]
2019-04-02 20:03 ` [RFC PATCH 2/7] trace: Move the trace recursion context enum to trace.h and reuse it Daniel Bristot de Oliveira
2019-04-02 20:03 ` [RFC PATCH 3/7] trace: Optimize trace_get_context_bit() Daniel Bristot de Oliveira
2019-04-02 20:03 ` [RFC PATCH 4/7] trace/ring_buffer: Use trace_get_context_bit() Daniel Bristot de Oliveira
2019-04-02 20:03 ` [RFC PATCH 5/7] trace: Use early task context tracking if available Daniel Bristot de Oliveira
2019-04-02 20:03 ` [RFC PATCH 6/7] events: Create an trace_get_context_bit() Daniel Bristot de Oliveira
2019-04-02 20:03 ` [RFC PATCH 7/7] events: Use early task context tracking if available Daniel Bristot de Oliveira
2019-04-04  0:01 ` [RFC PATCH 0/7] Early task context tracking Andy Lutomirski
2019-04-04  9:42   ` Peter Zijlstra
2019-04-08 12:47   ` Daniel Bristot de Oliveira
2019-04-08 16:08     ` Andy Lutomirski
2019-04-04 17:40 ` Joel Fernandes
2019-04-08 12:54   ` Daniel Bristot de Oliveira

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=90ce8a6a4ca02e1e8a2a43185f193cd72a59d020.1554234787.git.bristot@redhat.com \
    --to=bristot@redhat.com \
    --cc=acme@kernel.org \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=bp@alien8.de \
    --cc=hpa@zytor.com \
    --cc=joel@joelfernandes.org \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    --cc=romulo.deoliveira@ufsc.br \
    --cc=rostedt@goodmis.org \
    --cc=tglx@linutronix.de \
    --cc=tommaso.cucinotta@santannapisa.it \
    --cc=williams@redhat.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).