All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: 王贇 <yun.wang@linux.alibaba.com>
Cc: Ingo Molnar <mingo@redhat.com>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Mark Rutland <mark.rutland@arm.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Jiri Olsa <jolsa@redhat.com>, Namhyung Kim <namhyung@kernel.org>,
	Alexei Starovoitov <ast@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Andrii Nakryiko <andrii@kernel.org>,
	Martin KaFai Lau <kafai@fb.com>, Song Liu <songliubraving@fb.com>,
	Yonghong Song <yhs@fb.com>,
	John Fastabend <john.fastabend@gmail.com>,
	KP Singh <kpsingh@kernel.org>,
	"open list:PERFORMANCE EVENTS SUBSYSTEM" 
	<linux-perf-users@vger.kernel.org>,
	"open list:PERFORMANCE EVENTS SUBSYSTEM" 
	<linux-kernel@vger.kernel.org>,
	"open list:BPF (Safe dynamic programs and tools)" 
	<netdev@vger.kernel.org>,
	"open list:BPF (Safe dynamic programs and tools)" 
	<bpf@vger.kernel.org>
Subject: Re: [RFC PATCH] perf: fix panic by mark recursion inside perf_log_throttle
Date: Tue, 14 Sep 2021 12:28:37 +0200	[thread overview]
Message-ID: <YUB5VchM3a/MiZpX@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <3fb7c51f-696b-da70-1965-1dda9910cb14@linux.alibaba.com>

On Tue, Sep 14, 2021 at 09:58:44AM +0800, 王贇 wrote:
> On 2021/9/13 下午6:24, Peter Zijlstra wrote:

> > I'm confused tho; where does the #DF come from? Because taking a #PF
> > from NMI should be perfectly fine.
> > 
> > AFAICT that callchain is something like:
> > 
> > 	NMI
> > 	  perf_event_nmi_handler()
> > 	    (part of the chain is missing here)
> > 	      perf_log_throttle()
> > 	        perf_output_begin() /* events/ring_buffer.c */
> > 		  rcu_read_lock()
> > 		    rcu_lock_acquire()
> > 		      lock_acquire()
> > 		        trace_lock_acquire() --> perf_trace_foo
> > 
> > 			  ...
> > 			    perf_callchain()
> > 			      perf_callchain_user()
> > 			        #PF (fully expected during a userspace callchain)
> > 				  (some stuff, until the first __fentry)
> > 				    perf_trace_function_call
> > 				      perf_trace_buf_alloc()
> > 				        perf_swevent_get_recursion_context()
> > 					  *BOOM*
> > 
> > Now, supposedly we then take another #PF from get_recursion_context() or
> > something, but that doesn't make sense. That should just work...
> > 
> > Can you figure out what's going wrong there? going with the RIP, this
> > almost looks like 'swhash->recursion' goes splat, but again that makes
> > no sense, that's a per-cpu variable.
> 
> That's true, I actually have tried several approach to avoid the issue, but
> it trigger panic as long as we access 'swhash->recursion', the array should
> be accessible but somehow broken, that's why I consider this a suspected
> stack overflow, since nmi repeated and trace seems very long, but just a
> suspect...

You can simply increase the exception stack size to test this:

diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index a8d4ad856568..e9e2c3ba5923 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -15,7 +15,7 @@
 #define THREAD_SIZE_ORDER	(2 + KASAN_STACK_ORDER)
 #define THREAD_SIZE  (PAGE_SIZE << THREAD_SIZE_ORDER)
 
-#define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER)
+#define EXCEPTION_STACK_ORDER (1 + KASAN_STACK_ORDER)
 #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
 
 #define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER)



Also, something like this might be useful:


diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index f248eb2ac2d4..4dfdbb9395eb 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -33,6 +33,8 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
 
 bool in_entry_stack(unsigned long *stack, struct stack_info *info);
 
+bool in_exception_stack_guard(unsigned long *stack);
+
 int get_stack_info(unsigned long *stack, struct task_struct *task,
 		   struct stack_info *info, unsigned long *visit_mask);
 bool get_stack_info_noinstr(unsigned long *stack, struct task_struct *task,
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 5601b95944fa..056cf4f31599 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -126,6 +126,39 @@ static __always_inline bool in_exception_stack(unsigned long *stack, struct stac
 	return true;
 }
 
+noinstr bool in_exception_stack_guard(unsigned long *stack)
+{
+	unsigned long begin, end, stk = (unsigned long)stack;
+	const struct estack_pages *ep;
+	unsigned int k;
+
+	BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
+
+	begin = (unsigned long)__this_cpu_read(cea_exception_stacks);
+	/*
+	 * Handle the case where stack trace is collected _before_
+	 * cea_exception_stacks had been initialized.
+	 */
+	if (!begin)
+		return false;
+
+	end = begin + sizeof(struct cea_exception_stacks);
+	/* Bail if @stack is outside the exception stack area. */
+	if (stk < begin || stk >= end)
+		return false;
+
+	/* Calc page offset from start of exception stacks */
+	k = (stk - begin) >> PAGE_SHIFT;
+	/* Lookup the page descriptor */
+	ep = &estack_pages[k];
+	/* Guard page? */
+	if (!ep->size)
+		return true;
+
+	return false;
+}
+
+
 static __always_inline bool in_irq_stack(unsigned long *stack, struct stack_info *info)
 {
 	unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a58800973aed..8b043ed02c0d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -459,6 +459,9 @@ DEFINE_IDTENTRY_DF(exc_double_fault)
 		handle_stack_overflow("kernel stack overflow (double-fault)",
 				      regs, address);
 	}
+
+	if (in_exception_stack_guard((void *)address))
+		pr_emerg("PANIC: exception stack guard: 0x%lx\n", address);
 #endif
 
 	pr_emerg("PANIC: double fault, error_code: 0x%lx\n", error_code);

  reply	other threads:[~2021-09-14 10:29 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-09  3:13 [RFC PATCH] perf: fix panic by mark recursion inside perf_log_throttle 王贇
2021-09-09  6:10 ` 王贇
2021-09-10 15:38 ` Peter Zijlstra
2021-09-13  3:00   ` 王贇
2021-09-13  3:21     ` 王贇
2021-09-13 10:24     ` Peter Zijlstra
2021-09-13 10:36       ` Peter Zijlstra
2021-09-14  2:02         ` 王贇
2021-09-14  1:58       ` 王贇
2021-09-14 10:28         ` Peter Zijlstra [this message]
2021-09-15  1:51           ` 王贇
2021-09-15 15:17             ` [PATCH] x86/dumpstack/64: Add guard pages to stack_info Peter Zijlstra
2021-09-16  3:34               ` 王贇
2021-09-16  3:47               ` 王贇
2021-09-16  8:00                 ` Peter Zijlstra
2021-09-16  8:03                   ` Peter Zijlstra
2021-09-16 10:02                     ` Peter Zijlstra
2021-09-17  2:15                       ` 王贇
2021-09-17  3:02                       ` 王贇
2021-09-17 10:21                         ` Peter Zijlstra
2021-09-17 16:40                           ` Peter Zijlstra
2021-09-18  2:30                             ` 王贇
2021-09-18  6:56                               ` Peter Zijlstra
2021-09-18  2:38                             ` 王贇
2021-09-13  3:30 ` [PATCH] perf: fix panic by disable ftrace on fault.c 王贇
2021-09-13 14:49   ` Dave Hansen
2021-09-14  1:52     ` 王贇
2021-09-14  3:02       ` 王贇
2021-09-14  7:23         ` 王贇
2021-09-14 16:16           ` Dave Hansen
2021-09-15  1:56             ` 王贇
2021-09-15  3:27               ` Dave Hansen
2021-09-15  7:22                 ` 王贇
2021-09-15  7:34                   ` 王贇
2021-09-15 15:19                     ` [PATCH] x86: Increase exception stack sizes Peter Zijlstra
2021-09-16  3:42                       ` 王贇
2021-09-21  7:28                       ` [tip: x86/core] " tip-bot2 for Peter Zijlstra
2021-09-21 12:41                       ` tip-bot2 for Peter Zijlstra
2021-09-14  2:08     ` [PATCH] perf: fix panic by disable ftrace on fault.c 王贇

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YUB5VchM3a/MiZpX@hirez.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=acme@kernel.org \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=john.fastabend@gmail.com \
    --cc=jolsa@redhat.com \
    --cc=kafai@fb.com \
    --cc=kpsingh@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=songliubraving@fb.com \
    --cc=yhs@fb.com \
    --cc=yun.wang@linux.alibaba.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.