From: Mark Rutland <mark.rutland@arm.com>
To: madvenka@linux.microsoft.com
Cc: broonie@kernel.org, jpoimboe@redhat.com, ardb@kernel.org,
	nobuta.keiya@fujitsu.com, sjitindarsingh@gmail.com,
	catalin.marinas@arm.com, will@kernel.org, jmorris@namei.org,
	pasha.tatashin@soleen.com, jthierry@redhat.com,
	linux-arm-kernel@lists.infradead.org,
	live-patching@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: Re: [RFC PATCH v8 1/4] arm64: Make all stack walking functions use arch_stack_walk()
Date: Tue, 24 Aug 2021 14:13:44 +0100	[thread overview]
Message-ID: <20210824131344.GE96738@C02TD0UTHF1T.local> (raw)
In-Reply-To: <20210812190603.25326-2-madvenka@linux.microsoft.com>

On Thu, Aug 12, 2021 at 02:06:00PM -0500, madvenka@linux.microsoft.com wrote:
> From: "Madhavan T. Venkataraman" <madvenka@linux.microsoft.com>
> 
> Currently, there are multiple functions in ARM64 code that walk the
> stack using start_backtrace() and unwind_frame(). Convert all of
> them to use arch_stack_walk(). This makes maintenance easier.

It would be good to split this into a series of patches as Mark Brown
suggested in v7.

> Here is the list of functions:
> 
> 	perf_callchain_kernel()
> 	get_wchan()
> 	return_address()
> 	dump_backtrace()
> 	profile_pc()

Note that arch_stack_walk() depends on CONFIG_STACKTRACE (which is not in
defconfig), so we'll need to reorganise things such that it's always defined,
or factor out the core of that function and add a wrapper such that we
can always use it.
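
For example, the latter could look something like this (a sketch only;
the __arch_stack_walk() name and the exact split are hypothetical):

| /* The core of the unwinder, built unconditionally. */
| static void __arch_stack_walk(stack_trace_consume_fn consume_entry,
| 			      void *cookie, struct task_struct *task,
| 			      struct pt_regs *regs)
| {
| 	/* ... body of today's arch_stack_walk() ... */
| }
|
| #ifdef CONFIG_STACKTRACE
| void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
| 		     struct task_struct *task, struct pt_regs *regs)
| {
| 	__arch_stack_walk(consume_entry, cookie, task, regs);
| }
| #endif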

> Signed-off-by: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
> ---
>  arch/arm64/include/asm/stacktrace.h |  3 ---
>  arch/arm64/kernel/perf_callchain.c  |  5 +---
>  arch/arm64/kernel/process.c         | 39 ++++++++++++++++++-----------
>  arch/arm64/kernel/return_address.c  |  6 +----
>  arch/arm64/kernel/stacktrace.c      | 38 +++-------------------------
>  arch/arm64/kernel/time.c            | 22 +++++++++-------
>  6 files changed, 43 insertions(+), 70 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
> index 8aebc00c1718..e43dea1c6b41 100644
> --- a/arch/arm64/include/asm/stacktrace.h
> +++ b/arch/arm64/include/asm/stacktrace.h
> @@ -61,9 +61,6 @@ struct stackframe {
>  #endif
>  };
>  
> -extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame);
> -extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
> -			    bool (*fn)(void *, unsigned long), void *data);
>  extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
>  			   const char *loglvl);
>  
> diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c
> index 4a72c2727309..2f289013c9c9 100644
> --- a/arch/arm64/kernel/perf_callchain.c
> +++ b/arch/arm64/kernel/perf_callchain.c
> @@ -147,15 +147,12 @@ static bool callchain_trace(void *data, unsigned long pc)
>  void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
>  			   struct pt_regs *regs)
>  {
> -	struct stackframe frame;
> -
>  	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
>  		/* We don't support guest os callchain now */
>  		return;
>  	}
>  
> -	start_backtrace(&frame, regs->regs[29], regs->pc);
> -	walk_stackframe(current, &frame, callchain_trace, entry);
> +	arch_stack_walk(callchain_trace, entry, current, regs);
>  }

We can also update callchain_trace to take the return value of
perf_callchain_store into account, e.g.

| static bool callchain_trace(void *data, unsigned long pc) 
| {
| 	struct perf_callchain_entry_ctx *entry = data;
| 	return perf_callchain_store(entry, pc) == 0;
| }
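
That way the unwind stops as soon as the entry buffer is full
(perf_callchain_store() returns non-zero in that case), rather than
walking the rest of the stack only to discard the entries.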

>  
>  unsigned long perf_instruction_pointer(struct pt_regs *regs)
> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> index c8989b999250..52c12fd26407 100644
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -544,11 +544,28 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
>  	return last;
>  }
>  
> +struct wchan_info {
> +	unsigned long	pc;
> +	int		count;
> +};
> +
> +static bool get_wchan_cb(void *arg, unsigned long pc)
> +{
> +	struct wchan_info *wchan_info = arg;
> +
> +	if (!in_sched_functions(pc)) {
> +		wchan_info->pc = pc;
> +		return false;
> +	}
> +	wchan_info->count--;
> +	return !!wchan_info->count;
> +}

This will terminate one entry earlier than the old logic since we used
to use a post-increment (testing the prior value), and now we're
effectively using a pre-decrement (testing the new value).

I don't think that matters all that much in practice, but it might be
best to keep the original logic, e.g. initialize `count` to 0 and here
do:

	return wchan_info->count++ < 16;
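
For clarity, the whole callback would then look like (a sketch, assuming
the caller initializes `count` to 0 rather than 16):

| static bool get_wchan_cb(void *arg, unsigned long pc)
| {
| 	struct wchan_info *wchan_info = arg;
|
| 	if (!in_sched_functions(pc)) {
| 		wchan_info->pc = pc;
| 		return false;
| 	}
| 	/* keep going for up to 16 sched-function entries, as today */
| 	return wchan_info->count++ < 16;
| }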

> +
>  unsigned long get_wchan(struct task_struct *p)
>  {
> -	struct stackframe frame;
> -	unsigned long stack_page, ret = 0;
> -	int count = 0;
> +	unsigned long stack_page;
> +	struct wchan_info wchan_info;
> +
>  	if (!p || p == current || task_is_running(p))
>  		return 0;
>  
> @@ -556,20 +573,12 @@ unsigned long get_wchan(struct task_struct *p)
>  	if (!stack_page)
>  		return 0;
>  
> -	start_backtrace(&frame, thread_saved_fp(p), thread_saved_pc(p));
> +	wchan_info.pc = 0;
> +	wchan_info.count = 16;
> +	arch_stack_walk(get_wchan_cb, &wchan_info, p, NULL);
>  
> -	do {
> -		if (unwind_frame(p, &frame))
> -			goto out;
> -		if (!in_sched_functions(frame.pc)) {
> -			ret = frame.pc;
> -			goto out;
> -		}
> -	} while (count++ < 16);
> -
> -out:
>  	put_task_stack(p);
> -	return ret;
> +	return wchan_info.pc;
>  }

Other than the comment above, this looks good to me.

>  unsigned long arch_align_stack(unsigned long sp)
> diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c
> index a6d18755652f..92a0f4d434e4 100644
> --- a/arch/arm64/kernel/return_address.c
> +++ b/arch/arm64/kernel/return_address.c
> @@ -35,15 +35,11 @@ NOKPROBE_SYMBOL(save_return_addr);
>  void *return_address(unsigned int level)
>  {
>  	struct return_address_data data;
> -	struct stackframe frame;
>  
>  	data.level = level + 2;
>  	data.addr = NULL;
>  
> -	start_backtrace(&frame,
> -			(unsigned long)__builtin_frame_address(0),
> -			(unsigned long)return_address);
> -	walk_stackframe(current, &frame, save_return_addr, &data);
> +	arch_stack_walk(save_return_addr, &data, current, NULL);
>  
>  	if (!data.level)
>  		return data.addr;

Note that arch_stack_walk() will start with its caller, so
return_address() will be included in the trace where it wasn't
previously, which implies we need to skip an additional level.

That said, I'm not entirely sure why we need to skip 2 levels today; it
might be worth checking that's correct.

We should also mark return_address() as noinline to avoid surprises with
LTO.
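
Something like the below, perhaps (a sketch only; per the above, whether
the right skip count is correct needs checking):

| noinline void *return_address(unsigned int level)
| {
| 	struct return_address_data data;
|
| 	/*
| 	 * Skip save_return_addr() and return_address() itself, plus
| 	 * (assuming the current '+ 2' is right) one extra level now
| 	 * that arch_stack_walk() starts from its caller.
| 	 */
| 	data.level = level + 3;
| 	data.addr = NULL;
|
| 	arch_stack_walk(save_return_addr, &data, current, NULL);
|
| 	if (!data.level)
| 		return data.addr;
| 	return NULL;
| }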

> diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
> index 8982a2b78acf..1800310f92be 100644
> --- a/arch/arm64/kernel/stacktrace.c
> +++ b/arch/arm64/kernel/stacktrace.c
> @@ -151,23 +151,21 @@ void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
>  }
>  NOKPROBE_SYMBOL(walk_stackframe);
>  
> -static void dump_backtrace_entry(unsigned long where, const char *loglvl)
> +static bool dump_backtrace_entry(void *arg, unsigned long where)
>  {
> +	char *loglvl = arg;
>  	printk("%s %pSb\n", loglvl, (void *)where);
> +	return true;
>  }
>  
>  void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
>  		    const char *loglvl)
>  {
> -	struct stackframe frame;
> -	int skip = 0;
> -
>  	pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
>  
>  	if (regs) {
>  		if (user_mode(regs))
>  			return;
> -		skip = 1;
>  	}

We can simplify this to:

	if (regs && user_mode(regs))
		return;

>  
>  	if (!tsk)
> @@ -176,36 +174,8 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
>  	if (!try_get_task_stack(tsk))
>  		return;
>  
> -	if (tsk == current) {
> -		start_backtrace(&frame,
> -				(unsigned long)__builtin_frame_address(0),
> -				(unsigned long)dump_backtrace);
> -	} else {
> -		/*
> -		 * task blocked in __switch_to
> -		 */
> -		start_backtrace(&frame,
> -				thread_saved_fp(tsk),
> -				thread_saved_pc(tsk));
> -	}
> -
>  	printk("%sCall trace:\n", loglvl);
> -	do {
> -		/* skip until specified stack frame */
> -		if (!skip) {
> -			dump_backtrace_entry(frame.pc, loglvl);
> -		} else if (frame.fp == regs->regs[29]) {
> -			skip = 0;
> -			/*
> -			 * Mostly, this is the case where this function is
> -			 * called in panic/abort. As exception handler's
> -			 * stack frame does not contain the corresponding pc
> -			 * at which an exception has taken place, use regs->pc
> -			 * instead.
> -			 */
> -			dump_backtrace_entry(regs->pc, loglvl);
> -		}
> -	} while (!unwind_frame(tsk, &frame));
> +	arch_stack_walk(dump_backtrace_entry, (void *)loglvl, tsk, regs);

It turns out we currently need this skipping to keep the ftrace call
stack balanced, and arch_stack_walk() doesn't currently do the right
thing when starting from regs. That balancing isn't quite right, and
will be wrong in some cases when unwinding across exception boundaries;
we could implement HAVE_FUNCTION_GRAPH_RET_ADDR_PTR using the FP to
solve that.
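
With HAVE_FUNCTION_GRAPH_RET_ADDR_PTR, the graph-tracer fixup in the
unwinder could look something like this (a sketch; the surrounding
unwind_frame() context is elided):

| #ifdef CONFIG_FUNCTION_GRAPH_TRACER
| 	if (tsk->ret_stack &&
| 	    (frame->pc == (unsigned long)return_to_handler)) {
| 		unsigned long orig_pc;
| 		/*
| 		 * Use the FP as the 'retp' cookie so that the correct
| 		 * ret_stack entry is found even when unwinding across
| 		 * exception boundaries.
| 		 */
| 		orig_pc = ftrace_graph_ret_addr(tsk, NULL, frame->pc,
| 						(void *)frame->fp);
| 		if (WARN_ON_ONCE(frame->pc == orig_pc))
| 			return -EINVAL;
| 		frame->pc = orig_pc;
| 	}
| #endif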

>  
>  	put_task_stack(tsk);
>  }
> diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
> index eebbc8d7123e..671b3038a772 100644
> --- a/arch/arm64/kernel/time.c
> +++ b/arch/arm64/kernel/time.c
> @@ -32,22 +32,26 @@
>  #include <asm/stacktrace.h>
>  #include <asm/paravirt.h>
>  
> +static bool profile_pc_cb(void *arg, unsigned long pc)
> +{
> +	unsigned long *prof_pc = arg;
> +
> +	if (in_lock_functions(pc))
> +		return true;
> +	*prof_pc = pc;
> +	return false;
> +}
> +
>  unsigned long profile_pc(struct pt_regs *regs)
>  {
> -	struct stackframe frame;
> +	unsigned long prof_pc = 0;
>  
>  	if (!in_lock_functions(regs->pc))
>  		return regs->pc;
>  
> -	start_backtrace(&frame, regs->regs[29], regs->pc);
> -
> -	do {
> -		int ret = unwind_frame(NULL, &frame);
> -		if (ret < 0)
> -			return 0;
> -	} while (in_lock_functions(frame.pc));
> +	arch_stack_walk(profile_pc_cb, &prof_pc, current, regs);
>  
> -	return frame.pc;
> +	return prof_pc;
>  }
>  EXPORT_SYMBOL(profile_pc);

Modulo the problem above w.r.t. unwinding from regs, this looks good.

Thanks,
Mark.

>  
> -- 
> 2.25.1
> 
