From: Petr Mladek <pmladek@suse.com>
To: Steven Rostedt <rostedt@goodmis.org>
Cc: linux-kernel@vger.kernel.org,
	Masami Hiramatsu <mhiramat@kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@kernel.org>,
	Josh Poimboeuf <jpoimboe@redhat.com>,
	Jiri Kosina <jikos@kernel.org>, Miroslav Benes <mbenes@suse.cz>,
	Jonathan Corbet <corbet@lwn.net>, Guo Ren <guoren@kernel.org>,
	"James E.J. Bottomley" <James.Bottomley@hansenpartnership.com>,
	Helge Deller <deller@gmx.de>,
	Michael Ellerman <mpe@ellerman.id.au>,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Paul Mackerras <paulus@samba.org>,
	Heiko Carstens <hca@linux.ibm.com>,
	Vasily Gorbik <gor@linux.ibm.com>,
	Christian Borntraeger <borntraeger@de.ibm.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Borislav Petkov <bp@alien8.de>,
	x86@kernel.org, "H. Peter Anvin" <hpa@zytor.com>,
	Kees Cook <keescook@chromium.org>,
	Anton Vorontsov <anton@enomsg.org>,
	Colin Cross <ccross@android.com>, Tony Luck <tony.luck@intel.com>,
	Joe Lawrence <joe.lawrence@redhat.com>,
	Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>,
	Mauro Carvalho Chehab <mchehab+huawei@kernel.org>,
	Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
	linux-doc@vger.kernel.org, linux-csky@vger.kernel.org,
	linux-parisc@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
	linux-s390@vger.kernel.org, live-patching@vger.kernel.org
Subject: Re: [PATCH 11/11 v2.2] ftrace: Add recording of functions that caused recursion
Date: Tue, 3 Nov 2020 15:10:43 +0100
Message-ID: <20201103141043.GO20201@alley>
In-Reply-To: <20201102142254.7e148f8a@gandalf.local.home>

On Mon 2020-11-02 14:23:14, Steven Rostedt wrote:
> From c532ff6b048dd4a12943b05c7b8ce30666c587c8 Mon Sep 17 00:00:00 2001
> From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
> Date: Thu, 29 Oct 2020 15:27:06 -0400
> Subject: [PATCH] ftrace: Add recording of functions that caused recursion
> 
> This adds CONFIG_FTRACE_RECORD_RECURSION that will record to a file
> "recursed_functions" all the functions that caused recursion while a
> callback to the function tracer was running.
> 
> diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
> index ac3d73484cb2..1cba5fe8777a 100644
> --- a/include/linux/trace_recursion.h
> +++ b/include/linux/trace_recursion.h
> @@ -142,7 +142,28 @@ static __always_inline int trace_get_context_bit(void)
>  			pc & HARDIRQ_MASK ? TRACE_CTX_IRQ : TRACE_CTX_SOFTIRQ;
>  }
>  
> -static __always_inline int trace_test_and_set_recursion(int start, int max)
> +#ifdef CONFIG_FTRACE_RECORD_RECURSION
> +extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip);
> +/*
> + * The paranoid_test check can cause dropped reports (unlikely), but
> + * if the recursion is common, it will likely still be recorded later.
> + * But the paranoid_test is needed to make sure we don't crash.
> + */
> +# define do_ftrace_record_recursion(ip, pip)				\
> +	do {								\
> +		static atomic_t paranoid_test;				\
> +		if (!atomic_read(&paranoid_test)) {			\
> +			atomic_inc(&paranoid_test);			\
> +			ftrace_record_recursion(ip, pip);		\
> +			atomic_dec(&paranoid_test);			\

BTW: What exactly is the purpose of the paranoid_test check, please?

It prevents nested ftrace_record_recursion() calls on the same CPU
(direct recursion, or nesting from IRQ or NMI context).

Parallel calls from different CPUs are still possible:

CPU0					CPU1
if (!atomic_read(&paranoid_test))	if (!atomic_read(&paranoid_test))
   // passes				  // passes
   atomic_inc(&paranoid_test);            atomic_inc(&paranoid_test);


I do not see how a nested call could cause a crash while a parallel
one would be OK.
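
To make the window concrete, here is a small standalone userspace
sketch (plain C11 atomics, not the kernel code; the thread setup and
the overlap_seen flag are made up just for illustration). It shows
that two callers can both pass the !atomic_read() check before either
of them increments paranoid_test, exactly as in the CPU0/CPU1 diagram
above:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int paranoid_test;
static atomic_int overlap_seen;         /* set when two callers overlap */
static pthread_barrier_t barrier;

static void *guarded_call(void *arg)
{
        /* Line both threads up right before the check. */
        pthread_barrier_wait(&barrier);

        if (!atomic_load(&paranoid_test)) {             /* both may see 0 */
                if (atomic_fetch_add(&paranoid_test, 1) > 0)
                        atomic_store(&overlap_seen, 1); /* the other caller is inside too */
                /* ... ftrace_record_recursion(ip, pip) would run here ... */
                atomic_fetch_sub(&paranoid_test, 1);
        }
        return NULL;
}

int main(void)
{
        pthread_t t1, t2;

        pthread_barrier_init(&barrier, NULL, 2);
        pthread_create(&t1, NULL, guarded_call, NULL);
        pthread_create(&t2, NULL, guarded_call, NULL);
        pthread_join(t1, NULL);
        pthread_join(t2, NULL);

        /* Frequently prints 1: the read-then-inc guard is not a test-and-set. */
        printf("overlap_seen = %d\n", atomic_load(&overlap_seen));
        return 0;
}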


> --- /dev/null
> +++ b/kernel/trace/trace_recursion_record.c
> @@ -0,0 +1,236 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#include <linux/seq_file.h>
> +#include <linux/kallsyms.h>
> +#include <linux/module.h>
> +#include <linux/ftrace.h>
> +#include <linux/fs.h>
> +
> +#include "trace_output.h"
> +
> +struct recursed_functions {
> +	unsigned long		ip;
> +	unsigned long		parent_ip;
> +};
> +
> +static struct recursed_functions recursed_functions[CONFIG_FTRACE_RECORD_RECURSION_SIZE];
> +static atomic_t nr_records;
> +
> +/*
> + * Cache the last found function. Yes, updates to this is racey, but
> + * so is memory cache ;-)
> + */
> +static unsigned long cached_function;
> +
> +void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip)
> +{
> +	int index = 0;
> +	int i;
> +	unsigned long old;
> +
> + again:
> +	/* First check the last one recorded */
> +	if (ip == cached_function)
> +		return;
> +
> +	i = atomic_read(&nr_records);
> +	/* nr_records is -1 when clearing records */
> +	smp_mb__after_atomic();
> +	if (i < 0)
> +		return;
> +
> +	/*
> +	 * If there's two writers and this writer comes in second,
> +	 * the cmpxchg() below to update the ip will fail. Then this
> +	 * writer will try again. It is possible that index will now
> +	 * be greater than nr_records. This is because the writer
> +	 * that succeeded has not updated the nr_records yet.
> +	 * This writer could keep trying again until the other writer
> +	 * updates nr_records. But if the other writer takes an
> +	 * interrupt, and that interrupt locks up that CPU, we do
> +	 * not want this CPU to lock up due to the recursion protection,
> +	 * and have a bug report showing this CPU as the cause of
> +	 * locking up the computer. To not lose this record, this
> +	 * writer will simply use the next position to update the
> +	 * recursed_functions, and it will update the nr_records
> +	 * accordingly.
> +	 */
> +	if (index < i)
> +		index = i;
> +	if (index >= CONFIG_FTRACE_RECORD_RECURSION_SIZE)
> +		return;
> +
> +	for (i = index - 1; i >= 0; i--) {
> +		if (recursed_functions[i].ip == ip) {
> +			cached_function = ip;
> +			return;
> +		}
> +	}
> +
> +	cached_function = ip;
> +
> +	/*
> +	 * We only want to add a function if it hasn't been added before.
> +	 * Add to the current location before incrementing the count.
> +	 * If it fails to add, then increment the index (save in i)
> +	 * and try again.
> +	 */
> +	old = cmpxchg(&recursed_functions[index].ip, 0, ip);
> +	if (old != 0) {
> +		/* Did something else already added this for us? */
> +		if (old == ip)
> +			return;
> +		/* Try the next location (use i for the next index) */
> +		index++;
> +		goto again;
> +	}
> +
> +	recursed_functions[index].parent_ip = parent_ip;
> +
> +	/*
> +	 * It's still possible that we could race with the clearing
> +	 *    CPU0                                    CPU1
> +	 *    ----                                    ----
> +	 *                                       ip = func
> +	 *  nr_records = -1;
> +	 *  recursed_functions[0] = 0;
> +	 *                                       i = -1
> +	 *                                       if (i < 0)
> +	 *  nr_records = 0;
> +	 *  (new recursion detected)
> +	 *      recursed_functions[0] = func
> +	 *                                            cmpxchg(recursed_functions[0],
> +	 *                                                    func, 0)
> +	 *
> +	 * But the worse that could happen is that we get a zero in
> +	 * the recursed_functions array, and it's likely that "func" will
> +	 * be recorded again.
> +	 */
> +	i = atomic_read(&nr_records);
> +	smp_mb__after_atomic();
> +	if (i < 0)
> +		cmpxchg(&recursed_functions[index].ip, ip, 0);
> +	else if (i <= index)
> +		atomic_cmpxchg(&nr_records, i, index + 1);

Are you aware of the following race, please?

CPU0					CPU1

ftrace_record_recursion()

   i = atomic_read(&nr_records);
   // i = 20   (for example)
   if (index < i)
     index = i;
     // index = 20;

					recursed_function_open()
					atomic_set(&nr_records, -1);
					memset(recursed_functions, 0, );
					atomic_set(&nr_records, 0);

   // successfully store ip at index == 20
   cmpxchg(&recursed_functions[index].ip, 0, ip);
   recursed_functions[index].parent_ip = parent_ip;

   // check race with clearing
   i = atomic_read(&nr_records);
   // i == 0
   if (i < 0)
      // no
   else
	atomic_cmpxchg(&nr_records, i, index + 1);

RESULT:

   + nr_records == 21
   + and slots 0..19 are zeroed


I played with the code and ended up with my head entangled in
chicken-and-egg problems.

I believe that a solution might be a single atomic variable that
combines nr_records with a cleanup_count.

ftrace_record_recursion() would be allowed to increase nr_records
only when cleanup_count is still the same. cleanup_count would
be incremented together with clearing nr_records.
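
Something like the following rough and untested sketch is what I have
in mind. All the names here (records_state, RECORD_GEN_SHIFT, the two
helpers) are made up for illustration, memory barriers are omitted,
generation wrap-around is ignored, and the window around the memset()
would still need the same care as in the patch:

/* cleanup_count in the upper bits, nr_records in the lower bits */
#define RECORD_GEN_SHIFT        16
#define RECORD_CNT_MASK         ((1 << RECORD_GEN_SHIFT) - 1)

static atomic_t records_state;  /* (cleanup_count << 16) | nr_records */

/* Clearing bumps the generation and zeroes the count in one store. */
static void recursed_functions_clear(void)
{
        int gen = (atomic_read(&records_state) >> RECORD_GEN_SHIFT) + 1;

        atomic_set(&records_state, gen << RECORD_GEN_SHIFT);
        memset(recursed_functions, 0, sizeof(recursed_functions));
}

/* A writer may raise nr_records only if the generation did not change. */
static bool try_update_nr_records(int old_state, int new_count)
{
        int new_state = (old_state & ~RECORD_CNT_MASK) | new_count;

        return atomic_cmpxchg(&records_state, old_state, new_state) == old_state;
}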


Well, I am not sure if it is worth the effort. The race is rather
theoretical. In the worst case, the recursed_functions array might
contain many zero entries.

Anyway, it is yet another lesson for me that lockless algorithms are
trickier than one would expect.

Best Regards,
Petr

