linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Marco Elver <elver@google.com>
To: Marco Elver <elver@google.com>
Cc: "Paul E. McKenney" <paulmck@kernel.org>,
	Andrey Konovalov <andreyknvl@google.com>,
	Alexander Potapenko <glider@google.com>,
	Dmitry Vyukov <dvyukov@google.com>,
	kasan-dev <kasan-dev@googlegroups.com>,
	LKML <linux-kernel@vger.kernel.org>, Qian Cai <cai@lca.pw>
Subject: Re: [PATCH v2] kcsan: Add option for verbose reporting
Date: Sat, 22 Feb 2020 00:11:39 +0100	[thread overview]
Message-ID: <CANpmjNNxtyQy2+-v85=PcjBAqGt=7dcqLi+WA3FS8U94nuVYnw@mail.gmail.com> (raw)
In-Reply-To: <20200221225635.218857-1-elver@google.com>

Superseded by v3 due to typos:
http://lkml.kernel.org/r/20200221231027.230147-1-elver@google.com

Thanks,
-- Marco

On Fri, 21 Feb 2020 at 23:57, Marco Elver <elver@google.com> wrote:
>
> Adds CONFIG_KCSAN_VERBOSE to optionally enable more verbose reports.
> Currently information about the reporting task's held locks and IRQ
> trace events are shown, if they are enabled.
>
> Signed-off-by: Marco Elver <elver@google.com>
> Suggested-by: Qian Cai <cai@lca.pw>
> ---
> v2:
> * Rework obtaining 'current' for the "other thread" -- it now passes
>   'current' and ensures that we stall until the report was printed, so
>   that the lockdep information contained in 'current' is accurate. This
>   was non-trivial but testing so far leads me to conclude this now
>   reliably prints the held locks for the "other thread" (please test
>   more!).
> ---
>  kernel/kcsan/core.c   |   4 +-
>  kernel/kcsan/kcsan.h  |   3 ++
>  kernel/kcsan/report.c | 103 +++++++++++++++++++++++++++++++++++++++++-
>  lib/Kconfig.kcsan     |  13 ++++++
>  4 files changed, 120 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
> index e7387fec66795..065615df88eaa 100644
> --- a/kernel/kcsan/core.c
> +++ b/kernel/kcsan/core.c
> @@ -18,8 +18,8 @@
>  #include "kcsan.h"
>
>  static bool kcsan_early_enable = IS_ENABLED(CONFIG_KCSAN_EARLY_ENABLE);
> -static unsigned int kcsan_udelay_task = CONFIG_KCSAN_UDELAY_TASK;
> -static unsigned int kcsan_udelay_interrupt = CONFIG_KCSAN_UDELAY_INTERRUPT;
> +unsigned int kcsan_udelay_task = CONFIG_KCSAN_UDELAY_TASK;
> +unsigned int kcsan_udelay_interrupt = CONFIG_KCSAN_UDELAY_INTERRUPT;
>  static long kcsan_skip_watch = CONFIG_KCSAN_SKIP_WATCH;
>  static bool kcsan_interrupt_watcher = IS_ENABLED(CONFIG_KCSAN_INTERRUPT_WATCHER);
>
> diff --git a/kernel/kcsan/kcsan.h b/kernel/kcsan/kcsan.h
> index 892de5120c1b6..e282f8b5749e9 100644
> --- a/kernel/kcsan/kcsan.h
> +++ b/kernel/kcsan/kcsan.h
> @@ -13,6 +13,9 @@
>  /* The number of adjacent watchpoints to check. */
>  #define KCSAN_CHECK_ADJACENT 1
>
> +extern unsigned int kcsan_udelay_task;
> +extern unsigned int kcsan_udelay_interrupt;
> +
>  /*
>   * Globally enable and disable KCSAN.
>   */
> diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c
> index 11c791b886f3c..ee8f33d7405fb 100644
> --- a/kernel/kcsan/report.c
> +++ b/kernel/kcsan/report.c
> @@ -1,5 +1,7 @@
>  // SPDX-License-Identifier: GPL-2.0
>
> +#include <linux/debug_locks.h>
> +#include <linux/delay.h>
>  #include <linux/jiffies.h>
>  #include <linux/kernel.h>
>  #include <linux/lockdep.h>
> @@ -31,7 +33,26 @@ static struct {
>         int                     cpu_id;
>         unsigned long           stack_entries[NUM_STACK_ENTRIES];
>         int                     num_stack_entries;
> -} other_info = { .ptr = NULL };
> +
> +       /*
> +        * Optionally pass @current. Typically we do not need to pass @current
> +        * via @other_info since just @task_pid is sufficient. Passing @current
> +        * has additional overhead.
> +        *
> +        * To safely pass @current, we must either use get_task_struct/
> +        * put_task_struct, or stall the thread that populated @other_info.
> +        *
> +        * We cannot rely on get_task_struct/put_task_struct in case
> +        * release_report() races with a task being released, and would have to
> +        * free it in release_report(). This may result in deadlock if we want
> +        * to use KCSAN on the allocators.
> +        *
> +        * Since we also want to reliably print held locks for
> +        * CONFIG_KCSAN_VERBOSE, the current implementation stalls the thread
> +        * that populated @other_info until it has been consumed.
> +        */
> +       struct task_struct      *task;
> +} other_info;
>
>  /*
>   * Information about reported races; used to rate limit reporting.
> @@ -245,6 +266,16 @@ static int sym_strcmp(void *addr1, void *addr2)
>         return strncmp(buf1, buf2, sizeof(buf1));
>  }
>
> +static void print_verbose_info(struct task_struct *task)
> +{
> +       if (!task)
> +               return;
> +
> +       pr_err("\n");
> +       debug_show_held_locks(task);
> +       print_irqtrace_events(task);
> +}
> +
>  /*
>   * Returns true if a report was generated, false otherwise.
>   */
> @@ -319,6 +350,9 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type,
>                                   other_info.num_stack_entries - other_skipnr,
>                                   0);
>
> +               if (IS_ENABLED(CONFIG_KCSAN_VERBOSE))
> +                   print_verbose_info(other_info.task);
> +
>                 pr_err("\n");
>                 pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n",
>                        get_access_type(access_type), ptr, size,
> @@ -340,6 +374,9 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type,
>         stack_trace_print(stack_entries + skipnr, num_stack_entries - skipnr,
>                           0);
>
> +       if (IS_ENABLED(CONFIG_KCSAN_VERBOSE))
> +               print_verbose_info(current);
> +
>         /* Print report footer. */
>         pr_err("\n");
>         pr_err("Reported by Kernel Concurrency Sanitizer on:\n");
> @@ -357,6 +394,67 @@ static void release_report(unsigned long *flags, enum kcsan_report_type type)
>         spin_unlock_irqrestore(&report_lock, *flags);
>  }
>
> +/*
> + * Sets @other_info.task and awaits consumption of @other_info.
> + *
> + * Precondition: report_lock is held.
> + * Postcontiion: report_lock is held.
> + */
> +static void
> +set_other_info_task_blocking(unsigned long *flags, const volatile void *ptr)
> +{
> +       /*
> +        * We may be instrumenting a code-path where current->state is already
> +        * something other than TASK_RUNNING.
> +        */
> +       const bool is_running = current->state == TASK_RUNNING;
> +       /*
> +        * To avoid deadlock in case we are in an interrupt here and this is a
> +        * race with a task on the same CPU (KCSAN_INTERRUPT_WATCHER), provide a
> +        * timeout to ensure this works in all contexts.
> +        *
> +        * Await approximately the worst case delay of the reporting thread (if
> +        * we are not interrupted).
> +        */
> +       int timeout = max(kcsan_udelay_task, kcsan_udelay_interrupt);
> +
> +       other_info.task = current;
> +       do {
> +               if (is_running) {
> +                       /*
> +                        * Let lockdep know the real task is sleeping, to print
> +                        * the held locks (recall we turned lockdep off, so
> +                        * locking/unlocking @report_lock won't be recorded).
> +                        */
> +                       set_current_state(TASK_UNINTERRUPTIBLE);
> +               }
> +               spin_unlock_irqrestore(&report_lock, *flags);
> +               /*
> +                * We cannot call schedule() since we also cannot reliably
> +                * determine if sleeping here is permitted -- see in_atomic().
> +                */
> +
> +               udelay(1);
> +               spin_lock_irqsave(&report_lock, *flags);
> +               if (timeout-- < 0) {
> +                       /*
> +                        * Abort. Reset other_info.task to NULL, since it
> +                        * appears the other thread is still going to consume
> +                        * it. It will result in no verbose info printed for
> +                        * this task.
> +                        */
> +                       other_info.task = NULL;
> +                       break;
> +               }
> +               /*
> +                * If @ptr nor @current matches, then our information has been
> +                * consumed and we may continue. If not, retry.
> +                */
> +       } while (other_info.ptr == ptr && other_info.task == current);
> +       if (is_running)
> +               set_current_state(TASK_RUNNING);
> +}
> +
>  /*
>   * Depending on the report type either sets other_info and returns false, or
>   * acquires the matching other_info and returns true. If other_info is not
> @@ -388,6 +486,9 @@ static bool prepare_report(unsigned long *flags, const volatile void *ptr,
>                 other_info.cpu_id               = cpu_id;
>                 other_info.num_stack_entries    = stack_trace_save(other_info.stack_entries, NUM_STACK_ENTRIES, 1);
>
> +               if (IS_ENABLED(CONFIG_KCSAN_VERBOSE))
> +                       set_other_info_task_blocking(flags, ptr);
> +
>                 spin_unlock_irqrestore(&report_lock, *flags);
>
>                 /*
> diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan
> index 081ed2e1bf7b1..0f1447ff8f558 100644
> --- a/lib/Kconfig.kcsan
> +++ b/lib/Kconfig.kcsan
> @@ -20,6 +20,19 @@ menuconfig KCSAN
>
>  if KCSAN
>
> +config KCSAN_VERBOSE
> +       bool "Show verbose reports with more information about system state"
> +       depends on PROVE_LOCKING
> +       help
> +         If enabled, reports show more information about the system state that
> +         may help better analyze and debug races. This includes held locks and
> +         IRQ trace events.
> +
> +         While this option should generally be benign, we call into more
> +         external functions on report generation; if a race report is
> +         generated from any one of them, system stability may suffer due to
> +         deadlocks or recursion.  If in doubt, say N.
> +
>  config KCSAN_DEBUG
>         bool "Debugging of KCSAN internals"
>
> --
> 2.25.0.265.gbab2e86ba0-goog
>

      reply	other threads:[~2020-02-21 23:11 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-21 22:56 [PATCH v2] kcsan: Add option for verbose reporting Marco Elver
2020-02-21 23:11 ` Marco Elver [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CANpmjNNxtyQy2+-v85=PcjBAqGt=7dcqLi+WA3FS8U94nuVYnw@mail.gmail.com' \
    --to=elver@google.com \
    --cc=andreyknvl@google.com \
    --cc=cai@lca.pw \
    --cc=dvyukov@google.com \
    --cc=glider@google.com \
    --cc=kasan-dev@googlegroups.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=paulmck@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).