All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] kunit: added lockdep support
@ 2020-08-06 20:37 Uriel Guajardo
  2020-08-06 20:43 ` Uriel Guajardo
  2020-08-10 20:34 ` Brendan Higgins
  0 siblings, 2 replies; 11+ messages in thread
From: Uriel Guajardo @ 2020-08-06 20:37 UTC (permalink / raw)
  To: brendanhiggins; +Cc: urielguajardo, linux-kselftest, linux-kernel

From: Uriel Guajardo <urielguajardo@google.com>

KUnit tests will now fail if lockdep detects an error during a test
case.

The idea comes from how lib/locking-selftest [1] checks for lock errors: we
first check if lock debugging is turned on. If not, an error must have
occurred, so we fail the test and restart lockdep for the next test case.

Like the locking selftests, we also fix possible preemption count
corruption from lock bugs.

Depends on kunit: support failure from dynamic analysis tools [2]

[1] https://elixir.bootlin.com/linux/v5.7.12/source/lib/locking-selftest.c#L1137

[2] https://lore.kernel.org/linux-kselftest/20200806174326.3577537-1-urielguajardojr@gmail.com/

Signed-off-by: Uriel Guajardo <urielguajardo@google.com>
---
 lib/kunit/test.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/lib/kunit/test.c b/lib/kunit/test.c
index d8189d827368..0838ececa005 100644
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -11,6 +11,8 @@
 #include <linux/kref.h>
 #include <linux/sched/debug.h>
 #include <linux/sched.h>
+#include <linux/lockdep.h>
+#include <linux/debug_locks.h>
 
 #include "debugfs.h"
 #include "string-stream.h"
@@ -22,6 +24,26 @@ void kunit_fail_current_test(void)
 		kunit_set_failure(current->kunit_test);
 }
 
+static inline void kunit_check_locking_bugs(struct kunit *test,
+					    unsigned long saved_preempt_count)
+{
+	preempt_count_set(saved_preempt_count);
+#ifdef CONFIG_TRACE_IRQFLAGS
+	if (softirq_count())
+		current->softirqs_enabled = 0;
+	else
+		current->softirqs_enabled = 1;
+#endif
+#if IS_ENABLED(CONFIG_LOCKDEP)
+	local_irq_disable();
+	if (!debug_locks) {
+		kunit_set_failure(test);
+		lockdep_reset();
+	}
+	local_irq_enable();
+#endif
+}
+
 static void kunit_print_tap_version(void)
 {
 	static bool kunit_has_printed_tap_version;
@@ -289,6 +311,7 @@ static void kunit_try_run_case(void *data)
 	struct kunit *test = ctx->test;
 	struct kunit_suite *suite = ctx->suite;
 	struct kunit_case *test_case = ctx->test_case;
+	unsigned long saved_preempt_count = preempt_count();
 
 	current->kunit_test = test;
 
@@ -298,7 +321,8 @@ static void kunit_try_run_case(void *data)
 	 * thread will resume control and handle any necessary clean up.
 	 */
 	kunit_run_case_internal(test, suite, test_case);
-	/* This line may never be reached. */
+	/* These lines may never be reached. */
+	kunit_check_locking_bugs(test, saved_preempt_count);
 	kunit_run_case_cleanup(test, suite);
 }
 
-- 
2.28.0.236.gb10cc79966-goog


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] kunit: added lockdep support
  2020-08-06 20:37 [PATCH] kunit: added lockdep support Uriel Guajardo
@ 2020-08-06 20:43 ` Uriel Guajardo
  2020-08-10 20:35   ` Brendan Higgins
  2020-08-10 20:34 ` Brendan Higgins
  1 sibling, 1 reply; 11+ messages in thread
From: Uriel Guajardo @ 2020-08-06 20:43 UTC (permalink / raw)
  To: Uriel Guajardo; +Cc: Brendan Higgins, linux-kselftest, linux-kernel

On Thu, Aug 6, 2020 at 3:37 PM Uriel Guajardo <urielguajardojr@gmail.com> wrote:
>
> From: Uriel Guajardo <urielguajardo@google.com>
>
> KUnit tests will now fail if lockdep detects an error during a test
> case.
>
> The idea comes from how lib/locking-selftest [1] checks for lock errors: we
> first if lock debugging is turned on. If not, an error must have
> occurred, so we fail the test and restart lockdep for the next test case.
>
> Like the locking selftests, we also fix possible preemption count
> corruption from lock bugs.
>
> Depends on kunit: support failure from dynamic analysis tools [2]
>
> [1] https://elixir.bootlin.com/linux/v5.7.12/source/lib/locking-selftest.c#L1137
>
> [2] https://lore.kernel.org/linux-kselftest/20200806174326.3577537-1-urielguajardojr@gmail.com/
>
> Signed-off-by: Uriel Guajardo <urielguajardo@google.com>
> ---
>  lib/kunit/test.c | 26 +++++++++++++++++++++++++-
>  1 file changed, 25 insertions(+), 1 deletion(-)
>
> diff --git a/lib/kunit/test.c b/lib/kunit/test.c
> index d8189d827368..0838ececa005 100644
> --- a/lib/kunit/test.c
> +++ b/lib/kunit/test.c
> @@ -11,6 +11,8 @@
>  #include <linux/kref.h>
>  #include <linux/sched/debug.h>
>  #include <linux/sched.h>
> +#include <linux/lockdep.h>
> +#include <linux/debug_locks.h>
>
>  #include "debugfs.h"
>  #include "string-stream.h"
> @@ -22,6 +24,26 @@ void kunit_fail_current_test(void)
>                 kunit_set_failure(current->kunit_test);
>  }
>
> +static inline void kunit_check_locking_bugs(struct kunit *test,
> +                                           unsigned long saved_preempt_count)
> +{
> +       preempt_count_set(saved_preempt_count);
> +#ifdef CONFIG_TRACE_IRQFLAGS
> +       if (softirq_count())
> +               current->softirqs_enabled = 0;
> +       else
> +               current->softirqs_enabled = 1;
> +#endif

I am not entirely sure why lib/locking-selftest enables/disables
softirqs, but I suspect it has to do with the fact that preempt_count
became corrupted, and somehow softirqs became incorrectly
enabled/disabled as a result. The resetting of the preemption count
will undo the enabling/disabling accordingly. Any insight on this
would be appreciated!

> +#if IS_ENABLED(CONFIG_LOCKDEP)
> +       local_irq_disable();
> +       if (!debug_locks) {
> +               kunit_set_failure(test);
> +               lockdep_reset();
> +       }
> +       local_irq_enable();
> +#endif
> +}
> +
>  static void kunit_print_tap_version(void)
>  {
>         static bool kunit_has_printed_tap_version;
> @@ -289,6 +311,7 @@ static void kunit_try_run_case(void *data)
>         struct kunit *test = ctx->test;
>         struct kunit_suite *suite = ctx->suite;
>         struct kunit_case *test_case = ctx->test_case;
> +       unsigned long saved_preempt_count = preempt_count();
>
>         current->kunit_test = test;
>
> @@ -298,7 +321,8 @@ static void kunit_try_run_case(void *data)
>          * thread will resume control and handle any necessary clean up.
>          */
>         kunit_run_case_internal(test, suite, test_case);
> -       /* This line may never be reached. */
> +       /* These lines may never be reached. */
> +       kunit_check_locking_bugs(test, saved_preempt_count);
>         kunit_run_case_cleanup(test, suite);
>  }
>
> --
> 2.28.0.236.gb10cc79966-goog
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] kunit: added lockdep support
  2020-08-06 20:37 [PATCH] kunit: added lockdep support Uriel Guajardo
  2020-08-06 20:43 ` Uriel Guajardo
@ 2020-08-10 20:34 ` Brendan Higgins
  1 sibling, 0 replies; 11+ messages in thread
From: Brendan Higgins @ 2020-08-10 20:34 UTC (permalink / raw)
  To: Uriel Guajardo
  Cc: Uriel Guajardo, open list:KERNEL SELFTEST FRAMEWORK,
	Linux Kernel Mailing List

On Thu, Aug 6, 2020 at 1:37 PM Uriel Guajardo <urielguajardojr@gmail.com> wrote:
>
> From: Uriel Guajardo <urielguajardo@google.com>
>
> KUnit tests will now fail if lockdep detects an error during a test
> case.
>
> The idea comes from how lib/locking-selftest [1] checks for lock errors: we
> first if lock debugging is turned on. If not, an error must have
> occurred, so we fail the test and restart lockdep for the next test case.
>
> Like the locking selftests, we also fix possible preemption count
> corruption from lock bugs.
>
> Depends on kunit: support failure from dynamic analysis tools [2]
>
> [1] https://elixir.bootlin.com/linux/v5.7.12/source/lib/locking-selftest.c#L1137
>
> [2] https://lore.kernel.org/linux-kselftest/20200806174326.3577537-1-urielguajardojr@gmail.com/
>
> Signed-off-by: Uriel Guajardo <urielguajardo@google.com>

Reviewed-by: Brendan Higgins <brendanhiggins@google.com>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] kunit: added lockdep support
  2020-08-06 20:43 ` Uriel Guajardo
@ 2020-08-10 20:35   ` Brendan Higgins
  2020-08-10 21:17     ` Uriel Guajardo
  0 siblings, 1 reply; 11+ messages in thread
From: Brendan Higgins @ 2020-08-10 20:35 UTC (permalink / raw)
  To: Uriel Guajardo
  Cc: Uriel Guajardo, open list:KERNEL SELFTEST FRAMEWORK,
	Linux Kernel Mailing List

On Thu, Aug 6, 2020 at 1:43 PM Uriel Guajardo <urielguajardo@google.com> wrote:
>
> On Thu, Aug 6, 2020 at 3:37 PM Uriel Guajardo <urielguajardojr@gmail.com> wrote:
> >
> > From: Uriel Guajardo <urielguajardo@google.com>
> >
> > KUnit tests will now fail if lockdep detects an error during a test
> > case.
> >
> > The idea comes from how lib/locking-selftest [1] checks for lock errors: we
> > first if lock debugging is turned on. If not, an error must have
> > occurred, so we fail the test and restart lockdep for the next test case.
> >
> > Like the locking selftests, we also fix possible preemption count
> > corruption from lock bugs.

Sorry, just noticed: You probably want to send this to some of the
lockdep maintainers or the maintainers of the kselftest for lockdep.

> > Depends on kunit: support failure from dynamic analysis tools [2]
> >
> > [1] https://elixir.bootlin.com/linux/v5.7.12/source/lib/locking-selftest.c#L1137
> >
> > [2] https://lore.kernel.org/linux-kselftest/20200806174326.3577537-1-urielguajardojr@gmail.com/
> >
> > Signed-off-by: Uriel Guajardo <urielguajardo@google.com>
> > ---
> >  lib/kunit/test.c | 26 +++++++++++++++++++++++++-
> >  1 file changed, 25 insertions(+), 1 deletion(-)
> >
> > diff --git a/lib/kunit/test.c b/lib/kunit/test.c
> > index d8189d827368..0838ececa005 100644
> > --- a/lib/kunit/test.c
> > +++ b/lib/kunit/test.c
> > @@ -11,6 +11,8 @@
> >  #include <linux/kref.h>
> >  #include <linux/sched/debug.h>
> >  #include <linux/sched.h>
> > +#include <linux/lockdep.h>
> > +#include <linux/debug_locks.h>
> >
> >  #include "debugfs.h"
> >  #include "string-stream.h"
> > @@ -22,6 +24,26 @@ void kunit_fail_current_test(void)
> >                 kunit_set_failure(current->kunit_test);
> >  }
> >
> > +static inline void kunit_check_locking_bugs(struct kunit *test,
> > +                                           unsigned long saved_preempt_count)
> > +{
> > +       preempt_count_set(saved_preempt_count);
> > +#ifdef CONFIG_TRACE_IRQFLAGS
> > +       if (softirq_count())
> > +               current->softirqs_enabled = 0;
> > +       else
> > +               current->softirqs_enabled = 1;
> > +#endif
>
> I am not entirely sure why lib/locking-selftests enables/disables
> softirqs, but I suspect it has to do with the fact that preempt_count
> became corrupted, and somehow softirqs became incorrectly
> enabled/disabled as a result. The resetting of the preemption count
> will undo the enabling/disabling accordingly. Any insight on this
> would be appreciated!
>
> > +#if IS_ENABLED(CONFIG_LOCKDEP)
> > +       local_irq_disable();
> > +       if (!debug_locks) {
> > +               kunit_set_failure(test);
> > +               lockdep_reset();
> > +       }
> > +       local_irq_enable();
> > +#endif
> > +}
> > +
> >  static void kunit_print_tap_version(void)
> >  {
> >         static bool kunit_has_printed_tap_version;
> > @@ -289,6 +311,7 @@ static void kunit_try_run_case(void *data)
> >         struct kunit *test = ctx->test;
> >         struct kunit_suite *suite = ctx->suite;
> >         struct kunit_case *test_case = ctx->test_case;
> > +       unsigned long saved_preempt_count = preempt_count();
> >
> >         current->kunit_test = test;
> >
> > @@ -298,7 +321,8 @@ static void kunit_try_run_case(void *data)
> >          * thread will resume control and handle any necessary clean up.
> >          */
> >         kunit_run_case_internal(test, suite, test_case);
> > -       /* This line may never be reached. */
> > +       /* These lines may never be reached. */
> > +       kunit_check_locking_bugs(test, saved_preempt_count);
> >         kunit_run_case_cleanup(test, suite);
> >  }
> >
> > --
> > 2.28.0.236.gb10cc79966-goog
> >

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] kunit: added lockdep support
  2020-08-10 20:35   ` Brendan Higgins
@ 2020-08-10 21:17     ` Uriel Guajardo
  2020-08-10 21:25       ` Peter Zijlstra
  0 siblings, 1 reply; 11+ messages in thread
From: Uriel Guajardo @ 2020-08-10 21:17 UTC (permalink / raw)
  To: Brendan Higgins, peterz, mingo, will
  Cc: Uriel Guajardo, open list:KERNEL SELFTEST FRAMEWORK,
	Linux Kernel Mailing List

On Mon, Aug 10, 2020 at 3:35 PM Brendan Higgins
<brendanhiggins@google.com> wrote:
>
> On Thu, Aug 6, 2020 at 1:43 PM Uriel Guajardo <urielguajardo@google.com> wrote:
> >
> > On Thu, Aug 6, 2020 at 3:37 PM Uriel Guajardo <urielguajardojr@gmail.com> wrote:
> > >
> > > From: Uriel Guajardo <urielguajardo@google.com>
> > >
> > > KUnit tests will now fail if lockdep detects an error during a test
> > > case.
> > >
> > > The idea comes from how lib/locking-selftest [1] checks for lock errors: we
> > > first if lock debugging is turned on. If not, an error must have
> > > occurred, so we fail the test and restart lockdep for the next test case.
> > >
> > > Like the locking selftests, we also fix possible preemption count
> > > corruption from lock bugs.
>
> Sorry, just noticed: You probably want to send this to some of the
> lockdep maintainers or the maintainers of the kselftest for lockdep.
>

Thanks for the reminder. CC'ed lockdep maintainers.


> > > Depends on kunit: support failure from dynamic analysis tools [2]
> > >
> > > [1] https://elixir.bootlin.com/linux/v5.7.12/source/lib/locking-selftest.c#L1137
> > >
> > > [2] https://lore.kernel.org/linux-kselftest/20200806174326.3577537-1-urielguajardojr@gmail.com/
> > >
> > > Signed-off-by: Uriel Guajardo <urielguajardo@google.com>
> > > ---
> > >  lib/kunit/test.c | 26 +++++++++++++++++++++++++-
> > >  1 file changed, 25 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/lib/kunit/test.c b/lib/kunit/test.c
> > > index d8189d827368..0838ececa005 100644
> > > --- a/lib/kunit/test.c
> > > +++ b/lib/kunit/test.c
> > > @@ -11,6 +11,8 @@
> > >  #include <linux/kref.h>
> > >  #include <linux/sched/debug.h>
> > >  #include <linux/sched.h>
> > > +#include <linux/lockdep.h>
> > > +#include <linux/debug_locks.h>
> > >
> > >  #include "debugfs.h"
> > >  #include "string-stream.h"
> > > @@ -22,6 +24,26 @@ void kunit_fail_current_test(void)
> > >                 kunit_set_failure(current->kunit_test);
> > >  }
> > >
> > > +static inline void kunit_check_locking_bugs(struct kunit *test,
> > > +                                           unsigned long saved_preempt_count)
> > > +{
> > > +       preempt_count_set(saved_preempt_count);
> > > +#ifdef CONFIG_TRACE_IRQFLAGS
> > > +       if (softirq_count())
> > > +               current->softirqs_enabled = 0;
> > > +       else
> > > +               current->softirqs_enabled = 1;
> > > +#endif
> >
> > I am not entirely sure why lib/locking-selftests enables/disables
> > softirqs, but I suspect it has to do with the fact that preempt_count
> > became corrupted, and somehow softirqs became incorrectly
> > enabled/disabled as a result. The resetting of the preemption count
> > will undo the enabling/disabling accordingly. Any insight on this
> > would be appreciated!
> >
> > > +#if IS_ENABLED(CONFIG_LOCKDEP)
> > > +       local_irq_disable();
> > > +       if (!debug_locks) {
> > > +               kunit_set_failure(test);
> > > +               lockdep_reset();
> > > +       }
> > > +       local_irq_enable();
> > > +#endif
> > > +}
> > > +
> > >  static void kunit_print_tap_version(void)
> > >  {
> > >         static bool kunit_has_printed_tap_version;
> > > @@ -289,6 +311,7 @@ static void kunit_try_run_case(void *data)
> > >         struct kunit *test = ctx->test;
> > >         struct kunit_suite *suite = ctx->suite;
> > >         struct kunit_case *test_case = ctx->test_case;
> > > +       unsigned long saved_preempt_count = preempt_count();
> > >
> > >         current->kunit_test = test;
> > >
> > > @@ -298,7 +321,8 @@ static void kunit_try_run_case(void *data)
> > >          * thread will resume control and handle any necessary clean up.
> > >          */
> > >         kunit_run_case_internal(test, suite, test_case);
> > > -       /* This line may never be reached. */
> > > +       /* These lines may never be reached. */
> > > +       kunit_check_locking_bugs(test, saved_preempt_count);
> > >         kunit_run_case_cleanup(test, suite);
> > >  }
> > >
> > > --
> > > 2.28.0.236.gb10cc79966-goog
> > >

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] kunit: added lockdep support
  2020-08-10 21:17     ` Uriel Guajardo
@ 2020-08-10 21:25       ` Peter Zijlstra
  0 siblings, 0 replies; 11+ messages in thread
From: Peter Zijlstra @ 2020-08-10 21:25 UTC (permalink / raw)
  To: Uriel Guajardo
  Cc: Brendan Higgins, mingo, will, Uriel Guajardo,
	open list:KERNEL SELFTEST FRAMEWORK, Linux Kernel Mailing List

On Mon, Aug 10, 2020 at 04:17:42PM -0500, Uriel Guajardo wrote:
> On Mon, Aug 10, 2020 at 3:35 PM Brendan Higgins
> <brendanhiggins@google.com> wrote:
> >
> > On Thu, Aug 6, 2020 at 1:43 PM Uriel Guajardo <urielguajardo@google.com> wrote:
> > >
> > > On Thu, Aug 6, 2020 at 3:37 PM Uriel Guajardo <urielguajardojr@gmail.com> wrote:
> > > >
> > > > From: Uriel Guajardo <urielguajardo@google.com>
> > > >
> > > > KUnit tests will now fail if lockdep detects an error during a test
> > > > case.
> > > >
> > > > The idea comes from how lib/locking-selftest [1] checks for lock errors: we
> > > > first if lock debugging is turned on. If not, an error must have
> > > > occurred, so we fail the test and restart lockdep for the next test case.
> > > >
> > > > Like the locking selftests, we also fix possible preemption count
> > > > corruption from lock bugs.
> >
> > Sorry, just noticed: You probably want to send this to some of the
> > lockdep maintainers or the maintainers of the kselftest for lockdep.
> >
> 
> Thanks for the reminder. CC'ed lockdep maintainers.

Please resend as a whole, this is an unreadable mess.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] kunit: added lockdep support
  2020-08-11 19:05     ` peterz
@ 2020-08-11 22:22       ` Uriel Guajardo
  0 siblings, 0 replies; 11+ messages in thread
From: Uriel Guajardo @ 2020-08-11 22:22 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Uriel Guajardo, Brendan Higgins, mingo, will,
	open list:KERNEL SELFTEST FRAMEWORK, kunit-dev,
	Linux Kernel Mailing List

On Tue, Aug 11, 2020 at 2:05 PM <peterz@infradead.org> wrote:
>
> On Tue, Aug 11, 2020 at 12:03:51PM -0500, Uriel Guajardo wrote:
> > On Mon, Aug 10, 2020 at 4:43 PM Peter Zijlstra <peterz@infradead.org> wrote:
> > >
> > > On Mon, Aug 10, 2020 at 09:32:57PM +0000, Uriel Guajardo wrote:
> > > > +static inline void kunit_check_locking_bugs(struct kunit *test,
> > > > +                                         unsigned long saved_preempt_count)
> > > > +{
> > > > +     preempt_count_set(saved_preempt_count);
> > > > +#ifdef CONFIG_TRACE_IRQFLAGS
> > > > +     if (softirq_count())
> > > > +             current->softirqs_enabled = 0;
> > > > +     else
> > > > +             current->softirqs_enabled = 1;
> > > > +#endif
> > > > +#if IS_ENABLED(CONFIG_LOCKDEP)
> > > > +     local_irq_disable();
> > > > +     if (!debug_locks) {
> > > > +             kunit_set_failure(test);
> > > > +             lockdep_reset();
> > > > +     }
> > > > +     local_irq_enable();
> > > > +#endif
> > > > +}
> > >
> > > Unless you can guarantee this runs before SMP brinup, that
> > > lockdep_reset() is terminally broken.
> >
> > Good point. KUnit is initialized after SMP is set up, and KUnit can
> > also be built as a module, so it's not a guarantee that we can make.
>
> Even if you could, there's still the question of wether throwing out all
> the dependencies learned during boot is a sensible idea.
>
> > Is there any other way to turn lockdep back on after we detect a
> > failure? It would be ideal if lockdep could still run in the next test
> > case after a failure in a previous one.
>
> Not really; the moment lockdep reports a failure it turns off all
> tracking and we instantly loose state.
>
> You'd have to:
>
>  - delete the 'mistaken' dependency from the graph such that we loose
>    the cycle, otherwise it will continue to find and report the cycle.
>
>  - put every task through a known empty state which turns the tracking
>    back on.
>
> Bart implemented most of what you need for the first item last year or
> so, but the remaining bit and the second item would still be a fair
> amount of work.
>
> Also, I'm really not sure it's worth it, the kernel should be free of
> lock cycles, so just fix one, reboot and continue.
>
> > I suppose we could only display the first failure that occurs, similar
> > to how lockdep does it. But it could also be useful to developers if
> > they saw failures in subsequent test cases, with the knowledge that
> > those failures may be unreliable.
>
> People already struggle with lockdep reports enough; I really don't want
> to given them dodgy report to worry about.

Ah, ok! Fair enough, thanks for the info. Although resetting lockdep
would be nice to have in the future, I think it's enough to only
report the first failure and warn the user that further test cases
will have lockdep disabled. People can then fix the issue and then
re-run it. I'll follow up with a patch that does this.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] kunit: added lockdep support
  2020-08-11 17:03   ` Uriel Guajardo
@ 2020-08-11 19:05     ` peterz
  2020-08-11 22:22       ` Uriel Guajardo
  0 siblings, 1 reply; 11+ messages in thread
From: peterz @ 2020-08-11 19:05 UTC (permalink / raw)
  To: Uriel Guajardo
  Cc: Uriel Guajardo, Brendan Higgins, mingo, will,
	open list:KERNEL SELFTEST FRAMEWORK, kunit-dev,
	Linux Kernel Mailing List

On Tue, Aug 11, 2020 at 12:03:51PM -0500, Uriel Guajardo wrote:
> On Mon, Aug 10, 2020 at 4:43 PM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > On Mon, Aug 10, 2020 at 09:32:57PM +0000, Uriel Guajardo wrote:
> > > +static inline void kunit_check_locking_bugs(struct kunit *test,
> > > +                                         unsigned long saved_preempt_count)
> > > +{
> > > +     preempt_count_set(saved_preempt_count);
> > > +#ifdef CONFIG_TRACE_IRQFLAGS
> > > +     if (softirq_count())
> > > +             current->softirqs_enabled = 0;
> > > +     else
> > > +             current->softirqs_enabled = 1;
> > > +#endif
> > > +#if IS_ENABLED(CONFIG_LOCKDEP)
> > > +     local_irq_disable();
> > > +     if (!debug_locks) {
> > > +             kunit_set_failure(test);
> > > +             lockdep_reset();
> > > +     }
> > > +     local_irq_enable();
> > > +#endif
> > > +}
> >
> > Unless you can guarantee this runs before SMP brinup, that
> > lockdep_reset() is terminally broken.
> 
> Good point. KUnit is initialized after SMP is set up, and KUnit can
> also be built as a module, so it's not a guarantee that we can make.

Even if you could, there's still the question of whether throwing out all
the dependencies learned during boot is a sensible idea.

> Is there any other way to turn lockdep back on after we detect a
> failure? It would be ideal if lockdep could still run in the next test
> case after a failure in a previous one.

Not really; the moment lockdep reports a failure it turns off all
tracking and we instantly lose state.

You'd have to:

 - delete the 'mistaken' dependency from the graph such that we lose
   the cycle, otherwise it will continue to find and report the cycle.

 - put every task through a known empty state which turns the tracking
   back on.

Bart implemented most of what you need for the first item last year or
so, but the remaining bit and the second item would still be a fair
amount of work.

Also, I'm really not sure it's worth it, the kernel should be free of
lock cycles, so just fix one, reboot and continue.

> I suppose we could only display the first failure that occurs, similar
> to how lockdep does it. But it could also be useful to developers if
> they saw failures in subsequent test cases, with the knowledge that
> those failures may be unreliable.

People already struggle with lockdep reports enough; I really don't want
to give them dodgy reports to worry about.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] kunit: added lockdep support
  2020-08-10 21:43 ` Peter Zijlstra
@ 2020-08-11 17:03   ` Uriel Guajardo
  2020-08-11 19:05     ` peterz
  0 siblings, 1 reply; 11+ messages in thread
From: Uriel Guajardo @ 2020-08-11 17:03 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Uriel Guajardo, Brendan Higgins, mingo, will,
	open list:KERNEL SELFTEST FRAMEWORK, kunit-dev,
	Linux Kernel Mailing List

On Mon, Aug 10, 2020 at 4:43 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Mon, Aug 10, 2020 at 09:32:57PM +0000, Uriel Guajardo wrote:
> > +static inline void kunit_check_locking_bugs(struct kunit *test,
> > +                                         unsigned long saved_preempt_count)
> > +{
> > +     preempt_count_set(saved_preempt_count);
> > +#ifdef CONFIG_TRACE_IRQFLAGS
> > +     if (softirq_count())
> > +             current->softirqs_enabled = 0;
> > +     else
> > +             current->softirqs_enabled = 1;
> > +#endif
> > +#if IS_ENABLED(CONFIG_LOCKDEP)
> > +     local_irq_disable();
> > +     if (!debug_locks) {
> > +             kunit_set_failure(test);
> > +             lockdep_reset();
> > +     }
> > +     local_irq_enable();
> > +#endif
> > +}
>
> Unless you can guarantee this runs before SMP brinup, that
> lockdep_reset() is terminally broken.

Good point. KUnit is initialized after SMP is set up, and KUnit can
also be built as a module, so it's not a guarantee that we can make.
Is there any other way to turn lockdep back on after we detect a
failure? It would be ideal if lockdep could still run in the next test
case after a failure in a previous one.

I suppose we could only display the first failure that occurs, similar
to how lockdep does it. But it could also be useful to developers if
they saw failures in subsequent test cases, with the knowledge that
those failures may be unreliable.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] kunit: added lockdep support
  2020-08-10 21:32 Uriel Guajardo
@ 2020-08-10 21:43 ` Peter Zijlstra
  2020-08-11 17:03   ` Uriel Guajardo
  0 siblings, 1 reply; 11+ messages in thread
From: Peter Zijlstra @ 2020-08-10 21:43 UTC (permalink / raw)
  To: Uriel Guajardo
  Cc: brendanhiggins, mingo, will, urielguajardo, linux-kselftest,
	kunit-dev, linux-kernel

On Mon, Aug 10, 2020 at 09:32:57PM +0000, Uriel Guajardo wrote:
> +static inline void kunit_check_locking_bugs(struct kunit *test,
> +					    unsigned long saved_preempt_count)
> +{
> +	preempt_count_set(saved_preempt_count);
> +#ifdef CONFIG_TRACE_IRQFLAGS
> +	if (softirq_count())
> +		current->softirqs_enabled = 0;
> +	else
> +		current->softirqs_enabled = 1;
> +#endif
> +#if IS_ENABLED(CONFIG_LOCKDEP)
> +	local_irq_disable();
> +	if (!debug_locks) {
> +		kunit_set_failure(test);
> +		lockdep_reset();
> +	}
> +	local_irq_enable();
> +#endif
> +}

Unless you can guarantee this runs before SMP bringup, that
lockdep_reset() is terminally broken.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH] kunit: added lockdep support
@ 2020-08-10 21:32 Uriel Guajardo
  2020-08-10 21:43 ` Peter Zijlstra
  0 siblings, 1 reply; 11+ messages in thread
From: Uriel Guajardo @ 2020-08-10 21:32 UTC (permalink / raw)
  To: brendanhiggins, peterz, mingo, will
  Cc: urielguajardo, linux-kselftest, kunit-dev, linux-kernel

From: Uriel Guajardo <urielguajardo@google.com>

KUnit tests will now fail if lockdep detects an error during a test
case.

The idea comes from how lib/locking-selftest [1] checks for lock errors: we
first check if lock debugging is turned on. If not, an error must have
occurred, so we fail the test and restart lockdep for the next test case.

Like the locking selftests, we also fix possible preemption count
corruption from lock bugs.

Depends on kunit: support failure from dynamic analysis tools [2]

[1] https://elixir.bootlin.com/linux/v5.7.12/source/lib/locking-selftest.c#L1137

[2] https://lore.kernel.org/linux-kselftest/20200806174326.3577537-1-urielguajardojr@gmail.com/

Signed-off-by: Uriel Guajardo <urielguajardo@google.com>
---
 lib/kunit/test.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/lib/kunit/test.c b/lib/kunit/test.c
index d8189d827368..0838ececa005 100644
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -11,6 +11,8 @@
 #include <linux/kref.h>
 #include <linux/sched/debug.h>
 #include <linux/sched.h>
+#include <linux/lockdep.h>
+#include <linux/debug_locks.h>
 
 #include "debugfs.h"
 #include "string-stream.h"
@@ -22,6 +24,26 @@ void kunit_fail_current_test(void)
 		kunit_set_failure(current->kunit_test);
 }
 
+static inline void kunit_check_locking_bugs(struct kunit *test,
+					    unsigned long saved_preempt_count)
+{
+	preempt_count_set(saved_preempt_count);
+#ifdef CONFIG_TRACE_IRQFLAGS
+	if (softirq_count())
+		current->softirqs_enabled = 0;
+	else
+		current->softirqs_enabled = 1;
+#endif
+#if IS_ENABLED(CONFIG_LOCKDEP)
+	local_irq_disable();
+	if (!debug_locks) {
+		kunit_set_failure(test);
+		lockdep_reset();
+	}
+	local_irq_enable();
+#endif
+}
+
 static void kunit_print_tap_version(void)
 {
 	static bool kunit_has_printed_tap_version;
@@ -289,6 +311,7 @@ static void kunit_try_run_case(void *data)
 	struct kunit *test = ctx->test;
 	struct kunit_suite *suite = ctx->suite;
 	struct kunit_case *test_case = ctx->test_case;
+	unsigned long saved_preempt_count = preempt_count();
 
 	current->kunit_test = test;
 
@@ -298,7 +321,8 @@ static void kunit_try_run_case(void *data)
 	 * thread will resume control and handle any necessary clean up.
 	 */
 	kunit_run_case_internal(test, suite, test_case);
-	/* This line may never be reached. */
+	/* These lines may never be reached. */
+	kunit_check_locking_bugs(test, saved_preempt_count);
 	kunit_run_case_cleanup(test, suite);
 }
 
-- 
2.28.0.236.gb10cc79966-goog


^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2020-08-11 22:22 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-06 20:37 [PATCH] kunit: added lockdep support Uriel Guajardo
2020-08-06 20:43 ` Uriel Guajardo
2020-08-10 20:35   ` Brendan Higgins
2020-08-10 21:17     ` Uriel Guajardo
2020-08-10 21:25       ` Peter Zijlstra
2020-08-10 20:34 ` Brendan Higgins
2020-08-10 21:32 Uriel Guajardo
2020-08-10 21:43 ` Peter Zijlstra
2020-08-11 17:03   ` Uriel Guajardo
2020-08-11 19:05     ` peterz
2020-08-11 22:22       ` Uriel Guajardo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.