RCU Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH v2 1/3] kernel: rename taint flag TAINT_SOFTLOCKUP into TAINT_LOCKUP
@ 2020-01-27 14:03 Konstantin Khlebnikov
  2020-01-27 14:03 ` [PATCH v2 2/3] kernel: set taint flag 'L' at any kind of lockup Konstantin Khlebnikov
  2020-01-27 14:03 ` [PATCH v2 3/3] kernel: add sysctl kernel.nr_taints Konstantin Khlebnikov
  0 siblings, 2 replies; 3+ messages in thread
From: Konstantin Khlebnikov @ 2020-01-27 14:03 UTC (permalink / raw)
  To: linux-kernel, linux-doc
  Cc: Sasha Levin, Kees Cook, Paul E. McKenney, Greg Kroah-Hartman,
	rcu, Tejun Heo, Andrew Morton, Linus Torvalds, Thomas Gleixner

Every lockup or stall detector reports an unexpected lack of progress.
It is useful to know about these splats when investigating later problems.

Right now only the softlockup watchdog leaves its own taint flag.
Let's generalize it and set it on any kind of detected lockup.

This patch removes 'soft' from its name and descriptions.
User visible letter stays the same: 'L'.

Next patch wires TAINT_LOCKUP into other kinds of lockup detectors.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Link: https://lore.kernel.org/lkml/157503370645.8187.6335564487789994134.stgit@buzz/ (v1)
---
 Documentation/admin-guide/sysctl/kernel.rst   |    2 +-
 Documentation/admin-guide/tainted-kernels.rst |    4 ++--
 include/linux/kernel.h                        |    2 +-
 kernel/panic.c                                |    2 +-
 kernel/watchdog.c                             |    2 +-
 tools/debugging/kernel-chktaint               |    2 +-
 6 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index def074807cee..8456c8ed0ca5 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -1084,7 +1084,7 @@ ORed together. The letters are seen in "Tainted" line of Oops reports.
   2048  `(I)`  workaround for bug in platform firmware applied
   4096  `(O)`  externally-built ("out-of-tree") module was loaded
   8192  `(E)`  unsigned module was loaded
- 16384  `(L)`  soft lockup occurred
+ 16384  `(L)`  lockup occurred
  32768  `(K)`  kernel has been live patched
  65536  `(X)`  Auxiliary taint, defined and used by for distros
 131072  `(T)`  The kernel was built with the struct randomization plugin
diff --git a/Documentation/admin-guide/tainted-kernels.rst b/Documentation/admin-guide/tainted-kernels.rst
index 71e9184a9079..55d45211cb41 100644
--- a/Documentation/admin-guide/tainted-kernels.rst
+++ b/Documentation/admin-guide/tainted-kernels.rst
@@ -96,7 +96,7 @@ Bit  Log  Number  Reason that got the kernel tainted
  11  _/I    2048  workaround for bug in platform firmware applied
  12  _/O    4096  externally-built ("out-of-tree") module was loaded
  13  _/E    8192  unsigned module was loaded
- 14  _/L   16384  soft lockup occurred
+ 14  _/L   16384  lockup occurred
  15  _/K   32768  kernel has been live patched
  16  _/X   65536  auxiliary taint, defined for and used by distros
  17  _/T  131072  kernel was built with the struct randomization plugin
@@ -152,7 +152,7 @@ More detailed explanation for tainting
  13) ``E`` if an unsigned module has been loaded in a kernel supporting
      module signature.
 
- 14) ``L`` if a soft lockup has previously occurred on the system.
+ 14) ``L`` if a lockup has previously occurred on the system.
 
  15) ``K`` if the kernel has been live patched.
 
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 0d9db2a14f44..3554456b2d40 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -584,7 +584,7 @@ extern enum system_states {
 #define TAINT_FIRMWARE_WORKAROUND	11
 #define TAINT_OOT_MODULE		12
 #define TAINT_UNSIGNED_MODULE		13
-#define TAINT_SOFTLOCKUP		14
+#define TAINT_LOCKUP			14
 #define TAINT_LIVEPATCH			15
 #define TAINT_AUX			16
 #define TAINT_RANDSTRUCT		17
diff --git a/kernel/panic.c b/kernel/panic.c
index b69ee9e76cb2..a0ea0c6992b9 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -372,7 +372,7 @@ const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = {
 	[ TAINT_FIRMWARE_WORKAROUND ]	= { 'I', ' ', false },
 	[ TAINT_OOT_MODULE ]		= { 'O', ' ', true },
 	[ TAINT_UNSIGNED_MODULE ]	= { 'E', ' ', true },
-	[ TAINT_SOFTLOCKUP ]		= { 'L', ' ', false },
+	[ TAINT_LOCKUP ]		= { 'L', ' ', false },
 	[ TAINT_LIVEPATCH ]		= { 'K', ' ', true },
 	[ TAINT_AUX ]			= { 'X', ' ', true },
 	[ TAINT_RANDSTRUCT ]		= { 'T', ' ', true },
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index f41334ef0971..d60b195708f7 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -466,7 +466,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 			smp_mb__after_atomic();
 		}
 
-		add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
+		add_taint(TAINT_LOCKUP, LOCKDEP_STILL_OK);
 		if (softlockup_panic)
 			panic("softlockup: hung tasks");
 		__this_cpu_write(soft_watchdog_warn, true);
diff --git a/tools/debugging/kernel-chktaint b/tools/debugging/kernel-chktaint
index 2240cb56e6e5..9f24719d8c80 100755
--- a/tools/debugging/kernel-chktaint
+++ b/tools/debugging/kernel-chktaint
@@ -168,7 +168,7 @@ if [ `expr $T % 2` -eq 0 ]; then
 	addout " "
 else
 	addout "L"
-	echo " * soft lockup occurred (#14)"
+	echo " * lockup occurred (#14)"
 fi
 
 T=`expr $T / 2`


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH v2 2/3] kernel: set taint flag 'L' at any kind of lockup
  2020-01-27 14:03 [PATCH v2 1/3] kernel: rename taint flag TAINT_SOFTLOCKUP into TAINT_LOCKUP Konstantin Khlebnikov
@ 2020-01-27 14:03 ` Konstantin Khlebnikov
  2020-01-27 14:03 ` [PATCH v2 3/3] kernel: add sysctl kernel.nr_taints Konstantin Khlebnikov
  1 sibling, 0 replies; 3+ messages in thread
From: Konstantin Khlebnikov @ 2020-01-27 14:03 UTC (permalink / raw)
  To: linux-kernel, linux-doc
  Cc: Sasha Levin, Kees Cook, Paul E. McKenney, Greg Kroah-Hartman,
	rcu, Tejun Heo, Andrew Morton, Linus Torvalds, Thomas Gleixner

Every lockup or stall detector reports an unexpected lack of progress.
It is useful to know about these splats when investigating later problems.

This patch sets TAINT_LOCKUP on:
- softlockup (CONFIG_SOFTLOCKUP_DETECTOR)
- hardlockup (CONFIG_HARDLOCKUP_DETECTOR)
- RCU stall (Documentation/RCU/stallwarn.txt)
- hung task (CONFIG_DETECT_HUNG_TASK)
- stuck in workqueues (CONFIG_WQ_WATCHDOG)

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Acked-by: Paul E. McKenney <paulmck@kernel.org> (RCU part)
Link: https://lore.kernel.org/lkml/157503370645.8187.6335564487789994134.stgit@buzz/ (v1)
---
 Documentation/admin-guide/tainted-kernels.rst |    4 ++++
 kernel/hung_task.c                            |    2 ++
 kernel/rcu/tree_stall.h                       |    1 +
 kernel/watchdog_hld.c                         |    1 +
 kernel/workqueue.c                            |    1 +
 5 files changed, 9 insertions(+)

diff --git a/Documentation/admin-guide/tainted-kernels.rst b/Documentation/admin-guide/tainted-kernels.rst
index 55d45211cb41..13249240283c 100644
--- a/Documentation/admin-guide/tainted-kernels.rst
+++ b/Documentation/admin-guide/tainted-kernels.rst
@@ -153,6 +153,10 @@ More detailed explanation for tainting
      module signature.
 
  14) ``L`` if a lockup has previously occurred on the system.
+     - soft/hardlockup, see Documentation/admin-guide/lockup-watchdogs.rst
+     - RCU stall, see Documentation/RCU/stallwarn.txt
+     - hung task detected, see CONFIG_DETECT_HUNG_TASK
+     - kernel workqueue lockup, see CONFIG_WQ_WATCHDOG
 
  15) ``K`` if the kernel has been live patched.
 
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 14a625c16cb3..521eb2fbf5fc 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -139,6 +139,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
 		hung_task_show_lock = true;
 	}
 
+	add_taint(TAINT_LOCKUP, LOCKDEP_STILL_OK);
+
 	touch_nmi_watchdog();
 }
 
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index c0b8c458d8a6..181495efff80 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -74,6 +74,7 @@ early_initcall(check_cpu_stall_init);
 /* If so specified via sysctl, panic, yielding cleaner stall-warning output. */
 static void panic_on_rcu_stall(void)
 {
+	add_taint(TAINT_LOCKUP, LOCKDEP_STILL_OK);
 	if (sysctl_panic_on_rcu_stall)
 		panic("RCU Stall\n");
 }
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 247bf0b1582c..f77256f47422 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -152,6 +152,7 @@ static void watchdog_overflow_callback(struct perf_event *event,
 				!test_and_set_bit(0, &hardlockup_allcpu_dumped))
 			trigger_allbutself_cpu_backtrace();
 
+		add_taint(TAINT_LOCKUP, LOCKDEP_STILL_OK);
 		if (hardlockup_panic)
 			nmi_panic(regs, "Hard LOCKUP");
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index cfc923558e04..1b3c81d87a0d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -5774,6 +5774,7 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
 			pr_cont_pool_info(pool);
 			pr_cont(" stuck for %us!\n",
 				jiffies_to_msecs(jiffies - pool_ts) / 1000);
+			add_taint(TAINT_LOCKUP, LOCKDEP_STILL_OK);
 		}
 	}
 


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH v2 3/3] kernel: add sysctl kernel.nr_taints
  2020-01-27 14:03 [PATCH v2 1/3] kernel: rename taint flag TAINT_SOFTLOCKUP into TAINT_LOCKUP Konstantin Khlebnikov
  2020-01-27 14:03 ` [PATCH v2 2/3] kernel: set taint flag 'L' at any kind of lockup Konstantin Khlebnikov
@ 2020-01-27 14:03 ` Konstantin Khlebnikov
  1 sibling, 0 replies; 3+ messages in thread
From: Konstantin Khlebnikov @ 2020-01-27 14:03 UTC (permalink / raw)
  To: linux-kernel, linux-doc
  Cc: Sasha Levin, Kees Cook, Paul E. McKenney, Greg Kroah-Hartman,
	rcu, Tejun Heo, Andrew Morton, Linus Torvalds, Thomas Gleixner

A raised taint flag is never cleared. Subsequent taints can be detected only
by parsing kernel log messages, which are different for each occasion.

For repeatable taints like TAINT_MACHINE_CHECK, TAINT_BAD_PAGE, TAINT_DIE,
TAINT_WARN, TAINT_LOCKUP it would be good to know the count to see their rate.

This patch adds a sysctl with a vector of counters, one for each taint flag.
The counters are non-atomic for simplicity; the exact count doesn't matter.

Writing a vector of zeroes resets the counters:
# tr 1-9 0 < /proc/sys/kernel/nr_taints > /proc/sys/kernel/nr_taints

This is useful for detecting frequent problems with automatic monitoring.
Tests could also use this to separate expected taints from unexpected ones.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Link: https://lore.kernel.org/lkml/157503370887.8187.1663761929323284758.stgit@buzz/ (v1)
---
 Documentation/admin-guide/sysctl/kernel.rst   |   10 ++++++++++
 Documentation/admin-guide/tainted-kernels.rst |   10 ++++++++++
 include/linux/kernel.h                        |    1 +
 kernel/panic.c                                |    5 +++++
 kernel/sysctl.c                               |    9 +++++++++
 5 files changed, 35 insertions(+)

diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 8456c8ed0ca5..6250575bec9f 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -56,6 +56,7 @@ show up in /proc/sys/kernel:
 - msgmnb
 - msgmni
 - nmi_watchdog
+- nr_taints                   ==> Documentation/admin-guide/tainted-kernels.rst
 - osrelease
 - ostype
 - overflowgid
@@ -495,6 +496,15 @@ in a KVM virtual machine. This default can be overridden by adding::
 to the guest kernel command line (see Documentation/admin-guide/kernel-parameters.rst).
 
 
+nr_taints:
+==========
+
+This shows vector of counters for taint flags.
+Writing vector of zeroes resets counters.
+
+See Documentation/admin-guide/tainted-kernels.rst for more information.
+
+
 numa_balancing:
 ===============
 
diff --git a/Documentation/admin-guide/tainted-kernels.rst b/Documentation/admin-guide/tainted-kernels.rst
index 13249240283c..2c5181d5e8ae 100644
--- a/Documentation/admin-guide/tainted-kernels.rst
+++ b/Documentation/admin-guide/tainted-kernels.rst
@@ -166,3 +166,13 @@ More detailed explanation for tainting
      produce extremely unusual kernel structure layouts (even performance
      pathological ones), which is important to know when debugging. Set at
      build time.
+
+
+Taint flag counters
+-------------------
+
+For detecting repeatedly set taint flags kernel counts them in sysctl:
+``cat /proc/sys/kernel/nr_taints``
+
+Writing vector of zeros resets counters but not taint flags itself:
+``tr 1-9 0 < /proc/sys/kernel/nr_taints > /proc/sys/kernel/nr_taints``
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3554456b2d40..2e2c4d008ac1 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -597,6 +597,7 @@ struct taint_flag {
 };
 
 extern const struct taint_flag taint_flags[TAINT_FLAGS_COUNT];
+extern int sysctl_nr_taints[TAINT_FLAGS_COUNT];
 
 extern const char hex_asc[];
 #define hex_asc_lo(x)	hex_asc[((x) & 0x0f)]
diff --git a/kernel/panic.c b/kernel/panic.c
index a0ea0c6992b9..2e86387bbea0 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -39,6 +39,7 @@
 int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE;
 static unsigned long tainted_mask =
 	IS_ENABLED(CONFIG_GCC_PLUGIN_RANDSTRUCT) ? (1 << TAINT_RANDSTRUCT) : 0;
+int sysctl_nr_taints[TAINT_FLAGS_COUNT];
 static int pause_on_oops;
 static int pause_on_oops_flag;
 static DEFINE_SPINLOCK(pause_on_oops_lock);
@@ -434,6 +435,10 @@ void add_taint(unsigned flag, enum lockdep_ok lockdep_ok)
 		pr_warn("Disabling lock debugging due to kernel taint\n");
 
 	set_bit(flag, &tainted_mask);
+
+	/* proc_taint() could set unknown taint flag */
+	if (flag < ARRAY_SIZE(sysctl_nr_taints))
+		sysctl_nr_taints[flag]++;
 }
 EXPORT_SYMBOL(add_taint);
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 70665934d53e..21911a79305b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -553,6 +553,15 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_taint,
 	},
+	{
+		.procname	= "nr_taints",
+		.data		= &sysctl_nr_taints,
+		.maxlen		= sizeof(sysctl_nr_taints),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ZERO,
+	},
 	{
 		.procname	= "sysctl_writes_strict",
 		.data		= &sysctl_writes_strict,


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, back to index

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-27 14:03 [PATCH v2 1/3] kernel: rename taint flag TAINT_SOFTLOCKUP into TAINT_LOCKUP Konstantin Khlebnikov
2020-01-27 14:03 ` [PATCH v2 2/3] kernel: set taint flag 'L' at any kind of lockup Konstantin Khlebnikov
2020-01-27 14:03 ` [PATCH v2 3/3] kernel: add sysctl kernel.nr_taints Konstantin Khlebnikov

RCU Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/rcu/0 rcu/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 rcu rcu/ https://lore.kernel.org/rcu \
		rcu@vger.kernel.org
	public-inbox-index rcu

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.rcu


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git