All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] locking/lockdep: Dump state of percpu_rwsem upon hung up.
@ 2018-07-07  6:31 Tetsuo Handa
  2018-07-07 11:31 ` kbuild test robot
  2018-07-10 11:47 ` [PATCH v2] locking/lockdep: Dump state of percpu_rwsem upon hung up Tetsuo Handa
  0 siblings, 2 replies; 11+ messages in thread
From: Tetsuo Handa @ 2018-07-07  6:31 UTC (permalink / raw)
  To: akpm
  Cc: linux-kernel, Tetsuo Handa, Dmitry Vyukov, Ingo Molnar,
	Peter Zijlstra, Will Deacon

This is a temporary patch which should not go to linux.git.

syzbot is hitting hung task problems at __sb_start_write() [1]. While hung
tasks at __sb_start_write() suggest that filesystem was frozen, syzbot is
not doing ioctl(FIFREEZE) requests. Therefore, the root cause of hung tasks
is currently unknown. As a first step for debugging this problem, let's
check what atomic_long_read(&sem->rw_sem.count) says when hung task is
reported. Since it is impossible to reproduce this problem locally, this
patch was made in order to test linux-next.git using syzbot infrastructure.

[1] https://syzkaller.appspot.com/bug?id=287aa8708bc940d0ca1645223c53dd4c2d203be6

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
---
 include/linux/sched.h         |  1 +
 kernel/hung_task.c            | 25 +++++++++++++++++++++++++
 kernel/locking/percpu-rwsem.c |  2 ++
 3 files changed, 28 insertions(+)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index cf1edc0..2aaaf49 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1183,6 +1183,7 @@ struct task_struct {
 	/* Used by LSM modules for access restriction: */
 	void				*security;
 #endif
+	struct percpu_rw_semaphore	*pcpu_rwsem_read_waiting;
 
 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
 	unsigned long			lowest_stack;
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index b9132d1..03d7d9b 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -18,6 +18,7 @@
 #include <linux/utsname.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/debug.h>
+#include <linux/percpu-rwsem.h>
 
 #include <trace/events/sched.h>
 
@@ -82,6 +83,29 @@ static int __init hung_task_panic_setup(char *str)
 	.notifier_call = hung_task_panic,
 };
 
+static void dump_percpu_rwsem_state(struct percpu_rw_semaphore *sem)
+{
+	unsigned int sum = 0;
+	int cpu;
+
+	if (!sem)
+		return;
+	pr_info("percpu_rw_semaphore(%p)\n", sem);
+	pr_info("  ->rw_sem.count=0x%lx\n",
+		atomic_long_read(&sem->rw_sem.count));
+	pr_info("  ->rss.gp_state=%d\n", sem->rss.gp_state);
+	pr_info("  ->rss.gp_count=%d\n", sem->rss.gp_count);
+	pr_info("  ->rss.cb_state=%d\n", sem->rss.cb_state);
+	pr_info("  ->rss.gp_type=%d\n", sem->rss.gp_type);
+	pr_info("  ->readers_block=%d\n", sem->readers_block);
+	for_each_possible_cpu(cpu)
+		sum += per_cpu(*sem->read_count, cpu);
+	pr_info("  ->read_count=%d\n", sum);
+	pr_info("  ->list_empty(rw_sem.wait_list)=%d\n",
+	       list_empty(&sem->rw_sem.wait_list));
+	pr_info("  ->writer.task=%p\n", sem->writer.task);
+}
+
 static void check_hung_task(struct task_struct *t, unsigned long timeout)
 {
 	unsigned long switch_count = t->nvcsw + t->nivcsw;
@@ -130,6 +154,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
 		pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
 			" disables this message.\n");
 		sched_show_task(t);
+		dump_percpu_rwsem_state(t->pcpu_rwsem_read_waiting);
 		hung_task_show_lock = true;
 	}
 
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 883cf1b..b3654ab 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -82,7 +82,9 @@ int __percpu_down_read(struct percpu_rw_semaphore *sem, int try)
 	/*
 	 * Avoid lockdep for the down/up_read() we already have them.
 	 */
+	current->pcpu_rwsem_read_waiting = sem;
 	__down_read(&sem->rw_sem);
+	current->pcpu_rwsem_read_waiting = NULL;
 	this_cpu_inc(*sem->read_count);
 	__up_read(&sem->rw_sem);
 
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] locking/lockdep: Dump state of percpu_rwsem upon hung up.
  2018-07-07  6:31 [PATCH] locking/lockdep: Dump state of percpu_rwsem upon hung up Tetsuo Handa
@ 2018-07-07 11:31 ` kbuild test robot
  2018-07-07 14:09   ` Tetsuo Handa
  2018-07-10  6:10   ` Tetsuo Handa
  2018-07-10 11:47 ` [PATCH v2] locking/lockdep: Dump state of percpu_rwsem upon hung up Tetsuo Handa
  1 sibling, 2 replies; 11+ messages in thread
From: kbuild test robot @ 2018-07-07 11:31 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: kbuild-all, akpm, linux-kernel, Tetsuo Handa, Dmitry Vyukov,
	Ingo Molnar, Peter Zijlstra, Will Deacon

[-- Attachment #1: Type: text/plain, Size: 6382 bytes --]

Hi Tetsuo,

I love your patch! Yet something to improve:

[auto build test ERROR on next-20180706]
[also build test ERROR on v4.18-rc3]
[cannot apply to tip/auto-latest tip/sched/core linus/master v4.18-rc3 v4.18-rc2 v4.18-rc1]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Tetsuo-Handa/locking-lockdep-Dump-state-of-percpu_rwsem-upon-hung-up/20180707-143406
config: mips-allmodconfig (attached as .config)
compiler: mips-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        GCC_VERSION=7.2.0 make.cross ARCH=mips 

All error/warnings (new ones prefixed by >>):

   In file included from include/linux/kernel.h:14:0,
                    from include/asm-generic/bug.h:18,
                    from arch/mips/include/asm/bug.h:42,
                    from include/linux/bug.h:5,
                    from include/linux/mmdebug.h:5,
                    from include/linux/mm.h:9,
                    from kernel/hung_task.c:8:
   kernel/hung_task.c: In function 'dump_percpu_rwsem_state':
>> kernel/hung_task.c:95:20: error: passing argument 1 of 'atomic_long_read' from incompatible pointer type [-Werror=incompatible-pointer-types]
      atomic_long_read(&sem->rw_sem.count));
                       ^
   include/linux/printk.h:311:34: note: in definition of macro 'pr_info'
     printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
                                     ^~~~~~~~~~~
   In file included from include/linux/atomic.h:1309:0,
                    from arch/mips/include/asm/processor.h:14,
                    from arch/mips/include/asm/thread_info.h:16,
                    from include/linux/thread_info.h:38,
                    from include/asm-generic/preempt.h:5,
                    from ./arch/mips/include/generated/asm/preempt.h:1,
                    from include/linux/preempt.h:81,
                    from include/linux/spinlock.h:51,
                    from include/linux/mmzone.h:8,
                    from include/linux/gfp.h:6,
                    from include/linux/mm.h:10,
                    from kernel/hung_task.c:8:
   include/asm-generic/atomic-long.h:41:20: note: expected 'const atomic_long_t * {aka const struct <anonymous> *}' but argument is of type '__s32 * {aka int *}'
    static inline long atomic_long_read##mo(const atomic_long_t *l)  \
                       ^
>> include/asm-generic/atomic-long.h:47:1: note: in expansion of macro 'ATOMIC_LONG_READ_OP'
    ATOMIC_LONG_READ_OP()
    ^~~~~~~~~~~~~~~~~~~
   cc1: some warnings being treated as errors

vim +/atomic_long_read +95 kernel/hung_task.c

   > 8	#include <linux/mm.h>
     9	#include <linux/cpu.h>
    10	#include <linux/nmi.h>
    11	#include <linux/init.h>
    12	#include <linux/delay.h>
    13	#include <linux/freezer.h>
    14	#include <linux/kthread.h>
    15	#include <linux/lockdep.h>
    16	#include <linux/export.h>
    17	#include <linux/sysctl.h>
    18	#include <linux/utsname.h>
    19	#include <linux/sched/signal.h>
    20	#include <linux/sched/debug.h>
    21	#include <linux/percpu-rwsem.h>
    22	
    23	#include <trace/events/sched.h>
    24	
    25	/*
    26	 * The number of tasks checked:
    27	 */
    28	int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
    29	
    30	/*
    31	 * Limit number of tasks checked in a batch.
    32	 *
    33	 * This value controls the preemptibility of khungtaskd since preemption
    34	 * is disabled during the critical section. It also controls the size of
    35	 * the RCU grace period. So it needs to be upper-bound.
    36	 */
    37	#define HUNG_TASK_BATCHING 1024
    38	
    39	/*
    40	 * Zero means infinite timeout - no checking done:
    41	 */
    42	unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT;
    43	
    44	/*
    45	 * Zero (default value) means use sysctl_hung_task_timeout_secs:
    46	 */
    47	unsigned long __read_mostly sysctl_hung_task_check_interval_secs;
    48	
    49	int __read_mostly sysctl_hung_task_warnings = 10;
    50	
    51	static int __read_mostly did_panic;
    52	static bool hung_task_show_lock;
    53	static bool hung_task_call_panic;
    54	
    55	static struct task_struct *watchdog_task;
    56	
    57	/*
    58	 * Should we panic (and reboot, if panic_timeout= is set) when a
    59	 * hung task is detected:
    60	 */
    61	unsigned int __read_mostly sysctl_hung_task_panic =
    62					CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;
    63	
    64	static int __init hung_task_panic_setup(char *str)
    65	{
    66		int rc = kstrtouint(str, 0, &sysctl_hung_task_panic);
    67	
    68		if (rc)
    69			return rc;
    70		return 1;
    71	}
    72	__setup("hung_task_panic=", hung_task_panic_setup);
    73	
    74	static int
    75	hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
    76	{
    77		did_panic = 1;
    78	
    79		return NOTIFY_DONE;
    80	}
    81	
    82	static struct notifier_block panic_block = {
    83		.notifier_call = hung_task_panic,
    84	};
    85	
    86	static void dump_percpu_rwsem_state(struct percpu_rw_semaphore *sem)
    87	{
    88		unsigned int sum = 0;
    89		int cpu;
    90	
    91		if (!sem)
    92			return;
    93		pr_info("percpu_rw_semaphore(%p)\n", sem);
    94		pr_info("  ->rw_sem.count=0x%lx\n",
  > 95			atomic_long_read(&sem->rw_sem.count));
    96		pr_info("  ->rss.gp_state=%d\n", sem->rss.gp_state);
    97		pr_info("  ->rss.gp_count=%d\n", sem->rss.gp_count);
    98		pr_info("  ->rss.cb_state=%d\n", sem->rss.cb_state);
    99		pr_info("  ->rss.gp_type=%d\n", sem->rss.gp_type);
   100		pr_info("  ->readers_block=%d\n", sem->readers_block);
   101		for_each_possible_cpu(cpu)
   102			sum += per_cpu(*sem->read_count, cpu);
   103		pr_info("  ->read_count=%d\n", sum);
   104		pr_info("  ->list_empty(rw_sem.wait_list)=%d\n",
   105		       list_empty(&sem->rw_sem.wait_list));
   106		pr_info("  ->writer.task=%p\n", sem->writer.task);
   107	}
   108	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 56614 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] locking/lockdep: Dump state of percpu_rwsem upon hung up.
  2018-07-07 11:31 ` kbuild test robot
@ 2018-07-07 14:09   ` Tetsuo Handa
  2018-07-10  6:10   ` Tetsuo Handa
  1 sibling, 0 replies; 11+ messages in thread
From: Tetsuo Handa @ 2018-07-07 14:09 UTC (permalink / raw)
  To: kbuild test robot
  Cc: kbuild-all, akpm, linux-kernel, Dmitry Vyukov, Ingo Molnar,
	Peter Zijlstra, Will Deacon

Hello.

Thanks for doing build test, but I don't understand what is wrong.
As far as I can see, "struct percpu_rw_semaphore" contains "struct rw_semaphore"
and "struct rw_semaphore" contains "atomic_long_t count".
Then, why trying to read using atomic_long_read(&sem->rw_sem.count) causes

  note: expected 'const atomic_long_t * {aka const struct <anonymous> *}' but argument is of type '__s32 * {aka int *}'

warning? Is mips using different type?

struct percpu_rw_semaphore {
        struct rcu_sync         rss;
        unsigned int __percpu   *read_count;
        struct rw_semaphore     rw_sem; /* slowpath */
        struct rcuwait          writer; /* blocked writer */
        int                     readers_block;
};

/* All arch specific implementations share the same struct */
struct rw_semaphore {
        atomic_long_t count;
        struct list_head wait_list;
        raw_spinlock_t wait_lock;
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
        struct optimistic_spin_queue osq; /* spinner MCS lock */
        /*
         * Write owner. Used as a speculative check to see
         * if the owner is running on the cpu.
         */
        struct task_struct *owner;
#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
        struct lockdep_map      dep_map;
#endif
};

On 2018/07/07 20:31, kbuild test robot wrote:
> Hi Tetsuo,
> 
> I love your patch! Yet something to improve:
> 
> [auto build test ERROR on next-20180706]
> [also build test ERROR on v4.18-rc3]
> [cannot apply to tip/auto-latest tip/sched/core linus/master v4.18-rc3 v4.18-rc2 v4.18-rc1]
> [if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
> 
> url:    https://github.com/0day-ci/linux/commits/Tetsuo-Handa/locking-lockdep-Dump-state-of-percpu_rwsem-upon-hung-up/20180707-143406
> config: mips-allmodconfig (attached as .config)
> compiler: mips-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
> reproduce:
>         wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>         chmod +x ~/bin/make.cross
>         # save the attached .config to linux build tree
>         GCC_VERSION=7.2.0 make.cross ARCH=mips 
> 
> All error/warnings (new ones prefixed by >>):
> 
>    In file included from include/linux/kernel.h:14:0,
>                     from include/asm-generic/bug.h:18,
>                     from arch/mips/include/asm/bug.h:42,
>                     from include/linux/bug.h:5,
>                     from include/linux/mmdebug.h:5,
>                     from include/linux/mm.h:9,
>                     from kernel/hung_task.c:8:
>    kernel/hung_task.c: In function 'dump_percpu_rwsem_state':
>>> kernel/hung_task.c:95:20: error: passing argument 1 of 'atomic_long_read' from incompatible pointer type [-Werror=incompatible-pointer-types]
>       atomic_long_read(&sem->rw_sem.count));
>                        ^
>    include/linux/printk.h:311:34: note: in definition of macro 'pr_info'
>      printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
>                                      ^~~~~~~~~~~
>    In file included from include/linux/atomic.h:1309:0,
>                     from arch/mips/include/asm/processor.h:14,
>                     from arch/mips/include/asm/thread_info.h:16,
>                     from include/linux/thread_info.h:38,
>                     from include/asm-generic/preempt.h:5,
>                     from ./arch/mips/include/generated/asm/preempt.h:1,
>                     from include/linux/preempt.h:81,
>                     from include/linux/spinlock.h:51,
>                     from include/linux/mmzone.h:8,
>                     from include/linux/gfp.h:6,
>                     from include/linux/mm.h:10,
>                     from kernel/hung_task.c:8:
>    include/asm-generic/atomic-long.h:41:20: note: expected 'const atomic_long_t * {aka const struct <anonymous> *}' but argument is of type '__s32 * {aka int *}'
>     static inline long atomic_long_read##mo(const atomic_long_t *l)  \
>                        ^
>>> include/asm-generic/atomic-long.h:47:1: note: in expansion of macro 'ATOMIC_LONG_READ_OP'
>     ATOMIC_LONG_READ_OP()
>     ^~~~~~~~~~~~~~~~~~~
>    cc1: some warnings being treated as errors


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] locking/lockdep: Dump state of percpu_rwsem upon hung up.
  2018-07-07 11:31 ` kbuild test robot
  2018-07-07 14:09   ` Tetsuo Handa
@ 2018-07-10  6:10   ` Tetsuo Handa
  2018-07-10  9:20     ` Will Deacon
                       ` (3 more replies)
  1 sibling, 4 replies; 11+ messages in thread
From: Tetsuo Handa @ 2018-07-10  6:10 UTC (permalink / raw)
  To: kbuild test robot
  Cc: kbuild-all, akpm, linux-kernel, Dmitry Vyukov, Ingo Molnar,
	Peter Zijlstra, Will Deacon

From 2642b4a1904259384f2018ea8df03ac49509c57a Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SKAURA.ne.jp>
Date: Tue, 10 Jul 2018 15:01:20 +0900
Subject: [PATCH] locking/rwsem: Convert the other sem->count to 'atomic_long_t'

Since "locking/rwsem: Convert sem->count to 'atomic_long_t'" forgot to
convert "struct rw_semaphore"->count in linux/rwsem-spinlock.h which is
used when CONFIG_RWSEM_GENERIC_SPINLOCK=y, trying to access it in MIPS
architecture causes build error due to type mismatch.

  error: passing argument 1 of 'atomic_long_read' from incompatible pointer type [-Werror=incompatible-pointer-types]
  note: expected 'const atomic_long_t * {aka const struct <anonymous> *}' but argument is of type '__s32 * {aka int *}'

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SKAURA.ne.jp>
Reported-by: kbuild test robot <lkp@intel.com>
Fixes: 8ee62b1870be8e63 ("locking/rwsem: Convert sem->count to 'atomic_long_t'")
Cc: stable <stable@vger.kernel.org> # v4.8+
Cc: Jason Low <jason.low2@hpe.com>
Cc: Peter Zijlstra <peterz@infradead.org>
---
 include/linux/rwsem-spinlock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
index e475683..1164965 100644
--- a/include/linux/rwsem-spinlock.h
+++ b/include/linux/rwsem-spinlock.h
@@ -22,7 +22,7 @@
  * - if wait_list is not empty, then there are processes waiting for the semaphore
  */
 struct rw_semaphore {
-	__s32			count;
+	atomic_long_t		count;
 	raw_spinlock_t		wait_lock;
 	struct list_head	wait_list;
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] locking/lockdep: Dump state of percpu_rwsem upon hung up.
  2018-07-10  6:10   ` Tetsuo Handa
@ 2018-07-10  9:20     ` Will Deacon
  2018-07-10  9:30     ` Peter Zijlstra
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 11+ messages in thread
From: Will Deacon @ 2018-07-10  9:20 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: kbuild test robot, kbuild-all, akpm, linux-kernel, Dmitry Vyukov,
	Ingo Molnar, Peter Zijlstra

On Tue, Jul 10, 2018 at 03:10:51PM +0900, Tetsuo Handa wrote:
> From 2642b4a1904259384f2018ea8df03ac49509c57a Mon Sep 17 00:00:00 2001
> From: Tetsuo Handa <penguin-kernel@I-love.SKAURA.ne.jp>
> Date: Tue, 10 Jul 2018 15:01:20 +0900
> Subject: [PATCH] locking/rwsem: Convert the other sem->count to 'atomic_long_t'
> 
> Since "locking/rwsem: Convert sem->count to 'atomic_long_t'" forgot to
> convert "struct rw_semaphore"->count in linux/rwsem-spinlock.h which is
> used when CONFIG_RWSEM_GENERIC_SPINLOCK=y, trying to access it in MIPS
> architecture causes build error due to type mismatch.
> 
>   error: passing argument 1 of 'atomic_long_read' from incompatible pointer type [-Werror=incompatible-pointer-types]
>   note: expected 'const atomic_long_t * {aka const struct <anonymous> *}' but argument is of type '__s32 * {aka int *}'
> 
> Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SKAURA.ne.jp>
> Reported-by: kbuild test robot <lkp@intel.com>
> Fixes: 8ee62b1870be8e63 ("locking/rwsem: Convert sem->count to 'atomic_long_t'")
> Cc: stable <stable@vger.kernel.org> # v4.8+
> Cc: Jason Low <jason.low2@hpe.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> ---
>  include/linux/rwsem-spinlock.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Hmm, how come you don't have to update kernel/locking/rwsem-spinlock.c
as well? I see stuff such as:

	sem->count += woken;

in there.

Will

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] locking/lockdep: Dump state of percpu_rwsem upon hung up.
  2018-07-10  6:10   ` Tetsuo Handa
  2018-07-10  9:20     ` Will Deacon
@ 2018-07-10  9:30     ` Peter Zijlstra
  2018-07-10 10:48       ` Tetsuo Handa
  2018-07-10  9:41     ` [PATCH] locking/rwsem: Convert the other sem->count to 'atomic_long_t' kbuild test robot
  2018-07-10  9:41     ` kbuild test robot
  3 siblings, 1 reply; 11+ messages in thread
From: Peter Zijlstra @ 2018-07-10  9:30 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: kbuild test robot, kbuild-all, akpm, linux-kernel, Dmitry Vyukov,
	Ingo Molnar, Will Deacon

On Tue, Jul 10, 2018 at 03:10:51PM +0900, Tetsuo Handa wrote:
> From 2642b4a1904259384f2018ea8df03ac49509c57a Mon Sep 17 00:00:00 2001
> From: Tetsuo Handa <penguin-kernel@I-love.SKAURA.ne.jp>
> Date: Tue, 10 Jul 2018 15:01:20 +0900
> Subject: [PATCH] locking/rwsem: Convert the other sem->count to 'atomic_long_t'
> 
> Since "locking/rwsem: Convert sem->count to 'atomic_long_t'" forgot to
> convert "struct rw_semaphore"->count in linux/rwsem-spinlock.h which is
> used when CONFIG_RWSEM_GENERIC_SPINLOCK=y, trying to access it in MIPS
> architecture causes build error due to type mismatch.
> 
>   error: passing argument 1 of 'atomic_long_read' from incompatible pointer type [-Werror=incompatible-pointer-types]
>   note: expected 'const atomic_long_t * {aka const struct <anonymous> *}' but argument is of type '__s32 * {aka int *}'
> 
> Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SKAURA.ne.jp>
> Reported-by: kbuild test robot <lkp@intel.com>
> Fixes: 8ee62b1870be8e63 ("locking/rwsem: Convert sem->count to 'atomic_long_t'")
> Cc: stable <stable@vger.kernel.org> # v4.8+
> Cc: Jason Low <jason.low2@hpe.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> ---
>  include/linux/rwsem-spinlock.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
> index e475683..1164965 100644
> --- a/include/linux/rwsem-spinlock.h
> +++ b/include/linux/rwsem-spinlock.h
> @@ -22,7 +22,7 @@
>   * - if wait_list is not empty, then there are processes waiting for the semaphore
>   */
>  struct rw_semaphore {
> -	__s32			count;
> +	atomic_long_t		count;

Uhhh... how does this even build? rwsem-spinlock.c doesn't use
atomic_long primitives to change count.

And the atomic_long count usage is completely private to rwsem-xadd.c,
nothing outside of that should use that field, ever.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] locking/rwsem: Convert the other sem->count to 'atomic_long_t'
  2018-07-10  6:10   ` Tetsuo Handa
  2018-07-10  9:20     ` Will Deacon
  2018-07-10  9:30     ` Peter Zijlstra
@ 2018-07-10  9:41     ` kbuild test robot
  2018-07-10  9:41     ` kbuild test robot
  3 siblings, 0 replies; 11+ messages in thread
From: kbuild test robot @ 2018-07-10  9:41 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: kbuild-all, kbuild test robot, akpm, linux-kernel, Dmitry Vyukov,
	Ingo Molnar, Peter Zijlstra, Will Deacon

[-- Attachment #1: Type: text/plain, Size: 2792 bytes --]

Hi Tetsuo,

I love your patch! Perhaps something to improve:

[auto build test WARNING on tip/locking/core]
[also build test WARNING on v4.18-rc4 next-20180709]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Tetsuo-Handa/locking-rwsem-Convert-the-other-sem-count-to-atomic_long_t/20180710-141553
config: m68k-m5475evb_defconfig (attached as .config)
compiler: m68k-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        GCC_VERSION=7.2.0 make.cross ARCH=m68k 

All warnings (new ones prefixed by >>):

   In file included from include/linux/notifier.h:15:0,
                    from include/linux/memory_hotplug.h:7,
                    from include/linux/mmzone.h:777,
                    from include/linux/gfp.h:6,
                    from include/linux/idr.h:16,
                    from include/linux/kernfs.h:14,
                    from include/linux/sysfs.h:16,
                    from include/linux/kobject.h:20,
                    from include/linux/pci.h:29,
                    from drivers//pci/search.c:11:
   include/linux/rwsem.h:87:2: warning: missing braces around initializer [-Wmissing-braces]
     { __RWSEM_INIT_COUNT(name),    \
     ^
   include/linux/rwsem.h:94:29: note: in expansion of macro '__RWSEM_INITIALIZER'
     struct rw_semaphore name = __RWSEM_INITIALIZER(name)
                                ^~~~~~~~~~~~~~~~~~~
>> drivers//pci/search.c:17:1: note: in expansion of macro 'DECLARE_RWSEM'
    DECLARE_RWSEM(pci_bus_sem);
    ^~~~~~~~~~~~~

vim +/DECLARE_RWSEM +17 drivers//pci/search.c

^1da177e Linus Torvalds 2005-04-16 @11  #include <linux/pci.h>
5a0e3ad6 Tejun Heo      2010-03-24  12  #include <linux/slab.h>
^1da177e Linus Torvalds 2005-04-16  13  #include <linux/module.h>
^1da177e Linus Torvalds 2005-04-16  14  #include <linux/interrupt.h>
^1da177e Linus Torvalds 2005-04-16  15  #include "pci.h"
^1da177e Linus Torvalds 2005-04-16  16  
d71374da Zhang Yanmin   2006-06-02 @17  DECLARE_RWSEM(pci_bus_sem);
ce29ca3e Amos Kong      2012-05-23  18  EXPORT_SYMBOL_GPL(pci_bus_sem);
ce29ca3e Amos Kong      2012-05-23  19  

:::::: The code at line 17 was first introduced by commit
:::::: d71374dafbba7ec3f67371d3b7e9f6310a588808 [PATCH] PCI: fix race with pci_walk_bus and pci_destroy_dev

:::::: TO: Zhang Yanmin <yanmin.zhang@intel.com>
:::::: CC: Greg Kroah-Hartman <gregkh@suse.de>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 6225 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] locking/rwsem: Convert the other sem->count to 'atomic_long_t'
  2018-07-10  6:10   ` Tetsuo Handa
                       ` (2 preceding siblings ...)
  2018-07-10  9:41     ` [PATCH] locking/rwsem: Convert the other sem->count to 'atomic_long_t' kbuild test robot
@ 2018-07-10  9:41     ` kbuild test robot
  3 siblings, 0 replies; 11+ messages in thread
From: kbuild test robot @ 2018-07-10  9:41 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: kbuild-all, kbuild test robot, akpm, linux-kernel, Dmitry Vyukov,
	Ingo Molnar, Peter Zijlstra, Will Deacon

[-- Attachment #1: Type: text/plain, Size: 3744 bytes --]

Hi Tetsuo,

I love your patch! Perhaps something to improve:

[auto build test WARNING on tip/locking/core]
[also build test WARNING on v4.18-rc4 next-20180709]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Tetsuo-Handa/locking-rwsem-Convert-the-other-sem-count-to-atomic_long_t/20180710-141553
config: parisc-c3000_defconfig (attached as .config)
compiler: hppa-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        GCC_VERSION=7.2.0 make.cross ARCH=parisc 

All warnings (new ones prefixed by >>):

   In file included from include/linux/notifier.h:15:0,
                    from include/linux/memory_hotplug.h:7,
                    from include/linux/mmzone.h:777,
                    from include/linux/gfp.h:6,
                    from include/linux/umh.h:4,
                    from include/linux/kmod.h:22,
                    from include/linux/module.h:13,
                    from drivers/usb/core/hcd-pci.c:7:
   include/linux/rwsem.h:87:2: warning: missing braces around initializer [-Wmissing-braces]
     { __RWSEM_INIT_COUNT(name),    \
     ^
   include/linux/rwsem.h:94:29: note: in expansion of macro '__RWSEM_INITIALIZER'
     struct rw_semaphore name = __RWSEM_INITIALIZER(name)
                                ^~~~~~~~~~~~~~~~~~~
>> drivers/usb/core/hcd-pci.c:31:8: note: in expansion of macro 'DECLARE_RWSEM'
    static DECLARE_RWSEM(companions_rwsem);
           ^~~~~~~~~~~~~

vim +/DECLARE_RWSEM +31 drivers/usb/core/hcd-pci.c

^1da177e4 Linus Torvalds 2005-04-16  @7  #include <linux/module.h>
^1da177e4 Linus Torvalds 2005-04-16   8  #include <linux/pci.h>
21b1861fb David Brownell 2005-11-23   9  #include <linux/usb.h>
27729aadd Eric Lescouet  2010-04-24  10  #include <linux/usb/hcd.h>
21b1861fb David Brownell 2005-11-23  11  
^1da177e4 Linus Torvalds 2005-04-16  12  #include <asm/io.h>
^1da177e4 Linus Torvalds 2005-04-16  13  #include <asm/irq.h>
21b1861fb David Brownell 2005-11-23  14  
21b1861fb David Brownell 2005-11-23  15  #ifdef CONFIG_PPC_PMAC
21b1861fb David Brownell 2005-11-23  16  #include <asm/machdep.h>
21b1861fb David Brownell 2005-11-23  17  #include <asm/pmac_feature.h>
21b1861fb David Brownell 2005-11-23  18  #include <asm/prom.h>
21b1861fb David Brownell 2005-11-23  19  #endif
5f827ea3c David Brownell 2005-09-22  20  
5f827ea3c David Brownell 2005-09-22  21  #include "usb.h"
^1da177e4 Linus Torvalds 2005-04-16  22  
^1da177e4 Linus Torvalds 2005-04-16  23  
c6053ecff David Brownell 2005-04-18  24  /* PCI-based HCs are common, but plenty of non-PCI HCs are used too */
^1da177e4 Linus Torvalds 2005-04-16  25  
05768918b Alan Stern     2013-03-28  26  /*
05768918b Alan Stern     2013-03-28  27   * Coordinate handoffs between EHCI and companion controllers
05768918b Alan Stern     2013-03-28  28   * during EHCI probing and system resume.
6d19c009c Alan Stern     2010-02-12  29   */
6d19c009c Alan Stern     2010-02-12  30  
05768918b Alan Stern     2013-03-28 @31  static DECLARE_RWSEM(companions_rwsem);
6d19c009c Alan Stern     2010-02-12  32  

:::::: The code at line 31 was first introduced by commit
:::::: 05768918b9a122ce21bd55950df5054ff6c57f28 USB: improve port transitions when EHCI starts up

:::::: TO: Alan Stern <stern@rowland.harvard.edu>
:::::: CC: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 14543 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] locking/lockdep: Dump state of percpu_rwsem upon hung up.
  2018-07-10  9:30     ` Peter Zijlstra
@ 2018-07-10 10:48       ` Tetsuo Handa
  2018-07-10 11:28         ` Peter Zijlstra
  0 siblings, 1 reply; 11+ messages in thread
From: Tetsuo Handa @ 2018-07-10 10:48 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: kbuild test robot, kbuild-all, akpm, linux-kernel, Dmitry Vyukov,
	Ingo Molnar, Will Deacon

On 2018/07/10 18:30, Peter Zijlstra wrote:
>> diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
>> index e475683..1164965 100644
>> --- a/include/linux/rwsem-spinlock.h
>> +++ b/include/linux/rwsem-spinlock.h
>> @@ -22,7 +22,7 @@
>>   * - if wait_list is not empty, then there are processes waiting for the semaphore
>>   */
>>  struct rw_semaphore {
>> -	__s32			count;
>> +	atomic_long_t		count;
> 
> Uhhh... how does this even build? rwsem-spinlock.c doesn't use
> atomic_long primitives to change count.

Sorry, I forgot to build test rwsem-spinlock.c side.

> 
> And the atomic_long count usage is completely private to rwsem-xadd.c,
> nothing outside of that should use that field, ever.
> 

The reason I made above patch is to make
"[PATCH] locking/lockdep: Dump state of percpu_rwsem upon hung up." patch
possible to build on x86_64. Although "nothing outside of that should use
that field", I want to check "struct rw_semaphore"->count for isolating
the problem.

Should I update
"[PATCH] locking/lockdep: Dump state of percpu_rwsem upon hung up." side
to use "#ifndef CONFIG_RWSEM_GENERIC_SPINLOCK" ?


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] locking/lockdep: Dump state of percpu_rwsem upon hung up.
  2018-07-10 10:48       ` Tetsuo Handa
@ 2018-07-10 11:28         ` Peter Zijlstra
  0 siblings, 0 replies; 11+ messages in thread
From: Peter Zijlstra @ 2018-07-10 11:28 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: kbuild test robot, kbuild-all, akpm, linux-kernel, Dmitry Vyukov,
	Ingo Molnar, Will Deacon

On Tue, Jul 10, 2018 at 07:48:31PM +0900, Tetsuo Handa wrote:
> Should I update
> "[PATCH] locking/lockdep: Dump state of percpu_rwsem upon hung up." side
> to use "#ifndef CONFIG_RWSEM_GENERIC_SPINLOCK" ?

Since it's a debug patch, who cares if there's a config for which it
doesn't build?

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v2] locking/lockdep: Dump state of percpu_rwsem upon hung up.
  2018-07-07  6:31 [PATCH] locking/lockdep: Dump state of percpu_rwsem upon hung up Tetsuo Handa
  2018-07-07 11:31 ` kbuild test robot
@ 2018-07-10 11:47 ` Tetsuo Handa
  1 sibling, 0 replies; 11+ messages in thread
From: Tetsuo Handa @ 2018-07-10 11:47 UTC (permalink / raw)
  To: akpm
  Cc: linux-kernel, Dmitry Vyukov, Ingo Molnar, Peter Zijlstra, Will Deacon

Added "#ifndef CONFIG_RWSEM_GENERIC_SPINLOCK" check.

From 88f979f8f019f32de94327878ef1f7ccb7500321 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Tue, 10 Jul 2018 20:43:18 +0900
Subject: [PATCH v2] locking/lockdep: Dump state of percpu_rwsem upon hung up.

This is a temporary patch which should not go to linux.git.

syzbot is hitting hung task problems at __sb_start_write() [1]. While hung
tasks at __sb_start_write() suggest that filesystem was frozen, syzbot is
not doing ioctl(FIFREEZE) requests. Therefore, the root cause of hung tasks
is currently unknown. As a first step for debugging this problem, let's
check what atomic_long_read(&sem->rw_sem.count) says when hung task is
reported. Since it is impossible to reproduce this problem locally, this
patch was made in order to test linux-next.git using syzbot infrastructure.

[1] https://syzkaller.appspot.com/bug?id=287aa8708bc940d0ca1645223c53dd4c2d203be6

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
---
 include/linux/sched.h         |  1 +
 kernel/hung_task.c            | 27 +++++++++++++++++++++++++++
 kernel/locking/percpu-rwsem.c |  2 ++
 3 files changed, 30 insertions(+)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2974378..df56c65 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1191,6 +1191,7 @@ struct task_struct {
 	/* Used by LSM modules for access restriction: */
 	void				*security;
 #endif
+	struct percpu_rw_semaphore	*pcpu_rwsem_read_waiting;
 
 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
 	unsigned long			lowest_stack;
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index b9132d1..43975df 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -18,6 +18,7 @@
 #include <linux/utsname.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/debug.h>
+#include <linux/percpu-rwsem.h>
 
 #include <trace/events/sched.h>
 
@@ -82,6 +83,31 @@ static int __init hung_task_panic_setup(char *str)
 	.notifier_call = hung_task_panic,
 };
 
+static void dump_percpu_rwsem_state(struct percpu_rw_semaphore *sem)
+{
+#ifndef CONFIG_RWSEM_GENERIC_SPINLOCK
+	unsigned int sum = 0;
+	int cpu;
+
+	if (!sem)
+		return;
+	pr_info("percpu_rw_semaphore(%p)\n", sem);
+	pr_info("  ->rw_sem.count=0x%lx\n",
+		atomic_long_read(&sem->rw_sem.count));
+	pr_info("  ->rss.gp_state=%d\n", sem->rss.gp_state);
+	pr_info("  ->rss.gp_count=%d\n", sem->rss.gp_count);
+	pr_info("  ->rss.cb_state=%d\n", sem->rss.cb_state);
+	pr_info("  ->rss.gp_type=%d\n", sem->rss.gp_type);
+	pr_info("  ->readers_block=%d\n", sem->readers_block);
+	for_each_possible_cpu(cpu)
+		sum += per_cpu(*sem->read_count, cpu);
+	pr_info("  ->read_count=%d\n", sum);
+	pr_info("  ->list_empty(rw_sem.wait_list)=%d\n",
+	       list_empty(&sem->rw_sem.wait_list));
+	pr_info("  ->writer.task=%p\n", sem->writer.task);
+#endif
+}
+
 static void check_hung_task(struct task_struct *t, unsigned long timeout)
 {
 	unsigned long switch_count = t->nvcsw + t->nivcsw;
@@ -130,6 +156,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
 		pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
 			" disables this message.\n");
 		sched_show_task(t);
+		dump_percpu_rwsem_state(t->pcpu_rwsem_read_waiting);
 		hung_task_show_lock = true;
 	}
 
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 883cf1b..b3654ab 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -82,7 +82,9 @@ int __percpu_down_read(struct percpu_rw_semaphore *sem, int try)
 	/*
 	 * Avoid lockdep for the down/up_read() we already have them.
 	 */
+	current->pcpu_rwsem_read_waiting = sem;
 	__down_read(&sem->rw_sem);
+	current->pcpu_rwsem_read_waiting = NULL;
 	this_cpu_inc(*sem->read_count);
 	__up_read(&sem->rw_sem);
 
-- 
1.8.3.1



^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2018-07-10 11:48 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-07-07  6:31 [PATCH] locking/lockdep: Dump state of percpu_rwsem upon hung up Tetsuo Handa
2018-07-07 11:31 ` kbuild test robot
2018-07-07 14:09   ` Tetsuo Handa
2018-07-10  6:10   ` Tetsuo Handa
2018-07-10  9:20     ` Will Deacon
2018-07-10  9:30     ` Peter Zijlstra
2018-07-10 10:48       ` Tetsuo Handa
2018-07-10 11:28         ` Peter Zijlstra
2018-07-10  9:41     ` [PATCH] locking/rwsem: Convert the other sem->count to 'atomic_long_t' kbuild test robot
2018-07-10  9:41     ` kbuild test robot
2018-07-10 11:47 ` [PATCH v2] locking/lockdep: Dump state of percpu_rwsem upon hung up Tetsuo Handa

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.