linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Baokun Li <libaokun1@huawei.com>
To: Yi Zhang <yi.zhang@redhat.com>, Ming Lei <ming.lei@redhat.com>,
	<mark.rutland@arm.com>
Cc: Christian Brauner <brauner@kernel.org>,
	<linux-fsdevel@vger.kernel.org>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	<linux-kernel@vger.kernel.org>, <linux-scsi@vger.kernel.org>,
	Changhui Zhong <czhong@redhat.com>,
	yangerkun <yangerkun@huawei.com>,
	"zhangyi (F)" <yi.zhang@huawei.com>,
	Baokun Li <libaokun1@huawei.com>, <peterz@infradead.org>,
	Kees Cook <keescook@chromium.org>,
	chengzhihao <chengzhihao1@huawei.com>
Subject: Re: [czhong@redhat.com: [bug report] WARNING: CPU: 121 PID: 93233 at fs/dcache.c:365 __dentry_kill+0x214/0x278]
Date: Sat, 16 Sep 2023 14:55:47 +0800	[thread overview]
Message-ID: <89d049ed-6bbf-bba7-80d4-06c060e65e5b@huawei.com> (raw)
In-Reply-To: <CAHj4cs_MqqWYy+pKrNrLqTb=eoSOXcZdjPXy44x-aA1WvdVv0w@mail.gmail.com>

On 2023/9/13 16:59, Yi Zhang wrote:
> The issue still can be reproduced on the latest linux tree[2].
> To reproduce I need to run about 1000 times blktests block/001, and
> bisect shows it was introduced with commit[1], as it was not 100%
> reproduced, not sure if it's the culprit?
>
>
> [1] 9257959a6e5b locking/atomic: scripts: restructure fallback ifdeffery
Hello, everyone!

We have confirmed that merging this patch causes hlist_bl_lock (i.e.
bit_spin_lock) to stop providing mutual exclusion, which in turn triggers
the issue above.


The sequence by which the VFS issue arises is as follows:
1.  bl_head >>> first==dentry2 >>> dentry1
dentry2->next = dentry1
dentry2->pprev = head
dentry1->next = NULL
dentry1->pprev = dentry2

2. Both dentries are deleted concurrently because hlist_bl_lock fails to protect the list:
```
__hlist_bl_del(dentry2)
                                __hlist_bl_del(dentry1)
                                dentry2->next = NULL;
                                dentry1->next = NULL;
                                dentry1->pprev = NULL;
head->first = dentry1
dentry1->pprev = head
dentry2->next = NULL;
dentry2->pprev = NULL;
```
3. WARN_ON/BUG_ON is triggered because dentry1 is still on the
  hlist after being deleted.

dentry1->next = NULL
dentry1->pprev = head


The following module verifies that hlist_bl_lock is not providing mutual exclusion:
mymod.c
```
#include <linux/kallsyms.h>
#include <linux/module.h>
#include <linux/moduleloader.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/percpu.h>
#include <linux/threads.h>
#include <linux/kthread.h>
#include <linux/kernel_stat.h>
#include <linux/version.h>
#include <linux/slab.h>
#include <linux/smpboot.h>
#include <linux/pagemap.h>
#include <linux/notifier.h>
#include <linux/syscalls.h>
#include <linux/namei.h>

#include <asm/atomic.h>
#include <asm/bitops.h>

/*
 * Shared counters. They are only read and written while bl_head's bit lock
 * is held, and are always incremented together, so they must compare equal
 * whenever they are observed under the lock.
 */
static unsigned long long a = 0, b = 0;
/* List head used here only for its lock (hlist_bl_lock/hlist_bl_unlock). */
static struct hlist_bl_head bl_head;

/* Handles of the six worker kthreads; mymod_exit() uses them to stop the workers. */
struct task_struct *Thread1;
struct task_struct *Thread2;
struct task_struct *Thread3;
struct task_struct *Thread4;
struct task_struct *Thread5;
struct task_struct *Thread6;
/* Thread function shared by all workers. */
int increase_ab(void *arg);

/*
 * increase_ab - worker loop that checks hlist_bl_lock() mutual exclusion.
 * @arg: unused.
 *
 * Each iteration takes bl_head's bit lock, verifies that the counters a and b
 * are equal, increments both, drops the lock and yields the CPU. Because both
 * counters are only ever changed together under the lock, observing a != b
 * means two threads were inside the critical section at the same time; that
 * is reported and the kernel is stopped with BUG().
 *
 * The loop ends when kthread_stop() is called on this thread, so the module
 * can be unloaded.
 *
 * Return: 0 once a stop has been requested.
 */
int increase_ab(void *arg)
{
	while (!kthread_should_stop()) {
		hlist_bl_lock(&bl_head);
		if (a != b) {
			pr_err(">>> a = %llu, b = %llu \n", a, b);
			/* BUG() does not return, so no unlock/return is needed here. */
			BUG();
		}
		/* Reset well before the counters could wrap around. */
		if (a > (ULLONG_MAX - 4096)) {
			a = 0;
			b = 0;
		}
		a++;
		b++;
		hlist_bl_unlock(&bl_head);
		/* Give the other workers a chance to run and contend for the lock. */
		schedule();
	}
	return 0;
}

/*
 * mymod_init - module entry point: start the six lock-contention workers.
 *
 * All threads are created before any of them is woken. If a creation fails,
 * the threads created so far have not started running increase_ab() yet, so
 * kthread_stop() reaps them without entering the worker loop.
 *
 * Return: 0 on success, or the negative errno reported by kthread_create().
 */
static int mymod_init(void)
{
	struct task_struct **slots[] = {
		&Thread1, &Thread2, &Thread3, &Thread4, &Thread5, &Thread6,
	};
	unsigned int i;
	int err;

	INIT_HLIST_BL_HEAD(&bl_head);

	for (i = 0; i < ARRAY_SIZE(slots); i++) {
		struct task_struct *t;

		/* Produces the names bl_lock_thread1 .. bl_lock_thread6. */
		t = kthread_create(increase_ab, NULL, "bl_lock_thread%u", i + 1);
		if (IS_ERR(t)) {
			/* kthread_create() returns ERR_PTR(), never NULL, on failure. */
			err = PTR_ERR(t);
			goto err_stop;
		}
		*slots[i] = t;
	}

	for (i = 0; i < ARRAY_SIZE(slots); i++)
		wake_up_process(*slots[i]);

	return 0;

err_stop:
	/* Undo in reverse order; i is the index of the slot that failed. */
	while (i--) {
		kthread_stop(*slots[i]);
		*slots[i] = NULL;
	}
	return err;
}

static void mymod_exit(void)
{
     if (Thread1)
         kthread_stop(Thread1);
         if (Thread2)
                 kthread_stop(Thread2);
         if (Thread3)
                 kthread_stop(Thread3);
         if (Thread4)
                 kthread_stop(Thread4);
         if (Thread5)
                 kthread_stop(Thread5);
         if (Thread6)
                 kthread_stop(Thread6);
}

module_init(mymod_init);
module_exit(mymod_exit);

MODULE_LICENSE("Dual BSD/GPL");

```


After 9257959a6e5b ("locking/atomic: scripts: restructure fallback
ifdeffery") is merged in, the problem shows up as soon as the module is
inserted:
```
[root@localhost ~]# insmod mymod.ko
[   37.994787][  T621] >>> a = 725, b = 724
[   37.995313][  T621] ------------[ cut here ]------------
[   37.995951][  T621] kernel BUG at fs/mymod/mymod.c:42!
[root@localhost ~]#
[   37.996461][  T621] Internal error: Oops - BUG: 
00000000f2000800 [#1] SMP
[   37.997420][  T621] Modules linked in: mymod(E)
[   37.997891][  T621] CPU: 9 PID: 621 Comm: bl_lock_thread2 Tainted: 
G            E      6.4.0-rc2-00034-g9257959a6e5b-dirty #117
[   37.999038][  T621] Hardware name: linux,dummy-virt (DT)
[   37.999571][  T621] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT 
-SSBS BTYPE=--)
[   38.000344][  T621] pc : increase_ab+0xcc/0xe70 [mymod]
[   38.000882][  T621] lr : increase_ab+0xcc/0xe70 [mymod]
[   38.001416][  T621] sp : ffff800008b4be40
[   38.001822][  T621] x29: ffff800008b4be40 x28: 0000000000000000 x27: 
0000000000000000
[   38.002605][  T621] x26: 0000000000000000 x25: 0000000000000000 x24: 
0000000000000000
[   38.003385][  T621] x23: ffffd9930c698190 x22: ffff800008a0ba38 x21: 
0000000000000001
[   38.004174][  T621] x20: ffffffffffffefff x19: ffffd9930c69a580 x18: 
0000000000000000
[   38.004955][  T621] x17: 0000000000000000 x16: ffffd9933011bd38 x15: 
ffffffffffffffff
[   38.005754][  T621] x14: 0000000000000000 x13: 205d313236542020 x12: 
ffffd99332175b80
[   38.006538][  T621] x11: 0000000000000003 x10: 0000000000000001 x9 : 
ffffd9933022a9d8
[   38.007325][  T621] x8 : 00000000000bffe8 x7 : c0000000ffff7fff x6 : 
ffffd993320b5b40
[   38.008124][  T621] x5 : ffff0001f7d1c708 x4 : 0000000000000000 x3 : 
0000000000000000
[   38.008912][  T621] x2 : 0000000000000000 x1 : 0000000000000000 x0 : 
0000000000000015
[   38.009709][  T621] Call trace:
[   38.010035][  T621]  increase_ab+0xcc/0xe70 [mymod]
[   38.010539][  T621]  kthread+0xdc/0xf0
[   38.010927][  T621]  ret_from_fork+0x10/0x20
[   38.011370][  T621] Code: 17ffffe0 90000020 91044000 9400000d (d4210000)
[   38.012067][  T621] ---[ end trace 0000000000000000 ]---
[   38.012603][  T621] Kernel panic - not syncing: Oops - BUG: Fatal 
exception
[   38.013311][  T621] SMP: stopping secondary CPUs
[   38.013818][  T621] Kernel Offset: 0x599328000000 from 0xffff800008000000
[   38.014508][  T621] PHYS_OFFSET: 0x40000000
[   38.014933][  T621] CPU features: 0x000000,0220080c,44016203
[   38.015510][  T621] Memory Limit: none
[   38.015950][  T621] ---[ end Kernel panic - not syncing: Oops - BUG: 
Fatal exception ]---
```

























  reply	other threads:[~2023-09-16  6:56 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-08-23  4:06 [czhong@redhat.com: [bug report] WARNING: CPU: 121 PID: 93233 at fs/dcache.c:365 __dentry_kill+0x214/0x278] Ming Lei
2023-08-23  8:47 ` Christian Brauner
2023-08-28 10:43   ` Ming Lei
2023-09-13  8:59     ` Yi Zhang
2023-09-16  6:55       ` Baokun Li [this message]
2023-09-17  9:10         ` Peter Zijlstra
2023-09-17  9:26           ` Peter Zijlstra
2023-09-18  1:52             ` Baokun Li
2023-09-18 18:42               ` Darrick J. Wong
2023-09-18  1:10           ` Baokun Li
2023-09-18 10:20             ` Yi Zhang
2023-09-19 15:10         ` Mark Rutland
2023-09-17  0:35       ` Bagas Sanjaya
2023-09-29 13:24         ` Linux regression tracking #update (Thorsten Leemhuis)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=89d049ed-6bbf-bba7-80d4-06c060e65e5b@huawei.com \
    --to=libaokun1@huawei.com \
    --cc=brauner@kernel.org \
    --cc=chengzhihao1@huawei.com \
    --cc=czhong@redhat.com \
    --cc=keescook@chromium.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=ming.lei@redhat.com \
    --cc=peterz@infradead.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=yangerkun@huawei.com \
    --cc=yi.zhang@huawei.com \
    --cc=yi.zhang@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).