linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Re: INFO: rcu detected stall in ext4_file_write_iter
       [not found] <0000000000009a01370582c6772a@google.com>
@ 2019-02-26 15:17 ` Theodore Y. Ts'o
  2019-02-27  9:58   ` Dmitry Vyukov
  2019-03-21 17:30 ` syzbot
  2020-09-11 19:33 ` syzbot
  2 siblings, 1 reply; 7+ messages in thread
From: Theodore Y. Ts'o @ 2019-02-26 15:17 UTC (permalink / raw)
  To: syzbot
  Cc: adilger.kernel, linux-ext4, linux-kernel, linux-fsdevel,
	syzkaller-bugs, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo

[-- Attachment #1: Type: text/plain, Size: 2446 bytes --]

TL;DR: This doesn't appear to be ext4 specific, and seems to involve
an unholy combination of the perf_event_open(2) and sendfile(2) system
calls.

On Mon, Feb 25, 2019 at 10:50:05PM -0800, syzbot wrote:
> syzbot found the following crash on:
> 
> HEAD commit:    8a61716ff2ab Merge tag 'ceph-for-5.0-rc8' of git://github...
> git tree:       upstream
> console output: https://syzkaller.appspot.com/x/log.txt?x=161b71d4c00000
> kernel config:  https://syzkaller.appspot.com/x/.config?x=7132344728e7ec3f
> dashboard link: https://syzkaller.appspot.com/bug?extid=7d19c5fe6a3f1161abb7
> compiler:       gcc (GCC) 9.0.0 20181231 (experimental)
> syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=103908f8c00000
> C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=105e5cd0c00000
> 
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+7d19c5fe6a3f1161abb7@syzkaller.appspotmail.com
> 
> audit: type=1400 audit(1550814986.750:36): avc:  denied  { map } for
> pid=8058 comm="syz-executor004" path="/root/syz-executor004991115"
> dev="sda1" ino=1426 scontext=unconfined_u:system_r:insmod_t:s0-s0:c0.c1023
> tcontext=unconfined_u:object_r:user_home_t:s0 tclass=file permissive=1
> hrtimer: interrupt took 42841 ns
> rcu: INFO: rcu_preempt detected stalls on CPUs/tasks:
> rcu: 	(detected by 1, t=10502 jiffies, g=5873, q=2)
> rcu: All QSes seen, last rcu_preempt kthread activity 10502
> (4295059997-4295049495), jiffies_till_next_fqs=1, root ->qsmask 0x0
> syz-executor004 R  running task    26448  8069   8060 0x00000000

This particular repro seems to induce similar failures when I tried it
with xfs and btrfs as well as ext4.

The repro seems to involve the perf_event_open(2) and sendfile(2)
system calls, and killing the process which is performing the
sendfile(2).  The repro also uses the sched_setattr(2) system call,
but when I commented it out, the failure still happened, so this
appears to be another case of "Syzkaller?  We don't need to bug
developers with a minimal test case!  Open source developers are a
free unlimited resource, after all!"  :-)

Commenting out the perf_event_open(2) does seem to make the problem go
away.

Since there are zillions of ways to self-DOS a Linux server without
having to resert to exotic combination of system calls, this isn't
something I'm going to prioritize for myself, but I'm hoping someone
else has time and curiosity.

					- Ted

[-- Attachment #2: repro.c --]
[-- Type: text/x-csrc, Size: 11507 bytes --]

// autogenerated by syzkaller (https://github.com/google/syzkaller)

#define _GNU_SOURCE

#include <dirent.h>
#include <endian.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mount.h>
#include <sys/prctl.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>

#include <linux/futex.h>

unsigned long long procid;

static void sleep_ms(uint64_t ms)
{
  usleep(ms * 1000);
}

static uint64_t current_time_ms(void)
{
  struct timespec ts;
  if (clock_gettime(CLOCK_MONOTONIC, &ts))
    exit(1);
  return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000;
}

static void use_temporary_dir(void)
{
  char tmpdir_template[] = "./syzkaller.XXXXXX";
  char* tmpdir = mkdtemp(tmpdir_template);
  if (!tmpdir)
    exit(1);
  if (chmod(tmpdir, 0777))
    exit(1);
  if (chdir(tmpdir))
    exit(1);
}

static void thread_start(void* (*fn)(void*), void* arg)
{
  pthread_t th;
  pthread_attr_t attr;
  pthread_attr_init(&attr);
  pthread_attr_setstacksize(&attr, 128 << 10);
  int i;
  for (i = 0; i < 100; i++) {
    if (pthread_create(&th, &attr, fn, arg) == 0) {
      pthread_attr_destroy(&attr);
      return;
    }
    if (errno == EAGAIN) {
      usleep(50);
      continue;
    }
    break;
  }
  exit(1);
}

#define BITMASK(bf_off, bf_len) (((1ull << (bf_len)) - 1) << (bf_off))
#define STORE_BY_BITMASK(type, htobe, addr, val, bf_off, bf_len)               \
  *(type*)(addr) =                                                             \
      htobe((htobe(*(type*)(addr)) & ~BITMASK((bf_off), (bf_len))) |           \
            (((type)(val) << (bf_off)) & BITMASK((bf_off), (bf_len))))

typedef struct {
  int state;
} event_t;

static void event_init(event_t* ev)
{
  ev->state = 0;
}

static void event_reset(event_t* ev)
{
  ev->state = 0;
}

static void event_set(event_t* ev)
{
  if (ev->state)
    exit(1);
  __atomic_store_n(&ev->state, 1, __ATOMIC_RELEASE);
  syscall(SYS_futex, &ev->state, FUTEX_WAKE | FUTEX_PRIVATE_FLAG);
}

static void event_wait(event_t* ev)
{
  while (!__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
    syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, 0);
}

static int event_isset(event_t* ev)
{
  return __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE);
}

static int event_timedwait(event_t* ev, uint64_t timeout)
{
  uint64_t start = current_time_ms();
  uint64_t now = start;
  for (;;) {
    uint64_t remain = timeout - (now - start);
    struct timespec ts;
    ts.tv_sec = remain / 1000;
    ts.tv_nsec = (remain % 1000) * 1000 * 1000;
    syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, &ts);
    if (__atomic_load_n(&ev->state, __ATOMIC_RELAXED))
      return 1;
    now = current_time_ms();
    if (now - start > timeout)
      return 0;
  }
}

#define FS_IOC_SETFLAGS _IOW('f', 2, long)
static void remove_dir(const char* dir)
{
  DIR* dp;
  struct dirent* ep;
  int iter = 0;
retry:
  while (umount2(dir, MNT_DETACH) == 0) {
  }
  dp = opendir(dir);
  if (dp == NULL) {
    if (errno == EMFILE) {
      exit(1);
    }
    exit(1);
  }
  while ((ep = readdir(dp))) {
    if (strcmp(ep->d_name, ".") == 0 || strcmp(ep->d_name, "..") == 0)
      continue;
    char filename[FILENAME_MAX];
    snprintf(filename, sizeof(filename), "%s/%s", dir, ep->d_name);
    while (umount2(filename, MNT_DETACH) == 0) {
    }
    struct stat st;
    if (lstat(filename, &st))
      exit(1);
    if (S_ISDIR(st.st_mode)) {
      remove_dir(filename);
      continue;
    }
    int i;
    for (i = 0;; i++) {
      if (unlink(filename) == 0)
        break;
      if (errno == EPERM) {
        int fd = open(filename, O_RDONLY);
        if (fd != -1) {
          long flags = 0;
          if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0)
            close(fd);
          continue;
        }
      }
      if (errno == EROFS) {
        break;
      }
      if (errno != EBUSY || i > 100)
        exit(1);
      if (umount2(filename, MNT_DETACH))
        exit(1);
    }
  }
  closedir(dp);
  int i;
  for (i = 0;; i++) {
    if (rmdir(dir) == 0)
      break;
    if (i < 100) {
      if (errno == EPERM) {
        int fd = open(dir, O_RDONLY);
        if (fd != -1) {
          long flags = 0;
          if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0)
            close(fd);
          continue;
        }
      }
      if (errno == EROFS) {
        break;
      }
      if (errno == EBUSY) {
        if (umount2(dir, MNT_DETACH))
          exit(1);
        continue;
      }
      if (errno == ENOTEMPTY) {
        if (iter < 100) {
          iter++;
          goto retry;
        }
      }
    }
    exit(1);
  }
}

static void kill_and_wait(int pid, int* status)
{
  kill(-pid, SIGKILL);
  kill(pid, SIGKILL);
  int i;
  for (i = 0; i < 100; i++) {
    if (waitpid(-1, status, WNOHANG | __WALL) == pid)
      return;
    usleep(1000);
  }
  DIR* dir = opendir("/sys/fs/fuse/connections");
  if (dir) {
    for (;;) {
      struct dirent* ent = readdir(dir);
      if (!ent)
        break;
      if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
        continue;
      char abort[300];
      snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort",
               ent->d_name);
      int fd = open(abort, O_WRONLY);
      if (fd == -1) {
        continue;
      }
      if (write(fd, abort, 1) < 0) {
      }
      close(fd);
    }
    closedir(dir);
  } else {
  }
  while (waitpid(-1, status, __WALL) != pid) {
  }
}

#define SYZ_HAVE_SETUP_TEST 1
static void setup_test()
{
  prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
  setpgrp();
}

#define SYZ_HAVE_RESET_TEST 1
static void reset_test()
{
  int fd;
  for (fd = 3; fd < 30; fd++)
    close(fd);
}

struct thread_t {
  int created, call;
  event_t ready, done;
};

static struct thread_t threads[16];
static void execute_call(int call);
static int running;

static void* thr(void* arg)
{
  struct thread_t* th = (struct thread_t*)arg;
  for (;;) {
    event_wait(&th->ready);
    event_reset(&th->ready);
    execute_call(th->call);
    __atomic_fetch_sub(&running, 1, __ATOMIC_RELAXED);
    event_set(&th->done);
  }
  return 0;
}

static void execute_one(void)
{
  int i, call, thread;
  int collide = 0;
again:
  for (call = 0; call < 5; call++) {
    for (thread = 0; thread < (int)(sizeof(threads) / sizeof(threads[0]));
         thread++) {
      struct thread_t* th = &threads[thread];
      if (!th->created) {
        th->created = 1;
        event_init(&th->ready);
        event_init(&th->done);
        event_set(&th->done);
        thread_start(thr, th);
      }
      if (!event_isset(&th->done))
        continue;
      event_reset(&th->done);
      th->call = call;
      __atomic_fetch_add(&running, 1, __ATOMIC_RELAXED);
      event_set(&th->ready);
      if (collide && (call % 2) == 0)
        break;
      event_timedwait(&th->done, 45);
      break;
    }
  }
  for (i = 0; i < 100 && __atomic_load_n(&running, __ATOMIC_RELAXED); i++)
    sleep_ms(1);
  if (!collide) {
    collide = 1;
    goto again;
  }
}

static void execute_one(void);

#define WAIT_FLAGS __WALL

static void loop(void)
{
  int iter;
  for (iter = 0;; iter++) {
    char cwdbuf[32];
    sprintf(cwdbuf, "./%d", iter);
    if (mkdir(cwdbuf, 0777))
      exit(1);
    int pid = fork();
    if (pid < 0)
      exit(1);
    if (pid == 0) {
      if (chdir(cwdbuf))
        exit(1);
      setup_test();
      execute_one();
      reset_test();
      exit(0);
    }
    int status = 0;
    uint64_t start = current_time_ms();
    for (;;) {
      if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid)
        break;
      sleep_ms(1);
      if (current_time_ms() - start < 5 * 1000)
        continue;
      kill_and_wait(pid, &status);
      break;
    }
    remove_dir(cwdbuf);
  }
}
#ifndef __NR_sched_setattr
#define __NR_sched_setattr 314
#endif

uint64_t r[1] = {0xffffffffffffffff};

void execute_call(int call)
{
  long res;
  switch (call) {
  case 0:
    memcpy((void*)0x20000040, "./bus\000", 6);
    res = syscall(__NR_open, 0x20000040, 0x1fe, 0);
    if (res != -1)
      r[0] = res;
    break;
  case 1:
    memcpy((void*)0x200001c0, "\x15\x8a\xdd\x00", 4);
    syscall(__NR_write, r[0], 0x200001c0, 4);
    break;
  case 2:
    *(uint32_t*)0x20000800 = 1;
    *(uint32_t*)0x20000804 = 0x70;
    *(uint8_t*)0x20000808 = 0;
    *(uint8_t*)0x20000809 = 0;
    *(uint8_t*)0x2000080a = 0;
    *(uint8_t*)0x2000080b = 0;
    *(uint32_t*)0x2000080c = 0;
    *(uint64_t*)0x20000810 = 0x50a;
    *(uint64_t*)0x20000818 = 0;
    *(uint64_t*)0x20000820 = 0;
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 0, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 1, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 2, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 3, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 4, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 7, 5, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 6, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 7, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 8, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 9, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 10, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 11, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 12, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 13, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0x7fff, 14, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 15, 2);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 17, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 18, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 19, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 20, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 21, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 22, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 23, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 24, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 25, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 26, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 27, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 28, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 29, 35);
    *(uint32_t*)0x20000830 = 0;
    *(uint32_t*)0x20000834 = 0;
    *(uint64_t*)0x20000838 = 0;
    *(uint64_t*)0x20000840 = 0;
    *(uint64_t*)0x20000848 = 0;
    *(uint64_t*)0x20000850 = 0;
    *(uint32_t*)0x20000858 = 0;
    *(uint32_t*)0x2000085c = 0;
    *(uint64_t*)0x20000860 = 0;
    *(uint32_t*)0x20000868 = 0xfffffffd;
    *(uint16_t*)0x2000086c = 0;
    *(uint16_t*)0x2000086e = 0;
    syscall(__NR_perf_event_open, 0x20000800, 0, -1, -1, 0);
    break;
  case 3:
    *(uint64_t*)0x20000000 = 0;
    syscall(__NR_sendfile, r[0], r[0], 0x20000000, 0x8080fffffffe);
    break;
#if 0
  case 4:
    *(uint32_t*)0x200002c0 = 0;
    *(uint32_t*)0x200002c4 = 2;
    *(uint64_t*)0x200002c8 = 0;
    *(uint32_t*)0x200002d0 = 0;
    *(uint32_t*)0x200002d4 = 3;
    *(uint64_t*)0x200002d8 = 0;
    *(uint64_t*)0x200002e0 = 0;
    *(uint64_t*)0x200002e8 = 0;
    syscall(__NR_sched_setattr, 0, 0x200002c0, 0);
    break;
#endif
  }
}
int main(void)
{
  syscall(__NR_mmap, 0x20000000, 0x1000000, 3, 0x32, -1, 0);
  for (procid = 0; procid < 6; procid++) {
    if (fork() == 0) {
      use_temporary_dir();
      loop();
    }
  }
  sleep(1000000);
  return 0;
}

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: INFO: rcu detected stall in ext4_file_write_iter
  2019-02-26 15:17 ` INFO: rcu detected stall in ext4_file_write_iter Theodore Y. Ts'o
@ 2019-02-27  9:58   ` Dmitry Vyukov
  2019-02-27 21:57     ` Theodore Y. Ts'o
  0 siblings, 1 reply; 7+ messages in thread
From: Dmitry Vyukov @ 2019-02-27  9:58 UTC (permalink / raw)
  To: Theodore Y. Ts'o, syzbot, Andreas Dilger, linux-ext4, LKML,
	linux-fsdevel, syzkaller-bugs, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo

On Tue, Feb 26, 2019 at 4:17 PM Theodore Y. Ts'o <tytso@mit.edu> wrote:
>
> TL;DR: This doesn't appear to be ext4 specific, and seems to involve
> an unholy combination of the perf_event_open(2) and sendfile(2) system
> calls.
>
> On Mon, Feb 25, 2019 at 10:50:05PM -0800, syzbot wrote:
> > syzbot found the following crash on:
> >
> > HEAD commit:    8a61716ff2ab Merge tag 'ceph-for-5.0-rc8' of git://github...
> > git tree:       upstream
> > console output: https://syzkaller.appspot.com/x/log.txt?x=161b71d4c00000
> > kernel config:  https://syzkaller.appspot.com/x/.config?x=7132344728e7ec3f
> > dashboard link: https://syzkaller.appspot.com/bug?extid=7d19c5fe6a3f1161abb7
> > compiler:       gcc (GCC) 9.0.0 20181231 (experimental)
> > syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=103908f8c00000
> > C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=105e5cd0c00000
> >
> > IMPORTANT: if you fix the bug, please add the following tag to the commit:
> > Reported-by: syzbot+7d19c5fe6a3f1161abb7@syzkaller.appspotmail.com
> >
> > audit: type=1400 audit(1550814986.750:36): avc:  denied  { map } for
> > pid=8058 comm="syz-executor004" path="/root/syz-executor004991115"
> > dev="sda1" ino=1426 scontext=unconfined_u:system_r:insmod_t:s0-s0:c0.c1023
> > tcontext=unconfined_u:object_r:user_home_t:s0 tclass=file permissive=1
> > hrtimer: interrupt took 42841 ns
> > rcu: INFO: rcu_preempt detected stalls on CPUs/tasks:
> > rcu:  (detected by 1, t=10502 jiffies, g=5873, q=2)
> > rcu: All QSes seen, last rcu_preempt kthread activity 10502
> > (4295059997-4295049495), jiffies_till_next_fqs=1, root ->qsmask 0x0
> > syz-executor004 R  running task    26448  8069   8060 0x00000000
>
> This particular repro seems to induce similar failures when I tried it
> with xfs and btrfs as well as ext4.
>
> The repro seems to involve the perf_event_open(2) and sendfile(2)
> system calls, and killing the process which is performing the
> sendfile(2).  The repro also uses the sched_setattr(2) system call,
> but when I commented it out, the failure still happened, so this
> appears to be another case of "Syzkaller?  We don't need to bug
> developers with a minimal test case!  Open source developers are a
> free unlimited resource, after all!"  :-)
>
> Commenting out the perf_event_open(2) does seem to make the problem go
> away.
>
> Since there are zillions of ways to self-DOS a Linux server without
> having to resert to exotic combination of system calls, this isn't
> something I'm going to prioritize for myself, but I'm hoping someone
> else has time and curiosity.

Peter, Ingo, do you have any updates on the
perf_event_open/sched_setattr stalls? This bug cause assorted hangs
throughout kernel and so is nasty.

syzkaller tries to remove all syscalls from reproducers one-by-one.
Somehow without sched_setattr the hang did not reproduce (a bunch of
repros have perf_event_open+sched_setattr so somehow they seem to be
related). Kernel is not as simple as a single-threaded deterministic
fully-reproducible user-space xml parsing library, more (almost all)
aspects are flaky and non-deterministic and thus require more human
intelligence. But even with perfect repros machines still won't be
able to tell in all cases that even though the hang happened in ext4
code, the root cause is actually another scheduler-related system
call. So thanks for looking into this.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: INFO: rcu detected stall in ext4_file_write_iter
  2019-02-27  9:58   ` Dmitry Vyukov
@ 2019-02-27 21:57     ` Theodore Y. Ts'o
  2019-02-28  9:34       ` Dmitry Vyukov
  0 siblings, 1 reply; 7+ messages in thread
From: Theodore Y. Ts'o @ 2019-02-27 21:57 UTC (permalink / raw)
  To: Dmitry Vyukov
  Cc: syzbot, Andreas Dilger, linux-ext4, LKML, linux-fsdevel,
	syzkaller-bugs, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo

On Wed, Feb 27, 2019 at 10:58:50AM +0100, Dmitry Vyukov wrote:
> Peter, Ingo, do you have any updates on the
> perf_event_open/sched_setattr stalls? This bug cause assorted hangs
> throughout kernel and so is nasty.
> 
> syzkaller tries to remove all syscalls from reproducers one-by-one.
> Somehow without sched_setattr the hang did not reproduce (a bunch of
> repros have perf_event_open+sched_setattr so somehow they seem to be
> related)

FWIW, at least for me, the repro.c with sched_setattr commented out
(see the repro.c attached to a message[1] earlier in the thread) it
was reproducing reliably on a 2 CPU, 2 GB memory KVM using the
ext4.git tree (dev branch, 5.0-rc3 plus ext4 commits for the next
merge window) using a Debian stable-based VM[2].

[1] https://groups.google.com/d/msg/syzkaller-bugs/ByPpM3WZw1s/li7SsaEyAgAJ
[2] https://mirrors.edge.kernel.org/pub/linux/kernel/people/tytso/kvm-xfstests/root_fs.img.amd64

> But even with perfect repros machines still won't be
> able to tell in all cases that even though the hang happened in ext4
> code, the root cause is actually another scheduler-related system
> call. So thanks for looking into this.

To be clear, there was *not* a scheduler-related system call in the
repro.c I was playing with (see [2]); just perf_event_open(2) and
sendfile(2).

Cheers,

	      	      	    	 		    	- Ted

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: INFO: rcu detected stall in ext4_file_write_iter
  2019-02-27 21:57     ` Theodore Y. Ts'o
@ 2019-02-28  9:34       ` Dmitry Vyukov
  0 siblings, 0 replies; 7+ messages in thread
From: Dmitry Vyukov @ 2019-02-28  9:34 UTC (permalink / raw)
  To: Theodore Y. Ts'o, Dmitry Vyukov, syzbot, Andreas Dilger,
	linux-ext4, LKML, linux-fsdevel, syzkaller-bugs, Peter Zijlstra,
	Ingo Molnar, Arnaldo Carvalho de Melo

On Wed, Feb 27, 2019 at 10:58 PM Theodore Y. Ts'o <tytso@mit.edu> wrote:
>
> On Wed, Feb 27, 2019 at 10:58:50AM +0100, Dmitry Vyukov wrote:
> > Peter, Ingo, do you have any updates on the
> > perf_event_open/sched_setattr stalls? This bug cause assorted hangs
> > throughout kernel and so is nasty.
> >
> > syzkaller tries to remove all syscalls from reproducers one-by-one.
> > Somehow without sched_setattr the hang did not reproduce (a bunch of
> > repros have perf_event_open+sched_setattr so somehow they seem to be
> > related)
>
> FWIW, at least for me, the repro.c with sched_setattr commented out
> (see the repro.c attached to a message[1] earlier in the thread) it
> was reproducing reliably on a 2 CPU, 2 GB memory KVM using the
> ext4.git tree (dev branch, 5.0-rc3 plus ext4 commits for the next
> merge window) using a Debian stable-based VM[2].
>
> [1] https://groups.google.com/d/msg/syzkaller-bugs/ByPpM3WZw1s/li7SsaEyAgAJ
> [2] https://mirrors.edge.kernel.org/pub/linux/kernel/people/tytso/kvm-xfstests/root_fs.img.amd64
>
> > But even with perfect repros machines still won't be
> > able to tell in all cases that even though the hang happened in ext4
> > code, the root cause is actually another scheduler-related system
> > call. So thanks for looking into this.
>
> To be clear, there was *not* a scheduler-related system call in the
> repro.c I was playing with (see [2]); just perf_event_open(2) and
> sendfile(2).

Let me correct the statement then:

But even with perfect repros machines still won't be able to tell in
all cases that even though the hang happened in ext4 code, the root
cause is actually another perf-related system call. So thanks for
looking into this.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: INFO: rcu detected stall in ext4_file_write_iter
       [not found] <0000000000009a01370582c6772a@google.com>
  2019-02-26 15:17 ` INFO: rcu detected stall in ext4_file_write_iter Theodore Y. Ts'o
@ 2019-03-21 17:30 ` syzbot
  2020-09-11 19:33 ` syzbot
  2 siblings, 0 replies; 7+ messages in thread
From: syzbot @ 2019-03-21 17:30 UTC (permalink / raw)
  To: acme, adilger.kernel, dvyukov, linux-ext4, linux-fsdevel,
	linux-kernel, mingo, peterz, syzkaller-bugs, tytso

Bisection is inconclusive: the bug happens on the oldest tested release.

bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=17b1becf200000
start commit:   [unknown
git tree:       upstream
final crash:    https://syzkaller.appspot.com/x/report.txt?x=1471becf200000
console output: https://syzkaller.appspot.com/x/log.txt?x=1071becf200000
dashboard link: https://syzkaller.appspot.com/bug?extid=7d19c5fe6a3f1161abb7
syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=103908f8c00000
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=105e5cd0c00000

For information about bisection process see: https://goo.gl/tpsmEJ#bisection

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: INFO: rcu detected stall in ext4_file_write_iter
       [not found] <0000000000009a01370582c6772a@google.com>
  2019-02-26 15:17 ` INFO: rcu detected stall in ext4_file_write_iter Theodore Y. Ts'o
  2019-03-21 17:30 ` syzbot
@ 2020-09-11 19:33 ` syzbot
  2020-09-11 20:03   ` Jens Axboe
  2 siblings, 1 reply; 7+ messages in thread
From: syzbot @ 2020-09-11 19:33 UTC (permalink / raw)
  To: acme, adilger.kernel, axboe, dvyukov, fweisbec, linux-ext4,
	linux-fsdevel, linux-kernel, ming.lei, mingo, mingo,
	penguin-kernel, peterz, syzkaller-bugs, tglx, tytso

syzbot suspects this issue was fixed by commit:

commit 7e24969022cbd61ddc586f14824fc205661bb124
Author: Ming Lei <ming.lei@redhat.com>
Date:   Mon Aug 17 10:00:55 2020 +0000

    block: allow for_each_bvec to support zero len bvec

bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=1120f201900000
start commit:   92ed3019 Linux 5.8-rc7
git tree:       upstream
kernel config:  https://syzkaller.appspot.com/x/.config?x=84f076779e989e69
dashboard link: https://syzkaller.appspot.com/bug?extid=7d19c5fe6a3f1161abb7
syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=170f7cbc900000
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=16a25e38900000

If the result looks correct, please mark the issue as fixed by replying with:

#syz fix: block: allow for_each_bvec to support zero len bvec

For information about bisection process see: https://goo.gl/tpsmEJ#bisection

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: INFO: rcu detected stall in ext4_file_write_iter
  2020-09-11 19:33 ` syzbot
@ 2020-09-11 20:03   ` Jens Axboe
  0 siblings, 0 replies; 7+ messages in thread
From: Jens Axboe @ 2020-09-11 20:03 UTC (permalink / raw)
  To: syzbot, acme, adilger.kernel, dvyukov, fweisbec, linux-ext4,
	linux-fsdevel, linux-kernel, ming.lei, mingo, mingo,
	penguin-kernel, peterz, syzkaller-bugs, tglx, tytso

#syz fix: block: allow for_each_bvec to support zero len bvec


-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2020-09-11 20:03 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <0000000000009a01370582c6772a@google.com>
2019-02-26 15:17 ` INFO: rcu detected stall in ext4_file_write_iter Theodore Y. Ts'o
2019-02-27  9:58   ` Dmitry Vyukov
2019-02-27 21:57     ` Theodore Y. Ts'o
2019-02-28  9:34       ` Dmitry Vyukov
2019-03-21 17:30 ` syzbot
2020-09-11 19:33 ` syzbot
2020-09-11 20:03   ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).