All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Theodore Y. Ts'o" <tytso@mit.edu>
To: syzbot <syzbot+7d19c5fe6a3f1161abb7@syzkaller.appspotmail.com>
Cc: <adilger.kernel@dilger.ca>, <linux-ext4@vger.kernel.org>,
	<linux-kernel@vger.kernel.org>, <linux-fsdevel@vger.kernel.org>,
	<syzkaller-bugs@googlegroups.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	Arnaldo Carvalho de Melo <acme@kernel.org>
Subject: Re: INFO: rcu detected stall in ext4_file_write_iter
Date: Tue, 26 Feb 2019 10:17:38 -0500	[thread overview]
Message-ID: <20190226151738.GA6430@mit.edu> (raw)
In-Reply-To: <0000000000009a01370582c6772a@google.com>

[-- Attachment #1: Type: text/plain, Size: 2446 bytes --]

TL;DR: This doesn't appear to be ext4 specific, and seems to involve
an unholy combination of the perf_event_open(2) and sendfile(2) system
calls.

On Mon, Feb 25, 2019 at 10:50:05PM -0800, syzbot wrote:
> syzbot found the following crash on:
> 
> HEAD commit:    8a61716ff2ab Merge tag 'ceph-for-5.0-rc8' of git://github...
> git tree:       upstream
> console output: https://syzkaller.appspot.com/x/log.txt?x=161b71d4c00000
> kernel config:  https://syzkaller.appspot.com/x/.config?x=7132344728e7ec3f
> dashboard link: https://syzkaller.appspot.com/bug?extid=7d19c5fe6a3f1161abb7
> compiler:       gcc (GCC) 9.0.0 20181231 (experimental)
> syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=103908f8c00000
> C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=105e5cd0c00000
> 
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+7d19c5fe6a3f1161abb7@syzkaller.appspotmail.com
> 
> audit: type=1400 audit(1550814986.750:36): avc:  denied  { map } for
> pid=8058 comm="syz-executor004" path="/root/syz-executor004991115"
> dev="sda1" ino=1426 scontext=unconfined_u:system_r:insmod_t:s0-s0:c0.c1023
> tcontext=unconfined_u:object_r:user_home_t:s0 tclass=file permissive=1
> hrtimer: interrupt took 42841 ns
> rcu: INFO: rcu_preempt detected stalls on CPUs/tasks:
> rcu: 	(detected by 1, t=10502 jiffies, g=5873, q=2)
> rcu: All QSes seen, last rcu_preempt kthread activity 10502
> (4295059997-4295049495), jiffies_till_next_fqs=1, root ->qsmask 0x0
> syz-executor004 R  running task    26448  8069   8060 0x00000000

This particular repro seems to induce similar failures when I tried it
with xfs and btrfs as well as ext4.

The repro seems to involve the perf_event_open(2) and sendfile(2)
system calls, and killing the process which is performing the
sendfile(2).  The repro also uses the sched_setattr(2) system call,
but when I commented it out, the failure still happened, so this
appears to be another case of "Syzkaller?  We don't need to bug
developers with a minimal test case!  Open source developers are a
free unlimited resource, after all!"  :-)

Commenting out the perf_event_open(2) does seem to make the problem go
away.

Since there are zillions of ways to self-DOS a Linux server without
having to resort to exotic combinations of system calls, this isn't
something I'm going to prioritize for myself, but I'm hoping someone
else has time and curiosity.

					- Ted

[-- Attachment #2: repro.c --]
[-- Type: text/x-csrc, Size: 11507 bytes --]

// autogenerated by syzkaller (https://github.com/google/syzkaller)

#define _GNU_SOURCE

#include <dirent.h>
#include <endian.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mount.h>
#include <sys/prctl.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>

#include <linux/futex.h>

unsigned long long procid;

// Sleep for `ms` milliseconds. The value is scaled to microseconds and
// handed to usleep(); the return value of usleep() is ignored.
static void sleep_ms(uint64_t ms)
{
  uint64_t usec = ms * 1000;
  usleep(usec);
}

// Return the current CLOCK_MONOTONIC reading in whole milliseconds.
// Terminates the process with exit status 1 if the clock cannot be read.
static uint64_t current_time_ms(void)
{
  struct timespec now;
  if (clock_gettime(CLOCK_MONOTONIC, &now) != 0)
    exit(1);
  uint64_t from_sec = (uint64_t)now.tv_sec * 1000;
  uint64_t from_nsec = (uint64_t)now.tv_nsec / 1000000;
  return from_sec + from_nsec;
}

// Create a fresh "./syzkaller.XXXXXX" directory with mkdtemp(), make it
// mode 0777 and change into it. Any failure terminates the process with
// exit status 1.
static void use_temporary_dir(void)
{
  char name[] = "./syzkaller.XXXXXX";
  char* dir = mkdtemp(name);
  // Short-circuit evaluation keeps the original order: create, chmod, chdir.
  if (dir == NULL || chmod(dir, 0777) != 0 || chdir(dir) != 0)
    exit(1);
}

// Start a thread running fn(arg) with a 128 KiB stack.
//
// Creation is attempted up to 100 times: an EAGAIN failure is retried after
// a 50 microsecond pause, any other failure stops the retries. If no thread
// could be created the process exits with status 1. The thread handle is
// discarded (the thread is neither joined nor detached here).
static void thread_start(void* (*fn)(void*), void* arg)
{
  pthread_t th;
  pthread_attr_t attr;
  pthread_attr_init(&attr);
  pthread_attr_setstacksize(&attr, 128 << 10);
  int i;
  for (i = 0; i < 100; i++) {
    // pthread_create() reports failure through its return value, not errno,
    // so the retry decision must be based on the returned error number.
    int err = pthread_create(&th, &attr, fn, arg);
    if (err == 0) {
      pthread_attr_destroy(&attr);
      return;
    }
    if (err != EAGAIN)
      break;
    usleep(50);
  }
  exit(1);
}

// BITMASK(bf_off, bf_len): a 64-bit mask with bf_len consecutive one bits
// starting at bit position bf_off.
#define BITMASK(bf_off, bf_len) (((1ull << (bf_len)) - 1) << (bf_off))
// STORE_BY_BITMASK(type, htobe, addr, val, bf_off, bf_len): read-modify-write
// of the `type`-sized object at `addr`. The current value is passed through
// `htobe`, the bit-field [bf_off, bf_off + bf_len) is cleared, `val` is
// shifted into place and masked to the field width, and the result is passed
// through `htobe` again before being stored. Callers in this file pass an
// empty `htobe`, which leaves only the parentheses (no byte swapping).
#define STORE_BY_BITMASK(type, htobe, addr, val, bf_off, bf_len)               \
  *(type*)(addr) =                                                             \
      htobe((htobe(*(type*)(addr)) & ~BITMASK((bf_off), (bf_len))) |           \
            (((type)(val) << (bf_off)) & BITMASK((bf_off), (bf_len))))

// One-shot event flag used for hand-off between threads.
typedef struct {
  // 0 while the event is clear, 1 once it has been set. Also used as the
  // futex word that waiters sleep on.
  int state;
} event_t;

// Put the event into the clear (state == 0) condition.
static void event_init(event_t* ev)
{
  ev->state = 0;
}

// Clear the event again so it can be reused. This is a plain (non-atomic)
// store, unlike the atomic accesses used when setting and testing the event.
static void event_reset(event_t* ev)
{
  ev->state = 0;
}

// Mark the event as set and wake any thread sleeping on it.
//
// Setting an event that is already set is treated as a fatal logic error
// (exit status 1). The state is published with release ordering.
static void event_set(event_t* ev)
{
  if (ev->state)
    exit(1);
  __atomic_store_n(&ev->state, 1, __ATOMIC_RELEASE);
  // FUTEX_WAKE takes the maximum number of waiters to wake as its `val`
  // argument. It must be passed explicitly: syscall() is variadic, so an
  // omitted argument leaves the kernel reading an indeterminate count.
  syscall(SYS_futex, &ev->state, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, 1000000);
}

// Block until the event has been set. The state is re-checked (acquire
// load) after every futex wake-up, so spurious returns are harmless.
static void event_wait(event_t* ev)
{
  for (;;) {
    if (__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
      return;
    // Sleep only while the futex word still holds 0; no timeout.
    syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, 0);
  }
}

// Non-blocking test: returns the current state (non-zero once the event
// has been set), read with acquire ordering.
static int event_isset(event_t* ev)
{
  int signalled = __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE);
  return signalled;
}

// Wait for the event to be set, giving up after `timeout` milliseconds.
//
// Returns 1 if the event was observed set, 0 if more than `timeout`
// milliseconds (measured with current_time_ms()) passed first.
static int event_timedwait(event_t* ev, uint64_t timeout)
{
  uint64_t start = current_time_ms();
  uint64_t now = start;
  for (;;) {
    // Time left in this wait; the loop exit below guarantees that
    // (now - start) never exceeds timeout here, so this cannot wrap.
    uint64_t remain = timeout - (now - start);
    struct timespec ts;
    ts.tv_sec = remain / 1000;
    ts.tv_nsec = (remain % 1000) * 1000 * 1000;
    syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, &ts);
    // Acquire load, matching event_wait()/event_isset(), so that it pairs
    // with the release store performed when the event is set.
    if (__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
      return 1;
    now = current_time_ms();
    if (now - start > timeout)
      return 0;
  }
}

#define FS_IOC_SETFLAGS _IOW('f', 2, long)
// Recursively delete the directory tree rooted at `dir`.
//
// Anything mounted on `dir` or on an entry inside it is lazily detached
// (umount2 with MNT_DETACH) first. Sub-directories are removed by recursion,
// everything else with unlink(). Recovery steps on failure:
//   EPERM     - clear the inode flags with FS_IOC_SETFLAGS and retry;
//   EROFS     - give up on that entry silently;
//   EBUSY     - detach the mount and retry;
//   ENOTEMPTY - (final rmdir only) rescan the directory, up to 100 times.
// Any other failure, or running out of retries, exits with status 1.
static void remove_dir(const char* dir)
{
  DIR* dp;
  struct dirent* ep;
  int iter = 0;
retry:
  while (umount2(dir, MNT_DETACH) == 0) {
  }
  dp = opendir(dir);
  if (dp == NULL)
    exit(1);
  while ((ep = readdir(dp))) {
    if (strcmp(ep->d_name, ".") == 0 || strcmp(ep->d_name, "..") == 0)
      continue;
    char filename[FILENAME_MAX];
    snprintf(filename, sizeof(filename), "%s/%s", dir, ep->d_name);
    while (umount2(filename, MNT_DETACH) == 0) {
    }
    struct stat st;
    if (lstat(filename, &st))
      exit(1);
    if (S_ISDIR(st.st_mode)) {
      remove_dir(filename);
      continue;
    }
    int i;
    for (i = 0;; i++) {
      if (unlink(filename) == 0)
        break;
      // The retry is bounded: once the budget is used up, control falls
      // through to the checks below, which exit because errno is not EBUSY.
      if (errno == EPERM && i <= 100) {
        int fd = open(filename, O_RDONLY);
        if (fd != -1) {
          long flags = 0;
          // Best effort; the descriptor is closed whether or not the ioctl
          // succeeded so that repeated retries cannot leak descriptors.
          ioctl(fd, FS_IOC_SETFLAGS, &flags);
          close(fd);
          continue;
        }
      }
      if (errno == EROFS) {
        break;
      }
      if (errno != EBUSY || i > 100)
        exit(1);
      if (umount2(filename, MNT_DETACH))
        exit(1);
    }
  }
  closedir(dp);
  int i;
  for (i = 0;; i++) {
    if (rmdir(dir) == 0)
      break;
    if (i < 100) {
      if (errno == EPERM) {
        int fd = open(dir, O_RDONLY);
        if (fd != -1) {
          long flags = 0;
          // Same as above: always release the descriptor.
          ioctl(fd, FS_IOC_SETFLAGS, &flags);
          close(fd);
          continue;
        }
      }
      if (errno == EROFS) {
        break;
      }
      if (errno == EBUSY) {
        if (umount2(dir, MNT_DETACH))
          exit(1);
        continue;
      }
      if (errno == ENOTEMPTY) {
        if (iter < 100) {
          iter++;
          goto retry;
        }
      }
    }
    exit(1);
  }
}

// Kill child `pid` (and the process group with the same id) with SIGKILL
// and reap it, storing the wait status in *status.
//
// The child is polled for up to 100 times at 1 ms intervals. If it has not
// exited by then, one byte is written to every
// /sys/fs/fuse/connections/<id>/abort file that can be opened, and the
// function then blocks until `pid` is reaped.
static void kill_and_wait(int pid, int* status)
{
  kill(-pid, SIGKILL);
  kill(pid, SIGKILL);
  int attempt = 0;
  while (attempt < 100) {
    if (waitpid(-1, status, WNOHANG | __WALL) == pid)
      return;
    usleep(1000);
    attempt++;
  }
  DIR* conns = opendir("/sys/fs/fuse/connections");
  if (conns != NULL) {
    struct dirent* ent;
    while ((ent = readdir(conns)) != NULL) {
      if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
        continue;
      char path[300];
      snprintf(path, sizeof(path), "/sys/fs/fuse/connections/%s/abort",
               ent->d_name);
      int fd = open(path, O_WRONLY);
      if (fd == -1)
        continue;
      // The byte written is simply the first character of the path; a
      // failed write is deliberately ignored.
      if (write(fd, path, 1) < 0) {
      }
      close(fd);
    }
    closedir(conns);
  }
  while (waitpid(-1, status, __WALL) != pid) {
  }
}

#define SYZ_HAVE_SETUP_TEST 1
// Per-test process setup: request SIGKILL when the parent dies and call
// setpgrp(). Both results are intentionally ignored.
static void setup_test()
{
  (void)prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
  (void)setpgrp();
}

#define SYZ_HAVE_RESET_TEST 1
// Per-test teardown: close file descriptors 3 through 29, ignoring errors
// for descriptors that were not open.
static void reset_test()
{
  int fd = 3;
  while (fd < 30) {
    close(fd);
    fd++;
  }
}

// Bookkeeping for one worker thread.
struct thread_t {
  // Non-zero once the slot has been initialised and its thread started.
  int created;
  // Index of the call the worker should execute next.
  int call;
  // Set to hand a call to the worker.
  event_t ready;
  // Set by the worker when the call has finished.
  event_t done;
};

// Fixed pool of worker slots; zero-initialised, so every slot starts out
// with created == 0.
static struct thread_t threads[16];
// Forward declaration; the definition appears further down in the file.
static void execute_call(int call);
// Number of calls handed to workers that have not completed yet. Updated
// with atomic add/sub operations.
static int running;

// Worker thread body. `arg` is the thread's own struct thread_t slot.
// Forever: wait for a call to be handed over, run it, decrement the
// outstanding-call counter and signal completion.
static void* thr(void* arg)
{
  struct thread_t* self = (struct thread_t*)arg;
  while (1) {
    event_wait(&self->ready);
    event_reset(&self->ready);
    execute_call(self->call);
    __atomic_fetch_sub(&running, 1, __ATOMIC_RELAXED);
    event_set(&self->done);
  }
  // Not reached; keeps the function well-formed for its return type.
  return 0;
}

static void execute_one(void)
{
  int i, call, thread;
  int collide = 0;
again:
  for (call = 0; call < 5; call++) {
    for (thread = 0; thread < (int)(sizeof(threads) / sizeof(threads[0]));
         thread++) {
      struct thread_t* th = &threads[thread];
      if (!th->created) {
        th->created = 1;
        event_init(&th->ready);
        event_init(&th->done);
        event_set(&th->done);
        thread_start(thr, th);
      }
      if (!event_isset(&th->done))
        continue;
      event_reset(&th->done);
      th->call = call;
      __atomic_fetch_add(&running, 1, __ATOMIC_RELAXED);
      event_set(&th->ready);
      if (collide && (call % 2) == 0)
        break;
      event_timedwait(&th->done, 45);
      break;
    }
  }
  for (i = 0; i < 100 && __atomic_load_n(&running, __ATOMIC_RELAXED); i++)
    sleep_ms(1);
  if (!collide) {
    collide = 1;
    goto again;
  }
}

// Prototype for execute_one(); redundant at this point because the
// definition already appears above, but harmless.
static void execute_one(void);

// Flags OR-ed into the options of the polling waitpid() call in loop().
#define WAIT_FLAGS __WALL

// Endless test driver. Each iteration creates a numbered working directory,
// forks a child that runs one test inside it, waits for the child (killing
// it once 5 seconds have passed) and finally removes the directory.
// Failure of mkdir() or fork() terminates the process with status 1.
static void loop(void)
{
  int iter = 0;
  while (1) {
    char cwdbuf[32];
    snprintf(cwdbuf, sizeof(cwdbuf), "./%d", iter);
    if (mkdir(cwdbuf, 0777) != 0)
      exit(1);
    int pid = fork();
    if (pid < 0)
      exit(1);
    if (pid == 0) {
      // Child: run a single test in its own directory, then exit.
      if (chdir(cwdbuf) != 0)
        exit(1);
      setup_test();
      execute_one();
      reset_test();
      exit(0);
    }
    int status = 0;
    uint64_t start = current_time_ms();
    // Poll every millisecond until the child is reaped or the deadline hits.
    while (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) != pid) {
      sleep_ms(1);
      if (current_time_ms() - start >= 5 * 1000) {
        kill_and_wait(pid, &status);
        break;
      }
    }
    remove_dir(cwdbuf);
    iter++;
  }
}
// Fallback for headers that do not define the sched_setattr syscall number.
// NOTE(review): 314 is presumably the x86-64 number; confirm before building
// for another architecture.
#ifndef __NR_sched_setattr
#define __NR_sched_setattr 314
#endif

// Resource slots shared between calls. r[0] receives the descriptor returned
// by the open in call 0 and is used as the descriptor argument of later
// calls; it starts out as all-ones.
uint64_t r[1] = {0xffffffffffffffff};

// Execute one step of the generated reproducer, selected by `call`.
//
// All hard-coded addresses lie inside the 0x20000000..0x21000000 region that
// main() maps before any call runs. The values are generated test data and
// must be kept exactly as they are. Return values other than the descriptor
// from call 0 are ignored; an unknown `call` does nothing.
void execute_call(int call)
{
  long res;
  switch (call) {
  case 0:
    // open("./bus", 0x1fe, 0); remember the descriptor in r[0] on success.
    memcpy((void*)0x20000040, "./bus\000", 6);
    res = syscall(__NR_open, 0x20000040, 0x1fe, 0);
    if (res != -1)
      r[0] = res;
    break;
  case 1:
    // Write four bytes to the descriptor held in r[0].
    memcpy((void*)0x200001c0, "\x15\x8a\xdd\x00", 4);
    syscall(__NR_write, r[0], 0x200001c0, 4);
    break;
  case 2:
    // Build the attribute block passed to perf_event_open() at 0x20000800.
    // NOTE(review): the layout is presumably struct perf_event_attr (first
    // word = type, second word = size 0x70) - confirm against the kernel
    // header before editing any offset.
    *(uint32_t*)0x20000800 = 1;
    *(uint32_t*)0x20000804 = 0x70;
    *(uint8_t*)0x20000808 = 0;
    *(uint8_t*)0x20000809 = 0;
    *(uint8_t*)0x2000080a = 0;
    *(uint8_t*)0x2000080b = 0;
    *(uint32_t*)0x2000080c = 0;
    *(uint64_t*)0x20000810 = 0x50a;
    *(uint64_t*)0x20000818 = 0;
    *(uint64_t*)0x20000820 = 0;
    // Bit-field word at 0x20000828, filled one field at a time. Values wider
    // than their field (7 and 0x7fff into 1-bit fields) are truncated by the
    // macro's mask, so only bits 5 and 14 end up set.
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 0, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 1, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 2, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 3, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 4, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 7, 5, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 6, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 7, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 8, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 9, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 10, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 11, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 12, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 13, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0x7fff, 14, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 15, 2);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 17, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 18, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 19, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 20, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 21, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 22, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 23, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 24, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 25, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 26, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 27, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 28, 1);
    STORE_BY_BITMASK(uint64_t, , 0x20000828, 0, 29, 35);
    *(uint32_t*)0x20000830 = 0;
    *(uint32_t*)0x20000834 = 0;
    *(uint64_t*)0x20000838 = 0;
    *(uint64_t*)0x20000840 = 0;
    *(uint64_t*)0x20000848 = 0;
    *(uint64_t*)0x20000850 = 0;
    *(uint32_t*)0x20000858 = 0;
    *(uint32_t*)0x2000085c = 0;
    *(uint64_t*)0x20000860 = 0;
    *(uint32_t*)0x20000868 = 0xfffffffd;
    *(uint16_t*)0x2000086c = 0;
    *(uint16_t*)0x2000086e = 0;
    syscall(__NR_perf_event_open, 0x20000800, 0, -1, -1, 0);
    break;
  case 3:
    // sendfile() with r[0] as both descriptors, a zeroed offset stored at
    // 0x20000000 and a count of 0x8080fffffffe bytes.
    *(uint64_t*)0x20000000 = 0;
    syscall(__NR_sendfile, r[0], r[0], 0x20000000, 0x8080fffffffe);
    break;
  // The sched_setattr step is compiled out; the accompanying message reports
  // that the failure still reproduces without it.
#if 0
  case 4:
    *(uint32_t*)0x200002c0 = 0;
    *(uint32_t*)0x200002c4 = 2;
    *(uint64_t*)0x200002c8 = 0;
    *(uint32_t*)0x200002d0 = 0;
    *(uint32_t*)0x200002d4 = 3;
    *(uint64_t*)0x200002d8 = 0;
    *(uint64_t*)0x200002e0 = 0;
    *(uint64_t*)0x200002e8 = 0;
    syscall(__NR_sched_setattr, 0, 0x200002c0, 0);
    break;
#endif
  }
}
// Entry point of the reproducer.
//
// Maps the fixed 16 MiB data region at 0x20000000 that execute_call() reads
// and writes through hard-coded addresses, forks six worker processes that
// each run loop() in their own temporary directory, and then sleeps so the
// workers keep running.
int main(void)
{
  // Every argument is given an explicit long-sized type: syscall() is
  // variadic, so plain int constants are not guaranteed to be widened to
  // the register-sized values the kernel reads. prot 3 and flags 0x32 are
  // the raw values for PROT_READ|PROT_WRITE and
  // MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS on Linux.
  // Without the mapping every later call would fault, so fail up front.
  if (syscall(__NR_mmap, 0x20000000ul, 0x1000000ul, 3ul, 0x32ul, -1l, 0ul) ==
      -1)
    exit(1);
  for (procid = 0; procid < 6; procid++) {
    if (fork() == 0) {
      use_temporary_dir();
      // loop() never returns, so the child does not continue this for-loop.
      loop();
    }
  }
  sleep(1000000);
  return 0;
}

  reply	other threads:[~2019-02-26 15:17 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-26  6:50 INFO: rcu detected stall in ext4_file_write_iter syzbot
2019-02-26 15:17 ` Theodore Y. Ts'o [this message]
2019-02-27  9:58   ` Dmitry Vyukov
2019-02-27 21:57     ` Theodore Y. Ts'o
2019-02-28  9:34       ` Dmitry Vyukov
2019-03-21 17:30 ` syzbot
2020-09-11 19:33 ` syzbot
2020-09-11 20:03   ` Jens Axboe
2021-10-06  8:20 Hao Sun

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190226151738.GA6430@mit.edu \
    --to=tytso@mit.edu \
    --cc=acme@kernel.org \
    --cc=adilger.kernel@dilger.ca \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=syzbot+7d19c5fe6a3f1161abb7@syzkaller.appspotmail.com \
    --cc=syzkaller-bugs@googlegroups.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.