All of lore.kernel.org
 help / color / mirror / Atom feed
From: Austin Schuh <austin@peloton-tech.com>
To: Richard Weinberger <richard.weinberger@gmail.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: LKML <linux-kernel@vger.kernel.org>,
	rt-users <linux-rt-users@vger.kernel.org>
Subject: Re: Filesystem lockup with CONFIG_PREEMPT_RT
Date: Thu, 26 Jun 2014 12:50:24 -0700	[thread overview]
Message-ID: <CANGgnMYVoP-Z0Bv-VDEkJnvfa7Fi4-zY2F4A0PhMewGvwo3VVw@mail.gmail.com> (raw)
In-Reply-To: <CAFLxGvxfBt7OvW=a2Kz08GLHSEiiOZsN-vB19CXnQiwqFxqMsA@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 1371 bytes --]

On Wed, May 21, 2014 at 12:33 AM, Richard Weinberger
<richard.weinberger@gmail.com> wrote:
> CC'ing RT folks
>
> On Wed, May 21, 2014 at 8:23 AM, Austin Schuh <austin@peloton-tech.com> wrote:
>> On Tue, May 13, 2014 at 7:29 PM, Austin Schuh <austin@peloton-tech.com> wrote:
>>> Hi,
>>>
>>> I am observing a filesystem lockup with XFS on a CONFIG_PREEMPT_RT
>>> patched kernel.  I have currently only triggered it using dpkg.  Dave
>>> Chinner on the XFS mailing list suggested that it was an rt-kernel
>>> workqueue issue as opposed to an XFS problem after looking at the
>>> kernel messages.

I've got a 100% reproducible test case that doesn't involve a
filesystem.  I wrote a module that triggers the bug when the device is
written to, making it easy to enable tracing during the event and
capture everything.

It looks like rw_semaphores don't trigger wq_worker_sleeping to run
when a work item goes to sleep on an rw_semaphore.  This only happens with the
RT patches, not with the mainline kernel.  I'm foreseeing a second
deadlock/bug coming into play shortly.  If a task holding the work
pool spinlock gets preempted, and we need to schedule more work from
another worker thread which was just blocked by a mutex, we'll then
end up trying to go to sleep on 2 locks at once.

That is getting a bit deep into the scheduler for me...  Any
suggestions on how to fix it?

Austin

[-- Attachment #2: killer_module.c --]
[-- Type: text/x-csrc, Size: 4183 bytes --]

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <asm/uaccess.h>
#include <linux/semaphore.h>

// Forward declarations of the file operation handlers, so that the fops
// table below can reference them before they are defined.
static int device_open(struct inode *, struct file *);
static int device_release(struct inode *, struct file *);
static ssize_t device_read(struct file *, char *, size_t, loff_t *);
static ssize_t device_write(struct file *, const char *, size_t, loff_t *);

// Dev name as it appears in /proc/devices
#define DEVICE_NAME "aschuh"

// Major number assigned to our device driver (the value returned by
// register_chrdev() at module init).
static int major;
// Two separate workqueues, allocated at module init and destroyed at module
// exit.  device_write() queues work1 on lockup_wq1 and work2 on lockup_wq2.
static struct workqueue_struct *lockup_wq1;
static struct workqueue_struct *lockup_wq2;

// Handler table passed to register_chrdev() for the character device.
// NOTE(review): .owner is not set here; the module reference count is
// instead taken and dropped by hand in device_open()/device_release().
static struct file_operations fops = {
  .open = device_open,
  .release = device_release,
  .read = device_read,
  .write = device_write,
};

// Module entry point.  Allocates both workqueues, then registers the
// character device with a dynamically chosen major number.  Returns 0 on
// success; on failure returns a negative value after destroying whatever
// workqueues were already allocated.
static int __init init_killer_module(void) {
  lockup_wq1 = alloc_workqueue("lockup_wq1", WQ_MEM_RECLAIM, 0);
  if (!lockup_wq1) {
    return -ENOMEM;
  }

  lockup_wq2 = alloc_workqueue("lockup_wq2", WQ_MEM_RECLAIM, 0);
  if (!lockup_wq2) {
    destroy_workqueue(lockup_wq1);
    return -ENOMEM;
  }

  // A requested major of 0 makes register_chrdev() return the major number
  // it assigned, or a negative value on failure.
  major = register_chrdev(0, DEVICE_NAME, &fops);
  if (major >= 0) {
    // Print the mknod command needed to create the device node.
    printk(KERN_INFO "'mknod /dev/%s c %d 0'.\n", DEVICE_NAME, major);

    // A non 0 return means init_module failed; module can't be loaded.
    return 0;
  }

  printk(KERN_ALERT "Registering char device failed with %d\n", major);
  destroy_workqueue(lockup_wq1);
  destroy_workqueue(lockup_wq2);

  return major;
}

// Called when a process tries to open the device file.
// Takes a reference on this module so it cannot be unloaded while the device
// is open; device_release() drops that reference again.
// Returns 0 on success, or -ENODEV if the module reference could not be
// taken, in which case the open is refused so that the module_put() in
// device_release() never runs without a matching get.
static int device_open(struct inode *inode, struct file *file) {
  if (!try_module_get(THIS_MODULE)) {
    return -ENODEV;
  }
  return 0;
}

// Called when a process closes the device file.
// Drops the module reference taken in device_open().  Always returns 0.
static int device_release(struct inode *inode, struct file *file) {
  // Decrement the usage count, or else once you opened the file, you'll never
  // get rid of the module.
  module_put(THIS_MODULE);

  return 0;
}

// Read handler.  Never touches the buffer and always returns 0, i.e. zero
// bytes read, so a reader sees end-of-file immediately.
static ssize_t device_read(struct file *filp, char *buffer, size_t length,
                           loff_t *offset) {
  return 0;
}

// Compile-time choice of the locking primitive used by device_write() and
// work2().  With "#if 0" the rw_semaphore variant in the #else branch is the
// one that gets built; change it to "#if 1" to build with a plain semaphore.
#if 0

// Plain semaphore variant, initialised with a count of 1.
#define SEM_INIT(sem) sema_init(sem, 1)
#define SEM_TYPE struct semaphore
#define SEM_DOWN(sem) down(sem)
#define SEM_UP(sem) up(sem)

#else

// rw_semaphore variant, always taken and released as a writer.
#define SEM_INIT(sem) init_rwsem(sem)
#define SEM_TYPE struct rw_semaphore
#define SEM_DOWN(sem) down_write_nested(sem, 0)
#define SEM_UP(sem) up_write(sem)

#endif

// A work item bundled with the context its handler needs.  The handlers
// recover the enclosing struct from the embedded work_struct with
// container_of().
struct mywork {
  // Embedded work item that is handed to queue_work().
  struct work_struct work;
  // Value printed by the handlers; device_write() stores the write length.
  int index;
  // Semaphore that work2 acquires and releases; work1 does not use it.
  SEM_TYPE *sem;
};

static void work1(struct work_struct *work) {
  struct mywork *my_work = container_of(work, struct mywork, work);
  trace_printk("work1 Called with index %d\n", my_work->index);
}

static void work2(struct work_struct *work) {
  struct mywork *my_work = container_of(work, struct mywork, work);
  trace_printk("work2 Called with index %d\n", my_work->index);
  SEM_DOWN(my_work->sem);
  SEM_UP(my_work->sem);
  trace_printk("work2 Finished with index %d\n", my_work->index);
}


// Write handler: every write to the device runs the reproducer once.
//
// The data in buff is never read; only len is used (for the trace output
// and as the return value).  The sequence is:
//   1. take write_sem,
//   2. queue work2 (which will block on write_sem) on lockup_wq2,
//   3. queue work1 on lockup_wq1 and wait for it while still holding
//      write_sem,
//   4. release write_sem and wait for work2.
// The statement order below is the test case itself, so keep it as is.
//
// Returns len, i.e. reports the whole buffer as consumed.
static ssize_t device_write(struct file *filp, const char *buff, size_t len,
                            loff_t *off) {
  // Semaphore lives on this stack frame and is set up afresh for each write.
  SEM_TYPE write_sem;
  SEM_INIT(&write_sem);
  
  // Both work items also live on the stack; they are flushed and destroyed
  // before this function returns.
  struct mywork my_work1;
  struct mywork my_work2;
  trace_printk("lockup_wq1 %p lockup_wq2 %p\n", lockup_wq1, lockup_wq2);

  trace_printk("Got a write\n");

  // Hold the semaphore so that work2 has to sleep in its SEM_DOWN().
  SEM_DOWN(&write_sem);
  // Note: len is a size_t stored into an int, so large values are narrowed.
  my_work1.index = len;
  my_work1.sem = &write_sem;
  INIT_WORK_ONSTACK(&my_work1.work, work1);

  my_work2.index = len;
  my_work2.sem = &write_sem;
  INIT_WORK_ONSTACK(&my_work2.work, work2);

  // work2 is queued first, on its own workqueue.
  queue_work(lockup_wq2, &my_work2.work);

  // Queue work1 on the other workqueue and wait for it to complete while
  // write_sem is still held.
  queue_work(lockup_wq1, &my_work1.work);
  flush_work(&my_work1.work);
  destroy_work_on_stack(&my_work1.work);

  // Let work2 acquire the semaphore and run to completion.
  SEM_UP(&write_sem);

  flush_work(&my_work2.work);
  destroy_work_on_stack(&my_work2.work);
  trace_printk("Write done\n");
  // NOTE(review): presumably stops tracing so the trace buffer keeps the
  // events from this write instead of being overwritten -- confirm.
  tracing_off();

  return len;
}

// Module exit point: logs a farewell message, unregisters the character
// device and then destroys both workqueues allocated at init time.
static void __exit cleanup_killer_module(void) {
  printk(KERN_INFO "Goodbye world 1.\n");

  unregister_chrdev(major, DEVICE_NAME);

  destroy_workqueue(lockup_wq1);
  destroy_workqueue(lockup_wq2);
}

// Register the module's load and unload entry points.
module_init(init_killer_module);
module_exit(cleanup_killer_module);

// Module metadata.
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Austin Schuh <austin@peloton-tech.com>");
MODULE_DESCRIPTION("Triggers a workqueue bug on write.");

  reply	other threads:[~2014-06-26 19:50 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-05-14  2:29 Filesystem lockup with CONFIG_PREEMPT_RT Austin Schuh
2014-05-14  2:29 ` Austin Schuh
2014-05-21  6:23 ` Austin Schuh
2014-05-21  6:23   ` Austin Schuh
2014-05-21  7:33   ` Richard Weinberger
2014-05-21  7:33     ` Richard Weinberger
2014-06-26 19:50     ` Austin Schuh [this message]
2014-06-26 22:35       ` Thomas Gleixner
2014-06-27  0:07         ` Austin Schuh
2014-06-27  3:22           ` Mike Galbraith
2014-06-27 12:57           ` Mike Galbraith
2014-06-27 14:01             ` Steven Rostedt
2014-06-27 17:34               ` Mike Galbraith
2014-06-27 17:54                 ` Steven Rostedt
2014-06-27 18:07                   ` Mike Galbraith
2014-06-27 18:19                     ` Steven Rostedt
2014-06-27 19:11                       ` Mike Galbraith
2014-06-28  1:18                       ` Austin Schuh
2014-06-28  3:32                         ` Mike Galbraith
2014-06-28  6:20                           ` Austin Schuh
2014-06-28  7:11                             ` Mike Galbraith
2014-06-27 14:24           ` Thomas Gleixner
2014-06-28  4:51             ` Mike Galbraith
2014-07-01  0:12             ` Austin Schuh
2014-07-01  0:53               ` Austin Schuh
2014-07-05 20:26                 ` Thomas Gleixner
2014-07-06  4:55                   ` Austin Schuh
2014-07-01  3:01             ` Austin Schuh
2014-07-01 19:32               ` Austin Schuh
2014-07-03 23:08                 ` Austin Schuh
2014-07-04  4:42                   ` Mike Galbraith
2014-05-21 19:30 John Blackwood
2014-05-21 19:30 ` John Blackwood
2014-05-21 21:59 ` Austin Schuh
2014-05-21 21:59   ` Austin Schuh
2014-07-05 20:36 ` Thomas Gleixner
2014-07-05 20:36   ` Thomas Gleixner
2014-07-05 19:30 Jan de Kruyf
2014-07-07  8:48 Jan de Kruyf
2014-07-07 13:00 ` Thomas Gleixner
2014-07-07 16:23 ` Austin Schuh
2014-07-08  8:03   ` Jan de Kruyf
2014-07-08 16:09     ` Austin Schuh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CANGgnMYVoP-Z0Bv-VDEkJnvfa7Fi4-zY2F4A0PhMewGvwo3VVw@mail.gmail.com \
    --to=austin@peloton-tech.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rt-users@vger.kernel.org \
    --cc=richard.weinberger@gmail.com \
    --cc=tglx@linutronix.de \
    --cc=umgwanakikbuti@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.