All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC] fixes for hangs and error reporting in CDC_WDM
@ 2020-09-22 11:21 Oliver Neukum
  2020-09-22 11:21 ` [RFC 1/7] CDC-WDM: fix hangs in flush() in multithreaded cases Oliver Neukum
                   ` (7 more replies)
  0 siblings, 8 replies; 17+ messages in thread
From: Oliver Neukum @ 2020-09-22 11:21 UTC (permalink / raw)
  To: penguin-kernel, bjorn, linux-usb

Stress testing has shown that CDC-WDM has some issues with hangs
and error reporting:

1. wakeups are not correctly handled in multithreaded environments
2. unresponsive hardware is not handled
3. errors are not correctly reported. This needs flush() to be
implemented.

This version makes wdm_flush() use interruptible sleep.

For easier review all squashed together:

--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -58,6 +58,9 @@ MODULE_DEVICE_TABLE (usb, wdm_ids);
 
 #define WDM_MAX                        16
 
+/* flush() can be interrupted by a signal, but even so we cannot wait forever */
+#define WDM_FLUSH_TIMEOUT      (30 * HZ)
+
 /* CDC-WMC r1.1 requires wMaxCommand to be "at least 256 decimal (0x100)" */
 #define WDM_DEFAULT_BUFSIZE    256
 
@@ -151,7 +154,7 @@ static void wdm_out_callback(struct urb *urb)
        kfree(desc->outbuf);
        desc->outbuf = NULL;
        clear_bit(WDM_IN_USE, &desc->flags);
-       wake_up(&desc->wait);
+       wake_up_all(&desc->wait);
 }
 
 static void wdm_in_callback(struct urb *urb)
@@ -393,6 +396,9 @@ static ssize_t wdm_write
        if (test_bit(WDM_RESETTING, &desc->flags))
                r = -EIO;
 
+       if (test_bit(WDM_DISCONNECTING, &desc->flags))
+               r = -ENODEV;
+
        if (r < 0) {
                rv = r;
                goto out_free_mem_pm;
@@ -424,6 +430,7 @@ static ssize_t wdm_write
        if (rv < 0) {
                desc->outbuf = NULL;
                clear_bit(WDM_IN_USE, &desc->flags);
+               wake_up_all(&desc->wait); /* for flush() */
                dev_err(&desc->intf->dev, "Tx URB error: %d\n", rv);
                rv = usb_translate_errors(rv);
                goto out_free_mem_pm;
@@ -583,11 +590,39 @@ static ssize_t wdm_read
        return rv;
 }
 
+/*
+ * Unlike flush(), fsync() waits without a timeout. If you don't like
+ * that behavior, you need to send a signal.
+ */
+
+static int wdm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+       struct wdm_device *desc = file->private_data;
+       int rv;
+
+       rv = wait_event_interruptible(desc->wait,
+                       !test_bit(WDM_IN_USE, &desc->flags) ||
+                       test_bit(WDM_DISCONNECTING, &desc->flags));
+
+       if (test_bit(WDM_DISCONNECTING, &desc->flags))
+               return -ENODEV;
+       if (rv < 0)
+               return -EINTR;
+
+       spin_lock_irq(&desc->iuspin);
+       rv = desc->werr;
+       desc->werr = 0;
+       spin_unlock_irq(&desc->iuspin);
+
+       return usb_translate_errors(rv);
+}
+
 static int wdm_flush(struct file *file, fl_owner_t id)
 {
        struct wdm_device *desc = file->private_data;
+       int rv;
 
-       wait_event(desc->wait,
+       rv = wait_event_interruptible_timeout(desc->wait,
                        /*
                         * needs both flags. We cannot do with one
                         * because resetting it would cause a race
@@ -595,16 +630,27 @@ static int wdm_flush(struct file *file, fl_owner_t id)
                         * a disconnect
                         */
                        !test_bit(WDM_IN_USE, &desc->flags) ||
-                       test_bit(WDM_DISCONNECTING, &desc->flags));
+                       test_bit(WDM_DISCONNECTING, &desc->flags),
+                       WDM_FLUSH_TIMEOUT);
 
-       /* cannot dereference desc->intf if WDM_DISCONNECTING */
+       /*
+        * Pick the error code that best describes what happened.
+        * This is best effort:
+        * we are inevitably racing with the hardware.
+        */
        if (test_bit(WDM_DISCONNECTING, &desc->flags))
                return -ENODEV;
-       if (desc->werr < 0)
-               dev_err(&desc->intf->dev, "Error in flush path: %d\n",
-                       desc->werr);
+       if (!rv)
+               return -EIO;
+       if (rv < 0)
+               return -EINTR;
+
+       spin_lock_irq(&desc->iuspin);
+       rv = desc->werr;
+       desc->werr = 0;
+       spin_unlock_irq(&desc->iuspin);
 
-       return usb_translate_errors(desc->werr);
+       return usb_translate_errors(rv);
 }
 
 static __poll_t wdm_poll(struct file *file, struct poll_table_struct *wait)
@@ -729,6 +775,7 @@ static const struct file_operations wdm_fops = {
        .owner =        THIS_MODULE,
        .read =         wdm_read,
        .write =        wdm_write,
+       .fsync =        wdm_fsync,
        .open =         wdm_open,
        .flush =        wdm_flush,
        .release =      wdm_release,



^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2020-09-29 10:18 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-22 11:21 [RFC] fixes for hangs and error reporting in CDC_WDM Oliver Neukum
2020-09-22 11:21 ` [RFC 1/7] CDC-WDM: fix hangs in flush() in multithreaded cases Oliver Neukum
2020-09-22 11:21 ` [RFC 2/7] CDC-WDM: introduce a timeout in flush() Oliver Neukum
2020-09-22 11:21 ` [RFC 3/7] CDC-WDM: remove use of intf->dev after potential disconnect Oliver Neukum
2020-09-22 11:21 ` [RFC 4/7] CDC-WDM: fix race reporting errors in flush Oliver Neukum
2020-09-22 11:21 ` [RFC 5/7] CDC-WDM: correct error reporting in write() Oliver Neukum
2020-09-22 11:21 ` [RFC 6/7] CDC-WDM: implement fsync Oliver Neukum
2020-09-22 11:21 ` [RFC 7/7] CDC-WDM: making flush() interruptible Oliver Neukum
2020-09-22 11:38   ` Tetsuo Handa
2020-09-22 11:49     ` Oliver Neukum
2020-09-25 15:11 ` [RFC] fixes for hangs and error reporting in CDC_WDM Greg KH
2020-09-25 15:20   ` Tetsuo Handa
2020-09-25 15:28     ` Greg KH
2020-09-25 15:34       ` Tetsuo Handa
2020-09-26  5:40         ` Greg KH
2020-09-29  8:46           ` Oliver Neukum
2020-09-29 10:17             ` Tetsuo Handa

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.