linux-usb.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* circular submissions in cdc-wdm and how to break them on disconnect
@ 2021-01-21 15:30 Oliver Neukum
  2021-01-23  2:57 ` Tetsuo Handa
  0 siblings, 1 reply; 4+ messages in thread
From: Oliver Neukum @ 2021-01-21 15:30 UTC (permalink / raw)
  To: Tetsuo Handa; +Cc: linux-usb

[-- Attachment #1: Type: text/plain, Size: 881 bytes --]

Hi,

you have moved kill_urbs() below

        cancel_work_sync(&desc->rxwork);
        cancel_work_sync(&desc->service_outs_intr);

to close a race, as

rv = usb_submit_urb(desc->response, GFP_KERNEL);

in service_outstanding_interrupt() would submit the response URB,
right? Unfortunately we have in wdm_in_callback() the following code path

        if (desc->rerr) {
                /*
                 * Since there was an error, userspace may decide to not read
                 * any data after poll'ing.
                 * We should respond to further attempts from the device to send
                 * data, so that we can get unstuck.
                 */
                schedule_work(&desc->service_outs_intr);

It looks to me like we have a circular dependency here and this needs some
change to break. What do you think about the attached patch?

	Regards
		Oliver



[-- Attachment #2: 0001-cdc-wdm-support-for-poisoning-and-unpoisoning-the-UR.patch --]
[-- Type: text/x-patch, Size: 3316 bytes --]

From efdd52b67f24e4fa2552f8cc2cbedb7118f71291 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Mon, 4 Jan 2021 17:26:33 +0100
Subject: [PATCH] cdc-wdm: support for poisoning and unpoisoning the URBs

We have a cycle of callbacks scheduling works which submit
URBs. This needs to be broken.

Signed-off-by: Oliver Neukum <oneukum@suse.com>
---
 drivers/usb/class/cdc-wdm.c | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index 508b1c3f8b73..d1e4a7379beb 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -321,12 +321,23 @@ static void wdm_int_callback(struct urb *urb)
 
 }
 
-static void kill_urbs(struct wdm_device *desc)
+static void poison_urbs(struct wdm_device *desc)
 {
 	/* the order here is essential */
-	usb_kill_urb(desc->command);
-	usb_kill_urb(desc->validity);
-	usb_kill_urb(desc->response);
+	usb_poison_urb(desc->command);
+	usb_poison_urb(desc->validity);
+	usb_poison_urb(desc->response);
+}
+
+static void unpoison_urbs(struct wdm_device *desc)
+{
+	/*
+	 *  the order here is not essential
+	 *  it is symmetrical just to be nice
+	 */
+	usb_unpoison_urb(desc->response);
+	usb_unpoison_urb(desc->validity);
+	usb_unpoison_urb(desc->command);
 }
 
 static void free_urbs(struct wdm_device *desc)
@@ -741,11 +752,12 @@ static int wdm_release(struct inode *inode, struct file *file)
 	if (!desc->count) {
 		if (!test_bit(WDM_DISCONNECTING, &desc->flags)) {
 			dev_dbg(&desc->intf->dev, "wdm_release: cleanup\n");
-			kill_urbs(desc);
+			poison_urbs(desc);
 			spin_lock_irq(&desc->iuspin);
 			desc->resp_count = 0;
 			spin_unlock_irq(&desc->iuspin);
 			desc->manage_power(desc->intf, 0);
+			unpoison_urbs(desc);
 		} else {
 			/* must avoid dev_printk here as desc->intf is invalid */
 			pr_debug(KBUILD_MODNAME " %s: device gone - cleaning up\n", __func__);
@@ -1037,9 +1049,9 @@ static void wdm_disconnect(struct usb_interface *intf)
 	wake_up_all(&desc->wait);
 	mutex_lock(&desc->rlock);
 	mutex_lock(&desc->wlock);
+	poison_urbs(desc);
 	cancel_work_sync(&desc->rxwork);
 	cancel_work_sync(&desc->service_outs_intr);
-	kill_urbs(desc);
 	mutex_unlock(&desc->wlock);
 	mutex_unlock(&desc->rlock);
 
@@ -1080,9 +1092,10 @@ static int wdm_suspend(struct usb_interface *intf, pm_message_t message)
 		set_bit(WDM_SUSPENDING, &desc->flags);
 		spin_unlock_irq(&desc->iuspin);
 		/* callback submits work - order is essential */
-		kill_urbs(desc);
+		poison_urbs(desc);
 		cancel_work_sync(&desc->rxwork);
 		cancel_work_sync(&desc->service_outs_intr);
+		unpoison_urbs(desc);
 	}
 	if (!PMSG_IS_AUTO(message)) {
 		mutex_unlock(&desc->wlock);
@@ -1140,7 +1153,7 @@ static int wdm_pre_reset(struct usb_interface *intf)
 	wake_up_all(&desc->wait);
 	mutex_lock(&desc->rlock);
 	mutex_lock(&desc->wlock);
-	kill_urbs(desc);
+	poison_urbs(desc);
 	cancel_work_sync(&desc->rxwork);
 	cancel_work_sync(&desc->service_outs_intr);
 	return 0;
@@ -1151,6 +1164,7 @@ static int wdm_post_reset(struct usb_interface *intf)
 	struct wdm_device *desc = wdm_find_device(intf);
 	int rv;
 
+	unpoison_urbs(desc);
 	clear_bit(WDM_OVERFLOW, &desc->flags);
 	clear_bit(WDM_RESETTING, &desc->flags);
 	rv = recover_from_urb_loss(desc);
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: circular submissions in cdc-wdm and how to break them on disconnect
  2021-01-21 15:30 circular submissions in cdc-wdm and how to break them on disconnect Oliver Neukum
@ 2021-01-23  2:57 ` Tetsuo Handa
  2021-02-18 12:55   ` Oliver Neukum
  0 siblings, 1 reply; 4+ messages in thread
From: Tetsuo Handa @ 2021-01-23  2:57 UTC (permalink / raw)
  To: Oliver Neukum; +Cc: linux-usb

On 2021/01/22 0:30, Oliver Neukum wrote:
> Hi,
> 
> you have moved kill_urbs() below
> 
>         cancel_work_sync(&desc->rxwork);
>         cancel_work_sync(&desc->service_outs_intr);
> 
> to close a race, as
> 
> rv = usb_submit_urb(desc->response, GFP_KERNEL);
> 
> in service_outstanding_interrupt() would submit the response URB,
> right?

Right. Shouldn't remaining

  kill_urbs(desc);
  cancel_work_sync(&desc->rxwork);
  cancel_work_sync(&desc->service_outs_intr);

sequence in wdm_suspend() and wdm_pre_reset() be updated as well?

>        Unfortunately we have in wdm_in_callback() the following code path
> 
>         if (desc->rerr) {
>                 /*
>                  * Since there was an error, userspace may decide to not read
>                  * any data after poll'ing.
>                  * We should respond to further attempts from the device to send
>                  * data, so that we can get unstuck.
>                  */
>                 schedule_work(&desc->service_outs_intr);
> 
> It looks to me like we have a circular dependency here and this needs some
> change to break. What do you think about the attached patch?

I don't know how poisoning works. But why can't we simply use test_bit() on
WDM_SUSPENDING/WDM_RESETTING/WDM_DISCONNECTING flags, for schedule_work() in
wdm_in_callback() is called with desc->iuspin (which serializes setting of
these flags) held.

By the way, since someone might interpret "broken" as "out of order / not working",
I expect not using "This needs to be broken." in the commit message. There would be
some better idiom.


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: circular submissions in cdc-wdm and how to break them on disconnect
  2021-01-23  2:57 ` Tetsuo Handa
@ 2021-02-18 12:55   ` Oliver Neukum
  2021-02-18 13:39     ` Tetsuo Handa
  0 siblings, 1 reply; 4+ messages in thread
From: Oliver Neukum @ 2021-02-18 12:55 UTC (permalink / raw)
  To: Tetsuo Handa; +Cc: linux-usb

[-- Attachment #1: Type: text/plain, Size: 1728 bytes --]

Am Samstag, den 23.01.2021, 11:57 +0900 schrieb Tetsuo Handa:
> On 2021/01/22 0:30, Oliver Neukum wrote:

Hi,

> Right. Shouldn't remaining
> 
>   kill_urbs(desc);
>   cancel_work_sync(&desc->rxwork);
>   cancel_work_sync(&desc->service_outs_intr);
> 
> sequence in wdm_suspend() and wdm_pre_reset() be updated as well?

Yes, they should.

> >        Unfortunately we have in wdm_in_callback() the following code path
> > 
> >         if (desc->rerr) {
> >                 /*
> >                  * Since there was an error, userspace may decide to not read
> >                  * any data after poll'ing.
> >                  * We should respond to further attempts from the device to send
> >                  * data, so that we can get unstuck.
> >                  */
> >                 schedule_work(&desc->service_outs_intr);
> > 
> > It looks to me like we have a circular dependency here and this needs some
> > change to break. What do you think about the attached patch?
> 
> I don't know how poisoning works. But why can't we simply use test_bit() on

It makes subsequent usb_submit_urb() fail.

> WDM_SUSPENDING/WDM_RESETTING/WDM_DISCONNECTING flags, for schedule_work() in
> wdm_in_callback() is called with desc->iuspin (which serializes setting of
> these flags) held.

In theory this could be done, yet that would take three additional
tests as opposed to the test for poisoning that usbcore does anyway.

> By the way, since someone might interpret "broken" as "out of order / not working",
> I expect not using "This needs to be broken." in the commit message. There would be
> some better idiom.

Right. I changed the message.

Could you test whether the attached patch fixes your issue?

	Regards
		Oliver


[-- Attachment #2: 0001-cdc-wdm-untangle-a-circular-dependency-between-callb.patch --]
[-- Type: text/x-patch, Size: 3403 bytes --]

From 307097e80657ca44ac99da8efc8397070b1aff3f Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Thu, 18 Feb 2021 13:42:40 +0100
Subject: [PATCH 1/2] cdc-wdm: untangle a circular dependency between callback
 and softint

We have a cycle of callbacks scheduling works which submit
URBs with thos callbacks. This needs to be blocked, stopped
and unblocked to untangle the circle..

Signed-off-by: Oliver Neukum <oneukum@suse.com>
---
 drivers/usb/class/cdc-wdm.c | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index 508b1c3f8b73..d1e4a7379beb 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -321,12 +321,23 @@ static void wdm_int_callback(struct urb *urb)
 
 }
 
-static void kill_urbs(struct wdm_device *desc)
+static void poison_urbs(struct wdm_device *desc)
 {
 	/* the order here is essential */
-	usb_kill_urb(desc->command);
-	usb_kill_urb(desc->validity);
-	usb_kill_urb(desc->response);
+	usb_poison_urb(desc->command);
+	usb_poison_urb(desc->validity);
+	usb_poison_urb(desc->response);
+}
+
+static void unpoison_urbs(struct wdm_device *desc)
+{
+	/*
+	 *  the order here is not essential
+	 *  it is symmetrical just to be nice
+	 */
+	usb_unpoison_urb(desc->response);
+	usb_unpoison_urb(desc->validity);
+	usb_unpoison_urb(desc->command);
 }
 
 static void free_urbs(struct wdm_device *desc)
@@ -741,11 +752,12 @@ static int wdm_release(struct inode *inode, struct file *file)
 	if (!desc->count) {
 		if (!test_bit(WDM_DISCONNECTING, &desc->flags)) {
 			dev_dbg(&desc->intf->dev, "wdm_release: cleanup\n");
-			kill_urbs(desc);
+			poison_urbs(desc);
 			spin_lock_irq(&desc->iuspin);
 			desc->resp_count = 0;
 			spin_unlock_irq(&desc->iuspin);
 			desc->manage_power(desc->intf, 0);
+			unpoison_urbs(desc);
 		} else {
 			/* must avoid dev_printk here as desc->intf is invalid */
 			pr_debug(KBUILD_MODNAME " %s: device gone - cleaning up\n", __func__);
@@ -1037,9 +1049,9 @@ static void wdm_disconnect(struct usb_interface *intf)
 	wake_up_all(&desc->wait);
 	mutex_lock(&desc->rlock);
 	mutex_lock(&desc->wlock);
+	poison_urbs(desc);
 	cancel_work_sync(&desc->rxwork);
 	cancel_work_sync(&desc->service_outs_intr);
-	kill_urbs(desc);
 	mutex_unlock(&desc->wlock);
 	mutex_unlock(&desc->rlock);
 
@@ -1080,9 +1092,10 @@ static int wdm_suspend(struct usb_interface *intf, pm_message_t message)
 		set_bit(WDM_SUSPENDING, &desc->flags);
 		spin_unlock_irq(&desc->iuspin);
 		/* callback submits work - order is essential */
-		kill_urbs(desc);
+		poison_urbs(desc);
 		cancel_work_sync(&desc->rxwork);
 		cancel_work_sync(&desc->service_outs_intr);
+		unpoison_urbs(desc);
 	}
 	if (!PMSG_IS_AUTO(message)) {
 		mutex_unlock(&desc->wlock);
@@ -1140,7 +1153,7 @@ static int wdm_pre_reset(struct usb_interface *intf)
 	wake_up_all(&desc->wait);
 	mutex_lock(&desc->rlock);
 	mutex_lock(&desc->wlock);
-	kill_urbs(desc);
+	poison_urbs(desc);
 	cancel_work_sync(&desc->rxwork);
 	cancel_work_sync(&desc->service_outs_intr);
 	return 0;
@@ -1151,6 +1164,7 @@ static int wdm_post_reset(struct usb_interface *intf)
 	struct wdm_device *desc = wdm_find_device(intf);
 	int rv;
 
+	unpoison_urbs(desc);
 	clear_bit(WDM_OVERFLOW, &desc->flags);
 	clear_bit(WDM_RESETTING, &desc->flags);
 	rv = recover_from_urb_loss(desc);
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: circular submissions in cdc-wdm and how to break them on disconnect
  2021-02-18 12:55   ` Oliver Neukum
@ 2021-02-18 13:39     ` Tetsuo Handa
  0 siblings, 0 replies; 4+ messages in thread
From: Tetsuo Handa @ 2021-02-18 13:39 UTC (permalink / raw)
  To: Oliver Neukum; +Cc: linux-usb

On 2021/02/18 21:55, Oliver Neukum wrote:
> Could you test whether the attached patch fixes your issue?

"INFO: task hung in wdm_flush" was fixed on 2020/11/16 12:12, and
as far as I know syzbot is not reporting any problem with this driver.
I don't have issues regardless of your patch. I can't test your patch.


> URBs with thos callbacks. This needs to be blocked, stopped

s/thos/those/

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-02-18 16:31 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-21 15:30 circular submissions in cdc-wdm and how to break them on disconnect Oliver Neukum
2021-01-23  2:57 ` Tetsuo Handa
2021-02-18 12:55   ` Oliver Neukum
2021-02-18 13:39     ` Tetsuo Handa

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).