All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] xhci: fix null pointer deref for xhci_urb_enqueue
@ 2023-11-17  7:21 Kuen-Han Tsai
  2023-11-17 13:32 ` Mathias Nyman
                   ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: Kuen-Han Tsai @ 2023-11-17  7:21 UTC (permalink / raw)
  To: mathias.nyman, gregkh; +Cc: linux-usb, linux-kernel, Kuen-Han Tsai

The null pointer dereference happens when xhci_free_dev() frees the
xhci->devs[slot_id] virtual device while xhci_urb_enqueue() is
processing a urb and checking the max packet size.

[106913.850735][ T2068] usb 2-1: USB disconnect, device number 2
[106913.856999][ T4618] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010
[106913.857488][ T4618] Call trace:
[106913.857491][ T4618]  xhci_check_maxpacket+0x30/0x2dc
[106913.857494][ T4618]  xhci_urb_enqueue+0x24c/0x47c
[106913.857498][ T4618]  usb_hcd_submit_urb+0x1f4/0xf34
[106913.857501][ T4618]  usb_submit_urb+0x4b8/0x4fc
[106913.857503][ T4618]  usb_control_msg+0x144/0x238
[106913.857507][ T4618]  do_proc_control+0x1f0/0x5bc
[106913.857509][ T4618]  usbdev_ioctl+0xdd8/0x15a8

This patch adds a spinlock to the xhci_urb_enqueue function to make sure
xhci_free_dev() and xhci_urb_enqueue() do not race and cause null
pointer dereference.

Signed-off-by: Kuen-Han Tsai <khtsai@google.com>
---
 drivers/usb/host/xhci.c | 38 ++++++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 884b0898d9c9..e0766ebeff0e 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1522,23 +1522,32 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
 	struct urb_priv	*urb_priv;
 	int num_tds;
 
-	if (!urb)
-		return -EINVAL;
-	ret = xhci_check_args(hcd, urb->dev, urb->ep,
-					true, true, __func__);
-	if (ret <= 0)
-		return ret ? ret : -EINVAL;
+	spin_lock_irqsave(&xhci->lock, flags);
+
+	if (!urb) {
+		ret = -EINVAL;
+		goto done;
+	}
+
+	ret = xhci_check_args(hcd, urb->dev, urb->ep, true, true, __func__);
+	if (ret <= 0) {
+		ret = ret ? ret : -EINVAL;
+		goto done;
+	}
 
 	slot_id = urb->dev->slot_id;
 	ep_index = xhci_get_endpoint_index(&urb->ep->desc);
 	ep_state = &xhci->devs[slot_id]->eps[ep_index].ep_state;
 
-	if (!HCD_HW_ACCESSIBLE(hcd))
-		return -ESHUTDOWN;
+	if (!HCD_HW_ACCESSIBLE(hcd)) {
+		ret = -ESHUTDOWN;
+		goto done;
+	}
 
 	if (xhci->devs[slot_id]->flags & VDEV_PORT_ERROR) {
 		xhci_dbg(xhci, "Can't queue urb, port error, link inactive\n");
-		return -ENODEV;
+		ret = -ENODEV;
+		goto done;
 	}
 
 	if (usb_endpoint_xfer_isoc(&urb->ep->desc))
@@ -1552,8 +1561,10 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
 		num_tds = 1;
 
 	urb_priv = kzalloc(struct_size(urb_priv, td, num_tds), mem_flags);
-	if (!urb_priv)
-		return -ENOMEM;
+	if (!urb_priv) {
+		ret = -ENOMEM;
+		goto done;
+	}
 
 	urb_priv->num_tds = num_tds;
 	urb_priv->num_tds_done = 0;
@@ -1571,13 +1582,11 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
 			if (ret < 0) {
 				xhci_urb_free_priv(urb_priv);
 				urb->hcpriv = NULL;
-				return ret;
+				goto done;
 			}
 		}
 	}
 
-	spin_lock_irqsave(&xhci->lock, flags);
-
 	if (xhci->xhc_state & XHCI_STATE_DYING) {
 		xhci_dbg(xhci, "Ep 0x%x: URB %p submitted for non-responsive xHCI host.\n",
 			 urb->ep->desc.bEndpointAddress, urb);
@@ -1620,6 +1629,7 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
 		xhci_urb_free_priv(urb_priv);
 		urb->hcpriv = NULL;
 	}
+done:
 	spin_unlock_irqrestore(&xhci->lock, flags);
 	return ret;
 }
-- 
2.43.0.rc0.421.g78406f8d94-goog


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] xhci: fix null pointer deref for xhci_urb_enqueue
  2023-11-17  7:21 [PATCH] xhci: fix null pointer deref for xhci_urb_enqueue Kuen-Han Tsai
@ 2023-11-17 13:32 ` Mathias Nyman
  2023-11-18 10:19   ` Kuen-Han Tsai
  2023-11-17 13:53 ` Greg KH
  2023-11-23  1:54 ` kernel test robot
  2 siblings, 1 reply; 13+ messages in thread
From: Mathias Nyman @ 2023-11-17 13:32 UTC (permalink / raw)
  To: Kuen-Han Tsai, mathias.nyman, gregkh; +Cc: linux-usb, linux-kernel

On 17.11.2023 9.21, Kuen-Han Tsai wrote:
> The null pointer dereference happens when xhci_free_dev() frees the
> xhci->devs[slot_id] virtual device while xhci_urb_enqueue() is
> processing a urb and checking the max packet size.
> 
> [106913.850735][ T2068] usb 2-1: USB disconnect, device number 2
> [106913.856999][ T4618] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010
> [106913.857488][ T4618] Call trace:
> [106913.857491][ T4618]  xhci_check_maxpacket+0x30/0x2dc
> [106913.857494][ T4618]  xhci_urb_enqueue+0x24c/0x47c
> [106913.857498][ T4618]  usb_hcd_submit_urb+0x1f4/0xf34
> [106913.857501][ T4618]  usb_submit_urb+0x4b8/0x4fc
> [106913.857503][ T4618]  usb_control_msg+0x144/0x238
> [106913.857507][ T4618]  do_proc_control+0x1f0/0x5bc
> [106913.857509][ T4618]  usbdev_ioctl+0xdd8/0x15a8
> 
> This patch adds a spinlock to the xhci_urb_enqueue function to make sure
> xhci_free_dev() and xhci_urb_enqueue() do not race and cause null
> pointer dereference.

Thanks, nice catch

This patch does however need some additional tuning

> 
> Signed-off-by: Kuen-Han Tsai <khtsai@google.com>
> ---
>   drivers/usb/host/xhci.c | 38 ++++++++++++++++++++++++--------------
>   1 file changed, 24 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
> index 884b0898d9c9..e0766ebeff0e 100644
> --- a/drivers/usb/host/xhci.c
> +++ b/drivers/usb/host/xhci.c
> @@ -1522,23 +1522,32 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
>   	struct urb_priv	*urb_priv;
>   	int num_tds;
>   
> -	if (!urb)
> -		return -EINVAL;
> -	ret = xhci_check_args(hcd, urb->dev, urb->ep,
> -					true, true, __func__);
> -	if (ret <= 0)
> -		return ret ? ret : -EINVAL;
> +	spin_lock_irqsave(&xhci->lock, flags);
> +
> +	if (!urb) {
> +		ret = -EINVAL;
> +		goto done;
> +	}
> +
> +	ret = xhci_check_args(hcd, urb->dev, urb->ep, true, true, __func__);
> +	if (ret <= 0) {
> +		ret = ret ? ret : -EINVAL;
> +		goto done;
> +	}
>   
>   	slot_id = urb->dev->slot_id;
>   	ep_index = xhci_get_endpoint_index(&urb->ep->desc);
>   	ep_state = &xhci->devs[slot_id]->eps[ep_index].ep_state;
>   
> -	if (!HCD_HW_ACCESSIBLE(hcd))
> -		return -ESHUTDOWN;
> +	if (!HCD_HW_ACCESSIBLE(hcd)) {
> +		ret = -ESHUTDOWN;
> +		goto done;
> +	}
>   
>   	if (xhci->devs[slot_id]->flags & VDEV_PORT_ERROR) {
>   		xhci_dbg(xhci, "Can't queue urb, port error, link inactive\n");
> -		return -ENODEV;
> +		ret = -ENODEV;
> +		goto done;
>   	}
>   
>   	if (usb_endpoint_xfer_isoc(&urb->ep->desc))
> @@ -1552,8 +1561,10 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
>   		num_tds = 1;
>   
>   	urb_priv = kzalloc(struct_size(urb_priv, td, num_tds), mem_flags);

kzalloc with spinlock held, should preferably be moved outside lock, otherwise should use GFP_ATOMIC

> -	if (!urb_priv)
> -		return -ENOMEM;
> +	if (!urb_priv) {
> +		ret = -ENOMEM;
> +		goto done;
> +	}
>   
>   	urb_priv->num_tds = num_tds;
>   	urb_priv->num_tds_done = 0;
> @@ -1571,13 +1582,11 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag

xhci_check_maxpacket() called here can't be called with spinlock held

>   			if (ret < 0) {
>   				xhci_urb_free_priv(urb_priv);
>   				urb->hcpriv = NULL;
> -				return ret;
> +				goto done;

Thanks
Mathias

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] xhci: fix null pointer deref for xhci_urb_enqueue
  2023-11-17  7:21 [PATCH] xhci: fix null pointer deref for xhci_urb_enqueue Kuen-Han Tsai
  2023-11-17 13:32 ` Mathias Nyman
@ 2023-11-17 13:53 ` Greg KH
  2023-11-18 11:19   ` Kuen-Han Tsai
  2023-11-23  1:54 ` kernel test robot
  2 siblings, 1 reply; 13+ messages in thread
From: Greg KH @ 2023-11-17 13:53 UTC (permalink / raw)
  To: Kuen-Han Tsai; +Cc: mathias.nyman, linux-usb, linux-kernel

On Fri, Nov 17, 2023 at 03:21:28PM +0800, Kuen-Han Tsai wrote:
> The null pointer dereference happens when xhci_free_dev() frees the
> xhci->devs[slot_id] virtual device while xhci_urb_enqueue() is
> processing a urb and checking the max packet size.
> 
> [106913.850735][ T2068] usb 2-1: USB disconnect, device number 2
> [106913.856999][ T4618] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010
> [106913.857488][ T4618] Call trace:
> [106913.857491][ T4618]  xhci_check_maxpacket+0x30/0x2dc
> [106913.857494][ T4618]  xhci_urb_enqueue+0x24c/0x47c
> [106913.857498][ T4618]  usb_hcd_submit_urb+0x1f4/0xf34
> [106913.857501][ T4618]  usb_submit_urb+0x4b8/0x4fc
> [106913.857503][ T4618]  usb_control_msg+0x144/0x238
> [106913.857507][ T4618]  do_proc_control+0x1f0/0x5bc
> [106913.857509][ T4618]  usbdev_ioctl+0xdd8/0x15a8
> 
> This patch adds a spinlock to the xhci_urb_enqueue function to make sure
> xhci_free_dev() and xhci_urb_enqueue() do not race and cause null
> pointer dereference.

I thought we had a lock for this already, what changed to cause this to
start triggering now, all these years later?

> 
> Signed-off-by: Kuen-Han Tsai <khtsai@google.com>

What commit id does this fix?


> ---
>  drivers/usb/host/xhci.c | 38 ++++++++++++++++++++++++--------------
>  1 file changed, 24 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
> index 884b0898d9c9..e0766ebeff0e 100644
> --- a/drivers/usb/host/xhci.c
> +++ b/drivers/usb/host/xhci.c
> @@ -1522,23 +1522,32 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
>  	struct urb_priv	*urb_priv;
>  	int num_tds;
>  
> -	if (!urb)
> -		return -EINVAL;
> -	ret = xhci_check_args(hcd, urb->dev, urb->ep,
> -					true, true, __func__);
> -	if (ret <= 0)
> -		return ret ? ret : -EINVAL;
> +	spin_lock_irqsave(&xhci->lock, flags);
> +
> +	if (!urb) {
> +		ret = -EINVAL;
> +		goto done;
> +	}

Why does this have to be inside the lock?  The urb can't change here,
can it?

> +
> +	ret = xhci_check_args(hcd, urb->dev, urb->ep, true, true, __func__);
> +	if (ret <= 0) {
> +		ret = ret ? ret : -EINVAL;
> +		goto done;
> +	}
>  
>  	slot_id = urb->dev->slot_id;
>  	ep_index = xhci_get_endpoint_index(&urb->ep->desc);
>  	ep_state = &xhci->devs[slot_id]->eps[ep_index].ep_state;
>  
> -	if (!HCD_HW_ACCESSIBLE(hcd))
> -		return -ESHUTDOWN;
> +	if (!HCD_HW_ACCESSIBLE(hcd)) {
> +		ret = -ESHUTDOWN;
> +		goto done;

Note, we now have completions, so all of this "goto done" doesn't need
to happen anymore.  Not a complaint, just a suggestion for future
changes or this one, your choice.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] xhci: fix null pointer deref for xhci_urb_enqueue
  2023-11-17 13:32 ` Mathias Nyman
@ 2023-11-18 10:19   ` Kuen-Han Tsai
  2023-11-20 15:33     ` Mathias Nyman
  0 siblings, 1 reply; 13+ messages in thread
From: Kuen-Han Tsai @ 2023-11-18 10:19 UTC (permalink / raw)
  To: Mathias Nyman; +Cc: mathias.nyman, gregkh, linux-usb, linux-kernel

Hi Mathias

>>       if (usb_endpoint_xfer_isoc(&urb->ep->desc))
>> @@ -1552,8 +1561,10 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
>>               num_tds = 1;
>>
>>       urb_priv = kzalloc(struct_size(urb_priv, td, num_tds), mem_flags);
> kzalloc with spinlock held, should preferably be moved outside lock, otherwise should use GFP_ATOMIC

Thanks for pointing this out. I realize this patch is incorrect and it
is non-ideal to include many codes unrelated to xhci->devs[slot_id]
within the lock.

> xhci_check_maxpacket() called here can't be called with spinlock held

It appears that xhci_check_maxpacket() might potentially lead to a
deadlock later if a spinlock is held. Is this the concern you were
referring to? If not, please let me know if there are any other
potential issues that I may have missed, thanks!


On Fri, Nov 17, 2023 at 9:31 PM Mathias Nyman
<mathias.nyman@linux.intel.com> wrote:
>
> On 17.11.2023 9.21, Kuen-Han Tsai wrote:
> > The null pointer dereference happens when xhci_free_dev() frees the
> > xhci->devs[slot_id] virtual device while xhci_urb_enqueue() is
> > processing a urb and checking the max packet size.
> >
> > [106913.850735][ T2068] usb 2-1: USB disconnect, device number 2
> > [106913.856999][ T4618] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010
> > [106913.857488][ T4618] Call trace:
> > [106913.857491][ T4618]  xhci_check_maxpacket+0x30/0x2dc
> > [106913.857494][ T4618]  xhci_urb_enqueue+0x24c/0x47c
> > [106913.857498][ T4618]  usb_hcd_submit_urb+0x1f4/0xf34
> > [106913.857501][ T4618]  usb_submit_urb+0x4b8/0x4fc
> > [106913.857503][ T4618]  usb_control_msg+0x144/0x238
> > [106913.857507][ T4618]  do_proc_control+0x1f0/0x5bc
> > [106913.857509][ T4618]  usbdev_ioctl+0xdd8/0x15a8
> >
> > This patch adds a spinlock to the xhci_urb_enqueue function to make sure
> > xhci_free_dev() and xhci_urb_enqueue() do not race and cause null
> > pointer dereference.
>
> Thanks, nice catch
>
> This patch does however need some additional tuning
>
> >
> > Signed-off-by: Kuen-Han Tsai <khtsai@google.com>
> > ---
> >   drivers/usb/host/xhci.c | 38 ++++++++++++++++++++++++--------------
> >   1 file changed, 24 insertions(+), 14 deletions(-)
> >
> > diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
> > index 884b0898d9c9..e0766ebeff0e 100644
> > --- a/drivers/usb/host/xhci.c
> > +++ b/drivers/usb/host/xhci.c
> > @@ -1522,23 +1522,32 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
> >       struct urb_priv *urb_priv;
> >       int num_tds;
> >
> > -     if (!urb)
> > -             return -EINVAL;
> > -     ret = xhci_check_args(hcd, urb->dev, urb->ep,
> > -                                     true, true, __func__);
> > -     if (ret <= 0)
> > -             return ret ? ret : -EINVAL;
> > +     spin_lock_irqsave(&xhci->lock, flags);
> > +
> > +     if (!urb) {
> > +             ret = -EINVAL;
> > +             goto done;
> > +     }
> > +
> > +     ret = xhci_check_args(hcd, urb->dev, urb->ep, true, true, __func__);
> > +     if (ret <= 0) {
> > +             ret = ret ? ret : -EINVAL;
> > +             goto done;
> > +     }
> >
> >       slot_id = urb->dev->slot_id;
> >       ep_index = xhci_get_endpoint_index(&urb->ep->desc);
> >       ep_state = &xhci->devs[slot_id]->eps[ep_index].ep_state;
> >
> > -     if (!HCD_HW_ACCESSIBLE(hcd))
> > -             return -ESHUTDOWN;
> > +     if (!HCD_HW_ACCESSIBLE(hcd)) {
> > +             ret = -ESHUTDOWN;
> > +             goto done;
> > +     }
> >
> >       if (xhci->devs[slot_id]->flags & VDEV_PORT_ERROR) {
> >               xhci_dbg(xhci, "Can't queue urb, port error, link inactive\n");
> > -             return -ENODEV;
> > +             ret = -ENODEV;
> > +             goto done;
> >       }
> >
> >       if (usb_endpoint_xfer_isoc(&urb->ep->desc))
> > @@ -1552,8 +1561,10 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
> >               num_tds = 1;
> >
> >       urb_priv = kzalloc(struct_size(urb_priv, td, num_tds), mem_flags);
>
> kzalloc with spinlock held, should preferably be moved outside lock, otherwise should use GFP_ATOMIC
>
> > -     if (!urb_priv)
> > -             return -ENOMEM;
> > +     if (!urb_priv) {
> > +             ret = -ENOMEM;
> > +             goto done;
> > +     }
> >
> >       urb_priv->num_tds = num_tds;
> >       urb_priv->num_tds_done = 0;
> > @@ -1571,13 +1582,11 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
>
> xhci_check_maxpacket() called here can't be called with spinlock held
>
> >                       if (ret < 0) {
> >                               xhci_urb_free_priv(urb_priv);
> >                               urb->hcpriv = NULL;
> > -                             return ret;
> > +                             goto done;
>
> Thanks
> Mathias

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] xhci: fix null pointer deref for xhci_urb_enqueue
  2023-11-17 13:53 ` Greg KH
@ 2023-11-18 11:19   ` Kuen-Han Tsai
  0 siblings, 0 replies; 13+ messages in thread
From: Kuen-Han Tsai @ 2023-11-18 11:19 UTC (permalink / raw)
  To: Greg KH; +Cc: mathias.nyman, linux-usb, linux-kernel

Hi Greg

On Fri, Nov 17, 2023 at 9:53 PM Greg KH <gregkh@linuxfoundation.org> wrote:
>
> On Fri, Nov 17, 2023 at 03:21:28PM +0800, Kuen-Han Tsai wrote:
> > The null pointer dereference happens when xhci_free_dev() frees the
> > xhci->devs[slot_id] virtual device while xhci_urb_enqueue() is
> > processing a urb and checking the max packet size.
> >
> > [106913.850735][ T2068] usb 2-1: USB disconnect, device number 2
> > [106913.856999][ T4618] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010
> > [106913.857488][ T4618] Call trace:
> > [106913.857491][ T4618]  xhci_check_maxpacket+0x30/0x2dc
> > [106913.857494][ T4618]  xhci_urb_enqueue+0x24c/0x47c
> > [106913.857498][ T4618]  usb_hcd_submit_urb+0x1f4/0xf34
> > [106913.857501][ T4618]  usb_submit_urb+0x4b8/0x4fc
> > [106913.857503][ T4618]  usb_control_msg+0x144/0x238
> > [106913.857507][ T4618]  do_proc_control+0x1f0/0x5bc
> > [106913.857509][ T4618]  usbdev_ioctl+0xdd8/0x15a8
> >
> > This patch adds a spinlock to the xhci_urb_enqueue function to make sure
> > xhci_free_dev() and xhci_urb_enqueue() do not race and cause null
> > pointer dereference.
>
> I thought we had a lock for this already, what changed to cause this to
> start triggering now, all these years later?

Right, there is a lock in place for xhci_urb_enqueue(), but it doesn't
protect all code segments that use xhci->devs[slot_id] within the
function. I couldn't identify any specific changes that might have
introduced this issue. It's likely a long-standing potential problem
that's difficult to trigger under normal situations.

This issue happens when the USB enumeration process is complete, and a
user space program submits a control request to the peripheral, but
then the device is rapidly disconnected. I was able to reproduce this
issue by introducing a 3-second delay within xhci_check_maxpacket()
and disconnecting the peripheral while observing that the control
request is being processed by xhci_check_maxpacket().

>
> >
> > Signed-off-by: Kuen-Han Tsai <khtsai@google.com>
>
> What commit id does this fix?

Should I include a "Fixes:" header even if this patch doesn't address
a bug from a specific commit?

>
>
> > ---
> >  drivers/usb/host/xhci.c | 38 ++++++++++++++++++++++++--------------
> >  1 file changed, 24 insertions(+), 14 deletions(-)
> >
> > diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
> > index 884b0898d9c9..e0766ebeff0e 100644
> > --- a/drivers/usb/host/xhci.c
> > +++ b/drivers/usb/host/xhci.c
> > @@ -1522,23 +1522,32 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
> >       struct urb_priv *urb_priv;
> >       int num_tds;
> >
> > -     if (!urb)
> > -             return -EINVAL;
> > -     ret = xhci_check_args(hcd, urb->dev, urb->ep,
> > -                                     true, true, __func__);
> > -     if (ret <= 0)
> > -             return ret ? ret : -EINVAL;
> > +     spin_lock_irqsave(&xhci->lock, flags);
> > +
> > +     if (!urb) {
> > +             ret = -EINVAL;
> > +             goto done;
> > +     }
>
> Why does this have to be inside the lock?  The urb can't change here,
> can it?

You're right, no need to place those inside the lock. I will move them
out of the protection.

>
> > +
> > +     ret = xhci_check_args(hcd, urb->dev, urb->ep, true, true, __func__);
> > +     if (ret <= 0) {
> > +             ret = ret ? ret : -EINVAL;
> > +             goto done;
> > +     }
> >
> >       slot_id = urb->dev->slot_id;
> >       ep_index = xhci_get_endpoint_index(&urb->ep->desc);
> >       ep_state = &xhci->devs[slot_id]->eps[ep_index].ep_state;
> >
> > -     if (!HCD_HW_ACCESSIBLE(hcd))
> > -             return -ESHUTDOWN;
> > +     if (!HCD_HW_ACCESSIBLE(hcd)) {
> > +             ret = -ESHUTDOWN;
> > +             goto done;
>
> Note, we now have completions, so all of this "goto done" doesn't need
> to happen anymore.  Not a complaint, just a suggestion for future
> changes or this one, your choice.
>

I'm not familiar with the concept of 'completions'. Can you please
provide some links or explanations to help me understand it? I use a
'goto done' statement because I follow this pattern seen in many
previous commits. However, I'm willing to modify this approach if
there's a more suitable alternative.

Please forgive me if any of my questions seem overly basic. I'm still
in the process of learning how to contribute to the kernel community.

Thanks,
Kuen-Han

> thanks,
>
> greg k-h

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] xhci: fix null pointer deref for xhci_urb_enqueue
  2023-11-18 10:19   ` Kuen-Han Tsai
@ 2023-11-20 15:33     ` Mathias Nyman
  2023-11-28 13:57       ` Mathias Nyman
  0 siblings, 1 reply; 13+ messages in thread
From: Mathias Nyman @ 2023-11-20 15:33 UTC (permalink / raw)
  To: Kuen-Han Tsai; +Cc: mathias.nyman, gregkh, linux-usb, linux-kernel, alan Stern

On 18.11.2023 12.19, Kuen-Han Tsai wrote:
> Hi Mathias
> 
>>>        if (usb_endpoint_xfer_isoc(&urb->ep->desc))
>>> @@ -1552,8 +1561,10 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
>>>                num_tds = 1;
>>>
>>>        urb_priv = kzalloc(struct_size(urb_priv, td, num_tds), mem_flags);
>> kzalloc with spinlock held, should preferably be moved outside lock, otherwise should use GFP_ATOMIC
> 
> Thanks for pointing this out. I realize this patch is incorrect and it
> is non-ideal to include many codes unrelated to xhci->devs[slot_id]
> within the lock.
> 
>> xhci_check_maxpacket() called here can't be called with spinlock held
> 
> It appears that xhci_check_maxpacket() might potentially lead to a
> deadlock later if a spinlock is held. Is this the concern you were
> referring to? If not, please let me know if there are any other
> potential issues that I may have missed, thanks!

xhci_check_maxpacket() will allocate memory, wait for completion, and use the same lock,
so there are several issues here.

I actually think we shouldn't call xhci_check_maxpacket() at all while queuing urbs.

usb core knows when there was max packet size mismatch during enumeration.
I think we should add a hook to the hcd that usb core can call in these cases

Thanks
Mathias


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] xhci: fix null pointer deref for xhci_urb_enqueue
  2023-11-17  7:21 [PATCH] xhci: fix null pointer deref for xhci_urb_enqueue Kuen-Han Tsai
  2023-11-17 13:32 ` Mathias Nyman
  2023-11-17 13:53 ` Greg KH
@ 2023-11-23  1:54 ` kernel test robot
  2 siblings, 0 replies; 13+ messages in thread
From: kernel test robot @ 2023-11-23  1:54 UTC (permalink / raw)
  To: Kuen-Han Tsai
  Cc: oe-lkp, lkp, linux-usb, mathias.nyman, gregkh, linux-kernel,
	Kuen-Han Tsai, oliver.sang



Hello,

kernel test robot noticed "WARNING:HARDIRQ-safe->HARDIRQ-unsafe_lock_order_detected" on:

commit: 90703e106b4214512828bff96df3df2ecff5c7b7 ("[PATCH] xhci: fix null pointer deref for xhci_urb_enqueue")
url: https://github.com/intel-lab-lkp/linux/commits/Kuen-Han-Tsai/xhci-fix-null-pointer-deref-for-xhci_urb_enqueue/20231117-152346
base: https://git.kernel.org/cgit/linux/kernel/git/gregkh/usb.git usb-testing
patch link: https://lore.kernel.org/all/20231117072131.2886406-1-khtsai@google.com/
patch subject: [PATCH] xhci: fix null pointer deref for xhci_urb_enqueue

in testcase: kernel-selftests
version: kernel-selftests-x86_64-60acb023-1_20230329
with following parameters:

	group: net
	test: fcnal-test.sh
	atomic_test: ipv4_ping



compiler: gcc-12
test machine: 36 threads 1 sockets Intel(R) Core(TM) i9-10980XE CPU @ 3.00GHz (Cascade Lake) with 32G memory

(please refer to attached dmesg/kmsg for entire log/backtrace)



If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202311222304.1a72c7d4-oliver.sang@intel.com


[   18.016498][    T9] WARNING: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected
[   18.016498][    T9] 6.7.0-rc1-00001-g90703e106b42 #1 Not tainted
[   18.016498][    T9] -----------------------------------------------------
[   18.016498][    T9] kworker/0:1/9 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire:
[   18.019119][    T1] iTCO_vendor_support: vendor-support=0
[   18.016498][    T9] ffffffff84f96760 (
[   18.019656][    T1] intel_pstate: HWP enabled by BIOS
[   18.016498][    T9] mmu_notifier_invalidate_range_start
[   18.020037][    T1] intel_pstate: Intel P-state driver initializing
[ 18.016498][ T9] ){+.+.}-{0:0}, at: fs_reclaim_acquire (mm/page_alloc.c:3710 mm/page_alloc.c:3701) 
[   18.016498][    T9]
[   18.016498][    T9] and this task is already holding:
[ 18.016498][ T9] ffff8881e0b12428 (&xhci->lock){-.-.}-{2:2}, at: xhci_urb_enqueue (drivers/usb/host/xhci.c:1525) 
[   18.016498][    T9] which would create a new lock dependency:
[   18.016498][    T9]  (&xhci->lock){-.-.}-{2:2} -> (mmu_notifier_invalidate_range_start){+.+.}-{0:0}
[   18.016498][    T9]
[   18.016498][    T9] but this new dependency connects a HARDIRQ-irq-safe lock:
[   18.016498][    T9]  (&xhci->lock){-.-.}-{2:2}
[   18.016498][    T9]
[   18.016498][    T9] ... which became HARDIRQ-irq-safe at:
[ 18.016498][ T9] __lock_acquire (kernel/locking/lockdep.c:5090) 
[ 18.016498][ T9] lock_acquire (kernel/locking/lockdep.c:467 kernel/locking/lockdep.c:5755 kernel/locking/lockdep.c:5718) 
[ 18.016498][ T9] _raw_spin_lock (include/linux/spinlock_api_smp.h:134 kernel/locking/spinlock.c:154) 
[ 18.016498][ T9] xhci_irq (drivers/usb/host/xhci-ring.c:3032) 
[ 18.016498][ T9] __handle_irq_event_percpu (kernel/irq/handle.c:158) 



The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20231122/202311222304.1a72c7d4-oliver.sang@intel.com



-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] xhci: fix null pointer deref for xhci_urb_enqueue
  2023-11-20 15:33     ` Mathias Nyman
@ 2023-11-28 13:57       ` Mathias Nyman
  2023-11-28 14:01         ` [RFT PATCH 1/2] xhci: Reconfigure endpoint 0 max packet size only during endpoint reset Mathias Nyman
  2023-11-28 15:01         ` [PATCH] xhci: fix null pointer deref for xhci_urb_enqueue Kuen-Han Tsai
  0 siblings, 2 replies; 13+ messages in thread
From: Mathias Nyman @ 2023-11-28 13:57 UTC (permalink / raw)
  To: Kuen-Han Tsai; +Cc: mathias.nyman, gregkh, linux-usb, linux-kernel, alan Stern

On 20.11.2023 17.33, Mathias Nyman wrote:
> On 18.11.2023 12.19, Kuen-Han Tsai wrote:
>> Hi Mathias
>>
>>>>        if (usb_endpoint_xfer_isoc(&urb->ep->desc))
>>>> @@ -1552,8 +1561,10 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
>>>>                num_tds = 1;
>>>>
>>>>        urb_priv = kzalloc(struct_size(urb_priv, td, num_tds), mem_flags);
>>> kzalloc with spinlock held, should preferably be moved outside lock, otherwise should use GFP_ATOMIC
>>
>> Thanks for pointing this out. I realize this patch is incorrect and it
>> is non-ideal to include many codes unrelated to xhci->devs[slot_id]
>> within the lock.
>>
>>> xhci_check_maxpacket() called here can't be called with spinlock held
>>
>> It appears that xhci_check_maxpacket() might potentially lead to a
>> deadlock later if a spinlock is held. Is this the concern you were
>> referring to? If not, please let me know if there are any other
>> potential issues that I may have missed, thanks!
> 
> xhci_check_maxpacket() will allocate memory, wait for completion, and use the same lock,
> so there are several issues here.
> 
> I actually think we shouldn't call xhci_check_maxpacket() at all while queuing urbs.
> 
> usb core knows when there was max packet size mismatch during enumeration.
> I think we should add a hook to the hcd that usb core can call in these cases

I moved the max packet checks away from xhci_urb_enqueue() and fixed up the locking.

I can't trigger the original issue, but I tested it by setting incorrect initial max packet
size values.

If you have the chance to test this with your setup I'd appreciate it.

patches found here:
git://git.kernel.org/pub/scm/linux/kernel/git/mnyman/xhci.git fix_urb_enqueue_locking
https://git.kernel.org/pub/scm/linux/kernel/git/mnyman/xhci.git/log/?h=fix_urb_enqueue_locking

I'll add them to this thread as well

thanks
Mathias


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [RFT PATCH 1/2] xhci: Reconfigure endpoint 0 max packet size only during endpoint reset
  2023-11-28 13:57       ` Mathias Nyman
@ 2023-11-28 14:01         ` Mathias Nyman
  2023-11-28 14:01           ` [RFT PATCH 2/2] xhci: fix possible null pointer deref during xhci urb enqueue Mathias Nyman
  2023-11-28 15:32           ` [RFT PATCH 1/2] xhci: Reconfigure endpoint 0 max packet size only during endpoint reset Kuen-Han Tsai
  2023-11-28 15:01         ` [PATCH] xhci: fix null pointer deref for xhci_urb_enqueue Kuen-Han Tsai
  1 sibling, 2 replies; 13+ messages in thread
From: Mathias Nyman @ 2023-11-28 14:01 UTC (permalink / raw)
  To: khtsai; +Cc: gregkh, linux-usb, linux-kernel, stern, Mathias Nyman

The max packet size for full speed control endpoint 0 may vary. It is
defined in the device descriptor, which is read using the same endpoint.
Usb core sets a temporary max packet size value until the real value is
read.

xhci driver needs to reconfigure the endpoint context seen by the
controller if the max packet size changes.
It makes more sense to do this reconfiguration in xhci_endpoint_reset()
instead of urb enqueue as usb core will call endpoint reset during
enumeration if the max packet values differ.
Max packet size adjustment for endpoinr 0 can only happen once per
device enumeration.

Previously the max packet size was checked during every urb enqueue.
This is an additional check for every enqueued urb, and also turned out
to have locking issues as urbs may be queued in any context while xhci
max packet size reconfiguration requires memory allocation, locking, and
sleeping.

Tested with a full speed device using both old and new scheme enumeration
and a intentionally incorrect preliminary max packet size value.

Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
---
 drivers/usb/host/xhci.c | 85 ++++++++++++++++++++---------------------
 1 file changed, 42 insertions(+), 43 deletions(-)

diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 884b0898d9c9..df31d44498d6 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1438,10 +1438,8 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci,
  * descriptor.  If the usb_device's max packet size changes after that point,
  * we need to issue an evaluate context command and wait on it.
  */
-static int xhci_check_maxpacket(struct xhci_hcd *xhci, unsigned int slot_id,
-		unsigned int ep_index, struct urb *urb, gfp_t mem_flags)
+static int xhci_check_ep0_maxpacket(struct xhci_hcd *xhci, struct xhci_virt_device *vdev)
 {
-	struct xhci_container_ctx *out_ctx;
 	struct xhci_input_control_ctx *ctrl_ctx;
 	struct xhci_ep_ctx *ep_ctx;
 	struct xhci_command *command;
@@ -1449,11 +1447,15 @@ static int xhci_check_maxpacket(struct xhci_hcd *xhci, unsigned int slot_id,
 	int hw_max_packet_size;
 	int ret = 0;
 
-	out_ctx = xhci->devs[slot_id]->out_ctx;
-	ep_ctx = xhci_get_ep_ctx(xhci, out_ctx, ep_index);
+	ep_ctx = xhci_get_ep_ctx(xhci, vdev->out_ctx, 0);
 	hw_max_packet_size = MAX_PACKET_DECODED(le32_to_cpu(ep_ctx->ep_info2));
-	max_packet_size = usb_endpoint_maxp(&urb->dev->ep0.desc);
-	if (hw_max_packet_size != max_packet_size) {
+	max_packet_size = usb_endpoint_maxp(&vdev->udev->ep0.desc);
+
+	if (hw_max_packet_size == max_packet_size)
+		return 0;
+
+	switch (max_packet_size) {
+	case 8: case 16: case 32: case 64: case 9:
 		xhci_dbg_trace(xhci,  trace_xhci_dbg_context_change,
 				"Max Packet Size for ep 0 changed.");
 		xhci_dbg_trace(xhci,  trace_xhci_dbg_context_change,
@@ -1465,28 +1467,22 @@ static int xhci_check_maxpacket(struct xhci_hcd *xhci, unsigned int slot_id,
 		xhci_dbg_trace(xhci,  trace_xhci_dbg_context_change,
 				"Issuing evaluate context command.");
 
-		/* Set up the input context flags for the command */
-		/* FIXME: This won't work if a non-default control endpoint
-		 * changes max packet sizes.
-		 */
-
-		command = xhci_alloc_command(xhci, true, mem_flags);
+		command = xhci_alloc_command(xhci, true, GFP_KERNEL);
 		if (!command)
 			return -ENOMEM;
 
-		command->in_ctx = xhci->devs[slot_id]->in_ctx;
+		command->in_ctx = vdev->in_ctx;
 		ctrl_ctx = xhci_get_input_control_ctx(command->in_ctx);
 		if (!ctrl_ctx) {
 			xhci_warn(xhci, "%s: Could not get input context, bad type.\n",
 					__func__);
 			ret = -ENOMEM;
-			goto command_cleanup;
+			break;
 		}
 		/* Set up the modified control endpoint 0 */
-		xhci_endpoint_copy(xhci, xhci->devs[slot_id]->in_ctx,
-				xhci->devs[slot_id]->out_ctx, ep_index);
+		xhci_endpoint_copy(xhci, vdev->in_ctx, vdev->out_ctx, 0);
 
-		ep_ctx = xhci_get_ep_ctx(xhci, command->in_ctx, ep_index);
+		ep_ctx = xhci_get_ep_ctx(xhci, command->in_ctx, 0);
 		ep_ctx->ep_info &= cpu_to_le32(~EP_STATE_MASK);/* must clear */
 		ep_ctx->ep_info2 &= cpu_to_le32(~MAX_PACKET_MASK);
 		ep_ctx->ep_info2 |= cpu_to_le32(MAX_PACKET(max_packet_size));
@@ -1494,17 +1490,20 @@ static int xhci_check_maxpacket(struct xhci_hcd *xhci, unsigned int slot_id,
 		ctrl_ctx->add_flags = cpu_to_le32(EP0_FLAG);
 		ctrl_ctx->drop_flags = 0;
 
-		ret = xhci_configure_endpoint(xhci, urb->dev, command,
-				true, false);
-
-		/* Clean up the input context for later use by bandwidth
-		 * functions.
-		 */
+		ret = xhci_configure_endpoint(xhci, vdev->udev, command,
+					      true, false);
+		/* Clean up the input context for later use by bandwidth functions */
 		ctrl_ctx->add_flags = cpu_to_le32(SLOT_FLAG);
-command_cleanup:
-		kfree(command->completion);
-		kfree(command);
+		break;
+	default:
+		dev_dbg(&vdev->udev->dev, "incorrect max packet size %d for ep0\n",
+			max_packet_size);
+		return -EINVAL;
 	}
+
+	kfree(command->completion);
+	kfree(command);
+
 	return ret;
 }
 
@@ -1561,21 +1560,6 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
 
 	trace_xhci_urb_enqueue(urb);
 
-	if (usb_endpoint_xfer_control(&urb->ep->desc)) {
-		/* Check to see if the max packet size for the default control
-		 * endpoint changed during FS device enumeration
-		 */
-		if (urb->dev->speed == USB_SPEED_FULL) {
-			ret = xhci_check_maxpacket(xhci, slot_id,
-					ep_index, urb, mem_flags);
-			if (ret < 0) {
-				xhci_urb_free_priv(urb_priv);
-				urb->hcpriv = NULL;
-				return ret;
-			}
-		}
-	}
-
 	spin_lock_irqsave(&xhci->lock, flags);
 
 	if (xhci->xhc_state & XHCI_STATE_DYING) {
@@ -3104,6 +3088,21 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd,
 	int err;
 
 	xhci = hcd_to_xhci(hcd);
+	ep_index = xhci_get_endpoint_index(&host_ep->desc);
+
+	/*
+	 * Usb core assumes a max packet value for ep0 on FS devices until the
+	 * real value is read from the descriptor. Core resets Ep0 if values
+	 * mismatch. Reconfigure the xhci ep0 endpoint context here in that case
+	 */
+	if (usb_endpoint_xfer_control(&host_ep->desc) && ep_index == 0) {
+		udev = container_of(host_ep, struct usb_device, ep0);
+		if (udev->speed == USB_SPEED_FULL)
+			xhci_check_ep0_maxpacket(xhci, xhci->devs[udev->slot_id]);
+		/* Nothing else should be done here for ep0 during ep reset */
+		return;
+	}
+
 	if (!host_ep->hcpriv)
 		return;
 	udev = (struct usb_device *) host_ep->hcpriv;
@@ -3116,7 +3115,7 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd,
 	 */
 	if (!udev->slot_id || !vdev)
 		return;
-	ep_index = xhci_get_endpoint_index(&host_ep->desc);
+
 	ep = &vdev->eps[ep_index];
 
 	/* Bail out if toggle is already being cleared by a endpoint reset */
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [RFT PATCH 2/2] xhci: fix possible null pointer deref during xhci urb enqueue
  2023-11-28 14:01         ` [RFT PATCH 1/2] xhci: Reconfigure endpoint 0 max packet size only during endpoint reset Mathias Nyman
@ 2023-11-28 14:01           ` Mathias Nyman
  2023-11-28 15:32           ` [RFT PATCH 1/2] xhci: Reconfigure endpoint 0 max packet size only during endpoint reset Kuen-Han Tsai
  1 sibling, 0 replies; 13+ messages in thread
From: Mathias Nyman @ 2023-11-28 14:01 UTC (permalink / raw)
  To: khtsai; +Cc: gregkh, linux-usb, linux-kernel, stern, Mathias Nyman

There is a short gap between urb being submitted and actually added to the
endpoint queue (linked). If the device is disconnected during this time
then usb core is not yet aware of the pending urb, and device may be freed
just before xhci_urq_enqueue() continues, dereferening the freed device.

Freeing the device is protected by the xhci spinlock, so make sure we take
and keep the lock while checking that device exists, dereference it, and
add the urb to the queue.

Remove the unnecessary URB check, usb core checks it before calling
xhci_urb_enqueue()

Suggested-by: Kuen-Han Tsai <khtsai@google.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
---
 drivers/usb/host/xhci.c | 40 +++++++++++++++++++++++-----------------
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index df31d44498d6..4929c4396e9e 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1521,24 +1521,7 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
 	struct urb_priv	*urb_priv;
 	int num_tds;
 
-	if (!urb)
-		return -EINVAL;
-	ret = xhci_check_args(hcd, urb->dev, urb->ep,
-					true, true, __func__);
-	if (ret <= 0)
-		return ret ? ret : -EINVAL;
-
-	slot_id = urb->dev->slot_id;
 	ep_index = xhci_get_endpoint_index(&urb->ep->desc);
-	ep_state = &xhci->devs[slot_id]->eps[ep_index].ep_state;
-
-	if (!HCD_HW_ACCESSIBLE(hcd))
-		return -ESHUTDOWN;
-
-	if (xhci->devs[slot_id]->flags & VDEV_PORT_ERROR) {
-		xhci_dbg(xhci, "Can't queue urb, port error, link inactive\n");
-		return -ENODEV;
-	}
 
 	if (usb_endpoint_xfer_isoc(&urb->ep->desc))
 		num_tds = urb->number_of_packets;
@@ -1562,12 +1545,35 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
 
 	spin_lock_irqsave(&xhci->lock, flags);
 
+	ret = xhci_check_args(hcd, urb->dev, urb->ep,
+			      true, true, __func__);
+	if (ret <= 0) {
+		ret = ret ? ret : -EINVAL;
+		goto free_priv;
+	}
+
+	slot_id = urb->dev->slot_id;
+
+	if (!HCD_HW_ACCESSIBLE(hcd)) {
+		ret = -ESHUTDOWN;
+		goto free_priv;
+	}
+
+	if (xhci->devs[slot_id]->flags & VDEV_PORT_ERROR) {
+		xhci_dbg(xhci, "Can't queue urb, port error, link inactive\n");
+		ret = -ENODEV;
+		goto free_priv;
+	}
+
 	if (xhci->xhc_state & XHCI_STATE_DYING) {
 		xhci_dbg(xhci, "Ep 0x%x: URB %p submitted for non-responsive xHCI host.\n",
 			 urb->ep->desc.bEndpointAddress, urb);
 		ret = -ESHUTDOWN;
 		goto free_priv;
 	}
+
+	ep_state = &xhci->devs[slot_id]->eps[ep_index].ep_state;
+
 	if (*ep_state & (EP_GETTING_STREAMS | EP_GETTING_NO_STREAMS)) {
 		xhci_warn(xhci, "WARN: Can't enqueue URB, ep in streams transition state %x\n",
 			  *ep_state);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] xhci: fix null pointer deref for xhci_urb_enqueue
  2023-11-28 13:57       ` Mathias Nyman
  2023-11-28 14:01         ` [RFT PATCH 1/2] xhci: Reconfigure endpoint 0 max packet size only during endpoint reset Mathias Nyman
@ 2023-11-28 15:01         ` Kuen-Han Tsai
  1 sibling, 0 replies; 13+ messages in thread
From: Kuen-Han Tsai @ 2023-11-28 15:01 UTC (permalink / raw)
  To: Mathias Nyman; +Cc: mathias.nyman, gregkh, linux-usb, linux-kernel, alan Stern

Thank you so much for fixing the issue, Mathias!

> I moved the max packet checks away from xhci_urb_enqueue() and fixed up the locking.
> I can't trigger the original issue, but I tested it by setting incorrect initial max packet
> size values.

I added a 3-seconds delay within xhci_check_maxpacket(). When I saw
the max packet size was being checked, I removed the USB device to
trigger the race problem.

[  172.392813][ T1960] [khtsai] xhci_check_maxpacket, before,
slot_id=2, devs[slot_id]=000000003cb76fec
[  174.290601][   T20] usb 2-1: USB disconnect, device number 2
[  174.290608][   T20] usb 2-1.2: USB disconnect, device number 3
[  174.297180][   T20] [khtsai] xhci_free_dev, ret=1
[  174.305010][  T133] usb usb3: USB disconnect, device number 1
[  174.316346][   T20] [khtsai] xhci_free_dev, ret=1
[  175.458962][ T1960] [khtsai] xhci_check_maxpacket, after,
slot_id=2, devs[slot_id]=0000000000000000
[  175.460835][ T1960] Unable to handle kernel NULL pointer
dereference at virtual address 0000000000000010

> If you have the chance to test this with your setup I'd appreciate it.

Sure, I will definitely help verify it. However, I believe the race
problem won't happen as your patch already removes max packet checks
from xhci_urb_enqueue() and also protects sections using the
xhci->devs[slot_id] virtual device.

> patches found here:
> git://git.kernel.org/pub/scm/linux/kernel/git/mnyman/xhci.git fix_urb_enqueue_locking
> https://git.kernel.org/pub/scm/linux/kernel/git/mnyman/xhci.git/log/?h=fix_urb_enqueue_locking

I'll add them to this thread as well

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFT PATCH 1/2] xhci: Reconfigure endpoint 0 max packet size only during endpoint reset
  2023-11-28 14:01         ` [RFT PATCH 1/2] xhci: Reconfigure endpoint 0 max packet size only during endpoint reset Mathias Nyman
  2023-11-28 14:01           ` [RFT PATCH 2/2] xhci: fix possible null pointer deref during xhci urb enqueue Mathias Nyman
@ 2023-11-28 15:32           ` Kuen-Han Tsai
  2023-11-29 14:57             ` Mathias Nyman
  1 sibling, 1 reply; 13+ messages in thread
From: Kuen-Han Tsai @ 2023-11-28 15:32 UTC (permalink / raw)
  To: Mathias Nyman; +Cc: gregkh, linux-usb, linux-kernel, stern

On Tue, Nov 28, 2023 at 10:00 PM Mathias Nyman
<mathias.nyman@linux.intel.com> wrote:
>
> The max packet size for full speed control endpoint 0 may vary. It is
> defined in the device descriptor, which is read using the same endpoint.
> Usb core sets a temporary max packet size value until the real value is
> read.
>
> xhci driver needs to reconfigure the endpoint context seen by the
> controller if the max packet size changes.
> It makes more sense to do this reconfiguration in xhci_endpoint_reset()
> instead of urb enqueue as usb core will call endpoint reset during
> enumeration if the max packet values differ.
> Max packet size adjustment for endpoinr 0 can only happen once per
> device enumeration.

s/endpoinr/endpoint

>
> Previously the max packet size was checked during every urb enqueue.
> This is an additional check for every enqueued urb, and also turned out
> to have locking issues as urbs may be queued in any context while xhci
> max packet size reconfiguration requires memory allocation, locking, and
> sleeping.
>
> Tested with a full speed device using both old and new scheme enumeration
> and a intentionally incorrect preliminary max packet size value.

s/a intentionally/an intentionally

>
> Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
> ---
>  drivers/usb/host/xhci.c | 85 ++++++++++++++++++++---------------------
>  1 file changed, 42 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
> index 884b0898d9c9..df31d44498d6 100644
> --- a/drivers/usb/host/xhci.c
> +++ b/drivers/usb/host/xhci.c
> @@ -1438,10 +1438,8 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci,
>   * descriptor.  If the usb_device's max packet size changes after that point,
>   * we need to issue an evaluate context command and wait on it.
>   */
> -static int xhci_check_maxpacket(struct xhci_hcd *xhci, unsigned int slot_id,
> -               unsigned int ep_index, struct urb *urb, gfp_t mem_flags)
> +static int xhci_check_ep0_maxpacket(struct xhci_hcd *xhci, struct xhci_virt_device *vdev)
>  {
> -       struct xhci_container_ctx *out_ctx;
>         struct xhci_input_control_ctx *ctrl_ctx;
>         struct xhci_ep_ctx *ep_ctx;
>         struct xhci_command *command;
> @@ -1449,11 +1447,15 @@ static int xhci_check_maxpacket(struct xhci_hcd *xhci, unsigned int slot_id,
>         int hw_max_packet_size;
>         int ret = 0;
>
> -       out_ctx = xhci->devs[slot_id]->out_ctx;
> -       ep_ctx = xhci_get_ep_ctx(xhci, out_ctx, ep_index);
> +       ep_ctx = xhci_get_ep_ctx(xhci, vdev->out_ctx, 0);
>         hw_max_packet_size = MAX_PACKET_DECODED(le32_to_cpu(ep_ctx->ep_info2));
> -       max_packet_size = usb_endpoint_maxp(&urb->dev->ep0.desc);
> -       if (hw_max_packet_size != max_packet_size) {
> +       max_packet_size = usb_endpoint_maxp(&vdev->udev->ep0.desc);
> +
> +       if (hw_max_packet_size == max_packet_size)
> +               return 0;
> +
> +       switch (max_packet_size) {
> +       case 8: case 16: case 32: case 64: case 9:
>                 xhci_dbg_trace(xhci,  trace_xhci_dbg_context_change,
>                                 "Max Packet Size for ep 0 changed.");
>                 xhci_dbg_trace(xhci,  trace_xhci_dbg_context_change,
> @@ -1465,28 +1467,22 @@ static int xhci_check_maxpacket(struct xhci_hcd *xhci, unsigned int slot_id,
>                 xhci_dbg_trace(xhci,  trace_xhci_dbg_context_change,
>                                 "Issuing evaluate context command.");
>
> -               /* Set up the input context flags for the command */
> -               /* FIXME: This won't work if a non-default control endpoint
> -                * changes max packet sizes.
> -                */
> -
> -               command = xhci_alloc_command(xhci, true, mem_flags);
> +               command = xhci_alloc_command(xhci, true, GFP_KERNEL);
>                 if (!command)
>                         return -ENOMEM;
>
> -               command->in_ctx = xhci->devs[slot_id]->in_ctx;
> +               command->in_ctx = vdev->in_ctx;
>                 ctrl_ctx = xhci_get_input_control_ctx(command->in_ctx);
>                 if (!ctrl_ctx) {
>                         xhci_warn(xhci, "%s: Could not get input context, bad type.\n",
>                                         __func__);
>                         ret = -ENOMEM;
> -                       goto command_cleanup;
> +                       break;
>                 }
>                 /* Set up the modified control endpoint 0 */
> -               xhci_endpoint_copy(xhci, xhci->devs[slot_id]->in_ctx,
> -                               xhci->devs[slot_id]->out_ctx, ep_index);
> +               xhci_endpoint_copy(xhci, vdev->in_ctx, vdev->out_ctx, 0);
>
> -               ep_ctx = xhci_get_ep_ctx(xhci, command->in_ctx, ep_index);
> +               ep_ctx = xhci_get_ep_ctx(xhci, command->in_ctx, 0);
>                 ep_ctx->ep_info &= cpu_to_le32(~EP_STATE_MASK);/* must clear */
>                 ep_ctx->ep_info2 &= cpu_to_le32(~MAX_PACKET_MASK);
>                 ep_ctx->ep_info2 |= cpu_to_le32(MAX_PACKET(max_packet_size));
> @@ -1494,17 +1490,20 @@ static int xhci_check_maxpacket(struct xhci_hcd *xhci, unsigned int slot_id,
>                 ctrl_ctx->add_flags = cpu_to_le32(EP0_FLAG);
>                 ctrl_ctx->drop_flags = 0;
>
> -               ret = xhci_configure_endpoint(xhci, urb->dev, command,
> -                               true, false);
> -
> -               /* Clean up the input context for later use by bandwidth
> -                * functions.
> -                */
> +               ret = xhci_configure_endpoint(xhci, vdev->udev, command,
> +                                             true, false);
> +               /* Clean up the input context for later use by bandwidth functions */
>                 ctrl_ctx->add_flags = cpu_to_le32(SLOT_FLAG);
> -command_cleanup:
> -               kfree(command->completion);
> -               kfree(command);
> +               break;
> +       default:
> +               dev_dbg(&vdev->udev->dev, "incorrect max packet size %d for ep0\n",
> +                       max_packet_size);
> +               return -EINVAL;
>         }
> +
> +       kfree(command->completion);
> +       kfree(command);
> +
>         return ret;
>  }
>
> @@ -1561,21 +1560,6 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
>
>         trace_xhci_urb_enqueue(urb);
>
> -       if (usb_endpoint_xfer_control(&urb->ep->desc)) {
> -               /* Check to see if the max packet size for the default control
> -                * endpoint changed during FS device enumeration
> -                */
> -               if (urb->dev->speed == USB_SPEED_FULL) {
> -                       ret = xhci_check_maxpacket(xhci, slot_id,
> -                                       ep_index, urb, mem_flags);
> -                       if (ret < 0) {
> -                               xhci_urb_free_priv(urb_priv);
> -                               urb->hcpriv = NULL;
> -                               return ret;
> -                       }
> -               }
> -       }
> -
>         spin_lock_irqsave(&xhci->lock, flags);
>
>         if (xhci->xhc_state & XHCI_STATE_DYING) {
> @@ -3104,6 +3088,21 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd,
>         int err;
>
>         xhci = hcd_to_xhci(hcd);
> +       ep_index = xhci_get_endpoint_index(&host_ep->desc);
> +
> +       /*
> +        * Usb core assumes a max packet value for ep0 on FS devices until the
> +        * real value is read from the descriptor. Core resets Ep0 if values
> +        * mismatch. Reconfigure the xhci ep0 endpoint context here in that case
> +        */
> +       if (usb_endpoint_xfer_control(&host_ep->desc) && ep_index == 0) {
> +               udev = container_of(host_ep, struct usb_device, ep0);
> +               if (udev->speed == USB_SPEED_FULL)
> +                       xhci_check_ep0_maxpacket(xhci, xhci->devs[udev->slot_id]);
> +               /* Nothing else should be done here for ep0 during ep reset */
> +               return;
> +       }
> +

Could there be a race condition between the xhci_endpoint_reset() and
xhci_free_dev() functions, resulting in the xhci->devs[udev->slot_id]
becoming null?
If so, a null pointer dereference will happen in
xhci_check_ep0_maxpacket() when accessing vdev->out_ctx.

>         if (!host_ep->hcpriv)
>                 return;
>         udev = (struct usb_device *) host_ep->hcpriv;
> @@ -3116,7 +3115,7 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd,
>          */
>         if (!udev->slot_id || !vdev)
>                 return;
> -       ep_index = xhci_get_endpoint_index(&host_ep->desc);
> +
>         ep = &vdev->eps[ep_index];
>
>         /* Bail out if toggle is already being cleared by a endpoint reset */
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFT PATCH 1/2] xhci: Reconfigure endpoint 0 max packet size only during endpoint reset
  2023-11-28 15:32           ` [RFT PATCH 1/2] xhci: Reconfigure endpoint 0 max packet size only during endpoint reset Kuen-Han Tsai
@ 2023-11-29 14:57             ` Mathias Nyman
  0 siblings, 0 replies; 13+ messages in thread
From: Mathias Nyman @ 2023-11-29 14:57 UTC (permalink / raw)
  To: Kuen-Han Tsai; +Cc: gregkh, linux-usb, linux-kernel, stern

>> +       ep_index = xhci_get_endpoint_index(&host_ep->desc);
>> +
>> +       /*
>> +        * Usb core assumes a max packet value for ep0 on FS devices until the
>> +        * real value is read from the descriptor. Core resets Ep0 if values
>> +        * mismatch. Reconfigure the xhci ep0 endpoint context here in that case
>> +        */
>> +       if (usb_endpoint_xfer_control(&host_ep->desc) && ep_index == 0) {
>> +               udev = container_of(host_ep, struct usb_device, ep0);
>> +               if (udev->speed == USB_SPEED_FULL)
>> +                       xhci_check_ep0_maxpacket(xhci, xhci->devs[udev->slot_id]);
>> +               /* Nothing else should be done here for ep0 during ep reset */
>> +               return;
>> +       }
>> +
> 
> Could there be a race condition between the xhci_endpoint_reset() and
> xhci_free_dev() functions, resulting in the xhci->devs[udev->slot_id]
> becoming null?
> If so, a null pointer dereference will happen in
> xhci_check_ep0_maxpacket() when accessing vdev->out_ctx.

should not race. xhci_free_dev() and xhci_endpoint_reset() for endpoint 0 should only
be called by hub driver hub_free_dev() and usb_ep0_reinit() respectively.

Hub driver takes care of concurrency for these

Thanks
Mathias


^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2023-11-29 14:57 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-17  7:21 [PATCH] xhci: fix null pointer deref for xhci_urb_enqueue Kuen-Han Tsai
2023-11-17 13:32 ` Mathias Nyman
2023-11-18 10:19   ` Kuen-Han Tsai
2023-11-20 15:33     ` Mathias Nyman
2023-11-28 13:57       ` Mathias Nyman
2023-11-28 14:01         ` [RFT PATCH 1/2] xhci: Reconfigure endpoint 0 max packet size only during endpoint reset Mathias Nyman
2023-11-28 14:01           ` [RFT PATCH 2/2] xhci: fix possible null pointer deref during xhci urb enqueue Mathias Nyman
2023-11-28 15:32           ` [RFT PATCH 1/2] xhci: Reconfigure endpoint 0 max packet size only during endpoint reset Kuen-Han Tsai
2023-11-29 14:57             ` Mathias Nyman
2023-11-28 15:01         ` [PATCH] xhci: fix null pointer deref for xhci_urb_enqueue Kuen-Han Tsai
2023-11-17 13:53 ` Greg KH
2023-11-18 11:19   ` Kuen-Han Tsai
2023-11-23  1:54 ` kernel test robot

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.