* [PATCH] xfrm : lock input tasklet skb queue
@ 2019-10-20 15:46 Tom Rix
  2019-10-21  8:37 ` Steffen Klassert
  2019-10-22  8:58 ` Joerg Vehlow
  0 siblings, 2 replies; 5+ messages in thread
From: Tom Rix @ 2019-10-20 15:46 UTC (permalink / raw)
  To: steffen.klassert, herbert, davem, netdev, linux-kernel

On PREEMPT_RT_FULL while running netperf, a corruption
of the skb queue causes an oops.

This appears to be caused by a race condition here
        __skb_queue_tail(&trans->queue, skb);
        tasklet_schedule(&trans->tasklet);
Where the queue is changed before the tasklet is locked by
tasklet_schedule.

The fix is to use the skb queue lock.

Signed-off-by: Tom Rix <trix@redhat.com>
---
 net/xfrm/xfrm_input.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 9b599ed66d97..226dead86828 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -758,12 +758,16 @@ static void xfrm_trans_reinject(unsigned long data)
     struct xfrm_trans_tasklet *trans = (void *)data;
     struct sk_buff_head queue;
     struct sk_buff *skb;
+    unsigned long flags;

     __skb_queue_head_init(&queue);
+    spin_lock_irqsave(&trans->queue.lock, flags);
     skb_queue_splice_init(&trans->queue, &queue);

     while ((skb = __skb_dequeue(&queue)))
         XFRM_TRANS_SKB_CB(skb)->finish(dev_net(skb->dev), NULL, skb);
+
+    spin_unlock_irqrestore(&trans->queue.lock, flags);
 }

 int xfrm_trans_queue(struct sk_buff *skb,
@@ -771,15 +775,20 @@ int xfrm_trans_queue(struct sk_buff *skb,
                    struct sk_buff *))
 {
     struct xfrm_trans_tasklet *trans;
+    unsigned long flags;

     trans = this_cpu_ptr(&xfrm_trans_tasklet);
+    spin_lock_irqsave(&trans->queue.lock, flags);

-    if (skb_queue_len(&trans->queue) >= netdev_max_backlog)
+    if (skb_queue_len(&trans->queue) >= netdev_max_backlog) {
+        spin_unlock_irqrestore(&trans->queue.lock, flags);
         return -ENOBUFS;
+    }

     XFRM_TRANS_SKB_CB(skb)->finish = finish;
     __skb_queue_tail(&trans->queue, skb);
     tasklet_schedule(&trans->tasklet);
+    spin_unlock_irqrestore(&trans->queue.lock, flags);
     return 0;
 }
 EXPORT_SYMBOL(xfrm_trans_queue);
-- 
2.23.0


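For context, the per-CPU tasklet that both of these functions operate on is
defined and initialized in net/xfrm/xfrm_input.c roughly as follows
(paraphrased from the mainline source of that period and shown only for
orientation; minor details may differ):

    struct xfrm_trans_tasklet {
        struct tasklet_struct tasklet;
        struct sk_buff_head queue;
    };

    static DEFINE_PER_CPU(struct xfrm_trans_tasklet, xfrm_trans_tasklet);

    /* in xfrm_input_init(): */
    for_each_possible_cpu(i) {
        struct xfrm_trans_tasklet *trans;

        trans = &per_cpu(xfrm_trans_tasklet, i);
        /* initializes the list head only; the double-underscore variant
         * does not touch the embedded queue.lock spinlock
         */
        __skb_queue_head_init(&trans->queue);
        tasklet_init(&trans->tasklet, xfrm_trans_reinject,
                     (unsigned long)trans);
    }

Note in particular that __skb_queue_head_init() leaves queue.lock
uninitialized, a point that comes up again further down the thread.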

* Re: [PATCH] xfrm : lock input tasklet skb queue
  2019-10-20 15:46 [PATCH] xfrm : lock input tasklet skb queue Tom Rix
@ 2019-10-21  8:37 ` Steffen Klassert
  2019-10-21 16:31   ` Tom Rix
  2019-10-22  8:58 ` Joerg Vehlow
  1 sibling, 1 reply; 5+ messages in thread
From: Steffen Klassert @ 2019-10-21  8:37 UTC (permalink / raw)
  To: Tom Rix; +Cc: herbert, davem, netdev, linux-kernel

On Sun, Oct 20, 2019 at 08:46:10AM -0700, Tom Rix wrote:
> On PREEMPT_RT_FULL while running netperf, a corruption
> of the skb queue causes an oops.
> 
> This appears to be caused by a race condition here
>         __skb_queue_tail(&trans->queue, skb);
>         tasklet_schedule(&trans->tasklet);
> Where the queue is changed before the tasklet is locked by
> tasklet_schedule.
> 
> The fix is to use the skb queue lock.
> 
> Signed-off-by: Tom Rix <trix@redhat.com>
> ---
>  net/xfrm/xfrm_input.c | 11 ++++++++++-
>  1 file changed, 10 insertions(+), 1 deletion(-)
> 
> diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
> index 9b599ed66d97..226dead86828 100644
> --- a/net/xfrm/xfrm_input.c
> +++ b/net/xfrm/xfrm_input.c
> @@ -758,12 +758,16 @@ static void xfrm_trans_reinject(unsigned long data)
>      struct xfrm_trans_tasklet *trans = (void *)data;
>      struct sk_buff_head queue;
>      struct sk_buff *skb;
> +    unsigned long flags;
> 
>      __skb_queue_head_init(&queue);
> +    spin_lock_irqsave(&trans->queue.lock, flags);
>      skb_queue_splice_init(&trans->queue, &queue);
> 
>      while ((skb = __skb_dequeue(&queue)))
>          XFRM_TRANS_SKB_CB(skb)->finish(dev_net(skb->dev), NULL, skb);
> +
> +    spin_unlock_irqrestore(&trans->queue.lock, flags);
>  }
> 
>  int xfrm_trans_queue(struct sk_buff *skb,
> @@ -771,15 +775,20 @@ int xfrm_trans_queue(struct sk_buff *skb,
>                     struct sk_buff *))
>  {
>      struct xfrm_trans_tasklet *trans;
> +    unsigned long flags;
> 
>      trans = this_cpu_ptr(&xfrm_trans_tasklet);
> +    spin_lock_irqsave(&trans->queue.lock, flags);

As you can see above 'trans' is per cpu, so a spinlock
is not needed here. Also this does not run in hard
interrupt context, so irqsave is also not needed.
I don't see how this can fix anything.

Can you please explain that race a bit more detailed?



* Re: [PATCH] xfrm : lock input tasklet skb queue
  2019-10-21  8:37 ` Steffen Klassert
@ 2019-10-21 16:31   ` Tom Rix
  2019-10-22  6:12     ` Herbert Xu
  0 siblings, 1 reply; 5+ messages in thread
From: Tom Rix @ 2019-10-21 16:31 UTC (permalink / raw)
  To: Steffen Klassert; +Cc: herbert, davem, netdev, linux-kernel

With PREEMPT_RT_FULL, softirqs and interrupts run in kthreads, so it is
possible for the tasklet to sleep and for its queue to be modified while
it sleeps.
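
One possible interleaving consistent with that description (an illustrative
reconstruction, not a captured trace):

    1. xfrm_trans_reinject() runs in a ksoftirqd thread and starts the
       unlocked skb_queue_splice_init(&trans->queue, &queue).
    2. With PREEMPT_RT_FULL that thread is preempted in the middle of the
       list manipulation.
    3. Another threaded context on the same CPU enters xfrm_trans_queue()
       and does __skb_queue_tail(&trans->queue, skb) on the half-spliced
       list.
    4. The tasklet thread resumes and walks a now-inconsistent queue, which
       would explain the oops seen while running netperf.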

On Mon, Oct 21, 2019 at 1:37 AM Steffen Klassert
<steffen.klassert@secunet.com> wrote:
>
> On Sun, Oct 20, 2019 at 08:46:10AM -0700, Tom Rix wrote:
> > On PREEMPT_RT_FULL while running netperf, a corruption
> > of the skb queue causes an oops.
> >
> > This appears to be caused by a race condition here
> >         __skb_queue_tail(&trans->queue, skb);
> >         tasklet_schedule(&trans->tasklet);
> > Where the queue is changed before the tasklet is locked by
> > tasklet_schedule.
> >
> > The fix is to use the skb queue lock.
> >
> > Signed-off-by: Tom Rix <trix@redhat.com>
> > ---
> >  net/xfrm/xfrm_input.c | 11 ++++++++++-
> >  1 file changed, 10 insertions(+), 1 deletion(-)
> >
> > diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
> > index 9b599ed66d97..226dead86828 100644
> > --- a/net/xfrm/xfrm_input.c
> > +++ b/net/xfrm/xfrm_input.c
> > @@ -758,12 +758,16 @@ static void xfrm_trans_reinject(unsigned long data)
> >      struct xfrm_trans_tasklet *trans = (void *)data;
> >      struct sk_buff_head queue;
> >      struct sk_buff *skb;
> > +    unsigned long flags;
> >
> >      __skb_queue_head_init(&queue);
> > +    spin_lock_irqsave(&trans->queue.lock, flags);
> >      skb_queue_splice_init(&trans->queue, &queue);
> >
> >      while ((skb = __skb_dequeue(&queue)))
> >          XFRM_TRANS_SKB_CB(skb)->finish(dev_net(skb->dev), NULL, skb);
> > +
> > +    spin_unlock_irqrestore(&trans->queue.lock, flags);
> >  }
> >
> >  int xfrm_trans_queue(struct sk_buff *skb,
> > @@ -771,15 +775,20 @@ int xfrm_trans_queue(struct sk_buff *skb,
> >                     struct sk_buff *))
> >  {
> >      struct xfrm_trans_tasklet *trans;
> > +    unsigned long flags;
> >
> >      trans = this_cpu_ptr(&xfrm_trans_tasklet);
> > +    spin_lock_irqsave(&trans->queue.lock, flags);
>
> As you can see above 'trans' is per cpu, so a spinlock
> is not needed here. Also this does not run in hard
> interrupt context, so irqsave is also not needed.
> I don't see how this can fix anything.
>
> Can you please explain that race a bit more detailed?
>



* Re: [PATCH] xfrm : lock input tasklet skb queue
  2019-10-21 16:31   ` Tom Rix
@ 2019-10-22  6:12     ` Herbert Xu
  0 siblings, 0 replies; 5+ messages in thread
From: Herbert Xu @ 2019-10-22  6:12 UTC (permalink / raw)
  To: Tom Rix; +Cc: Steffen Klassert, davem, netdev, linux-kernel

On Mon, Oct 21, 2019 at 09:31:13AM -0700, Tom Rix wrote:
> With PREEMPT_RT_FULL, softirqs and interrupts run in kthreads, so it is
> possible for the tasklet to sleep and for its queue to be modified while
> it sleeps.

This is ridiculous.  The network stack is full of assumptions
like this.  So I think we need to fix preempt rt instead because
you can't make a major change like this without auditing the entire
kernel first rather than relying on a whack-a-mole approach.

Cheers,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt


* Re: [PATCH] xfrm : lock input tasklet skb queue
  2019-10-20 15:46 [PATCH] xfrm : lock input tasklet skb queue Tom Rix
  2019-10-21  8:37 ` Steffen Klassert
@ 2019-10-22  8:58 ` Joerg Vehlow
  1 sibling, 0 replies; 5+ messages in thread
From: Joerg Vehlow @ 2019-10-22  8:58 UTC (permalink / raw)
  To: Tom Rix, steffen.klassert, herbert, davem, netdev, linux-kernel

Hi,

I already sent a patch for a similar issue to this mailing list on
2019-09-09 [1]. Sadly there were no replies, although this is a huge bug
in the RT kernel.
I fixed it a bit differently, using smaller locked regions.
Your patch also probably has a bug, because trans->queue.lock is not
initialized by __skb_queue_head_init() (in xfrm_input_init()).
Jörg

[1] https://lkml.org/lkml/2019/9/9/111
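
For reference, the two initializers differ roughly like this (paraphrased
from include/linux/skbuff.h; only the non-underscored variant sets up the
embedded spinlock):

    static inline void __skb_queue_head_init(struct sk_buff_head *list)
    {
        list->prev = list->next = (struct sk_buff *)list;
        list->qlen = 0;
    }

    static inline void skb_queue_head_init(struct sk_buff_head *list)
    {
        spin_lock_init(&list->lock);
        __skb_queue_head_init(list);
    }

So a fix that takes trans->queue.lock would also need the initialization in
xfrm_input_init() switched to skb_queue_head_init() (or an explicit
spin_lock_init()).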

On 20.10.2019 at 17:46, Tom Rix wrote:
> On PREEMPT_RT_FULL while running netperf, a corruption
> of the skb queue causes an oops.
>
> This appears to be caused by a race condition here
>          __skb_queue_tail(&trans->queue, skb);
>          tasklet_schedule(&trans->tasklet);
> Where the queue is changed before the tasklet is locked by
> tasklet_schedule.
>
> The fix is to use the skb queue lock.
>
> Signed-off-by: Tom Rix <trix@redhat.com>
> ---
>   net/xfrm/xfrm_input.c | 11 ++++++++++-
>   1 file changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
> index 9b599ed66d97..226dead86828 100644
> --- a/net/xfrm/xfrm_input.c
> +++ b/net/xfrm/xfrm_input.c
> @@ -758,12 +758,16 @@ static void xfrm_trans_reinject(unsigned long data)
>       struct xfrm_trans_tasklet *trans = (void *)data;
>       struct sk_buff_head queue;
>       struct sk_buff *skb;
> +    unsigned long flags;
>
>       __skb_queue_head_init(&queue);
> +    spin_lock_irqsave(&trans->queue.lock, flags);
>       skb_queue_splice_init(&trans->queue, &queue);
>
>       while ((skb = __skb_dequeue(&queue)))
>           XFRM_TRANS_SKB_CB(skb)->finish(dev_net(skb->dev), NULL, skb);
> +
> +    spin_unlock_irqrestore(&trans->queue.lock, flags);
>   }
>
>   int xfrm_trans_queue(struct sk_buff *skb,
> @@ -771,15 +775,20 @@ int xfrm_trans_queue(struct sk_buff *skb,
>                      struct sk_buff *))
>   {
>       struct xfrm_trans_tasklet *trans;
> +    unsigned long flags;
>
>       trans = this_cpu_ptr(&xfrm_trans_tasklet);
> +    spin_lock_irqsave(&trans->queue.lock, flags);
>
> -    if (skb_queue_len(&trans->queue) >= netdev_max_backlog)
> +    if (skb_queue_len(&trans->queue) >= netdev_max_backlog) {
> +        spin_unlock_irqrestore(&trans->queue.lock, flags);
>           return -ENOBUFS;
> +    }
>
>       XFRM_TRANS_SKB_CB(skb)->finish = finish;
>       __skb_queue_tail(&trans->queue, skb);
>       tasklet_schedule(&trans->tasklet);
> +    spin_unlock_irqrestore(&trans->queue.lock, flags);
>       return 0;
>   }
>   EXPORT_SYMBOL(xfrm_trans_queue);



